From c5a75b89716eabcefd1fe4cb880ffd98669a48a6 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 19 Apr 2017 22:59:21 -0400 Subject: a bit of refactoring --- unbiased/main.py | 55 ++++++++++++++++++++++++++++--------------- unbiased/unbiasedFunctions.py | 39 ++++++++++++------------------ 2 files changed, 51 insertions(+), 43 deletions(-) diff --git a/unbiased/main.py b/unbiased/main.py index 60211ea..ba72710 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -34,54 +34,71 @@ def main(): time.sleep(sleeptime) def run(webroot): - sourceList=[] + sources = [] ''' - SOURCES TO ADD NEXT: - -ABC -REUTERS -Town Hall - ''' logger.debug('Running with webroot="{}"'.format(webroot)) - ### These values have to be the second half of the function name ### E.g. Guardian calls buildGuardian(), etc. - sourceFnArr=['Guardian', 'TheHill', 'NPR', 'BBC', 'NBC', 'CBS', - 'FoxNews', 'WashTimes', 'CSM', 'ABC'] #'Blaze' + sourceFnArr = [ + 'Guardian', + 'TheHill', + 'NPR', + 'BBC', + 'NBC', + 'CBS', + 'FoxNews', + 'WashTimes', + 'CSM', + 'ABC', + ] for source in sourceFnArr: logger.info('Crawling {}'.format(source)) - tries=0 - while tries<3: + tries = 0 + while tries < 3: time.sleep(tries) try: - fn='build'+source + fn = 'build' + source possibles = globals().copy() possibles.update(locals()) method = possibles.get(fn) - src=method() - sourceList.append(src) + src = method() + sources.append(src) break except Exception as ex: - tries+=1 + tries += 1 if tries == 3: logger.error('Build failed. source={} ex={}'.format(source, ex)) else: logger.debug('Build failed, retrying. source={} ex={}'.format(source, ex)) - - #scrape all urls and build data structure - newsSourceArr = sourceList + logger.info('Parsed home pages for: {}'.format([x.name for x in sources])) + + top_stories, middle_stories, bottom_stories = pickStories(sources) + logger.info('Picked top stories from: {}'.format([x.source for x in top_stories])) + logger.info('Picked middle stories from: {}'.format([x.source for x in middle_stories])) + logger.info('Picked bottom stories from: {}'.format([x.source for x in bottom_stories])) + + # download images + img_idx = 0 + for story in top_stories: + story.img = pullImage(story.img, img_idx, webroot, 350, 200) + img_idx += 1 + for story in middle_stories: + story.img = pullImage(story.img, img_idx, webroot, 150, 100) + img_idx += 1 #build the output file HTML - outputHTML=buildOutput(newsSourceArr, webroot) + outputHTML = buildOutput(top_stories, middle_stories, bottom_stories) #print the output file HTML - printOutputHTML(outputHTML, webroot) - + writeOutputHTML(outputHTML, webroot) if __name__=="__main__": main() diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 76c80b0..2053ba5 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -141,15 +141,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, return None -def buildOutput(newsSourceArr, webroot): - #read in the template html file - from jinja2 import Environment, PackageLoader, select_autoescape - env = Environment( - loader=PackageLoader('unbiased', 'html_template'), - autoescape=select_autoescape(['html', 'xml']) - ) - template = env.get_template('unbiased.jinja.html') - +def pickStories(newsSourceArr): #set the random order for sources h1RandomSources=[] while len(h1RandomSources)<4: @@ -192,18 +184,12 @@ def buildOutput(newsSourceArr, webroot): source=newsSourceArr[h1RandomSources[i]] randomArticle=random.sample(range(len(source.h1Arr)), 1)[0] article=source.h1Arr[randomArticle] - img_name = pullImage(article.img, image_index, webroot, 350, 200) - image_index += 1 - article.img = img_name top_stories.append(article) middle_stories = [] for i in range(len(h2RandomPairs)): pair=h2RandomPairs[i] article=newsSourceArr[pair[0]].h2Arr[pair[1]] - img_name = pullImage(article.img, image_index, webroot, 150, 100) - image_index += 1 - article.img = img_name middle_stories.append(article) bottom_stories = [] @@ -212,14 +198,21 @@ def buildOutput(newsSourceArr, webroot): article=newsSourceArr[pair[0]].h3Arr[pair[1]] bottom_stories.append(article) - sourcesStr='' - for i in range(len(newsSourceArr)-1): - sourcesStr+=newsSourceArr[i].name+', ' - sourcesStr+=newsSourceArr[-1].name - logger.info('Successfully parsed: '+sourcesStr) + return top_stories, middle_stories, bottom_stories + +def buildOutput(top_stories, middle_stories, bottom_stories): + #read in the template html file + from jinja2 import Environment, PackageLoader, select_autoescape + env = Environment( + loader=PackageLoader('unbiased', 'html_template'), + autoescape=select_autoescape(['html', 'xml']) + ) + template = env.get_template('unbiased.jinja.html') timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) + sourcesStr = ', '.join(set([x.source for x in top_stories] + [x.source for x in middle_stories] + [x.source for x in bottom_stories])) + html = template.render( timestamp = timestamp, top_stories = top_stories, @@ -228,13 +221,11 @@ def buildOutput(newsSourceArr, webroot): sources = sourcesStr, ) - #return updated text return html -def printOutputHTML(outputHTML, outDir): - timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) - outputHTML=outputHTML.replace('xxTimexx', timestamp) +def writeOutputHTML(outputHTML, outDir): + timestamp = time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) with open(os.path.join(outDir, 'index.html'), 'w') as fp: fp.write(outputHTML) -- cgit v1.2.3