diff options
author | Matt Singleton <matt@xcolour.net> | 2017-04-19 22:59:21 -0400 |
---|---|---|
committer | Matt Singleton <matt@xcolour.net> | 2017-04-19 22:59:21 -0400 |
commit | c5a75b89716eabcefd1fe4cb880ffd98669a48a6 (patch) | |
tree | bd6135626fc15554fb3e39a136378036eb604ee5 | |
parent | 8dffc67fae2c5a6cc1fe125809e0b74d8b4b28f3 (diff) |
a bit of refactoring
-rwxr-xr-x | unbiased/main.py | 55 |
-rw-r--r-- | unbiased/unbiasedFunctions.py | 39 |
2 files changed, 51 insertions, 43 deletions
diff --git a/unbiased/main.py b/unbiased/main.py index 60211ea..ba72710 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -34,54 +34,71 @@ def main(): time.sleep(sleeptime) def run(webroot): - sourceList=[] + sources = [] ''' - SOURCES TO ADD NEXT: - -ABC -REUTERS -Town Hall - ''' logger.debug('Running with webroot="{}"'.format(webroot)) - ### These values have to be the second half of the function name ### E.g. Guardian calls buildGuardian(), etc. - sourceFnArr=['Guardian', 'TheHill', 'NPR', 'BBC', 'NBC', 'CBS', - 'FoxNews', 'WashTimes', 'CSM', 'ABC'] #'Blaze' + sourceFnArr = [ + 'Guardian', + 'TheHill', + 'NPR', + 'BBC', + 'NBC', + 'CBS', + 'FoxNews', + 'WashTimes', + 'CSM', + 'ABC', + ] for source in sourceFnArr: logger.info('Crawling {}'.format(source)) - tries=0 - while tries<3: + tries = 0 + while tries < 3: time.sleep(tries) try: - fn='build'+source + fn = 'build' + source possibles = globals().copy() possibles.update(locals()) method = possibles.get(fn) - src=method() - sourceList.append(src) + src = method() + sources.append(src) break except Exception as ex: - tries+=1 + tries += 1 if tries == 3: logger.error('Build failed. source={} ex={}'.format(source, ex)) else: logger.debug('Build failed, retrying. 
source={} ex={}'.format(source, ex)) - - #scrape all urls and build data structure - newsSourceArr = sourceList + logger.info('Parsed home pages for: {}'.format([x.name for x in sources])) + + top_stories, middle_stories, bottom_stories = pickStories(sources) + logger.info('Picked top stories from: {}'.format([x.source for x in top_stories])) + logger.info('Picked middle stories from: {}'.format([x.source for x in middle_stories])) + logger.info('Picked bottom stories from: {}'.format([x.source for x in bottom_stories])) + + # download images + img_idx = 0 + for story in top_stories: + story.img = pullImage(story.img, img_idx, webroot, 350, 200) + img_idx += 1 + for story in middle_stories: + story.img = pullImage(story.img, img_idx, webroot, 150, 100) + img_idx += 1 #build the output file HTML - outputHTML=buildOutput(newsSourceArr, webroot) + outputHTML = buildOutput(top_stories, middle_stories, bottom_stories) #print the output file HTML - printOutputHTML(outputHTML, webroot) - + writeOutputHTML(outputHTML, webroot) if __name__=="__main__": main() diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 76c80b0..2053ba5 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -141,15 +141,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, return None
-def buildOutput(newsSourceArr, webroot):
- #read in the template html file
- from jinja2 import Environment, PackageLoader, select_autoescape
- env = Environment(
- loader=PackageLoader('unbiased', 'html_template'),
- autoescape=select_autoescape(['html', 'xml'])
- )
- template = env.get_template('unbiased.jinja.html')
-
+def pickStories(newsSourceArr):
#set the random order for sources
h1RandomSources=[]
while len(h1RandomSources)<4:
@@ -192,18 +184,12 @@ def buildOutput(newsSourceArr, webroot):
source=newsSourceArr[h1RandomSources[i]]
randomArticle=random.sample(range(len(source.h1Arr)), 1)[0]
article=source.h1Arr[randomArticle]
- img_name = pullImage(article.img, image_index, webroot, 350, 200)
- image_index += 1
- article.img = img_name
top_stories.append(article)
middle_stories = []
for i in range(len(h2RandomPairs)):
pair=h2RandomPairs[i]
article=newsSourceArr[pair[0]].h2Arr[pair[1]]
- img_name = pullImage(article.img, image_index, webroot, 150, 100)
- image_index += 1
- article.img = img_name
middle_stories.append(article)
bottom_stories = []
@@ -212,14 +198,21 @@ def buildOutput(newsSourceArr, webroot):
article=newsSourceArr[pair[0]].h3Arr[pair[1]]
bottom_stories.append(article)
- sourcesStr=''
- for i in range(len(newsSourceArr)-1):
- sourcesStr+=newsSourceArr[i].name+', '
- sourcesStr+=newsSourceArr[-1].name
- logger.info('Successfully parsed: '+sourcesStr)
+ return top_stories, middle_stories, bottom_stories
+
+def buildOutput(top_stories, middle_stories, bottom_stories):
+ #read in the template html file
+ from jinja2 import Environment, PackageLoader, select_autoescape
+ env = Environment(
+ loader=PackageLoader('unbiased', 'html_template'),
+ autoescape=select_autoescape(['html', 'xml'])
+ )
+ template = env.get_template('unbiased.jinja.html')
timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
+ sourcesStr = ', '.join(set([x.source for x in top_stories] + [x.source for x in middle_stories] + [x.source for x in bottom_stories]))
+
html = template.render(
timestamp = timestamp,
top_stories = top_stories,
@@ -228,13 +221,11 @@ def buildOutput(newsSourceArr, webroot):
sources = sourcesStr,
)
-
#return updated text
return html
-def printOutputHTML(outputHTML, outDir):
- timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
- outputHTML=outputHTML.replace('xxTimexx', timestamp)
+def writeOutputHTML(outputHTML, outDir):
+ timestamp = time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
with open(os.path.join(outDir, 'index.html'), 'w') as fp:
fp.write(outputHTML)
|