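a bit of refactoring: extract pickStories() from buildOutput(), move image downloads into run(), rename printOutputHTML() to writeOutputHTML()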

author: Matt Singleton <matt@xcolour.net> 2017-04-19 22:59:21 -0400
committer: Matt Singleton <matt@xcolour.net> 2017-04-19 22:59:21 -0400
commit: c5a75b89716eabcefd1fe4cb880ffd98669a48a6 (patch)
tree: bd6135626fc15554fb3e39a136378036eb604ee5
parent: 8dffc67fae2c5a6cc1fe125809e0b74d8b4b28f3 (diff)
2 files changed, 51 insertions, 43 deletions
diff --git a/unbiased/main.py b/unbiased/main.py
index 60211ea..ba72710 100755
--- a/unbiased/main.py
+++ b/unbiased/main.py
@@ -34,54 +34,71 @@ def main():
             time.sleep(sleeptime)
 
 def run(webroot):
-    sourceList=[]
+    sources = []
 
     '''
-
     SOURCES TO ADD NEXT:
-    -ABC
     -REUTERS
     -Town Hall
-
     '''
 
     logger.debug('Running with webroot="{}"'.format(webroot))
 
-
     ### These values have to be the second half of the function name
     ### E.g. Guardian calls buildGuardian(), etc.
-    sourceFnArr=['Guardian', 'TheHill', 'NPR', 'BBC', 'NBC', 'CBS',
-                 'FoxNews', 'WashTimes', 'CSM', 'ABC'] #'Blaze'
+    sourceFnArr = [
+        'Guardian',
+        'TheHill',
+        'NPR',
+        'BBC',
+        'NBC',
+        'CBS',
+        'FoxNews',
+        'WashTimes',
+        'CSM',
+        'ABC',
+    ]
 
     for source in sourceFnArr:
         logger.info('Crawling {}'.format(source))
-        tries=0
-        while tries<3:
+        tries = 0
+        while tries < 3:
             time.sleep(tries)
             try:
-                fn='build'+source
+                fn = 'build' + source
                 possibles = globals().copy()
                 possibles.update(locals())
                 method = possibles.get(fn)
-                src=method()
-                sourceList.append(src)
+                src = method()
+                sources.append(src)
                 break
             except Exception as ex:
-                tries+=1
+                tries += 1
                 if tries == 3:
                     logger.error('Build failed. source={} ex={}'.format(source, ex))
                 else:
                     logger.debug('Build failed, retrying. source={} ex={}'.format(source, ex))
-
-    #scrape all urls and build data structure
-    newsSourceArr = sourceList
+    logger.info('Parsed home pages for: {}'.format([x.name for x in sources]))
+
+    top_stories, middle_stories, bottom_stories = pickStories(sources)
+    logger.info('Picked top stories from: {}'.format([x.source for x in top_stories]))
+    logger.info('Picked middle stories from: {}'.format([x.source for x in middle_stories]))
+    logger.info('Picked bottom stories from: {}'.format([x.source for x in bottom_stories]))
+
+    # download images
+    img_idx = 0
+    for story in top_stories:
+        story.img = pullImage(story.img, img_idx, webroot, 350, 200)
+        img_idx += 1
+    for story in middle_stories:
+        story.img = pullImage(story.img, img_idx, webroot, 150, 100)
+        img_idx += 1
 
     #build the output file HTML
-    outputHTML=buildOutput(newsSourceArr, webroot)
+    outputHTML = buildOutput(top_stories, middle_stories, bottom_stories)
 
     #print the output file HTML
-    printOutputHTML(outputHTML, webroot)
-
+    writeOutputHTML(outputHTML, webroot)
 
 if __name__=="__main__":
     main()
diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py
index 76c80b0..2053ba5 100644
--- a/unbiased/unbiasedFunctions.py
+++ b/unbiased/unbiasedFunctions.py
@@ -141,15 +141,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd,
         return None
 
 
-def buildOutput(newsSourceArr, webroot):
-    #read in the template html file
-    from jinja2 import Environment, PackageLoader, select_autoescape
-    env = Environment(
-        loader=PackageLoader('unbiased', 'html_template'),
-        autoescape=select_autoescape(['html', 'xml'])
-    )
-    template = env.get_template('unbiased.jinja.html')
-
+def pickStories(newsSourceArr):
     #set the random order for sources
     h1RandomSources=[]
     while len(h1RandomSources)<4:
@@ -192,18 +184,12 @@ def buildOutput(newsSourceArr, webroot):
         source=newsSourceArr[h1RandomSources[i]]
         randomArticle=random.sample(range(len(source.h1Arr)), 1)[0]
         article=source.h1Arr[randomArticle]
-        img_name = pullImage(article.img, image_index, webroot, 350, 200)
-        image_index += 1
-        article.img = img_name
         top_stories.append(article)
 
     middle_stories = []
     for i in range(len(h2RandomPairs)):
         pair=h2RandomPairs[i]
         article=newsSourceArr[pair[0]].h2Arr[pair[1]]
-        img_name = pullImage(article.img, image_index, webroot, 150, 100)
-        image_index += 1
-        article.img = img_name
         middle_stories.append(article)
 
     bottom_stories = []
@@ -212,14 +198,21 @@ def buildOutput(newsSourceArr, webroot):
         article=newsSourceArr[pair[0]].h3Arr[pair[1]]
         bottom_stories.append(article)
 
-    sourcesStr=''
-    for i in range(len(newsSourceArr)-1):
-        sourcesStr+=newsSourceArr[i].name+', '
-    sourcesStr+=newsSourceArr[-1].name
-    logger.info('Successfully parsed: '+sourcesStr)
+    return top_stories, middle_stories, bottom_stories
+
+def buildOutput(top_stories, middle_stories, bottom_stories):
+    #read in the template html file
+    from jinja2 import Environment, PackageLoader, select_autoescape
+    env = Environment(
+        loader=PackageLoader('unbiased', 'html_template'),
+        autoescape=select_autoescape(['html', 'xml'])
+    )
+    template = env.get_template('unbiased.jinja.html')
 
     timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
 
+    sourcesStr = ', '.join(set([x.source for x in top_stories] + [x.source for x in middle_stories] + [x.source for x in bottom_stories]))
+
     html = template.render(
         timestamp = timestamp,
         top_stories = top_stories,
@@ -228,13 +221,11 @@ def buildOutput(newsSourceArr, webroot):
         sources = sourcesStr,
     )
 
-
     #return updated text
     return html
 
-def printOutputHTML(outputHTML, outDir):
-    timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
-    outputHTML=outputHTML.replace('xxTimexx', timestamp)
+def writeOutputHTML(outputHTML, outDir):
+    timestamp = time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
 
     with open(os.path.join(outDir, 'index.html'), 'w') as fp:
         fp.write(outputHTML)
author	Matt Singleton <matt@xcolour.net>	2017-04-19 22:59:21 -0400
committer	Matt Singleton <matt@xcolour.net>	2017-04-19 22:59:21 -0400
commit	c5a75b89716eabcefd1fe4cb880ffd98669a48a6 (patch)
tree	bd6135626fc15554fb3e39a136378036eb604ee5
parent	8dffc67fae2c5a6cc1fe125809e0b74d8b4b28f3 (diff)