summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x unbiased/main.py 55
-rw-r--r-- unbiased/unbiasedFunctions.py 39
2 files changed, 51 insertions, 43 deletions
diff --git a/unbiased/main.py b/unbiased/main.py
index 60211ea..ba72710 100755
--- a/unbiased/main.py
+++ b/unbiased/main.py
@@ -34,54 +34,71 @@ def main():
time.sleep(sleeptime)
def run(webroot):
- sourceList=[]
+ sources = []
'''
-
SOURCES TO ADD NEXT:
- -ABC
-REUTERS
-Town Hall
-
'''
logger.debug('Running with webroot="{}"'.format(webroot))
-
### These values have to be the second half of the function name
### E.g. Guardian calls buildGuardian(), etc.
- sourceFnArr=['Guardian', 'TheHill', 'NPR', 'BBC', 'NBC', 'CBS',
- 'FoxNews', 'WashTimes', 'CSM', 'ABC'] #'Blaze'
+ sourceFnArr = [
+ 'Guardian',
+ 'TheHill',
+ 'NPR',
+ 'BBC',
+ 'NBC',
+ 'CBS',
+ 'FoxNews',
+ 'WashTimes',
+ 'CSM',
+ 'ABC',
+ ]
for source in sourceFnArr:
logger.info('Crawling {}'.format(source))
- tries=0
- while tries<3:
+ tries = 0
+ while tries < 3:
time.sleep(tries)
try:
- fn='build'+source
+ fn = 'build' + source
possibles = globals().copy()
possibles.update(locals())
method = possibles.get(fn)
- src=method()
- sourceList.append(src)
+ src = method()
+ sources.append(src)
break
except Exception as ex:
- tries+=1
+ tries += 1
if tries == 3:
logger.error('Build failed. source={} ex={}'.format(source, ex))
else:
logger.debug('Build failed, retrying. source={} ex={}'.format(source, ex))
-
- #scrape all urls and build data structure
- newsSourceArr = sourceList
+ logger.info('Parsed home pages for: {}'.format([x.name for x in sources]))
+
+ top_stories, middle_stories, bottom_stories = pickStories(sources)
+ logger.info('Picked top stories from: {}'.format([x.source for x in top_stories]))
+ logger.info('Picked middle stories from: {}'.format([x.source for x in middle_stories]))
+ logger.info('Picked bottom stories from: {}'.format([x.source for x in bottom_stories]))
+
+ # download images
+ img_idx = 0
+ for story in top_stories:
+ story.img = pullImage(story.img, img_idx, webroot, 350, 200)
+ img_idx += 1
+ for story in middle_stories:
+ story.img = pullImage(story.img, img_idx, webroot, 150, 100)
+ img_idx += 1
#build the output file HTML
- outputHTML=buildOutput(newsSourceArr, webroot)
+ outputHTML = buildOutput(top_stories, middle_stories, bottom_stories)
#print the output file HTML
- printOutputHTML(outputHTML, webroot)
-
+ writeOutputHTML(outputHTML, webroot)
if __name__=="__main__":
main()
diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py
index 76c80b0..2053ba5 100644
--- a/unbiased/unbiasedFunctions.py
+++ b/unbiased/unbiasedFunctions.py
@@ -141,15 +141,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd,
return None
-def buildOutput(newsSourceArr, webroot):
- #read in the template html file
- from jinja2 import Environment, PackageLoader, select_autoescape
- env = Environment(
- loader=PackageLoader('unbiased', 'html_template'),
- autoescape=select_autoescape(['html', 'xml'])
- )
- template = env.get_template('unbiased.jinja.html')
-
+def pickStories(newsSourceArr):
#set the random order for sources
h1RandomSources=[]
while len(h1RandomSources)<4:
@@ -192,18 +184,12 @@ def buildOutput(newsSourceArr, webroot):
source=newsSourceArr[h1RandomSources[i]]
randomArticle=random.sample(range(len(source.h1Arr)), 1)[0]
article=source.h1Arr[randomArticle]
- img_name = pullImage(article.img, image_index, webroot, 350, 200)
- image_index += 1
- article.img = img_name
top_stories.append(article)
middle_stories = []
for i in range(len(h2RandomPairs)):
pair=h2RandomPairs[i]
article=newsSourceArr[pair[0]].h2Arr[pair[1]]
- img_name = pullImage(article.img, image_index, webroot, 150, 100)
- image_index += 1
- article.img = img_name
middle_stories.append(article)
bottom_stories = []
@@ -212,14 +198,21 @@ def buildOutput(newsSourceArr, webroot):
article=newsSourceArr[pair[0]].h3Arr[pair[1]]
bottom_stories.append(article)
- sourcesStr=''
- for i in range(len(newsSourceArr)-1):
- sourcesStr+=newsSourceArr[i].name+', '
- sourcesStr+=newsSourceArr[-1].name
- logger.info('Successfully parsed: '+sourcesStr)
+ return top_stories, middle_stories, bottom_stories
+
+def buildOutput(top_stories, middle_stories, bottom_stories):
+ #read in the template html file
+ from jinja2 import Environment, PackageLoader, select_autoescape
+ env = Environment(
+ loader=PackageLoader('unbiased', 'html_template'),
+ autoescape=select_autoescape(['html', 'xml'])
+ )
+ template = env.get_template('unbiased.jinja.html')
timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
+ sourcesStr = ', '.join(set([x.source for x in top_stories] + [x.source for x in middle_stories] + [x.source for x in bottom_stories]))
+
html = template.render(
timestamp = timestamp,
top_stories = top_stories,
@@ -228,13 +221,11 @@ def buildOutput(newsSourceArr, webroot):
sources = sourcesStr,
)
-
#return updated text
return html
-def printOutputHTML(outputHTML, outDir):
- timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
- outputHTML=outputHTML.replace('xxTimexx', timestamp)
+def writeOutputHTML(outputHTML, outDir):
+ timestamp = time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
with open(os.path.join(outDir, 'index.html'), 'w') as fp:
fp.write(outputHTML)