From 80f76db4e9846ed809f1c1310615e6f36421c824 Mon Sep 17 00:00:00 2001 From: ssstvinc2 Date: Thu, 23 Mar 2017 08:35:22 -0400 Subject: reworked main loop to hopefully prevent crashing --- main.py | 63 +++++++++++++++++++++------------------------------------------ parser.py | 39 +++++++++++++++++++++++---------------- 2 files changed, 44 insertions(+), 58 deletions(-) diff --git a/main.py b/main.py index 182ae26..c54487e 100755 --- a/main.py +++ b/main.py @@ -5,6 +5,7 @@ from unbiasedFunctions import * from parser import * import time + def main(): while True: print('-----------------------') @@ -23,49 +24,27 @@ def run(): ''' - #for some reason, The Guardian sometimes just doesn't work right? - #loop until it gets it right - ''' - h1='https://www.theguardian.com/us' - looped=False - while h1=='https://www.theguardian.com/us': - try: - gdn=buildGuardian() - h1=gdn.h1Arr[0] - except: - print('The Guardian: build error. Looping again.') - looped=True - ''' - gdn=buildGuardian() - sourceList.append(gdn) - - hil=buildTheHill() - sourceList.append(hil) - - #nyt=buildNYT() - #sourceList.append(nyt) - - npr=buildNPR() - sourceList.append(npr) - blz=buildBlaze() - sourceList.append(blz) - - bbc=buildBBC() - sourceList.append(bbc) - - nbc=buildNBC() - sourceList.append(nbc) - - cbs=buildCBS() - sourceList.append(cbs) - - #Weekly standard just doesn't update frequently enough - #wkl=buildWeeklyStandard() - #sourceList.append(wkl) - - fox=buildFoxNews() - sourceList.append(fox) + ### These values have to be the second half of the function name + ### E.g. Guardian calls buildGuardian(), etc. + sourceFnArr=['Guardian', 'TheHill', 'NPR', 'Blaze', 'BBC', 'NBC', 'CBS', + 'FoxNews', ] + + for source in sourceFnArr: + tries=0 + while tries<3: + try: + fn='build'+source + possibles = globals().copy() + possibles.update(locals()) + method = possibles.get(fn) + src=method() + sourceList.append(src) + break + except: + print('Build error. Looping again: '+source) + tries+=1 + time.sleep(tries) #scrape all urls and build data structure newsSourceArr=buildNewsSourceArr(sourceList) diff --git a/parser.py b/parser.py index 21f0669..671e2e5 100755 --- a/parser.py +++ b/parser.py @@ -119,7 +119,7 @@ def removeBadStoriesHelper(source, element, badStringList, arr): for i in range(len(arr)): for hed in arr[i]: if hed==None: - print("////////\nNone type found in removeBadStoriesHelper for "+source+"\n/////////") + print("////////\nNone type found in removeBadStoriesHelper for "+source.name+"\n/////////") break for item in badStringList: if item in getattr(hed, element): @@ -197,14 +197,23 @@ def buildGuardian(): url='http://www.theguardian.com/us' name='The Guardian US' - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, 'utf8') - - #get main headline - h1=content - h1=h1.split('', 1)[1] - h1=h1.split('', 1)[0] h1=h1.split('', 1)[1] h2=h2.split('', 1)[0] - while '\n\n\n\n