diff options
-rw-r--r-- | main.py | 2 | ||||
-rw-r--r-- | parser.py | 22 | ||||
-rw-r--r-- | unbiasedFunctions.py | 18 |
3 files changed, 28 insertions, 14 deletions
@@ -42,8 +42,6 @@ def run(): h1=gdn.h1Arr[0] except: print('The Guardian: build error. Looping again.') - if looped: - print('Guardian loop') looped=True sourceList.append(gdn) @@ -100,6 +100,16 @@ def removeDuplicates(h1s, h2s, h3s): +def removalNotification(source, title, reason, value): + print('*************************') + print('\t\tSTORY REMOVED') + print('SOURCE: '+source) + print('TITLE: \t'+title) + print('REASON: '+reason) + print('VALUE: \t'+value) + print('*************************\n\n') + + def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, badURLArr=None): arr=[source.h1Arr, source.h2Arr, source.h3Arr] @@ -115,7 +125,7 @@ def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, b if i==0: arr[0].append(arr[1][0]) arr[1].remove(arr[1][0]) - print('Removed:\n'+source.name+'\n'+hed.title+' from '+source.name+'\nReason: Title ('+item+')\n') + removalNotification(source.name, hed.title, 'Title', item) if badDescArr!=None: @@ -129,7 +139,7 @@ def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, b if i==0: arr[0].append(arr[1][0]) arr[1].remove(arr[1][0]) - print('Removed:\n'+source.name+'\n'+hed.title+' from '+source.name+'\nReason: Description ('+item+')\n') + removalNotification(source.name, hed.title, 'Description', item) if badAuthorArr!=None: @@ -143,7 +153,7 @@ def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, b if i==0: arr[0].append(arr[1][0]) arr[1].remove(arr[1][0]) - print('Removed:\n'+source.name+'\n'+hed.title+' from '+source.name+'\nReason: Author ('+item+')\n') + removalNotification(source.name, hed.title, 'Author', item) if badImgArr!=None: @@ -157,7 +167,7 @@ def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, b if i==0: arr[0].append(arr[1][0]) arr[1].remove(arr[1][0]) - print('Removed:\n'+source.name+'\n'+hed.title+' from '+source.name+'\nReason: Image ('+item+')\n') + removalNotification(source.name, hed.title, 'Image', item) if badURLArr!=None: for i in range(len(arr)): @@ -170,7 +180,7 @@ def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, b if i==0: arr[0].append(arr[1][0]) arr[1].remove(arr[1][0]) - print('Removed:\n'+source.name+'\n'+hed.title+' from '+source.name+'\nReason: URL ('+item+')\n') + removalNotification(source.name, hed.title, 'URL', item) return source @@ -328,7 +338,7 @@ def buildBlaze(): blz=buildNewsSource2(name, url, h1s, h2s, h3s) - blz=removeBadStories(blz, None, None, ['Matt Walsh', 'Tomi Lahren', 'Dana Loesch', 'Mike Opelka'], None) + blz=removeBadStories(blz, None, ['Lawrence Jones'], ['Matt Walsh', 'Tomi Lahren', 'Dana Loesch', 'Mike Opelka'], None) #The Blaze has dumb, short description fields, so we need to grab #the first x characters of actual article text instead diff --git a/unbiasedFunctions.py b/unbiasedFunctions.py index cab7681..444428f 100644 --- a/unbiasedFunctions.py +++ b/unbiasedFunctions.py @@ -106,7 +106,11 @@ def buildArticle(url, sourceName):#, titleDelStart, titleDelEnd, imgDelStart, im return a
except:
- print("Article parsing error in buildArticle() for URL: "+url+" in source "+sourceName+'\n')
+ print('^^^^^^^^^^^^^^^^^^^^^^^^^')
+ print('\tARTICLE PARSING ERROR')
+ print('SOURCE: '+sourceName)
+ print('URL: \t'+url)
+ print('^^^^^^^^^^^^^^^^^^^^^^^^^ \n\n')
return None
@@ -131,11 +135,13 @@ def buildOutput(newsSourceArr): while len(h3RandomPairs) < 12:
x=random.sample(range(len(newsSourceArr)), 1)[0]
print(newsSourceArr[x].name)
- y=random.sample(range(len(newsSourceArr[x].h3Arr)), 1)[0]
- pair=[x,y]
- if not pair in h3RandomPairs:
- h3RandomPairs.append(pair)
-
+ if len(newsSourceArr[x].h3Arr) > 0:
+ y=random.sample(range(len(newsSourceArr[x].h3Arr)), 1)[0]
+ pair=[x,y]
+ if not pair in h3RandomPairs:
+ h3RandomPairs.append(pair)
+ else:
+ continue
#replace html template locations with data from newsSourceArr
for i in range(len(h1RandomSources)):
|