diff options
author | ssstvinc2 <sstvinc2@gmail.com> | 2017-03-23 15:46:10 -0400 |
---|---|---|
committer | ssstvinc2 <sstvinc2@gmail.com> | 2017-03-23 15:46:10 -0400 |
commit | 85f03a6d410295e1a59c6a8b579a32d9dbfe50ea (patch) | |
tree | 3dbcfe8d0ce61480a7e12b7ad626621a0859e7d0 | |
parent | 80f76db4e9846ed809f1c1310615e6f36421c824 (diff) |
Fixed H1 parsing for None types. Should resolve further crashes.
-rwxr-xr-x | parser.py | 1 | ||||
-rw-r--r-- | unbiasedFunctions.py | 15 |
2 files changed, 11 insertions, 5 deletions
@@ -208,7 +208,6 @@ def buildGuardian():
h1=h1.split('<a href="', 1)[1]
h1=h1.split('"', 1)[0]
- print(h1)
if h1!='https://www.theguardian.com/us':
break
else:
diff --git a/unbiasedFunctions.py b/unbiasedFunctions.py
index 950e16d..fca2f2d 100644
--- a/unbiasedFunctions.py
+++ b/unbiasedFunctions.py
@@ -96,9 +96,9 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd,
print("SHOULDN'T GET HERE")
#strip out self-references
- description=description.replace(sourceName+"'s", 'our')
- description=description.replace(sourceName+"'", 'our')
- description=description.replace(sourceName, 'our')
+ description=description.replace(sourceName+"'s", '***')
+ description=description.replace(sourceName+"'", '***')
+ description=description.replace(sourceName, '***')
if debugging:
print(description)
@@ -123,7 +123,14 @@ def buildOutput(newsSourceArr):
f.close()
#set the random order for sources
- h1RandomSources=random.sample(range(len(newsSourceArr)), 4)
+ h1RandomSources=[]
+ while len(h1RandomSources)<4:
+ x=random.sample(range(len(newsSourceArr)), 1)[0]
+ if len(newsSourceArr[x].h1Arr)>0:
+ if x not in h1RandomSources:
+ h1RandomSources.append(x)
+ else:
+ print('\n\n@@@@\nNo H1 stories in '+newsSourceArr[x].name+'\n@@@@\n\n')
#For h2s and h3s, select N random sources (can repeat), then
#a non-repetitive random article from within
|