diff options
author | ssstvinc2 <sstvinc2@gmail.com> | 2017-03-23 15:46:10 -0400 |
---|---|---|
committer | ssstvinc2 <sstvinc2@gmail.com> | 2017-03-23 15:46:10 -0400 |
commit | 85f03a6d410295e1a59c6a8b579a32d9dbfe50ea (patch) | |
tree | 3dbcfe8d0ce61480a7e12b7ad626621a0859e7d0 /unbiasedFunctions.py | |
parent | 80f76db4e9846ed809f1c1310615e6f36421c824 (diff) |
Fixed H1 parsing for None types. Should resolve further crashes.
Diffstat (limited to 'unbiasedFunctions.py')
-rw-r--r-- | unbiasedFunctions.py | 15 |
1 files changed, 11 insertions, 4 deletions
diff --git a/unbiasedFunctions.py b/unbiasedFunctions.py
index 950e16d..fca2f2d 100644
--- a/unbiasedFunctions.py
+++ b/unbiasedFunctions.py
@@ -96,9 +96,9 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd,
print("SHOULDN'T GET HERE")
#strip out self-references
- description=description.replace(sourceName+"'s", 'our')
- description=description.replace(sourceName+"'", 'our')
- description=description.replace(sourceName, 'our')
+ description=description.replace(sourceName+"'s", '***')
+ description=description.replace(sourceName+"'", '***')
+ description=description.replace(sourceName, '***')
if debugging:
print(description)
@@ -123,7 +123,14 @@ def buildOutput(newsSourceArr):
f.close()
#set the random order for sources
- h1RandomSources=random.sample(range(len(newsSourceArr)), 4)
+ h1RandomSources=[]
+ while len(h1RandomSources)<4:
+ x=random.sample(range(len(newsSourceArr)), 1)[0]
+ if len(newsSourceArr[x].h1Arr)>0:
+ if x not in h1RandomSources:
+ h1RandomSources.append(x)
+ else:
+ print('\n\n@@@@\nNo H1 stories in '+newsSourceArr[x].name+'\n@@@@\n\n')
#For h2s and h3s, select N random sources (can repeat), then
#a non-repetitive random article from within
|