summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ssstvinc2 <sstvinc2@gmail.com> 2017-03-23 15:46:10 -0400
committer: ssstvinc2 <sstvinc2@gmail.com> 2017-03-23 15:46:10 -0400
commit: 85f03a6d410295e1a59c6a8b579a32d9dbfe50ea (patch)
tree: 3dbcfe8d0ce61480a7e12b7ad626621a0859e7d0
parent: 80f76db4e9846ed809f1c1310615e6f36421c824 (diff)
Fixed H1 parsing for None types. Should resolve further crashes.
-rwxr-xr-x parser.py 1
-rw-r--r-- unbiasedFunctions.py 15
2 files changed, 11 insertions, 5 deletions
diff --git a/parser.py b/parser.py
index 671e2e5..19333e8 100755
--- a/parser.py
+++ b/parser.py
@@ -208,7 +208,6 @@ def buildGuardian():
h1=h1.split('<a href="', 1)[1]
h1=h1.split('"', 1)[0]
- print(h1)
if h1!='https://www.theguardian.com/us':
break
else:
diff --git a/unbiasedFunctions.py b/unbiasedFunctions.py
index 950e16d..fca2f2d 100644
--- a/unbiasedFunctions.py
+++ b/unbiasedFunctions.py
@@ -96,9 +96,9 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd,
print("SHOULDN'T GET HERE")
#strip out self-references
- description=description.replace(sourceName+"'s", 'our')
- description=description.replace(sourceName+"'", 'our')
- description=description.replace(sourceName, 'our')
+ description=description.replace(sourceName+"'s", '***')
+ description=description.replace(sourceName+"'", '***')
+ description=description.replace(sourceName, '***')
if debugging:
print(description)
@@ -123,7 +123,14 @@ def buildOutput(newsSourceArr):
f.close()
#set the random order for sources
- h1RandomSources=random.sample(range(len(newsSourceArr)), 4)
+ h1RandomSources=[]
+ while len(h1RandomSources)<4:
+ x=random.sample(range(len(newsSourceArr)), 1)[0]
+ if len(newsSourceArr[x].h1Arr)>0:
+ if x not in h1RandomSources:
+ h1RandomSources.append(x)
+ else:
+ print('\n\n@@@@\nNo H1 stories in '+newsSourceArr[x].name+'\n@@@@\n\n')
#For h2s and h3s, select N random sources (can repeat), then
#a non-repetitive random article from within