summary refs log tree commit diff
diff options
context:
space:
mode:
author Matt Singleton <matt@xcolour.net> 2017-04-17 14:32:09 -0400
committer Matt Singleton <matt@xcolour.net> 2017-04-17 14:32:09 -0400
commit e5b8cdc8a02a1d6e026e2e016508a8ecb443e181 (patch)
tree b901b3b4f0656eb3a0ebc2e7447e9d52e6ee997a
parent 6a0a5579ea9b3674f011eabd2a4c339100a66ba8 (diff)
fix fox urls
-rwxr-xr-x unbiased/parser.py 5
1 files changed, 4 insertions, 1 deletions
diff --git a/unbiased/parser.py b/unbiased/parser.py
index ea2a187..f068ae8 100755
--- a/unbiased/parser.py
+++ b/unbiased/parser.py
@@ -827,6 +827,7 @@ def buildFoxNews(scratchDir):
h1=h1.split('<h1><a href="', 1)[1]
h1=h1.split('"', 1)[0]
h1s=[h1]
+ h1s = ['http:' + x if x.startswith('//') else x for x in h1s]
#GET SECONDARY HEADLINES
h2=content
@@ -838,6 +839,7 @@ def buildFoxNews(scratchDir):
x=h2.split('"', 1)[0]
if h1 not in x:
h2s.append(x)
+ h2s = ['http:' + x if x.startswith('//') else x for x in h2s]
#GET TERTIARY HEADLINES
h3=content
@@ -849,8 +851,9 @@ def buildFoxNews(scratchDir):
x=h3.split('"', 1)[0]
if h1 not in x:
h3s.append(x)
+ h3s = ['http:' + x if x.startswith('//') else x for x in h3s]
- h1s, h2s, h3s = removeDuplicates([h1], h2s, h3s)
+ h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
fox=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir)
#REMOVE BAD STORIES