diff options
author | Matt Singleton <matt@xcolour.net> | 2017-04-17 14:32:09 -0400 |
---|---|---|
committer | Matt Singleton <matt@xcolour.net> | 2017-04-17 14:32:09 -0400 |
commit | e5b8cdc8a02a1d6e026e2e016508a8ecb443e181 (patch) | |
tree | b901b3b4f0656eb3a0ebc2e7447e9d52e6ee997a | |
parent | 6a0a5579ea9b3674f011eabd2a4c339100a66ba8 (diff) |
fix fox urls
-rwxr-xr-x | unbiased/parser.py | 5 |
1 files changed, 4 insertions, 1 deletions
diff --git a/unbiased/parser.py b/unbiased/parser.py
index ea2a187..f068ae8 100755
--- a/unbiased/parser.py
+++ b/unbiased/parser.py
@@ -827,6 +827,7 @@ def buildFoxNews(scratchDir):
h1=h1.split('<h1><a href="', 1)[1]
h1=h1.split('"', 1)[0]
h1s=[h1]
+ h1s = ['http:' + x if x.startswith('//') else x for x in h1s]
#GET SECONDARY HEADLINES
h2=content
@@ -838,6 +839,7 @@ def buildFoxNews(scratchDir):
x=h2.split('"', 1)[0]
if h1 not in x:
h2s.append(x)
+ h2s = ['http:' + x if x.startswith('//') else x for x in h2s]
#GET TERTIARY HEADLINES
h3=content
@@ -849,8 +851,9 @@ def buildFoxNews(scratchDir):
x=h3.split('"', 1)[0]
if h1 not in x:
h3s.append(x)
+ h3s = ['http:' + x if x.startswith('//') else x for x in h3s]
- h1s, h2s, h3s = removeDuplicates([h1], h2s, h3s)
+ h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
fox=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir)
#REMOVE BAD STORIES
|