summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- main.py 4
-rw-r--r-- parser.py 9
2 files changed, 10 insertions, 3 deletions
diff --git a/main.py b/main.py
index 296de05..9120906 100644
--- a/main.py
+++ b/main.py
@@ -16,8 +16,8 @@ def run():
sourceList=[]
- nyt=buildNYT()
- sourceList.append(nyt)
+ #nyt=buildNYT()
+ #sourceList.append(nyt)
blz=buildBlaze()
sourceList.append(blz)
diff --git a/parser.py b/parser.py
index ef90eee..31c09da 100644
--- a/parser.py
+++ b/parser.py
@@ -225,7 +225,7 @@ def buildBlaze():
h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
blz=buildNewsSource2(name, url, h1s, h2s, h3s)
- blz=removeBadStories(blz, None, ['Tomi Lahren'], None)
+ blz=removeBadStories(blz, None, ['Tomi Lahren', 'Dana Loesch'], None)
#The Blaze has dumb, short description fields, so we need to grab
#the first x characters of actual article text instead
@@ -323,6 +323,11 @@ def buildNBC():
if h1 not in x:
h3s.append(url+x)
+ #adjust for today.com urls
+ for arr in [h1s, h2s, h3s]:
+ for i in range(len(arr)):
+ if 'today.com' in arr[i]:
+ arr[i]=arr[i].split('.com', 1)[1]
h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
nbc=buildNewsSource2(name, url, h1s, h2s, h3s)
@@ -549,6 +554,8 @@ def buildNYT():
if (h1 not in x) and (x not in h2s):
h2s.append(x)
+ print(h2s)
+
#GET TERTIARY HEADLINES
h3=content
h3s=[]