From e3d744821919dedcf4f8466c72587008c062acbc Mon Sep 17 00:00:00 2001
From: sstvinc2 <sstvinc2@gmail.com>
Date: Thu, 16 Feb 2017 10:27:51 -0600
Subject: Pulled NYT again; minor fixes for NBC, Blaze

---
 main.py   | 4 ++--
 parser.py | 9 ++++++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/main.py b/main.py
index 296de05..9120906 100644
--- a/main.py
+++ b/main.py
@@ -16,8 +16,8 @@ def run():
     sourceList=[]
 
 
-    nyt=buildNYT()
-    sourceList.append(nyt)
+    #nyt=buildNYT()
+    #sourceList.append(nyt)
 
     blz=buildBlaze()
     sourceList.append(blz)
diff --git a/parser.py b/parser.py
index ef90eee..31c09da 100644
--- a/parser.py
+++ b/parser.py
@@ -225,7 +225,7 @@ def buildBlaze():
     h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
     blz=buildNewsSource2(name, url, h1s, h2s, h3s)
 
-    blz=removeBadStories(blz, None, ['Tomi Lahren'], None)
+    blz=removeBadStories(blz, None, ['Tomi Lahren', 'Dana Loesch'], None)
 
     #The Blaze has dumb, short description fields, so we need to grab
     #the first x characters of actual article text instead
@@ -323,6 +323,11 @@ def buildNBC():
         if h1 not in x:
             h3s.append(url+x)
 
+    #adjust for today.com urls: keep only the text after the first '.com'
+    for arr in [h1s, h2s, h3s]:
+        for i in range(len(arr)):
+            if 'today.com' in arr[i]:
+                arr[i]=arr[i].split('.com', 1)[1]
 
     h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
     nbc=buildNewsSource2(name, url, h1s, h2s, h3s)
@@ -549,6 +554,8 @@ def buildNYT():
         if (h1 not in x) and (x not in h2s):
             h2s.append(x)
 
+    print(h2s)
+
     #GET TERTIARY HEADLINES
     h3=content
     h3s=[]
-- 
cgit v1.2.3