Fixed NYT, plus other parsing fixes and a minor visual tweak

author: sstvinc2 <sstvinc2@gmail.com> 2017-02-15 23:33:56 -0600
committer: sstvinc2 <sstvinc2@gmail.com> 2017-02-15 23:33:56 -0600
commit: 233eb048a9bc2c4b84e1ae6a47de6b088779ee4e (patch)
tree: 95681c7f50d434f4b8380f17656135324632c6a6 /parser.py
parent: 38483987b2389b92ca06ac1b409f358ecd4fa991 (diff)
1 file changed, 14 insertions, 5 deletions
diff --git a/parser.py b/parser.py
index 53b3261..ef90eee 100644
--- a/parser.py
+++ b/parser.py
@@ -225,6 +225,8 @@ def buildBlaze():
     h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
     blz=buildNewsSource2(name, url, h1s, h2s, h3s)
 
+    blz=removeBadStories(blz, None, ['Tomi Lahren'], None)
+
     #The Blaze has dumb, short description fields, so we need to grab
     #the first x characters of actual article text instead
     blz.h1Arr=blazeFixDesc(blz.h1Arr)
@@ -502,10 +504,17 @@ def buildNYT():
     #this will likely need if/else logic
     h1=content
 
-    #This is with a large headline over a and b columns
-    h1=h1.split('story theme-summary banner', 1)[1]
-    h1=h1.split('<a href="', 1)[1]
-    h1=h1.split('"', 1)[0]
+    if 'story theme-summary banner' in h1:
+        #This is with a large headline over a and b columns
+        h1=h1.split('story theme-summary banner', 1)[1]
+        h1=h1.split('<a href="', 1)[1]
+        h1=h1.split('"', 1)[0]
+    else:
+        #otherwise, pull the first story from the A column
+        h1=h1.split('<div class="a-column column">', 1)[1]
+        h1=h1.split('<a href="', 1)[1].split('"', 1)[0]
+    h1s=[h1]
+        
 
     #GET SECONDARY HEADLINES
     #This comes from the a column or b column, above the break
@@ -557,7 +566,7 @@ def buildNYT():
         if (h1 not in x) and (x not in h3s):
             h3s.append(x)
 
-    h1s, h2s, h3s = removeDuplicates([h1], h2s, h3s)
+    h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
     nyt=buildNewsSource2(name, url, h1s, h2s, h3s)
 
     return nyt
author	sstvinc2 <sstvinc2@gmail.com>	2017-02-15 23:33:56 -0600
committer	sstvinc2 <sstvinc2@gmail.com>	2017-02-15 23:33:56 -0600
commit	233eb048a9bc2c4b84e1ae6a47de6b088779ee4e (patch)
tree	95681c7f50d434f4b8380f17656135324632c6a6 /parser.py
parent	38483987b2389b92ca06ac1b409f358ecd4fa991 (diff)