From f8c6b0084e7d0928121d4c05d3b1f47b10c303c7 Mon Sep 17 00:00:00 2001 From: ssstvinc2 Date: Mon, 6 Mar 2017 18:12:59 -0500 Subject: Added spotCheck ability. Other minor tweaks --- parser.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'parser.py') diff --git a/parser.py b/parser.py index 0426df1..be40a3b 100755 --- a/parser.py +++ b/parser.py @@ -226,7 +226,7 @@ def buildTheHill(): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) hil=buildNewsSource2(name, url, h1s, h2s, h3s) - hil=removeBadStories(hil, ['THE MEMO'], None, ['Matt Schlapp'], None, None) + hil=removeBadStories(hil, ['THE MEMO'], None, ['Matt Schlapp', 'Juan Williams'], None, None) return hil @@ -235,7 +235,7 @@ def buildTheHill(): def buildGuardian(): - url='http://www.theguardian.com/us' + url='http://www.theguardian.com/us-news' name='The Guardian' #DOWNLOAD HOMEPAGE CONTENT @@ -253,7 +253,7 @@ def buildGuardian(): h2s=[] #only the h1 and the two h2s have this, so split on it and grab #the second two - h2=h2.split('
', 3)[2:] + h2=h2.split('
')[2:] for x in h2: x=x.split('