From 79b293fdc9da9abe9399c727e08efb1b32fd4337 Mon Sep 17 00:00:00 2001 From: ssstvinc2 Date: Thu, 23 Mar 2017 17:00:05 -0400 Subject: added washington times --- main.py | 7 +++++-- parser.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ spotCheck.py | 3 ++- 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index c54487e..735ff6b 100755 --- a/main.py +++ b/main.py @@ -21,14 +21,17 @@ def run(): SOURCES TO ADD NEXT: -ABC -REUTERS + -Christian Science Monitor + -Town Hall + -Washington Times ''' ### These values have to be the second half of the function name ### E.g. Guardian calls buildGuardian(), etc. - sourceFnArr=['Guardian', 'TheHill', 'NPR', 'Blaze', 'BBC', 'NBC', 'CBS', - 'FoxNews', ] + sourceFnArr=['Guardian', 'TheHill', 'NPR', 'BBC', 'NBC', 'CBS', + 'FoxNews', 'WashTimes'] #'Blaze' for source in sourceFnArr: tries=0 diff --git a/parser.py b/parser.py index 19333e8..942612a 100755 --- a/parser.py +++ b/parser.py @@ -248,6 +248,58 @@ def buildGuardian(): return gdn + +def buildWashTimes(): + url='http://www.washingtontimes.com/' + name='Washington Times' + + + #DOWNLOAD HOMEPAGE CONTENT + content=urlToContent(url) + + #get main headline + h1=content + h1=h1.split('top-news', 1)[1] + h1=h1.split('', 1)[1] #end of top-news article + h2=h2.split('
')[1:] + + for x in h2: + x=x.split('', 1)[0] + h3=h3.split('