From 53e8b692f6374b72238df797bf14e94f0567b331 Mon Sep 17 00:00:00 2001 From: sstvinc2 Date: Thu, 16 Feb 2017 16:02:24 -0600 Subject: Added The Guardian to sources --- main.py | 3 +++ parser.py | 48 +++++++++++++++++++++++++++++++++++++++++++++++- unbiasedFunctions.py | 17 ++++++++++++----- 3 files changed, 62 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index 9120906..3b39a73 100644 --- a/main.py +++ b/main.py @@ -19,6 +19,9 @@ def run(): #nyt=buildNYT() #sourceList.append(nyt) + gdn=buildGuardian() + sourceList.append(gdn) + blz=buildBlaze() sourceList.append(blz) diff --git a/parser.py b/parser.py index d12b1c2..e6257da 100644 --- a/parser.py +++ b/parser.py @@ -170,6 +170,52 @@ def removeBadStories(source, badDescArr, badAuthorArr, badImgArr): return source + +def buildGuardian(): + url='http://www.theguardian.com/us-news' + name='The Guardian' + + #DOWNLOAD HOMEPAGE CONTENT + content=urlToContent(url) + + #get main headline + h1=content + h1=h1.split('

', 3)[2:] + for x in h2: + x=x.split('

', 1)[1] + h3=h3.split('