From d1c7dfc9c2a47edf80527c2457481b9508087ce6 Mon Sep 17 00:00:00 2001 From: sstvinc2 Date: Sat, 18 Feb 2017 22:23:51 -0600 Subject: Added The Hill; also tweaked buildArticle() --- parser.py | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'parser.py') diff --git a/parser.py b/parser.py index 5cb1c51..6b7b0a6 100644 --- a/parser.py +++ b/parser.py @@ -176,6 +176,53 @@ def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, b + +def buildTheHill(): + url='http://thehill.com' + name='The Hill' + + #DOWNLOAD HOMEPAGE CONTENT + content=urlToContent(url) + + #get main headline + h1=content + h1=h1.split('
', 1)[1] + h1=h1.split('', 1)[1] + h2=h2.split('', 1)[0] + while '
', 1)[1] + h3=h3.split('', 1)[0] + while '