From 8e87842bdbd8525c4fa6ec8f1bd95aa42ab9318b Mon Sep 17 00:00:00 2001 From: sstvinc2 Date: Wed, 15 Feb 2017 15:33:50 -0600 Subject: The Blaze added to new parser; also fixed Blaze desription fields --- main.py | 8 ++++++-- parser.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 09bfddc..cf68d01 100644 --- a/main.py +++ b/main.py @@ -15,6 +15,9 @@ def main(): def run(): sourceList=[] + blz=buildBlaze() + sourceList.append(blz) + bbc=buildBBC() sourceList.append(bbc) @@ -24,7 +27,8 @@ def run(): cbs=buildCBS() sourceList.append(cbs) - + + ''' sourceList.append(NewsSource('The Blaze', 'http://theblaze.com', ['', 1)[1] + desc=desc.split('

', 1)[1] + desc=TAG_RE.sub('', desc) + desc=desc.replace('\n', ' ') + desc=desc[:144] + print(desc+'\n\n') + articleArr[i].description=desc + + return articleArr + + + +def buildBlaze(): + url='http://theblaze.com' + name='The Blaze' + + #DOWNLOAD HOMEPAGE CONTENT + content=urlToContent(url) + + #get main headline + h1=content + h1=h1.split('', 1)[1] + h1=h1.split('', 1)[0] + h1=h1.split('', 1)[1] + h2=h2.split('', 1)[0] + while '\n\n

\n\n