From 1b08ad4652091d529588f9fb75f7412a07d2dd28 Mon Sep 17 00:00:00 2001 From: sstvinc2 Date: Thu, 16 Feb 2017 21:20:01 -0600 Subject: Some parsing tweaks, mostly for The Guardian --- main.py | 14 +++++++++++++- parser.py | 21 +++++++++++++-------- unbiasedFunctions.py | 16 ++++++++++++++-- 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/main.py b/main.py index 3b39a73..ea1508f 100644 --- a/main.py +++ b/main.py @@ -19,7 +19,19 @@ def run(): #nyt=buildNYT() #sourceList.append(nyt) - gdn=buildGuardian() + #for some reason, The Guardian sometimes just doesn't work right? + #loop until it gets it right + h1='https://www.theguardian.com/us' + looped=False + while h1=='https://www.theguardian.com/us': + try: + gdn=buildGuardian() + h1=gdn.h1Arr[0] + except: + print('The Guardian: build error. Looping again.') + if looped: + print('Guardian loop') + looped=True sourceList.append(gdn) blz=buildBlaze() diff --git a/parser.py b/parser.py index e6257da..41972cd 100644 --- a/parser.py +++ b/parser.py @@ -180,7 +180,7 @@ def buildGuardian(): #get main headline h1=content - h1=h1.split('

')[0] if img[-1]=='/': -- cgit v1.2.3