summary refs log tree commit diff
path: root/unbiasedFunctions.py
diff options
context:
space:
mode:
author sstvinc2 <sstvinc2@gmail.com> 2017-02-16 21:20:01 -0600
committer sstvinc2 <sstvinc2@gmail.com> 2017-02-16 21:20:01 -0600
commit 1b08ad4652091d529588f9fb75f7412a07d2dd28 (patch)
tree 691ab2e5f01fc141a4e3dd182e57108db07134ba /unbiasedFunctions.py
parent 53e8b692f6374b72238df797bf14e94f0567b331 (diff)
Some parsing tweaks, mostly for The Guardian
Diffstat (limited to 'unbiasedFunctions.py')
-rw-r--r-- unbiasedFunctions.py 16
1 file changed, 14 insertions, 2 deletions
diff --git a/unbiasedFunctions.py b/unbiasedFunctions.py
index de27228..748aed7 100644
--- a/unbiasedFunctions.py
+++ b/unbiasedFunctions.py
@@ -25,8 +25,20 @@ def buildArticle(url, sourceName):#, titleDelStart, titleDelEnd, imgDelStart, im
if sourceName=='The Guardian':
#The Guardian puts an identifying banner on their og:images
#grab the main image from the page instead
- img=content.split('<img class="maxed', 1)[1]
- img=img.split('src="', 1)[1].split('"')[0]
+
+ #scenario 1: regular image
+ if '<img class="maxed' in content:
+ img=content.split('<img class="maxed', 1)[1]
+ img=img.split('src="', 1)[1].split('"')[0]
+ #scenario 2: video in image spot
+ elif '<meta itemprop="image"' in content:
+ img=content.split('<meta itemprop="image"', 1)[1]
+ img=img.split('content="', 1)[1].split('"')[0]
+ #scenario 3: photo essays
+ elif '<img class="immersive-main-media__media"' in content:
+ img=content.split('<img class="immersive-main-media__media"', 1)[1]
+ img=img.split('src="', 1)[1].split('"')[0]
+
else:
img=content.split('og:image" content=')[1][1:].split('>')[0]
if img[-1]=='/':