summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Matt Singleton <matt@xcolour.net> 2019-01-12 18:05:55 -0500
committer Matt Singleton <matt@xcolour.net> 2019-01-12 18:05:55 -0500
commit f2e8dcdc0a8cd69b594ed7e6f7f0f5087a5950b2 (patch)
tree 13cf26d34502a09165c1a899727facc8e52809dc
parent f1bdf28ae128fb47c38ccd2bf8084c3b79e3805d (diff)
new csm layout
-rw-r--r-- unbiased/sources/csm.py 36
1 files changed, 11 insertions, 25 deletions
diff --git a/unbiased/sources/csm.py b/unbiased/sources/csm.py
index 4e1eea5..2d24aa3 100644
--- a/unbiased/sources/csm.py
+++ b/unbiased/sources/csm.py
@@ -14,28 +14,14 @@ class CSM(NewsSource):
def _fetch_urls(cls):
soup = cls._fetch_content(cls.url)
- # get primary headline
- h1 = soup.find('div', id='block-0-0')\
- .find('h3', class_='story_headline')\
- .a['href']
- h1s = (h1,)
-
- # get secondary headlines
- h2_blocks = soup.find_all('div', id=['block-1-0', 'block-0-1'])
- h2s = []
- for block in h2_blocks:
- hblocks = block.find_all('h3', class_='story_headline')
- for hblock in hblocks:
- h2s += [x for x in hblock.find_all('a') if 'first-look' not in x['href']]
- h2s = tuple(x['href'] for x in h2s)
-
- # get tertiary headlines
- h3_blocks = soup.find_all('div', id='block-0-2')
- h3s = []
- for block in h3_blocks:
- hblocks = block.find_all('h3', class_='story_headline')
- for hblock in hblocks:
- h3s += [x for x in hblock.find_all('a') if 'first-look' not in x['href']]
- h3s = tuple(x['href'] for x in h3s)
-
- return h1s, h2s, h3s
+ # get all headlines
+ h = soup.find_all('div', class_='ezc-csm-story')
+ h = [x.find('a')['href'] for x in h]
+
+ # get primary headlines (first four)
+ h1s = tuple(h[:4])
+
+ # get secondary headlines (the rest)
+ h2s = tuple(h[4:])
+
+ return h1s, h2s, ()