diff options
author | Matt Singleton <matt@xcolour.net> | 2019-01-12 18:05:55 -0500 |
---|---|---|
committer | Matt Singleton <matt@xcolour.net> | 2019-01-12 18:05:55 -0500 |
commit | f2e8dcdc0a8cd69b594ed7e6f7f0f5087a5950b2 (patch) | |
tree | 13cf26d34502a09165c1a899727facc8e52809dc | |
parent | f1bdf28ae128fb47c38ccd2bf8084c3b79e3805d (diff) |
new csm layout
-rw-r--r-- | unbiased/sources/csm.py | 36 |
1 files changed, 11 insertions, 25 deletions
```diff
diff --git a/unbiased/sources/csm.py b/unbiased/sources/csm.py
index 4e1eea5..2d24aa3 100644
--- a/unbiased/sources/csm.py
+++ b/unbiased/sources/csm.py
@@ -14,28 +14,14 @@ class CSM(NewsSource):
     def _fetch_urls(cls):
         soup = cls._fetch_content(cls.url)
 
-        # get primary headline
-        h1 = soup.find('div', id='block-0-0')\
-            .find('h3', class_='story_headline')\
-            .a['href']
-        h1s = (h1,)
-
-        # get secondary headlines
-        h2_blocks = soup.find_all('div', id=['block-1-0', 'block-0-1'])
-        h2s = []
-        for block in h2_blocks:
-            hblocks = block.find_all('h3', class_='story_headline')
-            for hblock in hblocks:
-                h2s += [x for x in hblock.find_all('a') if 'first-look' not in x['href']]
-        h2s = tuple(x['href'] for x in h2s)
-
-        # get tertiary headlines
-        h3_blocks = soup.find_all('div', id='block-0-2')
-        h3s = []
-        for block in h3_blocks:
-            hblocks = block.find_all('h3', class_='story_headline')
-            for hblock in hblocks:
-                h3s += [x for x in hblock.find_all('a') if 'first-look' not in x['href']]
-        h3s = tuple(x['href'] for x in h3s)
-
-        return h1s, h2s, h3s
+        # get all headlines
+        h = soup.find_all('div', class_='ezc-csm-story')
+        h = [x.find('a')['href'] for x in h]
+
+        # get primary headlines (first four)
+        h1s = tuple(h[:4])
+
+        # get secondary headlines (the rest)
+        h2s = tuple(h[4:])
+
+        return h1s, h2s, ()
```