From f2e8dcdc0a8cd69b594ed7e6f7f0f5087a5950b2 Mon Sep 17 00:00:00 2001
From: Matt Singleton
Date: Sat, 12 Jan 2019 18:05:55 -0500
Subject: new csm layout

---
 unbiased/sources/csm.py | 36 +++++++++++------------------------
 1 file changed, 11 insertions(+), 25 deletions(-)

diff --git a/unbiased/sources/csm.py b/unbiased/sources/csm.py
index 4e1eea5..2d24aa3 100644
--- a/unbiased/sources/csm.py
+++ b/unbiased/sources/csm.py
@@ -14,28 +14,14 @@ class CSM(NewsSource):
     def _fetch_urls(cls):
         soup = cls._fetch_content(cls.url)
 
-        # get primary headline
-        h1 = soup.find('div', id='block-0-0')\
-            .find('h3', class_='story_headline')\
-            .a['href']
-        h1s = (h1,)
-
-        # get secondary headlines
-        h2_blocks = soup.find_all('div', id=['block-1-0', 'block-0-1'])
-        h2s = []
-        for block in h2_blocks:
-            hblocks = block.find_all('h3', class_='story_headline')
-            for hblock in hblocks:
-                h2s += [x for x in hblock.find_all('a') if 'first-look' not in x['href']]
-        h2s = tuple(x['href'] for x in h2s)
-
-        # get tertiary headlines
-        h3_blocks = soup.find_all('div', id='block-0-2')
-        h3s = []
-        for block in h3_blocks:
-            hblocks = block.find_all('h3', class_='story_headline')
-            for hblock in hblocks:
-                h3s += [x for x in hblock.find_all('a') if 'first-look' not in x['href']]
-        h3s = tuple(x['href'] for x in h3s)
-
-        return h1s, h2s, h3s
+        # get all headlines
+        h = soup.find_all('div', class_='ezc-csm-story')
+        h = [x.find('a')['href'] for x in h]
+
+        # get primary headlines (first four)
+        h1s = tuple(h[:4])
+
+        # get secondary headlines (the rest)
+        h2s = tuple(h[4:])
+
+        return h1s, h2s, ()
-- 
cgit v1.2.3