From ad9828d234541d077cefd34713d516bff226f19f Mon Sep 17 00:00:00 2001
From: Matt Singleton
Date: Sun, 13 Jan 2019 08:39:44 -0500
Subject: new cbs layout

---
 unbiased/sources/cbs.py | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/unbiased/sources/cbs.py b/unbiased/sources/cbs.py
index 295e671..3a6d017 100644
--- a/unbiased/sources/cbs.py
+++ b/unbiased/sources/cbs.py
@@ -14,24 +14,13 @@ class CBS(NewsSource):
     def _fetch_urls(cls):
         soup = cls._fetch_content(cls.url)
 
-        # get primary headline
-        h1 = soup.find('h1', class_='title')
-        # sometimes they lead with a video
-        # if so, we'll pull the first h2 into the h1 slot later
-        if h1 is not None:
-            h1s = (h1.a['href'],)
-
-        # get secondary headlines
-        h2s = soup.find('div', attrs={'data-tb-region': 'Big News Area Side Assets'})\
-            .ul.find_all('li', attrs={'data-tb-region-item': True})
-        h2s = tuple(x.a['href'] for x in h2s)
-        if h1 is None:
-            h1s = (h2s[0],)
-            h2s = tuple(h2s[1:])
-
-        # get tertiary headlines
-        h3s = soup.find('div', attrs={'data-tb-region': 'Hard News'})\
-            .ul.find_all('li', attrs={'data-tb-region-item': True})
-        h3s = tuple(x.a['href'] for x in h3s[:5])
+        top = soup.find('section', id='component-latest-news')\
+            .find_all('article')
+        h1s = (top[0].find('a')['href'],)
+        h2s = tuple([x.find('a')['href'] for x in top[1:]])
+
+        more = soup.find('section', id='component-more-top-stories')\
+            .find_all('article')
+        h3s = tuple([x.find('a')['href'] for x in more])
 
         return h1s, h2s, h3s
-- 
cgit v1.2.3