diff options
author | Matt Singleton <matt@xcolour.net> | 2019-01-13 08:39:44 -0500 |
---|---|---|
committer | Matt Singleton <matt@xcolour.net> | 2019-01-13 08:39:44 -0500 |
commit | ad9828d234541d077cefd34713d516bff226f19f (patch) | |
tree | 46774d69c63cea9b08c33a2258335a552b5ef4af | |
parent | a39dfd89cacbdbc06742cb4c72a981844b2ae371 (diff) |
new cbs layout build-22
-rw-r--r-- | unbiased/sources/cbs.py | 27 |
1 file changed, 8 insertions, 19 deletions
```diff
diff --git a/unbiased/sources/cbs.py b/unbiased/sources/cbs.py
index 295e671..3a6d017 100644
--- a/unbiased/sources/cbs.py
+++ b/unbiased/sources/cbs.py
@@ -14,24 +14,13 @@ class CBS(NewsSource):
     def _fetch_urls(cls):
         soup = cls._fetch_content(cls.url)
 
-        # get primary headline
-        h1 = soup.find('h1', class_='title')
-        # sometimes they lead with a video
-        # if so, we'll pull the first h2 into the h1 slot later
-        if h1 is not None:
-            h1s = (h1.a['href'],)
-
-        # get secondary headlines
-        h2s = soup.find('div', attrs={'data-tb-region': 'Big News Area Side Assets'})\
-            .ul.find_all('li', attrs={'data-tb-region-item': True})
-        h2s = tuple(x.a['href'] for x in h2s)
-        if h1 is None:
-            h1s = (h2s[0],)
-            h2s = tuple(h2s[1:])
-
-        # get tertiary headlines
-        h3s = soup.find('div', attrs={'data-tb-region': 'Hard News'})\
-            .ul.find_all('li', attrs={'data-tb-region-item': True})
-        h3s = tuple(x.a['href'] for x in h3s[:5])
+        top = soup.find('section', id='component-latest-news')\
+            .find_all('article')
+        h1s = (top[0].find('a')['href'],)
+        h2s = tuple([x.find('a')['href'] for x in top[1:]])
+
+        more = soup.find('section', id='component-more-top-stories')\
+            .find_all('article')
+        h3s = tuple([x.find('a')['href'] for x in more])
 
         return h1s, h2s, h3s
```