summary refs log tree commit diff
diff options
context:
space:
mode:
author Matt Singleton <matt@xcolour.net> 2019-01-13 08:39:44 -0500
committer Matt Singleton <matt@xcolour.net> 2019-01-13 08:39:44 -0500
commit ad9828d234541d077cefd34713d516bff226f19f (patch)
tree 46774d69c63cea9b08c33a2258335a552b5ef4af
parent a39dfd89cacbdbc06742cb4c72a981844b2ae371 (diff)
new cbs layout build-22
-rw-r--r-- unbiased/sources/cbs.py 27
1 files changed, 8 insertions, 19 deletions
diff --git a/unbiased/sources/cbs.py b/unbiased/sources/cbs.py
index 295e671..3a6d017 100644
--- a/unbiased/sources/cbs.py
+++ b/unbiased/sources/cbs.py
@@ -14,24 +14,13 @@ class CBS(NewsSource):
def _fetch_urls(cls):
soup = cls._fetch_content(cls.url)
- # get primary headline
- h1 = soup.find('h1', class_='title')
- # sometimes they lead with a video
- # if so, we'll pull the first h2 into the h1 slot later
- if h1 is not None:
- h1s = (h1.a['href'],)
-
- # get secondary headlines
- h2s = soup.find('div', attrs={'data-tb-region': 'Big News Area Side Assets'})\
- .ul.find_all('li', attrs={'data-tb-region-item': True})
- h2s = tuple(x.a['href'] for x in h2s)
- if h1 is None:
- h1s = (h2s[0],)
- h2s = tuple(h2s[1:])
-
- # get tertiary headlines
- h3s = soup.find('div', attrs={'data-tb-region': 'Hard News'})\
- .ul.find_all('li', attrs={'data-tb-region-item': True})
- h3s = tuple(x.a['href'] for x in h3s[:5])
+ top = soup.find('section', id='component-latest-news')\
+ .find_all('article')
+ h1s = (top[0].find('a')['href'],)
+ h2s = tuple([x.find('a')['href'] for x in top[1:]])
+
+ more = soup.find('section', id='component-more-top-stories')\
+ .find_all('article')
+ h3s = tuple([x.find('a')['href'] for x in more])
return h1s, h2s, h3s