new cbs layout (build-22)

author: Matt Singleton <matt@xcolour.net> 2019-01-13 08:39:44 -0500
committer: Matt Singleton <matt@xcolour.net> 2019-01-13 08:39:44 -0500
commit: ad9828d234541d077cefd34713d516bff226f19f (patch)
tree: 46774d69c63cea9b08c33a2258335a552b5ef4af
parent: a39dfd89cacbdbc06742cb4c72a981844b2ae371 (diff)
1 files changed, 8 insertions, 19 deletions
diff --git a/unbiased/sources/cbs.py b/unbiased/sources/cbs.py
index 295e671..3a6d017 100644
--- a/unbiased/sources/cbs.py
+++ b/unbiased/sources/cbs.py
@@ -14,24 +14,13 @@ class CBS(NewsSource):
     def _fetch_urls(cls):
         soup = cls._fetch_content(cls.url)
 
-        # get primary headline
-        h1 = soup.find('h1', class_='title')
-        # sometimes they lead with a video
-        # if so, we'll pull the first h2 into the h1 slot later
-        if h1 is not None:
-            h1s = (h1.a['href'],)
-
-        # get secondary headlines
-        h2s = soup.find('div', attrs={'data-tb-region': 'Big News Area Side Assets'})\
-                .ul.find_all('li', attrs={'data-tb-region-item': True})
-        h2s = tuple(x.a['href'] for x in h2s)
-        if h1 is None:
-            h1s = (h2s[0],)
-            h2s = tuple(h2s[1:])
-
-        # get tertiary headlines
-        h3s = soup.find('div', attrs={'data-tb-region': 'Hard News'})\
-                .ul.find_all('li', attrs={'data-tb-region-item': True})
-        h3s = tuple(x.a['href'] for x in h3s[:5])
+        top = soup.find('section', id='component-latest-news')\
+            .find_all('article')
+        h1s = (top[0].find('a')['href'],)
+        h2s = tuple([x.find('a')['href'] for x in top[1:]])
+
+        more = soup.find('section', id='component-more-top-stories')\
+            .find_all('article')
+        h3s = tuple([x.find('a')['href'] for x in more])
 
         return h1s, h2s, h3s
author	Matt Singleton <matt@xcolour.net>	2019-01-13 08:39:44 -0500
committer	Matt Singleton <matt@xcolour.net>	2019-01-13 08:39:44 -0500
commit	ad9828d234541d077cefd34713d516bff226f19f (patch)
tree	46774d69c63cea9b08c33a2258335a552b5ef4af
parent	a39dfd89cacbdbc06742cb4c72a981844b2ae371 (diff)