diff options
author | Matt Singleton <msingleton@aclu.org> | 2017-10-14 19:05:39 -0400 |
---|---|---|
committer | Matt Singleton <msingleton@aclu.org> | 2017-10-14 19:05:39 -0400 |
commit | 4fa6bb4c64e90eb5c3c11074cf83747f01bd7fd7 (patch) | |
tree | 03b9c55f9635cc565728fd29d0dfd47497585725 | |
parent | fde7eb18c21626739936ab5072d8e537bc3a16de (diff) |
BBC News closes #3
-rw-r--r-- | unbiased/sources/bbc.py | 26 |
1 files changed, 26 insertions, 0 deletions
"""BBC News source: collects headline links from the US & Canada page."""

from unbiased.sources.base import NewsSource


class BBC(NewsSource):
    """Source definition for the BBC News "US & Canada" section."""

    name = 'BBC News'
    shortname = 'bbc'
    url = 'http://www.bbc.com/news/world/us_and_canada'

    # NOTE(review): presumably image names the base class should reject
    # (the generic site logo) -- confirm against NewsSource.
    bad_images = ['bbc_news_logo.png']

    @classmethod
    def _fetch_urls(cls):
        """Return the headline links found on the section front page.

        Assumes ``cls._fetch_content`` returns a BeautifulSoup document
        (it is used through ``find`` / ``find_all`` below) -- TODO confirm.

        Returns:
            A 3-tuple ``(h1s, h2s, h3s)`` where each element is a tuple of
            ``href`` strings, exactly as they appear in the markup:
            the single lead-story link (``buzzard-item``), the links of the
            ``pigeon__column`` blocks, and the links of the ``macaw-item``
            blocks.

        Raises:
            AttributeError, TypeError, KeyError: if the lead-story block,
                its title link, or that link's ``href`` is missing. The
                lead story is treated as mandatory, so a layout change
                fails loudly instead of yielding an empty primary headline.
        """
        soup = cls._fetch_content(cls.url)

        def title_links(css_class):
            """Return title-link hrefs of every entity block of *css_class*."""
            blocks = soup.find_all(
                'div', attrs={'class': css_class, 'data-entityid': True})
            anchors = (
                block.find('a', class_='title-link') for block in blocks)
            # One block without a usable title link (find() gives None, or
            # the anchor has no href) must not abort the whole scrape, so
            # such blocks are skipped.
            return tuple(
                anchor['href'] for anchor in anchors
                if anchor is not None and anchor.get('href'))

        # get primary headline
        lead = soup.find('div', class_='buzzard-item')\
            .find('a', class_='title-link')
        h1s = (lead['href'],)

        # get secondary headlines
        h2s = title_links('pigeon__column')

        # get tertiary headlines
        h3s = title_links('macaw-item')

        return h1s, h2s, h3s