diff options
author | Matt Singleton <matt@xcolour.net> | 2017-10-22 21:30:10 -0400 |
---|---|---|
committer | Matt Singleton <matt@xcolour.net> | 2017-10-22 21:30:10 -0400 |
commit | 39454dd270132acbda18b5dc26d4bcd585ca28dc (patch) | |
tree | 5aa980aeccd2714f59ba4ca244675d4a28bb143c | |
parent | f9ef3b242558dca8e2ad6a5592eee13be4d592d1 (diff) |
new source, nbc news, closes #4
-rw-r--r-- | unbiased/sources/nbc.py | 32 |
1 files changed, 32 insertions, 0 deletions
from unbiased.sources.base import NewsSource


class NBC(NewsSource):
    """News source definition that scrapes story links from NBC News."""

    name = 'NBC News'
    shortname = 'nbc'
    url = 'https://www.nbcnews.com/'

    # NOTE(review): presumably consumed by NewsSource to discard links
    # containing these fragments -- confirm against the base class.
    bad_urls = ['/opinion/']

    @classmethod
    def _fetch_urls(cls):
        """Collect story links from the page at ``cls.url``.

        The page is obtained through ``cls._fetch_content`` (assumed to
        return a parsed BeautifulSoup document -- TODO confirm in the base
        class).

        Returns a 3-tuple ``(hero, secondary, more)`` where each element is
        a tuple of ``href`` strings:

        * ``hero`` -- the single link inside the ``panel_hero`` div of the
          ``js-top-stories-content`` container.
        * ``secondary`` -- the first link of every ``media-body`` div found
          in the ``row`` divs under that container's first child div.
        * ``more`` -- the first link of every ``story-link`` div under the
          first child div of the ``js-more-topstories`` container.

        There is no handling for missing elements: if the page layout
        changes, the lookups below fail with an exception.
        """
        page = cls._fetch_content(cls.url)

        # Both the hero story and the secondary rows live in this container.
        top_stories = page.find('div', class_='js-top-stories-content')

        hero_anchor = top_stories.find('div', class_='panel_hero').a
        hero = (hero_anchor['href'],)

        story_rows = top_stories.div.find_all('div', class_='row')
        secondary = tuple(
            body.a['href']
            for story_row in story_rows
            for body in story_row.find_all('div', class_='media-body')
        )

        more_container = page.find('div', class_='js-more-topstories')
        more_hrefs = []
        for story in more_container.div.find_all('div', class_='story-link'):
            more_hrefs.append(story.a['href'])
        more = tuple(more_hrefs)

        return hero, secondary, more