# Origin: unbiased/sources/nbc.py, added by the commit "new source, nbc news, closes #4".
from unbiased.sources.base import NewsSource


class NBC(NewsSource):
    """Front-page link scraper for NBC News."""

    name = 'NBC News'
    shortname = 'nbc'
    url = 'https://www.nbcnews.com/'

    # NOTE(review): presumably consumed by NewsSource to discard links whose
    # URL contains one of these fragments -- confirm against the base class.
    bad_urls = ['/opinion/']

    @classmethod
    def _fetch_urls(cls):
        """Collect story links from the NBC News front page.

        The page at ``cls.url`` is loaded through ``cls._fetch_content``
        (inherited; presumably returns a parsed BeautifulSoup document --
        confirm in NewsSource) and three groups of ``href`` values are read
        from it:

        1. the single anchor inside the ``panel_hero`` div of the
           ``js-top-stories-content`` container;
        2. the anchor of every ``media-body`` div found in the ``row`` divs
           under the first child div of that same container;
        3. the anchor of every ``story-link`` div under the first child div
           of the ``js-more-topstories`` container.

        Returns:
            A 3-tuple of tuples of href strings, in the order listed above.

        Raises:
            AttributeError / TypeError: if any expected element is missing
                from the page, since lookups are chained without None checks.
        """
        page = cls._fetch_content(cls.url)

        # Both the hero link and the secondary rows live in this container.
        top_stories = page.find('div', class_='js-top-stories-content')

        hero_anchor = top_stories.find('div', class_='panel_hero').a
        lead_links = (hero_anchor['href'],)

        story_rows = top_stories.div.find_all('div', class_='row')
        secondary_links = tuple(
            media.a['href']
            for story_row in story_rows
            for media in story_row.find_all('div', class_='media-body')
        )

        more_stories = page.find('div', class_='js-more-topstories')
        tertiary_links = tuple(
            item.a['href']
            for item in more_stories.div.find_all('div', class_='story-link')
        )

        return lead_links, secondary_links, tertiary_links