diff options
author | Matt Singleton <msingleton@aclu.org> | 2017-10-14 18:44:06 -0400 |
---|---|---|
committer | Matt Singleton <msingleton@aclu.org> | 2017-10-14 18:44:06 -0400 |
commit | fde7eb18c21626739936ab5072d8e537bc3a16de (patch) | |
tree | af5909c78eb3352110ae14b9f68b992311b3976d | |
parent | ff01ea02a0cd85d7199455de1a053b57fdc27eee (diff) |
NPR News closes #2
-rw-r--r-- | unbiased/sources/npr.py | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/unbiased/sources/npr.py b/unbiased/sources/npr.py
new file mode 100644
index 0000000..e52459f
--- /dev/null
+++ b/unbiased/sources/npr.py
@@ -0,0 +1,29 @@
+from unbiased.sources.base import NewsSource
+
+class NPR(NewsSource):
+
+    name = 'NPR News'
+    shortname = 'npr'
+    url = 'http://www.npr.org/sections/news/'
+
+    bad_titles = ['The Two-Way']
+    bad_authors = ['Domenico Montanaro']
+
+    @classmethod
+    def _fetch_urls(cls):
+        soup = cls._fetch_content(cls.url)
+
+        featured = soup.find('div', class_='featured-3-up')\
+            .find_all('article', recursive=False)
+
+        h1s = featured[:1]
+        h1s = tuple(x.find('h2', class_='title').a['href'] for x in h1s)
+        h2s = featured[1:]
+        h2s = tuple(x.find('h2', class_='title').a['href'] for x in h2s)
+
+        # get tertiary headlines
+        h3s = soup.find('div', id='overflow')\
+            .find_all('article', recursive=False)
+        h3s = tuple(x.find('h2', class_='title').a['href'] for x in h3s[:5])
+
+        return h1s, h2s, h3s