NPR News closes #2

author: Matt Singleton <msingleton@aclu.org> 2017-10-14 18:44:06 -0400
committer: Matt Singleton <msingleton@aclu.org> 2017-10-14 18:44:06 -0400
commit: fde7eb18c21626739936ab5072d8e537bc3a16de (patch)
tree: af5909c78eb3352110ae14b9f68b992311b3976d
parent: ff01ea02a0cd85d7199455de1a053b57fdc27eee (diff)
1 files changed, 29 insertions, 0 deletions
diff --git a/unbiased/sources/npr.py b/unbiased/sources/npr.py
new file mode 100644
index 0000000..e52459f
--- /dev/null
+++ b/unbiased/sources/npr.py
@@ -0,0 +1,29 @@
+from unbiased.sources.base import NewsSource
+
+class NPR(NewsSource):
+    """Scraper for headline links on the NPR News section page (see ``url``)."""
+
+    name = 'NPR News'
+    shortname = 'npr'
+    url = 'http://www.npr.org/sections/news/'
+
+    # NOTE(review): presumably read by NewsSource to filter articles -- confirm.
+    bad_titles = ['The Two-Way']
+    bad_authors = ['Domenico Montanaro']
+
+    @classmethod
+    def _fetch_urls(cls):
+        """Return ``(h1s, h2s, h3s)``, three tuples of article href strings."""
+        def hrefs(articles):
+            # each article keeps its link in <h2 class="title"><a href=...>
+            return tuple(art.find('h2', class_='title').a['href'] for art in articles)
+
+        page = cls._fetch_content(cls.url)
+        top_box = page.find('div', class_='featured-3-up')
+        top = top_box.find_all('article', recursive=False)
+        # the first featured article goes in h1s, the remaining ones in h2s
+        h1s, h2s = hrefs(top[:1]), hrefs(top[1:])
+        # tertiary headlines: at most five direct <article> children of #overflow
+        more_box = page.find('div', id='overflow')
+        more = more_box.find_all('article', recursive=False)
+        return h1s, h2s, hrefs(more[:5])
author	Matt Singleton <msingleton@aclu.org>	2017-10-14 18:44:06 -0400
committer	Matt Singleton <msingleton@aclu.org>	2017-10-14 18:44:06 -0400
commit	fde7eb18c21626739936ab5072d8e537bc3a16de (patch)
tree	af5909c78eb3352110ae14b9f68b992311b3976d
parent	ff01ea02a0cd85d7199455de1a053b57fdc27eee (diff)