new source The Washington Times

author: Matt Singleton <matt@xcolour.net> 2017-09-12 22:53:36 -0400
committer: Matt Singleton <matt@xcolour.net> 2017-09-12 22:53:36 -0400
commit: b50c2e7acc6ef45eb859acba645b628e444d7939 (patch)
tree: abfdc4bf68581835573c9455d25c6ab11c13b860
parent: 9b5f9b4f1be2563ebb639f90a943649d0165b7b8 (diff)
1 files changed, 34 insertions, 0 deletions
diff --git a/unbiased/sources/washtimes.py b/unbiased/sources/washtimes.py
new file mode 100644
index 0000000..e344af6
--- /dev/null
+++ b/unbiased/sources/washtimes.py
@@ -0,0 +1,50 @@
+import urllib.parse
+
+from unbiased.sources.base import NewsSource
+
+
+class TheWashingtonTimes(NewsSource):
+    """Front-page link scraper for The Washington Times.
+
+    Declares the source metadata and implements ``_fetch_urls``; the page
+    itself is retrieved through ``_fetch_content``, which is not defined
+    in this file (presumably inherited from ``NewsSource``).
+    """
+
+    name = 'The Washington Times'
+    shortname = 'WashTimes'
+    url = 'http://www.washingtontimes.com/'
+
+    @classmethod
+    def _fetch_urls(cls):
+        """Collect story links from the front page in three tiers.
+
+        Returns:
+            A ``(h1s, h2s, h3s)`` triple of tuples of absolute URLs: the
+            lead story, the top-news stories, and the "more from" links.
+        """
+        # Assumes _fetch_content returns a BeautifulSoup-style tree
+        # (find / find_all / tag attribute access) -- TODO confirm.
+        soup = cls._fetch_content(cls.url)
+
+        def absolute(href):
+            # Resolve a possibly relative href against the home page URL.
+            return urllib.parse.urljoin(cls.url, href)
+
+        lead = soup.find('article', class_='lead-story')
+        h1s = (absolute(lead.find(class_='article-headline').a['href']),)
+
+        top_news = soup.find('section', class_='top-news')
+        # Only direct <article> children with no class attribute are used.
+        h2s = tuple(
+            absolute(article.a['href'])
+            for article in top_news.find_all('article', recursive=False)
+            if article.attrs.get('class') is None
+        )
+
+        more_from = soup.find('section', class_='more-from desktop-only')
+        h3s = tuple(
+            absolute(link['href']) for link in more_from.ul.find_all('a')
+        )
+
+        return h1s, h2s, h3s
author	Matt Singleton <matt@xcolour.net>	2017-09-12 22:53:36 -0400
committer	Matt Singleton <matt@xcolour.net>	2017-09-12 22:53:36 -0400
commit	b50c2e7acc6ef45eb859acba645b628e444d7939 (patch)
tree	abfdc4bf68581835573c9455d25c6ab11c13b860
parent	9b5f9b4f1be2563ebb639f90a943649d0165b7b8 (diff)