diff options
author | Matt Singleton <matt@xcolour.net> | 2017-09-12 22:53:36 -0400 |
---|---|---|
committer | Matt Singleton <matt@xcolour.net> | 2017-09-12 22:53:36 -0400 |
commit | b50c2e7acc6ef45eb859acba645b628e444d7939 (patch) | |
tree | abfdc4bf68581835573c9455d25c6ab11c13b860 | |
parent | 9b5f9b4f1be2563ebb639f90a943649d0165b7b8 (diff) |
new source The Washington Times
-rw-r--r-- | unbiased/sources/washtimes.py | 34 |
1 files changed, 34 insertions, 0 deletions
"""News source definition for The Washington Times.

Path in the repository: unbiased/sources/washtimes.py
"""

# Import the submodule explicitly: a bare ``import urllib`` does not guarantee
# that ``urllib.parse`` has been loaded, so ``urllib.parse.urljoin`` could
# raise AttributeError depending on what else was imported first.
import urllib.parse

from unbiased.sources.base import NewsSource


class TheWashingtonTimes(NewsSource):
    """Collects article links from The Washington Times landing page."""

    name = 'The Washington Times'
    shortname = 'WashTimes'
    url = 'http://www.washingtontimes.com/'

    @classmethod
    def _fetch_urls(cls):
        """Gather article URLs from the page at ``cls.url`` in three groups.

        The page is obtained through ``cls._fetch_content`` (inherited;
        presumably returns a BeautifulSoup tree -- confirm in the base class).
        Every ``href`` is resolved against ``cls.url`` so relative links come
        back as absolute URLs.

        Returns:
            A 3-tuple ``(h1s, h2s, h3s)`` where each element is a tuple of
            URL strings:

            * ``h1s`` -- the single link inside the ``lead-story`` article's
              ``article-headline`` element.
            * ``h2s`` -- the first link of every ``<article>`` that is a
              direct child of the ``top-news`` section and carries no
              ``class`` attribute.
            * ``h3s`` -- every link in the list of the
              ``more-from desktop-only`` section.

        Raises:
            AttributeError, TypeError, KeyError: if the page layout does not
                contain the expected elements (a lookup returns ``None`` or a
                tag has no ``href``). No recovery is attempted here.
        """
        soup = cls._fetch_content(cls.url)

        # Lead story: exactly one headline link.
        lead_article = soup.find('article', class_='lead-story')
        lead_link = lead_article.find(class_='article-headline').a
        h1s = (urllib.parse.urljoin(cls.url, lead_link['href']),)

        # Top news: only direct-child articles that have no class attribute
        # are taken; articles carrying any class are skipped.
        top_section = soup.find('section', class_='top-news')
        h2s = tuple(
            urllib.parse.urljoin(cls.url, article.a['href'])
            for article in top_section.find_all('article', recursive=False)
            if article.attrs.get('class') is None
        )

        # "More from" list: every anchor inside the section's <ul>.
        more_section = soup.find('section', class_='more-from desktop-only')
        h3s = tuple(
            urllib.parse.urljoin(cls.url, anchor['href'])
            for anchor in more_section.ul.find_all('a')
        )

        return h1s, h2s, h3s