# Provenance: originally added as unbiased/sources/abc.py by commit
# 753b48246a8e3eb5bfffa77814ff297287951e03
# ("ABC news source, closes #7", Matt Singleton,
# Wed, 27 Sep 2017 21:19:32 -0400).

from unbiased.sources.base import NewsSource


class ABC(NewsSource):
    """News source definition for the ABC News front page."""

    name = 'ABC News'
    shortname = 'ABC'
    url = 'http://abcnews.go.com/'

    @classmethod
    def _fetch_urls(cls):
        """
        Returns three tuples of urls, one for each of
        the three tiers.

        The tiers are, in order: the single primary (hero) headline,
        the secondary headline cards found in 'row-2', and the tertiary
        headline list found in the 'tab-content' of 'row-1'.

        Nothing here guards against a missing element: if the page
        layout differs from what is expected, the chained lookups
        raise instead of returning partial results.
        NOTE(review): assumes cls._fetch_content() returns a
        BeautifulSoup-style tree -- confirm against the base class.
        """
        soup = cls._fetch_content(cls.url)

        # get primary headline
        hero = soup.find('article', class_='hero')
        h1 = hero.find('div', class_='caption-wrapper').h1.a['href']
        h1s = (h1,)

        # get secondary headlines
        cards = soup.find('div', id='row-2').find_all(
            'article', class_='card single row-item')
        h2s = tuple(
            card.find('div', class_='caption-wrapper').h1.a['href']
            for card in cards
        )

        # get tertiary headlines
        headlines = soup.find('div', id='row-1').find(
            'article', class_='headlines')
        # A list for class_ matches items carrying either class.
        stories = headlines.find('div', id='tab-content').find_all(
            'li', class_=['story', 'wirestory'])
        h3s = tuple(story.div.h1.a['href'] for story in stories)

        return h1s, h2s, h3s

    @classmethod
    def _normalize_url(cls, url):
        """
        ABC News urls include an 'id' query param that we need to
        keep in order for the URL to work.
        """
        # Delegate through super() instead of naming NewsSource directly,
        # so that a classmethod base implementation runs with this class
        # as `cls` rather than the base class.
        return super(ABC, cls)._normalize_url(url, ['id'])