summary refs log tree commit diff
diff options
context:
space:
mode:
author Matt Singleton <msingleton@aclu.org> 2017-09-27 21:19:32 -0400
committer Matt Singleton <msingleton@aclu.org> 2017-09-27 21:19:32 -0400
commit 753b48246a8e3eb5bfffa77814ff297287951e03 (patch)
tree bbe6ed46529489b161cffca838fbed1d66fcae22
parent e674ae4ca972e2f902dcc96d65fd4e792668b8a2 (diff)
ABC news source, closes #7
-rw-r--r-- unbiased/sources/abc.py 43
1 files changed, 43 insertions, 0 deletions
diff --git a/unbiased/sources/abc.py b/unbiased/sources/abc.py
new file mode 100644
index 0000000..2ea7aff
--- /dev/null
+++ b/unbiased/sources/abc.py
@@ -0,0 +1,43 @@
+from unbiased.sources.base import NewsSource
+
+class ABC(NewsSource):
+    """ABC News source: collects tiered headline urls from cls.url."""
+
+    name = 'ABC News'
+    shortname = 'ABC'
+    url = 'http://abcnews.go.com/'
+
+    @classmethod
+    def _fetch_urls(cls):
+        """
+        Scrape cls.url and return (h1s, h2s, h3s): three tuples of href
+        strings for the primary, secondary and tertiary headline tiers.
+        """
+        soup = cls._fetch_content(cls.url)
+
+        # primary headline: the link inside the 'hero' article's caption
+        h1 = soup.find('article', class_='hero')\
+            .find('div', class_='caption-wrapper').h1.a['href']
+        h1s = (h1,)
+
+        # secondary headlines: one caption link per card in the 'row-2' div
+        h2s = soup.find('div', id='row-2')\
+            .find_all('article', class_='card single row-item')
+        h2s = tuple(x.find('div', class_='caption-wrapper').h1.a['href'] for x in h2s)
+
+        # tertiary headlines: story/wirestory items in row-1's headlines tab
+        h3s = soup.find('div', id='row-1')\
+            .find('article', class_='headlines')\
+            .find('div', id='tab-content')\
+            .find_all('li', class_=['story', 'wirestory'])
+        h3s = tuple(x.div.h1.a['href'] for x in h3s)
+
+        return h1s, h2s, h3s
+
+    @classmethod
+    def _normalize_url(cls, url):
+        """
+        Delegate to NewsSource._normalize_url with ['id'], because ABC News
+        urls need their 'id' query param kept in order to work.
+        """
+        return NewsSource._normalize_url(url, ['id'])