summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--unbiased/sources/guardian.py19
1 files changed, 9 insertions, 10 deletions
diff --git a/unbiased/sources/guardian.py b/unbiased/sources/guardian.py
index 3356b8a..6fb513c 100644
--- a/unbiased/sources/guardian.py
+++ b/unbiased/sources/guardian.py
@@ -18,16 +18,15 @@ class TheGuardian(NewsSource):
def _fetch_urls(cls):
soup = cls._fetch_content(cls.url)
- url_groups = []
- for htag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
- hblocks = soup.find('section', id='headlines').find_all(htag)
- urls = [x.a['href'] for x in hblocks]
- url_groups.append(urls)
- url_groups = [x for x in url_groups if len(url_groups) > 0]
- if len(url_groups) < 3:
- raise Exception('not enough article groups on Guardian home page!')
-
- return tuple(url_groups[0]), tuple(url_groups[1]), tuple(url_groups[2])
+ h = soup.find(id='headlines')\
+ .find_all(class_='fc-item__link')
+ h = [x['href'] for x in h]
+
+ h1s = (h[0],)
+ h2s = tuple(h[1:4])
+ h3s = tuple(h[4:])
+
+ return h1s, h2s, h3s
@classmethod
def _get_image(cls, soup):