diff options
author | Matt Singleton <matt@xcolour.net> | 2019-01-12 23:12:38 -0500 |
---|---|---|
committer | Matt Singleton <matt@xcolour.net> | 2019-01-12 23:12:38 -0500 |
commit | a7292dfe34a3ff9e45af797ea086c05250fdf44a (patch) | |
tree | 77dec960c3fc3441606b5f880eb859f65c16aa0b | |
parent | f2e8dcdc0a8cd69b594ed7e6f7f0f5087a5950b2 (diff) |
new guardian layout (tag: build-15)
-rw-r--r-- | unbiased/sources/guardian.py | 19 |
1 file changed, 9 insertions, 10 deletions
@classmethod
def _fetch_urls(cls):
    """Scrape headline article URLs from the Guardian home page.

    Returns a 3-tuple of tuples of href strings:
      - h1s: the single lead headline
      - h2s: the next three headlines
      - h3s: all remaining headlines (may be empty)

    Raises:
        Exception: if no headline links are found — a descriptive failure
            here beats the bare IndexError that ``urls[0]`` would raise
            when the Guardian changes its page layout.
    """
    soup = cls._fetch_content(cls.url)
    # The new Guardian layout tags every headline anchor with the
    # 'fc-item__link' class inside the '#headlines' section.
    links = soup.find(id='headlines').find_all(class_='fc-item__link')
    urls = [x['href'] for x in links]

    if not urls:
        # Preserve the old implementation's loud, descriptive failure
        # mode instead of crashing with IndexError below.
        raise Exception('no headline links found on Guardian home page!')

    h1s = (urls[0],)
    h2s = tuple(urls[1:4])
    h3s = tuple(urls[4:])

    return h1s, h2s, h3s