From 1c1523ac3b695b84c055a1114ade017057999b2d Mon Sep 17 00:00:00 2001
From: Matt Singleton
Date: Tue, 5 Apr 2022 11:38:11 -0500
Subject: fix the hill

---
 unbiased/sources/thehill.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/unbiased/sources/thehill.py b/unbiased/sources/thehill.py
index 0c37aec..83a60e4 100644
--- a/unbiased/sources/thehill.py
+++ b/unbiased/sources/thehill.py
@@ -5,7 +5,7 @@ class TheHill(NewsSource):
 
     name = 'The Hill'
     shortname = 'Hill'
-    url = 'http://thehill.com'
+    url = 'https://thehill.com'
 
     bad_titles = ['THE MEMO']
     bad_authors = ['Matt Schlapp', 'Juan Williams', 'Judd Gregg']
@@ -14,17 +14,16 @@ class TheHill(NewsSource):
     def _fetch_urls(cls):
         soup = cls._fetch_content(cls.url)
 
-        h1 = soup.find('h1', class_='top-story-headline')\
-            .find('a')['href']
+        h1 = soup.find('a', class_='hero__text__title')['href']
         h1s = (h1,)
 
-        h23s = soup.find('div', class_='section-top-content')\
-            .find_all('div', class_='top-story-item')
-        h2s = set([x.h4.a['href'] for x in h23s if 'small' not in x['class']])
-        h2s = tuple(h2s)
+        h2s = soup.find('div', class_='col-main')\
+            .find_all('article', class_='featured-cards__medium')
+        h2s = (x.find('a')['href'] for x in h2s)
 
-        h3s = set([x.h4.a['href'] for x in h23s if 'small' in x['class']])
-        h3s = tuple(h3s)
+        h3s = soup.find('div', class_='col-main')\
+            .find_all('article', class_='featured-cards__small')
+        h3s = (x.find('a')['href'] for x in h3s)
 
         return h1s, h2s, h3s
 
-- 
cgit v1.2.3