From 924e6e0ece7ef9e85cfe761c5383a54000dad2f7 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Sun, 3 Sep 2017 14:10:31 -0400 Subject: rewrite fox parser to use beautifulsoup --- unbiased/parser.py | 50 +++++++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/unbiased/parser.py b/unbiased/parser.py index 7d2d788..e3344f4 100755 --- a/unbiased/parser.py +++ b/unbiased/parser.py @@ -5,6 +5,7 @@ import os import re import urllib.parse +from bs4 import BeautifulSoup import requests from unbiased.unbiasedObjects import * @@ -818,41 +819,32 @@ def buildABC(): def buildFoxNews(): - url='http://foxnews.com' - name='Fox News' - - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) - - #get main headline - h1=content - h1=h1.split('

', 1)[1] - h2=h2.split('
a') + h2s = [x['href'] for x in h2s] h2s = ['http:' + x if x.startswith('//') else x for x in h2s] #GET TERTIARY HEADLINES - h3=content - h3s=[] - h3=h3.split('div id="big-top"', 1)[1] - h3=h3.split('
', 1)[0] - while '