#!/usr/bin/env python3
import argparse
import os
from unbiasedObjects import *
from unbiasedFunctions import *
from parser import *
import time
def main():
    """Parse CLI options, then rebuild the output page every ten minutes."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-w', '--webroot',
        default='/var/www/ubiased',  # NOTE(review): looks like a typo for 'unbiased' — confirm against the deployed path before changing
        help='location to write the output html')
    args = arg_parser.parse_args()

    # Run forever: one full scrape/build cycle, then sleep 10 minutes.
    while True:
        print('-----------------------')
        run(args.webroot)
        print('-----------------------')
        time.sleep(600)
def run(webroot):
    """Scrape every configured news source and write the aggregated
    index.html under *webroot*.

    Each source name 'Foo' is resolved dynamically to a global builder
    function buildFoo().  A builder that raises is retried up to three
    times with a linear back-off; a source that never succeeds (or has
    no builder at all) is skipped with a warning instead of aborting
    the whole run.
    """
    sourceList = []

    # SOURCES TO ADD NEXT:
    #   - ABC
    #   - REUTERS
    #   - Town Hall

    print('running with webroot="{}"'.format(webroot))

    ### These values have to be the second half of the function name
    ### E.g. Guardian calls buildGuardian(), etc.
    sourceFnArr = ['Guardian', 'TheHill', 'NPR', 'BBC', 'NBC', 'CBS',
                   'FoxNews', 'WashTimes', 'CSM', 'ABC']  # 'Blaze'

    for source in sourceFnArr:
        fn = 'build' + source
        # Resolve the builder once, outside the retry loop: a missing
        # builder is a configuration error, not a transient scrape
        # failure, so retrying it (as the old bare-except code did by
        # masking the TypeError from calling None) is pointless.
        possibles = globals().copy()
        possibles.update(locals())
        method = possibles.get(fn)
        if method is None:
            print('No builder function named ' + fn + '; skipping: ' + source)
            continue
        tries = 0
        while tries < 3:
            try:
                # Keep the try body minimal: only the call that scrapes.
                src = method()
                sourceList.append(src)
                break
            except Exception as e:
                # Was a bare `except:` that swallowed SystemExit and
                # KeyboardInterrupt and hid the actual error.
                print('Build error ({!r}). Looping again: {}'.format(e, source))
                tries += 1
                time.sleep(tries)  # linear back-off: 1s, 2s, 3s
        else:
            # Loop exhausted without `break` — all three attempts failed.
            print('Giving up after 3 failed tries: ' + source)

    # scrape all urls and build data structure
    newsSourceArr = buildNewsSourceArr(sourceList)
    # build the output file HTML
    outputHTML = buildOutput(newsSourceArr)
    # write the output HTML to <webroot>/index.html
    printOutputHTML(outputHTML, os.path.join(webroot, 'index.html'))
if __name__=="__main__":
main()