#!/usr/bin/env python3
import argparse
import os
from unbiasedObjects import *
from unbiasedFunctions import *
from parser import *
import time
def main():
    """Parse CLI options, then rebuild the output page every ten minutes."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-w', '--webroot',
        default='/var/www/ubiased',  # NOTE(review): looks like a typo for 'unbiased' — confirm against the deployed path before changing
        help='location to write the output html')
    args = arg_parser.parse_args()

    # Run forever: one full scrape/build cycle, then sleep 10 minutes.
    while True:
        print('-----------------------')
        run(args.webroot)
        print('-----------------------')
        time.sleep(600)
def run(webroot):
    """Scrape every configured news source and write the aggregated
    index.html under *webroot*.

    Each source name 'Foo' is resolved dynamically to a global builder
    function buildFoo().  A builder that raises is retried up to three
    times with a linear back-off; a source that never succeeds (or has
    no builder at all) is skipped with a warning instead of aborting
    the whole run.
    """
    sourceList = []

    # SOURCES TO ADD NEXT:
    #   - ABC
    #   - REUTERS
    #   - Town Hall

    print('running with webroot="{}"'.format(webroot))

    ### These values have to be the second half of the function name
    ### E.g. Guardian calls buildGuardian(), etc.
    sourceFnArr = ['Guardian', 'TheHill', 'NPR', 'BBC', 'NBC', 'CBS',
                   'FoxNews', 'WashTimes', 'CSM', 'ABC']  # 'Blaze'

    for source in sourceFnArr:
        fn = 'build' + source
        # Resolve the builder once, outside the retry loop: a missing
        # builder is a configuration error, not a transient scrape
        # failure, so retrying it (as the old bare-except code did by
        # masking the TypeError from calling None) is pointless.
        possibles = globals().copy()
        possibles.update(locals())
        method = possibles.get(fn)
        if method is None:
            print('No builder function named ' + fn + '; skipping: ' + source)
            continue
        tries = 0
        while tries < 3:
            try:
                # Keep the try body minimal: only the call that scrapes.
                src = method()
                sourceList.append(src)
                break
            except Exception as e:
                # Was a bare `except:` that swallowed SystemExit and
                # KeyboardInterrupt and hid the actual error.
                print('Build error ({!r}). Looping again: {}'.format(e, source))
                tries += 1
                time.sleep(tries)  # linear back-off: 1s, 2s, 3s
        else:
            # Loop exhausted without `break` — all three attempts failed.
            print('Giving up after 3 failed tries: ' + source)

    # scrape all urls and build data structure
    newsSourceArr = buildNewsSourceArr(sourceList)
    # build the output file HTML
    outputHTML = buildOutput(newsSourceArr)
    # write the output HTML to <webroot>/index.html
    printOutputHTML(outputHTML, os.path.join(webroot, 'index.html'))
if __name__=="__main__":
main()