blob: 5f9830f71b15ce3a0d4c0316ddeb75579b394c8c (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
#!/usr/bin/env python3
from unbiasedObjects import *
from unbiasedFunctions import *
from parser import *
import time
def main():
    """Call run() in an endless loop, sleeping 600 seconds between passes.

    Every pass is framed by a dashed separator line printed before and
    after run(). The loop has no exit condition of its own.
    """
    separator = '-----------------------'
    pause_seconds = 600
    while True:
        print(separator)
        run()
        print(separator)
        time.sleep(pause_seconds)
def run():
    """Build every news source, assemble the page and write it out.

    The sources are built one by one with their build* helpers and
    collected in a fixed order. The collected list is handed to
    buildNewsSourceArr, its result to buildOutput, and the resulting HTML
    to printOutputHTML together with the path /var/www/html/index.html.
    """
    # SOURCES TO ADD NEXT:
    #  - ABC
    #  - REUTERS
    sourceList = []

    sourceList.append(buildTheHill())
    sourceList.append(buildNYT())
    sourceList.append(buildNPR())

    # The Guardian build sometimes does not work right: keep rebuilding
    # until its first h1 entry is something other than the bare section URL.
    guardian_front = 'https://www.theguardian.com/us'
    h1 = guardian_front
    while h1 == guardian_front:
        try:
            gdn = buildGuardian()
            h1 = gdn.h1Arr[0]
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the retry loop can be stopped.
            print('The Guardian: build error. Looping again.')
    sourceList.append(gdn)

    sourceList.append(buildBlaze())
    sourceList.append(buildBBC())
    sourceList.append(buildNBC())
    sourceList.append(buildCBS())

    # Weekly Standard is left out: it just doesn't update frequently enough.
    # wkl=buildWeeklyStandard()
    # sourceList.append(wkl)

    sourceList.append(buildFoxNews())

    # scrape all urls and build data structure
    newsSourceArr = buildNewsSourceArr(sourceList)

    # build the output file HTML
    outputHTML = buildOutput(newsSourceArr)

    # print the output file HTML
    printOutputHTML(outputHTML, '/var/www/html/index.html')
# Start the endless scrape loop only when this file is executed as a script.
if __name__ == "__main__":
    main()
|