#!/usr/bin/env python3
from unbiasedObjects import *
from unbiasedFunctions import *
from parser import *
import time
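
# Rebuild the aggregated front page in an endless loop, pausing ten
# minutes (600 seconds) between runs.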
def main():
    while True:
        print('-----------------------')
        run()
        print('-----------------------')
        time.sleep(600)
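
# Build each news source, scrape its pages, and write the aggregated HTML output.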
def run():
    sourceList=[]

    '''
    SOURCES TO ADD NEXT:
    -ABC
    -REUTERS
    '''
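
    #Build each source object in turn and collect it in sourceList.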
    #For some reason, The Guardian sometimes just doesn't scrape right;
    #this loop retried until it did. Kept here, disabled, for reference.
    '''
    h1='https://www.theguardian.com/us'
    looped=False
    while h1=='https://www.theguardian.com/us':
        try:
            gdn=buildGuardian()
            h1=gdn.h1Arr[0]
        except:
            print('The Guardian: build error. Looping again.')
            looped=True
    '''
    gdn=buildGuardian()
    sourceList.append(gdn)

    hil=buildTheHill()
    sourceList.append(hil)

    #nyt=buildNYT()
    #sourceList.append(nyt)

    npr=buildNPR()
    sourceList.append(npr)

    blz=buildBlaze()
    sourceList.append(blz)

    bbc=buildBBC()
    sourceList.append(bbc)

    nbc=buildNBC()
    sourceList.append(nbc)

    cbs=buildCBS()
    sourceList.append(cbs)

    #The Weekly Standard just doesn't update frequently enough
    #wkl=buildWeeklyStandard()
    #sourceList.append(wkl)

    fox=buildFoxNews()
    sourceList.append(fox)
    #scrape all urls and build data structure
    newsSourceArr=buildNewsSourceArr(sourceList)

    #build the output file HTML
    outputHTML=buildOutput(newsSourceArr)

    #write the output HTML to the web root
    printOutputHTML(outputHTML, '/var/www/html/index.html')

if __name__=="__main__":
    main()