summary | refs | log | tree | commit | diff
path: root/main.py
blob: 7fbcc23daecb65ba9d2afa8509f20f553c06f729 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python3

from unbiasedObjects import *
from unbiasedFunctions import *
from parser import *
import time

def main():
    """Run the scrape-and-publish cycle forever, pausing between passes.

    Each pass is bracketed by a dashed separator line on stdout, then the
    loop sleeps for 600 seconds before starting the next pass.
    This function never returns.
    """
    separator = '-----------------------'
    pause_seconds = 600

    while True:
        print(separator)
        run()
        print(separator)
        time.sleep(pause_seconds)

def run():
    """Perform one pass: collect the sources, scrape them, write the page.

    The source list mixes objects returned by the build* helpers with
    NewsSource instances defined inline here.  The list is handed to
    buildNewsSourceArr, the result is rendered by buildOutput, and the
    resulting HTML is passed to printOutputHTML together with the path
    '/var/www/html/index.html'.
    """
    # NOTE(review): NewsSource takes its arguments positionally; from the
    # values they appear to be a display name, a URL and HTML marker
    # strings/lists -- confirm against the NewsSource definition.
    sources = [
        buildBBC(),
        NewsSource(
            'NBC News',
            'http://nbcnews.com',
            ['top-stories-section', 'panel_hero', '<a href="'],
            ['<div class="story-link', '<a href="'],
            [],
            None, None,
            'ad-content ad-xs mobilebox1', 'taboola-native-top-stories-thumbnail',
            None, None,
        ),
        NewsSource(
            'CBS News',
            'http://cbsnews.com',
            ['<h1 class="title">', '<a href="'],
            ['<li data-tb-region-item>', '<a href="'],
            [],
            # Commented-out alternative carried over from the original for
            # this None/None pair: 'Big News Area Side Assets', '</a>'
            None, None,
            'Big News Area Side Assets', '</ul></div>',
            None, None,
        ),
        NewsSource(
            'The Blaze',
            'http://theblaze.com',
            ['<a class="gallery-link" href="'],
            ['</figure>\n\n<figure class="gallery-item">', 'href="'],
            [],
            '<!-- home -->', '<!-- loop-home -->',
            '<!-- home -->', '<!-- loop-home -->',
            None, None,
        ),
        buildWeeklyStandard(),
        buildNYT(),
        buildFoxNews(),
    ]

    # Scrape every source and build the combined data structure.
    scraped = buildNewsSourceArr(sources)

    # Render the HTML, then hand it off together with the output path.
    page_html = buildOutput(scraped)
    printOutputHTML(page_html, '/var/www/html/index.html')


# Start the polling loop only when executed as a script, not on import.
if __name__ == '__main__':
    main()