# (repository web-view header) summary | refs | log | tree | commit | diff
# path: root/main.py
# blob: 19fe8b0fc9bf9bb51144eef03a55a49bf5ac7a71 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python3

from unbiasedObjects import *
from unbiasedFunctions import *
import time

def main():
    """Call run() in an endless loop.

    Each pass is framed by a dashed separator line printed to stdout,
    and the loop sleeps for 120 seconds before starting the next pass.
    This function never returns on its own.
    """
    divider = '-----------------------'
    pause_seconds = 120
    while 1:
        print(divider)
        run()
        print(divider)
        # Wait before the next pass.
        time.sleep(pause_seconds)

def run():
    """Perform one pass: configure the news sources, scrape, write the page.

    Builds one NewsSource per outlet, hands the list to
    buildNewsSourceArr(), renders the result with buildOutput(), and
    passes the HTML plus the target path to printOutputHTML().

    NOTE(review): every NewsSource takes a name, a URL, three lists of
    HTML fragment strings and three pairs of fragment strings (or None).
    The meaning of each position is defined by NewsSource itself and is
    not visible here -- confirm there before editing the patterns.
    """
    sources = [
        NewsSource(
            'New York Times',
            'http://nytimes.com',
            ['<a href="'],
            ['article class="story theme-summary',
             'h2 class="story-heading"><a href="'],
            ['<hr class="single-rule"',
             'article class="story theme-summary',
             'h2 class="story-heading"><a href="'],
            '<div class="b-column column">',
            '<!-- close photo-spot-region -->',
            'section id="top-news" class="top-news"',
            '</div><!-- close a-column -->',
            'class="second-column-region region"',
            'html.geo-dma-501 .nythpNYRegionPromo',
        ),
        NewsSource(
            'Fox News',
            'http://foxnews.com',
            ['<h1><a href="'],
            ['<li data-vr-contentbox=""><a href="'],
            [],
            None,
            None,
            '<div class="top-stories">',
            '<section id="latest"',
            None,
            None,
        ),
        NewsSource(
            'NBC News',
            'http://nbcnews.com',
            ['top-stories-section', 'panel_hero', '<a href="'],
            ['panel panel_default', '<a href="'],
            [],
            None,
            None,
            'row_no-clear ad-container ad-container_default ad-hide ad-container-mobilebox1',
            'js-more-topstories',
            None,
            None,
        ),
        NewsSource(
            'CBS News',
            'http://cbsnews.com',
            ['<h1 class="title"><a href="'],
            ['<li data-tb-region-item>', '<a href="'],
            [],
            None,
            None,
            'Big News Area Side Assets',
            '</ul></div>',
            None,
            None,
        ),
    ]

    # Scrape all URLs and build the data structure.
    scraped = buildNewsSourceArr(sources)

    # Build the output HTML, then hand it off together with the target path.
    printOutputHTML(buildOutput(scraped), '/var/www/html/index.html')


# Start the endless loop only when executed as a script, not on import.
if "__main__" == __name__:
    main()