From e690fdfa6f1eebac5a4790668ab946e82f947eaf Mon Sep 17 00:00:00 2001
From: Matt Singleton
Date: Sun, 16 Apr 2017 16:59:02 -0400
Subject: take webroot as a command line argument

---
 main.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/main.py b/main.py
index a109d2f..f1c3317 100755
--- a/main.py
+++ b/main.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 
+import argparse
+import os
+
 from unbiasedObjects import *
 from unbiasedFunctions import *
 from parser import *
@@ -7,13 +10,17 @@ import time
 
 
 def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-w', '--webroot', default='/var/www/ubiased', help='location to write the output html')
+    args = parser.parse_args()
+
     while True:
         print('-----------------------')
-        run()
+        run(args.webroot)
         print('-----------------------')
         time.sleep(600)
 
-def run():
+def run(webroot):
     sourceList=[]
 
     '''
@@ -25,6 +32,8 @@ def run():
 
     '''
 
+    print('running with webroot="{}"'.format(webroot))
+
 
     ### These values have to be the second half of the function name
     ### E.g. Guardian calls buildGuardian(), etc.
@@ -54,7 +63,7 @@ def run():
     outputHTML=buildOutput(newsSourceArr)
 
     #print the output file HTML
-    printOutputHTML(outputHTML, '/var/www/html/index.html')
+    printOutputHTML(outputHTML, os.path.join(webroot, 'index.html'))
 
 
 if __name__=="__main__":
-- cgit v1.2.3

From 4622a264b8d6e0446a52d96b7df220d357c082a9 Mon Sep 17 00:00:00 2001
From: Matt Singleton
Date: Sun, 16 Apr 2017 23:21:22 -0400
Subject: move files around for packaging reasons

---
 .gitignore                              |   4 +-
 html_template/newtemplate.html          | 150 -----
 html_template/template.html             | 173 ------
 html_template/unbiased.css              | 220 -------
 main.py                                 |  70 ---
 parser.py                               | 986 --------------------------------
 scratch/do_not_delete                   |   0
 spotCheck.py                            |  41 --
 unbiased/html_template/newtemplate.html | 150 +++++
 unbiased/html_template/template.html    | 173 ++++++
 unbiased/html_template/unbiased.css     | 220 +++++++
 unbiased/main.py                        |  70 +++
 unbiased/parser.py                      | 986 ++++++++++++++++++++++++++++++++
 unbiased/scratch/do_not_delete          |   0
 unbiased/spotCheck.py                   |  41 ++
 unbiased/unbiasedFunctions.py           | 259 +++++++++
 unbiased/unbiasedObjects.py             |  90 +++
 unbiasedFunctions.py                    | 259 ---------
 unbiasedObjects.py                      |  90 ---
 19 files changed, 1991 insertions(+), 1991 deletions(-)
 delete mode 100644 html_template/newtemplate.html
 delete mode 100755 html_template/template.html
 delete mode 100755 html_template/unbiased.css
 delete mode 100755 main.py
 delete mode 100755 parser.py
 delete mode 100644 scratch/do_not_delete
 delete mode 100755 spotCheck.py
 create mode 100644 unbiased/html_template/newtemplate.html
 create mode 100755 unbiased/html_template/template.html
 create mode 100755 unbiased/html_template/unbiased.css
 create mode 100755 unbiased/main.py
 create mode 100755 unbiased/parser.py
 create mode 100644 unbiased/scratch/do_not_delete
 create mode 100755 unbiased/spotCheck.py
 create mode 100644 unbiased/unbiasedFunctions.py
 create mode 100644 unbiased/unbiasedObjects.py
 delete mode 100644 unbiasedFunctions.py
 delete mode 100644 unbiasedObjects.py

diff --git a/.gitignore b/.gitignore
index 65c8f8e..90bf98d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,10 @@
 *.pyc
 *~
 __pycache__/
-scratch/*.html
+unbiased/scratch/*.html
 legacy_py/
 unbiased.html
 html_template/Penguins.jpg
 html_template/BAK*
 #*
-.#*
\ No newline at end of file
+.#*

diff --git a/html_template/newtemplate.html b/html_template/newtemplate.html
deleted file mode 100644
index 0cec766..0000000
--- a/html_template/newtemplate.html
+++ /dev/null
@@ -1,150 +0,0 @@
- - - - - - UnBiased - - - - - - - -
- Sources: BBC US, NBC News, CBS News, The Blaze, Weekly Standard, New York Times, Fox News -
- -
diff --git a/html_template/template.html b/html_template/template.html
deleted file mode 100755
index fc17006..0000000
--- a/html_template/template.html
+++ /dev/null
@@ -1,173 +0,0 @@
- - - - - - - UnBiased - - - - - - - -
- Sources: xxSourcesxx -
- - diff --git a/html_template/unbiased.css b/html_template/unbiased.css deleted file mode 100755 index 244f100..0000000 --- a/html_template/unbiased.css +++ /dev/null @@ -1,220 +0,0 @@ -/*body{ - width:900px; - margin-left:auto; - margin-right:auto; -}*/ - - -body{ - margin:0; -} - -a:link, a:visited, a:hover, a:active { - color: #00f; - text-decoration:none; - } - -a:hover{ - cursor:pointer; -} - -#page-header{ - width:100%; - text-align:center; - padding:.5em 0 1em; - margin-bottom:1em; - border-bottom:3px solid #BB133E; - background:#002147; -} - -.title{ - font-size:3em; -} - -#title-1{ - font-style:italic; - color:#fff; -} - -#title-2{ - color:#fff; -} - -#subtitle{ - font-size:1.25em; - color:#ccc; -} - -#timestamp{ - margin:.5em 0 0 0; - font-size:.8em; - color:#cc6; -} - -#page-container{ - width:900px; - margin-left:auto; - margin-right:auto; -} - -@media only screen and (max-width:900px){ - #page-container{ - width:100% - } -} - -#top-stories{ - width:95%; - display:block; - overflow:auto; - padding:10px; - margin-left:auto; - margin-right:auto; - text-align:center; - border-bottom: 3px solid #BB133E; - margin-bottom: 10px; -} - -.row{ - display:flex; -} - -.top-story{ - display:inline-block; - vertical-align:top; - text-align:left; - width:360px; - height:auto; - overflow:hidden; - background:#fff; - margin:10px; - padding:10px; - border:2px solid #ccc; - flex:1; -} - -@media only screen and (max-width:500px){ - .row{ - display:block; - } - .top-story{ - display:block; - width:auto; - height:auto; - } -} - -.top-stories-img{ - width:350px; - height:200px; - overflow:hidden; - background-size: auto 234px;/*cover;*/ - background-position: top center;/*center center;*/ - margin:0 auto; -} - -@media only screen and (max-width:500px){ - .top-stories-img{ - width:auto; - } -} - - -.top-stories-hed{ - font-weight:bold; - font-size:1.35em; - margin:10px 10px 0; - color:#00f; -} - -.top-stories-desc{ - font-size:1em; - padding-top:.5em; - margin:0 .75em; -} - -#middle-stories{ - clear:both; - width:500px; - margin:0 auto; - padding:0; - display:block; - overflow:auto; - float:left; -} - -@media only screen and (max-width:500px){ - #middle-stories{ - width:100%; - float:none; - } -} - -.middle-story{ - margin:5px 10px; - padding:10px; - background:#fff; - border:2px solid #ddd; - width:460px; - float:left; -} - -@media only screen and (max-width:500px){ - .middle-story{ - width:auto; - } -} - -.middle-stories-img{ - width:150px; - height:100px; - overflow:hidden; - background-size: auto 117px;/*cover;*/ - background-position: top center;/*center center;*/ - float:left; - max-width:35%; -} - -.middle-stories-hed{ - font-size:1.2em; - float:left; - width:300px; - margin-left:10px; - color:#00f; -} - -@media only screen and (max-width:500px){ - .middle-stories-hed{ - max-width:60%; - } -} - -#bottom-stories{ - margin:0 10px; - padding:10px; - display:block; - overflow:auto; - float:left; - width:350px; - border:5px solid #ddd; -} - -@media only screen and (max-width:900px){ - #bottom-stories{ - width:auto; - border-width:3px; - float:none; - } -} - -.bottom-story{ color:#00f; - - padding:15px 0; - color:#00f; -} - -#sources{ - clear:both; - padding-top:4em; - font-size:.8em; -} \ No newline at end of file diff --git a/main.py b/main.py deleted file mode 100755 index f1c3317..0000000 --- a/main.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import os - -from unbiasedObjects import * -from unbiasedFunctions import * -from parser import * -import time - - -def 
main(): - parser = argparse.ArgumentParser() - parser.add_argument('-w', '--webroot', default='/var/www/ubiased', help='location to write the output html') - args = parser.parse_args() - - while True: - print('-----------------------') - run(args.webroot) - print('-----------------------') - time.sleep(600) - -def run(webroot): - sourceList=[] - - ''' - - SOURCES TO ADD NEXT: - -ABC - -REUTERS - -Town Hall - - ''' - - print('running with webroot="{}"'.format(webroot)) - - - ### These values have to be the second half of the function name - ### E.g. Guardian calls buildGuardian(), etc. - sourceFnArr=['Guardian', 'TheHill', 'NPR', 'BBC', 'NBC', 'CBS', - 'FoxNews', 'WashTimes', 'CSM', 'ABC'] #'Blaze' - - for source in sourceFnArr: - tries=0 - while tries<3: - try: - fn='build'+source - possibles = globals().copy() - possibles.update(locals()) - method = possibles.get(fn) - src=method() - sourceList.append(src) - break - except: - print('Build error. Looping again: '+source) - tries+=1 - time.sleep(tries) - - #scrape all urls and build data structure - newsSourceArr=buildNewsSourceArr(sourceList) - - #build the output file HTML - outputHTML=buildOutput(newsSourceArr) - - #print the output file HTML - printOutputHTML(outputHTML, os.path.join(webroot, 'index.html')) - - -if __name__=="__main__": - main() diff --git a/parser.py b/parser.py deleted file mode 100755 index f69281b..0000000 --- a/parser.py +++ /dev/null @@ -1,986 +0,0 @@ -#!/usr/bin/env python3 - -from unbiasedObjects import * -from unbiasedFunctions import buildArticle -import os -import re - - -''' -Takes in a URL, downloads the file to a temp file, -reads the file into a string, and returns that string -''' -def urlToContent(url, sourceEncoding='utf8'): - #download file - os.system('wget -q -O scratch/temp1.html --no-check-certificate '+url) - - #read file - if sourceEncoding=='utf8': - f=open('scratch/temp1.html', 'r', encoding="utf8") - else: - f=open('scratch/temp1.html', 'r', encoding="latin-1") - content=f.read() - f.close() - - return content - - -''' -Creates a new newsSource2 object. For each URL in h1-h3URLs, -calls the file scraper and appends the new Article object. -Returns a newsSource2 object -''' -def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs): - h1Arr=[] - a=buildArticle(h1URLs[0], name) - if a==None: - print('................\nH1 Nonetype in '+name+'\n................') - else: - h1Arr.append(a) - - h2Arr=[] - for x in h2URLs: - a=buildArticle(x, name) - if a!=None: - h2Arr.append(a) - else: - print('................\nH2 Nonetype in '+name+'\n................') - - - h3Arr=[] - for x in h3URLs: - a=buildArticle(x, name) - if a!=None: - h3Arr.append(a) - else: - print('................\nH3 Nonetype in '+name+'\n................') - - #BUILD THE NEWS SOURCE - newsSource=NewsSource2(name, url, h1Arr, h2Arr, h3Arr) - - return newsSource - - -''' -Some sites will replicate URLs across the page. This function removes them. -Check hierarchically: if h3 exists in h1s or h2s, remove from h3s; -if h2 exists in h1s, remove from h2s - -also check partial URLs (e.g. 
nytimes.com/story.html is the same as -nytimes.com/story.html?var=x -''' -def removeDuplicates(h1s, h2s, h3s): - #Assume h1s is one element, and keep it - - #remove h2 duplicates - removeArr=[] - for i in range(len(h2s)): - #check internally - for j in range(len(h2s)): - if i==j: - continue - else: - if h2s[i] in h2s[j]: - removeArr.append(h2s[j]) - #check against h1s - for k in range(len(h1s)): - if (h2s[i] in h1s[k]) or (h1s[k] in h2s[i]): - removeArr.append(h2s[i]) - for x in removeArr: - h2s.remove(x) - - #remove h3 duplicates - removeArr=[] - for i in range(len(h3s)): - #check internally - for j in range(len(h3s)): - if i==j: - continue - else: - if h3s[i] in h3s[j]: - removeArr.append(h3s[j]) - #check against h1s and h2s - h1and2=h1s+h2s - for k in range(len(h1and2)): - if (h3s[i] in h1and2[k]) or (h1and2[k] in h3s[i]): - removeArr.append(h3s[i]) - for x in removeArr: - if x in h3s: - h3s.remove(x) - - - return h1s, h2s, h3s - - - -def removalNotification(source, title, reason, value): - print('*************************') - print('\t\tSTORY REMOVED') - print('SOURCE: '+source) - print('TITLE: \t'+title) - print('REASON: '+reason) - print('VALUE: \t'+value) - print('*************************\n\n') - - -def removeBadStoriesHelper(source, element, badStringList, arr): - if badStringList!=None: - for i in range(len(arr)): - for hed in arr[i]: - if hed==None: - print("////////\nNone type found in removeBadStoriesHelper for "+source.name+"\n/////////") - break - for item in badStringList: - if item in getattr(hed, element): - arr[i].remove(hed) - #if it's in the h1 slot, bump up the - # first h2 into the h1 slot - if i==0: - arr[0].append(arr[1][0]) - arr[1].remove(arr[1][0]) - removalNotification(source.name, hed.title, element, item) - - -def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, badURLArr=None): - - arr=[source.h1Arr, source.h2Arr, source.h3Arr] - - removeBadStoriesHelper(source, "title", badTitleArr, arr) - removeBadStoriesHelper(source, "description", badDescArr, arr) - removeBadStoriesHelper(source, "author", badAuthorArr, arr) - removeBadStoriesHelper(source, "img", badImgArr, arr) - removeBadStoriesHelper(source, "url", badURLArr, arr) - - return source - - - - -def buildTheHill(): - url='http://thehill.com' - name='The Hill' - - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) - - #get main headline - h1=content - h1=h1.split('
', 1)[1] - h1=h1.split('', 1)[1] - h2=h2.split('', 1)[0] - while '
', 1)[1] - h3=h3.split('', 1)[0] - while '
')[2:] - for x in h2: - if '

', 1)[1] - h3=h3.split('
', 1)[0]#'', 1)[0] - while '
  • ' in h2: - h2=h2.split('
  • ', 1)[1] - h2=h2.split('', 1)[1] - h2=h2.split('
    ', 1)[1] - h3=h3.split('Watch/Listen', 1)[0] - while '
    ', 1)[1] - h1=h1.split('href="', 1)[1] - h1=h1.split('"', 1)[0] - h1s=[h1] - - #GET SECONDARY HEADLINES - h2=content - h2s=[] - h2=h2.split('
    ', 1)[1] - h2=h2.split('
    ' in h2: - h2=h2.split('
    ', 1)[1] - h2=h2.split('href="', 1)[1] - x=h2.split('"', 1)[0] - if h1 not in x: - h2s.append(x) - - #GET TERTIARY HEADLINES - h3=content - h3s=[] - h3=h3.split('Today\'s Standard', 1)[1] - h3=h3.split('
    ' in h3: - h3=h3.split('
    ', 1)[1] - h3=h3.split('href="', 1)[1] - x=h3.split('"', 1)[0] - if h1 not in x: - h3s.append(x) - - #Need to add URL prefix to all URLs - for i in range(len(h1s)): - h1s[i]=url+h1s[i] - for i in range(len(h2s)): - h2s[i]=url+h2s[i] - for i in range(len(h3s)): - h3s[i]=url+h3s[i] - - - h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - wkl=buildNewsSource2(name, url, h1s, h2s, h3s) - - #REMOVE BAD STORIES - badTitleArr=None - ## if flagged again, remove Micah Mattix - badDescArr=['Matt Labash'] - badAuthorArr=['MATT LABASH', 'TWS PODCAST', 'ERIC FELTEN', 'Steven J. Lenzner', 'MARK HEMINGWAY'] - badImgArr=['http://www.weeklystandard.com/s3/tws15/images/twitter/tws-twitter_1024x512.png'] - wkl=removeBadStories(wkl, badTitleArr, badDescArr, badAuthorArr, badImgArr) - - return wkl - - - - -def buildNPR(): - url='http://www.npr.org/sections/news/' - name='NPR' - - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) - - #get main headline - h1=content - h1=h1.split('', 1)[1] - h1=h1.split('', 1)[1] - h2=h2.split('', 1)[0] - while '
    ' in h2: - h2=h2.split('
    ', 1)[1] - h2=h2.split('', 1)[1] - h2=h2.split('
    ', 1)[0] - while '', 1)[1] - h1=h1.split('
    ', 1)[1] - h2=h2.split('', 1)[0] - #remove "collection" sets - while '
    ' in h2: - arr=h2.split('
    ', 1) - h2=arr[0]+arr[1].split('', 1)[1] - #Grab the remaining URLs - while '', 1)[1] - h3=h3.split('', 1)[0] - #remove "collection" sets - while '
    ' in h3: - arr=h3.split('
    ', 1) - h3=arr[0]+arr[1].split('', 1)[1] - #Grab the remaining URLs - while '' in h3: - h3=h3.split('', 1)[1] - h3=h3.split('', 1)[0] - elif '/video/the-daily-360' in h3: - h3=h3.split('/video/the-daily-360')[-1] - h3=h3.split('More News', 1)[0] - #remove "collection" sets - while '
    ' in h2: - arr=h3.split('
    ', 1) - h3=arr[0]+arr[1].split('', 1)[1] - - #Grab the remaining URLs - while ' - -
    - -
    -

    Top News

    - -
    - - -''' diff --git a/scratch/do_not_delete b/scratch/do_not_delete deleted file mode 100644 index e69de29..0000000 diff --git a/spotCheck.py b/spotCheck.py deleted file mode 100755 index d1edda4..0000000 --- a/spotCheck.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python3 - - -from parser import * -from unbiasedObjects import * -import sys - -def spotCheck(src): - - fns = {'hil' : buildTheHill, - 'cbs' : buildCBS, - 'npr' : buildNPR, - 'fox' : buildFoxNews, - 'gdn' : buildGuardian, - 'blz' : buildBlaze, - 'bbc' : buildBBC, - 'nbc' : buildNBC, - 'wat' : buildWashTimes, - 'csm' : buildCSM, - 'abc' : buildABC} - - data=fns[src]() - - print('H1s:\n--------------') - for h in data.h1Arr: - print(h.title) - - print('\n\nH2s:\n--------------') - for h in data.h2Arr: - print(h.title) - - print('\n\nH3s:\n--------------') - for h in data.h3Arr: - print(h.title) - - print('\n\n') - - - -if __name__=='__main__': - spotCheck(sys.argv[1]) diff --git a/unbiased/html_template/newtemplate.html b/unbiased/html_template/newtemplate.html new file mode 100644 index 0000000..0cec766 --- /dev/null +++ b/unbiased/html_template/newtemplate.html @@ -0,0 +1,150 @@ + + + + + + UnBiased + + + + + + + +
    + Sources: BBC US, NBC News, CBS News, The Blaze, Weekly Standard, New York Times, Fox News +
+ +
diff --git a/unbiased/html_template/template.html b/unbiased/html_template/template.html
new file mode 100755
index 0000000..fc17006
--- /dev/null
+++ b/unbiased/html_template/template.html
@@ -0,0 +1,173 @@
+ + + + + + + UnBiased + + + + + + + +
    + Sources: xxSourcesxx +
    + + diff --git a/unbiased/html_template/unbiased.css b/unbiased/html_template/unbiased.css new file mode 100755 index 0000000..244f100 --- /dev/null +++ b/unbiased/html_template/unbiased.css @@ -0,0 +1,220 @@ +/*body{ + width:900px; + margin-left:auto; + margin-right:auto; +}*/ + + +body{ + margin:0; +} + +a:link, a:visited, a:hover, a:active { + color: #00f; + text-decoration:none; + } + +a:hover{ + cursor:pointer; +} + +#page-header{ + width:100%; + text-align:center; + padding:.5em 0 1em; + margin-bottom:1em; + border-bottom:3px solid #BB133E; + background:#002147; +} + +.title{ + font-size:3em; +} + +#title-1{ + font-style:italic; + color:#fff; +} + +#title-2{ + color:#fff; +} + +#subtitle{ + font-size:1.25em; + color:#ccc; +} + +#timestamp{ + margin:.5em 0 0 0; + font-size:.8em; + color:#cc6; +} + +#page-container{ + width:900px; + margin-left:auto; + margin-right:auto; +} + +@media only screen and (max-width:900px){ + #page-container{ + width:100% + } +} + +#top-stories{ + width:95%; + display:block; + overflow:auto; + padding:10px; + margin-left:auto; + margin-right:auto; + text-align:center; + border-bottom: 3px solid #BB133E; + margin-bottom: 10px; +} + +.row{ + display:flex; +} + +.top-story{ + display:inline-block; + vertical-align:top; + text-align:left; + width:360px; + height:auto; + overflow:hidden; + background:#fff; + margin:10px; + padding:10px; + border:2px solid #ccc; + flex:1; +} + +@media only screen and (max-width:500px){ + .row{ + display:block; + } + .top-story{ + display:block; + width:auto; + height:auto; + } +} + +.top-stories-img{ + width:350px; + height:200px; + overflow:hidden; + background-size: auto 234px;/*cover;*/ + background-position: top center;/*center center;*/ + margin:0 auto; +} + +@media only screen and (max-width:500px){ + .top-stories-img{ + width:auto; + } +} + + +.top-stories-hed{ + font-weight:bold; + font-size:1.35em; + margin:10px 10px 0; + color:#00f; +} + +.top-stories-desc{ + font-size:1em; + padding-top:.5em; + margin:0 .75em; +} + +#middle-stories{ + clear:both; + width:500px; + margin:0 auto; + padding:0; + display:block; + overflow:auto; + float:left; +} + +@media only screen and (max-width:500px){ + #middle-stories{ + width:100%; + float:none; + } +} + +.middle-story{ + margin:5px 10px; + padding:10px; + background:#fff; + border:2px solid #ddd; + width:460px; + float:left; +} + +@media only screen and (max-width:500px){ + .middle-story{ + width:auto; + } +} + +.middle-stories-img{ + width:150px; + height:100px; + overflow:hidden; + background-size: auto 117px;/*cover;*/ + background-position: top center;/*center center;*/ + float:left; + max-width:35%; +} + +.middle-stories-hed{ + font-size:1.2em; + float:left; + width:300px; + margin-left:10px; + color:#00f; +} + +@media only screen and (max-width:500px){ + .middle-stories-hed{ + max-width:60%; + } +} + +#bottom-stories{ + margin:0 10px; + padding:10px; + display:block; + overflow:auto; + float:left; + width:350px; + border:5px solid #ddd; +} + +@media only screen and (max-width:900px){ + #bottom-stories{ + width:auto; + border-width:3px; + float:none; + } +} + +.bottom-story{ color:#00f; + + padding:15px 0; + color:#00f; +} + +#sources{ + clear:both; + padding-top:4em; + font-size:.8em; +} \ No newline at end of file diff --git a/unbiased/main.py b/unbiased/main.py new file mode 100755 index 0000000..f1c3317 --- /dev/null +++ b/unbiased/main.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 + +import argparse +import os + +from unbiasedObjects import * +from unbiasedFunctions 
import * +from parser import * +import time + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-w', '--webroot', default='/var/www/ubiased', help='location to write the output html') + args = parser.parse_args() + + while True: + print('-----------------------') + run(args.webroot) + print('-----------------------') + time.sleep(600) + +def run(webroot): + sourceList=[] + + ''' + + SOURCES TO ADD NEXT: + -ABC + -REUTERS + -Town Hall + + ''' + + print('running with webroot="{}"'.format(webroot)) + + + ### These values have to be the second half of the function name + ### E.g. Guardian calls buildGuardian(), etc. + sourceFnArr=['Guardian', 'TheHill', 'NPR', 'BBC', 'NBC', 'CBS', + 'FoxNews', 'WashTimes', 'CSM', 'ABC'] #'Blaze' + + for source in sourceFnArr: + tries=0 + while tries<3: + try: + fn='build'+source + possibles = globals().copy() + possibles.update(locals()) + method = possibles.get(fn) + src=method() + sourceList.append(src) + break + except: + print('Build error. Looping again: '+source) + tries+=1 + time.sleep(tries) + + #scrape all urls and build data structure + newsSourceArr=buildNewsSourceArr(sourceList) + + #build the output file HTML + outputHTML=buildOutput(newsSourceArr) + + #print the output file HTML + printOutputHTML(outputHTML, os.path.join(webroot, 'index.html')) + + +if __name__=="__main__": + main() diff --git a/unbiased/parser.py b/unbiased/parser.py new file mode 100755 index 0000000..f69281b --- /dev/null +++ b/unbiased/parser.py @@ -0,0 +1,986 @@ +#!/usr/bin/env python3 + +from unbiasedObjects import * +from unbiasedFunctions import buildArticle +import os +import re + + +''' +Takes in a URL, downloads the file to a temp file, +reads the file into a string, and returns that string +''' +def urlToContent(url, sourceEncoding='utf8'): + #download file + os.system('wget -q -O scratch/temp1.html --no-check-certificate '+url) + + #read file + if sourceEncoding=='utf8': + f=open('scratch/temp1.html', 'r', encoding="utf8") + else: + f=open('scratch/temp1.html', 'r', encoding="latin-1") + content=f.read() + f.close() + + return content + + +''' +Creates a new newsSource2 object. For each URL in h1-h3URLs, +calls the file scraper and appends the new Article object. +Returns a newsSource2 object +''' +def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs): + h1Arr=[] + a=buildArticle(h1URLs[0], name) + if a==None: + print('................\nH1 Nonetype in '+name+'\n................') + else: + h1Arr.append(a) + + h2Arr=[] + for x in h2URLs: + a=buildArticle(x, name) + if a!=None: + h2Arr.append(a) + else: + print('................\nH2 Nonetype in '+name+'\n................') + + + h3Arr=[] + for x in h3URLs: + a=buildArticle(x, name) + if a!=None: + h3Arr.append(a) + else: + print('................\nH3 Nonetype in '+name+'\n................') + + #BUILD THE NEWS SOURCE + newsSource=NewsSource2(name, url, h1Arr, h2Arr, h3Arr) + + return newsSource + + +''' +Some sites will replicate URLs across the page. This function removes them. +Check hierarchically: if h3 exists in h1s or h2s, remove from h3s; +if h2 exists in h1s, remove from h2s + +also check partial URLs (e.g. 
nytimes.com/story.html is the same as +nytimes.com/story.html?var=x +''' +def removeDuplicates(h1s, h2s, h3s): + #Assume h1s is one element, and keep it + + #remove h2 duplicates + removeArr=[] + for i in range(len(h2s)): + #check internally + for j in range(len(h2s)): + if i==j: + continue + else: + if h2s[i] in h2s[j]: + removeArr.append(h2s[j]) + #check against h1s + for k in range(len(h1s)): + if (h2s[i] in h1s[k]) or (h1s[k] in h2s[i]): + removeArr.append(h2s[i]) + for x in removeArr: + h2s.remove(x) + + #remove h3 duplicates + removeArr=[] + for i in range(len(h3s)): + #check internally + for j in range(len(h3s)): + if i==j: + continue + else: + if h3s[i] in h3s[j]: + removeArr.append(h3s[j]) + #check against h1s and h2s + h1and2=h1s+h2s + for k in range(len(h1and2)): + if (h3s[i] in h1and2[k]) or (h1and2[k] in h3s[i]): + removeArr.append(h3s[i]) + for x in removeArr: + if x in h3s: + h3s.remove(x) + + + return h1s, h2s, h3s + + + +def removalNotification(source, title, reason, value): + print('*************************') + print('\t\tSTORY REMOVED') + print('SOURCE: '+source) + print('TITLE: \t'+title) + print('REASON: '+reason) + print('VALUE: \t'+value) + print('*************************\n\n') + + +def removeBadStoriesHelper(source, element, badStringList, arr): + if badStringList!=None: + for i in range(len(arr)): + for hed in arr[i]: + if hed==None: + print("////////\nNone type found in removeBadStoriesHelper for "+source.name+"\n/////////") + break + for item in badStringList: + if item in getattr(hed, element): + arr[i].remove(hed) + #if it's in the h1 slot, bump up the + # first h2 into the h1 slot + if i==0: + arr[0].append(arr[1][0]) + arr[1].remove(arr[1][0]) + removalNotification(source.name, hed.title, element, item) + + +def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, badURLArr=None): + + arr=[source.h1Arr, source.h2Arr, source.h3Arr] + + removeBadStoriesHelper(source, "title", badTitleArr, arr) + removeBadStoriesHelper(source, "description", badDescArr, arr) + removeBadStoriesHelper(source, "author", badAuthorArr, arr) + removeBadStoriesHelper(source, "img", badImgArr, arr) + removeBadStoriesHelper(source, "url", badURLArr, arr) + + return source + + + + +def buildTheHill(): + url='http://thehill.com' + name='The Hill' + + #DOWNLOAD HOMEPAGE CONTENT + content=urlToContent(url) + + #get main headline + h1=content + h1=h1.split('
    ', 1)[1] + h1=h1.split('', 1)[1] + h2=h2.split('', 1)[0] + while '
    ', 1)[1] + h3=h3.split('', 1)[0] + while '
    ')[2:] + for x in h2: + if '

    ', 1)[1] + h3=h3.split('
    ', 1)[0]#'', 1)[0] + while '
  • ' in h2: + h2=h2.split('
  • ', 1)[1] + h2=h2.split('', 1)[1] + h2=h2.split('
    ', 1)[1] + h3=h3.split('Watch/Listen', 1)[0] + while '
    ', 1)[1] + h1=h1.split('href="', 1)[1] + h1=h1.split('"', 1)[0] + h1s=[h1] + + #GET SECONDARY HEADLINES + h2=content + h2s=[] + h2=h2.split('
    ', 1)[1] + h2=h2.split('
    ' in h2: + h2=h2.split('
    ', 1)[1] + h2=h2.split('href="', 1)[1] + x=h2.split('"', 1)[0] + if h1 not in x: + h2s.append(x) + + #GET TERTIARY HEADLINES + h3=content + h3s=[] + h3=h3.split('Today\'s Standard', 1)[1] + h3=h3.split('
    ' in h3: + h3=h3.split('
    ', 1)[1] + h3=h3.split('href="', 1)[1] + x=h3.split('"', 1)[0] + if h1 not in x: + h3s.append(x) + + #Need to add URL prefix to all URLs + for i in range(len(h1s)): + h1s[i]=url+h1s[i] + for i in range(len(h2s)): + h2s[i]=url+h2s[i] + for i in range(len(h3s)): + h3s[i]=url+h3s[i] + + + h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) + wkl=buildNewsSource2(name, url, h1s, h2s, h3s) + + #REMOVE BAD STORIES + badTitleArr=None + ## if flagged again, remove Micah Mattix + badDescArr=['Matt Labash'] + badAuthorArr=['MATT LABASH', 'TWS PODCAST', 'ERIC FELTEN', 'Steven J. Lenzner', 'MARK HEMINGWAY'] + badImgArr=['http://www.weeklystandard.com/s3/tws15/images/twitter/tws-twitter_1024x512.png'] + wkl=removeBadStories(wkl, badTitleArr, badDescArr, badAuthorArr, badImgArr) + + return wkl + + + + +def buildNPR(): + url='http://www.npr.org/sections/news/' + name='NPR' + + #DOWNLOAD HOMEPAGE CONTENT + content=urlToContent(url) + + #get main headline + h1=content + h1=h1.split('', 1)[1] + h1=h1.split('', 1)[1] + h2=h2.split('', 1)[0] + while '
    ' in h2: + h2=h2.split('
    ', 1)[1] + h2=h2.split('', 1)[1] + h2=h2.split('
    ', 1)[0] + while '', 1)[1] + h1=h1.split('
    ', 1)[1] + h2=h2.split('', 1)[0] + #remove "collection" sets + while '
    ' in h2: + arr=h2.split('
    ', 1) + h2=arr[0]+arr[1].split('', 1)[1] + #Grab the remaining URLs + while '', 1)[1] + h3=h3.split('', 1)[0] + #remove "collection" sets + while '
    ' in h3: + arr=h3.split('
    ', 1) + h3=arr[0]+arr[1].split('', 1)[1] + #Grab the remaining URLs + while '' in h3: + h3=h3.split('', 1)[1] + h3=h3.split('', 1)[0] + elif '/video/the-daily-360' in h3: + h3=h3.split('/video/the-daily-360')[-1] + h3=h3.split('More News', 1)[0] + #remove "collection" sets + while '
    ' in h2: + arr=h3.split('
    ', 1) + h3=arr[0]+arr[1].split('', 1)[1] + + #Grab the remaining URLs + while ' + +
    + +
    +

    Top News

    + +
    + + +''' diff --git a/unbiased/scratch/do_not_delete b/unbiased/scratch/do_not_delete new file mode 100644 index 0000000..e69de29 diff --git a/unbiased/spotCheck.py b/unbiased/spotCheck.py new file mode 100755 index 0000000..d1edda4 --- /dev/null +++ b/unbiased/spotCheck.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + + +from parser import * +from unbiasedObjects import * +import sys + +def spotCheck(src): + + fns = {'hil' : buildTheHill, + 'cbs' : buildCBS, + 'npr' : buildNPR, + 'fox' : buildFoxNews, + 'gdn' : buildGuardian, + 'blz' : buildBlaze, + 'bbc' : buildBBC, + 'nbc' : buildNBC, + 'wat' : buildWashTimes, + 'csm' : buildCSM, + 'abc' : buildABC} + + data=fns[src]() + + print('H1s:\n--------------') + for h in data.h1Arr: + print(h.title) + + print('\n\nH2s:\n--------------') + for h in data.h2Arr: + print(h.title) + + print('\n\nH3s:\n--------------') + for h in data.h3Arr: + print(h.title) + + print('\n\n') + + + +if __name__=='__main__': + spotCheck(sys.argv[1]) diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py new file mode 100644 index 0000000..1a80d7a --- /dev/null +++ b/unbiased/unbiasedFunctions.py @@ -0,0 +1,259 @@ +from unbiasedObjects import * +import os +import random +import time +import re + + +#take in a url and delimiters, return twitter card +def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, imgDelStart, imgDelEnd): + + debugging=False + if debugging: + print(sourceName) + print(url) + print() + + #download url + os.system('wget -q -O scratch/temp_article.html --no-check-certificate '+url) + + #read the file in + f=open('scratch/temp_article.html', 'r', encoding="utf8") + content=f.read() + f.close() + + try: + if sourceName=='The Guardian': + #The Guardian puts an identifying banner on their og:images + #grab the main image from the page instead + + #scenario 1: regular image + if '')[0] + elif sourceName=='ABC News': + img='https://c1.staticflickr.com/7/6042/6276688407_12900948a2_b.jpgX' + if img[-1]=='/': + #because the quote separator could be ' or ", + #trim to just before it then lop it off + img=img[:-1].strip() + img=img[:-1] + + if debugging: + print(img) + + title=content.split('og:title" content=')[1][1:].split('>')[0] + if title[-1]=='/': + title=title[:-1].strip() + title=title[:-1] + + if debugging: + print(title) + + + author='' + if sourceName=='The Blaze': + if 'class="article-author">' in content: + author=content.split('class="article-author">')[1].split('<')[0] + elif 'class="article-author" href="' in content: + author=content.split('class="article-author" href="')[1] + author=author.split('>')[1].split('<')[0].strip() + else: + authorTags=['article:author', 'dc.creator', 'property="author'] + for tag in authorTags: + if tag in content: + author=content.split(tag+'" content=')[1][1:].split('>')[0] + author=author[:-1] + #trim an extra quotation mark for The Hill + if sourceName=='The Hill': + author=author.split('"', 1)[0] + break + + if debugging: + print(author) + + + if 'og:description' in content: + description=content.split('og:description" content=')[1][1:].split('>')[0] + if description[-1]=='/': + description=description[:-1].strip() + description=description[:-1] + else: + if sourceName=='The Hill': + description=content.split('div class="field-items"')[-1] + description=re.sub('<[^<]+?>', '', description) + description=description[1:200] + else: + print("SHOULDN'T GET HERE") + + #strip out self-references + description=description.replace(sourceName+"'s", '***') + 
description=description.replace(sourceName+"'", '***') + description=description.replace(sourceName, '***') + + if debugging: + print(description) + + + a=Article(title, url, img, description, sourceName, author) + return a + + except: + print('^^^^^^^^^^^^^^^^^^^^^^^^^') + print('\tARTICLE PARSING ERROR') + print('SOURCE: '+sourceName) + print('URL: \t'+url) + print('^^^^^^^^^^^^^^^^^^^^^^^^^ \n\n') + return None + + +def buildOutput(newsSourceArr): + #read in the template html file + f=open('html_template/template.html', 'r') + template=f.read() + f.close() + + #set the random order for sources + h1RandomSources=[] + while len(h1RandomSources)<4: + x=random.sample(range(len(newsSourceArr)), 1)[0] + if len(newsSourceArr[x].h1Arr)>0: + if x not in h1RandomSources: + h1RandomSources.append(x) + else: + print('\n\n@@@@\nNo H1 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') + + #For h2s and h3s, select N random sources (can repeat), then + #a non-repetitive random article from within + h2RandomPairs=[] + while len(h2RandomPairs) < 6: + x=random.sample(range(len(newsSourceArr)), 1)[0] + if len(newsSourceArr[x].h2Arr) > 0: + y=random.sample(range(len(newsSourceArr[x].h2Arr)), 1)[0] + pair=[x,y] + if not pair in h2RandomPairs: + h2RandomPairs.append(pair) + else: + print('\n\n@@@@\nNo H2 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') + + h3RandomPairs=[] + while len(h3RandomPairs) < 12: + x=random.sample(range(len(newsSourceArr)), 1)[0] + print(newsSourceArr[x].name) + if len(newsSourceArr[x].h3Arr) > 0: + y=random.sample(range(len(newsSourceArr[x].h3Arr)), 1)[0] + pair=[x,y] + if not pair in h3RandomPairs: + h3RandomPairs.append(pair) + else: + print('\n\n@@@@\nNo H3 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') + + #replace html template locations with data from newsSourceArr + for i in range(len(h1RandomSources)): + source=newsSourceArr[h1RandomSources[i]] + randomArticle=random.sample(range(len(source.h1Arr)), 1)[0] + article=source.h1Arr[randomArticle] + template=template.replace('xxURL1-'+str(i+1)+'xx', article.url) + template=template.replace('xxTitle1-'+str(i+1)+'xx', article.title) + template=template.replace('xxImg1-'+str(i+1)+'xx', article.img) + desc=article.description + if len(desc)>144: + desc=desc[:141] + desc=desc.split()[:-1] + desc=' '.join(desc)+' ...' 
+ template=template.replace('xxDesc1-'+str(i+1)+'xx', desc) + + for i in range(len(h2RandomPairs)): + pair=h2RandomPairs[i] + article=newsSourceArr[pair[0]].h2Arr[pair[1]] + template=template.replace('xxURL2-'+str(i+1)+'xx', article.url) + template=template.replace('xxTitle2-'+str(i+1)+'xx', article.title) + template=template.replace('xxImg2-'+str(i+1)+'xx', article.img) + + for i in range(len(h3RandomPairs)): + pair=h3RandomPairs[i] + article=newsSourceArr[pair[0]].h3Arr[pair[1]] + template=template.replace('xxURL3-'+str(i+1)+'xx', article.url) + template=template.replace('xxTitle3-'+str(i+1)+'xx', article.title) + template=template.replace('xxImg3-'+str(i+1)+'xx', article.img) + + + sourcesStr='' + for i in range(len(newsSourceArr)-1): + sourcesStr+=newsSourceArr[i].name+', ' + sourcesStr+=newsSourceArr[-1].name + print('Successfully parsed: '+sourcesStr) + template=template.replace('xxSourcesxx', sourcesStr) + + + #return updated text + return template + +def printOutputHTML(outputHTML, outFile): + timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) + outputHTML=outputHTML.replace('xxTimexx', timestamp) + + f=open(outFile, 'w') + f.write(outputHTML) + f.close() + +def buildNewsSourceArr(sourceList): + + #build the data structure + i=0 + listLen=len(sourceList) + while i < listLen: + source=sourceList[i] + + if type(source) is NewsSource2: + i+=1 + continue + + url=source.url + + #download file + os.system('wget -q -O scratch/temp'+str(i)+'.html --no-check-certificate '+url) + + #read file + f=open('scratch/temp'+str(i)+'.html', 'r', encoding="utf8") + content=f.read() + f.close() + + #delete file MAYBE DON'T DO THIS? CAUSES OS ERRORS + #os.remove('scratch/temp'+str(i)+'.html') + + #add stories etc to the NewsSource object + h1s, h2s, h3s=extractURLs(content, source) + + #build the Article objects and add to newsSource's appropriate list + if h1s!=None and h2s!=None: + for url in h1s: + article=buildArticle(url, source.name) + if article!=None: source.addArticle(article, 1) #sourceList[i].h1Arr.append(article) + for url in h2s: + article=buildArticle(url, source.name) + if article!=None: sourceList[i].h2Arr.append(article) + for url in h3s: + article=buildArticle(url, source.name) + if article!=None: sourceList[i].h3Arr.append(article) + i+=1 + else: + sourceList.remove(source) + listLen-=1 + + + #return the original sourceList, + #since everything should have been modified in place + return sourceList diff --git a/unbiased/unbiasedObjects.py b/unbiased/unbiasedObjects.py new file mode 100644 index 0000000..3affbe6 --- /dev/null +++ b/unbiased/unbiasedObjects.py @@ -0,0 +1,90 @@ +class Article(): + title='' + url='' + img='' + description='' + source='' + author='' + + def __init__(self, title, url, img, description, source, author): + self.title=title + self.url=url + self.img=img + self.description=description + self.source=source + self.author=author + + def __str__(self): + return '-----------\n'+self.title+'\n'+self.author+'\n'+self.source+'\n'+self.description+'\n'+self.url+'\n'+self.img+'\n'+'-----------' + + +class NewsSource2(): + name='' + url='' + h1Arr=[] + h2Arr=[] + h3Arr=[] + def __init__(self, name, url, h1Arr, h2Arr, h3Arr): + self.name=name + self.url=url + self.h1Arr=h1Arr + self.h2Arr=h2Arr + self.h3Arr=h3Arr + + + +class NewsSource(): + name='' + url='' + #multiple start values to step through file. 
end value default to '"' + h1SectionDividerStart=None + h1SectionDividerEnd=None + h1DelStart=[] + h1DelEnd='"' + h2SectionDividerStart=None + h2SectionDividerEnd=None + h2DelStart=[] + h2DelEnd='"' + h3SectionDividerStart=None + h3SectionDividerEnd=None + h3DelStart=[] + h3DelEnd='"' + #arrays of Article object types + h1Arr=None + h2Arr=None + h3Arr=None + #url to attach to stub links + stubURL='' + + def __init__(self, name, url, + h1DelStart, h2DelStart, h3DelStart, + h1SectionDividerStart=None, h1SectionDividerEnd=None, + h2SectionDividerStart=None, h2SectionDividerEnd=None, + h3SectionDividerStart=None, h3SectionDividerEnd=None, + stubURL=None): + self.name=name + self.url=url + self.h1DelStart=h1DelStart + self.h2DelStart=h2DelStart + self.h3DelStart=h3DelStart + self.h1SectionDividerStart=h1SectionDividerStart + self.h2SectionDividerStart=h2SectionDividerStart + self.h3SectionDividerStart=h3SectionDividerStart + self.h1SectionDividerEnd=h1SectionDividerEnd + self.h2SectionDividerEnd=h2SectionDividerEnd + self.h3SectionDividerEnd=h3SectionDividerEnd + self.h1Arr=[] + self.h2Arr=[] + self.h3Arr=[] + self.stubURL=stubURL + + def addArticle(self, article, level): + if level==1: + self.h1Arr.append(article) + elif level==2: + self.h2Arr.append(article) + elif level==3: + self.h3Arr.append(article) + else: + print("Error: invalid level in NewsSource.addArtlce: ", level) + diff --git a/unbiasedFunctions.py b/unbiasedFunctions.py deleted file mode 100644 index 1a80d7a..0000000 --- a/unbiasedFunctions.py +++ /dev/null @@ -1,259 +0,0 @@ -from unbiasedObjects import * -import os -import random -import time -import re - - -#take in a url and delimiters, return twitter card -def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, imgDelStart, imgDelEnd): - - debugging=False - if debugging: - print(sourceName) - print(url) - print() - - #download url - os.system('wget -q -O scratch/temp_article.html --no-check-certificate '+url) - - #read the file in - f=open('scratch/temp_article.html', 'r', encoding="utf8") - content=f.read() - f.close() - - try: - if sourceName=='The Guardian': - #The Guardian puts an identifying banner on their og:images - #grab the main image from the page instead - - #scenario 1: regular image - if '')[0] - elif sourceName=='ABC News': - img='https://c1.staticflickr.com/7/6042/6276688407_12900948a2_b.jpgX' - if img[-1]=='/': - #because the quote separator could be ' or ", - #trim to just before it then lop it off - img=img[:-1].strip() - img=img[:-1] - - if debugging: - print(img) - - title=content.split('og:title" content=')[1][1:].split('>')[0] - if title[-1]=='/': - title=title[:-1].strip() - title=title[:-1] - - if debugging: - print(title) - - - author='' - if sourceName=='The Blaze': - if 'class="article-author">' in content: - author=content.split('class="article-author">')[1].split('<')[0] - elif 'class="article-author" href="' in content: - author=content.split('class="article-author" href="')[1] - author=author.split('>')[1].split('<')[0].strip() - else: - authorTags=['article:author', 'dc.creator', 'property="author'] - for tag in authorTags: - if tag in content: - author=content.split(tag+'" content=')[1][1:].split('>')[0] - author=author[:-1] - #trim an extra quotation mark for The Hill - if sourceName=='The Hill': - author=author.split('"', 1)[0] - break - - if debugging: - print(author) - - - if 'og:description' in content: - description=content.split('og:description" content=')[1][1:].split('>')[0] - if description[-1]=='/': - 
description=description[:-1].strip() - description=description[:-1] - else: - if sourceName=='The Hill': - description=content.split('div class="field-items"')[-1] - description=re.sub('<[^<]+?>', '', description) - description=description[1:200] - else: - print("SHOULDN'T GET HERE") - - #strip out self-references - description=description.replace(sourceName+"'s", '***') - description=description.replace(sourceName+"'", '***') - description=description.replace(sourceName, '***') - - if debugging: - print(description) - - - a=Article(title, url, img, description, sourceName, author) - return a - - except: - print('^^^^^^^^^^^^^^^^^^^^^^^^^') - print('\tARTICLE PARSING ERROR') - print('SOURCE: '+sourceName) - print('URL: \t'+url) - print('^^^^^^^^^^^^^^^^^^^^^^^^^ \n\n') - return None - - -def buildOutput(newsSourceArr): - #read in the template html file - f=open('html_template/template.html', 'r') - template=f.read() - f.close() - - #set the random order for sources - h1RandomSources=[] - while len(h1RandomSources)<4: - x=random.sample(range(len(newsSourceArr)), 1)[0] - if len(newsSourceArr[x].h1Arr)>0: - if x not in h1RandomSources: - h1RandomSources.append(x) - else: - print('\n\n@@@@\nNo H1 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') - - #For h2s and h3s, select N random sources (can repeat), then - #a non-repetitive random article from within - h2RandomPairs=[] - while len(h2RandomPairs) < 6: - x=random.sample(range(len(newsSourceArr)), 1)[0] - if len(newsSourceArr[x].h2Arr) > 0: - y=random.sample(range(len(newsSourceArr[x].h2Arr)), 1)[0] - pair=[x,y] - if not pair in h2RandomPairs: - h2RandomPairs.append(pair) - else: - print('\n\n@@@@\nNo H2 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') - - h3RandomPairs=[] - while len(h3RandomPairs) < 12: - x=random.sample(range(len(newsSourceArr)), 1)[0] - print(newsSourceArr[x].name) - if len(newsSourceArr[x].h3Arr) > 0: - y=random.sample(range(len(newsSourceArr[x].h3Arr)), 1)[0] - pair=[x,y] - if not pair in h3RandomPairs: - h3RandomPairs.append(pair) - else: - print('\n\n@@@@\nNo H3 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') - - #replace html template locations with data from newsSourceArr - for i in range(len(h1RandomSources)): - source=newsSourceArr[h1RandomSources[i]] - randomArticle=random.sample(range(len(source.h1Arr)), 1)[0] - article=source.h1Arr[randomArticle] - template=template.replace('xxURL1-'+str(i+1)+'xx', article.url) - template=template.replace('xxTitle1-'+str(i+1)+'xx', article.title) - template=template.replace('xxImg1-'+str(i+1)+'xx', article.img) - desc=article.description - if len(desc)>144: - desc=desc[:141] - desc=desc.split()[:-1] - desc=' '.join(desc)+' ...' 
- template=template.replace('xxDesc1-'+str(i+1)+'xx', desc) - - for i in range(len(h2RandomPairs)): - pair=h2RandomPairs[i] - article=newsSourceArr[pair[0]].h2Arr[pair[1]] - template=template.replace('xxURL2-'+str(i+1)+'xx', article.url) - template=template.replace('xxTitle2-'+str(i+1)+'xx', article.title) - template=template.replace('xxImg2-'+str(i+1)+'xx', article.img) - - for i in range(len(h3RandomPairs)): - pair=h3RandomPairs[i] - article=newsSourceArr[pair[0]].h3Arr[pair[1]] - template=template.replace('xxURL3-'+str(i+1)+'xx', article.url) - template=template.replace('xxTitle3-'+str(i+1)+'xx', article.title) - template=template.replace('xxImg3-'+str(i+1)+'xx', article.img) - - - sourcesStr='' - for i in range(len(newsSourceArr)-1): - sourcesStr+=newsSourceArr[i].name+', ' - sourcesStr+=newsSourceArr[-1].name - print('Successfully parsed: '+sourcesStr) - template=template.replace('xxSourcesxx', sourcesStr) - - - #return updated text - return template - -def printOutputHTML(outputHTML, outFile): - timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) - outputHTML=outputHTML.replace('xxTimexx', timestamp) - - f=open(outFile, 'w') - f.write(outputHTML) - f.close() - -def buildNewsSourceArr(sourceList): - - #build the data structure - i=0 - listLen=len(sourceList) - while i < listLen: - source=sourceList[i] - - if type(source) is NewsSource2: - i+=1 - continue - - url=source.url - - #download file - os.system('wget -q -O scratch/temp'+str(i)+'.html --no-check-certificate '+url) - - #read file - f=open('scratch/temp'+str(i)+'.html', 'r', encoding="utf8") - content=f.read() - f.close() - - #delete file MAYBE DON'T DO THIS? CAUSES OS ERRORS - #os.remove('scratch/temp'+str(i)+'.html') - - #add stories etc to the NewsSource object - h1s, h2s, h3s=extractURLs(content, source) - - #build the Article objects and add to newsSource's appropriate list - if h1s!=None and h2s!=None: - for url in h1s: - article=buildArticle(url, source.name) - if article!=None: source.addArticle(article, 1) #sourceList[i].h1Arr.append(article) - for url in h2s: - article=buildArticle(url, source.name) - if article!=None: sourceList[i].h2Arr.append(article) - for url in h3s: - article=buildArticle(url, source.name) - if article!=None: sourceList[i].h3Arr.append(article) - i+=1 - else: - sourceList.remove(source) - listLen-=1 - - - #return the original sourceList, - #since everything should have been modified in place - return sourceList diff --git a/unbiasedObjects.py b/unbiasedObjects.py deleted file mode 100644 index 3affbe6..0000000 --- a/unbiasedObjects.py +++ /dev/null @@ -1,90 +0,0 @@ -class Article(): - title='' - url='' - img='' - description='' - source='' - author='' - - def __init__(self, title, url, img, description, source, author): - self.title=title - self.url=url - self.img=img - self.description=description - self.source=source - self.author=author - - def __str__(self): - return '-----------\n'+self.title+'\n'+self.author+'\n'+self.source+'\n'+self.description+'\n'+self.url+'\n'+self.img+'\n'+'-----------' - - -class NewsSource2(): - name='' - url='' - h1Arr=[] - h2Arr=[] - h3Arr=[] - def __init__(self, name, url, h1Arr, h2Arr, h3Arr): - self.name=name - self.url=url - self.h1Arr=h1Arr - self.h2Arr=h2Arr - self.h3Arr=h3Arr - - - -class NewsSource(): - name='' - url='' - #multiple start values to step through file. 
end value default to '"' - h1SectionDividerStart=None - h1SectionDividerEnd=None - h1DelStart=[] - h1DelEnd='"' - h2SectionDividerStart=None - h2SectionDividerEnd=None - h2DelStart=[] - h2DelEnd='"' - h3SectionDividerStart=None - h3SectionDividerEnd=None - h3DelStart=[] - h3DelEnd='"' - #arrays of Article object types - h1Arr=None - h2Arr=None - h3Arr=None - #url to attach to stub links - stubURL='' - - def __init__(self, name, url, - h1DelStart, h2DelStart, h3DelStart, - h1SectionDividerStart=None, h1SectionDividerEnd=None, - h2SectionDividerStart=None, h2SectionDividerEnd=None, - h3SectionDividerStart=None, h3SectionDividerEnd=None, - stubURL=None): - self.name=name - self.url=url - self.h1DelStart=h1DelStart - self.h2DelStart=h2DelStart - self.h3DelStart=h3DelStart - self.h1SectionDividerStart=h1SectionDividerStart - self.h2SectionDividerStart=h2SectionDividerStart - self.h3SectionDividerStart=h3SectionDividerStart - self.h1SectionDividerEnd=h1SectionDividerEnd - self.h2SectionDividerEnd=h2SectionDividerEnd - self.h3SectionDividerEnd=h3SectionDividerEnd - self.h1Arr=[] - self.h2Arr=[] - self.h3Arr=[] - self.stubURL=stubURL - - def addArticle(self, article, level): - if level==1: - self.h1Arr.append(article) - elif level==2: - self.h2Arr.append(article) - elif level==3: - self.h3Arr.append(article) - else: - print("Error: invalid level in NewsSource.addArtlce: ", level) - -- cgit v1.2.3 From 5b0c9c5daa36878513bcc5edbe87a5fe52fdbb82 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Mon, 17 Apr 2017 00:34:26 -0400 Subject: get it to run from the package --- .gitignore | 4 ++++ setup.py | 20 ++++++++++++++++++++ unbiased/__init__.py | 0 unbiased/main.py | 19 ++++++++++--------- unbiased/parser.py | 5 +++-- unbiased/spotCheck.py | 6 +++--- unbiased/unbiasedFunctions.py | 28 +++++++++++++++++----------- 7 files changed, 57 insertions(+), 25 deletions(-) create mode 100644 setup.py create mode 100644 unbiased/__init__.py diff --git a/.gitignore b/.gitignore index 90bf98d..238da47 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,9 @@ legacy_py/ unbiased.html html_template/Penguins.jpg html_template/BAK* +build/ +dist/ +venv/ +unbiased.egg-info/ #* .#* diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..8b73e6d --- /dev/null +++ b/setup.py @@ -0,0 +1,20 @@ +from setuptools import setup + +setup( + name="unbiased", + version="0", + packages=['unbiased'], + package_data={ + 'unbiased': [ + 'html_template/*.html', + 'html_template/*.css', + ], + }, + install_requires=[ + ], + entry_points={ + 'console_scripts': [ + 'unbiased = unbiased.main:main', + ], + }, +) diff --git a/unbiased/__init__.py b/unbiased/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/unbiased/main.py b/unbiased/main.py index f1c3317..b8bd4cb 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -1,26 +1,26 @@ #!/usr/bin/env python3 import argparse -import os - -from unbiasedObjects import * -from unbiasedFunctions import * -from parser import * import time +from unbiased.unbiasedObjects import * +from unbiased.unbiasedFunctions import * +from unbiased.parser import * + def main(): parser = argparse.ArgumentParser() parser.add_argument('-w', '--webroot', default='/var/www/ubiased', help='location to write the output html') + parser.add_argument('-s', '--scratch', default='/opt/unbiased/scratch', help='writable scratch workspace') args = parser.parse_args() while True: print('-----------------------') - run(args.webroot) + run(args.webroot, args.scratch) print('-----------------------') 
time.sleep(600) -def run(webroot): +def run(webroot, scratch): sourceList=[] ''' @@ -33,6 +33,7 @@ def run(webroot): ''' print('running with webroot="{}"'.format(webroot)) + print('running with scratch="{}"'.format(scratch)) ### These values have to be the second half of the function name @@ -51,7 +52,7 @@ def run(webroot): src=method() sourceList.append(src) break - except: + except Exception: print('Build error. Looping again: '+source) tries+=1 time.sleep(tries) @@ -63,7 +64,7 @@ def run(webroot): outputHTML=buildOutput(newsSourceArr) #print the output file HTML - printOutputHTML(outputHTML, os.path.join(webroot, 'index.html')) + printOutputHTML(outputHTML, webroot) if __name__=="__main__": diff --git a/unbiased/parser.py b/unbiased/parser.py index f69281b..1f9bc5c 100755 --- a/unbiased/parser.py +++ b/unbiased/parser.py @@ -1,10 +1,11 @@ #!/usr/bin/env python3 -from unbiasedObjects import * -from unbiasedFunctions import buildArticle import os import re +from unbiased.unbiasedObjects import * +from unbiased.unbiasedFunctions import buildArticle + ''' Takes in a URL, downloads the file to a temp file, diff --git a/unbiased/spotCheck.py b/unbiased/spotCheck.py index d1edda4..7ce50d3 100755 --- a/unbiased/spotCheck.py +++ b/unbiased/spotCheck.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 - -from parser import * -from unbiasedObjects import * import sys +from unbiased.parser import * +from unbiased.unbiasedObjects import * + def spotCheck(src): fns = {'hil' : buildTheHill, diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 1a80d7a..3d3363b 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -1,8 +1,10 @@ -from unbiasedObjects import * import os +import pkgutil import random -import time import re +import time + +from unbiased.unbiasedObjects import * #take in a url and delimiters, return twitter card @@ -110,7 +112,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, a=Article(title, url, img, description, sourceName, author) return a - except: + except Exception: print('^^^^^^^^^^^^^^^^^^^^^^^^^') print('\tARTICLE PARSING ERROR') print('SOURCE: '+sourceName) @@ -121,9 +123,8 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, def buildOutput(newsSourceArr): #read in the template html file - f=open('html_template/template.html', 'r') - template=f.read() - f.close() + template=pkgutil.get_data('unbiased', 'html_template/template.html') + template = template.decode('utf8') #set the random order for sources h1RandomSources=[] @@ -201,13 +202,18 @@ def buildOutput(newsSourceArr): #return updated text return template -def printOutputHTML(outputHTML, outFile): +def printOutputHTML(outputHTML, outDir): timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) outputHTML=outputHTML.replace('xxTimexx', timestamp) - - f=open(outFile, 'w') - f.write(outputHTML) - f.close() + + with open(os.path.join(outDir, 'index.html'), 'w') as fp: + fp.write(outputHTML) + + # copy over the template css file + css = pkgutil.get_data('unbiased', 'html_template/unbiased.css') + css = css.decode('utf8') + with open(os.path.join(outDir, 'unbiased.css'), 'w') as fp: + fp.write(css) def buildNewsSourceArr(sourceList): -- cgit v1.2.3 From 6a0a5579ea9b3674f011eabd2a4c339100a66ba8 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Mon, 17 Apr 2017 13:44:46 -0400 Subject: read the scratch dir path on the command line --- unbiased/main.py | 7 +-- unbiased/parser.py | 100 
++++++++++++++++++++++-------------------- unbiased/unbiasedFunctions.py | 29 +++++++----- 3 files changed, 74 insertions(+), 62 deletions(-) diff --git a/unbiased/main.py b/unbiased/main.py index b8bd4cb..159a98b 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -49,16 +49,17 @@ def run(webroot, scratch): possibles = globals().copy() possibles.update(locals()) method = possibles.get(fn) - src=method() + src=method(scratch) sourceList.append(src) break - except Exception: + except Exception as ex: + print(ex) print('Build error. Looping again: '+source) tries+=1 time.sleep(tries) #scrape all urls and build data structure - newsSourceArr=buildNewsSourceArr(sourceList) + newsSourceArr=buildNewsSourceArr(sourceList, scratch) #build the output file HTML outputHTML=buildOutput(newsSourceArr) diff --git a/unbiased/parser.py b/unbiased/parser.py index 1f9bc5c..ea2a187 100755 --- a/unbiased/parser.py +++ b/unbiased/parser.py @@ -2,6 +2,7 @@ import os import re +import subprocess from unbiased.unbiasedObjects import * from unbiased.unbiasedFunctions import buildArticle @@ -11,15 +12,18 @@ from unbiased.unbiasedFunctions import buildArticle Takes in a URL, downloads the file to a temp file, reads the file into a string, and returns that string ''' -def urlToContent(url, sourceEncoding='utf8'): +def urlToContent(url, scratchDir, sourceEncoding='utf8'): + temp_file = os.path.join(scratchDir, 'temp1.html') + #download file - os.system('wget -q -O scratch/temp1.html --no-check-certificate '+url) + #os.system('wget -q -O scratch/temp1.html --no-check-certificate '+url) + subprocess.check_call(['wget', '-q', '-O', temp_file, '--no-check-certificate', url]) #read file if sourceEncoding=='utf8': - f=open('scratch/temp1.html', 'r', encoding="utf8") + f=open(temp_file, 'r', encoding="utf8") else: - f=open('scratch/temp1.html', 'r', encoding="latin-1") + f=open(temp_file, 'r', encoding="latin-1") content=f.read() f.close() @@ -31,9 +35,9 @@ Creates a new newsSource2 object. For each URL in h1-h3URLs, calls the file scraper and appends the new Article object. 
Returns a newsSource2 object ''' -def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs): +def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir): h1Arr=[] - a=buildArticle(h1URLs[0], name) + a=buildArticle(h1URLs[0], name, scratchDir) if a==None: print('................\nH1 Nonetype in '+name+'\n................') else: @@ -41,7 +45,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs): h2Arr=[] for x in h2URLs: - a=buildArticle(x, name) + a=buildArticle(x, name, scratchDir) if a!=None: h2Arr.append(a) else: @@ -50,7 +54,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs): h3Arr=[] for x in h3URLs: - a=buildArticle(x, name) + a=buildArticle(x, name, scratchDir) if a!=None: h3Arr.append(a) else: @@ -157,12 +161,12 @@ def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, b -def buildTheHill(): +def buildTheHill(scratchDir): url='http://thehill.com' name='The Hill' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) + content=urlToContent(url, scratchDir) #get main headline h1=content @@ -194,7 +198,7 @@ def buildTheHill(): h3s.append(url+x) h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - hil=buildNewsSource2(name, url, h1s, h2s, h3s) + hil=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) hil=removeBadStories(hil, ['THE MEMO'], None, ['Matt Schlapp', 'Juan Williams', 'Judd Gregg'], None, None) return hil @@ -203,14 +207,14 @@ def buildTheHill(): -def buildGuardian(): +def buildGuardian(scratchDir): url='http://www.theguardian.com/us' name='The Guardian US' while True: #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, 'utf8') + content=urlToContent(url, scratchDir, 'utf8') #get main headline h1=content @@ -252,20 +256,20 @@ def buildGuardian(): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - gdn=buildNewsSource2(name, url, h1s, h2s, h3s) + gdn=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) gdn=removeBadStories(gdn, None, ['Tom McCarthy', 'Andy Hunter'], ['https://www.theguardian.com/profile/ben-jacobs'], None) return gdn -def buildWashTimes(): +def buildWashTimes(scratchDir): url='http://www.washingtontimes.com/' name='Washington Times' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) + content=urlToContent(url, scratchDir) #get main headline h1=content @@ -301,19 +305,19 @@ def buildWashTimes(): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - wat=buildNewsSource2(name, url, h1s, h2s, h3s) + wat=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) wat=removeBadStories(wat, None, None, None, None) return wat -def buildCSM(): +def buildCSM(scratchDir): url='http://www.csmonitor.com/USA' name='Christian Science Monitor' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) + content=urlToContent(url, scratchDir) #this makes sure we don't get '/USA' in the URL twice url=url.split('/USA')[0] @@ -364,7 +368,7 @@ def buildCSM(): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - csm=buildNewsSource2(name, url, h1s, h2s, h3s) + csm=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) badTitleArr=['Change Agent'] badDescArr=None @@ -384,7 +388,7 @@ in The Blaze articles by grabbing the first portion of the story instead def blazeFixDesc(articleArr): TAG_RE = re.compile(r'<[^>]+>') for i in range(len(articleArr)): - desc=urlToContent(articleArr[i].url) + desc=urlToContent(articleArr[i].url, scratchDir) desc=desc.split('
    ', 1)[1] desc=desc.split('

    ', 1)[1] desc=TAG_RE.sub('', desc) @@ -396,12 +400,12 @@ def blazeFixDesc(articleArr): -def buildBlaze(): +def buildBlaze(scratchDir): url='http://theblaze.com' name='The Blaze' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) + content=urlToContent(url, scratchDir) #get main headline h1=content @@ -435,7 +439,7 @@ def buildBlaze(): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - blz=buildNewsSource2(name, url, h1s, h2s, h3s) + blz=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) badTitleArr=['Tucker Carlson', 'Mark Levin'] badDescArr=['Lawrence Jones', 'Mike Slater'] @@ -455,12 +459,12 @@ def buildBlaze(): -def buildCBS(): +def buildCBS(scratchDir): url='http://cbsnews.com' name='CBS News' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) + content=urlToContent(url, scratchDir) #get main headline h1=content @@ -504,7 +508,7 @@ def buildCBS(): h3s.append(url+x) h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - cbs=buildNewsSource2(name, url, h1s, h2s, h3s) + cbs=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) cbs=removeBadStories(cbs, ['60 Minutes'], ['60 Minutes'], None, None, ['whats-in-the-news-coverart']) return cbs @@ -513,12 +517,12 @@ def buildCBS(): -def buildNBC(): +def buildNBC(scratchDir): url='http://nbcnews.com' name='NBC News' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) + content=urlToContent(url, scratchDir) #get main headline h1=content @@ -567,7 +571,7 @@ def buildNBC(): ''' h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - nbc=buildNewsSource2(name, url, h1s, h2s, h3s) + nbc=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) nbc=removeBadStories(nbc, None, ['First Read'], None, None, None) @@ -576,12 +580,12 @@ def buildNBC(): -def buildBBC(): +def buildBBC(scratchDir): url='http://www.bbc.com/news/world/us_and_canada' name='BBC US & Canada' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) + content=urlToContent(url, scratchDir) #get main headline h1=content @@ -615,7 +619,7 @@ def buildBBC(): h3s.append('http://www.bbc.com'+x) h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - bbc=buildNewsSource2(name, url, h1s, h2s, h3s) + bbc=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) badTitleArr=None badDescArr=None badAuthorArr=None @@ -638,12 +642,12 @@ def buildBBC(): -def buildWeeklyStandard(): +def buildWeeklyStandard(scratchDir): url='http://www.weeklystandard.com' name='Weekly Standard' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) + content=urlToContent(url, scratchDir) #get main headline h1=content @@ -688,7 +692,7 @@ def buildWeeklyStandard(): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - wkl=buildNewsSource2(name, url, h1s, h2s, h3s) + wkl=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) #REMOVE BAD STORIES badTitleArr=None @@ -703,12 +707,12 @@ def buildWeeklyStandard(): -def buildNPR(): +def buildNPR(scratchDir): url='http://www.npr.org/sections/news/' name='NPR' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) + content=urlToContent(url, scratchDir) #get main headline h1=content @@ -742,7 +746,7 @@ def buildNPR(): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - npr=buildNewsSource2(name, url, h1s, h2s, h3s) + npr=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) #REMOVE BAD STORIES badTitleArr=['The Two-Way'] @@ -757,12 +761,12 @@ def buildNPR(): -def buildABC(): +def buildABC(scratchDir): url='http://www.abcnews.go.com' name='ABC News' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) + content=urlToContent(url, scratchDir) #get main headline h1=content @@ -796,7 +800,7 
@@ def buildABC(): h3s.append(x) h1s, h2s, h3s = removeDuplicates([h1], h2s, h3s) - abc=buildNewsSource2(name, url, h1s, h2s, h3s) + abc=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) #REMOVE BAD STORIES badTitleArr=None @@ -811,12 +815,12 @@ def buildABC(): -def buildFoxNews(): +def buildFoxNews(scratchDir): url='http://foxnews.com' name='Fox News' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) + content=urlToContent(url, scratchDir) #get main headline h1=content @@ -847,7 +851,7 @@ def buildFoxNews(): h3s.append(x) h1s, h2s, h3s = removeDuplicates([h1], h2s, h3s) - fox=buildNewsSource2(name, url, h1s, h2s, h3s) + fox=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) #REMOVE BAD STORIES badTitleArr=['O'Reilly', 'Fox News', 'Brett Baier', 'Tucker'] @@ -861,12 +865,12 @@ def buildFoxNews(): -def buildNYT(): +def buildNYT(scratchDir): url='http://www.nytimes.com' name='New York Times' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) + content=urlToContent(url, scratchDir) #get main headline #this will likely need if/else logic @@ -944,7 +948,7 @@ def buildNYT(): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - nyt=buildNewsSource2(name, url, h1s, h2s, h3s) + nyt=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) nyt=removeBadStories(nyt, None, None, None, None, ['https://www.nytimes.com/section/magazine', 'https://www.nytimes.com/newsletters/the-interpreter']) diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 3d3363b..6210ba8 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -2,25 +2,29 @@ import os import pkgutil import random import re +import subprocess import time from unbiased.unbiasedObjects import * #take in a url and delimiters, return twitter card -def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, imgDelStart, imgDelEnd): +def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, titleDelEnd, imgDelStart, imgDelEnd): debugging=False if debugging: print(sourceName) print(url) print() - + + temp_article = os.path.join(scratchDir, 'temp_article.html') + #download url - os.system('wget -q -O scratch/temp_article.html --no-check-certificate '+url) + #os.system('wget -q -O scratch/temp_article.html --no-check-certificate '+url) + subprocess.check_call(['wget', '-q', '-O', temp_article, '--no-check-certificate', url]) #read the file in - f=open('scratch/temp_article.html', 'r', encoding="utf8") + f=open(temp_article, 'r', encoding="utf8") content=f.read() f.close() @@ -215,7 +219,7 @@ def printOutputHTML(outputHTML, outDir): with open(os.path.join(outDir, 'unbiased.css'), 'w') as fp: fp.write(css) -def buildNewsSourceArr(sourceList): +def buildNewsSourceArr(sourceList, scratchDir): #build the data structure i=0 @@ -229,16 +233,19 @@ def buildNewsSourceArr(sourceList): url=source.url + temp_file = os.path.join(scratchDir, 'temp{}.html'.format(i)) + #download file - os.system('wget -q -O scratch/temp'+str(i)+'.html --no-check-certificate '+url) + #os.system('wget -q -O scratch/temp'+str(i)+'.html --no-check-certificate '+url) + subprocess.check_call(['wget', '-q', '-O', temp_file, '--no-check-certificate', url]) #read file - f=open('scratch/temp'+str(i)+'.html', 'r', encoding="utf8") + f=open(temp_file, 'r', encoding="utf8") content=f.read() f.close() #delete file MAYBE DON'T DO THIS? 
CAUSES OS ERRORS - #os.remove('scratch/temp'+str(i)+'.html') + #os.remove(temp_file) #add stories etc to the NewsSource object h1s, h2s, h3s=extractURLs(content, source) @@ -246,13 +253,13 @@ def buildNewsSourceArr(sourceList): #build the Article objects and add to newsSource's appropriate list if h1s!=None and h2s!=None: for url in h1s: - article=buildArticle(url, source.name) + article=buildArticle(url, source.name, scratchDir) if article!=None: source.addArticle(article, 1) #sourceList[i].h1Arr.append(article) for url in h2s: - article=buildArticle(url, source.name) + article=buildArticle(url, source.name, scratchDir) if article!=None: sourceList[i].h2Arr.append(article) for url in h3s: - article=buildArticle(url, source.name) + article=buildArticle(url, source.name, scratchDir) if article!=None: sourceList[i].h3Arr.append(article) i+=1 else: -- cgit v1.2.3 From e5b8cdc8a02a1d6e026e2e016508a8ecb443e181 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Mon, 17 Apr 2017 14:32:09 -0400 Subject: fix fox urls --- unbiased/parser.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/unbiased/parser.py b/unbiased/parser.py index ea2a187..f068ae8 100755 --- a/unbiased/parser.py +++ b/unbiased/parser.py @@ -827,6 +827,7 @@ def buildFoxNews(scratchDir): h1=h1.split('

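
For context on the wget handling that the patches above thread a scratch directory through: the pattern is to shell out to wget and then read the temp file back in. A rough sketch, not the repo's exact code (fetch_page is a made-up name, and it assumes wget is on PATH and the scratch directory already exists):

    import os
    import subprocess

    def fetch_page(url, scratch_dir, name='temp1.html'):
        # Write into the caller-supplied scratch workspace instead of a
        # hard-coded scratch/ directory.
        temp_file = os.path.join(scratch_dir, name)
        # An argument list avoids the shell-quoting pitfalls of the old
        # os.system() string interpolation, and check_call raises on failure.
        subprocess.check_call(['wget', '-q', '-O', temp_file,
                               '--no-check-certificate', url])
        with open(temp_file, 'r', encoding='utf8') as f:
            return f.read()
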
    Date: Mon, 17 Apr 2017 14:32:57 -0400 Subject: don't need scratch dir anymore --- unbiased/scratch/do_not_delete | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 unbiased/scratch/do_not_delete diff --git a/unbiased/scratch/do_not_delete b/unbiased/scratch/do_not_delete deleted file mode 100644 index e69de29..0000000 -- cgit v1.2.3 From fd5227f122adf65b8f5340751e037fce67e4d2c4 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Mon, 17 Apr 2017 15:52:21 -0400 Subject: use jinja templates to build the output --- setup.py | 1 + unbiased/html_template/unbiased.jinja.html | 69 ++++++++++++++++++++++++++++++ unbiased/unbiasedFunctions.py | 52 +++++++++++----------- 3 files changed, 98 insertions(+), 24 deletions(-) create mode 100644 unbiased/html_template/unbiased.jinja.html diff --git a/setup.py b/setup.py index 8b73e6d..0b43b93 100644 --- a/setup.py +++ b/setup.py @@ -11,6 +11,7 @@ setup( ], }, install_requires=[ + 'jinja2', ], entry_points={ 'console_scripts': [ diff --git a/unbiased/html_template/unbiased.jinja.html b/unbiased/html_template/unbiased.jinja.html new file mode 100644 index 0000000..297c4c4 --- /dev/null +++ b/unbiased/html_template/unbiased.jinja.html @@ -0,0 +1,69 @@ + + + + + + + UnBiased + + + + + +
    + + + +
    + + {% for story in middle_stories %} + + +
    +
    +
    +
    {{ story.title }}
    +
    +
    + + {% endfor %} + +
    + +
    + + {% for story in bottom_stories %} + +
    + {{ story.title }} +
    + + {% endfor %} + +
    + +
    + +
    + Sources: {{ sources }} +
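
For reference, loading and rendering this package template goes through the standard Jinja2 PackageLoader pattern; a minimal sketch assuming the unbiased package (and its html_template data) is importable, with placeholder context values:

    from jinja2 import Environment, PackageLoader, select_autoescape

    env = Environment(
        loader=PackageLoader('unbiased', 'html_template'),
        autoescape=select_autoescape(['html', 'xml']),
    )
    template = env.get_template('unbiased.jinja.html')
    # The real caller passes lists of Article objects; empty lists render an
    # empty page but exercise the same code path.
    html = template.render(
        timestamp='Mon, Apr 17, 1:00pm EDT',
        top_stories=[],
        middle_stories=[],
        bottom_stories=[],
        sources='The Guardian US, NPR',
    )
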
    + + diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 6210ba8..192de8c 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -127,9 +127,13 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t def buildOutput(newsSourceArr): #read in the template html file - template=pkgutil.get_data('unbiased', 'html_template/template.html') - template = template.decode('utf8') - + from jinja2 import Environment, PackageLoader, select_autoescape + env = Environment( + loader=PackageLoader('unbiased', 'html_template'), + autoescape=select_autoescape(['html', 'xml']) + ) + template = env.get_template('unbiased.jinja.html') + #set the random order for sources h1RandomSources=[] while len(h1RandomSources)<4: @@ -139,9 +143,9 @@ def buildOutput(newsSourceArr): h1RandomSources.append(x) else: print('\n\n@@@@\nNo H1 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') - + #For h2s and h3s, select N random sources (can repeat), then - #a non-repetitive random article from within + #a non-repetitive random article from within h2RandomPairs=[] while len(h2RandomPairs) < 6: x=random.sample(range(len(newsSourceArr)), 1)[0] @@ -165,34 +169,25 @@ def buildOutput(newsSourceArr): else: print('\n\n@@@@\nNo H3 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') - #replace html template locations with data from newsSourceArr + # collect articles for each section + top_stories = [] for i in range(len(h1RandomSources)): source=newsSourceArr[h1RandomSources[i]] randomArticle=random.sample(range(len(source.h1Arr)), 1)[0] article=source.h1Arr[randomArticle] - template=template.replace('xxURL1-'+str(i+1)+'xx', article.url) - template=template.replace('xxTitle1-'+str(i+1)+'xx', article.title) - template=template.replace('xxImg1-'+str(i+1)+'xx', article.img) - desc=article.description - if len(desc)>144: - desc=desc[:141] - desc=desc.split()[:-1] - desc=' '.join(desc)+' ...' 
- template=template.replace('xxDesc1-'+str(i+1)+'xx', desc) + top_stories.append(article) + middle_stories = [] for i in range(len(h2RandomPairs)): pair=h2RandomPairs[i] article=newsSourceArr[pair[0]].h2Arr[pair[1]] - template=template.replace('xxURL2-'+str(i+1)+'xx', article.url) - template=template.replace('xxTitle2-'+str(i+1)+'xx', article.title) - template=template.replace('xxImg2-'+str(i+1)+'xx', article.img) + middle_stories.append(article) + bottom_stories = [] for i in range(len(h3RandomPairs)): pair=h3RandomPairs[i] article=newsSourceArr[pair[0]].h3Arr[pair[1]] - template=template.replace('xxURL3-'+str(i+1)+'xx', article.url) - template=template.replace('xxTitle3-'+str(i+1)+'xx', article.title) - template=template.replace('xxImg3-'+str(i+1)+'xx', article.img) + bottom_stories.append(article) sourcesStr='' @@ -200,11 +195,20 @@ def buildOutput(newsSourceArr): sourcesStr+=newsSourceArr[i].name+', ' sourcesStr+=newsSourceArr[-1].name print('Successfully parsed: '+sourcesStr) - template=template.replace('xxSourcesxx', sourcesStr) - + + timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) + + html = template.render( + timestamp = timestamp, + top_stories = top_stories, + middle_stories = middle_stories, + bottom_stories = bottom_stories, + sources = sourcesStr, + ) + #return updated text - return template + return html def printOutputHTML(outputHTML, outDir): timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) -- cgit v1.2.3 From 26f93f9c17dcf81c69b814d9d402cd20ef32e1ef Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Mon, 17 Apr 2017 16:00:45 -0400 Subject: clean up template formatting --- unbiased/html_template/unbiased.jinja.html | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unbiased/html_template/unbiased.jinja.html b/unbiased/html_template/unbiased.jinja.html index 297c4c4..778bebc 100644 --- a/unbiased/html_template/unbiased.jinja.html +++ b/unbiased/html_template/unbiased.jinja.html @@ -23,9 +23,9 @@
-    {{ story.title }}
+    {{ story.title|safe }}
-    {{ story.desc }}
+    {{ story.description|safe|truncate(140) }}
     {% endfor %}
@@ -40,7 +40,7 @@
-    {{ story.title }}
+    {{ story.title|safe }}
@@ -53,7 +53,7 @@
     {% for story in bottom_stories %}
-    {{ story.title }}
+    {{ story.title|safe }}
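
The |safe filter keeps Jinja2's autoescaping from double-escaping markup that is already escaped upstream, and |truncate(140) replaces the manual 144-character cut that an earlier hunk removed from buildOutput(). A standalone illustration of the truncate behaviour (not part of the site template):

    from jinja2 import Environment

    env = Environment()
    snippet = env.from_string('{{ description|truncate(140) }}')
    # Cuts at a word boundary near 140 characters and appends '...'.
    print(snippet.render(description='word ' * 100))
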
    {% endfor %} -- cgit v1.2.3 From 8bce5c2280441760db850d92d651d2fb0f181c50 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Mon, 17 Apr 2017 21:53:42 -0400 Subject: pull the images locally and resize --- setup.py | 1 + unbiased/main.py | 2 +- unbiased/unbiasedFunctions.py | 27 +++++++++++++++++++++++++-- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 0b43b93..2755304 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,7 @@ setup( }, install_requires=[ 'jinja2', + 'Pillow', ], entry_points={ 'console_scripts': [ diff --git a/unbiased/main.py b/unbiased/main.py index 159a98b..88ceb7e 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -62,7 +62,7 @@ def run(webroot, scratch): newsSourceArr=buildNewsSourceArr(sourceList, scratch) #build the output file HTML - outputHTML=buildOutput(newsSourceArr) + outputHTML=buildOutput(newsSourceArr, webroot) #print the output file HTML printOutputHTML(outputHTML, webroot) diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 192de8c..16ea07d 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -7,6 +7,8 @@ import time from unbiased.unbiasedObjects import * +from PIL import Image + #take in a url and delimiters, return twitter card def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, titleDelEnd, imgDelStart, imgDelEnd): @@ -125,7 +127,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t return None -def buildOutput(newsSourceArr): +def buildOutput(newsSourceArr, webroot): #read in the template html file from jinja2 import Environment, PackageLoader, select_autoescape env = Environment( @@ -170,17 +172,25 @@ def buildOutput(newsSourceArr): print('\n\n@@@@\nNo H3 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') # collect articles for each section + image_index = 0 + top_stories = [] for i in range(len(h1RandomSources)): source=newsSourceArr[h1RandomSources[i]] randomArticle=random.sample(range(len(source.h1Arr)), 1)[0] article=source.h1Arr[randomArticle] + img_name = pullImage(article.img, image_index, webroot, 350, 200) + image_index += 1 + article.img = img_name top_stories.append(article) middle_stories = [] for i in range(len(h2RandomPairs)): pair=h2RandomPairs[i] article=newsSourceArr[pair[0]].h2Arr[pair[1]] + img_name = pullImage(article.img, image_index, webroot, 150, 100) + image_index += 1 + article.img = img_name middle_stories.append(article) bottom_stories = [] @@ -189,7 +199,6 @@ def buildOutput(newsSourceArr): article=newsSourceArr[pair[0]].h3Arr[pair[1]] bottom_stories.append(article) - sourcesStr='' for i in range(len(newsSourceArr)-1): sourcesStr+=newsSourceArr[i].name+', ' @@ -274,3 +283,17 @@ def buildNewsSourceArr(sourceList, scratchDir): #return the original sourceList, #since everything should have been modified in place return sourceList + +def pullImage(url, index, webroot, width=350, height=200): + extension = url.split('.')[-1].split('?')[0] + img_name = 'img{}.{}'.format(index, extension) + out_file = os.path.join(webroot, img_name) + try: + subprocess.check_call(['wget', '-q', '-O', out_file, '--no-check-certificate', url]) + except Exception: + return '' + img = Image.open(out_file) + img.resize((width, height)) + jpg_name = 'img{}.jpg'.format(index) + img.save(os.path.join(webroot, jpg_name), 'JPEG') + return jpg_name -- cgit v1.2.3 From c3d54bbe304708693891fe68cf3760c5fb2545b3 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Mon, 17 Apr 2017 22:59:02 -0400 
Subject: replace print statements with the logging module --- unbiased/main.py | 19 +++++++++++++------ unbiased/parser.py | 25 +++++++++++++------------ unbiased/unbiasedFunctions.py | 34 ++++++++++++++++------------------ unbiased/unbiasedObjects.py | 6 +++++- 4 files changed, 47 insertions(+), 37 deletions(-) diff --git a/unbiased/main.py b/unbiased/main.py index 88ceb7e..ea5412d 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -1,12 +1,20 @@ #!/usr/bin/env python3 import argparse +import logging import time from unbiased.unbiasedObjects import * from unbiased.unbiasedFunctions import * from unbiased.parser import * +logger = logging.getLogger('unbiased') +logger.setLevel(logging.DEBUG) +ch = logging.StreamHandler() +ch.setLevel(logging.DEBUG) +ch.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) +logger.addHandler(ch) + def main(): parser = argparse.ArgumentParser() @@ -15,9 +23,9 @@ def main(): args = parser.parse_args() while True: - print('-----------------------') + logger.info('Starting crawl') run(args.webroot, args.scratch) - print('-----------------------') + logger.info('Crawl complete. Sleeping for 600s') time.sleep(600) def run(webroot, scratch): @@ -32,8 +40,8 @@ def run(webroot, scratch): ''' - print('running with webroot="{}"'.format(webroot)) - print('running with scratch="{}"'.format(scratch)) + logger.debug('Running with webroot="{}"'.format(webroot)) + logger.debug('Running with scratch="{}"'.format(scratch)) ### These values have to be the second half of the function name @@ -53,8 +61,7 @@ def run(webroot, scratch): sourceList.append(src) break except Exception as ex: - print(ex) - print('Build error. Looping again: '+source) + logger.error('Build error. Looping again. source={} ex={}'.format(source, ex)) tries+=1 time.sleep(tries) diff --git a/unbiased/parser.py b/unbiased/parser.py index f068ae8..2bba27d 100755 --- a/unbiased/parser.py +++ b/unbiased/parser.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import logging import os import re import subprocess @@ -7,6 +8,8 @@ import subprocess from unbiased.unbiasedObjects import * from unbiased.unbiasedFunctions import buildArticle +logger = logging.getLogger('unbiased') + ''' Takes in a URL, downloads the file to a temp file, @@ -39,7 +42,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir): h1Arr=[] a=buildArticle(h1URLs[0], name, scratchDir) if a==None: - print('................\nH1 Nonetype in '+name+'\n................') + logger.debug('H1 Nonetype in '+name) else: h1Arr.append(a) @@ -49,7 +52,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir): if a!=None: h2Arr.append(a) else: - print('................\nH2 Nonetype in '+name+'\n................') + logger.debug('H2 Nonetype in '+name) h3Arr=[] @@ -58,7 +61,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir): if a!=None: h3Arr.append(a) else: - print('................\nH3 Nonetype in '+name+'\n................') + logger.debug('H3 Nonetype in '+name) #BUILD THE NEWS SOURCE newsSource=NewsSource2(name, url, h1Arr, h2Arr, h3Arr) @@ -119,13 +122,11 @@ def removeDuplicates(h1s, h2s, h3s): def removalNotification(source, title, reason, value): - print('*************************') - print('\t\tSTORY REMOVED') - print('SOURCE: '+source) - print('TITLE: \t'+title) - print('REASON: '+reason) - print('VALUE: \t'+value) - print('*************************\n\n') + logger.debug("""Story removed + SOURCE:\t{} + TITLE:\t{}) + REASON:\t{} + VALUE:\t{}""".format(source, title, reason, value)) 
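
The handler wiring this patch adds to main.py is the stock logging-module setup: one named logger, one console handler, and a shared format string. The other modules then pick up the same logger by name, so a sketch of the whole pattern looks like this:

    import logging

    # main.py: configure the 'unbiased' logger once at startup.
    logger = logging.getLogger('unbiased')
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logger.addHandler(ch)

    # parser.py / unbiasedFunctions.py: grab the same logger by name.
    log = logging.getLogger('unbiased')
    log.debug('H1 Nonetype in %s', 'Example Source')
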
def removeBadStoriesHelper(source, element, badStringList, arr): @@ -133,7 +134,7 @@ def removeBadStoriesHelper(source, element, badStringList, arr): for i in range(len(arr)): for hed in arr[i]: if hed==None: - print("////////\nNone type found in removeBadStoriesHelper for "+source.name+"\n/////////") + logger.debug("None type found in removeBadStoriesHelper for "+source.name) break for item in badStringList: if item in getattr(hed, element): @@ -225,7 +226,7 @@ def buildGuardian(scratchDir): if h1!='https://www.theguardian.com/us': break else: - print('Guardian loop') + logger.debug('Guardian loop') h1s=[h1] diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 16ea07d..775346f 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -1,3 +1,4 @@ +import logging import os import pkgutil import random @@ -9,15 +10,15 @@ from unbiased.unbiasedObjects import * from PIL import Image +logger = logging.getLogger('unbiased') #take in a url and delimiters, return twitter card def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, titleDelEnd, imgDelStart, imgDelEnd): debugging=False if debugging: - print(sourceName) - print(url) - print() + logger.debug(sourceName) + logger.debug(url) temp_article = os.path.join(scratchDir, 'temp_article.html') @@ -60,7 +61,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t img=img[:-1] if debugging: - print(img) + logger.debug(img) title=content.split('og:title" content=')[1][1:].split('>')[0] if title[-1]=='/': @@ -68,7 +69,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t title=title[:-1] if debugging: - print(title) + logger.debug(title) author='' @@ -90,7 +91,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t break if debugging: - print(author) + logger.debug(author) if 'og:description' in content: @@ -104,7 +105,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t description=re.sub('<[^<]+?>', '', description) description=description[1:200] else: - print("SHOULDN'T GET HERE") + logger.debug("SHOULDN'T GET HERE") #strip out self-references description=description.replace(sourceName+"'s", '***') @@ -112,18 +113,16 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t description=description.replace(sourceName, '***') if debugging: - print(description) + logger.debug(description) a=Article(title, url, img, description, sourceName, author) return a except Exception: - print('^^^^^^^^^^^^^^^^^^^^^^^^^') - print('\tARTICLE PARSING ERROR') - print('SOURCE: '+sourceName) - print('URL: \t'+url) - print('^^^^^^^^^^^^^^^^^^^^^^^^^ \n\n') + logger.error("""ARTICLE PARSING ERROR + SOURCE:\t{} + URL:\t{}""".format(sourceName, url)) return None @@ -144,7 +143,7 @@ def buildOutput(newsSourceArr, webroot): if x not in h1RandomSources: h1RandomSources.append(x) else: - print('\n\n@@@@\nNo H1 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') + logger.debug('No H1 stories in '+newsSourceArr[x].name) #For h2s and h3s, select N random sources (can repeat), then #a non-repetitive random article from within @@ -157,19 +156,18 @@ def buildOutput(newsSourceArr, webroot): if not pair in h2RandomPairs: h2RandomPairs.append(pair) else: - print('\n\n@@@@\nNo H2 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') + logger.debug('No H2 stories in '+newsSourceArr[x].name) h3RandomPairs=[] while len(h3RandomPairs) < 12: 
x=random.sample(range(len(newsSourceArr)), 1)[0] - print(newsSourceArr[x].name) if len(newsSourceArr[x].h3Arr) > 0: y=random.sample(range(len(newsSourceArr[x].h3Arr)), 1)[0] pair=[x,y] if not pair in h3RandomPairs: h3RandomPairs.append(pair) else: - print('\n\n@@@@\nNo H3 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') + logger.debug('No H3 stories in '+newsSourceArr[x].name) # collect articles for each section image_index = 0 @@ -203,7 +201,7 @@ def buildOutput(newsSourceArr, webroot): for i in range(len(newsSourceArr)-1): sourcesStr+=newsSourceArr[i].name+', ' sourcesStr+=newsSourceArr[-1].name - print('Successfully parsed: '+sourcesStr) + logger.info('Successfully parsed: '+sourcesStr) timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) diff --git a/unbiased/unbiasedObjects.py b/unbiased/unbiasedObjects.py index 3affbe6..9372d3a 100644 --- a/unbiased/unbiasedObjects.py +++ b/unbiased/unbiasedObjects.py @@ -1,3 +1,7 @@ +import logging + +logger = logging.getLogger('unbiased') + class Article(): title='' url='' @@ -86,5 +90,5 @@ class NewsSource(): elif level==3: self.h3Arr.append(article) else: - print("Error: invalid level in NewsSource.addArtlce: ", level) + logger.error("Invalid level in NewsSource.addArtlce: " + level) -- cgit v1.2.3 From 48471019c86d9a78a742b282b1b25df6d69c5752 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 19 Apr 2017 11:02:24 -0400 Subject: fix guardian images and image scaling --- unbiased/html_template/unbiased.css | 4 ++-- unbiased/main.py | 12 ++++++++---- unbiased/unbiasedFunctions.py | 32 ++++++++++++++++++++++++++------ 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/unbiased/html_template/unbiased.css b/unbiased/html_template/unbiased.css index 244f100..6817cc9 100755 --- a/unbiased/html_template/unbiased.css +++ b/unbiased/html_template/unbiased.css @@ -84,7 +84,7 @@ a:hover{ vertical-align:top; text-align:left; width:360px; - height:auto; + height:350px; overflow:hidden; background:#fff; margin:10px; @@ -217,4 +217,4 @@ a:hover{ clear:both; padding-top:4em; font-size:.8em; -} \ No newline at end of file +} diff --git a/unbiased/main.py b/unbiased/main.py index ea5412d..87b1e8c 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -48,10 +48,12 @@ def run(webroot, scratch): ### E.g. Guardian calls buildGuardian(), etc. sourceFnArr=['Guardian', 'TheHill', 'NPR', 'BBC', 'NBC', 'CBS', 'FoxNews', 'WashTimes', 'CSM', 'ABC'] #'Blaze' - + for source in sourceFnArr: + logger.info('Crawling {}'.format(source)) tries=0 while tries<3: + time.sleep(tries) try: fn='build'+source possibles = globals().copy() @@ -61,10 +63,12 @@ def run(webroot, scratch): sourceList.append(src) break except Exception as ex: - logger.error('Build error. Looping again. source={} ex={}'.format(source, ex)) tries+=1 - time.sleep(tries) - + if tries == 3: + logger.error('Build failed. source={} ex={}'.format(source, ex)) + else: + logger.debug('Build failed, retrying. 
source={} ex={}'.format(source, ex)) + #scrape all urls and build data structure newsSourceArr=buildNewsSourceArr(sourceList, scratch) diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 775346f..fdf9d8f 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -1,3 +1,4 @@ +import html import logging import os import pkgutil @@ -32,7 +33,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t f.close() try: - if sourceName=='The Guardian': + if sourceName=='The Guardian US': #The Guardian puts an identifying banner on their og:images #grab the main image from the page instead @@ -48,14 +49,15 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t elif '')[0] elif sourceName=='ABC News': img='https://c1.staticflickr.com/7/6042/6276688407_12900948a2_b.jpgX' if img[-1]=='/': - #because the quote separator could be ' or ", + #because the quote separator could be ' or ", #trim to just before it then lop it off img=img[:-1].strip() img=img[:-1] @@ -282,16 +284,34 @@ def buildNewsSourceArr(sourceList, scratchDir): #since everything should have been modified in place return sourceList -def pullImage(url, index, webroot, width=350, height=200): +def pullImage(url, index, webroot, target_width=350, target_height=200): extension = url.split('.')[-1].split('?')[0] img_name = 'img{}.{}'.format(index, extension) out_file = os.path.join(webroot, img_name) try: subprocess.check_call(['wget', '-q', '-O', out_file, '--no-check-certificate', url]) - except Exception: + except Exception as ex: + logger.error('Failed to pull image: url={} ex={}'.format(url, ex)) return '' img = Image.open(out_file) - img.resize((width, height)) + # crop to aspect ratio + target_ar = target_width / target_height + left, top, right, bottom = img.getbbox() + height = bottom - top + width = right - left + ar = width / height + if target_ar > ar: + new_height = (target_height / target_width) * width + bbox = (left, top + ((height - new_height) / 2), right, bottom - ((height - new_height) / 2)) + img = img.crop(bbox) + elif target_ar < ar: + new_width = (target_width / target_height) * height + bbox = (left + ((width - new_width) / 2), top, right - ((width - new_width) / 2), bottom) + img = img.crop(bbox) + # resize if larger + if target_width * 2 < width or target_height * 2 < height: + img = img.resize((target_width*2, target_height*2), Image.LANCZOS) + # TODO: create retina images jpg_name = 'img{}.jpg'.format(index) img.save(os.path.join(webroot, jpg_name), 'JPEG') return jpg_name -- cgit v1.2.3 From 761f5d564bf3d60acdeb5581d687c0c8c4b22a69 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 19 Apr 2017 13:22:38 -0400 Subject: add favicons and write intermediate images to scratch --- unbiased/html_template/apple-touch-icon.png | Bin 0 -> 7036 bytes unbiased/html_template/favicon.ico | Bin 0 -> 4414 bytes unbiased/html_template/favicon.png | Bin 0 -> 1093 bytes unbiased/html_template/unbiased.jinja.html | 3 +++ unbiased/main.py | 2 +- unbiased/unbiasedFunctions.py | 37 +++++++++++++++------------- 6 files changed, 24 insertions(+), 18 deletions(-) create mode 100644 unbiased/html_template/apple-touch-icon.png create mode 100644 unbiased/html_template/favicon.ico create mode 100644 unbiased/html_template/favicon.png diff --git a/unbiased/html_template/apple-touch-icon.png b/unbiased/html_template/apple-touch-icon.png new file mode 100644 index 0000000..93c33aa Binary files /dev/null and 
b/unbiased/html_template/apple-touch-icon.png differ diff --git a/unbiased/html_template/favicon.ico b/unbiased/html_template/favicon.ico new file mode 100644 index 0000000..b2b29c6 Binary files /dev/null and b/unbiased/html_template/favicon.ico differ diff --git a/unbiased/html_template/favicon.png b/unbiased/html_template/favicon.png new file mode 100644 index 0000000..0b94313 Binary files /dev/null and b/unbiased/html_template/favicon.png differ diff --git a/unbiased/html_template/unbiased.jinja.html b/unbiased/html_template/unbiased.jinja.html index 778bebc..40c9582 100644 --- a/unbiased/html_template/unbiased.jinja.html +++ b/unbiased/html_template/unbiased.jinja.html @@ -4,6 +4,9 @@ + + + UnBiased diff --git a/unbiased/main.py b/unbiased/main.py index 87b1e8c..f784bce 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -73,7 +73,7 @@ def run(webroot, scratch): newsSourceArr=buildNewsSourceArr(sourceList, scratch) #build the output file HTML - outputHTML=buildOutput(newsSourceArr, webroot) + outputHTML=buildOutput(newsSourceArr, webroot, scratch) #print the output file HTML printOutputHTML(outputHTML, webroot) diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index fdf9d8f..415a3cc 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -128,7 +128,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t return None -def buildOutput(newsSourceArr, webroot): +def buildOutput(newsSourceArr, webroot, scratch): #read in the template html file from jinja2 import Environment, PackageLoader, select_autoescape env = Environment( @@ -179,7 +179,7 @@ def buildOutput(newsSourceArr, webroot): source=newsSourceArr[h1RandomSources[i]] randomArticle=random.sample(range(len(source.h1Arr)), 1)[0] article=source.h1Arr[randomArticle] - img_name = pullImage(article.img, image_index, webroot, 350, 200) + img_name = pullImage(article.img, image_index, webroot, scratch, 350, 200) image_index += 1 article.img = img_name top_stories.append(article) @@ -188,7 +188,7 @@ def buildOutput(newsSourceArr, webroot): for i in range(len(h2RandomPairs)): pair=h2RandomPairs[i] article=newsSourceArr[pair[0]].h2Arr[pair[1]] - img_name = pullImage(article.img, image_index, webroot, 150, 100) + img_name = pullImage(article.img, image_index, webroot, scratch, 150, 100) image_index += 1 article.img = img_name middle_stories.append(article) @@ -226,11 +226,11 @@ def printOutputHTML(outputHTML, outDir): with open(os.path.join(outDir, 'index.html'), 'w') as fp: fp.write(outputHTML) - # copy over the template css file - css = pkgutil.get_data('unbiased', 'html_template/unbiased.css') - css = css.decode('utf8') - with open(os.path.join(outDir, 'unbiased.css'), 'w') as fp: - fp.write(css) + # copy over static package files + for filename in ['unbiased.css', 'favicon.ico', 'favicon.png', 'apple-touch-icon.png']: + data = pkgutil.get_data('unbiased', os.path.join('html_template', filename)) + with open(os.path.join(outDir, filename), 'wb') as fp: + fp.write(data) def buildNewsSourceArr(sourceList, scratchDir): @@ -256,13 +256,13 @@ def buildNewsSourceArr(sourceList, scratchDir): f=open(temp_file, 'r', encoding="utf8") content=f.read() f.close() - + #delete file MAYBE DON'T DO THIS? 
CAUSES OS ERRORS #os.remove(temp_file) #add stories etc to the NewsSource object h1s, h2s, h3s=extractURLs(content, source) - + #build the Article objects and add to newsSource's appropriate list if h1s!=None and h2s!=None: for url in h1s: @@ -279,21 +279,21 @@ def buildNewsSourceArr(sourceList, scratchDir): sourceList.remove(source) listLen-=1 - + #return the original sourceList, #since everything should have been modified in place - return sourceList + return sourceList -def pullImage(url, index, webroot, target_width=350, target_height=200): +def pullImage(url, index, webroot, scratch, target_width=350, target_height=200): extension = url.split('.')[-1].split('?')[0] img_name = 'img{}.{}'.format(index, extension) - out_file = os.path.join(webroot, img_name) + tmp_file = os.path.join(scratch, img_name) try: - subprocess.check_call(['wget', '-q', '-O', out_file, '--no-check-certificate', url]) + subprocess.check_call(['wget', '-q', '-O', tmp_file, '--no-check-certificate', url]) except Exception as ex: logger.error('Failed to pull image: url={} ex={}'.format(url, ex)) return '' - img = Image.open(out_file) + img = Image.open(tmp_file) # crop to aspect ratio target_ar = target_width / target_height left, top, right, bottom = img.getbbox() @@ -313,5 +313,8 @@ def pullImage(url, index, webroot, target_width=350, target_height=200): img = img.resize((target_width*2, target_height*2), Image.LANCZOS) # TODO: create retina images jpg_name = 'img{}.jpg'.format(index) - img.save(os.path.join(webroot, jpg_name), 'JPEG') + out_file = os.path.join(webroot, jpg_name) + img.save(out_file, 'JPEG') + if tmp_file != out_file: + os.remove(tmp_file) return jpg_name -- cgit v1.2.3 From 4a8cb231c3974d3f2000e170ca0c56850bc66c7f Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 19 Apr 2017 13:53:33 -0400 Subject: more consistent crawl frequency --- unbiased/main.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/unbiased/main.py b/unbiased/main.py index f784bce..c8a113e 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -22,11 +22,17 @@ def main(): parser.add_argument('-s', '--scratch', default='/opt/unbiased/scratch', help='writable scratch workspace') args = parser.parse_args() + crawl_frequency = 600 while True: logger.info('Starting crawl') + start = time.time() run(args.webroot, args.scratch) - logger.info('Crawl complete. Sleeping for 600s') - time.sleep(600) + finish = time.time() + runtime = finish - start + sleeptime = crawl_frequency - runtime + logger.info('Crawl complete in {}s. 
Sleeping for {}s'.format(int(runtime), int(sleeptime))) + if sleeptime > 0: + time.sleep(sleeptime) def run(webroot, scratch): sourceList=[] -- cgit v1.2.3 From 7a8efb94dc2463a6d30afc77f10df78ebfa4c353 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 19 Apr 2017 16:39:03 -0400 Subject: replace wget with requests library --- setup.py | 1 + unbiased/html_template/unbiased.css | 8 +-- unbiased/main.py | 2 +- unbiased/parser.py | 29 ++++++----- unbiased/unbiasedFunctions.py | 98 +++++++++++-------------------------- 5 files changed, 49 insertions(+), 89 deletions(-) diff --git a/setup.py b/setup.py index 2755304..57c27c0 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,7 @@ setup( install_requires=[ 'jinja2', 'Pillow', + 'requests', ], entry_points={ 'console_scripts': [ diff --git a/unbiased/html_template/unbiased.css b/unbiased/html_template/unbiased.css index 6817cc9..1424ee9 100755 --- a/unbiased/html_template/unbiased.css +++ b/unbiased/html_template/unbiased.css @@ -108,8 +108,8 @@ a:hover{ width:350px; height:200px; overflow:hidden; - background-size: auto 234px;/*cover;*/ - background-position: top center;/*center center;*/ + background-size: 100%; + background-position: center center; margin:0 auto; } @@ -169,8 +169,8 @@ a:hover{ width:150px; height:100px; overflow:hidden; - background-size: auto 117px;/*cover;*/ - background-position: top center;/*center center;*/ + background-size: 100%; + background-position: center center; float:left; max-width:35%; } diff --git a/unbiased/main.py b/unbiased/main.py index c8a113e..c760788 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -76,7 +76,7 @@ def run(webroot, scratch): logger.debug('Build failed, retrying. source={} ex={}'.format(source, ex)) #scrape all urls and build data structure - newsSourceArr=buildNewsSourceArr(sourceList, scratch) + newsSourceArr = sourceList #build the output file HTML outputHTML=buildOutput(newsSourceArr, webroot, scratch) diff --git a/unbiased/parser.py b/unbiased/parser.py index 2bba27d..0a8398c 100755 --- a/unbiased/parser.py +++ b/unbiased/parser.py @@ -4,6 +4,9 @@ import logging import os import re import subprocess +import urllib.parse + +import requests from unbiased.unbiasedObjects import * from unbiased.unbiasedFunctions import buildArticle @@ -16,21 +19,11 @@ Takes in a URL, downloads the file to a temp file, reads the file into a string, and returns that string ''' def urlToContent(url, scratchDir, sourceEncoding='utf8'): - temp_file = os.path.join(scratchDir, 'temp1.html') - - #download file - #os.system('wget -q -O scratch/temp1.html --no-check-certificate '+url) - subprocess.check_call(['wget', '-q', '-O', temp_file, '--no-check-certificate', url]) - - #read file - if sourceEncoding=='utf8': - f=open(temp_file, 'r', encoding="utf8") + res = requests.get(url) + if res.status_code == 200: + return res.text else: - f=open(temp_file, 'r', encoding="latin-1") - content=f.read() - f.close() - - return content + raise Exception("Failed to download {}".format(url)) ''' @@ -39,6 +32,13 @@ calls the file scraper and appends the new Article object. 
Returns a newsSource2 object ''' def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir): + + url_parts = urllib.parse.urlparse(url) + scheme = url_parts.scheme + h1URLs = [urllib.parse.urlparse(x, scheme=scheme).geturl() for x in h1URLs] + h2URLs = [urllib.parse.urlparse(x, scheme=scheme).geturl() for x in h2URLs] + h3URLs = [urllib.parse.urlparse(x, scheme=scheme).geturl() for x in h3URLs] + h1Arr=[] a=buildArticle(h1URLs[0], name, scratchDir) if a==None: @@ -54,7 +54,6 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir): else: logger.debug('H2 Nonetype in '+name) - h3Arr=[] for x in h3URLs: a=buildArticle(x, name, scratchDir) diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 415a3cc..0181beb 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -1,4 +1,5 @@ import html +import io import logging import os import pkgutil @@ -6,10 +7,12 @@ import random import re import subprocess import time - -from unbiased.unbiasedObjects import * +import urllib.parse from PIL import Image +import requests + +from unbiased.unbiasedObjects import * logger = logging.getLogger('unbiased') @@ -21,16 +24,25 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t logger.debug(sourceName) logger.debug(url) - temp_article = os.path.join(scratchDir, 'temp_article.html') + url_parts = urllib.parse.urlparse(url) + scheme = url_parts.scheme #download url - #os.system('wget -q -O scratch/temp_article.html --no-check-certificate '+url) - subprocess.check_call(['wget', '-q', '-O', temp_article, '--no-check-certificate', url]) + try: + res = requests.get(url) + except Exception as ex: + logger.error("""ARTICLE DOWNLOADING ERROR + SOURCE:\t{} + URL:\t{}""".format(sourceName, url)) + return None - #read the file in - f=open(temp_article, 'r', encoding="utf8") - content=f.read() - f.close() + if res.status_code == 200: + content = res.text + else: + logger.error("""ARTICLE DOWNLOADING ERROR + SOURCE:\t{} + URL:\t{}""".format(sourceName, url)) + return None try: if sourceName=='The Guardian US': @@ -61,6 +73,8 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t #trim to just before it then lop it off img=img[:-1].strip() img=img[:-1] + # fix the scheme if it's missing + img = urllib.parse.urlparse(img, scheme=scheme).geturl() if debugging: logger.debug(img) @@ -232,68 +246,16 @@ def printOutputHTML(outputHTML, outDir): with open(os.path.join(outDir, filename), 'wb') as fp: fp.write(data) -def buildNewsSourceArr(sourceList, scratchDir): - - #build the data structure - i=0 - listLen=len(sourceList) - while i < listLen: - source=sourceList[i] - - if type(source) is NewsSource2: - i+=1 - continue - - url=source.url - - temp_file = os.path.join(scratchDir, 'temp{}.html'.format(i)) - - #download file - #os.system('wget -q -O scratch/temp'+str(i)+'.html --no-check-certificate '+url) - subprocess.check_call(['wget', '-q', '-O', temp_file, '--no-check-certificate', url]) - - #read file - f=open(temp_file, 'r', encoding="utf8") - content=f.read() - f.close() - - #delete file MAYBE DON'T DO THIS? 
CAUSES OS ERRORS - #os.remove(temp_file) - - #add stories etc to the NewsSource object - h1s, h2s, h3s=extractURLs(content, source) - - #build the Article objects and add to newsSource's appropriate list - if h1s!=None and h2s!=None: - for url in h1s: - article=buildArticle(url, source.name, scratchDir) - if article!=None: source.addArticle(article, 1) #sourceList[i].h1Arr.append(article) - for url in h2s: - article=buildArticle(url, source.name, scratchDir) - if article!=None: sourceList[i].h2Arr.append(article) - for url in h3s: - article=buildArticle(url, source.name, scratchDir) - if article!=None: sourceList[i].h3Arr.append(article) - i+=1 - else: - sourceList.remove(source) - listLen-=1 - - - #return the original sourceList, - #since everything should have been modified in place - return sourceList - def pullImage(url, index, webroot, scratch, target_width=350, target_height=200): extension = url.split('.')[-1].split('?')[0] img_name = 'img{}.{}'.format(index, extension) - tmp_file = os.path.join(scratch, img_name) - try: - subprocess.check_call(['wget', '-q', '-O', tmp_file, '--no-check-certificate', url]) - except Exception as ex: - logger.error('Failed to pull image: url={} ex={}'.format(url, ex)) + res = requests.get(url) + if res.status_code == 200: + content = res.content + else: + logger.error('Image not found: url={}'.format(url)) return '' - img = Image.open(tmp_file) + img = Image.open(io.BytesIO(content)) # crop to aspect ratio target_ar = target_width / target_height left, top, right, bottom = img.getbbox() @@ -315,6 +277,4 @@ def pullImage(url, index, webroot, scratch, target_width=350, target_height=200) jpg_name = 'img{}.jpg'.format(index) out_file = os.path.join(webroot, jpg_name) img.save(out_file, 'JPEG') - if tmp_file != out_file: - os.remove(tmp_file) return jpg_name -- cgit v1.2.3 From 8dffc67fae2c5a6cc1fe125809e0b74d8b4b28f3 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 19 Apr 2017 16:47:30 -0400 Subject: don't need a scratch directory any more --- unbiased/main.py | 10 ++--- unbiased/parser.py | 91 +++++++++++++++++++++---------------------- unbiased/unbiasedFunctions.py | 11 +++--- 3 files changed, 54 insertions(+), 58 deletions(-) diff --git a/unbiased/main.py b/unbiased/main.py index c760788..60211ea 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -19,14 +19,13 @@ logger.addHandler(ch) def main(): parser = argparse.ArgumentParser() parser.add_argument('-w', '--webroot', default='/var/www/ubiased', help='location to write the output html') - parser.add_argument('-s', '--scratch', default='/opt/unbiased/scratch', help='writable scratch workspace') args = parser.parse_args() crawl_frequency = 600 while True: logger.info('Starting crawl') start = time.time() - run(args.webroot, args.scratch) + run(args.webroot) finish = time.time() runtime = finish - start sleeptime = crawl_frequency - runtime @@ -34,7 +33,7 @@ def main(): if sleeptime > 0: time.sleep(sleeptime) -def run(webroot, scratch): +def run(webroot): sourceList=[] ''' @@ -47,7 +46,6 @@ def run(webroot, scratch): ''' logger.debug('Running with webroot="{}"'.format(webroot)) - logger.debug('Running with scratch="{}"'.format(scratch)) ### These values have to be the second half of the function name @@ -65,7 +63,7 @@ def run(webroot, scratch): possibles = globals().copy() possibles.update(locals()) method = possibles.get(fn) - src=method(scratch) + src=method() sourceList.append(src) break except Exception as ex: @@ -79,7 +77,7 @@ def run(webroot, scratch): newsSourceArr = sourceList 
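
A condensed sketch of the image path the requests/Pillow patches above converge on: download the bytes, center-crop to the target aspect ratio, and save a JPEG. The function name is made up, and the convert('RGB') call is an extra safety step (Pillow refuses to write RGBA data as JPEG), not something the patch itself does:

    import io

    import requests
    from PIL import Image

    def fetch_and_crop(url, out_path, target_width=350, target_height=200):
        res = requests.get(url)
        if res.status_code != 200:
            return None
        img = Image.open(io.BytesIO(res.content))
        width, height = img.size
        target_ar = target_width / target_height
        ar = width / height
        if ar > target_ar:
            # Too wide: trim equal amounts from the left and right.
            new_width = int(target_ar * height)
            left = (width - new_width) // 2
            img = img.crop((left, 0, left + new_width, height))
        elif ar < target_ar:
            # Too tall: trim equal amounts from the top and bottom.
            new_height = int(width / target_ar)
            top = (height - new_height) // 2
            img = img.crop((0, top, width, top + new_height))
        img = img.convert('RGB')
        img.save(out_path, 'JPEG')
        return out_path
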
#build the output file HTML - outputHTML=buildOutput(newsSourceArr, webroot, scratch) + outputHTML=buildOutput(newsSourceArr, webroot) #print the output file HTML printOutputHTML(outputHTML, webroot) diff --git a/unbiased/parser.py b/unbiased/parser.py index 0a8398c..41727f5 100755 --- a/unbiased/parser.py +++ b/unbiased/parser.py @@ -3,7 +3,6 @@ import logging import os import re -import subprocess import urllib.parse import requests @@ -18,7 +17,7 @@ logger = logging.getLogger('unbiased') Takes in a URL, downloads the file to a temp file, reads the file into a string, and returns that string ''' -def urlToContent(url, scratchDir, sourceEncoding='utf8'): +def urlToContent(url, sourceEncoding='utf8'): res = requests.get(url) if res.status_code == 200: return res.text @@ -31,7 +30,7 @@ Creates a new newsSource2 object. For each URL in h1-h3URLs, calls the file scraper and appends the new Article object. Returns a newsSource2 object ''' -def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir): +def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs): url_parts = urllib.parse.urlparse(url) scheme = url_parts.scheme @@ -40,7 +39,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir): h3URLs = [urllib.parse.urlparse(x, scheme=scheme).geturl() for x in h3URLs] h1Arr=[] - a=buildArticle(h1URLs[0], name, scratchDir) + a=buildArticle(h1URLs[0], name) if a==None: logger.debug('H1 Nonetype in '+name) else: @@ -48,7 +47,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir): h2Arr=[] for x in h2URLs: - a=buildArticle(x, name, scratchDir) + a=buildArticle(x, name) if a!=None: h2Arr.append(a) else: @@ -56,7 +55,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir): h3Arr=[] for x in h3URLs: - a=buildArticle(x, name, scratchDir) + a=buildArticle(x, name) if a!=None: h3Arr.append(a) else: @@ -161,12 +160,12 @@ def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, b -def buildTheHill(scratchDir): +def buildTheHill(): url='http://thehill.com' name='The Hill' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, scratchDir) + content=urlToContent(url) #get main headline h1=content @@ -198,7 +197,7 @@ def buildTheHill(scratchDir): h3s.append(url+x) h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - hil=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) + hil=buildNewsSource2(name, url, h1s, h2s, h3s) hil=removeBadStories(hil, ['THE MEMO'], None, ['Matt Schlapp', 'Juan Williams', 'Judd Gregg'], None, None) return hil @@ -207,14 +206,14 @@ def buildTheHill(scratchDir): -def buildGuardian(scratchDir): +def buildGuardian(): url='http://www.theguardian.com/us' name='The Guardian US' while True: #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, scratchDir, 'utf8') + content=urlToContent(url, 'utf8') #get main headline h1=content @@ -256,20 +255,20 @@ def buildGuardian(scratchDir): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - gdn=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) + gdn=buildNewsSource2(name, url, h1s, h2s, h3s) gdn=removeBadStories(gdn, None, ['Tom McCarthy', 'Andy Hunter'], ['https://www.theguardian.com/profile/ben-jacobs'], None) return gdn -def buildWashTimes(scratchDir): +def buildWashTimes(): url='http://www.washingtontimes.com/' name='Washington Times' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, scratchDir) + content=urlToContent(url) #get main headline h1=content @@ -305,19 +304,19 @@ def buildWashTimes(scratchDir): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - 
wat=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) + wat=buildNewsSource2(name, url, h1s, h2s, h3s) wat=removeBadStories(wat, None, None, None, None) return wat -def buildCSM(scratchDir): +def buildCSM(): url='http://www.csmonitor.com/USA' name='Christian Science Monitor' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, scratchDir) + content=urlToContent(url) #this makes sure we don't get '/USA' in the URL twice url=url.split('/USA')[0] @@ -368,7 +367,7 @@ def buildCSM(scratchDir): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - csm=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) + csm=buildNewsSource2(name, url, h1s, h2s, h3s) badTitleArr=['Change Agent'] badDescArr=None @@ -388,7 +387,7 @@ in The Blaze articles by grabbing the first portion of the story instead def blazeFixDesc(articleArr): TAG_RE = re.compile(r'<[^>]+>') for i in range(len(articleArr)): - desc=urlToContent(articleArr[i].url, scratchDir) + desc=urlToContent(articleArr[i].url) desc=desc.split('
    ', 1)[1] desc=desc.split('

    ', 1)[1] desc=TAG_RE.sub('', desc) @@ -400,12 +399,12 @@ def blazeFixDesc(articleArr): -def buildBlaze(scratchDir): +def buildBlaze(): url='http://theblaze.com' name='The Blaze' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, scratchDir) + content=urlToContent(url) #get main headline h1=content @@ -439,7 +438,7 @@ def buildBlaze(scratchDir): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - blz=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) + blz=buildNewsSource2(name, url, h1s, h2s, h3s) badTitleArr=['Tucker Carlson', 'Mark Levin'] badDescArr=['Lawrence Jones', 'Mike Slater'] @@ -459,12 +458,12 @@ def buildBlaze(scratchDir): -def buildCBS(scratchDir): +def buildCBS(): url='http://cbsnews.com' name='CBS News' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, scratchDir) + content=urlToContent(url) #get main headline h1=content @@ -508,7 +507,7 @@ def buildCBS(scratchDir): h3s.append(url+x) h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - cbs=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) + cbs=buildNewsSource2(name, url, h1s, h2s, h3s) cbs=removeBadStories(cbs, ['60 Minutes'], ['60 Minutes'], None, None, ['whats-in-the-news-coverart']) return cbs @@ -517,12 +516,12 @@ def buildCBS(scratchDir): -def buildNBC(scratchDir): +def buildNBC(): url='http://nbcnews.com' name='NBC News' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, scratchDir) + content=urlToContent(url) #get main headline h1=content @@ -571,7 +570,7 @@ def buildNBC(scratchDir): ''' h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - nbc=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) + nbc=buildNewsSource2(name, url, h1s, h2s, h3s) nbc=removeBadStories(nbc, None, ['First Read'], None, None, None) @@ -580,12 +579,12 @@ def buildNBC(scratchDir): -def buildBBC(scratchDir): +def buildBBC(): url='http://www.bbc.com/news/world/us_and_canada' name='BBC US & Canada' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, scratchDir) + content=urlToContent(url) #get main headline h1=content @@ -619,7 +618,7 @@ def buildBBC(scratchDir): h3s.append('http://www.bbc.com'+x) h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - bbc=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) + bbc=buildNewsSource2(name, url, h1s, h2s, h3s) badTitleArr=None badDescArr=None badAuthorArr=None @@ -642,12 +641,12 @@ def buildBBC(scratchDir): -def buildWeeklyStandard(scratchDir): +def buildWeeklyStandard(): url='http://www.weeklystandard.com' name='Weekly Standard' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, scratchDir) + content=urlToContent(url) #get main headline h1=content @@ -692,7 +691,7 @@ def buildWeeklyStandard(scratchDir): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - wkl=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) + wkl=buildNewsSource2(name, url, h1s, h2s, h3s) #REMOVE BAD STORIES badTitleArr=None @@ -707,12 +706,12 @@ def buildWeeklyStandard(scratchDir): -def buildNPR(scratchDir): +def buildNPR(): url='http://www.npr.org/sections/news/' name='NPR' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, scratchDir) + content=urlToContent(url) #get main headline h1=content @@ -746,7 +745,7 @@ def buildNPR(scratchDir): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - npr=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) + npr=buildNewsSource2(name, url, h1s, h2s, h3s) #REMOVE BAD STORIES badTitleArr=['The Two-Way'] @@ -761,12 +760,12 @@ def buildNPR(scratchDir): -def buildABC(scratchDir): +def buildABC(): url='http://www.abcnews.go.com' name='ABC News' #DOWNLOAD HOMEPAGE 
CONTENT - content=urlToContent(url, scratchDir) + content=urlToContent(url) #get main headline h1=content @@ -800,7 +799,7 @@ def buildABC(scratchDir): h3s.append(x) h1s, h2s, h3s = removeDuplicates([h1], h2s, h3s) - abc=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) + abc=buildNewsSource2(name, url, h1s, h2s, h3s) #REMOVE BAD STORIES badTitleArr=None @@ -815,12 +814,12 @@ def buildABC(scratchDir): -def buildFoxNews(scratchDir): +def buildFoxNews(): url='http://foxnews.com' name='Fox News' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, scratchDir) + content=urlToContent(url) #get main headline h1=content @@ -854,7 +853,7 @@ def buildFoxNews(scratchDir): h3s = ['http:' + x if x.startswith('//') else x for x in h3s] h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - fox=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) + fox=buildNewsSource2(name, url, h1s, h2s, h3s) #REMOVE BAD STORIES badTitleArr=['O'Reilly', 'Fox News', 'Brett Baier', 'Tucker'] @@ -868,12 +867,12 @@ def buildFoxNews(scratchDir): -def buildNYT(scratchDir): +def buildNYT(): url='http://www.nytimes.com' name='New York Times' #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url, scratchDir) + content=urlToContent(url) #get main headline #this will likely need if/else logic @@ -951,7 +950,7 @@ def buildNYT(scratchDir): h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - nyt=buildNewsSource2(name, url, h1s, h2s, h3s, scratchDir) + nyt=buildNewsSource2(name, url, h1s, h2s, h3s) nyt=removeBadStories(nyt, None, None, None, None, ['https://www.nytimes.com/section/magazine', 'https://www.nytimes.com/newsletters/the-interpreter']) diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 0181beb..76c80b0 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -5,7 +5,6 @@ import os import pkgutil import random import re -import subprocess import time import urllib.parse @@ -17,7 +16,7 @@ from unbiased.unbiasedObjects import * logger = logging.getLogger('unbiased') #take in a url and delimiters, return twitter card -def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, titleDelEnd, imgDelStart, imgDelEnd): +def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, imgDelStart, imgDelEnd): debugging=False if debugging: @@ -142,7 +141,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t return None -def buildOutput(newsSourceArr, webroot, scratch): +def buildOutput(newsSourceArr, webroot): #read in the template html file from jinja2 import Environment, PackageLoader, select_autoescape env = Environment( @@ -193,7 +192,7 @@ def buildOutput(newsSourceArr, webroot, scratch): source=newsSourceArr[h1RandomSources[i]] randomArticle=random.sample(range(len(source.h1Arr)), 1)[0] article=source.h1Arr[randomArticle] - img_name = pullImage(article.img, image_index, webroot, scratch, 350, 200) + img_name = pullImage(article.img, image_index, webroot, 350, 200) image_index += 1 article.img = img_name top_stories.append(article) @@ -202,7 +201,7 @@ def buildOutput(newsSourceArr, webroot, scratch): for i in range(len(h2RandomPairs)): pair=h2RandomPairs[i] article=newsSourceArr[pair[0]].h2Arr[pair[1]] - img_name = pullImage(article.img, image_index, webroot, scratch, 150, 100) + img_name = pullImage(article.img, image_index, webroot, 150, 100) image_index += 1 article.img = img_name middle_stories.append(article) @@ -246,7 +245,7 @@ def printOutputHTML(outputHTML, outDir): with open(os.path.join(outDir, 
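
The change running through all of the hunks above is mechanical: every build function drops its scratchDir parameter because homepage HTML is no longer written to a scratch directory before parsing; it is pulled straight into memory. The helper below is a rough sketch of what that fetch step amounts to, assuming a requests-based approach like the one buildArticle uses; fetch_homepage, slice_between, the timeout value, and the h1 delimiters are illustrative names and choices, not the project's actual urlToContent or parser internals.

    import requests

    def fetch_homepage(url):
        # Download the page into memory; no scratch directory involved.
        # Return None on any network or HTTP error so the caller can retry.
        try:
            res = requests.get(url, timeout=30)
        except requests.RequestException:
            return None
        if res.status_code != 200:
            return None
        return res.text

    def slice_between(text, start, end):
        # Crude delimiter-based extraction in the spirit of the parser code.
        # Raises IndexError if the markers are missing.
        return text.split(start, 1)[1].split(end, 1)[0]

    content = fetch_homepage('http://cbsnews.com')
    if content is not None:
        main_headline_block = slice_between(content, '<h1', '</h1>')

The None-on-failure convention is an assumption here; in the real code the retry loop in main.py is what absorbs transient download failures.
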
filename), 'wb') as fp: fp.write(data) -def pullImage(url, index, webroot, scratch, target_width=350, target_height=200): +def pullImage(url, index, webroot, target_width=350, target_height=200): extension = url.split('.')[-1].split('?')[0] img_name = 'img{}.{}'.format(index, extension) res = requests.get(url) -- cgit v1.2.3 From c5a75b89716eabcefd1fe4cb880ffd98669a48a6 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 19 Apr 2017 22:59:21 -0400 Subject: a bit of refactoring --- unbiased/main.py | 55 ++++++++++++++++++++++++++++--------------- unbiased/unbiasedFunctions.py | 39 ++++++++++++------------------ 2 files changed, 51 insertions(+), 43 deletions(-) diff --git a/unbiased/main.py b/unbiased/main.py index 60211ea..ba72710 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -34,54 +34,71 @@ def main(): time.sleep(sleeptime) def run(webroot): - sourceList=[] + sources = [] ''' - SOURCES TO ADD NEXT: - -ABC -REUTERS -Town Hall - ''' logger.debug('Running with webroot="{}"'.format(webroot)) - ### These values have to be the second half of the function name ### E.g. Guardian calls buildGuardian(), etc. - sourceFnArr=['Guardian', 'TheHill', 'NPR', 'BBC', 'NBC', 'CBS', - 'FoxNews', 'WashTimes', 'CSM', 'ABC'] #'Blaze' + sourceFnArr = [ + 'Guardian', + 'TheHill', + 'NPR', + 'BBC', + 'NBC', + 'CBS', + 'FoxNews', + 'WashTimes', + 'CSM', + 'ABC', + ] for source in sourceFnArr: logger.info('Crawling {}'.format(source)) - tries=0 - while tries<3: + tries = 0 + while tries < 3: time.sleep(tries) try: - fn='build'+source + fn = 'build' + source possibles = globals().copy() possibles.update(locals()) method = possibles.get(fn) - src=method() - sourceList.append(src) + src = method() + sources.append(src) break except Exception as ex: - tries+=1 + tries += 1 if tries == 3: logger.error('Build failed. source={} ex={}'.format(source, ex)) else: logger.debug('Build failed, retrying. 
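
pullImage is only partly visible in this hunk, but its parameters (target_width and target_height), the img{index} naming, and the Image.open and crop-to-aspect-ratio context lines that appear later in this log outline what it does: download each story image and fit it to a fixed slot under the webroot. The sketch below is a guess at that crop-and-scale step using Pillow; the exact geometry and the .png output name are simplifications, not the project's code.

    import io
    import os

    import requests
    from PIL import Image

    def pull_image_sketch(url, index, webroot, target_width=350, target_height=200):
        res = requests.get(url)
        if res.status_code != 200:
            return ''
        img = Image.open(io.BytesIO(res.content))
        # Center-crop to the target aspect ratio, then scale to the slot size.
        target_ratio = target_width / target_height
        w, h = img.size
        if w / h > target_ratio:
            new_w = int(h * target_ratio)
            box = ((w - new_w) // 2, 0, (w + new_w) // 2, h)
        else:
            new_h = int(w / target_ratio)
            box = (0, (h - new_h) // 2, w, (h + new_h) // 2)
        img = img.crop(box).resize((target_width, target_height))
        # The real function keeps the source file's extension; .png keeps the sketch simple.
        img_name = 'img{}.png'.format(index)
        img.save(os.path.join(webroot, img_name))
        return img_name
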
source={} ex={}'.format(source, ex)) - - #scrape all urls and build data structure - newsSourceArr = sourceList + logger.info('Parsed home pages for: {}'.format([x.name for x in sources])) + + top_stories, middle_stories, bottom_stories = pickStories(sources) + logger.info('Picked top stories from: {}'.format([x.source for x in top_stories])) + logger.info('Picked middle stories from: {}'.format([x.source for x in middle_stories])) + logger.info('Picked bottom stories from: {}'.format([x.source for x in bottom_stories])) + + # download images + img_idx = 0 + for story in top_stories: + story.img = pullImage(story.img, img_idx, webroot, 350, 200) + img_idx += 1 + for story in middle_stories: + story.img = pullImage(story.img, img_idx, webroot, 150, 100) + img_idx += 1 #build the output file HTML - outputHTML=buildOutput(newsSourceArr, webroot) + outputHTML = buildOutput(top_stories, middle_stories, bottom_stories) #print the output file HTML - printOutputHTML(outputHTML, webroot) - + writeOutputHTML(outputHTML, webroot) if __name__=="__main__": main() diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 76c80b0..2053ba5 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -141,15 +141,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, return None -def buildOutput(newsSourceArr, webroot): - #read in the template html file - from jinja2 import Environment, PackageLoader, select_autoescape - env = Environment( - loader=PackageLoader('unbiased', 'html_template'), - autoescape=select_autoescape(['html', 'xml']) - ) - template = env.get_template('unbiased.jinja.html') - +def pickStories(newsSourceArr): #set the random order for sources h1RandomSources=[] while len(h1RandomSources)<4: @@ -192,18 +184,12 @@ def buildOutput(newsSourceArr, webroot): source=newsSourceArr[h1RandomSources[i]] randomArticle=random.sample(range(len(source.h1Arr)), 1)[0] article=source.h1Arr[randomArticle] - img_name = pullImage(article.img, image_index, webroot, 350, 200) - image_index += 1 - article.img = img_name top_stories.append(article) middle_stories = [] for i in range(len(h2RandomPairs)): pair=h2RandomPairs[i] article=newsSourceArr[pair[0]].h2Arr[pair[1]] - img_name = pullImage(article.img, image_index, webroot, 150, 100) - image_index += 1 - article.img = img_name middle_stories.append(article) bottom_stories = [] @@ -212,14 +198,21 @@ def buildOutput(newsSourceArr, webroot): article=newsSourceArr[pair[0]].h3Arr[pair[1]] bottom_stories.append(article) - sourcesStr='' - for i in range(len(newsSourceArr)-1): - sourcesStr+=newsSourceArr[i].name+', ' - sourcesStr+=newsSourceArr[-1].name - logger.info('Successfully parsed: '+sourcesStr) + return top_stories, middle_stories, bottom_stories + +def buildOutput(top_stories, middle_stories, bottom_stories): + #read in the template html file + from jinja2 import Environment, PackageLoader, select_autoescape + env = Environment( + loader=PackageLoader('unbiased', 'html_template'), + autoescape=select_autoescape(['html', 'xml']) + ) + template = env.get_template('unbiased.jinja.html') timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) + sourcesStr = ', '.join(set([x.source for x in top_stories] + [x.source for x in middle_stories] + [x.source for x in bottom_stories])) + html = template.render( timestamp = timestamp, top_stories = top_stories, @@ -228,13 +221,11 @@ def buildOutput(newsSourceArr, webroot): sources = sourcesStr, ) - #return updated text return html -def 
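
The refactor in this commit pulls apart three jobs that buildOutput used to do in one pass: picking which articles appear in each tier, fetching and resizing their images, and rendering HTML. Pieced together from the hunks above and just below, run() now reduces to roughly the following; the function names are the project's own, but the glue code here is paraphrased rather than copied.

    from unbiased.unbiasedFunctions import pickStories, pullImage, buildOutput, writeOutputHTML

    def run_sketch(sources, webroot):
        # 1. choose the top / middle / bottom articles across all sources
        top, middle, bottom = pickStories(sources)

        # 2. download and resize images only for the tiers that display them
        img_idx = 0
        for story in top:
            story.img = pullImage(story.img, img_idx, webroot, 350, 200)
            img_idx += 1
        for story in middle:
            story.img = pullImage(story.img, img_idx, webroot, 150, 100)
            img_idx += 1

        # 3. render the Jinja2 template and write index.html under the webroot
        outputHTML = buildOutput(top, middle, bottom)
        writeOutputHTML(outputHTML, webroot)

Keeping image downloads in main.py means buildOutput becomes a pure render step, which is what makes the later template changes easy to test in isolation.
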
printOutputHTML(outputHTML, outDir): - timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) - outputHTML=outputHTML.replace('xxTimexx', timestamp) +def writeOutputHTML(outputHTML, outDir): + timestamp = time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) with open(os.path.join(outDir, 'index.html'), 'w') as fp: fp.write(outputHTML) -- cgit v1.2.3 From f3d9287481b0ebba2b6dcb687e461dbc79074ad1 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 19 Apr 2017 22:59:37 -0400 Subject: tweaking top story responsiveness --- unbiased/html_template/unbiased.css | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/unbiased/html_template/unbiased.css b/unbiased/html_template/unbiased.css index 1424ee9..60932d8 100755 --- a/unbiased/html_template/unbiased.css +++ b/unbiased/html_template/unbiased.css @@ -115,10 +115,12 @@ a:hover{ @media only screen and (max-width:500px){ .top-stories-img{ - width:auto; + width:inherit; + height:inherit; + max-width:350px; + padding-top:57.14%; } } - .top-stories-hed{ font-weight:bold; -- cgit v1.2.3 From 9a8eff98fc5dec755683ce1708bf0caf578c5752 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 19 Apr 2017 23:31:37 -0400 Subject: tweaking responsive css --- unbiased/html_template/unbiased.css | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/unbiased/html_template/unbiased.css b/unbiased/html_template/unbiased.css index 60932d8..24b1959 100755 --- a/unbiased/html_template/unbiased.css +++ b/unbiased/html_template/unbiased.css @@ -174,20 +174,18 @@ a:hover{ background-size: 100%; background-position: center center; float:left; - max-width:35%; } .middle-stories-hed{ font-size:1.2em; - float:left; - width:300px; margin-left:10px; color:#00f; + padding-left:150px; } @media only screen and (max-width:500px){ .middle-stories-hed{ - max-width:60%; + max-width:60%; } } -- cgit v1.2.3 From deca37e1ae9da82bfd4ef5edf95fd9c22b871cd0 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Thu, 20 Apr 2017 13:38:04 -0400 Subject: switch to dict logging and configure separate log writers for console and debug --- unbiased/main.py | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/unbiased/main.py b/unbiased/main.py index ba72710..df2b209 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -2,19 +2,48 @@ import argparse import logging +import logging.config import time from unbiased.unbiasedObjects import * from unbiased.unbiasedFunctions import * from unbiased.parser import * +logging.config.dictConfig({ + 'version': 1, + 'formatters': { + 'console': { + 'format': '%(levelname)s %(filename)s:%(lineno)d %(message)s', + }, + 'file': { + 'format': '%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(message)s', + }, + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'level': 'INFO', + 'formatter': 'console', + }, + 'file': { + 'class': 'logging.handlers.RotatingFileHandler', + 'level': 'DEBUG', + 'formatter': 'file', + 'filename': '/opt/unbiased/logs/unbiased.debug.log', + 'maxBytes': 1024 * 1024, + 'backupCount': 3, + }, + }, + 'loggers': { + 'unbiased': { + 'handlers': ['console', 'file'], + }, + }, + 'root': { + 'level': 'DEBUG', + } +}) logger = logging.getLogger('unbiased') -logger.setLevel(logging.DEBUG) -ch = logging.StreamHandler() -ch.setLevel(logging.DEBUG) -ch.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) -logger.addHandler(ch) - def main(): parser = argparse.ArgumentParser() -- cgit v1.2.3 From 
b936c9b7385bd4330c7f9fda3775f9dc1483a328 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Thu, 20 Apr 2017 13:48:17 -0400 Subject: read log dir from command line and quiet down console logging --- unbiased/main.py | 15 ++++++++++----- unbiased/unbiasedFunctions.py | 8 ++++---- unbiased/unbiasedObjects.py | 2 +- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/unbiased/main.py b/unbiased/main.py index df2b209..f81321e 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -9,7 +9,9 @@ from unbiased.unbiasedObjects import * from unbiased.unbiasedFunctions import * from unbiased.parser import * -logging.config.dictConfig({ +logger = logging.getLogger('unbiased') + +logging_config = { 'version': 1, 'formatters': { 'console': { @@ -29,7 +31,7 @@ logging.config.dictConfig({ 'class': 'logging.handlers.RotatingFileHandler', 'level': 'DEBUG', 'formatter': 'file', - 'filename': '/opt/unbiased/logs/unbiased.debug.log', + 'filename': '', 'maxBytes': 1024 * 1024, 'backupCount': 3, }, @@ -42,14 +44,17 @@ logging.config.dictConfig({ 'root': { 'level': 'DEBUG', } -}) -logger = logging.getLogger('unbiased') +} def main(): parser = argparse.ArgumentParser() - parser.add_argument('-w', '--webroot', default='/var/www/ubiased', help='location to write the output html') + parser.add_argument('-w', '--webroot', help='location of config file') + parser.add_argument('-l', '--log-dir', help='location to write logs') args = parser.parse_args() + logging_config['handlers']['file']['filename'] = os.path.join(args.log_dir, 'unbiased.debug.log') + logging.config.dictConfig(logging_config) + crawl_frequency = 600 while True: logger.info('Starting crawl') diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 2053ba5..46dae19 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -30,7 +30,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, try: res = requests.get(url) except Exception as ex: - logger.error("""ARTICLE DOWNLOADING ERROR + logger.debug("""ARTICLE DOWNLOADING ERROR SOURCE:\t{} URL:\t{}""".format(sourceName, url)) return None @@ -38,7 +38,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, if res.status_code == 200: content = res.text else: - logger.error("""ARTICLE DOWNLOADING ERROR + logger.debug("""ARTICLE DOWNLOADING ERROR SOURCE:\t{} URL:\t{}""".format(sourceName, url)) return None @@ -135,7 +135,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, return a except Exception: - logger.error("""ARTICLE PARSING ERROR + logger.debug("""ARTICLE PARSING ERROR SOURCE:\t{} URL:\t{}""".format(sourceName, url)) return None @@ -243,7 +243,7 @@ def pullImage(url, index, webroot, target_width=350, target_height=200): if res.status_code == 200: content = res.content else: - logger.error('Image not found: url={}'.format(url)) + logger.debug('Image not found: url={}'.format(url)) return '' img = Image.open(io.BytesIO(content)) # crop to aspect ratio diff --git a/unbiased/unbiasedObjects.py b/unbiased/unbiasedObjects.py index 9372d3a..7908fbb 100644 --- a/unbiased/unbiasedObjects.py +++ b/unbiased/unbiasedObjects.py @@ -90,5 +90,5 @@ class NewsSource(): elif level==3: self.h3Arr.append(article) else: - logger.error("Invalid level in NewsSource.addArtlce: " + level) + logger.debug("Invalid level in NewsSource.addArtlce: " + level) -- cgit v1.2.3 From 45926db9caed33062ab491df63f33ee3b3f5c468 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Fri, 21 Apr 
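
Taken together, the logging commits above replace the hand-built StreamHandler with logging.config.dictConfig: a console handler pinned at INFO plus a rotating DEBUG file that is only attached when a log directory is passed on the command line (a guard that a later commit in this log tightens further). Condensed and lightly merged across those commits, the moving parts look roughly like this; handler names, sizes, and the -l/--log-dir flag mirror the diffs, while the help strings and the surrounding main() are trimmed.

    import argparse
    import logging
    import logging.config
    import os

    logging_config = {
        'version': 1,
        'formatters': {
            'console': {'format': '%(levelname)s %(filename)s:%(lineno)d %(message)s'},
            'file': {'format': '%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(message)s'},
        },
        'handlers': {
            'console': {'class': 'logging.StreamHandler', 'level': 'INFO', 'formatter': 'console'},
            'file': {
                'class': 'logging.handlers.RotatingFileHandler',
                'level': 'DEBUG',
                'formatter': 'file',
                'filename': '',  # filled in from --log-dir before dictConfig runs
                'maxBytes': 1024 * 1024,
                'backupCount': 3,
            },
        },
        'loggers': {'unbiased': {'handlers': ['console', 'file']}},
        'root': {'level': 'DEBUG'},
    }

    parser = argparse.ArgumentParser()
    parser.add_argument('-w', '--webroot', help='location to write the output html')
    parser.add_argument('-l', '--log-dir', help='location to write detailed logs')
    args = parser.parse_args()

    if args.log_dir:
        logging_config['handlers']['file']['filename'] = os.path.join(args.log_dir, 'unbiased.debug.log')
    else:
        # no log dir given: drop the file handler so dictConfig does not try to open ''
        logging_config['loggers']['unbiased']['handlers'].remove('file')
        del logging_config['handlers']['file']

    logging.config.dictConfig(logging_config)
    logger = logging.getLogger('unbiased')

Because the root level stays at DEBUG and the handlers do the filtering, the per-article errors demoted to logger.debug() in the same commit still reach the rotating file while the console stays quiet.
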
2017 22:33:40 -0400 Subject: rewrite css to use flexbox for responsive display --- unbiased/html_template/unbiased.css | 276 ++++++++++++----------------- unbiased/html_template/unbiased.jinja.html | 83 +++++---- 2 files changed, 156 insertions(+), 203 deletions(-) diff --git a/unbiased/html_template/unbiased.css b/unbiased/html_template/unbiased.css index 24b1959..caf4691 100755 --- a/unbiased/html_template/unbiased.css +++ b/unbiased/html_template/unbiased.css @@ -1,220 +1,166 @@ -/*body{ - width:900px; - margin-left:auto; - margin-right:auto; -}*/ +body { + margin: 0; +} +a:link, a:visited, a:hover, a:active, a { + color: #00f; + text-decoration:none; +} -body{ - margin:0; +a:hover { + cursor:pointer; } -a:link, a:visited, a:hover, a:active { - color: #00f; - text-decoration:none; - } +hr { + max-width: 890px; + margin: 5px auto; + border: 0; + height: 3px; + background-color: #BB133E; +} -a:hover{ - cursor:pointer; +#page-header { + width: 100%; + text-align: center; + padding: .5em 0 1em; + margin-bottom: 1em; + border-bottom: 3px solid #BB133E; + background: #002147; } -#page-header{ - width:100%; - text-align:center; - padding:.5em 0 1em; - margin-bottom:1em; - border-bottom:3px solid #BB133E; - background:#002147; +.title { + font-size: 3em; } -.title{ - font-size:3em; +#title-1 { + font-style: italic; + color: #fff; } -#title-1{ - font-style:italic; - color:#fff; +#title-2 { + color: #fff; } -#title-2{ - color:#fff; +#subtitle { + font-size: 1.25em; + color: #ccc; } -#subtitle{ - font-size:1.25em; - color:#ccc; +#timestamp { + margin: .5em 0 0 0; + font-size: .8em; + color: #cc6; } -#timestamp{ - margin:.5em 0 0 0; - font-size:.8em; - color:#cc6; +#top-stories { + max-width: 900px; + display: flex; + flex-wrap: wrap; + margin: 5px auto; } -#page-container{ - width:900px; - margin-left:auto; - margin-right:auto; +.top-story { + flex: 1 0 350px; + margin: 5px; + padding: 10px; + border:2px solid #eee; } -@media only screen and (max-width:900px){ - #page-container{ - width:100% - } +.top-stories-img { + width: 100%; + padding-bottom: 57%; + background-size: 100%; + background-position: center center; } -#top-stories{ - width:95%; - display:block; - overflow:auto; - padding:10px; - margin-left:auto; - margin-right:auto; - text-align:center; - border-bottom: 3px solid #BB133E; - margin-bottom: 10px; -} - -.row{ - display:flex; -} - -.top-story{ - display:inline-block; - vertical-align:top; - text-align:left; - width:360px; - height:350px; - overflow:hidden; - background:#fff; - margin:10px; - padding:10px; - border:2px solid #ccc; - flex:1; -} - -@media only screen and (max-width:500px){ - .row{ - display:block; - } - .top-story{ - display:block; - width:auto; - height:auto; - } -} - -.top-stories-img{ - width:350px; - height:200px; - overflow:hidden; - background-size: 100%; - background-position: center center; - margin:0 auto; +.top-stories-hed { + font-size: 1.3em; + margin: 10px 0; + color: #00f; } -@media only screen and (max-width:500px){ - .top-stories-img{ - width:inherit; - height:inherit; - max-width:350px; - padding-top:57.14%; - } +.top-stories-desc { + font-size: 1em; } -.top-stories-hed{ - font-weight:bold; - font-size:1.35em; - margin:10px 10px 0; - color:#00f; +.c2 { + max-width: 900px; + display: flex; + flex-wrap: wrap; + margin: 5px auto; } -.top-stories-desc{ - font-size:1em; - padding-top:.5em; - margin:0 .75em; +.c2 hr { + display: none; } -#middle-stories{ - clear:both; - width:500px; - margin:0 auto; - padding:0; - display:block; - overflow:auto; - float:left; 
+#middle-stories { + flex: 7 0 200px; } -@media only screen and (max-width:500px){ - #middle-stories{ - width:100%; - float:none; - } +.middle-story { + margin: 5px; + border: 2px solid #eee; } -.middle-story{ - margin:5px 10px; - padding:10px; - background:#fff; - border:2px solid #ddd; - width:460px; - float:left; +.middle-story a { + padding: 10px; + display: inline-block; } -@media only screen and (max-width:500px){ - .middle-story{ - width:auto; - } +.middle-story a p { + margin: 0; } .middle-stories-img{ - width:150px; - height:100px; - overflow:hidden; + width: 150px; + height: 100px; background-size: 100%; background-position: center center; - float:left; + float: left; + margin-right: 10px; } -.middle-stories-hed{ - font-size:1.2em; - margin-left:10px; - color:#00f; - padding-left:150px; +#middle-stories a { + font-size: 1.1em; + color: #00f; } -@media only screen and (max-width:500px){ - .middle-stories-hed{ - max-width:60%; - } +#bottom-stories { + flex: 3 0 200px; + border: 2px solid #eee; + margin: 5px; } -#bottom-stories{ - margin:0 10px; - padding:10px; - display:block; - overflow:auto; - float:left; - width:350px; - border:5px solid #ddd; +.bottom-story { + padding: 10px; + color: #00f; } -@media only screen and (max-width:900px){ - #bottom-stories{ - width:auto; - border-width:3px; - float:none; - } +#sources { + margin: 2em 5px 0 5px; + font-size: .8em; } -.bottom-story{ color:#00f; - - padding:15px 0; - color:#00f; +@media (max-width: 900px) { + hr { + width: inherit; + margin: 5px; + } } -#sources{ - clear:both; - padding-top:4em; - font-size:.8em; +@media (max-width: 767px) { + .top-stories { + flex-wrap: nowrap; + flex-direction: column; + } + .top-story { + flex: 1 0 250px; + } + .c2 { + flex-direction: column; + } + .c2 hr { + display: inherit; + } } diff --git a/unbiased/html_template/unbiased.jinja.html b/unbiased/html_template/unbiased.jinja.html index 40c9582..fcca97f 100644 --- a/unbiased/html_template/unbiased.jinja.html +++ b/unbiased/html_template/unbiased.jinja.html @@ -9,64 +9,71 @@ UnBiased - -

    + -
    + + +
    + + {% for story in top_stories %} -
    +
    + +
    +
    {{ story.title|safe }}
    +
    +
    {{ story.description|safe|truncate(140) }}
    +
    - {% for story in top_stories %} + {% endfor %} -
    - -
    -
    {{ story.title|safe }}
    -
    -
    {{ story.description|safe|truncate(140) }}
    - {% endfor %} +
    -
    +
    -
    +
    - {% for story in middle_stories %} + {% for story in middle_stories %} - -
    -
    +
    + +

    +

    + {{ story.title|safe }} +

    +
    -
    {{ story.title|safe }}
    + + {% endfor %} +
    - - {% endfor %} +
    -
    +
    -
    + {% for story in bottom_stories %} - {% for story in bottom_stories %} +
    + {{ story.title|safe }} +
    -
    - {{ story.title|safe }} -
    + {% endfor %} - {% endfor %} +
    -
    +
    -
    +
    + Sources: {{ sources }} +
    -
    - Sources: {{ sources }} -
    - + -- cgit v1.2.3 From 1cbd15b3f35e162a21b2dc2ac784b9acf71b6c3d Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Fri, 21 Apr 2017 22:40:34 -0400 Subject: include favicons in the distribution --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 57c27c0..2761041 100644 --- a/setup.py +++ b/setup.py @@ -2,12 +2,14 @@ from setuptools import setup setup( name="unbiased", - version="0", + version="1", packages=['unbiased'], package_data={ 'unbiased': [ 'html_template/*.html', 'html_template/*.css', + 'html_template/*.ico', + 'html_template/*.png', ], }, install_requires=[ -- cgit v1.2.3 From 5cbce38c92953d24b48f714b1fc33d5cafdf874a Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Fri, 21 Apr 2017 23:38:55 -0400 Subject: fix safari rendering bug --- unbiased/html_template/unbiased.css | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unbiased/html_template/unbiased.css b/unbiased/html_template/unbiased.css index caf4691..5995dfd 100755 --- a/unbiased/html_template/unbiased.css +++ b/unbiased/html_template/unbiased.css @@ -150,7 +150,7 @@ hr { } @media (max-width: 767px) { - .top-stories { + #top-stories { flex-wrap: nowrap; flex-direction: column; } @@ -158,7 +158,7 @@ hr { flex: 1 0 250px; } .c2 { - flex-direction: column; + display: inherit; } .c2 hr { display: inherit; -- cgit v1.2.3 From 020c9908def3a816e05984c3ee55457fc423a931 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Sat, 22 Apr 2017 11:11:34 -0400 Subject: update command line arguments --- unbiased/main.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/unbiased/main.py b/unbiased/main.py index f81321e..caf77eb 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -48,11 +48,18 @@ logging_config = { def main(): parser = argparse.ArgumentParser() - parser.add_argument('-w', '--webroot', help='location of config file') - parser.add_argument('-l', '--log-dir', help='location to write logs') + parser.add_argument('webroot', help='location to write html output') + parser.add_argument('-l', '--log-dir', help='location to write detailed logs') + parser.add_argument('-d', '--debug', action='store_true', help='run in debug mode') args = parser.parse_args() - logging_config['handlers']['file']['filename'] = os.path.join(args.log_dir, 'unbiased.debug.log') + if args.log_dir: + logging_config['handlers']['file']['filename'] = os.path.join(args.log_dir, 'unbiased.debug.log') + else: + logging_config['loggers']['unbiased']['handlers'].remove('file') + del logging_config['handlers']['file'] + if args.debug: + logging_config['handlers']['console']['level'] = 'DEBUG' logging.config.dictConfig(logging_config) crawl_frequency = 600 -- cgit v1.2.3 From 76336db3237e122515f0ecec8c6a6c86790117c2 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Sat, 22 Apr 2017 11:12:13 -0400 Subject: systemd daemon service config --- etc/unbiased.service | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 etc/unbiased.service diff --git a/etc/unbiased.service b/etc/unbiased.service new file mode 100644 index 0000000..391e4ff --- /dev/null +++ b/etc/unbiased.service @@ -0,0 +1,12 @@ +[Unit] +Description=Unbiased News + +[Service] +Type=simple +ExecStart=/opt/unbiased/venv/bin/unbiased /opt/unbiased/webroot -l /opt/unbiased/logs +WorkingDirectory=/opt/unbiased +User=www-data +Restart=on-failure + +[Install] +WantedBy=multi-user.target -- cgit v1.2.3 From 218d3d2e6336f3ccf5e8329e2f0bb15efc7df94a Mon Sep 17 00:00:00 2001 From: Matt 
Singleton Date: Sat, 22 Apr 2017 11:12:42 -0400 Subject: gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 238da47..9e0f924 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.pyc *~ +.DS_Store __pycache__/ unbiased/scratch/*.html legacy_py/ -- cgit v1.2.3 From 53ccd0a1b21963021f2281c5df1a557f95514225 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Sat, 22 Apr 2017 11:16:39 -0400 Subject: remove old templates --- unbiased/html_template/newtemplate.html | 150 --------------------------- unbiased/html_template/template.html | 173 -------------------------------- 2 files changed, 323 deletions(-) delete mode 100644 unbiased/html_template/newtemplate.html delete mode 100755 unbiased/html_template/template.html diff --git a/unbiased/html_template/newtemplate.html b/unbiased/html_template/newtemplate.html deleted file mode 100644 index 0cec766..0000000 --- a/unbiased/html_template/newtemplate.html +++ /dev/null @@ -1,150 +0,0 @@ - - - - - - UnBiased - - - - - - - -
    - Sources: BBC US, NBC News, CBS News, The Blaze, Weekly Standard, New York Times, Fox News -
    - - diff --git a/unbiased/html_template/template.html b/unbiased/html_template/template.html deleted file mode 100755 index fc17006..0000000 --- a/unbiased/html_template/template.html +++ /dev/null @@ -1,173 +0,0 @@ - - - - - - - UnBiased - - - - - - - -
    - Sources: xxSourcesxx -
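
With newtemplate.html and template.html deleted, the only rendering path left is the Jinja2 template packaged under unbiased/html_template. For reference, the render-and-write sequence assembled from the buildOutput and writeOutputHTML hunks earlier in this log looks roughly like this, condensed; the cache-busting utime value and the error handling that later commits add are left out.

    import os
    import time

    from jinja2 import Environment, PackageLoader, select_autoescape

    def render_page(top_stories, middle_stories, bottom_stories, webroot):
        env = Environment(
            loader=PackageLoader('unbiased', 'html_template'),
            autoescape=select_autoescape(['html', 'xml']),
        )
        template = env.get_template('unbiased.jinja.html')
        html = template.render(
            timestamp=time.strftime('%a, %b %-d, %-I:%M%P %Z', time.localtime()),
            top_stories=top_stories,
            middle_stories=middle_stories,
            bottom_stories=bottom_stories,
            sources=', '.join({s.source for s in top_stories + middle_stories + bottom_stories}),
        )
        with open(os.path.join(webroot, 'index.html'), 'w') as fp:
            fp.write(html)

Shipping the template, CSS, and favicons as package data (see the setup.py change a few commits down) is what lets PackageLoader find these files from an installed copy rather than a source checkout.
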
    - - -- cgit v1.2.3 From 1c825c79a17f9ba1e7c81668921c87ccecd672d3 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Sun, 23 Apr 2017 05:33:23 -0400 Subject: tweak the rendering --- unbiased/html_template/unbiased.jinja.html | 12 ++++++------ unbiased/unbiasedFunctions.py | 6 ++++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/unbiased/html_template/unbiased.jinja.html b/unbiased/html_template/unbiased.jinja.html index fcca97f..4a07d0b 100644 --- a/unbiased/html_template/unbiased.jinja.html +++ b/unbiased/html_template/unbiased.jinja.html @@ -24,10 +24,10 @@
    -
    -
    {{ story.title|safe }}
    +
    +
    {{ story.title }}
    -
    {{ story.description|safe|truncate(140) }}
    +
    {{ story.description }}
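
Dropping the |safe filters here pairs with the html.unescape() calls added to buildArticle in the same commit, visible further down: article fields are stored as plain text, and Jinja2's autoescaping then encodes them exactly once at render time instead of trusting scraped strings to be markup-safe. A standalone illustration of why both halves are needed; this is not project code, and the sample title is invented.

    import html
    from jinja2 import Environment

    # og:title values scraped from article pages often arrive entity-encoded.
    scraped_title = 'Senate &amp; House spar over &quot;budget&quot; deal'

    # Store the decoded, plain-text form on the Article...
    title = html.unescape(scraped_title)   # Senate & House spar over "budget" deal

    # ...and let autoescaping re-encode it exactly once at render time.
    env = Environment(autoescape=True)
    template = env.from_string('<div class="top-stories-hed">{{ title }}</div>')
    print(template.render(title=title))
    # <div class="top-stories-hed">Senate &amp; House spar over &#34;budget&#34; deal</div>
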
    {% endfor %} @@ -45,8 +45,8 @@

    -

    - {{ story.title|safe }} +
    + {{ story.title }}

    @@ -62,7 +62,7 @@ {% for story in bottom_stories %}
    - {{ story.title|safe }} + {{ story.title }}
    {% endfor %} diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 46dae19..cb13a44 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -131,7 +131,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, logger.debug(description) - a=Article(title, url, img, description, sourceName, author) + a=Article(html.unescape(title), url, img, html.unescape(description), sourceName, html.unescape(author)) return a except Exception: @@ -209,12 +209,14 @@ def buildOutput(top_stories, middle_stories, bottom_stories): ) template = env.get_template('unbiased.jinja.html') - timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) + timestamp = time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) + utime = int(time.time()) sourcesStr = ', '.join(set([x.source for x in top_stories] + [x.source for x in middle_stories] + [x.source for x in bottom_stories])) html = template.render( timestamp = timestamp, + utime = utime, top_stories = top_stories, middle_stories = middle_stories, bottom_stories = bottom_stories, -- cgit v1.2.3 From 91851fc975169fe63d7e646d03cd1e7ad57553e5 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Fri, 2 Jun 2017 11:28:21 -0400 Subject: too many opinions buffalo --- unbiased/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unbiased/parser.py b/unbiased/parser.py index 41727f5..05a7fc1 100755 --- a/unbiased/parser.py +++ b/unbiased/parser.py @@ -858,7 +858,7 @@ def buildFoxNews(): #REMOVE BAD STORIES badTitleArr=['O'Reilly', 'Fox News', 'Brett Baier', 'Tucker'] badDescArr=['Sean Hannity'] - badAuthorArr=['Bill O\'Reilly', 'Sean Hannity'] + badAuthorArr=['Bill O\'Reilly', 'Sean Hannity', 'Howard Kurtz'] badImgArr=['http://www.foxnews.com/content/dam/fox-news/logo/og-fn-foxnews.jpg'] badURLArr=['http://www.foxnews.com/opinion', 'videos.foxnews.com'] fox=removeBadStories(fox, badTitleArr, badDescArr, badAuthorArr, badImgArr, badURLArr) -- cgit v1.2.3 From d64c47dbe07f944703c01179ccba57a8e6bfb523 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Fri, 19 May 2017 18:57:35 -0400 Subject: fix padding between sections --- unbiased/html_template/unbiased.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unbiased/html_template/unbiased.css b/unbiased/html_template/unbiased.css index 5995dfd..a68a4c2 100755 --- a/unbiased/html_template/unbiased.css +++ b/unbiased/html_template/unbiased.css @@ -158,7 +158,7 @@ hr { flex: 1 0 250px; } .c2 { - display: inherit; + flex-direction: column; } .c2 hr { display: inherit; -- cgit v1.2.3 From a82318fbdfc1af624fd9bf9bbae316ad45f43611 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Fri, 2 Jun 2017 11:30:14 -0400 Subject: tweaks for ios --- unbiased/html_template/unbiased.css | 6 ++++++ unbiased/html_template/unbiased.jinja.html | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/unbiased/html_template/unbiased.css b/unbiased/html_template/unbiased.css index a68a4c2..dc99ab7 100755 --- a/unbiased/html_template/unbiased.css +++ b/unbiased/html_template/unbiased.css @@ -163,4 +163,10 @@ hr { .c2 hr { display: inherit; } + #middle-stories { + flex: inherit; + } + #bottom-stories { + flex: inherit; + } } diff --git a/unbiased/html_template/unbiased.jinja.html b/unbiased/html_template/unbiased.jinja.html index 4a07d0b..0d191e7 100644 --- a/unbiased/html_template/unbiased.jinja.html +++ b/unbiased/html_template/unbiased.jinja.html @@ -1,7 +1,7 @@ - + -- cgit v1.2.3
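
One closing note on the 'too many opinions buffalo' change: the badTitleArr, badDescArr, badAuthorArr, badImgArr, and badURLArr lists handed to removeBadStories act as per-source blocklists, so adding 'Howard Kurtz' to the Fox News author list drops his columns before stories are picked. removeBadStories itself is not shown in this log; the sketch below assumes plain substring matching, which entries like 'Tucker' and 'videos.foxnews.com' suggest, and it uses Article attribute names that appear elsewhere in this log (title, description, img, url), with author assumed.

    def remove_bad_stories_sketch(articles, bad_titles=None, bad_descs=None,
                                  bad_authors=None, bad_imgs=None, bad_urls=None):
        # Illustrative only: the real removeBadStories takes a NewsSource and
        # filters its h1/h2/h3 lists; this operates on a flat list of articles.
        def is_bad(article):
            checks = [
                (bad_titles, article.title),
                (bad_descs, article.description),
                (bad_authors, article.author),   # author attribute assumed
                (bad_imgs, article.img),
                (bad_urls, article.url),
            ]
            return any(
                needles is not None and any(n in (field or '') for n in needles)
                for needles, field in checks
            )
        return [a for a in articles if not is_bad(a)]
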