diff options
author | ssstvinc2 <sstvinc2@gmail.com> | 2017-02-19 23:04:31 -0500 |
---|---|---|
committer | ssstvinc2 <sstvinc2@gmail.com> | 2017-02-19 23:04:31 -0500 |
commit | b544a59cb96193ddcd0b8c0f9cc70bda973415a5 (patch) | |
tree | ec5edbe35869f1b5b65a57d1a3b746c83dda3829 | |
parent | 53de97fd3c6fdb4c95a89171b52064a05b157fbf (diff) |
Fixed bounding box on h1s
-rwxr-xr-x[-rw-r--r--] | html_template/BAKtemplate.html | 236 | ||||
-rwxr-xr-x[-rw-r--r--] | html_template/BAKunbiased.css | 206 | ||||
-rwxr-xr-x[-rw-r--r--] | html_template/Penguins.jpg | bin | 777835 -> 777835 bytes | |||
-rw-r--r-- | html_template/newtemplate.html | 300 | ||||
-rwxr-xr-x[-rw-r--r--] | html_template/template.html | 18 | ||||
-rwxr-xr-x[-rw-r--r--] | html_template/unbiased.css | 7 | ||||
-rwxr-xr-x[-rw-r--r--] | main.py | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | parser.py | 1610 |
8 files changed, 1194 insertions, 1183 deletions
diff --git a/html_template/BAKtemplate.html b/html_template/BAKtemplate.html index ab1dbb9..94a3796 100644..100755 --- a/html_template/BAKtemplate.html +++ b/html_template/BAKtemplate.html @@ -1,118 +1,118 @@ -<!DOCTYPE html> -<html> - <head> - <meta charset="utf-8"> - <link rel="stylesheet" href="html_template/unbiased.css"> - <link rel="stylesheet" href="unbiased.css"> - <title>UnBiased</title> - </head> -<body> - -<div id="page-header"> - <span id="title-1" class="title">un</span><span id="title-2" class="title">biased</span><br /> - <span id="subtitle">a different way to read the news</span> - <p id="timestamp">Last updated: xxTimexx</p> -</div> - -<div id="page-container"> -<div id="top-stories"> - <div class="top-story"> - <a target="_blank" href="redirects/h1-1.html" id="top-story-1"> - <div class="top-stories-img"> - <img src="xxImg1-1xx" /> - </div> - <div class="top-stories-hed">xxTitle1-1xx</div> - </a> - <div class="top-stories-desc">xxDesc1-1xx</div> - </div> - - <div class="top-story"> - <a target="_blank" href="redirects/h1-2.html" id="top-story-2"> - <div class="top-stories-img"> - <img src="xxImg1-2xx" /> - </div> - <div class="top-stories-hed">xxTitle1-2xx</div> - </a> - <div class="top-stories-desc">xxDesc1-2xx</div> - </div> - - <div class="top-story"> - <a target="_blank" href="redirects/h1-3.html" id="top-story-3"> - <div class="top-stories-img"> - <img src="xxImg1-3xx" /> - </div> - <div class="top-stories-hed">xxTitle1-3xx</div> - </a> - <div class="top-stories-desc">xxDesc1-3xx</div> - </div> -</div> - -<div id="middle-stories"> - - <a target="_blank" href="redirects/h2-1.html" > - <div class="middle-story"> - <div class="middle-stories-img"> - <img src="xxImg2-1xx" /> - <p class="middle-stories-hed">xxTitle2-1xx</p> - </div> - </div> - </a> - - <a target="_blank" href="redirects/h2-2.html" > - <div class="middle-story"> - <div class="middle-stories-img"> - <img src="xxImg2-2xx" /> - <span class="middle-stories-hed">xxTitle2-2xx</span> - 
</div> - </div> - </a> - - <a target="_blank" href="redirects/h2-3.html" > - <div class="middle-story"> - <div class="middle-stories-img"> - <img src="xxImg2-3xx" /> - <span class="middle-stories-hed">xxTitle2-3xx</span> - </div> - </div> - </a> - - - <a target="_blank" href="redirects/h2-4.html" > - <div class="middle-story"> - <div class="middle-stories-img"> - <img src="xxImg2-4xx" /> - <span class="middle-stories-hed">xxTitle2-4xx</span> - </div> - </div> - </a> - - <a target="_blank" href="redirects/h2-5.html" > - <div class="middle-story"> - <div class="middle-stories-img"> - <img src="xxImg2-5xx" /> - <span class="middle-stories-hed">xxTitle2-5xx</span> - </div> - </div> - </a> - - <a target="_blank" href="redirects/h2-6.html" > - <div class="middle-story"> - <div class="middle-stories-img"> - <img src="xxImg2-6xx" /> - <span class="middle-stories-hed">xxTitle2-6xx</span> - </div> - </div> - </a> - -</div> - -<div id="bottom-stories"> -</div> - -</div> - -<div id="sources"> - Sources: xxSourcesxx -</div> -</body> -</html> +<!DOCTYPE html>
+<html>
+ <head>
+ <meta charset="utf-8">
+ <link rel="stylesheet" href="html_template/unbiased.css">
+ <link rel="stylesheet" href="unbiased.css">
+ <title>UnBiased</title>
+ </head>
+<body>
+
+<div id="page-header">
+ <span id="title-1" class="title">un</span><span id="title-2" class="title">biased</span><br />
+ <span id="subtitle">a different way to read the news</span>
+ <p id="timestamp">Last updated: xxTimexx</p>
+</div>
+
+<div id="page-container">
+<div id="top-stories">
+ <div class="top-story">
+ <a target="_blank" href="redirects/h1-1.html" id="top-story-1">
+ <div class="top-stories-img">
+ <img src="xxImg1-1xx" />
+ </div>
+ <div class="top-stories-hed">xxTitle1-1xx</div>
+ </a>
+ <div class="top-stories-desc">xxDesc1-1xx</div>
+ </div>
+
+ <div class="top-story">
+ <a target="_blank" href="redirects/h1-2.html" id="top-story-2">
+ <div class="top-stories-img">
+ <img src="xxImg1-2xx" />
+ </div>
+ <div class="top-stories-hed">xxTitle1-2xx</div>
+ </a>
+ <div class="top-stories-desc">xxDesc1-2xx</div>
+ </div>
+
+ <div class="top-story">
+ <a target="_blank" href="redirects/h1-3.html" id="top-story-3">
+ <div class="top-stories-img">
+ <img src="xxImg1-3xx" />
+ </div>
+ <div class="top-stories-hed">xxTitle1-3xx</div>
+ </a>
+ <div class="top-stories-desc">xxDesc1-3xx</div>
+ </div>
+</div>
+
+<div id="middle-stories">
+
+ <a target="_blank" href="redirects/h2-1.html" >
+ <div class="middle-story">
+ <div class="middle-stories-img">
+ <img src="xxImg2-1xx" />
+ <p class="middle-stories-hed">xxTitle2-1xx</p>
+ </div>
+ </div>
+ </a>
+
+ <a target="_blank" href="redirects/h2-2.html" >
+ <div class="middle-story">
+ <div class="middle-stories-img">
+ <img src="xxImg2-2xx" />
+ <span class="middle-stories-hed">xxTitle2-2xx</span>
+ </div>
+ </div>
+ </a>
+
+ <a target="_blank" href="redirects/h2-3.html" >
+ <div class="middle-story">
+ <div class="middle-stories-img">
+ <img src="xxImg2-3xx" />
+ <span class="middle-stories-hed">xxTitle2-3xx</span>
+ </div>
+ </div>
+ </a>
+
+
+ <a target="_blank" href="redirects/h2-4.html" >
+ <div class="middle-story">
+ <div class="middle-stories-img">
+ <img src="xxImg2-4xx" />
+ <span class="middle-stories-hed">xxTitle2-4xx</span>
+ </div>
+ </div>
+ </a>
+
+ <a target="_blank" href="redirects/h2-5.html" >
+ <div class="middle-story">
+ <div class="middle-stories-img">
+ <img src="xxImg2-5xx" />
+ <span class="middle-stories-hed">xxTitle2-5xx</span>
+ </div>
+ </div>
+ </a>
+
+ <a target="_blank" href="redirects/h2-6.html" >
+ <div class="middle-story">
+ <div class="middle-stories-img">
+ <img src="xxImg2-6xx" />
+ <span class="middle-stories-hed">xxTitle2-6xx</span>
+ </div>
+ </div>
+ </a>
+
+</div>
+
+<div id="bottom-stories">
+</div>
+
+</div>
+
+<div id="sources">
+ Sources: xxSourcesxx
+</div>
+</body>
+</html>
diff --git a/html_template/BAKunbiased.css b/html_template/BAKunbiased.css index 49b6dce..ade390b 100644..100755 --- a/html_template/BAKunbiased.css +++ b/html_template/BAKunbiased.css @@ -1,104 +1,104 @@ -a:link, a:visited, a:hover, a:active { - color: #00f; - text-decoration:none; - } - -#page-header{ - text-align:center; - padding:.5em 0 1em; - margin-bottom:1em; - border-bottom:1px solid #000; -} - -.title{ - font-size:3em; -} - -#title-1{ - font-style:italic; - color:#d00; -} - -#title-2{ - color:#00d; -} - -#subtitle{ - font-size:1.25em; -} - -#timestamp{ - margin:.5em 0 0 0; - font-size:.8em; -} - -#page-container{ - width:1150px; - padding:0 1em; - margin-left:auto; - margin-right:auto; -} - -#top-stories{ - width:1150px; - margin-left:auto; - margin-right:auto; - font-size:1.25em; -} - -.top-story{ - width:350px; - float:left; - margin:0 .5em; -} - -.top-stories-img{ - width:350px; - height:200px; - overflow:hidden; -} - -.top-stories-img img{ - width:100%; - display:block; - vertical-align:text-bottom; -} - -.top-stories-desc{ - font-size:.8em; - padding-top:.5em; -} - -#middle-stories{ - clear:both; - width:1000px; - margin:0 auto; -} - -.middle-story{ - margin:2em 5px; - width:45%; - float:left; - height:100px; -} - -.middle-story img{ - vertical-align:middle; - height:100px; - float:left; - margin-right:1em; -} - -.middle-stories-hed{ - font-size:1.1em; -} - -.middle-story p{ - display:block; -} - -#sources{ - clear:both; - padding-top:4em; - font-size:.8em; +a:link, a:visited, a:hover, a:active {
+ color: #00f;
+ text-decoration:none;
+ }
+
+#page-header{
+ text-align:center;
+ padding:.5em 0 1em;
+ margin-bottom:1em;
+ border-bottom:1px solid #000;
+}
+
+.title{
+ font-size:3em;
+}
+
+#title-1{
+ font-style:italic;
+ color:#d00;
+}
+
+#title-2{
+ color:#00d;
+}
+
+#subtitle{
+ font-size:1.25em;
+}
+
+#timestamp{
+ margin:.5em 0 0 0;
+ font-size:.8em;
+}
+
+#page-container{
+ width:1150px;
+ padding:0 1em;
+ margin-left:auto;
+ margin-right:auto;
+}
+
+#top-stories{
+ width:1150px;
+ margin-left:auto;
+ margin-right:auto;
+ font-size:1.25em;
+}
+
+.top-story{
+ width:350px;
+ float:left;
+ margin:0 .5em;
+}
+
+.top-stories-img{
+ width:350px;
+ height:200px;
+ overflow:hidden;
+}
+
+.top-stories-img img{
+ width:100%;
+ display:block;
+ vertical-align:text-bottom;
+}
+
+.top-stories-desc{
+ font-size:.8em;
+ padding-top:.5em;
+}
+
+#middle-stories{
+ clear:both;
+ width:1000px;
+ margin:0 auto;
+}
+
+.middle-story{
+ margin:2em 5px;
+ width:45%;
+ float:left;
+ height:100px;
+}
+
+.middle-story img{
+ vertical-align:middle;
+ height:100px;
+ float:left;
+ margin-right:1em;
+}
+
+.middle-stories-hed{
+ font-size:1.1em;
+}
+
+.middle-story p{
+ display:block;
+}
+
+#sources{
+ clear:both;
+ padding-top:4em;
+ font-size:.8em;
}
\ No newline at end of file diff --git a/html_template/Penguins.jpg b/html_template/Penguins.jpg Binary files differindex 030ab8a..030ab8a 100644..100755 --- a/html_template/Penguins.jpg +++ b/html_template/Penguins.jpg diff --git a/html_template/newtemplate.html b/html_template/newtemplate.html index 923dee2..0cec766 100644 --- a/html_template/newtemplate.html +++ b/html_template/newtemplate.html @@ -1,150 +1,150 @@ -<!DOCTYPE html> -<html> - <head> - <meta charset="utf-8"> - <link rel="stylesheet" href="unbiased.css"> - <title>UnBiased</title> - </head> -<body> - -<div id="page-header"> - <span id="title-1" class="title">un</span><span id="title-2" class="title">biased</span><br /> - <span id="subtitle">a different way to read the news</span> - <p id="timestamp">Last updated: Mon, Feb 13, 7:51pm EST</p> -</div> - -<div id="page-container"> - <div id="top-stories"> - - <div class="top-story"> - <a target="_blank" id="top-story-1" href="" onclick="location.href='xxURL1-1'"> - <div class="top-stories-img" style="background-image: url('http://www.theblaze.com/wp-content/uploads/2017/02/GettyImages-465794068-1280x720.jpg');" /> - </div> - <div class="top-stories-hed">Rand Paul and Cory Booker push bipartisan effort to limit solitary confinement for juveniles</div> - </a> - <div class="top-stories-desc">Sen. Rand Paul (R-Ky) and Sen …</div> - </div> - - <div class="top-story"> - <a target="_blank" href="" onclick="location.href='xxURL1-2'"> - <div class="top-stories-img" style="background-image: url('http://cdn.weeklystandard.biz/cache/r960-90b8d8d5cbcef212ecae2a5c455fed8f.jpg');" /> - </div> - <div class="top-stories-hed">Bibi and Donald</div> - </a> - <div class="top-stories-desc">This week, Israel's prime minister will visit Washington and meet with our new president. They will have a complex agenda. 
Benjamin ...</div> - </div> - - <div class="top-story"> - <a target="_blank" href="" onclick="location.href='xxURL1-3'"> - <div class="top-stories-img" style="background-image: url('https://static01.nyt.com/images/2017/02/13/multimedia/DavidOyelowo-UnitedKingdom/DavidOyelowo-UnitedKingdom-facebookJumbo.png');" /> - </div> - <div class="top-stories-hed">David Oyelowo on How to Play a Real King</div> - </a> - <div class="top-stories-desc">He stars in “A United Kingdom,” about the Botswana leader who married a white woman and set off an international crisis.</div> - </div> - - <div class="top-story"> - <a target="_blank" href="" onclick="location.href='xxURL1-4'"> - <div class="top-stories-img" style="background-image: url('http://a57.foxnews.com/images.foxnews.com/content/fox-news/us/2017/02/13/judge-orders-ohio-village-to-pay-back-3-million-to-lead-footed-drivers/_jcr_content/par/featured-media/media-0.img.jpg/0/0/1487019011476.jpg?ve=1');" /> - </div> - <div class="top-stories-hed">Judge orders Ohio village to pay back $3 million to lead-footed drivers</div> - </a> - <div class="top-stories-desc">Speed cameras became a cash cow for the small village of New Miami, Ohio.</div> - </div> - - </div> - - <div id="middle-stories"> - - <a target="_blank" href="" onclick="location.href='xxURL2-1'"> - <div class="middle-story"> - <div class="middle-stories-img" style="background-image: url('http://www.theblaze.com/wp-content/uploads/2017/02/GettyImages-635148734-1280x720.jpg');"> - </div> - <div class="middle-stories-hed">DHS says 75 percent of those detained in ICE raids last week were ‘criminal aliens’</div> - </div> - </a> - - <a target="_blank" href="" onclick="location.href='xxURL2-2'"> - <div class="middle-story"> - <div class="middle-stories-img" style="background-image: url('http://a57.foxnews.com/media2.foxnews.com/BrightCove/694940094001/2017/02/12/0/0/694940094001_5320280093001_5320267547001-vs.jpg?ve=1');"> - </div> - <div class="middle-stories-hed">Drama grips 
Trump inner circle, as president charges ahead on agenda</div> - </div> - </a> - - <a target="_blank" href="" onclick="location.href='xxURL2-3'"> - <div class="middle-story"> - <div class="middle-stories-img" style="background-image: url('http://ichef.bbci.co.uk/news/1024/cpsprodpb/C9C5/production/_94635615_6c33162f-1c24-487d-8a51-bb7b13ec063f.jpg');"> - </div> - <div class="middle-stories-hed">Ku Klux Klan killing: Frank Ancona's wife and stepson charged - BBC News</div> - </div> - </a> - - <a target="_blank" href="" onclick="location.href='xxURL2-4'"> - <div class="middle-story"> - <div class="middle-stories-img" style="background-image: url('http://media1.s-nbcnews.com/j/newscms/2017_07/1900281/13217-oroville-dam-724a-rs_4a8b5ba9690488f11410f156833e1b70.nbcnews-fp-1200-800.jpg');"> - </div> - <div class="middle-stories-hed">Nearly 190,000 ordered to evacuate in California dam spillway failure</div> - </div> - </a> - - <a target="_blank" href="" > - <div class="middle-story"> - <div class="middle-stories-img" style="background-image: url('http://cbsnews1.cbsistatic.com/hub/i/2017/02/13/4ad800d9-69ba-4102-a8ec-af12e8eb6adb/021317-news.jpg');"> - </div> - <div class="middle-stories-hed">Jerry Sandusky's son, 41, arrested on child sex charges</div> - </div> - </a> - - <a target="_blank" href="" > - <div class="middle-story"> - <div class="middle-stories-img" style="background-image: url('https://static01.nyt.com/images/2017/02/14/us/14townhall1/14townhall1-facebookJumbo.jpg');"> - </div> - <div class="middle-stories-hed">Angry Town Hall Meetings on Health Care Law, and Few Answers</div> - </div> - </a> - - - </div> - - <div id="bottom-stories"> - <div class="bottom-story"> - <a target="_blank" href="">xxTitle3-1xx</a> - </div> - - <div class="bottom-story"> - <a target="_blank" href="">xxTitle3-2xx</a> - </div> - - <div class="bottom-story"> - <a target="_blank" href="">xxTitle3-3xx</a> - </div> - - <div class="bottom-story"> - <a target="_blank" 
href="">xxTitle3-4xx</a> - </div> - - <div class="bottom-story"> - <a target="_blank" href="">xxTitle3-5xx</a> - </div> - - <div class="bottom-story"> - <a target="_blank" href="">xxTitle3-6xx</a> - </div> - - <div class="bottom-story"> - <a target="_blank" href="">xxTitle3-7xx</a> - </div> - - <div class="bottom-story"> - <a target="_blank" href="">xxTitle3-8xx</a> - </div> -</div> - -</div> - -<div id="sources"> - Sources: BBC US, NBC News, CBS News, The Blaze, Weekly Standard, New York Times, Fox News -</div> -</body> -</html> +<!DOCTYPE html>
+<html>
+ <head>
+ <meta charset="utf-8">
+ <link rel="stylesheet" href="unbiased.css">
+ <title>UnBiased</title>
+ </head>
+<body>
+
+<div id="page-header">
+ <span id="title-1" class="title">un</span><span id="title-2" class="title">biased</span><br />
+ <span id="subtitle">a different way to read the news</span>
+ <p id="timestamp">Last updated: Mon, Feb 13, 7:51pm EST</p>
+</div>
+
+<div id="page-container">
+ <div id="top-stories">
+
+ <div class="top-story">
+ <a target="_blank" id="top-story-1" href="" onclick="location.href='xxURL1-1'">
+ <div class="top-stories-img" style="background-image: url('http://www.theblaze.com/wp-content/uploads/2017/02/GettyImages-465794068-1280x720.jpg');" />
+ </div>
+ <div class="top-stories-hed">Rand Paul and Cory Booker push bipartisan effort to limit solitary confinement for juveniles</div>
+ </a>
+ <div class="top-stories-desc">Sen. Rand Paul (R-Ky) and Sen …</div>
+ </div>
+
+ <div class="top-story">
+ <a target="_blank" href="" onclick="location.href='xxURL1-2'">
+ <div class="top-stories-img" style="background-image: url('http://cdn.weeklystandard.biz/cache/r960-90b8d8d5cbcef212ecae2a5c455fed8f.jpg');" />
+ </div>
+ <div class="top-stories-hed">Bibi and Donald</div>
+ </a>
+ <div class="top-stories-desc">This week, Israel's prime minister will visit Washington and meet with our new president. They will have a complex agenda. Benjamin ...</div>
+ </div>
+
+ <div class="top-story">
+ <a target="_blank" href="" onclick="location.href='xxURL1-3'">
+ <div class="top-stories-img" style="background-image: url('https://static01.nyt.com/images/2017/02/13/multimedia/DavidOyelowo-UnitedKingdom/DavidOyelowo-UnitedKingdom-facebookJumbo.png');" />
+ </div>
+ <div class="top-stories-hed">David Oyelowo on How to Play a Real King</div>
+ </a>
+ <div class="top-stories-desc">He stars in “A United Kingdom,” about the Botswana leader who married a white woman and set off an international crisis.</div>
+ </div>
+
+ <div class="top-story">
+ <a target="_blank" href="" onclick="location.href='xxURL1-4'">
+ <div class="top-stories-img" style="background-image: url('http://a57.foxnews.com/images.foxnews.com/content/fox-news/us/2017/02/13/judge-orders-ohio-village-to-pay-back-3-million-to-lead-footed-drivers/_jcr_content/par/featured-media/media-0.img.jpg/0/0/1487019011476.jpg?ve=1');" />
+ </div>
+ <div class="top-stories-hed">Judge orders Ohio village to pay back $3 million to lead-footed drivers</div>
+ </a>
+ <div class="top-stories-desc">Speed cameras became a cash cow for the small village of New Miami, Ohio.</div>
+ </div>
+
+ </div>
+
+ <div id="middle-stories">
+
+ <a target="_blank" href="" onclick="location.href='xxURL2-1'">
+ <div class="middle-story">
+ <div class="middle-stories-img" style="background-image: url('http://www.theblaze.com/wp-content/uploads/2017/02/GettyImages-635148734-1280x720.jpg');">
+ </div>
+ <div class="middle-stories-hed">DHS says 75 percent of those detained in ICE raids last week were ‘criminal aliens’</div>
+ </div>
+ </a>
+
+ <a target="_blank" href="" onclick="location.href='xxURL2-2'">
+ <div class="middle-story">
+ <div class="middle-stories-img" style="background-image: url('http://a57.foxnews.com/media2.foxnews.com/BrightCove/694940094001/2017/02/12/0/0/694940094001_5320280093001_5320267547001-vs.jpg?ve=1');">
+ </div>
+ <div class="middle-stories-hed">Drama grips Trump inner circle, as president charges ahead on agenda</div>
+ </div>
+ </a>
+
+ <a target="_blank" href="" onclick="location.href='xxURL2-3'">
+ <div class="middle-story">
+ <div class="middle-stories-img" style="background-image: url('http://ichef.bbci.co.uk/news/1024/cpsprodpb/C9C5/production/_94635615_6c33162f-1c24-487d-8a51-bb7b13ec063f.jpg');">
+ </div>
+ <div class="middle-stories-hed">Ku Klux Klan killing: Frank Ancona's wife and stepson charged - BBC News</div>
+ </div>
+ </a>
+
+ <a target="_blank" href="" onclick="location.href='xxURL2-4'">
+ <div class="middle-story">
+ <div class="middle-stories-img" style="background-image: url('http://media1.s-nbcnews.com/j/newscms/2017_07/1900281/13217-oroville-dam-724a-rs_4a8b5ba9690488f11410f156833e1b70.nbcnews-fp-1200-800.jpg');">
+ </div>
+ <div class="middle-stories-hed">Nearly 190,000 ordered to evacuate in California dam spillway failure</div>
+ </div>
+ </a>
+
+ <a target="_blank" href="" >
+ <div class="middle-story">
+ <div class="middle-stories-img" style="background-image: url('http://cbsnews1.cbsistatic.com/hub/i/2017/02/13/4ad800d9-69ba-4102-a8ec-af12e8eb6adb/021317-news.jpg');">
+ </div>
+ <div class="middle-stories-hed">Jerry Sandusky's son, 41, arrested on child sex charges</div>
+ </div>
+ </a>
+
+ <a target="_blank" href="" >
+ <div class="middle-story">
+ <div class="middle-stories-img" style="background-image: url('https://static01.nyt.com/images/2017/02/14/us/14townhall1/14townhall1-facebookJumbo.jpg');">
+ </div>
+ <div class="middle-stories-hed">Angry Town Hall Meetings on Health Care Law, and Few Answers</div>
+ </div>
+ </a>
+
+
+ </div>
+
+ <div id="bottom-stories">
+ <div class="bottom-story">
+ <a target="_blank" href="">xxTitle3-1xx</a>
+ </div>
+
+ <div class="bottom-story">
+ <a target="_blank" href="">xxTitle3-2xx</a>
+ </div>
+
+ <div class="bottom-story">
+ <a target="_blank" href="">xxTitle3-3xx</a>
+ </div>
+
+ <div class="bottom-story">
+ <a target="_blank" href="">xxTitle3-4xx</a>
+ </div>
+
+ <div class="bottom-story">
+ <a target="_blank" href="">xxTitle3-5xx</a>
+ </div>
+
+ <div class="bottom-story">
+ <a target="_blank" href="">xxTitle3-6xx</a>
+ </div>
+
+ <div class="bottom-story">
+ <a target="_blank" href="">xxTitle3-7xx</a>
+ </div>
+
+ <div class="bottom-story">
+ <a target="_blank" href="">xxTitle3-8xx</a>
+ </div>
+</div>
+
+</div>
+
+<div id="sources">
+ Sources: BBC US, NBC News, CBS News, The Blaze, Weekly Standard, New York Times, Fox News
+</div>
+</body>
+</html>
diff --git a/html_template/template.html b/html_template/template.html index c0e0711..41eb86e 100644..100755 --- a/html_template/template.html +++ b/html_template/template.html @@ -16,12 +16,12 @@ <div id="page-container">
<div id="top-stories">
+ <div class="row">
- <div class="top-story">
- <a target="_blank" href="" onclick="window.open('xxURL1-1xx', '_blank')">
- <div class="top-stories-img" style="background-image: url('xxImg1-1xx');" />
- </div>
- <div class="top-stories-hed">xxTitle1-1xx</div>
+ <div class="top-story">
+ <a target="_blank" href="" onclick="window.open('xxURL1-1xx', '_blank')">
+ <div class="top-stories-img" style="background-image: url('xxImg1-1xx');" /></div>
+ <div class="top-stories-hed">xxTitle1-1xx</div>
</a>
<div class="top-stories-desc">xxDesc1-1xx</div>
</div>
@@ -35,6 +35,10 @@ <div class="top-stories-desc">xxDesc1-2xx</div>
</div>
+ </div>
+
+<div class="row">
+
<div class="top-story">
<a target="_blank" href="" onclick="window.open('xxURL1-3xx', '_blank')">
<div class="top-stories-img" style="background-image: url('xxImg1-3xx');" />
@@ -52,7 +56,9 @@ </a>
<div class="top-stories-desc">xxDesc1-4xx</div>
</div>
-
+
+ </div>
+
</div>
<div id="middle-stories">
diff --git a/html_template/unbiased.css b/html_template/unbiased.css index 126e194..c0bb121 100644..100755 --- a/html_template/unbiased.css +++ b/html_template/unbiased.css @@ -71,17 +71,22 @@ a:link, a:visited, a:hover, a:active { margin-bottom: 10px;
}
+.row{
+ display:flex;
+}
+
.top-story{
display:inline-block;
vertical-align:top;
text-align:left;
width:360px;
- height:352px;
+ height:auto;
overflow:hidden;
background:#fff;
margin:10px;
padding:10px;
border:2px solid #ccc;
+ flex:1;
}
@media only screen and (max-width:500px){
diff --git a/parser.py b/parser.py index a537d48..2c22a87 100644..100755 --- a/parser.py +++ b/parser.py @@ -1,805 +1,805 @@ -#!/usr/bin/env python3 - -from unbiasedObjects import * -from unbiasedFunctions import buildArticle -import os -import re - - -''' -Takes in a URL, downloads the file to a temp file, -reads the file into a string, and returns that string -''' -def urlToContent(url): - #download file - os.system('wget -q -O scratch/temp1.html --no-check-certificate '+url) - - #read file - f=open('scratch/temp1.html', 'r')#, encoding="utf8") - content=f.read() - f.close() - - return content - - -''' -Creates a new newsSource2 object. For each URL in h1-h3URLs, -calls the file scraper and appends the new Article object. -Returns a newsSource2 object -''' -def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs): - h1Arr=[] - h1Arr.append(buildArticle(h1URLs[0], name)) - - h2Arr=[] - for x in h2URLs: - a=buildArticle(x, name) - if a!=None: - h2Arr.append(a) - - h3Arr=[] - for x in h3URLs: - a=buildArticle(x, name) - if a!=None: - h3Arr.append(a) - - #BUILD THE NEWS SOURCE - newsSource=NewsSource2(name, url, h1Arr, h2Arr, h3Arr) - - return newsSource - - -''' -Some sites will replicate URLs across the page. This function removes them. -Check hierarchically: if h3 exists in h1s or h2s, remove from h3s; -if h2 exists in h1s, remove from h2s - -also check partial URLs (e.g. 
nytimes.com/story.html is the same as -nytimes.com/story.html?var=x -''' -def removeDuplicates(h1s, h2s, h3s): - #Assume h1s is one element, and keep it - - #remove h2 duplicates - removeArr=[] - for i in range(len(h2s)): - #check internally - for j in range(len(h2s)): - if i==j: - continue - else: - if h2s[i] in h2s[j]: - removeArr.append(h2s[j]) - #check against h1s - for k in range(len(h1s)): - if (h2s[i] in h1s[k]) or (h1s[k] in h2s[i]): - removeArr.append(h2s[i]) - for x in removeArr: - h2s.remove(x) - - #remove h3 duplicates - removeArr=[] - for i in range(len(h3s)): - #check internally - for j in range(len(h3s)): - if i==j: - continue - else: - if h3s[i] in h3s[j]: - removeArr.append(h3s[j]) - #check against h1s and h2s - h1and2=h1s+h2s - for k in range(len(h1and2)): - if (h3s[i] in h1and2[k]) or (h1and2[k] in h3s[i]): - removeArr.append(h3s[i]) - for x in removeArr: - h3s.remove(x) - - - return h1s, h2s, h3s - - - -def removalNotification(source, title, reason, value): - print('*************************') - print('\t\tSTORY REMOVED') - print('SOURCE: '+source) - print('TITLE: \t'+title) - print('REASON: '+reason) - print('VALUE: \t'+value) - print('*************************\n\n') - - -def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, badURLArr=None): - - arr=[source.h1Arr, source.h2Arr, source.h3Arr] - - if badTitleArr!=None: - for i in range(len(arr)): - for hed in arr[i]: - for item in badTitleArr: - if item in hed.title: - arr[i].remove(hed) - #if it's in the h1 slot, bump up the - # first h2 into the h1 slot - if i==0: - arr[0].append(arr[1][0]) - arr[1].remove(arr[1][0]) - removalNotification(source.name, hed.title, 'Title', item) - - - if badDescArr!=None: - for i in range(len(arr)): - for hed in arr[i]: - for item in badDescArr: - if item in hed.description: - arr[i].remove(hed) - #if it's in the h1 slot, bump up the - # first h2 into the h1 slot - if i==0: - arr[0].append(arr[1][0]) - arr[1].remove(arr[1][0]) - 
removalNotification(source.name, hed.title, 'Description', item) - - - if badAuthorArr!=None: - for i in range(len(arr)): - for hed in arr[i]: - for item in badAuthorArr: - if item in hed.author: - arr[i].remove(hed) - #if it's in the h1 slot, bump up the - # first h2 into the h1 slot - if i==0: - arr[0].append(arr[1][0]) - arr[1].remove(arr[1][0]) - removalNotification(source.name, hed.title, 'Author', item) - - - if badImgArr!=None: - for i in range(len(arr)): - for hed in arr[i]: - for item in badImgArr: - if item in hed.img: - arr[i].remove(hed) - #if it's in the h1 slot, bump up the - # first h2 into the h1 slot - if i==0: - arr[0].append(arr[1][0]) - arr[1].remove(arr[1][0]) - removalNotification(source.name, hed.title, 'Image', item) - - if badURLArr!=None: - for i in range(len(arr)): - for hed in arr[i]: - for item in badURLArr: - if item in hed.url: - arr[i].remove(hed) - #if it's in the h1 slot, bump up the - # first h2 into the h1 slot - if i==0: - arr[0].append(arr[1][0]) - arr[1].remove(arr[1][0]) - removalNotification(source.name, hed.title, 'URL', item) - - return source - - - - -def buildTheHill(): - url='http://thehill.com' - name='The Hill' - - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) - - #get main headline - h1=content - h1=h1.split('<div class="headline-story-image">', 1)[1] - h1=h1.split('<a href="', 1)[1] - h1=h1.split('"', 1)[0] - h1s=[url+h1] - - #GET SECONDARY HEADLINES - h2=content - h2s=[] - h2=h2.split('<div class="section-top-content">', 1)[1] - h2=h2.split('</ul>', 1)[0] - while '<div class="top-story-item' in h2 and len(h2s)<4: - h2=h2.split('<div class="top-story-item', 1)[1] - x=h2.split('<a href="', 1)[1] - x=x.split('"', 1)[0] - h2s.append(url+x) - - #GET TERTIARY HEADLINES - h3=content - h3s=[] - h3=h3.split('<div class="section-top-content">', 1)[1] - h3=h3.split('</ul>', 1)[0] - while '<div class="top-story-item small' in h3: - h3=h3.split('<div class="top-story-item small', 1)[1] - x=h3.split('<a href="', 1)[1] - 
x=x.split('"', 1)[0] - h3s.append(url+x) - - h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - hil=buildNewsSource2(name, url, h1s, h2s, h3s) - #hil=removeBadStories(gdn, None, None, None, None) - - return hil - - - - - -def buildGuardian(): - url='http://www.theguardian.com/us-news' - name='The Guardian' - - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) - - #get main headline - h1=content - h1=h1.split('<h1', 1)[1] - h1=h1.split('<a href="', 1)[1] - h1=h1.split('"', 1)[0] - h1s=[h1] - - #GET SECONDARY HEADLINES - h2=content - h2s=[] - #only the h1 and the two h2s have this, so split on it and grab - #the second two - h2=h2.split('<div class="fc-item__image-container u-responsive-ratio inlined-image">', 3)[2:] - for x in h2: - x=x.split('<h2 class="fc-item__title"><a href="', 1)[1] - x=x.split('"', 1)[0] - h2s.append(x) - - #GET TERTIARY HEADLINES - h3=content - h3s=[] - h3=h3.split('<div class="fc-slice-wrapper">', 1)[1] - h3=h3.split('<div class="js-show-more-placeholder">', 1)[0] - #this story section goes on forever; just grab the first 5 - while '<h2 class="fc-item__title"><a href="' in h3: - h3=h3.split('<h2 class="fc-item__title"><a href="', 1)[1] - x=h3.split('"', 1)[0] - h3s.append(x) - - h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - - gdn=buildNewsSource2(name, url, h1s, h2s, h3s) - gdn=removeBadStories(gdn, None, ['Tom McCarthy'], ['https://www.theguardian.com/profile/ben-jacobs'], None) - - return gdn - - -''' -Function to fix the oddly short og:descriptions provided -in The Blaze articles by grabbing the first portion of the story instead -''' -def blazeFixDesc(articleArr): - TAG_RE = re.compile(r'<[^>]+>') - for i in range(len(articleArr)): - desc=urlToContent(articleArr[i].url) - desc=desc.split('<div class="entry-content article-styles">', 1)[1] - desc=desc.split('<p>', 1)[1] - desc=TAG_RE.sub('', desc) - desc=desc.replace('\n', ' ') - desc=desc[:144] - articleArr[i].description=desc - - return articleArr - - - -def buildBlaze(): - 
url='http://theblaze.com' - name='The Blaze' - - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) - - #get main headline - h1=content - h1=h1.split('<!-- home -->', 1)[1] - h1=h1.split('<!-- loop-home -->', 1)[0] - h1=h1.split('<a class="gallery-link" href="', 1)[1] - h1=h1.split('"', 1)[0] - h1s=[url+h1] - - #GET SECONDARY HEADLINES - h2=content - h2s=[] - h2=h2.split('<!-- home -->', 1)[1] - h2=h2.split('<!-- loop-home -->', 1)[0] - while '</figure>\n\n<figure class="gallery-item">' in h2: - h2=h2.split('</figure>\n\n<figure class="gallery-item">', 1)[1] - h2=h2.split('href="', 1)[1] - x=h2.split('"', 1)[0] - if h1 not in x: - h2s.append(url+x) - - #GET TERTIARY HEADLINES - h3=content - h3s=[] - h3=h3.split('<!-- loop-home -->', 1)[1] - #this story section goes on forever; just grab the first 5 - while len(h3s)<5: - h3=h3.split('<a class="feed-link" href="', 1)[1] - x=h3.split('"', 1)[0] - if h1 not in x: - h3s.append(url+x) - - h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - - - blz=buildNewsSource2(name, url, h1s, h2s, h3s) - blz=removeBadStories(blz, None, ['Lawrence Jones'], ['Matt Walsh', 'Tomi Lahren', 'Dana Loesch', 'Mike Opelka'], None) - - #The Blaze has dumb, short description fields, so we need to grab - #the first x characters of actual article text instead - blz.h1Arr=blazeFixDesc(blz.h1Arr) - blz.h2Arr=blazeFixDesc(blz.h2Arr) - blz.h3Arr=blazeFixDesc(blz.h3Arr) - - return blz - - - -def buildCBS(): - url='http://cbsnews.com' - name='CBS News' - - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) - - #get main headline - h1=content - if '<h1 class="title">' in content: - h1=h1.split('<h1 class="title">', 1)[1] - h1=h1.split('<a href="', 1)[1] - h1=h1.split('"', 1)[0] - h1s=[url+h1] - else: - #for cases where they lead with a video, pull the first h2 as h1 - h1=h1.split('Big News Area Side Assets', 1)[1] - h1=h1.split('</ul></div>', 1)[0] - h1=h1.split('<li data-tb-region-item>', 1)[1] - h1=h1.split('<a href="', 1)[1] - x=h1.split('"', 
1)[0] - h1s=[url+x] - - - #GET SECONDARY HEADLINES - h2=content - h2s=[] - h2=h2.split('Big News Area Side Assets', 1)[1] - h2=h2.split('</ul></div>', 1)[0] - while '<li data-tb-region-item>' in h2: - h2=h2.split('<li data-tb-region-item>', 1)[1] - h2=h2.split('<a href="', 1)[1] - x=h2.split('"', 1)[0] - if h1 not in x: - h2s.append(url+x) - - #GET TERTIARY HEADLINES - h3=content - h3s=[] - h3=h3.split('Latest News', 1)[1] - #this story section goes on forever; just grab the first 5 - while len(h3s)<5: - h3=h3.split('<li class="item-full-lead"', 1)[1] - h3=h3.split('<a href="', 1)[1] - x=h3.split('"', 1)[0] - if h1 not in x: - h3s.append(url+x) - - h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - cbs=buildNewsSource2(name, url, h1s, h2s, h3s) - - return cbs - - - - - -def buildNBC(): - url='http://nbcnews.com' - name='NBC News' - - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) - - #get main headline - h1=content - h1=h1.split('top-stories-section', 1)[1] - h1=h1.split('panel_hero', 1)[1] - h1=h1.split('<a href="', 1)[1] - h1=h1.split('"', 1)[0] - if '.com' not in h1: - h1=url+h1 - h1s=[h1] - - #GET SECONDARY HEADLINES - h2=content - h2s=[] - h2=h2.split('ad-content ad-xs mobilebox1', 1)[1] - h2=h2.split('taboola-native-top-stories-thumbnail', 1)[0] - while '<div class="story-link' in h2: - h2=h2.split('<div class="story-link', 1)[1] - h2=h2.split('<a href="', 1)[1] - x=h2.split('"', 1)[0] - if h1 not in x: - if '.com' not in x: - x=url+x - h2s.append(x) - - #GET TERTIARY HEADLINES - h3=content - h3s=[] - h3=h3.split('js-more-topstories', 1)[1] - h3=h3.split('<div class="panel-section', 1)[0] - while '<div class="story-link' in h3: - h3=h3.split('<div class="story-link', 1)[1] - h3=h3.split('<a href="', 1)[1] - x=h3.split('"', 1)[0] - if h1 not in x: - if '.com' not in x: - x=url+x - h3s.append(x) - - #adjust for today.com urls - for arr in [h1s, h2s, h3s]: - for i in range(len(arr)): - if 'today.com' in arr[i]: - arr[i]=arr[i].split('.com', 1)[1] - - h1s, 
h2s, h3s = removeDuplicates(h1s, h2s, h3s) - nbc=buildNewsSource2(name, url, h1s, h2s, h3s) - - return nbc - - - - -def buildBBC(): - url='http://www.bbc.com/news/world/us_and_canada' - name='BBC US & Canada' - - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) - - #get main headline - h1=content - h1=h1.split('buzzard-item', 1)[1] - h1=h1.split('<a href="', 1)[1] - h1=h1.split('"', 1)[0] - h1s=['http://www.bbc.com'+h1] - - #GET SECONDARY HEADLINES - h2=content - h2s=[] - h2=h2.split('<div class="pigeon">', 1)[1] - h2=h2.split('<div id=', 1)[0] - while 'top_stories#' in h2: - h2=h2.split('top_stories#', 1)[1] - h2=h2.split('<a href="', 1)[1] - x=h2.split('"', 1)[0] - if h1 not in x: - h2s.append('http://www.bbc.com'+x) - - #GET TERTIARY HEADLINES - h3=content - h3s=[] - h3=h3.split('<div class="macaw">', 1)[1] - h3=h3.split('Watch/Listen', 1)[0] - while '<div class="macaw-item' in h3: - h3=h3.split('<div class="macaw-item', 1)[1] - h3=h3.split('<a href="', 1)[1] - x=h3.split('"', 1)[0] - if h1 not in x: - h3s.append('http://www.bbc.com'+x) - - h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - bbc=buildNewsSource2(name, url, h1s, h2s, h3s) - - #REMOVE ' - BBC News' from headlines - for i in range(len(bbc.h1Arr)): - if ' - BBC News' in bbc.h1Arr[i].title: - bbc.h1Arr[i].title=bbc.h1Arr[i].title.split(' - BBC News', 1)[0] - for i in range(len(bbc.h2Arr)): - if ' - BBC News' in bbc.h2Arr[i].title: - bbc.h2Arr[i].title=bbc.h2Arr[i].title.split(' - BBC News', 1)[0] - for i in range(len(bbc.h3Arr)): - if ' - BBC News' in bbc.h3Arr[i].title: - bbc.h3Arr[i].title=bbc.h3Arr[i].title.split(' - BBC News', 1)[0] - - return bbc - - - -def buildWeeklyStandard(): - url='http://www.weeklystandard.com' - name='Weekly Standard' - - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) - - #get main headline - h1=content - h1=h1.split('<div id="region_1"', 1)[1] - h1=h1.split('<div id="region_2"', 1)[0] - h1=h1.split('<div class="lead-photo">', 1)[1] - h1=h1.split('href="', 
1)[1] - h1=h1.split('"', 1)[0] - h1s=[h1] - - #GET SECONDARY HEADLINES - h2=content - h2s=[] - h2=h2.split('<div class="widget lead-story layout-3col-feature" data-count="2">', 1)[1] - h2=h2.split('<div id="region_2"', 1)[0] - while '<div class="lead-photo">' in h2: - h2=h2.split('<div class="lead-photo">', 1)[1] - h2=h2.split('href="', 1)[1] - x=h2.split('"', 1)[0] - if h1 not in x: - h2s.append(x) - - #GET TERTIARY HEADLINES - h3=content - h3s=[] - h3=h3.split('Today\'s Standard', 1)[1] - h3=h3.split('<div id="region_3"', 1)[0] - while '<div class="lead-photo">' in h3: - h3=h3.split('<div class="lead-photo">', 1)[1] - h3=h3.split('href="', 1)[1] - x=h3.split('"', 1)[0] - if h1 not in x: - h3s.append(x) - - #Need to add URL prefix to all URLs - for i in range(len(h1s)): - h1s[i]=url+h1s[i] - for i in range(len(h2s)): - h2s[i]=url+h2s[i] - for i in range(len(h3s)): - h3s[i]=url+h3s[i] - - - h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - wkl=buildNewsSource2(name, url, h1s, h2s, h3s) - - #REMOVE BAD STORIES - badTitleArr=None - ## if flagged again, remove Micah Mattix - badDescArr=['Matt Labash'] - badAuthorArr=['MATT LABASH', 'TWS PODCAST', 'ERIC FELTEN', 'Steven J. 
Lenzner', 'MARK HEMINGWAY'] - badImgArr=['http://www.weeklystandard.com/s3/tws15/images/twitter/tws-twitter_1024x512.png'] - wkl=removeBadStories(wkl, badTitleArr, badDescArr, badAuthorArr, badImgArr) - - return wkl - - - - -def buildNPR(): - url='http://www.npr.org/sections/news/' - name='NPR' - - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) - - #get main headline - h1=content - h1=h1.split('<a id="mainContent">', 1)[1] - h1=h1.split('<a href="', 1)[1] - h1=h1.split('"', 1)[0] - h1s=[h1] - - #GET SECONDARY HEADLINES - h2=content - h2s=[] - h2=h2.split('<article class="item has-image">', 1)[1] - h2=h2.split('<!-- END CLASS=\'FEATURED-3-UP\' -->', 1)[0] - while '<article class="item has-image">' in h2: - h2=h2.split('<article class="item has-image">', 1)[1] - h2=h2.split('<a href="', 1)[1] - x=h2.split('"', 1)[0] - if h1 not in x: - h2s.append(x) - - #GET TERTIARY HEADLINES - h3=content - h3s=[] - h3=h3.split('<div id="overflow" class="list-overflow"', 1)[1] - h3=h3.split('<!-- END ID="OVERFLOW" CLASS="LIST-OVERFLOW"', 1)[0] - while '<h2 class="title"><a href="' in h3: - h3=h3.split('<h2 class="title"><a href="', 1)[1] - x=h3.split('"', 1)[0] - if h1 not in x: - h3s.append(x) - - h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - - npr=buildNewsSource2(name, url, h1s, h2s, h3s) - - #REMOVE BAD STORIES - badTitleArr=None - badDescArr=None - badAuthorArr=None - badImgArr=None - #npr=removeBadStories(npr, badTitleArr, badDescArr, badAuthorArr, badImgArr) - - return npr - - - - -def buildFoxNews(): - url='http://foxnews.com' - name='Fox News' - - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) - - #get main headline - h1=content - h1=h1.split('<h1><a href="', 1)[1] - h1=h1.split('"', 1)[0] - h1s=[h1] - - #GET SECONDARY HEADLINES - h2=content - h2s=[] - h2=h2.split('<div class="top-stories">', 1)[1] - h2=h2.split('<section id="latest"', 1)[0] - while '<li data-vr-contentbox=""><a href="' in h2: - h2=h2.split('<li data-vr-contentbox=""><a href="', 1)[1] - 
x=h2.split('"', 1)[0] - if h1 not in x: - h2s.append(x) - - #GET TERTIARY HEADLINES - h3=content - h3s=[] - h3=h3.split('div id="big-top"', 1)[1] - h3=h3.split('<div class="top-stories">', 1)[0] - while '<a href="' in h3: - h3=h3.split('<a href="', 1)[1] - x=h3.split('"', 1)[0] - if h1 not in x: - h3s.append(x) - - h1s, h2s, h3s = removeDuplicates([h1], h2s, h3s) - fox=buildNewsSource2(name, url, h1s, h2s, h3s) - - #REMOVE BAD STORIES - badTitleArr=['O'Reilly'] - badDescArr=None - badAuthorArr=['Bill O\'Reilly', 'Sean Hannity'] - badImgArr=['http://www.foxnews.com/content/dam/fox-news/logo/og-fn-foxnews.jpg'] - badURLArr=['http://www.foxnews.com/opinion', 'videos.foxnews.com'] - fox=removeBadStories(fox, badTitleArr, badDescArr, badAuthorArr, badImgArr, badURLArr) - - return fox - - - -def buildNYT(): - url='http://www.nytimes.com' - name='New York Times' - - #DOWNLOAD HOMEPAGE CONTENT - content=urlToContent(url) - - #get main headline - #this will likely need if/else logic - h1=content - - if 'story theme-summary banner' in h1: - #This is with a large headline over a and b columns - h1=h1.split('story theme-summary banner', 1)[1] - h1=h1.split('<a href="', 1)[1] - h1=h1.split('"', 1)[0] - else: - #otherwise, pull the first story from the A column - h1=h1.split('<div class="a-column column">', 1)[1] - h1=h1.split('<a href="', 1)[1].split('"', 1)[0] - h1s=[h1] - - - #GET SECONDARY HEADLINES - #This comes from the a column or b column, above the break - h2=content - h2s=[] - #A column - h2=h2.split('<div class="a-column column">', 1)[1] - h2=h2.split('<!-- close a-column -->', 1)[0] - #remove "collection" sets - while '<div class="collection headlines">' in h2: - arr=h2.split('<div class="collection headlines">', 1) - h2=arr[0]+arr[1].split('</ul>', 1)[1] - #Grab the remaining URLs - while '<a href="' in h2: - h2=h2.split('<a href="', 1)[1] - x=h2.split('"', 1)[0] - if h1 not in x: - h2s.append(x) - - #B column - h2=content - h2=h2.split('<div class="b-column 
column">', 1)[1] - h2=h2.split('<!-- close b-column -->', 1)[0] - #remove "collection" sets - while '<div class="collection headlines">' in h2: - arr=h2.split('<div class="collection headlines">', 1) - h2=arr[0]+arr[1].split('</ul>', 1)[1] - #Grab the remaining URLs - while '<a href="' in h2: - h2=h2.split('<a href="', 1)[1] - x=h2.split('"', 1)[0] - if (h1 not in x) and (x not in h2s): - h2s.append(x) - - #GET TERTIARY HEADLINES - h3=content - h3s=[] - h3=h3.split('<!-- close lede-package-region -->', 1)[1] - h3=h3.split('<a href="https://www.nytimes.com/tips">', 1)[0] - #remove "collection" sets - while '<div class="collection headlines">' in h2: - arr=h3.split('<div class="collection headlines">', 1) - h3=arr[0]+arr[1].split('</ul>', 1)[1] - - #Grab the remaining URLs - while '<a href="' in h3: - h3=h3.split('<a href="', 1)[1] - x=h3.split('"', 1)[0] - if (h1 not in x) and (x not in h3s): - h3s.append(x) - - h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s) - nyt=buildNewsSource2(name, url, h1s, h2s, h3s) - - return nyt - - - - -''' -NYT -EXAMPLE OF BIG HEADLINE SPANNING BOTH A AND B COLUMNS - -<div class="span-ab-layout layout"> - - <div class="ab-column column"> - - <section id="top-news" class="top-news"> - <h2 class="section-heading visually-hidden">Top News</h2> - - <div class="above-banner-region region"> - - <div class="collection"> - <div class="hpHeader" id="top-megapackage-kicker"> - <h6><a href="http://www.nytimes.com/pages/politics/index.html?src=hpHeader">The 45th President</a></h6> -</div> - -</div> - - </div><!-- close above-banner-region --> - - <div class="span-ab-top-region region"> - - <div class="collection"> - <article class="story theme-summary banner" id="topnews-100000004932040" data-story-id="100000004932040" data-rank="0" data-collection-renderstyle="Banner"> - <h1 class="story-heading"><a href="https://www.nytimes.com/2017/02/14/us/politics/fbi-interviewed-mike-flynn.html">F.B.I. 
Questioned Flynn About Russia Call</a></h1> -</article> -</div> - - </div><!-- close span-ab-top-region --> -''' +#!/usr/bin/env python3
+
import os
import re
import subprocess

from unbiasedObjects import *
from unbiasedFunctions import buildArticle
+
+
+'''
+Takes in a URL, downloads the file to a temp file,
+reads the file into a string, and returns that string
+'''
+def urlToContent(url):
+ #download file
+ os.system('wget -q -O scratch/temp1.html --no-check-certificate '+url)
+
+ #read file
+ f=open('scratch/temp1.html', 'r')#, encoding="utf8")
+ content=f.read()
+ f.close()
+
+ return content
+
+
+'''
+Creates a new newsSource2 object. For each URL in h1-h3URLs,
+calls the file scraper and appends the new Article object.
+Returns a newsSource2 object
+'''
+def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs):
+ h1Arr=[]
+ h1Arr.append(buildArticle(h1URLs[0], name))
+
+ h2Arr=[]
+ for x in h2URLs:
+ a=buildArticle(x, name)
+ if a!=None:
+ h2Arr.append(a)
+
+ h3Arr=[]
+ for x in h3URLs:
+ a=buildArticle(x, name)
+ if a!=None:
+ h3Arr.append(a)
+
+ #BUILD THE NEWS SOURCE
+ newsSource=NewsSource2(name, url, h1Arr, h2Arr, h3Arr)
+
+ return newsSource
+
+
+'''
+Some sites will replicate URLs across the page. This function removes them.
+Check hierarchically: if h3 exists in h1s or h2s, remove from h3s;
+if h2 exists in h1s, remove from h2s
+
+also check partial URLs (e.g. nytimes.com/story.html is the same as
+nytimes.com/story.html?var=x
+'''
+def removeDuplicates(h1s, h2s, h3s):
+ #Assume h1s is one element, and keep it
+
+ #remove h2 duplicates
+ removeArr=[]
+ for i in range(len(h2s)):
+ #check internally
+ for j in range(len(h2s)):
+ if i==j:
+ continue
+ else:
+ if h2s[i] in h2s[j]:
+ removeArr.append(h2s[j])
+ #check against h1s
+ for k in range(len(h1s)):
+ if (h2s[i] in h1s[k]) or (h1s[k] in h2s[i]):
+ removeArr.append(h2s[i])
+ for x in removeArr:
+ h2s.remove(x)
+
+ #remove h3 duplicates
+ removeArr=[]
+ for i in range(len(h3s)):
+ #check internally
+ for j in range(len(h3s)):
+ if i==j:
+ continue
+ else:
+ if h3s[i] in h3s[j]:
+ removeArr.append(h3s[j])
+ #check against h1s and h2s
+ h1and2=h1s+h2s
+ for k in range(len(h1and2)):
+ if (h3s[i] in h1and2[k]) or (h1and2[k] in h3s[i]):
+ removeArr.append(h3s[i])
+ for x in removeArr:
+ h3s.remove(x)
+
+
+ return h1s, h2s, h3s
+
+
+
+def removalNotification(source, title, reason, value):
+ print('*************************')
+ print('\t\tSTORY REMOVED')
+ print('SOURCE: '+source)
+ print('TITLE: \t'+title)
+ print('REASON: '+reason)
+ print('VALUE: \t'+value)
+ print('*************************\n\n')
+
+
+def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, badURLArr=None):
+
+ arr=[source.h1Arr, source.h2Arr, source.h3Arr]
+
+ if badTitleArr!=None:
+ for i in range(len(arr)):
+ for hed in arr[i]:
+ for item in badTitleArr:
+ if item in hed.title:
+ arr[i].remove(hed)
+ #if it's in the h1 slot, bump up the
+ # first h2 into the h1 slot
+ if i==0:
+ arr[0].append(arr[1][0])
+ arr[1].remove(arr[1][0])
+ removalNotification(source.name, hed.title, 'Title', item)
+
+
+ if badDescArr!=None:
+ for i in range(len(arr)):
+ for hed in arr[i]:
+ for item in badDescArr:
+ if item in hed.description:
+ arr[i].remove(hed)
+ #if it's in the h1 slot, bump up the
+ # first h2 into the h1 slot
+ if i==0:
+ arr[0].append(arr[1][0])
+ arr[1].remove(arr[1][0])
+ removalNotification(source.name, hed.title, 'Description', item)
+
+
+ if badAuthorArr!=None:
+ for i in range(len(arr)):
+ for hed in arr[i]:
+ for item in badAuthorArr:
+ if item in hed.author:
+ arr[i].remove(hed)
+ #if it's in the h1 slot, bump up the
+ # first h2 into the h1 slot
+ if i==0:
+ arr[0].append(arr[1][0])
+ arr[1].remove(arr[1][0])
+ removalNotification(source.name, hed.title, 'Author', item)
+
+
+ if badImgArr!=None:
+ for i in range(len(arr)):
+ for hed in arr[i]:
+ for item in badImgArr:
+ if item in hed.img:
+ arr[i].remove(hed)
+ #if it's in the h1 slot, bump up the
+ # first h2 into the h1 slot
+ if i==0:
+ arr[0].append(arr[1][0])
+ arr[1].remove(arr[1][0])
+ removalNotification(source.name, hed.title, 'Image', item)
+
+ if badURLArr!=None:
+ for i in range(len(arr)):
+ for hed in arr[i]:
+ for item in badURLArr:
+ if item in hed.url:
+ arr[i].remove(hed)
+ #if it's in the h1 slot, bump up the
+ # first h2 into the h1 slot
+ if i==0:
+ arr[0].append(arr[1][0])
+ arr[1].remove(arr[1][0])
+ removalNotification(source.name, hed.title, 'URL', item)
+
+ return source
+
+
+
+
+def buildTheHill():
+ url='http://thehill.com'
+ name='The Hill'
+
+ #DOWNLOAD HOMEPAGE CONTENT
+ content=urlToContent(url)
+
+ #get main headline
+ h1=content
+ h1=h1.split('<div class="headline-story-image">', 1)[1]
+ h1=h1.split('<a href="', 1)[1]
+ h1=h1.split('"', 1)[0]
+ h1s=[url+h1]
+
+ #GET SECONDARY HEADLINES
+ h2=content
+ h2s=[]
+ h2=h2.split('<div class="section-top-content">', 1)[1]
+ h2=h2.split('</ul>', 1)[0]
+ while '<div class="top-story-item' in h2 and len(h2s)<4:
+ h2=h2.split('<div class="top-story-item', 1)[1]
+ x=h2.split('<a href="', 1)[1]
+ x=x.split('"', 1)[0]
+ h2s.append(url+x)
+
+ #GET TERTIARY HEADLINES
+ h3=content
+ h3s=[]
+ h3=h3.split('<div class="section-top-content">', 1)[1]
+ h3=h3.split('</ul>', 1)[0]
+ while '<div class="top-story-item small' in h3:
+ h3=h3.split('<div class="top-story-item small', 1)[1]
+ x=h3.split('<a href="', 1)[1]
+ x=x.split('"', 1)[0]
+ h3s.append(url+x)
+
+ h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
+ hil=buildNewsSource2(name, url, h1s, h2s, h3s)
+ #hil=removeBadStories(gdn, None, None, None, None)
+
+ return hil
+
+
+
+
+
+def buildGuardian():
+ url='http://www.theguardian.com/us-news'
+ name='The Guardian'
+
+ #DOWNLOAD HOMEPAGE CONTENT
+ content=urlToContent(url)
+
+ #get main headline
+ h1=content
+ h1=h1.split('<h1', 1)[1]
+ h1=h1.split('<a href="', 1)[1]
+ h1=h1.split('"', 1)[0]
+ h1s=[h1]
+
+ #GET SECONDARY HEADLINES
+ h2=content
+ h2s=[]
+ #only the h1 and the two h2s have this, so split on it and grab
+ #the second two
+ h2=h2.split('<div class="fc-item__image-container u-responsive-ratio inlined-image">', 3)[2:]
+ for x in h2:
+ x=x.split('<h2 class="fc-item__title"><a href="', 1)[1]
+ x=x.split('"', 1)[0]
+ h2s.append(x)
+
+ #GET TERTIARY HEADLINES
+ h3=content
+ h3s=[]
+ h3=h3.split('<div class="fc-slice-wrapper">', 1)[1]
+ h3=h3.split('<div class="js-show-more-placeholder">', 1)[0]
+ #this story section goes on forever; just grab the first 5
+ while '<h2 class="fc-item__title"><a href="' in h3:
+ h3=h3.split('<h2 class="fc-item__title"><a href="', 1)[1]
+ x=h3.split('"', 1)[0]
+ h3s.append(x)
+
+ h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
+
+ gdn=buildNewsSource2(name, url, h1s, h2s, h3s)
+ gdn=removeBadStories(gdn, None, ['Tom McCarthy'], ['https://www.theguardian.com/profile/ben-jacobs'], None)
+
+ return gdn
+
+
+'''
+Function to fix the oddly short og:descriptions provided
+in The Blaze articles by grabbing the first portion of the story instead
+'''
+def blazeFixDesc(articleArr):
+ TAG_RE = re.compile(r'<[^>]+>')
+ for i in range(len(articleArr)):
+ desc=urlToContent(articleArr[i].url)
+ desc=desc.split('<div class="entry-content article-styles">', 1)[1]
+ desc=desc.split('<p>', 1)[1]
+ desc=TAG_RE.sub('', desc)
+ desc=desc.replace('\n', ' ')
+ desc=desc[:144]
+ articleArr[i].description=desc
+
+ return articleArr
+
+
+
+def buildBlaze():
+ url='http://theblaze.com'
+ name='The Blaze'
+
+ #DOWNLOAD HOMEPAGE CONTENT
+ content=urlToContent(url)
+
+ #get main headline
+ h1=content
+ h1=h1.split('<!-- home -->', 1)[1]
+ h1=h1.split('<!-- loop-home -->', 1)[0]
+ h1=h1.split('<a class="gallery-link" href="', 1)[1]
+ h1=h1.split('"', 1)[0]
+ h1s=[url+h1]
+
+ #GET SECONDARY HEADLINES
+ h2=content
+ h2s=[]
+ h2=h2.split('<!-- home -->', 1)[1]
+ h2=h2.split('<!-- loop-home -->', 1)[0]
+ while '</figure>\n\n<figure class="gallery-item">' in h2:
+ h2=h2.split('</figure>\n\n<figure class="gallery-item">', 1)[1]
+ h2=h2.split('href="', 1)[1]
+ x=h2.split('"', 1)[0]
+ if h1 not in x:
+ h2s.append(url+x)
+
+ #GET TERTIARY HEADLINES
+ h3=content
+ h3s=[]
+ h3=h3.split('<!-- loop-home -->', 1)[1]
+ #this story section goes on forever; just grab the first 5
+ while len(h3s)<5:
+ h3=h3.split('<a class="feed-link" href="', 1)[1]
+ x=h3.split('"', 1)[0]
+ if h1 not in x:
+ h3s.append(url+x)
+
+ h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
+
+
+ blz=buildNewsSource2(name, url, h1s, h2s, h3s)
+ blz=removeBadStories(blz, None, ['Lawrence Jones'], ['Matt Walsh', 'Tomi Lahren', 'Dana Loesch', 'Mike Opelka'], None)
+
+ #The Blaze has dumb, short description fields, so we need to grab
+ #the first x characters of actual article text instead
+ blz.h1Arr=blazeFixDesc(blz.h1Arr)
+ blz.h2Arr=blazeFixDesc(blz.h2Arr)
+ blz.h3Arr=blazeFixDesc(blz.h3Arr)
+
+ return blz
+
+
+
+def buildCBS():
+ url='http://cbsnews.com'
+ name='CBS News'
+
+ #DOWNLOAD HOMEPAGE CONTENT
+ content=urlToContent(url)
+
+ #get main headline
+ h1=content
+ if '<h1 class="title">' in content:
+ h1=h1.split('<h1 class="title">', 1)[1]
+ h1=h1.split('<a href="', 1)[1]
+ h1=h1.split('"', 1)[0]
+ h1s=[url+h1]
+ else:
+ #for cases where they lead with a video, pull the first h2 as h1
+ h1=h1.split('Big News Area Side Assets', 1)[1]
+ h1=h1.split('</ul></div>', 1)[0]
+ h1=h1.split('<li data-tb-region-item>', 1)[1]
+ h1=h1.split('<a href="', 1)[1]
+ x=h1.split('"', 1)[0]
+ h1s=[url+x]
+
+
+ #GET SECONDARY HEADLINES
+ h2=content
+ h2s=[]
+ h2=h2.split('Big News Area Side Assets', 1)[1]
+ h2=h2.split('</ul></div>', 1)[0]
+ while '<li data-tb-region-item>' in h2:
+ h2=h2.split('<li data-tb-region-item>', 1)[1]
+ h2=h2.split('<a href="', 1)[1]
+ x=h2.split('"', 1)[0]
+ if h1 not in x:
+ h2s.append(url+x)
+
+ #GET TERTIARY HEADLINES
+ h3=content
+ h3s=[]
+ h3=h3.split('Latest News', 1)[1]
+ #this story section goes on forever; just grab the first 5
+ while len(h3s)<5:
+ h3=h3.split('<li class="item-full-lead"', 1)[1]
+ h3=h3.split('<a href="', 1)[1]
+ x=h3.split('"', 1)[0]
+ if h1 not in x:
+ h3s.append(url+x)
+
+ h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
+ cbs=buildNewsSource2(name, url, h1s, h2s, h3s)
+
+ return cbs
+
+
+
+
+
+def buildNBC():
+ url='http://nbcnews.com'
+ name='NBC News'
+
+ #DOWNLOAD HOMEPAGE CONTENT
+ content=urlToContent(url)
+
+ #get main headline
+ h1=content
+ h1=h1.split('top-stories-section', 1)[1]
+ h1=h1.split('panel_hero', 1)[1]
+ h1=h1.split('<a href="', 1)[1]
+ h1=h1.split('"', 1)[0]
+ if '.com' not in h1:
+ h1=url+h1
+ h1s=[h1]
+
+ #GET SECONDARY HEADLINES
+ h2=content
+ h2s=[]
+ h2=h2.split('ad-content ad-xs mobilebox1', 1)[1]
+ h2=h2.split('taboola-native-top-stories-thumbnail', 1)[0]
+ while '<div class="story-link' in h2:
+ h2=h2.split('<div class="story-link', 1)[1]
+ h2=h2.split('<a href="', 1)[1]
+ x=h2.split('"', 1)[0]
+ if h1 not in x:
+ if '.com' not in x:
+ x=url+x
+ h2s.append(x)
+
+ #GET TERTIARY HEADLINES
+ h3=content
+ h3s=[]
+ h3=h3.split('js-more-topstories', 1)[1]
+ h3=h3.split('<div class="panel-section', 1)[0]
+ while '<div class="story-link' in h3:
+ h3=h3.split('<div class="story-link', 1)[1]
+ h3=h3.split('<a href="', 1)[1]
+ x=h3.split('"', 1)[0]
+ if h1 not in x:
+ if '.com' not in x:
+ x=url+x
+ h3s.append(x)
+
+ #adjust for today.com urls
+ for arr in [h1s, h2s, h3s]:
+ for i in range(len(arr)):
+ if 'today.com' in arr[i]:
+ arr[i]=arr[i].split('.com', 1)[1]
+
+ h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
+ nbc=buildNewsSource2(name, url, h1s, h2s, h3s)
+
+ return nbc
+
+
+
+
+def buildBBC():
+ url='http://www.bbc.com/news/world/us_and_canada'
+ name='BBC US & Canada'
+
+ #DOWNLOAD HOMEPAGE CONTENT
+ content=urlToContent(url)
+
+ #get main headline
+ h1=content
+ h1=h1.split('buzzard-item', 1)[1]
+ h1=h1.split('<a href="', 1)[1]
+ h1=h1.split('"', 1)[0]
+ h1s=['http://www.bbc.com'+h1]
+
+ #GET SECONDARY HEADLINES
+ h2=content
+ h2s=[]
+ h2=h2.split('<div class="pigeon">', 1)[1]
+ h2=h2.split('<div id=', 1)[0]
+ while 'top_stories#' in h2:
+ h2=h2.split('top_stories#', 1)[1]
+ h2=h2.split('<a href="', 1)[1]
+ x=h2.split('"', 1)[0]
+ if h1 not in x:
+ h2s.append('http://www.bbc.com'+x)
+
+ #GET TERTIARY HEADLINES
+ h3=content
+ h3s=[]
+ h3=h3.split('<div class="macaw">', 1)[1]
+ h3=h3.split('Watch/Listen', 1)[0]
+ while '<div class="macaw-item' in h3:
+ h3=h3.split('<div class="macaw-item', 1)[1]
+ h3=h3.split('<a href="', 1)[1]
+ x=h3.split('"', 1)[0]
+ if h1 not in x:
+ h3s.append('http://www.bbc.com'+x)
+
+ h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
+ bbc=buildNewsSource2(name, url, h1s, h2s, h3s)
+
+ #REMOVE ' - BBC News' from headlines
+ for i in range(len(bbc.h1Arr)):
+ if ' - BBC News' in bbc.h1Arr[i].title:
+ bbc.h1Arr[i].title=bbc.h1Arr[i].title.split(' - BBC News', 1)[0]
+ for i in range(len(bbc.h2Arr)):
+ if ' - BBC News' in bbc.h2Arr[i].title:
+ bbc.h2Arr[i].title=bbc.h2Arr[i].title.split(' - BBC News', 1)[0]
+ for i in range(len(bbc.h3Arr)):
+ if ' - BBC News' in bbc.h3Arr[i].title:
+ bbc.h3Arr[i].title=bbc.h3Arr[i].title.split(' - BBC News', 1)[0]
+
+ return bbc
+
+
+
+def buildWeeklyStandard():
+ url='http://www.weeklystandard.com'
+ name='Weekly Standard'
+
+ #DOWNLOAD HOMEPAGE CONTENT
+ content=urlToContent(url)
+
+ #get main headline
+ h1=content
+ h1=h1.split('<div id="region_1"', 1)[1]
+ h1=h1.split('<div id="region_2"', 1)[0]
+ h1=h1.split('<div class="lead-photo">', 1)[1]
+ h1=h1.split('href="', 1)[1]
+ h1=h1.split('"', 1)[0]
+ h1s=[h1]
+
+ #GET SECONDARY HEADLINES
+ h2=content
+ h2s=[]
+ h2=h2.split('<div class="widget lead-story layout-3col-feature" data-count="2">', 1)[1]
+ h2=h2.split('<div id="region_2"', 1)[0]
+ while '<div class="lead-photo">' in h2:
+ h2=h2.split('<div class="lead-photo">', 1)[1]
+ h2=h2.split('href="', 1)[1]
+ x=h2.split('"', 1)[0]
+ if h1 not in x:
+ h2s.append(x)
+
+ #GET TERTIARY HEADLINES
+ h3=content
+ h3s=[]
+ h3=h3.split('Today\'s Standard', 1)[1]
+ h3=h3.split('<div id="region_3"', 1)[0]
+ while '<div class="lead-photo">' in h3:
+ h3=h3.split('<div class="lead-photo">', 1)[1]
+ h3=h3.split('href="', 1)[1]
+ x=h3.split('"', 1)[0]
+ if h1 not in x:
+ h3s.append(x)
+
+ #Need to add URL prefix to all URLs
+ for i in range(len(h1s)):
+ h1s[i]=url+h1s[i]
+ for i in range(len(h2s)):
+ h2s[i]=url+h2s[i]
+ for i in range(len(h3s)):
+ h3s[i]=url+h3s[i]
+
+
+ h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
+ wkl=buildNewsSource2(name, url, h1s, h2s, h3s)
+
+ #REMOVE BAD STORIES
+ badTitleArr=None
+ ## if flagged again, remove Micah Mattix
+ badDescArr=['Matt Labash']
+ badAuthorArr=['MATT LABASH', 'TWS PODCAST', 'ERIC FELTEN', 'Steven J. Lenzner', 'MARK HEMINGWAY']
+ badImgArr=['http://www.weeklystandard.com/s3/tws15/images/twitter/tws-twitter_1024x512.png']
+ wkl=removeBadStories(wkl, badTitleArr, badDescArr, badAuthorArr, badImgArr)
+
+ return wkl
+
+
+
+
+def buildNPR():
+ url='http://www.npr.org/sections/news/'
+ name='NPR'
+
+ #DOWNLOAD HOMEPAGE CONTENT
+ content=urlToContent(url)
+
+ #get main headline
+ h1=content
+ h1=h1.split('<a id="mainContent">', 1)[1]
+ h1=h1.split('<a href="', 1)[1]
+ h1=h1.split('"', 1)[0]
+ h1s=[h1]
+
+ #GET SECONDARY HEADLINES
+ h2=content
+ h2s=[]
+ h2=h2.split('<article class="item has-image">', 1)[1]
+ h2=h2.split('<!-- END CLASS=\'FEATURED-3-UP\' -->', 1)[0]
+ while '<article class="item has-image">' in h2:
+ h2=h2.split('<article class="item has-image">', 1)[1]
+ h2=h2.split('<a href="', 1)[1]
+ x=h2.split('"', 1)[0]
+ if h1 not in x:
+ h2s.append(x)
+
+ #GET TERTIARY HEADLINES
+ h3=content
+ h3s=[]
+ h3=h3.split('<div id="overflow" class="list-overflow"', 1)[1]
+ h3=h3.split('<!-- END ID="OVERFLOW" CLASS="LIST-OVERFLOW"', 1)[0]
+ while '<h2 class="title"><a href="' in h3:
+ h3=h3.split('<h2 class="title"><a href="', 1)[1]
+ x=h3.split('"', 1)[0]
+ if h1 not in x:
+ h3s.append(x)
+
+ h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
+
+ npr=buildNewsSource2(name, url, h1s, h2s, h3s)
+
+ #REMOVE BAD STORIES
+ badTitleArr=None
+ badDescArr=None
+ badAuthorArr=None
+ badImgArr=None
+ #npr=removeBadStories(npr, badTitleArr, badDescArr, badAuthorArr, badImgArr)
+
+ return npr
+
+
+
+
+def buildFoxNews():
+ url='http://foxnews.com'
+ name='Fox News'
+
+ #DOWNLOAD HOMEPAGE CONTENT
+ content=urlToContent(url)
+
+ #get main headline
+ h1=content
+ h1=h1.split('<h1><a href="', 1)[1]
+ h1=h1.split('"', 1)[0]
+ h1s=[h1]
+
+ #GET SECONDARY HEADLINES
+ h2=content
+ h2s=[]
+ h2=h2.split('<div class="top-stories">', 1)[1]
+ h2=h2.split('<section id="latest"', 1)[0]
+ while '<li data-vr-contentbox=""><a href="' in h2:
+ h2=h2.split('<li data-vr-contentbox=""><a href="', 1)[1]
+ x=h2.split('"', 1)[0]
+ if h1 not in x:
+ h2s.append(x)
+
+ #GET TERTIARY HEADLINES
+ h3=content
+ h3s=[]
+ h3=h3.split('div id="big-top"', 1)[1]
+ h3=h3.split('<div class="top-stories">', 1)[0]
+ while '<a href="' in h3:
+ h3=h3.split('<a href="', 1)[1]
+ x=h3.split('"', 1)[0]
+ if h1 not in x:
+ h3s.append(x)
+
+ h1s, h2s, h3s = removeDuplicates([h1], h2s, h3s)
+ fox=buildNewsSource2(name, url, h1s, h2s, h3s)
+
+ #REMOVE BAD STORIES
+	badTitleArr=['O\'Reilly']
+ badDescArr=None
+ badAuthorArr=['Bill O\'Reilly', 'Sean Hannity']
+ badImgArr=['http://www.foxnews.com/content/dam/fox-news/logo/og-fn-foxnews.jpg']
+ badURLArr=['http://www.foxnews.com/opinion', 'videos.foxnews.com']
+ fox=removeBadStories(fox, badTitleArr, badDescArr, badAuthorArr, badImgArr, badURLArr)
+
+ return fox
+
+
+
+def buildNYT():
+ url='http://www.nytimes.com'
+ name='New York Times'
+
+ #DOWNLOAD HOMEPAGE CONTENT
+ content=urlToContent(url)
+
+ #get main headline
+ #this will likely need if/else logic
+ h1=content
+
+ if 'story theme-summary banner' in h1:
+ #This is with a large headline over a and b columns
+ h1=h1.split('story theme-summary banner', 1)[1]
+ h1=h1.split('<a href="', 1)[1]
+ h1=h1.split('"', 1)[0]
+ else:
+ #otherwise, pull the first story from the A column
+ h1=h1.split('<div class="a-column column">', 1)[1]
+ h1=h1.split('<a href="', 1)[1].split('"', 1)[0]
+ h1s=[h1]
+
+
+ #GET SECONDARY HEADLINES
+ #This comes from the a column or b column, above the break
+ h2=content
+ h2s=[]
+ #A column
+ h2=h2.split('<div class="a-column column">', 1)[1]
+ h2=h2.split('<!-- close a-column -->', 1)[0]
+ #remove "collection" sets
+ while '<div class="collection headlines">' in h2:
+ arr=h2.split('<div class="collection headlines">', 1)
+ h2=arr[0]+arr[1].split('</ul>', 1)[1]
+ #Grab the remaining URLs
+ while '<a href="' in h2:
+ h2=h2.split('<a href="', 1)[1]
+ x=h2.split('"', 1)[0]
+ if h1 not in x:
+ h2s.append(x)
+
+ #B column
+ h2=content
+ h2=h2.split('<div class="b-column column">', 1)[1]
+ h2=h2.split('<!-- close b-column -->', 1)[0]
+ #remove "collection" sets
+ while '<div class="collection headlines">' in h2:
+ arr=h2.split('<div class="collection headlines">', 1)
+ h2=arr[0]+arr[1].split('</ul>', 1)[1]
+ #Grab the remaining URLs
+ while '<a href="' in h2:
+ h2=h2.split('<a href="', 1)[1]
+ x=h2.split('"', 1)[0]
+ if (h1 not in x) and (x not in h2s):
+ h2s.append(x)
+
+ #GET TERTIARY HEADLINES
+ h3=content
+ h3s=[]
+ h3=h3.split('<!-- close lede-package-region -->', 1)[1]
+ h3=h3.split('<a href="https://www.nytimes.com/tips">', 1)[0]
+ #remove "collection" sets
+	while '<div class="collection headlines">' in h3:
+ arr=h3.split('<div class="collection headlines">', 1)
+ h3=arr[0]+arr[1].split('</ul>', 1)[1]
+
+ #Grab the remaining URLs
+ while '<a href="' in h3:
+ h3=h3.split('<a href="', 1)[1]
+ x=h3.split('"', 1)[0]
+ if (h1 not in x) and (x not in h3s):
+ h3s.append(x)
+
+ h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
+ nyt=buildNewsSource2(name, url, h1s, h2s, h3s)
+
+ return nyt
+
+
+
+
+'''
+NYT
+EXAMPLE OF BIG HEADLINE SPANNING BOTH A AND B COLUMNS
+
+<div class="span-ab-layout layout">
+
+ <div class="ab-column column">
+
+ <section id="top-news" class="top-news">
+ <h2 class="section-heading visually-hidden">Top News</h2>
+
+ <div class="above-banner-region region">
+
+ <div class="collection">
+ <div class="hpHeader" id="top-megapackage-kicker">
+ <h6><a href="http://www.nytimes.com/pages/politics/index.html?src=hpHeader">The 45th President</a></h6>
+</div>
+
+</div>
+
+ </div><!-- close above-banner-region -->
+
+ <div class="span-ab-top-region region">
+
+ <div class="collection">
+ <article class="story theme-summary banner" id="topnews-100000004932040" data-story-id="100000004932040" data-rank="0" data-collection-renderstyle="Banner">
+ <h1 class="story-heading"><a href="https://www.nytimes.com/2017/02/14/us/politics/fbi-interviewed-mike-flynn.html">F.B.I. Questioned Flynn About Russia Call</a></h1>
+</article>
+</div>
+
+ </div><!-- close span-ab-top-region -->
+'''
|