diff options
author | Matt Singleton <matt@xcolour.net> | 2017-06-02 11:34:43 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-02 11:34:43 -0400 |
commit | 0eaa69b2db5feb4095d09bf75d4123f960619e79 (patch) | |
tree | 4ee9a6c6047e9e9dc4a126e472756b0f9b529617 | |
parent | 0ac55bbafa02ad951c9f1708f1fbc7c8746d5fce (diff) | |
parent | a82318fbdfc1af624fd9bf9bbae316ad45f43611 (diff) |
Merge pull request #1 from sstvinc2/matt-refactor
Matt refactor
-rw-r--r-- | .gitignore | 9 | ||||
-rw-r--r-- | etc/unbiased.service | 12 | ||||
-rw-r--r-- | html_template/newtemplate.html | 150 | ||||
-rwxr-xr-x | html_template/template.html | 173 | ||||
-rwxr-xr-x | html_template/unbiased.css | 220 | ||||
-rwxr-xr-x | main.py | 61 | ||||
-rw-r--r-- | setup.py | 25 | ||||
-rw-r--r-- | unbiased/__init__.py (renamed from scratch/do_not_delete) | 0 | ||||
-rw-r--r-- | unbiased/html_template/apple-touch-icon.png | bin | 0 -> 7036 bytes | |||
-rw-r--r-- | unbiased/html_template/favicon.ico | bin | 0 -> 4414 bytes | |||
-rw-r--r-- | unbiased/html_template/favicon.png | bin | 0 -> 1093 bytes | |||
-rwxr-xr-x | unbiased/html_template/unbiased.css | 172 | ||||
-rw-r--r-- | unbiased/html_template/unbiased.jinja.html | 79 | ||||
-rwxr-xr-x | unbiased/main.py | 145 | ||||
-rwxr-xr-x | unbiased/parser.py (renamed from parser.py) | 63 | ||||
-rwxr-xr-x | unbiased/spotCheck.py (renamed from spotCheck.py) | 6 | ||||
-rw-r--r-- | unbiased/unbiasedFunctions.py (renamed from unbiasedFunctions.py) | 251 | ||||
-rw-r--r-- | unbiased/unbiasedObjects.py (renamed from unbiasedObjects.py) | 6 |
18 files changed, 615 insertions, 757 deletions
@@ -1,10 +1,15 @@ *.pyc *~ +.DS_Store __pycache__/ -scratch/*.html +unbiased/scratch/*.html legacy_py/ unbiased.html html_template/Penguins.jpg html_template/BAK* +build/ +dist/ +venv/ +unbiased.egg-info/ #* -.#*
\ No newline at end of file +.#* diff --git a/etc/unbiased.service b/etc/unbiased.service new file mode 100644 index 0000000..391e4ff --- /dev/null +++ b/etc/unbiased.service @@ -0,0 +1,12 @@ +[Unit] +Description=Unbiased News + +[Service] +Type=simple +ExecStart=/opt/unbiased/venv/bin/unbiased /opt/unbiased/webroot -l /opt/unbiased/logs +WorkingDirectory=/opt/unbiased +User=www-data +Restart=on-failure + +[Install] +WantedBy=multi-user.target diff --git a/html_template/newtemplate.html b/html_template/newtemplate.html deleted file mode 100644 index 0cec766..0000000 --- a/html_template/newtemplate.html +++ /dev/null @@ -1,150 +0,0 @@ -<!DOCTYPE html>
-<html>
- <head>
- <meta charset="utf-8">
- <link rel="stylesheet" href="unbiased.css">
- <title>UnBiased</title>
- </head>
-<body>
-
-<div id="page-header">
- <span id="title-1" class="title">un</span><span id="title-2" class="title">biased</span><br />
- <span id="subtitle">a different way to read the news</span>
- <p id="timestamp">Last updated: Mon, Feb 13, 7:51pm EST</p>
-</div>
-
-<div id="page-container">
- <div id="top-stories">
-
- <div class="top-story">
- <a target="_blank" id="top-story-1" href="" onclick="location.href='xxURL1-1'">
- <div class="top-stories-img" style="background-image: url('http://www.theblaze.com/wp-content/uploads/2017/02/GettyImages-465794068-1280x720.jpg');" />
- </div>
- <div class="top-stories-hed">Rand Paul and Cory Booker push bipartisan effort to limit solitary confinement for juveniles</div>
- </a>
- <div class="top-stories-desc">Sen. Rand Paul (R-Ky) and Sen …</div>
- </div>
-
- <div class="top-story">
- <a target="_blank" href="" onclick="location.href='xxURL1-2'">
- <div class="top-stories-img" style="background-image: url('http://cdn.weeklystandard.biz/cache/r960-90b8d8d5cbcef212ecae2a5c455fed8f.jpg');" />
- </div>
- <div class="top-stories-hed">Bibi and Donald</div>
- </a>
- <div class="top-stories-desc">This week, Israel's prime minister will visit Washington and meet with our new president. They will have a complex agenda. Benjamin ...</div>
- </div>
-
- <div class="top-story">
- <a target="_blank" href="" onclick="location.href='xxURL1-3'">
- <div class="top-stories-img" style="background-image: url('https://static01.nyt.com/images/2017/02/13/multimedia/DavidOyelowo-UnitedKingdom/DavidOyelowo-UnitedKingdom-facebookJumbo.png');" />
- </div>
- <div class="top-stories-hed">David Oyelowo on How to Play a Real King</div>
- </a>
- <div class="top-stories-desc">He stars in “A United Kingdom,” about the Botswana leader who married a white woman and set off an international crisis.</div>
- </div>
-
- <div class="top-story">
- <a target="_blank" href="" onclick="location.href='xxURL1-4'">
- <div class="top-stories-img" style="background-image: url('http://a57.foxnews.com/images.foxnews.com/content/fox-news/us/2017/02/13/judge-orders-ohio-village-to-pay-back-3-million-to-lead-footed-drivers/_jcr_content/par/featured-media/media-0.img.jpg/0/0/1487019011476.jpg?ve=1');" />
- </div>
- <div class="top-stories-hed">Judge orders Ohio village to pay back $3 million to lead-footed drivers</div>
- </a>
- <div class="top-stories-desc">Speed cameras became a cash cow for the small village of New Miami, Ohio.</div>
- </div>
-
- </div>
-
- <div id="middle-stories">
-
- <a target="_blank" href="" onclick="location.href='xxURL2-1'">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('http://www.theblaze.com/wp-content/uploads/2017/02/GettyImages-635148734-1280x720.jpg');">
- </div>
- <div class="middle-stories-hed">DHS says 75 percent of those detained in ICE raids last week were ‘criminal aliens’</div>
- </div>
- </a>
-
- <a target="_blank" href="" onclick="location.href='xxURL2-2'">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('http://a57.foxnews.com/media2.foxnews.com/BrightCove/694940094001/2017/02/12/0/0/694940094001_5320280093001_5320267547001-vs.jpg?ve=1');">
- </div>
- <div class="middle-stories-hed">Drama grips Trump inner circle, as president charges ahead on agenda</div>
- </div>
- </a>
-
- <a target="_blank" href="" onclick="location.href='xxURL2-3'">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('http://ichef.bbci.co.uk/news/1024/cpsprodpb/C9C5/production/_94635615_6c33162f-1c24-487d-8a51-bb7b13ec063f.jpg');">
- </div>
- <div class="middle-stories-hed">Ku Klux Klan killing: Frank Ancona's wife and stepson charged - BBC News</div>
- </div>
- </a>
-
- <a target="_blank" href="" onclick="location.href='xxURL2-4'">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('http://media1.s-nbcnews.com/j/newscms/2017_07/1900281/13217-oroville-dam-724a-rs_4a8b5ba9690488f11410f156833e1b70.nbcnews-fp-1200-800.jpg');">
- </div>
- <div class="middle-stories-hed">Nearly 190,000 ordered to evacuate in California dam spillway failure</div>
- </div>
- </a>
-
- <a target="_blank" href="" >
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('http://cbsnews1.cbsistatic.com/hub/i/2017/02/13/4ad800d9-69ba-4102-a8ec-af12e8eb6adb/021317-news.jpg');">
- </div>
- <div class="middle-stories-hed">Jerry Sandusky's son, 41, arrested on child sex charges</div>
- </div>
- </a>
-
- <a target="_blank" href="" >
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('https://static01.nyt.com/images/2017/02/14/us/14townhall1/14townhall1-facebookJumbo.jpg');">
- </div>
- <div class="middle-stories-hed">Angry Town Hall Meetings on Health Care Law, and Few Answers</div>
- </div>
- </a>
-
-
- </div>
-
- <div id="bottom-stories">
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-1xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-2xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-3xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-4xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-5xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-6xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-7xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-8xx</a>
- </div>
-</div>
-
-</div>
-
-<div id="sources">
- Sources: BBC US, NBC News, CBS News, The Blaze, Weekly Standard, New York Times, Fox News
-</div>
-</body>
-</html>
diff --git a/html_template/template.html b/html_template/template.html deleted file mode 100755 index fc17006..0000000 --- a/html_template/template.html +++ /dev/null @@ -1,173 +0,0 @@ -<!DOCTYPE html>
-<html>
- <head>
- <meta name="viewport" content="width=device-width; initial-scale=1.0; maximum-scale=1.0; user-scalable=0;" />
- <meta charset="utf-8">
- <link rel="stylesheet" href="unbiased.css">
- <title>UnBiased</title>
- </head>
-<body>
-
-<div id="page-header">
- <span id="title-1" class="title">un</span><span id="title-2" class="title">biased</span><br />
- <span id="subtitle">a different way to read the news</span>
- <p id="timestamp">Last updated: xxTimexx</p>
-</div>
-
-<div id="page-container">
- <div id="top-stories">
- <div class="row">
-
- <div class="top-story">
- <a target="_blank" onclick="window.open('xxURL1-1xx', '_blank')">
- <div class="top-stories-img" style="background-image: url('xxImg1-1xx');" /></div>
- <div class="top-stories-hed">xxTitle1-1xx</div>
- </a>
- <div class="top-stories-desc">xxDesc1-1xx</div>
- </div>
-
- <div class="top-story">
- <a target="_blank" onclick="window.open('xxURL1-2xx', '_blank')">
- <div class="top-stories-img" style="background-image: url('xxImg1-2xx');" />
- </div>
- <div class="top-stories-hed">xxTitle1-2xx</div>
- </a>
- <div class="top-stories-desc">xxDesc1-2xx</div>
- </div>
-
- </div>
-
-<div class="row">
-
- <div class="top-story">
- <a target="_blank" onclick="window.open('xxURL1-3xx', '_blank')">
- <div class="top-stories-img" style="background-image: url('xxImg1-3xx');" />
- </div>
- <div class="top-stories-hed">xxTitle1-3xx</div>
- </a>
- <div class="top-stories-desc">xxDesc1-3xx</div>
- </div>
-
- <div class="top-story">
- <a target="_blank" onclick="window.open('xxURL1-4xx', '_blank')">
- <div class="top-stories-img" style="background-image: url('xxImg1-4xx');" />
- </div>
- <div class="top-stories-hed">xxTitle1-4xx</div>
- </a>
- <div class="top-stories-desc">xxDesc1-4xx</div>
- </div>
-
- </div>
-
- </div>
-
- <div id="middle-stories">
-
- <a target="_blank" onclick="window.open('xxURL2-1xx', '_blank')">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('xxImg2-1xx');">
- </div>
- <div class="middle-stories-hed">xxTitle2-1xx</div>
- </div>
- </a>
-
- <a target="_blank" onclick="window.open('xxURL2-2xx', '_blank')">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('xxImg2-2xx');">
- </div>
- <div class="middle-stories-hed">xxTitle2-2xx</div>
- </div>
- </a>
-
- <a target="_blank" onclick="window.open('xxURL2-3xx', '_blank')">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('xxImg2-3xx');">
- </div>
- <div class="middle-stories-hed">xxTitle2-3xx</div>
- </div>
- </a>
-
- <a target="_blank" onclick="window.open('xxURL2-4xx', '_blank')">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('xxImg2-4xx');">
- </div>
- <div class="middle-stories-hed">xxTitle2-4xx</div>
- </div>
- </a>
-
- <a target="_blank" onclick="window.open('xxURL2-5xx', '_blank')">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('xxImg2-5xx');">
- </div>
- <div class="middle-stories-hed">xxTitle2-5xx</div>
- </div>
- </a>
-
- <a target="_blank" onclick="window.open('xxURL2-6xx', '_blank')">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('xxImg2-6xx');">
- </div>
- <div class="middle-stories-hed">xxTitle2-6xx</div>
- </div>
- </a>
-
-
- </div>
-
- <div id="bottom-stories">
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-1xx', '_blank')">xxTitle3-1xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-2xx', '_blank')">xxTitle3-2xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-3xx', '_blank')">xxTitle3-3xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-4xx', '_blank')">xxTitle3-4xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-5xx', '_blank')">xxTitle3-5xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-6xx', '_blank')">xxTitle3-6xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-7xx', '_blank')">xxTitle3-7xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-8xx', '_blank')">xxTitle3-8xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-9xx', '_blank')">xxTitle3-9xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-10xx', '_blank')">xxTitle3-10xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-11xx', '_blank')">xxTitle3-11xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-12xx', '_blank')">xxTitle3-12xx</a>
- </div>
-</div>
-
-</div>
-
-<div id="sources">
- Sources: xxSourcesxx
-</div>
-</body>
-</html>
diff --git a/html_template/unbiased.css b/html_template/unbiased.css deleted file mode 100755 index 244f100..0000000 --- a/html_template/unbiased.css +++ /dev/null @@ -1,220 +0,0 @@ -/*body{
- width:900px;
- margin-left:auto;
- margin-right:auto;
-}*/
-
-
-body{
- margin:0;
-}
-
-a:link, a:visited, a:hover, a:active {
- color: #00f;
- text-decoration:none;
- }
-
-a:hover{
- cursor:pointer;
-}
-
-#page-header{
- width:100%;
- text-align:center;
- padding:.5em 0 1em;
- margin-bottom:1em;
- border-bottom:3px solid #BB133E;
- background:#002147;
-}
-
-.title{
- font-size:3em;
-}
-
-#title-1{
- font-style:italic;
- color:#fff;
-}
-
-#title-2{
- color:#fff;
-}
-
-#subtitle{
- font-size:1.25em;
- color:#ccc;
-}
-
-#timestamp{
- margin:.5em 0 0 0;
- font-size:.8em;
- color:#cc6;
-}
-
-#page-container{
- width:900px;
- margin-left:auto;
- margin-right:auto;
-}
-
-@media only screen and (max-width:900px){
- #page-container{
- width:100%
- }
-}
-
-#top-stories{
- width:95%;
- display:block;
- overflow:auto;
- padding:10px;
- margin-left:auto;
- margin-right:auto;
- text-align:center;
- border-bottom: 3px solid #BB133E;
- margin-bottom: 10px;
-}
-
-.row{
- display:flex;
-}
-
-.top-story{
- display:inline-block;
- vertical-align:top;
- text-align:left;
- width:360px;
- height:auto;
- overflow:hidden;
- background:#fff;
- margin:10px;
- padding:10px;
- border:2px solid #ccc;
- flex:1;
-}
-
-@media only screen and (max-width:500px){
- .row{
- display:block;
- }
- .top-story{
- display:block;
- width:auto;
- height:auto;
- }
-}
-
-.top-stories-img{
- width:350px;
- height:200px;
- overflow:hidden;
- background-size: auto 234px;/*cover;*/
- background-position: top center;/*center center;*/
- margin:0 auto;
-}
-
-@media only screen and (max-width:500px){
- .top-stories-img{
- width:auto;
- }
-}
-
-
-.top-stories-hed{
- font-weight:bold;
- font-size:1.35em;
- margin:10px 10px 0;
- color:#00f;
-}
-
-.top-stories-desc{
- font-size:1em;
- padding-top:.5em;
- margin:0 .75em;
-}
-
-#middle-stories{
- clear:both;
- width:500px;
- margin:0 auto;
- padding:0;
- display:block;
- overflow:auto;
- float:left;
-}
-
-@media only screen and (max-width:500px){
- #middle-stories{
- width:100%;
- float:none;
- }
-}
-
-.middle-story{
- margin:5px 10px;
- padding:10px;
- background:#fff;
- border:2px solid #ddd;
- width:460px;
- float:left;
-}
-
-@media only screen and (max-width:500px){
- .middle-story{
- width:auto;
- }
-}
-
-.middle-stories-img{
- width:150px;
- height:100px;
- overflow:hidden;
- background-size: auto 117px;/*cover;*/
- background-position: top center;/*center center;*/
- float:left;
- max-width:35%;
-}
-
-.middle-stories-hed{
- font-size:1.2em;
- float:left;
- width:300px;
- margin-left:10px;
- color:#00f;
-}
-
-@media only screen and (max-width:500px){
- .middle-stories-hed{
- max-width:60%;
- }
-}
-
-#bottom-stories{
- margin:0 10px;
- padding:10px;
- display:block;
- overflow:auto;
- float:left;
- width:350px;
- border:5px solid #ddd;
-}
-
-@media only screen and (max-width:900px){
- #bottom-stories{
- width:auto;
- border-width:3px;
- float:none;
- }
-}
-
-.bottom-story{ color:#00f;
-
- padding:15px 0;
- color:#00f;
-}
-
-#sources{
- clear:both;
- padding-top:4em;
- font-size:.8em;
-}
\ No newline at end of file diff --git a/main.py b/main.py deleted file mode 100755 index a109d2f..0000000 --- a/main.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 - -from unbiasedObjects import * -from unbiasedFunctions import * -from parser import * -import time - - -def main(): - while True: - print('-----------------------') - run() - print('-----------------------') - time.sleep(600) - -def run(): - sourceList=[] - - ''' - - SOURCES TO ADD NEXT: - -ABC - -REUTERS - -Town Hall - - ''' - - - ### These values have to be the second half of the function name - ### E.g. Guardian calls buildGuardian(), etc. - sourceFnArr=['Guardian', 'TheHill', 'NPR', 'BBC', 'NBC', 'CBS', - 'FoxNews', 'WashTimes', 'CSM', 'ABC'] #'Blaze' - - for source in sourceFnArr: - tries=0 - while tries<3: - try: - fn='build'+source - possibles = globals().copy() - possibles.update(locals()) - method = possibles.get(fn) - src=method() - sourceList.append(src) - break - except: - print('Build error. Looping again: '+source) - tries+=1 - time.sleep(tries) - - #scrape all urls and build data structure - newsSourceArr=buildNewsSourceArr(sourceList) - - #build the output file HTML - outputHTML=buildOutput(newsSourceArr) - - #print the output file HTML - printOutputHTML(outputHTML, '/var/www/html/index.html') - - -if __name__=="__main__": - main() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..2761041 --- /dev/null +++ b/setup.py @@ -0,0 +1,25 @@ +from setuptools import setup + +setup( + name="unbiased", + version="1", + packages=['unbiased'], + package_data={ + 'unbiased': [ + 'html_template/*.html', + 'html_template/*.css', + 'html_template/*.ico', + 'html_template/*.png', + ], + }, + install_requires=[ + 'jinja2', + 'Pillow', + 'requests', + ], + entry_points={ + 'console_scripts': [ + 'unbiased = unbiased.main:main', + ], + }, +) diff --git a/scratch/do_not_delete b/unbiased/__init__.py index e69de29..e69de29 100644 --- a/scratch/do_not_delete +++ 
b/unbiased/__init__.py diff --git a/unbiased/html_template/apple-touch-icon.png b/unbiased/html_template/apple-touch-icon.png Binary files differ new file mode 100644 index 0000000..93c33aa --- /dev/null +++ b/unbiased/html_template/apple-touch-icon.png diff --git a/unbiased/html_template/favicon.ico b/unbiased/html_template/favicon.ico Binary files differ new file mode 100644 index 0000000..b2b29c6 --- /dev/null +++ b/unbiased/html_template/favicon.ico diff --git a/unbiased/html_template/favicon.png b/unbiased/html_template/favicon.png Binary files differ new file mode 100644 index 0000000..0b94313 --- /dev/null +++ b/unbiased/html_template/favicon.png diff --git a/unbiased/html_template/unbiased.css b/unbiased/html_template/unbiased.css new file mode 100755 index 0000000..dc99ab7 --- /dev/null +++ b/unbiased/html_template/unbiased.css @@ -0,0 +1,172 @@ +body {
+ margin: 0;
+}
+
+a:link, a:visited, a:hover, a:active, a {
+ color: #00f;
+ text-decoration:none;
+}
+
+a:hover {
+ cursor:pointer;
+}
+
+hr {
+ max-width: 890px;
+ margin: 5px auto;
+ border: 0;
+ height: 3px;
+ background-color: #BB133E;
+}
+
+#page-header {
+ width: 100%;
+ text-align: center;
+ padding: .5em 0 1em;
+ margin-bottom: 1em;
+ border-bottom: 3px solid #BB133E;
+ background: #002147;
+}
+
+.title {
+ font-size: 3em;
+}
+
+#title-1 {
+ font-style: italic;
+ color: #fff;
+}
+
+#title-2 {
+ color: #fff;
+}
+
+#subtitle {
+ font-size: 1.25em;
+ color: #ccc;
+}
+
+#timestamp {
+ margin: .5em 0 0 0;
+ font-size: .8em;
+ color: #cc6;
+}
+
+#top-stories {
+ max-width: 900px;
+ display: flex;
+ flex-wrap: wrap;
+ margin: 5px auto;
+}
+
+.top-story {
+ flex: 1 0 350px;
+ margin: 5px;
+ padding: 10px;
+ border:2px solid #eee;
+}
+
+.top-stories-img {
+ width: 100%;
+ padding-bottom: 57%;
+ background-size: 100%;
+ background-position: center center;
+}
+
+.top-stories-hed {
+ font-size: 1.3em;
+ margin: 10px 0;
+ color: #00f;
+}
+
+.top-stories-desc {
+ font-size: 1em;
+}
+
+.c2 {
+ max-width: 900px;
+ display: flex;
+ flex-wrap: wrap;
+ margin: 5px auto;
+}
+
+.c2 hr {
+ display: none;
+}
+
+#middle-stories {
+ flex: 7 0 200px;
+}
+
+.middle-story {
+ margin: 5px;
+ border: 2px solid #eee;
+}
+
+.middle-story a {
+ padding: 10px;
+ display: inline-block;
+}
+
+.middle-story a p {
+ margin: 0;
+}
+
+.middle-stories-img{
+ width: 150px;
+ height: 100px;
+ background-size: 100%;
+ background-position: center center;
+ float: left;
+ margin-right: 10px;
+}
+
+#middle-stories a {
+ font-size: 1.1em;
+ color: #00f;
+}
+
+#bottom-stories {
+ flex: 3 0 200px;
+ border: 2px solid #eee;
+ margin: 5px;
+}
+
+.bottom-story {
+ padding: 10px;
+ color: #00f;
+}
+
+#sources {
+ margin: 2em 5px 0 5px;
+ font-size: .8em;
+}
+
+@media (max-width: 900px) {
+ hr {
+ width: inherit;
+ margin: 5px;
+ }
+}
+
+@media (max-width: 767px) {
+ #top-stories {
+ flex-wrap: nowrap;
+ flex-direction: column;
+ }
+ .top-story {
+ flex: 1 0 250px;
+ }
+ .c2 {
+ flex-direction: column;
+ }
+ .c2 hr {
+ display: inherit;
+ }
+ #middle-stories {
+ flex: inherit;
+ }
+ #bottom-stories {
+ flex: inherit;
+ }
+}
diff --git a/unbiased/html_template/unbiased.jinja.html b/unbiased/html_template/unbiased.jinja.html new file mode 100644 index 0000000..0d191e7 --- /dev/null +++ b/unbiased/html_template/unbiased.jinja.html @@ -0,0 +1,79 @@ +<!DOCTYPE html> +<html> + <head> + <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=0" /> + <meta charset="utf-8"> + <link rel="stylesheet" href="unbiased.css"> + <link rel="icon" sizes="32x32" href="/favicon.ico"> + <link rel="icon" sizes="32x32" href="/favicon.png"> + <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png"> + <title>UnBiased</title> + </head> + + <body> + + <div id="page-header"> + <span id="title-1" class="title">un</span><span id="title-2" class="title">biased</span><br /> + <span id="subtitle">a different way to read the news</span> + <p id="timestamp">Last updated: {{ timestamp }}</p> + </div> + + <div id="top-stories"> + + {% for story in top_stories %} + + <div class="top-story"> + <a target="_blank" onclick="window.open('{{ story.url }}', '_blank')"> + <div class="top-stories-img" style="background-image: url('{{ story.img }}?{{ utime }}');" /></div> + <div class="top-stories-hed">{{ story.title }}</div> + </a> + <div class="top-stories-desc">{{ story.description }}</div> + </div> + + {% endfor %} + + </div> + + <hr/> + + <div class="c2"> + + <div id="middle-stories"> + + {% for story in middle_stories %} + + <div class="middle-story"> + <a target="_blank" onclick="window.open('{{ story.url }}', '_blank')"> + <p> + <div class="middle-stories-img" style="background-image: url('{{ story.img }}?{{ utime }}');"></div> + {{ story.title }} + </p> + </a> + </div> + + {% endfor %} + + </div> + + <hr/> + + <div id="bottom-stories"> + + {% for story in bottom_stories %} + + <div class="bottom-story"> + <a target="_blank" onclick="window.open('{{ story.url }}', '_blank')">{{ story.title }}</a> + </div> + + {% endfor %} + + </div> + + </div> + + <div 
id="sources"> + Sources: {{ sources }} + </div> + + </body> +</html> diff --git a/unbiased/main.py b/unbiased/main.py new file mode 100755 index 0000000..caf77eb --- /dev/null +++ b/unbiased/main.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 + +import argparse +import logging +import logging.config +import time + +from unbiased.unbiasedObjects import * +from unbiased.unbiasedFunctions import * +from unbiased.parser import * + +logger = logging.getLogger('unbiased') + +logging_config = { + 'version': 1, + 'formatters': { + 'console': { + 'format': '%(levelname)s %(filename)s:%(lineno)d %(message)s', + }, + 'file': { + 'format': '%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(message)s', + }, + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'level': 'INFO', + 'formatter': 'console', + }, + 'file': { + 'class': 'logging.handlers.RotatingFileHandler', + 'level': 'DEBUG', + 'formatter': 'file', + 'filename': '', + 'maxBytes': 1024 * 1024, + 'backupCount': 3, + }, + }, + 'loggers': { + 'unbiased': { + 'handlers': ['console', 'file'], + }, + }, + 'root': { + 'level': 'DEBUG', + } +} + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('webroot', help='location to write html output') + parser.add_argument('-l', '--log-dir', help='location to write detailed logs') + parser.add_argument('-d', '--debug', action='store_true', help='run in debug mode') + args = parser.parse_args() + + if args.log_dir: + logging_config['handlers']['file']['filename'] = os.path.join(args.log_dir, 'unbiased.debug.log') + else: + logging_config['loggers']['unbiased']['handlers'].remove('file') + del logging_config['handlers']['file'] + if args.debug: + logging_config['handlers']['console']['level'] = 'DEBUG' + logging.config.dictConfig(logging_config) + + crawl_frequency = 600 + while True: + logger.info('Starting crawl') + start = time.time() + run(args.webroot) + finish = time.time() + runtime = finish - start + sleeptime = crawl_frequency - 
runtime + logger.info('Crawl complete in {}s. Sleeping for {}s'.format(int(runtime), int(sleeptime))) + if sleeptime > 0: + time.sleep(sleeptime) + +def run(webroot): + sources = [] + + ''' + SOURCES TO ADD NEXT: + -REUTERS + -Town Hall + ''' + + logger.debug('Running with webroot="{}"'.format(webroot)) + + ### These values have to be the second half of the function name + ### E.g. Guardian calls buildGuardian(), etc. + sourceFnArr = [ + 'Guardian', + 'TheHill', + 'NPR', + 'BBC', + 'NBC', + 'CBS', + 'FoxNews', + 'WashTimes', + 'CSM', + 'ABC', + ] + + for source in sourceFnArr: + logger.info('Crawling {}'.format(source)) + tries = 0 + while tries < 3: + time.sleep(tries) + try: + fn = 'build' + source + possibles = globals().copy() + possibles.update(locals()) + method = possibles.get(fn) + src = method() + sources.append(src) + break + except Exception as ex: + tries += 1 + if tries == 3: + logger.error('Build failed. source={} ex={}'.format(source, ex)) + else: + logger.debug('Build failed, retrying. 
source={} ex={}'.format(source, ex)) + logger.info('Parsed home pages for: {}'.format([x.name for x in sources])) + + top_stories, middle_stories, bottom_stories = pickStories(sources) + logger.info('Picked top stories from: {}'.format([x.source for x in top_stories])) + logger.info('Picked middle stories from: {}'.format([x.source for x in middle_stories])) + logger.info('Picked bottom stories from: {}'.format([x.source for x in bottom_stories])) + + # download images + img_idx = 0 + for story in top_stories: + story.img = pullImage(story.img, img_idx, webroot, 350, 200) + img_idx += 1 + for story in middle_stories: + story.img = pullImage(story.img, img_idx, webroot, 150, 100) + img_idx += 1 + + #build the output file HTML + outputHTML = buildOutput(top_stories, middle_stories, bottom_stories) + + #print the output file HTML + writeOutputHTML(outputHTML, webroot) + +if __name__=="__main__": + main() diff --git a/parser.py b/unbiased/parser.py index f69281b..05a7fc1 100755 --- a/parser.py +++ b/unbiased/parser.py @@ -1,9 +1,16 @@ #!/usr/bin/env python3
-from unbiasedObjects import *
-from unbiasedFunctions import buildArticle
+import logging
import os
import re
+import urllib.parse
+
+import requests
+
+from unbiased.unbiasedObjects import *
+from unbiased.unbiasedFunctions import buildArticle
+
+logger = logging.getLogger('unbiased')
'''
@@ -11,18 +18,11 @@ Takes in a URL, downloads the file to a temp file, reads the file into a string, and returns that string
'''
def urlToContent(url, sourceEncoding='utf8'):
- #download file
- os.system('wget -q -O scratch/temp1.html --no-check-certificate '+url)
-
- #read file
- if sourceEncoding=='utf8':
- f=open('scratch/temp1.html', 'r', encoding="utf8")
+ res = requests.get(url)
+ if res.status_code == 200:
+ return res.text
else:
- f=open('scratch/temp1.html', 'r', encoding="latin-1")
- content=f.read()
- f.close()
-
- return content
+ raise Exception("Failed to download {}".format(url))
'''
@@ -31,10 +31,17 @@ calls the file scraper and appends the new Article object. Returns a newsSource2 object
'''
def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs):
+
+ url_parts = urllib.parse.urlparse(url)
+ scheme = url_parts.scheme
+ h1URLs = [urllib.parse.urlparse(x, scheme=scheme).geturl() for x in h1URLs]
+ h2URLs = [urllib.parse.urlparse(x, scheme=scheme).geturl() for x in h2URLs]
+ h3URLs = [urllib.parse.urlparse(x, scheme=scheme).geturl() for x in h3URLs]
+
h1Arr=[]
a=buildArticle(h1URLs[0], name)
if a==None:
- print('................\nH1 Nonetype in '+name+'\n................')
+ logger.debug('H1 Nonetype in '+name)
else:
h1Arr.append(a)
@@ -44,16 +51,15 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs): if a!=None:
h2Arr.append(a)
else:
- print('................\nH2 Nonetype in '+name+'\n................')
+ logger.debug('H2 Nonetype in '+name)
-
h3Arr=[]
for x in h3URLs:
a=buildArticle(x, name)
if a!=None:
h3Arr.append(a)
else:
- print('................\nH3 Nonetype in '+name+'\n................')
+ logger.debug('H3 Nonetype in '+name)
#BUILD THE NEWS SOURCE
newsSource=NewsSource2(name, url, h1Arr, h2Arr, h3Arr)
@@ -114,13 +120,11 @@ def removeDuplicates(h1s, h2s, h3s): def removalNotification(source, title, reason, value):
- print('*************************')
- print('\t\tSTORY REMOVED')
- print('SOURCE: '+source)
- print('TITLE: \t'+title)
- print('REASON: '+reason)
- print('VALUE: \t'+value)
- print('*************************\n\n')
+ logger.debug("""Story removed
+ SOURCE:\t{}
+ TITLE:\t{})
+ REASON:\t{}
+ VALUE:\t{}""".format(source, title, reason, value))
def removeBadStoriesHelper(source, element, badStringList, arr):
@@ -128,7 +132,7 @@ def removeBadStoriesHelper(source, element, badStringList, arr): for i in range(len(arr)):
for hed in arr[i]:
if hed==None:
- print("////////\nNone type found in removeBadStoriesHelper for "+source.name+"\n/////////")
+ logger.debug("None type found in removeBadStoriesHelper for "+source.name)
break
for item in badStringList:
if item in getattr(hed, element):
@@ -220,7 +224,7 @@ def buildGuardian(): if h1!='https://www.theguardian.com/us':
break
else:
- print('Guardian loop')
+ logger.debug('Guardian loop')
h1s=[h1]
@@ -822,6 +826,7 @@ def buildFoxNews(): h1=h1.split('<h1><a href="', 1)[1]
h1=h1.split('"', 1)[0]
h1s=[h1]
+ h1s = ['http:' + x if x.startswith('//') else x for x in h1s]
#GET SECONDARY HEADLINES
h2=content
@@ -833,6 +838,7 @@ def buildFoxNews(): x=h2.split('"', 1)[0]
if h1 not in x:
h2s.append(x)
+ h2s = ['http:' + x if x.startswith('//') else x for x in h2s]
#GET TERTIARY HEADLINES
h3=content
@@ -844,14 +850,15 @@ def buildFoxNews(): x=h3.split('"', 1)[0]
if h1 not in x:
h3s.append(x)
+ h3s = ['http:' + x if x.startswith('//') else x for x in h3s]
- h1s, h2s, h3s = removeDuplicates([h1], h2s, h3s)
+ h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
fox=buildNewsSource2(name, url, h1s, h2s, h3s)
#REMOVE BAD STORIES
badTitleArr=['O'Reilly', 'Fox News', 'Brett Baier', 'Tucker']
badDescArr=['Sean Hannity']
- badAuthorArr=['Bill O\'Reilly', 'Sean Hannity']
+ badAuthorArr=['Bill O\'Reilly', 'Sean Hannity', 'Howard Kurtz']
badImgArr=['http://www.foxnews.com/content/dam/fox-news/logo/og-fn-foxnews.jpg']
badURLArr=['http://www.foxnews.com/opinion', 'videos.foxnews.com']
fox=removeBadStories(fox, badTitleArr, badDescArr, badAuthorArr, badImgArr, badURLArr)
diff --git a/spotCheck.py b/unbiased/spotCheck.py index d1edda4..7ce50d3 100755 --- a/spotCheck.py +++ b/unbiased/spotCheck.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 - -from parser import * -from unbiasedObjects import * import sys +from unbiased.parser import * +from unbiased.unbiasedObjects import * + def spotCheck(src): fns = {'hil' : buildTheHill, diff --git a/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 1a80d7a..cb13a44 100644 --- a/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -1,29 +1,50 @@ -from unbiasedObjects import *
+import html
+import io
+import logging
import os
+import pkgutil
import random
-import time
import re
+import time
+import urllib.parse
+
+from PIL import Image
+import requests
+
+from unbiased.unbiasedObjects import *
+logger = logging.getLogger('unbiased')
#take in a url and delimiters, return twitter card
def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, imgDelStart, imgDelEnd):
debugging=False
if debugging:
- print(sourceName)
- print(url)
- print()
-
+ logger.debug(sourceName)
+ logger.debug(url)
+
+ url_parts = urllib.parse.urlparse(url)
+ scheme = url_parts.scheme
+
#download url
- os.system('wget -q -O scratch/temp_article.html --no-check-certificate '+url)
+ try:
+ res = requests.get(url)
+ except Exception as ex:
+ logger.debug("""ARTICLE DOWNLOADING ERROR
+ SOURCE:\t{}
+ URL:\t{}""".format(sourceName, url))
+ return None
- #read the file in
- f=open('scratch/temp_article.html', 'r', encoding="utf8")
- content=f.read()
- f.close()
+ if res.status_code == 200:
+ content = res.text
+ else:
+ logger.debug("""ARTICLE DOWNLOADING ERROR
+ SOURCE:\t{}
+ URL:\t{}""".format(sourceName, url))
+ return None
try:
- if sourceName=='The Guardian':
+ if sourceName=='The Guardian US':
#The Guardian puts an identifying banner on their og:images
#grab the main image from the page instead
@@ -39,20 +60,23 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, elif '<img class="immersive-main-media__media"' in content:
img=content.split('<img class="immersive-main-media__media"', 1)[1]
img=img.split('src="', 1)[1].split('"')[0]
-
+ img = html.unescape(img)
+
else:
if 'og:image' in content:
img=content.split('og:image" content=')[1][1:].split('>')[0]
elif sourceName=='ABC News':
img='https://c1.staticflickr.com/7/6042/6276688407_12900948a2_b.jpgX'
if img[-1]=='/':
- #because the quote separator could be ' or ",
+ #because the quote separator could be ' or ",
#trim to just before it then lop it off
img=img[:-1].strip()
img=img[:-1]
+ # fix the scheme if it's missing
+ img = urllib.parse.urlparse(img, scheme=scheme).geturl()
if debugging:
- print(img)
+ logger.debug(img)
title=content.split('og:title" content=')[1][1:].split('>')[0]
if title[-1]=='/':
@@ -60,7 +84,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, title=title[:-1]
if debugging:
- print(title)
+ logger.debug(title)
author=''
@@ -82,7 +106,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, break
if debugging:
- print(author)
+ logger.debug(author)
if 'og:description' in content:
@@ -96,7 +120,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, description=re.sub('<[^<]+?>', '', description)
description=description[1:200]
else:
- print("SHOULDN'T GET HERE")
+ logger.debug("SHOULDN'T GET HERE")
#strip out self-references
description=description.replace(sourceName+"'s", '***')
@@ -104,27 +128,20 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, description=description.replace(sourceName, '***')
if debugging:
- print(description)
+ logger.debug(description)
- a=Article(title, url, img, description, sourceName, author)
+ a=Article(html.unescape(title), url, img, html.unescape(description), sourceName, html.unescape(author))
return a
- except:
- print('^^^^^^^^^^^^^^^^^^^^^^^^^')
- print('\tARTICLE PARSING ERROR')
- print('SOURCE: '+sourceName)
- print('URL: \t'+url)
- print('^^^^^^^^^^^^^^^^^^^^^^^^^ \n\n')
+ except Exception:
+ logger.debug("""ARTICLE PARSING ERROR
+ SOURCE:\t{}
+ URL:\t{}""".format(sourceName, url))
return None
-def buildOutput(newsSourceArr):
- #read in the template html file
- f=open('html_template/template.html', 'r')
- template=f.read()
- f.close()
-
+def pickStories(newsSourceArr):
#set the random order for sources
h1RandomSources=[]
while len(h1RandomSources)<4:
@@ -133,10 +150,10 @@ def buildOutput(newsSourceArr): if x not in h1RandomSources:
h1RandomSources.append(x)
else:
- print('\n\n@@@@\nNo H1 stories in '+newsSourceArr[x].name+'\n@@@@\n\n')
-
+ logger.debug('No H1 stories in '+newsSourceArr[x].name)
+
#For h2s and h3s, select N random sources (can repeat), then
- #a non-repetitive random article from within
+ #a non-repetitive random article from within
h2RandomPairs=[]
while len(h2RandomPairs) < 6:
x=random.sample(range(len(newsSourceArr)), 1)[0]
@@ -146,114 +163,110 @@ def buildOutput(newsSourceArr): if not pair in h2RandomPairs:
h2RandomPairs.append(pair)
else:
- print('\n\n@@@@\nNo H2 stories in '+newsSourceArr[x].name+'\n@@@@\n\n')
+ logger.debug('No H2 stories in '+newsSourceArr[x].name)
h3RandomPairs=[]
while len(h3RandomPairs) < 12:
x=random.sample(range(len(newsSourceArr)), 1)[0]
- print(newsSourceArr[x].name)
if len(newsSourceArr[x].h3Arr) > 0:
y=random.sample(range(len(newsSourceArr[x].h3Arr)), 1)[0]
pair=[x,y]
if not pair in h3RandomPairs:
h3RandomPairs.append(pair)
else:
- print('\n\n@@@@\nNo H3 stories in '+newsSourceArr[x].name+'\n@@@@\n\n')
+ logger.debug('No H3 stories in '+newsSourceArr[x].name)
- #replace html template locations with data from newsSourceArr
+ # collect articles for each section
+ image_index = 0
+
+ top_stories = []
for i in range(len(h1RandomSources)):
source=newsSourceArr[h1RandomSources[i]]
randomArticle=random.sample(range(len(source.h1Arr)), 1)[0]
article=source.h1Arr[randomArticle]
- template=template.replace('xxURL1-'+str(i+1)+'xx', article.url)
- template=template.replace('xxTitle1-'+str(i+1)+'xx', article.title)
- template=template.replace('xxImg1-'+str(i+1)+'xx', article.img)
- desc=article.description
- if len(desc)>144:
- desc=desc[:141]
- desc=desc.split()[:-1]
- desc=' '.join(desc)+' ...'
- template=template.replace('xxDesc1-'+str(i+1)+'xx', desc)
+ top_stories.append(article)
+ middle_stories = []
for i in range(len(h2RandomPairs)):
pair=h2RandomPairs[i]
article=newsSourceArr[pair[0]].h2Arr[pair[1]]
- template=template.replace('xxURL2-'+str(i+1)+'xx', article.url)
- template=template.replace('xxTitle2-'+str(i+1)+'xx', article.title)
- template=template.replace('xxImg2-'+str(i+1)+'xx', article.img)
+ middle_stories.append(article)
+ bottom_stories = []
for i in range(len(h3RandomPairs)):
pair=h3RandomPairs[i]
article=newsSourceArr[pair[0]].h3Arr[pair[1]]
- template=template.replace('xxURL3-'+str(i+1)+'xx', article.url)
- template=template.replace('xxTitle3-'+str(i+1)+'xx', article.title)
- template=template.replace('xxImg3-'+str(i+1)+'xx', article.img)
+ bottom_stories.append(article)
+ return top_stories, middle_stories, bottom_stories
- sourcesStr=''
- for i in range(len(newsSourceArr)-1):
- sourcesStr+=newsSourceArr[i].name+', '
- sourcesStr+=newsSourceArr[-1].name
- print('Successfully parsed: '+sourcesStr)
- template=template.replace('xxSourcesxx', sourcesStr)
-
+def buildOutput(top_stories, middle_stories, bottom_stories):
+ #read in the template html file
+ from jinja2 import Environment, PackageLoader, select_autoescape
+ env = Environment(
+ loader=PackageLoader('unbiased', 'html_template'),
+ autoescape=select_autoescape(['html', 'xml'])
+ )
+ template = env.get_template('unbiased.jinja.html')
+
+ timestamp = time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
+ utime = int(time.time())
+
+ sourcesStr = ', '.join(set([x.source for x in top_stories] + [x.source for x in middle_stories] + [x.source for x in bottom_stories]))
+
+ html = template.render(
+ timestamp = timestamp,
+ utime = utime,
+ top_stories = top_stories,
+ middle_stories = middle_stories,
+ bottom_stories = bottom_stories,
+ sources = sourcesStr,
+ )
#return updated text
- return template
-
-def printOutputHTML(outputHTML, outFile):
- timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
- outputHTML=outputHTML.replace('xxTimexx', timestamp)
-
- f=open(outFile, 'w')
- f.write(outputHTML)
- f.close()
-
-def buildNewsSourceArr(sourceList):
-
- #build the data structure
- i=0
- listLen=len(sourceList)
- while i < listLen:
- source=sourceList[i]
-
- if type(source) is NewsSource2:
- i+=1
- continue
-
- url=source.url
-
- #download file
- os.system('wget -q -O scratch/temp'+str(i)+'.html --no-check-certificate '+url)
-
- #read file
- f=open('scratch/temp'+str(i)+'.html', 'r', encoding="utf8")
- content=f.read()
- f.close()
-
- #delete file MAYBE DON'T DO THIS? CAUSES OS ERRORS
- #os.remove('scratch/temp'+str(i)+'.html')
-
- #add stories etc to the NewsSource object
- h1s, h2s, h3s=extractURLs(content, source)
-
- #build the Article objects and add to newsSource's appropriate list
- if h1s!=None and h2s!=None:
- for url in h1s:
- article=buildArticle(url, source.name)
- if article!=None: source.addArticle(article, 1) #sourceList[i].h1Arr.append(article)
- for url in h2s:
- article=buildArticle(url, source.name)
- if article!=None: sourceList[i].h2Arr.append(article)
- for url in h3s:
- article=buildArticle(url, source.name)
- if article!=None: sourceList[i].h3Arr.append(article)
- i+=1
- else:
- sourceList.remove(source)
- listLen-=1
-
-
- #return the original sourceList,
- #since everything should have been modified in place
- return sourceList
+ return html
+
+def writeOutputHTML(outputHTML, outDir):
+    """Write the rendered page and its static assets into outDir.
+
+    outputHTML -- rendered page text; saved as outDir/index.html
+    outDir     -- directory to write into (it is not created here)
+
+    The stylesheet and icons shipped in the package's html_template
+    directory are copied next to index.html.
+    """
+    # Be explicit about the encoding: headlines routinely contain non-ASCII
+    # characters and the locale default may be ASCII when run as a service.
+    # NOTE(review): assumes the template declares UTF-8 -- confirm.
+    with open(os.path.join(outDir, 'index.html'), 'w', encoding='utf-8') as fp:
+        fp.write(outputHTML)
+
+    # copy over static package files
+    for filename in ['unbiased.css', 'favicon.ico', 'favicon.png', 'apple-touch-icon.png']:
+        # pkgutil resource names always use '/' as the separator,
+        # whatever os.sep is on the running platform
+        data = pkgutil.get_data('unbiased', 'html_template/' + filename)
+        if data is None:
+            # the package loader could not supply the resource
+            logger.debug('Static file not available in package: ' + filename)
+            continue
+        with open(os.path.join(outDir, filename), 'wb') as fp:
+            fp.write(data)
+
+def pullImage(url, index, webroot, target_width=350, target_height=200):
+    """Download an image, fit it to the target shape and save it as a JPEG.
+
+    url           -- address of the image to fetch
+    index         -- number used to build the output name, img<index>.jpg
+    webroot       -- directory the JPEG is written to
+    target_width  -- display width; the saved image is at most twice this
+    target_height -- display height; the saved image is at most twice this
+
+    Returns the saved file name (relative to webroot), or '' when the
+    image could not be downloaded or opened.
+    """
+    try:
+        res = requests.get(url)
+    except requests.RequestException:
+        logger.debug('Image download failed: url={}'.format(url))
+        return ''
+    if res.status_code != 200:
+        logger.debug('Image not found: url={}'.format(url))
+        return ''
+    try:
+        img = Image.open(io.BytesIO(res.content))
+    except OSError:
+        # the response body is not something Pillow can identify
+        logger.debug('Image not readable: url={}'.format(url))
+        return ''
+    # crop to aspect ratio
+    target_ar = target_width / target_height
+    # getbbox() returns None when the image has no non-zero pixels;
+    # fall back to the full frame in that case
+    left, top, right, bottom = img.getbbox() or ((0, 0) + img.size)
+    height = bottom - top
+    width = right - left
+    ar = width / height
+    if target_ar > ar:
+        # too tall: trim equal margins from top and bottom
+        new_height = (target_height / target_width) * width
+        bbox = (left, top + ((height - new_height) / 2), right, bottom - ((height - new_height) / 2))
+        img = img.crop(bbox)
+    elif target_ar < ar:
+        # too wide: trim equal margins from left and right
+        new_width = (target_width / target_height) * height
+        bbox = (left + ((width - new_width) / 2), top, right - ((width - new_width) / 2), bottom)
+        img = img.crop(bbox)
+    # resize if larger -- measured after cropping, so an image that was only
+    # oversized along the trimmed axis is not scaled up
+    width, height = img.size
+    if target_width * 2 < width or target_height * 2 < height:
+        img = img.resize((target_width*2, target_height*2), Image.LANCZOS)
+    # TODO: create retina images
+    # JPEG cannot store alpha or palette data; flatten such images first
+    if img.mode not in ('RGB', 'L', 'CMYK'):
+        img = img.convert('RGB')
+    jpg_name = 'img{}.jpg'.format(index)
+    out_file = os.path.join(webroot, jpg_name)
+    img.save(out_file, 'JPEG')
+    return jpg_name
diff --git a/unbiasedObjects.py b/unbiased/unbiasedObjects.py index 3affbe6..7908fbb 100644 --- a/unbiasedObjects.py +++ b/unbiased/unbiasedObjects.py @@ -1,3 +1,7 @@ +import logging
+
+logger = logging.getLogger('unbiased')
+
class Article():
title=''
url=''
@@ -86,5 +90,5 @@ class NewsSource(): elif level==3:
self.h3Arr.append(article)
else:
- print("Error: invalid level in NewsSource.addArtlce: ", level)
+ # level is a number, so let logging format it rather than concatenating
+ logger.debug("Invalid level in NewsSource.addArticle: %s", level)
|