summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 9
-rw-r--r-- etc/unbiased.service 12
-rw-r--r-- html_template/newtemplate.html 150
-rwxr-xr-x html_template/template.html 173
-rwxr-xr-x html_template/unbiased.css 220
-rwxr-xr-x main.py 61
-rw-r--r-- setup.py 25
-rw-r--r-- unbiased/__init__.py (renamed from scratch/do_not_delete) 0
-rw-r--r-- unbiased/html_template/apple-touch-icon.png bin 0 -> 7036 bytes
-rw-r--r-- unbiased/html_template/favicon.ico bin 0 -> 4414 bytes
-rw-r--r-- unbiased/html_template/favicon.png bin 0 -> 1093 bytes
-rwxr-xr-x unbiased/html_template/unbiased.css 172
-rw-r--r-- unbiased/html_template/unbiased.jinja.html 79
-rwxr-xr-x unbiased/main.py 145
-rwxr-xr-x unbiased/parser.py (renamed from parser.py) 63
-rwxr-xr-x unbiased/spotCheck.py (renamed from spotCheck.py) 6
-rw-r--r-- unbiased/unbiasedFunctions.py (renamed from unbiasedFunctions.py) 251
-rw-r--r-- unbiased/unbiasedObjects.py (renamed from unbiasedObjects.py) 6
18 files changed, 615 insertions, 757 deletions
diff --git a/.gitignore b/.gitignore
index 65c8f8e..9e0f924 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,15 @@
*.pyc
*~
+.DS_Store
__pycache__/
-scratch/*.html
+unbiased/scratch/*.html
legacy_py/
unbiased.html
html_template/Penguins.jpg
html_template/BAK*
+build/
+dist/
+venv/
+unbiased.egg-info/
#*
-.#* \ No newline at end of file
+.#*
diff --git a/etc/unbiased.service b/etc/unbiased.service
new file mode 100644
index 0000000..391e4ff
--- /dev/null
+++ b/etc/unbiased.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=Unbiased News
+
+[Service]
+Type=simple
+ExecStart=/opt/unbiased/venv/bin/unbiased /opt/unbiased/webroot -l /opt/unbiased/logs
+WorkingDirectory=/opt/unbiased
+User=www-data
+Restart=on-failure
+
+[Install]
+WantedBy=multi-user.target
diff --git a/html_template/newtemplate.html b/html_template/newtemplate.html
deleted file mode 100644
index 0cec766..0000000
--- a/html_template/newtemplate.html
+++ /dev/null
@@ -1,150 +0,0 @@
-<!DOCTYPE html>
-<html>
- <head>
- <meta charset="utf-8">
- <link rel="stylesheet" href="unbiased.css">
- <title>UnBiased</title>
- </head>
-<body>
-
-<div id="page-header">
- <span id="title-1" class="title">un</span><span id="title-2" class="title">biased</span><br />
- <span id="subtitle">a different way to read the news</span>
- <p id="timestamp">Last updated: Mon, Feb 13, 7:51pm EST</p>
-</div>
-
-<div id="page-container">
- <div id="top-stories">
-
- <div class="top-story">
- <a target="_blank" id="top-story-1" href="" onclick="location.href='xxURL1-1'">
- <div class="top-stories-img" style="background-image: url('http://www.theblaze.com/wp-content/uploads/2017/02/GettyImages-465794068-1280x720.jpg');" />
- </div>
- <div class="top-stories-hed">Rand Paul and Cory Booker push bipartisan effort to limit solitary confinement for juveniles</div>
- </a>
- <div class="top-stories-desc">Sen. Rand Paul (R-Ky) and Sen &hellip;</div>
- </div>
-
- <div class="top-story">
- <a target="_blank" href="" onclick="location.href='xxURL1-2'">
- <div class="top-stories-img" style="background-image: url('http://cdn.weeklystandard.biz/cache/r960-90b8d8d5cbcef212ecae2a5c455fed8f.jpg');" />
- </div>
- <div class="top-stories-hed">Bibi and Donald</div>
- </a>
- <div class="top-stories-desc">This week, Israel&#039;s prime minister will visit Washington and meet with our new president. They will have a complex agenda. Benjamin ...</div>
- </div>
-
- <div class="top-story">
- <a target="_blank" href="" onclick="location.href='xxURL1-3'">
- <div class="top-stories-img" style="background-image: url('https://static01.nyt.com/images/2017/02/13/multimedia/DavidOyelowo-UnitedKingdom/DavidOyelowo-UnitedKingdom-facebookJumbo.png');" />
- </div>
- <div class="top-stories-hed">David Oyelowo on How to Play a Real King</div>
- </a>
- <div class="top-stories-desc">He stars in “A United Kingdom,” about the Botswana leader who married a white woman and set off an international crisis.</div>
- </div>
-
- <div class="top-story">
- <a target="_blank" href="" onclick="location.href='xxURL1-4'">
- <div class="top-stories-img" style="background-image: url('http://a57.foxnews.com/images.foxnews.com/content/fox-news/us/2017/02/13/judge-orders-ohio-village-to-pay-back-3-million-to-lead-footed-drivers/_jcr_content/par/featured-media/media-0.img.jpg/0/0/1487019011476.jpg?ve=1');" />
- </div>
- <div class="top-stories-hed">Judge orders Ohio village to pay back $3 million to lead-footed drivers</div>
- </a>
- <div class="top-stories-desc">Speed cameras became a cash cow for the small village of New Miami, Ohio.</div>
- </div>
-
- </div>
-
- <div id="middle-stories">
-
- <a target="_blank" href="" onclick="location.href='xxURL2-1'">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('http://www.theblaze.com/wp-content/uploads/2017/02/GettyImages-635148734-1280x720.jpg');">
- </div>
- <div class="middle-stories-hed">DHS says 75 percent of those detained in ICE raids last week were ‘criminal aliens’</div>
- </div>
- </a>
-
- <a target="_blank" href="" onclick="location.href='xxURL2-2'">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('http://a57.foxnews.com/media2.foxnews.com/BrightCove/694940094001/2017/02/12/0/0/694940094001_5320280093001_5320267547001-vs.jpg?ve=1');">
- </div>
- <div class="middle-stories-hed">Drama grips Trump inner circle, as president charges ahead on agenda</div>
- </div>
- </a>
-
- <a target="_blank" href="" onclick="location.href='xxURL2-3'">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('http://ichef.bbci.co.uk/news/1024/cpsprodpb/C9C5/production/_94635615_6c33162f-1c24-487d-8a51-bb7b13ec063f.jpg');">
- </div>
- <div class="middle-stories-hed">Ku Klux Klan killing: Frank Ancona's wife and stepson charged - BBC News</div>
- </div>
- </a>
-
- <a target="_blank" href="" onclick="location.href='xxURL2-4'">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('http://media1.s-nbcnews.com/j/newscms/2017_07/1900281/13217-oroville-dam-724a-rs_4a8b5ba9690488f11410f156833e1b70.nbcnews-fp-1200-800.jpg');">
- </div>
- <div class="middle-stories-hed">Nearly 190,000 ordered to evacuate in California dam spillway failure</div>
- </div>
- </a>
-
- <a target="_blank" href="" >
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('http://cbsnews1.cbsistatic.com/hub/i/2017/02/13/4ad800d9-69ba-4102-a8ec-af12e8eb6adb/021317-news.jpg');">
- </div>
- <div class="middle-stories-hed">Jerry Sandusky's son, 41, arrested on child sex charges</div>
- </div>
- </a>
-
- <a target="_blank" href="" >
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('https://static01.nyt.com/images/2017/02/14/us/14townhall1/14townhall1-facebookJumbo.jpg');">
- </div>
- <div class="middle-stories-hed">Angry Town Hall Meetings on Health Care Law, and Few Answers</div>
- </div>
- </a>
-
-
- </div>
-
- <div id="bottom-stories">
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-1xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-2xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-3xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-4xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-5xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-6xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-7xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" href="">xxTitle3-8xx</a>
- </div>
-</div>
-
-</div>
-
-<div id="sources">
- Sources: BBC US, NBC News, CBS News, The Blaze, Weekly Standard, New York Times, Fox News
-</div>
-</body>
-</html>
diff --git a/html_template/template.html b/html_template/template.html
deleted file mode 100755
index fc17006..0000000
--- a/html_template/template.html
+++ /dev/null
@@ -1,173 +0,0 @@
-<!DOCTYPE html>
-<html>
- <head>
- <meta name="viewport" content="width=device-width; initial-scale=1.0; maximum-scale=1.0; user-scalable=0;" />
- <meta charset="utf-8">
- <link rel="stylesheet" href="unbiased.css">
- <title>UnBiased</title>
- </head>
-<body>
-
-<div id="page-header">
- <span id="title-1" class="title">un</span><span id="title-2" class="title">biased</span><br />
- <span id="subtitle">a different way to read the news</span>
- <p id="timestamp">Last updated: xxTimexx</p>
-</div>
-
-<div id="page-container">
- <div id="top-stories">
- <div class="row">
-
- <div class="top-story">
- <a target="_blank" onclick="window.open('xxURL1-1xx', '_blank')">
- <div class="top-stories-img" style="background-image: url('xxImg1-1xx');" /></div>
- <div class="top-stories-hed">xxTitle1-1xx</div>
- </a>
- <div class="top-stories-desc">xxDesc1-1xx</div>
- </div>
-
- <div class="top-story">
- <a target="_blank" onclick="window.open('xxURL1-2xx', '_blank')">
- <div class="top-stories-img" style="background-image: url('xxImg1-2xx');" />
- </div>
- <div class="top-stories-hed">xxTitle1-2xx</div>
- </a>
- <div class="top-stories-desc">xxDesc1-2xx</div>
- </div>
-
- </div>
-
-<div class="row">
-
- <div class="top-story">
- <a target="_blank" onclick="window.open('xxURL1-3xx', '_blank')">
- <div class="top-stories-img" style="background-image: url('xxImg1-3xx');" />
- </div>
- <div class="top-stories-hed">xxTitle1-3xx</div>
- </a>
- <div class="top-stories-desc">xxDesc1-3xx</div>
- </div>
-
- <div class="top-story">
- <a target="_blank" onclick="window.open('xxURL1-4xx', '_blank')">
- <div class="top-stories-img" style="background-image: url('xxImg1-4xx');" />
- </div>
- <div class="top-stories-hed">xxTitle1-4xx</div>
- </a>
- <div class="top-stories-desc">xxDesc1-4xx</div>
- </div>
-
- </div>
-
- </div>
-
- <div id="middle-stories">
-
- <a target="_blank" onclick="window.open('xxURL2-1xx', '_blank')">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('xxImg2-1xx');">
- </div>
- <div class="middle-stories-hed">xxTitle2-1xx</div>
- </div>
- </a>
-
- <a target="_blank" onclick="window.open('xxURL2-2xx', '_blank')">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('xxImg2-2xx');">
- </div>
- <div class="middle-stories-hed">xxTitle2-2xx</div>
- </div>
- </a>
-
- <a target="_blank" onclick="window.open('xxURL2-3xx', '_blank')">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('xxImg2-3xx');">
- </div>
- <div class="middle-stories-hed">xxTitle2-3xx</div>
- </div>
- </a>
-
- <a target="_blank" onclick="window.open('xxURL2-4xx', '_blank')">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('xxImg2-4xx');">
- </div>
- <div class="middle-stories-hed">xxTitle2-4xx</div>
- </div>
- </a>
-
- <a target="_blank" onclick="window.open('xxURL2-5xx', '_blank')">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('xxImg2-5xx');">
- </div>
- <div class="middle-stories-hed">xxTitle2-5xx</div>
- </div>
- </a>
-
- <a target="_blank" onclick="window.open('xxURL2-6xx', '_blank')">
- <div class="middle-story">
- <div class="middle-stories-img" style="background-image: url('xxImg2-6xx');">
- </div>
- <div class="middle-stories-hed">xxTitle2-6xx</div>
- </div>
- </a>
-
-
- </div>
-
- <div id="bottom-stories">
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-1xx', '_blank')">xxTitle3-1xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-2xx', '_blank')">xxTitle3-2xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-3xx', '_blank')">xxTitle3-3xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-4xx', '_blank')">xxTitle3-4xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-5xx', '_blank')">xxTitle3-5xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-6xx', '_blank')">xxTitle3-6xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-7xx', '_blank')">xxTitle3-7xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-8xx', '_blank')">xxTitle3-8xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-9xx', '_blank')">xxTitle3-9xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-10xx', '_blank')">xxTitle3-10xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-11xx', '_blank')">xxTitle3-11xx</a>
- </div>
-
- <div class="bottom-story">
- <a target="_blank" onclick="window.open('xxURL3-12xx', '_blank')">xxTitle3-12xx</a>
- </div>
-</div>
-
-</div>
-
-<div id="sources">
- Sources: xxSourcesxx
-</div>
-</body>
-</html>
diff --git a/html_template/unbiased.css b/html_template/unbiased.css
deleted file mode 100755
index 244f100..0000000
--- a/html_template/unbiased.css
+++ /dev/null
@@ -1,220 +0,0 @@
-/*body{
- width:900px;
- margin-left:auto;
- margin-right:auto;
-}*/
-
-
-body{
- margin:0;
-}
-
-a:link, a:visited, a:hover, a:active {
- color: #00f;
- text-decoration:none;
- }
-
-a:hover{
- cursor:pointer;
-}
-
-#page-header{
- width:100%;
- text-align:center;
- padding:.5em 0 1em;
- margin-bottom:1em;
- border-bottom:3px solid #BB133E;
- background:#002147;
-}
-
-.title{
- font-size:3em;
-}
-
-#title-1{
- font-style:italic;
- color:#fff;
-}
-
-#title-2{
- color:#fff;
-}
-
-#subtitle{
- font-size:1.25em;
- color:#ccc;
-}
-
-#timestamp{
- margin:.5em 0 0 0;
- font-size:.8em;
- color:#cc6;
-}
-
-#page-container{
- width:900px;
- margin-left:auto;
- margin-right:auto;
-}
-
-@media only screen and (max-width:900px){
- #page-container{
- width:100%
- }
-}
-
-#top-stories{
- width:95%;
- display:block;
- overflow:auto;
- padding:10px;
- margin-left:auto;
- margin-right:auto;
- text-align:center;
- border-bottom: 3px solid #BB133E;
- margin-bottom: 10px;
-}
-
-.row{
- display:flex;
-}
-
-.top-story{
- display:inline-block;
- vertical-align:top;
- text-align:left;
- width:360px;
- height:auto;
- overflow:hidden;
- background:#fff;
- margin:10px;
- padding:10px;
- border:2px solid #ccc;
- flex:1;
-}
-
-@media only screen and (max-width:500px){
- .row{
- display:block;
- }
- .top-story{
- display:block;
- width:auto;
- height:auto;
- }
-}
-
-.top-stories-img{
- width:350px;
- height:200px;
- overflow:hidden;
- background-size: auto 234px;/*cover;*/
- background-position: top center;/*center center;*/
- margin:0 auto;
-}
-
-@media only screen and (max-width:500px){
- .top-stories-img{
- width:auto;
- }
-}
-
-
-.top-stories-hed{
- font-weight:bold;
- font-size:1.35em;
- margin:10px 10px 0;
- color:#00f;
-}
-
-.top-stories-desc{
- font-size:1em;
- padding-top:.5em;
- margin:0 .75em;
-}
-
-#middle-stories{
- clear:both;
- width:500px;
- margin:0 auto;
- padding:0;
- display:block;
- overflow:auto;
- float:left;
-}
-
-@media only screen and (max-width:500px){
- #middle-stories{
- width:100%;
- float:none;
- }
-}
-
-.middle-story{
- margin:5px 10px;
- padding:10px;
- background:#fff;
- border:2px solid #ddd;
- width:460px;
- float:left;
-}
-
-@media only screen and (max-width:500px){
- .middle-story{
- width:auto;
- }
-}
-
-.middle-stories-img{
- width:150px;
- height:100px;
- overflow:hidden;
- background-size: auto 117px;/*cover;*/
- background-position: top center;/*center center;*/
- float:left;
- max-width:35%;
-}
-
-.middle-stories-hed{
- font-size:1.2em;
- float:left;
- width:300px;
- margin-left:10px;
- color:#00f;
-}
-
-@media only screen and (max-width:500px){
- .middle-stories-hed{
- max-width:60%;
- }
-}
-
-#bottom-stories{
- margin:0 10px;
- padding:10px;
- display:block;
- overflow:auto;
- float:left;
- width:350px;
- border:5px solid #ddd;
-}
-
-@media only screen and (max-width:900px){
- #bottom-stories{
- width:auto;
- border-width:3px;
- float:none;
- }
-}
-
-.bottom-story{ color:#00f;
-
- padding:15px 0;
- color:#00f;
-}
-
-#sources{
- clear:both;
- padding-top:4em;
- font-size:.8em;
-} \ No newline at end of file
diff --git a/main.py b/main.py
deleted file mode 100755
index a109d2f..0000000
--- a/main.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env python3
-
-from unbiasedObjects import *
-from unbiasedFunctions import *
-from parser import *
-import time
-
-
-def main():
- while True:
- print('-----------------------')
- run()
- print('-----------------------')
- time.sleep(600)
-
-def run():
- sourceList=[]
-
- '''
-
- SOURCES TO ADD NEXT:
- -ABC
- -REUTERS
- -Town Hall
-
- '''
-
-
- ### These values have to be the second half of the function name
- ### E.g. Guardian calls buildGuardian(), etc.
- sourceFnArr=['Guardian', 'TheHill', 'NPR', 'BBC', 'NBC', 'CBS',
- 'FoxNews', 'WashTimes', 'CSM', 'ABC'] #'Blaze'
-
- for source in sourceFnArr:
- tries=0
- while tries<3:
- try:
- fn='build'+source
- possibles = globals().copy()
- possibles.update(locals())
- method = possibles.get(fn)
- src=method()
- sourceList.append(src)
- break
- except:
- print('Build error. Looping again: '+source)
- tries+=1
- time.sleep(tries)
-
- #scrape all urls and build data structure
- newsSourceArr=buildNewsSourceArr(sourceList)
-
- #build the output file HTML
- outputHTML=buildOutput(newsSourceArr)
-
- #print the output file HTML
- printOutputHTML(outputHTML, '/var/www/html/index.html')
-
-
-if __name__=="__main__":
- main()
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..2761041
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,25 @@
+from setuptools import setup
+
+setup(
+ name="unbiased",
+ version="1",
+ packages=['unbiased'],
+ package_data={
+ 'unbiased': [
+ 'html_template/*.html',
+ 'html_template/*.css',
+ 'html_template/*.ico',
+ 'html_template/*.png',
+ ],
+ },
+ install_requires=[
+ 'jinja2',
+ 'Pillow',
+ 'requests',
+ ],
+ entry_points={
+ 'console_scripts': [
+ 'unbiased = unbiased.main:main',
+ ],
+ },
+)
diff --git a/scratch/do_not_delete b/unbiased/__init__.py
index e69de29..e69de29 100644
--- a/scratch/do_not_delete
+++ b/unbiased/__init__.py
diff --git a/unbiased/html_template/apple-touch-icon.png b/unbiased/html_template/apple-touch-icon.png
new file mode 100644
index 0000000..93c33aa
--- /dev/null
+++ b/unbiased/html_template/apple-touch-icon.png
Binary files differ
diff --git a/unbiased/html_template/favicon.ico b/unbiased/html_template/favicon.ico
new file mode 100644
index 0000000..b2b29c6
--- /dev/null
+++ b/unbiased/html_template/favicon.ico
Binary files differ
diff --git a/unbiased/html_template/favicon.png b/unbiased/html_template/favicon.png
new file mode 100644
index 0000000..0b94313
--- /dev/null
+++ b/unbiased/html_template/favicon.png
Binary files differ
diff --git a/unbiased/html_template/unbiased.css b/unbiased/html_template/unbiased.css
new file mode 100755
index 0000000..dc99ab7
--- /dev/null
+++ b/unbiased/html_template/unbiased.css
@@ -0,0 +1,172 @@
+body {
+ margin: 0;
+}
+
+a:link, a:visited, a:hover, a:active, a {
+ color: #00f;
+ text-decoration:none;
+}
+
+a:hover {
+ cursor:pointer;
+}
+
+hr {
+ max-width: 890px;
+ margin: 5px auto;
+ border: 0;
+ height: 3px;
+ background-color: #BB133E;
+}
+
+#page-header {
+ width: 100%;
+ text-align: center;
+ padding: .5em 0 1em;
+ margin-bottom: 1em;
+ border-bottom: 3px solid #BB133E;
+ background: #002147;
+}
+
+.title {
+ font-size: 3em;
+}
+
+#title-1 {
+ font-style: italic;
+ color: #fff;
+}
+
+#title-2 {
+ color: #fff;
+}
+
+#subtitle {
+ font-size: 1.25em;
+ color: #ccc;
+}
+
+#timestamp {
+ margin: .5em 0 0 0;
+ font-size: .8em;
+ color: #cc6;
+}
+
+#top-stories {
+ max-width: 900px;
+ display: flex;
+ flex-wrap: wrap;
+ margin: 5px auto;
+}
+
+.top-story {
+ flex: 1 0 350px;
+ margin: 5px;
+ padding: 10px;
+ border:2px solid #eee;
+}
+
+.top-stories-img {
+ width: 100%;
+ padding-bottom: 57%;
+ background-size: 100%;
+ background-position: center center;
+}
+
+.top-stories-hed {
+ font-size: 1.3em;
+ margin: 10px 0;
+ color: #00f;
+}
+
+.top-stories-desc {
+ font-size: 1em;
+}
+
+.c2 {
+ max-width: 900px;
+ display: flex;
+ flex-wrap: wrap;
+ margin: 5px auto;
+}
+
+.c2 hr {
+ display: none;
+}
+
+#middle-stories {
+ flex: 7 0 200px;
+}
+
+.middle-story {
+ margin: 5px;
+ border: 2px solid #eee;
+}
+
+.middle-story a {
+ padding: 10px;
+ display: inline-block;
+}
+
+.middle-story a p {
+ margin: 0;
+}
+
+.middle-stories-img{
+ width: 150px;
+ height: 100px;
+ background-size: 100%;
+ background-position: center center;
+ float: left;
+ margin-right: 10px;
+}
+
+#middle-stories a {
+ font-size: 1.1em;
+ color: #00f;
+}
+
+#bottom-stories {
+ flex: 3 0 200px;
+ border: 2px solid #eee;
+ margin: 5px;
+}
+
+.bottom-story {
+ padding: 10px;
+ color: #00f;
+}
+
+#sources {
+ margin: 2em 5px 0 5px;
+ font-size: .8em;
+}
+
+@media (max-width: 900px) {
+ hr {
+ width: inherit;
+ margin: 5px;
+ }
+}
+
+@media (max-width: 767px) {
+ #top-stories {
+ flex-wrap: nowrap;
+ flex-direction: column;
+ }
+ .top-story {
+ flex: 1 0 250px;
+ }
+ .c2 {
+ flex-direction: column;
+ }
+ .c2 hr {
+ display: inherit;
+ }
+ #middle-stories {
+ flex: inherit;
+ }
+ #bottom-stories {
+ flex: inherit;
+ }
+}
diff --git a/unbiased/html_template/unbiased.jinja.html b/unbiased/html_template/unbiased.jinja.html
new file mode 100644
index 0000000..0d191e7
--- /dev/null
+++ b/unbiased/html_template/unbiased.jinja.html
@@ -0,0 +1,79 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=0" />
+ <meta charset="utf-8">
+ <link rel="stylesheet" href="unbiased.css">
+ <link rel="icon" sizes="32x32" href="/favicon.ico">
+ <link rel="icon" sizes="32x32" href="/favicon.png">
+ <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
+ <title>UnBiased</title>
+ </head>
+
+ <body>
+
+ <div id="page-header">
+ <span id="title-1" class="title">un</span><span id="title-2" class="title">biased</span><br />
+ <span id="subtitle">a different way to read the news</span>
+ <p id="timestamp">Last updated: {{ timestamp }}</p>
+ </div>
+
+ <div id="top-stories">
+
+ {% for story in top_stories %}{# one card per entry of top_stories #}
+
+ <div class="top-story">
+ <a target="_blank" onclick="window.open('{{ story.url }}', '_blank')">
+ <div class="top-stories-img" style="background-image: url('{{ story.img }}?{{ utime }}');"></div>
+ <div class="top-stories-hed">{{ story.title }}</div>
+ </a>
+ <div class="top-stories-desc">{{ story.description }}</div>
+ </div>
+
+ {% endfor %}
+
+ </div>
+
+ <hr/>
+
+ <div class="c2">
+
+ <div id="middle-stories">
+
+ {% for story in middle_stories %}{# one row per entry of middle_stories #}
+
+ <div class="middle-story">
+ <a target="_blank" onclick="window.open('{{ story.url }}', '_blank')">
+ <p>
+ <span class="middle-stories-img" style="background-image: url('{{ story.img }}?{{ utime }}');"></span>{# span, not div: a div is not allowed inside p; the float in .middle-stories-img still makes it a sized box #}
+ {{ story.title }}
+ </p>
+ </a>
+ </div>
+
+ {% endfor %}
+
+ </div>
+
+ <hr/>
+
+ <div id="bottom-stories">
+
+ {% for story in bottom_stories %}
+
+ <div class="bottom-story">
+ <a target="_blank" onclick="window.open('{{ story.url }}', '_blank')">{{ story.title }}</a>
+ </div>
+
+ {% endfor %}
+
+ </div>
+
+ </div>
+
+ <div id="sources">
+ Sources: {{ sources }}
+ </div>
+
+ </body>
+</html>
diff --git a/unbiased/main.py b/unbiased/main.py
new file mode 100755
index 0000000..caf77eb
--- /dev/null
+++ b/unbiased/main.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+
+import argparse
+import logging
+import logging.config
+import time
+
+from unbiased.unbiasedObjects import *
+from unbiased.unbiasedFunctions import *
+from unbiased.parser import *
+
+logger = logging.getLogger('unbiased')
+
+logging_config = {
+ 'version': 1,
+ 'formatters': {
+ 'console': {
+ 'format': '%(levelname)s %(filename)s:%(lineno)d %(message)s',
+ },
+ 'file': {
+ 'format': '%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(message)s',
+ },
+ },
+ 'handlers': {
+ 'console': {
+ 'class': 'logging.StreamHandler',
+ 'level': 'INFO',
+ 'formatter': 'console',
+ },
+ 'file': {
+ 'class': 'logging.handlers.RotatingFileHandler',
+ 'level': 'DEBUG',
+ 'formatter': 'file',
+ 'filename': '',
+ 'maxBytes': 1024 * 1024,
+ 'backupCount': 3,
+ },
+ },
+ 'loggers': {
+ 'unbiased': {
+ 'handlers': ['console', 'file'],
+ },
+ },
+ 'root': {
+ 'level': 'DEBUG',
+ }
+}
+
+def main():  # CLI entry point: parse args, configure logging, then crawl in an endless loop
+ parser = argparse.ArgumentParser()
+ parser.add_argument('webroot', help='location to write html output')
+ parser.add_argument('-l', '--log-dir', help='location to write detailed logs')
+ parser.add_argument('-d', '--debug', action='store_true', help='run in debug mode')
+ args = parser.parse_args()
+
+ if args.log_dir:
+ logging_config['handlers']['file']['filename'] = os.path.join(args.log_dir, 'unbiased.debug.log')
+ else:  # no log dir given: drop the file handler instead of configuring it with an empty filename
+ logging_config['loggers']['unbiased']['handlers'].remove('file')
+ del logging_config['handlers']['file']
+ if args.debug:
+ logging_config['handlers']['console']['level'] = 'DEBUG'
+ logging.config.dictConfig(logging_config)
+
+ crawl_frequency = 600  # target seconds between the start of one crawl and the start of the next
+ while True:
+ logger.info('Starting crawl')
+ start = time.time()
+ run(args.webroot)
+ finish = time.time()
+ runtime = finish - start
+ sleeptime = max(0, crawl_frequency - runtime)  # a crawl slower than the interval must not log a negative sleep
+ logger.info('Crawl complete in {}s. Sleeping for {}s'.format(int(runtime), int(sleeptime)))
+ if sleeptime > 0:
+ time.sleep(sleeptime)
+
+def run(webroot):
+ sources = []
+
+ '''
+ SOURCES TO ADD NEXT:
+ -REUTERS
+ -Town Hall
+ '''
+
+ logger.debug('Running with webroot="{}"'.format(webroot))
+
+ ### These values have to be the second half of the function name
+ ### E.g. Guardian calls buildGuardian(), etc.
+ sourceFnArr = [
+ 'Guardian',
+ 'TheHill',
+ 'NPR',
+ 'BBC',
+ 'NBC',
+ 'CBS',
+ 'FoxNews',
+ 'WashTimes',
+ 'CSM',
+ 'ABC',
+ ]
+
+ for source in sourceFnArr:
+ logger.info('Crawling {}'.format(source))
+ tries = 0
+ while tries < 3:
+ time.sleep(tries)
+ try:
+ fn = 'build' + source
+ possibles = globals().copy()
+ possibles.update(locals())
+ method = possibles.get(fn)
+ src = method()
+ sources.append(src)
+ break
+ except Exception as ex:
+ tries += 1
+ if tries == 3:
+ logger.error('Build failed. source={} ex={}'.format(source, ex))
+ else:
+ logger.debug('Build failed, retrying. source={} ex={}'.format(source, ex))
+ logger.info('Parsed home pages for: {}'.format([x.name for x in sources]))
+
+ top_stories, middle_stories, bottom_stories = pickStories(sources)
+ logger.info('Picked top stories from: {}'.format([x.source for x in top_stories]))
+ logger.info('Picked middle stories from: {}'.format([x.source for x in middle_stories]))
+ logger.info('Picked bottom stories from: {}'.format([x.source for x in bottom_stories]))
+
+ # download images
+ img_idx = 0
+ for story in top_stories:
+ story.img = pullImage(story.img, img_idx, webroot, 350, 200)
+ img_idx += 1
+ for story in middle_stories:
+ story.img = pullImage(story.img, img_idx, webroot, 150, 100)
+ img_idx += 1
+
+ #build the output file HTML
+ outputHTML = buildOutput(top_stories, middle_stories, bottom_stories)
+
+ #print the output file HTML
+ writeOutputHTML(outputHTML, webroot)
+
+if __name__=="__main__":
+ main()
diff --git a/parser.py b/unbiased/parser.py
index f69281b..05a7fc1 100755
--- a/parser.py
+++ b/unbiased/parser.py
@@ -1,9 +1,16 @@
#!/usr/bin/env python3
-from unbiasedObjects import *
-from unbiasedFunctions import buildArticle
+import logging
import os
import re
+import urllib.parse
+
+import requests
+
+from unbiased.unbiasedObjects import *
+from unbiased.unbiasedFunctions import buildArticle
+
+logger = logging.getLogger('unbiased')
'''
@@ -11,18 +18,11 @@ Takes in a URL, downloads the file to a temp file,
reads the file into a string, and returns that string
'''
def urlToContent(url, sourceEncoding='utf8'):
- #download file
- os.system('wget -q -O scratch/temp1.html --no-check-certificate '+url)
-
- #read file
- if sourceEncoding=='utf8':
- f=open('scratch/temp1.html', 'r', encoding="utf8")
+ res = requests.get(url, timeout=30)  # bound the wait: without a timeout one stalled server can hang the crawl forever
+ if res.status_code == 200:
+ return res.text
else:
- f=open('scratch/temp1.html', 'r', encoding="latin-1")
- content=f.read()
- f.close()
-
- return content
+ raise Exception("Failed to download {}".format(url))
'''
@@ -31,10 +31,17 @@ calls the file scraper and appends the new Article object.
Returns a newsSource2 object
'''
def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs):
+
+ url_parts = urllib.parse.urlparse(url)
+ scheme = url_parts.scheme
+ h1URLs = [urllib.parse.urlparse(x, scheme=scheme).geturl() for x in h1URLs]
+ h2URLs = [urllib.parse.urlparse(x, scheme=scheme).geturl() for x in h2URLs]
+ h3URLs = [urllib.parse.urlparse(x, scheme=scheme).geturl() for x in h3URLs]
+
h1Arr=[]
a=buildArticle(h1URLs[0], name)
if a==None:
- print('................\nH1 Nonetype in '+name+'\n................')
+ logger.debug('H1 Nonetype in '+name)
else:
h1Arr.append(a)
@@ -44,16 +51,15 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs):
if a!=None:
h2Arr.append(a)
else:
- print('................\nH2 Nonetype in '+name+'\n................')
+ logger.debug('H2 Nonetype in '+name)
-
h3Arr=[]
for x in h3URLs:
a=buildArticle(x, name)
if a!=None:
h3Arr.append(a)
else:
- print('................\nH3 Nonetype in '+name+'\n................')
+ logger.debug('H3 Nonetype in '+name)
#BUILD THE NEWS SOURCE
newsSource=NewsSource2(name, url, h1Arr, h2Arr, h3Arr)
@@ -114,13 +120,11 @@ def removeDuplicates(h1s, h2s, h3s):
def removalNotification(source, title, reason, value):
- print('*************************')
- print('\t\tSTORY REMOVED')
- print('SOURCE: '+source)
- print('TITLE: \t'+title)
- print('REASON: '+reason)
- print('VALUE: \t'+value)
- print('*************************\n\n')
+ logger.debug("""Story removed
+ SOURCE:\t{}
+ TITLE:\t{}
+ REASON:\t{}
+ VALUE:\t{}""".format(source, title, reason, value))
def removeBadStoriesHelper(source, element, badStringList, arr):
@@ -128,7 +132,7 @@ def removeBadStoriesHelper(source, element, badStringList, arr):
for i in range(len(arr)):
for hed in arr[i]:
if hed==None:
- print("////////\nNone type found in removeBadStoriesHelper for "+source.name+"\n/////////")
+ logger.debug("None type found in removeBadStoriesHelper for "+source.name)
break
for item in badStringList:
if item in getattr(hed, element):
@@ -220,7 +224,7 @@ def buildGuardian():
if h1!='https://www.theguardian.com/us':
break
else:
- print('Guardian loop')
+ logger.debug('Guardian loop')
h1s=[h1]
@@ -822,6 +826,7 @@ def buildFoxNews():
h1=h1.split('<h1><a href="', 1)[1]
h1=h1.split('"', 1)[0]
h1s=[h1]
+ h1s = ['http:' + x if x.startswith('//') else x for x in h1s]
#GET SECONDARY HEADLINES
h2=content
@@ -833,6 +838,7 @@ def buildFoxNews():
x=h2.split('"', 1)[0]
if h1 not in x:
h2s.append(x)
+ h2s = ['http:' + x if x.startswith('//') else x for x in h2s]
#GET TERTIARY HEADLINES
h3=content
@@ -844,14 +850,15 @@ def buildFoxNews():
x=h3.split('"', 1)[0]
if h1 not in x:
h3s.append(x)
+ h3s = ['http:' + x if x.startswith('//') else x for x in h3s]
- h1s, h2s, h3s = removeDuplicates([h1], h2s, h3s)
+ h1s, h2s, h3s = removeDuplicates(h1s, h2s, h3s)
fox=buildNewsSource2(name, url, h1s, h2s, h3s)
#REMOVE BAD STORIES
badTitleArr=['O&#039;Reilly', 'Fox News', 'Brett Baier', 'Tucker']
badDescArr=['Sean Hannity']
- badAuthorArr=['Bill O\'Reilly', 'Sean Hannity']
+ badAuthorArr=['Bill O\'Reilly', 'Sean Hannity', 'Howard Kurtz']
badImgArr=['http://www.foxnews.com/content/dam/fox-news/logo/og-fn-foxnews.jpg']
badURLArr=['http://www.foxnews.com/opinion', 'videos.foxnews.com']
fox=removeBadStories(fox, badTitleArr, badDescArr, badAuthorArr, badImgArr, badURLArr)
diff --git a/spotCheck.py b/unbiased/spotCheck.py
index d1edda4..7ce50d3 100755
--- a/spotCheck.py
+++ b/unbiased/spotCheck.py
@@ -1,10 +1,10 @@
#!/usr/bin/env python3
-
-from parser import *
-from unbiasedObjects import *
import sys
+from unbiased.parser import *
+from unbiased.unbiasedObjects import *
+
def spotCheck(src):
fns = {'hil' : buildTheHill,
diff --git a/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py
index 1a80d7a..cb13a44 100644
--- a/unbiasedFunctions.py
+++ b/unbiased/unbiasedFunctions.py
@@ -1,29 +1,50 @@
-from unbiasedObjects import *
+import html
+import io
+import logging
import os
+import pkgutil
import random
-import time
import re
+import time
+import urllib.parse
+
+from PIL import Image
+import requests
+
+from unbiased.unbiasedObjects import *
+logger = logging.getLogger('unbiased')
#take in a url and delimiters, return twitter card
def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd, imgDelStart, imgDelEnd):
debugging=False
if debugging:
- print(sourceName)
- print(url)
- print()
-
+ logger.debug(sourceName)
+ logger.debug(url)
+
+ url_parts = urllib.parse.urlparse(url)
+ scheme = url_parts.scheme
+
#download url
- os.system('wget -q -O scratch/temp_article.html --no-check-certificate '+url)
+ try:
+ res = requests.get(url)
+ except Exception as ex:
+ logger.debug("""ARTICLE DOWNLOADING ERROR
+ SOURCE:\t{}
+ URL:\t{}""".format(sourceName, url))
+ return None
- #read the file in
- f=open('scratch/temp_article.html', 'r', encoding="utf8")
- content=f.read()
- f.close()
+ if res.status_code == 200:
+ content = res.text
+ else:
+ logger.debug("""ARTICLE DOWNLOADING ERROR
+ SOURCE:\t{}
+ URL:\t{}""".format(sourceName, url))
+ return None
try:
- if sourceName=='The Guardian':
+ if sourceName=='The Guardian US':
#The Guardian puts an identifying banner on their og:images
#grab the main image from the page instead
@@ -39,20 +60,23 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd,
elif '<img class="immersive-main-media__media"' in content:
img=content.split('<img class="immersive-main-media__media"', 1)[1]
img=img.split('src="', 1)[1].split('"')[0]
-
+ img = html.unescape(img)
+
else:
if 'og:image' in content:
img=content.split('og:image" content=')[1][1:].split('>')[0]
elif sourceName=='ABC News':
img='https://c1.staticflickr.com/7/6042/6276688407_12900948a2_b.jpgX'
if img[-1]=='/':
- #because the quote separator could be ' or ",
+ #because the quote separator could be ' or ",
#trim to just before it then lop it off
img=img[:-1].strip()
img=img[:-1]
+ # fix the scheme if it's missing
+ img = urllib.parse.urlparse(img, scheme=scheme).geturl()
if debugging:
- print(img)
+ logger.debug(img)
title=content.split('og:title" content=')[1][1:].split('>')[0]
if title[-1]=='/':
@@ -60,7 +84,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd,
title=title[:-1]
if debugging:
- print(title)
+ logger.debug(title)
author=''
@@ -82,7 +106,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd,
break
if debugging:
- print(author)
+ logger.debug(author)
if 'og:description' in content:
@@ -96,7 +120,7 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd,
description=re.sub('<[^<]+?>', '', description)
description=description[1:200]
else:
- print("SHOULDN'T GET HERE")
+ logger.debug("SHOULDN'T GET HERE")
#strip out self-references
description=description.replace(sourceName+"'s", '***')
@@ -104,27 +128,20 @@ def buildArticle(url, sourceName, encoding=None):#, titleDelStart, titleDelEnd,
description=description.replace(sourceName, '***')
if debugging:
- print(description)
+ logger.debug(description)
- a=Article(title, url, img, description, sourceName, author)
+ a=Article(html.unescape(title), url, img, html.unescape(description), sourceName, html.unescape(author))
return a
- except:
- print('^^^^^^^^^^^^^^^^^^^^^^^^^')
- print('\tARTICLE PARSING ERROR')
- print('SOURCE: '+sourceName)
- print('URL: \t'+url)
- print('^^^^^^^^^^^^^^^^^^^^^^^^^ \n\n')
+ except Exception:
+ logger.debug("""ARTICLE PARSING ERROR
+ SOURCE:\t{}
+ URL:\t{}""".format(sourceName, url))
return None
-def buildOutput(newsSourceArr):
- #read in the template html file
- f=open('html_template/template.html', 'r')
- template=f.read()
- f.close()
-
+def pickStories(newsSourceArr):
#set the random order for sources
h1RandomSources=[]
while len(h1RandomSources)<4:
@@ -133,10 +150,10 @@ def buildOutput(newsSourceArr):
if x not in h1RandomSources:
h1RandomSources.append(x)
else:
- print('\n\n@@@@\nNo H1 stories in '+newsSourceArr[x].name+'\n@@@@\n\n')
-
+ logger.debug('No H1 stories in '+newsSourceArr[x].name)
+
#For h2s and h3s, select N random sources (can repeat), then
- #a non-repetitive random article from within
+ #a non-repetitive random article from within
h2RandomPairs=[]
while len(h2RandomPairs) < 6:
x=random.sample(range(len(newsSourceArr)), 1)[0]
@@ -146,114 +163,110 @@ def buildOutput(newsSourceArr):
if not pair in h2RandomPairs:
h2RandomPairs.append(pair)
else:
- print('\n\n@@@@\nNo H2 stories in '+newsSourceArr[x].name+'\n@@@@\n\n')
+ logger.debug('No H2 stories in '+newsSourceArr[x].name)
h3RandomPairs=[]
while len(h3RandomPairs) < 12:
x=random.sample(range(len(newsSourceArr)), 1)[0]
- print(newsSourceArr[x].name)
if len(newsSourceArr[x].h3Arr) > 0:
y=random.sample(range(len(newsSourceArr[x].h3Arr)), 1)[0]
pair=[x,y]
if not pair in h3RandomPairs:
h3RandomPairs.append(pair)
else:
- print('\n\n@@@@\nNo H3 stories in '+newsSourceArr[x].name+'\n@@@@\n\n')
+ logger.debug('No H3 stories in '+newsSourceArr[x].name)
- #replace html template locations with data from newsSourceArr
+ # collect articles for each section
+ image_index = 0
+
+ top_stories = []
for i in range(len(h1RandomSources)):
source=newsSourceArr[h1RandomSources[i]]
randomArticle=random.sample(range(len(source.h1Arr)), 1)[0]
article=source.h1Arr[randomArticle]
- template=template.replace('xxURL1-'+str(i+1)+'xx', article.url)
- template=template.replace('xxTitle1-'+str(i+1)+'xx', article.title)
- template=template.replace('xxImg1-'+str(i+1)+'xx', article.img)
- desc=article.description
- if len(desc)>144:
- desc=desc[:141]
- desc=desc.split()[:-1]
- desc=' '.join(desc)+' ...'
- template=template.replace('xxDesc1-'+str(i+1)+'xx', desc)
+ top_stories.append(article)
+ middle_stories = []
for i in range(len(h2RandomPairs)):
pair=h2RandomPairs[i]
article=newsSourceArr[pair[0]].h2Arr[pair[1]]
- template=template.replace('xxURL2-'+str(i+1)+'xx', article.url)
- template=template.replace('xxTitle2-'+str(i+1)+'xx', article.title)
- template=template.replace('xxImg2-'+str(i+1)+'xx', article.img)
+ middle_stories.append(article)
+ bottom_stories = []
for i in range(len(h3RandomPairs)):
pair=h3RandomPairs[i]
article=newsSourceArr[pair[0]].h3Arr[pair[1]]
- template=template.replace('xxURL3-'+str(i+1)+'xx', article.url)
- template=template.replace('xxTitle3-'+str(i+1)+'xx', article.title)
- template=template.replace('xxImg3-'+str(i+1)+'xx', article.img)
+ bottom_stories.append(article)
+ return top_stories, middle_stories, bottom_stories
- sourcesStr=''
- for i in range(len(newsSourceArr)-1):
- sourcesStr+=newsSourceArr[i].name+', '
- sourcesStr+=newsSourceArr[-1].name
- print('Successfully parsed: '+sourcesStr)
- template=template.replace('xxSourcesxx', sourcesStr)
-
+def buildOutput(top_stories, middle_stories, bottom_stories):
+ #read in the template html file
+ from jinja2 import Environment, PackageLoader, select_autoescape
+ env = Environment(
+ loader=PackageLoader('unbiased', 'html_template'),
+ autoescape=select_autoescape(['html', 'xml'])
+ )
+ template = env.get_template('unbiased.jinja.html')
+
+ timestamp = time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
+ utime = int(time.time())
+
+ sourcesStr = ', '.join(set([x.source for x in top_stories] + [x.source for x in middle_stories] + [x.source for x in bottom_stories]))
+
+ html = template.render(
+ timestamp = timestamp,
+ utime = utime,
+ top_stories = top_stories,
+ middle_stories = middle_stories,
+ bottom_stories = bottom_stories,
+ sources = sourcesStr,
+ )
#return updated text
- return template
-
-def printOutputHTML(outputHTML, outFile):
- timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
- outputHTML=outputHTML.replace('xxTimexx', timestamp)
-
- f=open(outFile, 'w')
- f.write(outputHTML)
- f.close()
-
-def buildNewsSourceArr(sourceList):
-
- #build the data structure
- i=0
- listLen=len(sourceList)
- while i < listLen:
- source=sourceList[i]
-
- if type(source) is NewsSource2:
- i+=1
- continue
-
- url=source.url
-
- #download file
- os.system('wget -q -O scratch/temp'+str(i)+'.html --no-check-certificate '+url)
-
- #read file
- f=open('scratch/temp'+str(i)+'.html', 'r', encoding="utf8")
- content=f.read()
- f.close()
-
- #delete file MAYBE DON'T DO THIS? CAUSES OS ERRORS
- #os.remove('scratch/temp'+str(i)+'.html')
-
- #add stories etc to the NewsSource object
- h1s, h2s, h3s=extractURLs(content, source)
-
- #build the Article objects and add to newsSource's appropriate list
- if h1s!=None and h2s!=None:
- for url in h1s:
- article=buildArticle(url, source.name)
- if article!=None: source.addArticle(article, 1) #sourceList[i].h1Arr.append(article)
- for url in h2s:
- article=buildArticle(url, source.name)
- if article!=None: sourceList[i].h2Arr.append(article)
- for url in h3s:
- article=buildArticle(url, source.name)
- if article!=None: sourceList[i].h3Arr.append(article)
- i+=1
- else:
- sourceList.remove(source)
- listLen-=1
-
-
- #return the original sourceList,
- #since everything should have been modified in place
- return sourceList
+ return html
+
+def writeOutputHTML(outputHTML, outDir):
+ timestamp = time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
+
+ with open(os.path.join(outDir, 'index.html'), 'w') as fp:
+ fp.write(outputHTML)
+
+ # copy over static package files
+ for filename in ['unbiased.css', 'favicon.ico', 'favicon.png', 'apple-touch-icon.png']:
+ data = pkgutil.get_data('unbiased', os.path.join('html_template', filename))
+ with open(os.path.join(outDir, filename), 'wb') as fp:
+ fp.write(data)
+
+def pullImage(url, index, webroot, target_width=350, target_height=200):
+ extension = url.split('.')[-1].split('?')[0]
+ img_name = 'img{}.{}'.format(index, extension)
+ res = requests.get(url)
+ if res.status_code == 200:
+ content = res.content
+ else:
+ logger.debug('Image not found: url={}'.format(url))
+ return ''
+ img = Image.open(io.BytesIO(content))
+ # crop to aspect ratio
+ target_ar = target_width / target_height
+ left, top, right, bottom = img.getbbox()
+ height = bottom - top
+ width = right - left
+ ar = width / height
+ if target_ar > ar:
+ new_height = (target_height / target_width) * width
+ bbox = (left, top + ((height - new_height) / 2), right, bottom - ((height - new_height) / 2))
+ img = img.crop(bbox)
+ elif target_ar < ar:
+ new_width = (target_width / target_height) * height
+ bbox = (left + ((width - new_width) / 2), top, right - ((width - new_width) / 2), bottom)
+ img = img.crop(bbox)
+ # resize if larger
+ if target_width * 2 < width or target_height * 2 < height:
+ img = img.resize((target_width*2, target_height*2), Image.LANCZOS)
+ # TODO: create retina images
+ jpg_name = 'img{}.jpg'.format(index)
+ out_file = os.path.join(webroot, jpg_name)
+ img.save(out_file, 'JPEG')
+ return jpg_name
diff --git a/unbiasedObjects.py b/unbiased/unbiasedObjects.py
index 3affbe6..7908fbb 100644
--- a/unbiasedObjects.py
+++ b/unbiased/unbiasedObjects.py
@@ -1,3 +1,7 @@
+import logging
+
+logger = logging.getLogger('unbiased')
+
class Article():
title=''
url=''
@@ -86,5 +90,5 @@ class NewsSource():
elif level==3:
self.h3Arr.append(article)
else:
- print("Error: invalid level in NewsSource.addArtlce: ", level)
+ logger.debug("Invalid level in NewsSource.addArtlce: " + level)