 unbiased/main.py              | 19
 unbiased/parser.py            | 25
 unbiased/unbiasedFunctions.py | 34
 unbiased/unbiasedObjects.py   |  6
 4 files changed, 47 insertions(+), 37 deletions(-)
diff --git a/unbiased/main.py b/unbiased/main.py
index 88ceb7e..ea5412d 100755
--- a/unbiased/main.py
+++ b/unbiased/main.py
@@ -1,12 +1,20 @@
#!/usr/bin/env python3
import argparse
+import logging
import time
from unbiased.unbiasedObjects import *
from unbiased.unbiasedFunctions import *
from unbiased.parser import *
+logger = logging.getLogger('unbiased')
+logger.setLevel(logging.DEBUG)
+ch = logging.StreamHandler()
+ch.setLevel(logging.DEBUG)
+ch.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
+logger.addHandler(ch)
+
def main():
parser = argparse.ArgumentParser()
@@ -15,9 +23,9 @@ def main():
args = parser.parse_args()
while True:
- print('-----------------------')
+ logger.info('Starting crawl')
run(args.webroot, args.scratch)
- print('-----------------------')
+ logger.info('Crawl complete. Sleeping for 600s')
time.sleep(600)
def run(webroot, scratch):
@@ -32,8 +40,8 @@ def run(webroot, scratch):
'''
- print('running with webroot="{}"'.format(webroot))
- print('running with scratch="{}"'.format(scratch))
+ logger.debug('Running with webroot="{}"'.format(webroot))
+ logger.debug('Running with scratch="{}"'.format(scratch))
### These values have to be the second half of the function name
@@ -53,8 +61,7 @@ def run(webroot, scratch):
sourceList.append(src)
break
except Exception as ex:
- print(ex)
- print('Build error. Looping again: '+source)
+ logger.error('Build error. Looping again. source={} ex={}'.format(source, ex))
tries+=1
time.sleep(tries)
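
The main.py hunk above is the setup the rest of this diff relies on: the named 'unbiased' logger gets its StreamHandler and formatter exactly once, and every other module just asks for the same logger by name. A minimal standalone sketch of that pattern (module layout and messages here are illustrative, not the project's actual code):

    import logging

    # One-time setup, as in main.py: configure the shared 'unbiased' logger.
    logger = logging.getLogger('unbiased')
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logger.addHandler(ch)

    # In any other module (parser.py, unbiasedFunctions.py, ...), no handler
    # work is needed: getLogger() with the same name returns the same object.
    same_logger = logging.getLogger('unbiased')
    same_logger.debug('picked up the handler configured above')
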
diff --git a/unbiased/parser.py b/unbiased/parser.py
index f068ae8..2bba27d 100755
--- a/unbiased/parser.py
+++ b/unbiased/parser.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python3
+import logging
import os
import re
import subprocess
@@ -7,6 +8,8 @@ import subprocess
from unbiased.unbiasedObjects import *
from unbiased.unbiasedFunctions import buildArticle
+logger = logging.getLogger('unbiased')
+
'''
Takes in a URL, downloads the file to a temp file,
@@ -39,7 +42,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir):
h1Arr=[]
a=buildArticle(h1URLs[0], name, scratchDir)
if a==None:
- print('................\nH1 Nonetype in '+name+'\n................')
+ logger.debug('H1 Nonetype in '+name)
else:
h1Arr.append(a)
@@ -49,7 +52,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir):
if a!=None:
h2Arr.append(a)
else:
- print('................\nH2 Nonetype in '+name+'\n................')
+ logger.debug('H2 Nonetype in '+name)
h3Arr=[]
@@ -58,7 +61,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir):
if a!=None:
h3Arr.append(a)
else:
- print('................\nH3 Nonetype in '+name+'\n................')
+ logger.debug('H3 Nonetype in '+name)
#BUILD THE NEWS SOURCE
newsSource=NewsSource2(name, url, h1Arr, h2Arr, h3Arr)
@@ -119,13 +122,11 @@ def removeDuplicates(h1s, h2s, h3s):
def removalNotification(source, title, reason, value):
- print('*************************')
- print('\t\tSTORY REMOVED')
- print('SOURCE: '+source)
- print('TITLE: \t'+title)
- print('REASON: '+reason)
- print('VALUE: \t'+value)
- print('*************************\n\n')
+ logger.debug("""Story removed
+ SOURCE:\t{}
+ TITLE:\t{}
+ REASON:\t{}
+ VALUE:\t{}""".format(source, title, reason, value))
def removeBadStoriesHelper(source, element, badStringList, arr):
@@ -133,7 +134,7 @@ def removeBadStoriesHelper(source, element, badStringList, arr):
for i in range(len(arr)):
for hed in arr[i]:
if hed==None:
- print("////////\nNone type found in removeBadStoriesHelper for "+source.name+"\n/////////")
+ logger.debug("None type found in removeBadStoriesHelper for "+source.name)
break
for item in badStringList:
if item in getattr(hed, element):
@@ -225,7 +226,7 @@ def buildGuardian(scratchDir):
if h1!='https://www.theguardian.com/us':
break
else:
- print('Guardian loop')
+ logger.debug('Guardian loop')
h1s=[h1]
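
parser.py, like the modules below, reuses the single 'unbiased' logger. A common stdlib variant of this layout (not what the diff does, just the usual pattern) is a per-module child logger: its records propagate up to the handler installed on 'unbiased' in main.py and can carry the module's name via %(name)s in the formatter. A small sketch, with hypothetical names:

    import logging

    # 'unbiased.parser' is a child of 'unbiased', so records propagate to the
    # handler configured on 'unbiased'; no extra handler setup is required.
    logger = logging.getLogger('unbiased.parser')

    def note_missing_headline(name):
        # Adding %(name)s to the formatter would show 'unbiased.parser' here.
        logger.debug('H1 Nonetype in %s', name)
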
diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py
index 16ea07d..775346f 100644
--- a/unbiased/unbiasedFunctions.py
+++ b/unbiased/unbiasedFunctions.py
@@ -1,3 +1,4 @@
+import logging
import os
import pkgutil
import random
@@ -9,15 +10,15 @@ from unbiased.unbiasedObjects import *
from PIL import Image
+logger = logging.getLogger('unbiased')
#take in a url and delimiters, return twitter card
def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, titleDelEnd, imgDelStart, imgDelEnd):
debugging=False
if debugging:
- print(sourceName)
- print(url)
- print()
+ logger.debug(sourceName)
+ logger.debug(url)
temp_article = os.path.join(scratchDir, 'temp_article.html')
@@ -60,7 +61,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t
img=img[:-1]
if debugging:
- print(img)
+ logger.debug(img)
title=content.split('og:title" content=')[1][1:].split('>')[0]
if title[-1]=='/':
@@ -68,7 +69,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t
title=title[:-1]
if debugging:
- print(title)
+ logger.debug(title)
author=''
@@ -90,7 +91,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t
break
if debugging:
- print(author)
+ logger.debug(author)
if 'og:description' in content:
@@ -104,7 +105,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t
description=re.sub('<[^<]+?>', '', description)
description=description[1:200]
else:
- print("SHOULDN'T GET HERE")
+ logger.debug("SHOULDN'T GET HERE")
#strip out self-references
description=description.replace(sourceName+"'s", '***')
@@ -112,18 +113,16 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t
description=description.replace(sourceName, '***')
if debugging:
- print(description)
+ logger.debug(description)
a=Article(title, url, img, description, sourceName, author)
return a
except Exception:
- print('^^^^^^^^^^^^^^^^^^^^^^^^^')
- print('\tARTICLE PARSING ERROR')
- print('SOURCE: '+sourceName)
- print('URL: \t'+url)
- print('^^^^^^^^^^^^^^^^^^^^^^^^^ \n\n')
+ logger.error("""ARTICLE PARSING ERROR
+ SOURCE:\t{}
+ URL:\t{}""".format(sourceName, url))
return None
@@ -144,7 +143,7 @@ def buildOutput(newsSourceArr, webroot):
if x not in h1RandomSources:
h1RandomSources.append(x)
else:
- print('\n\n@@@@\nNo H1 stories in '+newsSourceArr[x].name+'\n@@@@\n\n')
+ logger.debug('No H1 stories in '+newsSourceArr[x].name)
#For h2s and h3s, select N random sources (can repeat), then
#a non-repetitive random article from within
@@ -157,19 +156,18 @@ def buildOutput(newsSourceArr, webroot):
if not pair in h2RandomPairs:
h2RandomPairs.append(pair)
else:
- print('\n\n@@@@\nNo H2 stories in '+newsSourceArr[x].name+'\n@@@@\n\n')
+ logger.debug('No H2 stories in '+newsSourceArr[x].name)
h3RandomPairs=[]
while len(h3RandomPairs) < 12:
x=random.sample(range(len(newsSourceArr)), 1)[0]
- print(newsSourceArr[x].name)
if len(newsSourceArr[x].h3Arr) > 0:
y=random.sample(range(len(newsSourceArr[x].h3Arr)), 1)[0]
pair=[x,y]
if not pair in h3RandomPairs:
h3RandomPairs.append(pair)
else:
- print('\n\n@@@@\nNo H3 stories in '+newsSourceArr[x].name+'\n@@@@\n\n')
+ logger.debug('No H3 stories in '+newsSourceArr[x].name)
# collect articles for each section
image_index = 0
@@ -203,7 +201,7 @@ def buildOutput(newsSourceArr, webroot):
for i in range(len(newsSourceArr)-1):
sourcesStr+=newsSourceArr[i].name+', '
sourcesStr+=newsSourceArr[-1].name
- print('Successfully parsed: '+sourcesStr)
+ logger.info('Successfully parsed: '+sourcesStr)
timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime())
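
buildArticle above still gates its logger.debug calls behind the hard-coded `debugging` flag. With logging in place, the level check can be left to the logger itself; a possible follow-up (a sketch under that assumption, not part of this diff) is to drop the flag and use lazy %-style arguments so messages are only formatted when DEBUG is enabled:

    import logging

    logger = logging.getLogger('unbiased')

    def log_article_fields(source_name, url, title):
        # Hypothetical stand-in for buildArticle's debug prints: logging only
        # formats the message if the DEBUG level is actually enabled, so no
        # 'debugging' flag is needed around these calls.
        logger.debug('source=%s url=%s title=%s', source_name, url, title)
        # For diagnostics that are expensive to compute, an explicit guard
        # still helps:
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('title length: %d', len(title))
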
diff --git a/unbiased/unbiasedObjects.py b/unbiased/unbiasedObjects.py
index 3affbe6..9372d3a 100644
--- a/unbiased/unbiasedObjects.py
+++ b/unbiased/unbiasedObjects.py
@@ -1,3 +1,7 @@
+import logging
+
+logger = logging.getLogger('unbiased')
+
class Article():
title=''
url=''
@@ -86,5 +90,5 @@ class NewsSource():
elif level==3:
self.h3Arr.append(article)
else:
- print("Error: invalid level in NewsSource.addArtlce: ", level)
+ logger.error("Invalid level in NewsSource.addArtlce: %s", level)
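
The logger.error call in the last hunk passes `level` as a log argument instead of concatenating it onto the string, since `"..." + level` raises TypeError when level is an int. The %-style parameterized form is the logging module's convention and defers formatting until the record is actually emitted; a minimal illustration with made-up values:

    import logging

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger('unbiased')

    level = 7  # an unexpected headline level, for illustration
    # String concatenation would raise TypeError here; the parameterized form
    # accepts any type and stringifies it when the record is formatted.
    logger.error('Invalid level in NewsSource.addArtlce: %s', level)
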