From c3d54bbe304708693891fe68cf3760c5fb2545b3 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Mon, 17 Apr 2017 22:59:02 -0400 Subject: replace print statements with the logging module --- unbiased/main.py | 19 +++++++++++++------ unbiased/parser.py | 25 +++++++++++++------------ unbiased/unbiasedFunctions.py | 34 ++++++++++++++++------------------ unbiased/unbiasedObjects.py | 6 +++++- 4 files changed, 47 insertions(+), 37 deletions(-) diff --git a/unbiased/main.py b/unbiased/main.py index 88ceb7e..ea5412d 100755 --- a/unbiased/main.py +++ b/unbiased/main.py @@ -1,12 +1,20 @@ #!/usr/bin/env python3 import argparse +import logging import time from unbiased.unbiasedObjects import * from unbiased.unbiasedFunctions import * from unbiased.parser import * +logger = logging.getLogger('unbiased') +logger.setLevel(logging.DEBUG) +ch = logging.StreamHandler() +ch.setLevel(logging.DEBUG) +ch.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) +logger.addHandler(ch) + def main(): parser = argparse.ArgumentParser() @@ -15,9 +23,9 @@ def main(): args = parser.parse_args() while True: - print('-----------------------') + logger.info('Starting crawl') run(args.webroot, args.scratch) - print('-----------------------') + logger.info('Crawl complete. Sleeping for 600s') time.sleep(600) def run(webroot, scratch): @@ -32,8 +40,8 @@ def run(webroot, scratch): ''' - print('running with webroot="{}"'.format(webroot)) - print('running with scratch="{}"'.format(scratch)) + logger.debug('Running with webroot="{}"'.format(webroot)) + logger.debug('Running with scratch="{}"'.format(scratch)) ### These values have to be the second half of the function name @@ -53,8 +61,7 @@ def run(webroot, scratch): sourceList.append(src) break except Exception as ex: - print(ex) - print('Build error. Looping again: '+source) + logger.error('Build error. Looping again. source={} ex={}'.format(source, ex)) tries+=1 time.sleep(tries) diff --git a/unbiased/parser.py b/unbiased/parser.py index f068ae8..2bba27d 100755 --- a/unbiased/parser.py +++ b/unbiased/parser.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import logging import os import re import subprocess @@ -7,6 +8,8 @@ import subprocess from unbiased.unbiasedObjects import * from unbiased.unbiasedFunctions import buildArticle +logger = logging.getLogger('unbiased') + ''' Takes in a URL, downloads the file to a temp file, @@ -39,7 +42,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir): h1Arr=[] a=buildArticle(h1URLs[0], name, scratchDir) if a==None: - print('................\nH1 Nonetype in '+name+'\n................') + logger.debug('H1 Nonetype in '+name) else: h1Arr.append(a) @@ -49,7 +52,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir): if a!=None: h2Arr.append(a) else: - print('................\nH2 Nonetype in '+name+'\n................') + logger.debug('H2 Nonetype in '+name) h3Arr=[] @@ -58,7 +61,7 @@ def buildNewsSource2(name, url, h1URLs, h2URLs, h3URLs, scratchDir): if a!=None: h3Arr.append(a) else: - print('................\nH3 Nonetype in '+name+'\n................') + logger.debug('H3 Nonetype in '+name) #BUILD THE NEWS SOURCE newsSource=NewsSource2(name, url, h1Arr, h2Arr, h3Arr) @@ -119,13 +122,11 @@ def removeDuplicates(h1s, h2s, h3s): def removalNotification(source, title, reason, value): - print('*************************') - print('\t\tSTORY REMOVED') - print('SOURCE: '+source) - print('TITLE: \t'+title) - print('REASON: '+reason) - print('VALUE: \t'+value) - print('*************************\n\n') + logger.debug("""Story removed + SOURCE:\t{} + TITLE:\t{}) + REASON:\t{} + VALUE:\t{}""".format(source, title, reason, value)) def removeBadStoriesHelper(source, element, badStringList, arr): @@ -133,7 +134,7 @@ def removeBadStoriesHelper(source, element, badStringList, arr): for i in range(len(arr)): for hed in arr[i]: if hed==None: - print("////////\nNone type found in removeBadStoriesHelper for "+source.name+"\n/////////") + logger.debug("None type found in removeBadStoriesHelper for "+source.name) break for item in badStringList: if item in getattr(hed, element): @@ -225,7 +226,7 @@ def buildGuardian(scratchDir): if h1!='https://www.theguardian.com/us': break else: - print('Guardian loop') + logger.debug('Guardian loop') h1s=[h1] diff --git a/unbiased/unbiasedFunctions.py b/unbiased/unbiasedFunctions.py index 16ea07d..775346f 100644 --- a/unbiased/unbiasedFunctions.py +++ b/unbiased/unbiasedFunctions.py @@ -1,3 +1,4 @@ +import logging import os import pkgutil import random @@ -9,15 +10,15 @@ from unbiased.unbiasedObjects import * from PIL import Image +logger = logging.getLogger('unbiased') #take in a url and delimiters, return twitter card def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, titleDelEnd, imgDelStart, imgDelEnd): debugging=False if debugging: - print(sourceName) - print(url) - print() + logger.debug(sourceName) + logger.debug(url) temp_article = os.path.join(scratchDir, 'temp_article.html') @@ -60,7 +61,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t img=img[:-1] if debugging: - print(img) + logger.debug(img) title=content.split('og:title" content=')[1][1:].split('>')[0] if title[-1]=='/': @@ -68,7 +69,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t title=title[:-1] if debugging: - print(title) + logger.debug(title) author='' @@ -90,7 +91,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t break if debugging: - print(author) + logger.debug(author) if 'og:description' in content: @@ -104,7 +105,7 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t description=re.sub('<[^<]+?>', '', description) description=description[1:200] else: - print("SHOULDN'T GET HERE") + logger.debug("SHOULDN'T GET HERE") #strip out self-references description=description.replace(sourceName+"'s", '***') @@ -112,18 +113,16 @@ def buildArticle(url, sourceName, scratchDir, encoding=None):#, titleDelStart, t description=description.replace(sourceName, '***') if debugging: - print(description) + logger.debug(description) a=Article(title, url, img, description, sourceName, author) return a except Exception: - print('^^^^^^^^^^^^^^^^^^^^^^^^^') - print('\tARTICLE PARSING ERROR') - print('SOURCE: '+sourceName) - print('URL: \t'+url) - print('^^^^^^^^^^^^^^^^^^^^^^^^^ \n\n') + logger.error("""ARTICLE PARSING ERROR + SOURCE:\t{} + URL:\t{}""".format(sourceName, url)) return None @@ -144,7 +143,7 @@ def buildOutput(newsSourceArr, webroot): if x not in h1RandomSources: h1RandomSources.append(x) else: - print('\n\n@@@@\nNo H1 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') + logger.debug('No H1 stories in '+newsSourceArr[x].name) #For h2s and h3s, select N random sources (can repeat), then #a non-repetitive random article from within @@ -157,19 +156,18 @@ def buildOutput(newsSourceArr, webroot): if not pair in h2RandomPairs: h2RandomPairs.append(pair) else: - print('\n\n@@@@\nNo H2 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') + logger.debug('No H2 stories in '+newsSourceArr[x].name) h3RandomPairs=[] while len(h3RandomPairs) < 12: x=random.sample(range(len(newsSourceArr)), 1)[0] - print(newsSourceArr[x].name) if len(newsSourceArr[x].h3Arr) > 0: y=random.sample(range(len(newsSourceArr[x].h3Arr)), 1)[0] pair=[x,y] if not pair in h3RandomPairs: h3RandomPairs.append(pair) else: - print('\n\n@@@@\nNo H3 stories in '+newsSourceArr[x].name+'\n@@@@\n\n') + logger.debug('No H3 stories in '+newsSourceArr[x].name) # collect articles for each section image_index = 0 @@ -203,7 +201,7 @@ def buildOutput(newsSourceArr, webroot): for i in range(len(newsSourceArr)-1): sourcesStr+=newsSourceArr[i].name+', ' sourcesStr+=newsSourceArr[-1].name - print('Successfully parsed: '+sourcesStr) + logger.info('Successfully parsed: '+sourcesStr) timestamp=time.strftime("%a, %b %-d, %-I:%M%P %Z", time.localtime()) diff --git a/unbiased/unbiasedObjects.py b/unbiased/unbiasedObjects.py index 3affbe6..9372d3a 100644 --- a/unbiased/unbiasedObjects.py +++ b/unbiased/unbiasedObjects.py @@ -1,3 +1,7 @@ +import logging + +logger = logging.getLogger('unbiased') + class Article(): title='' url='' @@ -86,5 +90,5 @@ class NewsSource(): elif level==3: self.h3Arr.append(article) else: - print("Error: invalid level in NewsSource.addArtlce: ", level) + logger.error("Invalid level in NewsSource.addArtlce: " + level) -- cgit v1.2.3