3 files changed, 67 insertions, 11 deletions
diff --git a/parser.py b/parser.py
index 93ed020..0bd5b0f 100644
--- a/parser.py
+++ b/parser.py
@@ -92,6 +92,56 @@ def removeDuplicates(h1s, h2s, h3s):
     
 
     return h1s, h2s, h3s
+
+
+
+def removeBadStories(source, badDescArr, badAuthorArr):
+    """Remove stories by unwanted authors from every tier of a news source.
+
+    source       -- object with a name and list attributes h1Arr, h2Arr and
+                    h3Arr; every story in them needs .author and .title
+    badDescArr   -- accepted for interface compatibility but not used:
+                    description matching is switched off in this version
+                    (NOTE(review): confirm whether it should come back)
+    badAuthorArr -- substrings to look for in story.author (the match is
+                    case-sensitive); None or empty means nothing to remove
+
+    Each removed h1 story is replaced by the first remaining h2 story, as
+    long as one exists, and a line is printed for every removed story.
+    The lists are changed in place and the same source object is returned.
+    """
+    if not badAuthorArr:
+        return source
+
+    def purge(stories):
+        #Build the survivors separately instead of calling remove() while
+        #looping: that skips the story after each hit and raises ValueError
+        #when two bad authors match the same story.
+        kept=[]
+        for story in stories:
+            if any(item in story.author for item in badAuthorArr):
+                print('removed '+story.title+' from '+source.name)
+            else:
+                kept.append(story)
+        dropped=len(stories)-len(kept)
+        #slice-assign so other references to the list see the change too
+        stories[:]=kept
+        return dropped
+
+    h1Dropped=purge(source.h1Arr)
+    #clean h2 before promoting so a bad story is never bumped into h1
+    purge(source.h2Arr)
+
+    #for every h1 that was removed, bump the first h2 up into the h1 slot
+    for _ in range(h1Dropped):
+        #an empty h2 list used to raise IndexError here; stop instead
+        if not source.h2Arr:
+            break
+        source.h1Arr.append(source.h2Arr.pop(0))
+
+    purge(source.h3Arr)
+
+    return source
     
 
 
@@ -148,13 +198,9 @@ def buildWeeklyStandard():
     wkl=buildNewsSource2(name, url, h1s, h2s, h3s)
 
     #REMOVE BAD STORIES
-    #if it's in the h1 slot, bump up the first h2 into the h1 slot
-    for h1 in wkl.h1Arr:
-        if 'Matt Labash' in h1.description:
-            wkl.h1Arr.remove(h1)
-            wkl.h1Arr.append(wkl.h2Arr[0])
-            wkl.h2Arr.remove(wkl.h2Arr[0])
-            print('removed '+h1.title)
+    badDescArr=['Matt Labash']
+    badAuthorArr=['MATT LABASH']
+    wkl=removeBadStories(wkl, badDescArr, badAuthorArr)
 
     return wkl
 
diff --git a/unbiasedFunctions.py b/unbiasedFunctions.py
index ef6ae7c..5f46ed2 100644
--- a/unbiasedFunctions.py
+++ b/unbiasedFunctions.py
@@ -33,16 +33,24 @@ def buildArticle(url, sourceName):#, titleDelStart, titleDelEnd, imgDelStart, im
             title=title[:-1].strip()
         title=title[:-1]
 
+        author=''
+        authorTags=['article:author', 'dc.creator']
+        for tag in authorTags:
+            if tag in content:
+                author=content.split(tag+'" content=')[1][1:].split('>')[0]
+                author=author[:-1]
+                break
+
         description=content.split('og:description" content=')[1][1:].split('>')[0]
         if description[-1]=='/':
             description=description[:-1].strip()
         description=description[:-1]
 
-        a=Article(title, url, img, description, sourceName)
+        a=Article(title, url, img, description, sourceName, author)
         return a
 
     except:
-        print("Article parsing error in buildArticle() for URL: "+url)
+        print("Article parsing error in buildArticle() for URL: "+url+" in source"+sourceName)
         return None
 
 
diff --git a/unbiasedObjects.py b/unbiasedObjects.py
index 2233b0c..3affbe6 100644
--- a/unbiasedObjects.py
+++ b/unbiasedObjects.py
@@ -4,16 +4,18 @@ class Article():
     img=''
     description=''
     source=''
+    author=''
 
-    def __init__(self, title, url, img, description, source):
+    def __init__(self, title, url, img, description, source, author=''):
         self.title=title
         self.url=url
         self.img=img
         self.description=description
         self.source=source
+        self.author=author  #optional; '' matches the class-level default, so five-argument callers keep working
 
     def __str__(self):
-        return '-----------\n'+self.title+'\n'+self.source+'\n'+self.description+'\n'+self.url+'\n'+self.img+'\n'+'-----------'
+        return '\n'.join(['-----------', self.title, self.author, self.source, self.description, self.url, self.img, '-----------'])  #one field per line between two dashed rules
 
 
 class NewsSource2():