summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matt Singleton <matt@xcolour.net> 2017-09-03 14:10:08 -0400
committer: Matt Singleton <matt@xcolour.net> 2017-09-03 14:10:08 -0400
commit: 2869fc9b1e358c488fcc4fec5fbd4201a386c0c6 (patch)
tree: b4329084220182b0a02fe61b0d56b9b2d36bec7d
parent: cdf18074bc6f1df4bc2a605f5fc048b9af104a1b (diff)
rewrite removeBadStoriesHelper so I can understand how it works
-rwxr-xr-x unbiased/parser.py 39
1 files changed, 21 insertions, 18 deletions
diff --git a/unbiased/parser.py b/unbiased/parser.py
index 05a7fc1..7d2d788 100755
--- a/unbiased/parser.py
+++ b/unbiased/parser.py
@@ -127,24 +127,27 @@ def removalNotification(source, title, reason, value):
VALUE:\t{}""".format(source, title, reason, value))
-def removeBadStoriesHelper(source, element, badStringList, arr):
- if badStringList!=None:
- for i in range(len(arr)):
- for hed in arr[i]:
- if hed==None:
- logger.debug("None type found in removeBadStoriesHelper for "+source.name)
- break
- for item in badStringList:
- if item in getattr(hed, element):
- arr[i].remove(hed)
- #if it's in the h1 slot, bump up the
- # first h2 into the h1 slot
- if i==0:
- arr[0].append(arr[1][0])
- arr[1].remove(arr[1][0])
- removalNotification(source.name, hed.title, element, item)
-
-
+def removeBadStoriesHelper(source, element, badStringList, article_tiers):
+ if badStringList is None:
+ return
+ for tier, articles in enumerate(article_tiers):
+ print(tier, articles)
+ for idx, article in enumerate(articles):
+ print(article)
+ if article is None:
+ logger.debug("None type found in removeBadStoriesHelper for {}".format(source.name))
+ break
+ for item in badStringList:
+ if item in getattr(article, element):
+ article_tiers[tier].remove(article)
+ # if it's in the h1 slot, bump up the
+ # first h2 into the h1 slot
+ if tier == 0 and len(article_tiers[1]) > 0:
+ article_tiers[0].append(article_tiers[1][0])
+ article_tiers[1].remove(article_tiers[1][0])
+ removalNotification(source.name, article.title, element, item)
+
+
def removeBadStories(source, badTitleArr, badDescArr, badAuthorArr, badImgArr, badURLArr=None):
arr=[source.h1Arr, source.h2Arr, source.h3Arr]