diff options
author | sstvinc2 <sstvinc2@gmail.com> | 2017-02-15 09:24:26 -0600 |
---|---|---|
committer | sstvinc2 <sstvinc2@gmail.com> | 2017-02-15 09:24:26 -0600 |
commit | 064e200fa57dfeda3776b7a708cc0af8280f20fb (patch) | |
tree | e52415d4e0f329c0283709c9e44e2ea93829721e /unbiasedFunctions.py | |
parent | 511b3ba3f9de0d38e861833d6bcd7160487af111 (diff) |
Added author name to Article class; added removeBadArticles() filter function
Diffstat (limited to 'unbiasedFunctions.py')
-rw-r--r-- | unbiasedFunctions.py | 12 |
1 files changed, 10 insertions, 2 deletions
diff --git a/unbiasedFunctions.py b/unbiasedFunctions.py
index ef6ae7c..5f46ed2 100644
--- a/unbiasedFunctions.py
+++ b/unbiasedFunctions.py
@@ -33,16 +33,24 @@ def buildArticle(url, sourceName):#, titleDelStart, titleDelEnd, imgDelStart, im
title=title[:-1].strip()
title=title[:-1]
+ author=''
+ authorTags=['article:author', 'dc.creator']
+ for tag in authorTags:
+ if tag in content:
+ author=content.split(tag+'" content=')[1][1:].split('>')[0]
+ author=author[:-1]
+ break
+
description=content.split('og:description" content=')[1][1:].split('>')[0]
if description[-1]=='/':
description=description[:-1].strip()
description=description[:-1]
- a=Article(title, url, img, description, sourceName)
+ a=Article(title, url, img, description, sourceName, author)
return a
except:
- print("Article parsing error in buildArticle() for URL: "+url)
+ print("Article parsing error in buildArticle() for URL: "+url+" in source"+sourceName)
return None
|