diff options
Diffstat (limited to 'main.py')
-rwxr-xr-x | main.py | 65 |
1 file changed, 46 insertions, 19 deletions
@@ -13,10 +13,55 @@ def main():
 def run():
 sourceList=[]
+
+
+
+ sourceList.append(NewsSource('NBC News',
+ 'http://nbcnews.com',
+ ['top-stories-section', 'panel_hero', '<a href="'],
+ ['<div class="story-link', '<a href="'],
+ [],
+ None, None,
+ 'ad-content ad-xs mobilebox1', 'panel panel_default',
+ None, None))
+
+
+ sourceList.append(NewsSource('CBS News',
+ 'http://cbsnews.com',
+ ['<a href="'],
+ ['<li data-tb-region-item>', '<a href="'],
+ [],
+ 'Big News Area Side Assets', '</a>'
+ 'Big News Area Side Assets', '</ul></div>',
+ None, None))
+
+
+
+ sourceList.append(NewsSource('The Blaze',
+ 'http://theblaze.com',
+ ['<a class="gallery-link" href="'],
+ ['</figure>\n\n<figure class="gallery-item">', 'href="'],
+ [],
+ '<!-- home -->', '<!-- loop-home -->',
+ '<!-- home -->', '<!-- loop-home -->',
+ None, None))
+
+
+ sourceList.append(NewsSource('Weekly Standard',
+ 'http://www.weeklystandard.com/',
+ ['<div class="lead-photo">', 'href="'],
+ ['<div class="lead-photo">', 'href="'],
+ [],
+ '<div id="region_1"', '<div id="region_2"',
+ '<div class="widget lead-story layout-3col-feature" data-count="2">', '<div id="region_2"',
+ None, None))
+
+
+
 sourceList.append(NewsSource('New York Times',
 'http://nytimes.com',
 ['<a href="'],#'<h1 class="story-heading"><a href="'],#['"b-column column', 'h2 class="story-heading"><a href="'],
- ['article class="story theme-summary', 'h2 class="story-heading"><a href="'],
+ ['<article', '<a href="'],
 ['<hr class="single-rule"', 'article class="story theme-summary', 'h2 class="story-heading"><a href="'],
 '<div class="b-column column">', '<!-- close photo-spot-region -->',
 'section id="top-news" class="top-news"', '</div><!-- close a-column -->',
@@ -33,24 +78,6 @@ def run():
- sourceList.append(NewsSource('NBC News',
- 'http://nbcnews.com',
- ['top-stories-section', 'panel_hero', '<a href="'],
- ['panel panel_default', '<a href="'],
- [],
- None, None,
- 'row_no-clear ad-container ad-container_default ad-hide ad-container-mobilebox1', 'js-more-topstories',
- None, None))
-
-
- sourceList.append(NewsSource('CBS News',
- 'http://cbsnews.com',
- ['<h1 class="title"><a href="'],
- ['<li data-tb-region-item>', '<a href="'],
- [],
- None, None,
- 'Big News Area Side Assets', '</ul></div>',
- None, None))
 #scrape all urls and build data structure
 newsSourceArr=buildNewsSourceArr(sourceList)