diff options
author | ssstvinc2 <sstvinc2@gmail.com> | 2017-02-09 20:24:25 -0500 |
---|---|---|
committer | ssstvinc2 <sstvinc2@gmail.com> | 2017-02-09 20:24:25 -0500 |
commit | cbbd1122c78834b1beef349797d142ed6d23218a (patch) | |
tree | 2afab3496e02cc1744e413d82ce509a25e3465f1 /main.py | |
parent | 439b2cf7317af9444e99ba58188d7ce80a906af5 (diff) |
Added/tweaked a few news sources, and added a source list to the page
Diffstat (limited to 'main.py')
-rwxr-xr-x | main.py | 65 |
1 files changed, 46 insertions, 19 deletions
```diff
@@ -13,10 +13,55 @@ def main():
 def run():
     sourceList=[]
 
+
+
+
+    sourceList.append(NewsSource('NBC News',
+                                 'http://nbcnews.com',
+                                 ['top-stories-section', 'panel_hero', '<a href="'],
+                                 ['<div class="story-link', '<a href="'],
+                                 [],
+                                 None, None,
+                                 'ad-content ad-xs mobilebox1', 'panel panel_default',
+                                 None, None))
+
+
+    sourceList.append(NewsSource('CBS News',
+                                 'http://cbsnews.com',
+                                 ['<a href="'],
+                                 ['<li data-tb-region-item>', '<a href="'],
+                                 [],
+                                 'Big News Area Side Assets', '</a>'
+                                 'Big News Area Side Assets', '</ul></div>',
+                                 None, None))
+
+
+
+    sourceList.append(NewsSource('The Blaze',
+                                 'http://theblaze.com',
+                                 ['<a class="gallery-link" href="'],
+                                 ['</figure>\n\n<figure class="gallery-item">', 'href="'],
+                                 [],
+                                 '<!-- home -->', '<!-- loop-home -->',
+                                 '<!-- home -->', '<!-- loop-home -->',
+                                 None, None))
+
+
+    sourceList.append(NewsSource('Weekly Standard',
+                                 'http://www.weeklystandard.com/',
+                                 ['<div class="lead-photo">', 'href="'],
+                                 ['<div class="lead-photo">', 'href="'],
+                                 [],
+                                 '<div id="region_1"', '<div id="region_2"',
+                                 '<div class="widget lead-story layout-3col-feature" data-count="2">', '<div id="region_2"',
+                                 None, None))
+
+
+
     sourceList.append(NewsSource('New York Times',
                                  'http://nytimes.com',
                                  ['<a href="'],#'<h1 class="story-heading"><a href="'],#['"b-column column', 'h2 class="story-heading"><a href="'],
-                                 ['article class="story theme-summary', 'h2 class="story-heading"><a href="'],
+                                 ['<article', '<a href="'],
                                  ['<hr class="single-rule"', 'article class="story theme-summary', 'h2 class="story-heading"><a href="'],
                                  '<div class="b-column column">', '<!-- close photo-spot-region -->',
                                  'section id="top-news" class="top-news"', '</div><!-- close a-column -->',
@@ -33,24 +78,6 @@ def run():
 
 
 
-    sourceList.append(NewsSource('NBC News',
-                                 'http://nbcnews.com',
-                                 ['top-stories-section', 'panel_hero', '<a href="'],
-                                 ['panel panel_default', '<a href="'],
-                                 [],
-                                 None, None,
-                                 'row_no-clear ad-container ad-container_default ad-hide ad-container-mobilebox1', 'js-more-topstories',
-                                 None, None))
-
-
-    sourceList.append(NewsSource('CBS News',
-                                 'http://cbsnews.com',
-                                 ['<h1 class="title"><a href="'],
-                                 ['<li data-tb-region-item>', '<a href="'],
-                                 [],
-                                 None, None,
-                                 'Big News Area Side Assets', '</ul></div>',
-                                 None, None))
 
     #scrape all urls and build data structure
     newsSourceArr=buildNewsSourceArr(sourceList)
```