3 files changed, 0 insertions, 183 deletions
diff --git a/legacy_py/downloadFiles.py b/legacy_py/downloadFiles.py
deleted file mode 100644
index 9a79e27..0000000
--- a/legacy_py/downloadFiles.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import os
-
-files=[]
-files.append('http://www.nytimes.com')
-files.append('http://www.nbcnews.com')
-files.append('http://www.foxnews.com')
-
-stories=[]
-stories.append('https://www.nytimes.com/2017/02/03/business/dealbook/trump-congress-financial-regulations.html')
-stories.append('http://www.nbcnews.com/news/us-news/over-100-000-visas-have-been-revoked-immigration-ban-justice-n716121')
-stories.append('http://www.foxnews.com/politics/2017/02/03/calls-mount-for-trump-administration-to-label-muslim-brotherhood-terrorist-organization.html')
-
-for i in range(len(files)):
-    os.system('wget -O source'+str(i+1)+'.html --no-check-certificate '+files[i])
-    
-for i in range(len(stories)):
-    os.system('wget -O story'+str(i+1)+'.html --no-check-certificate '+stories[i])
-
-'''
-f=open('testOut.html', 'r', encoding="utf8")
-content=f.read()
-f.close()
-#os.remove('testOut.html')
-'''
diff --git a/legacy_py/get_sources.py b/legacy_py/get_sources.py
deleted file mode 100755
index cb23f61..0000000
--- a/legacy_py/get_sources.py
+++ /dev/null
@@ -1,149 +0,0 @@
-from bs4 import BeautifulSoup
-import wget
-import os
-
-def getHeds(heds, h_class=None):
-    hed_text=[]
-    hed_hrefs=[]
-    #print(heds[0])
-    for hed in heds:
-        #print('*'+hed.string)
-        if hed.a:
-            try:
-                if h_class==None or hed['class'][0]==h_class:
-                    if hed.a.string!=None: hed_text.append(hed.a.string.strip())
-                    if hed.a['href']!='': hed_hrefs.append(hed.a['href'])
-            except:
-                continue
-    return hed_text, hed_hrefs
-
-
-
-def get_nyt():
-    #init vars
-    stories={}
-
-    #download and soup web page
-    wget.download('https://nytimes.com', out="nyt.html")
-    soup = BeautifulSoup(open("nyt.html", encoding="utf8"), "lxml")
-    os.remove('nyt.html')
-
-    #get top story
-    h1s=soup('h1')
-    h1_heds, h1_href = getHeds(h1s, 'story-heading')
-    stories['h1']={'heds':[h1_heds[0]], 'href':[h1_href[0]]}
-
-    #get secondary stories
-    aCol = soup.find_all('div', 'a-column')
-    h2s=aCol[0].find_all('h2')
-    h2_heds, h2_href = getHeds(h2s, 'story-heading')
-    stories['h2']={'heds':h2_heds, 'href':h2_href}
-
-    #get tertiary stories
-    bCol = soup.find_all('div', 'b-column')
-    h3s=bCol[0].find_all('h2')
-    h3_heds, h3_href = getHeds(h3s, 'story-heading')
-    stories['h3']={'heds':h3_heds, 'href':h3_href}
-
-    return stories
-
-
-def get_fox():
-    #init vars
-    stories={}
-    h2_heds = []
-    h2_href = []
-
-    #download and soup web page
-    wget.download('http://www.foxnews.com', out="fox.html")
-    soup = BeautifulSoup(open("fox.html", encoding="utf8"), "lxml")
-    os.remove('fox.html')
-
-    #get top story
-    h1s=soup('h1')
-    h1_heds, h1_href = getHeds(h1s)
-    stories['h1']={'heds':[h1_heds[0]], 'href':[h1_href[0]]}
-
-    #get secondary stories - <div top-stories><li> (loop for first <a>)
-    topStories = soup('div', 'top-stories')
-    topStoriesCols = topStories[0].ul
-    for c in topStoriesCols.children:
-        if c.string==None:
-            h2_heds.append(c.h3.text)
-            h2_href.append(c.a['href'])
-
-    stories['h2']={'heds':h2_heds, 'href':h2_href}
-
-    #get tertiary stories
-    h3s=topStoriesCols.find_all('li')
-    h3_heds, h3_href=getHeds(h3s)
-    for href in h3_href:
-        if href in h2_href:
-            h3_href.remove(href)
-
-    stories['h3']={'heds':h3_heds, 'href':h3_href}
-
-    return stories
-
-
-def get_nbc():
-    #init vars
-    stories={}
-
-    #download and soup web page
-    wget.download('http://www.nbcnews.com', out="nbc.html")
-    soup = BeautifulSoup(open("nbc.html", encoding="utf8"), "lxml")
-    os.remove('nbc.html')
-
-    #get top story
-    panel = soup.find_all('div', 'panel-txt_hero')
-    h1_heds=panel[0].find_all('h3')
-    panel = soup.find_all('div', 'panel-txt')
-    stories['h1']={'heds':[h1_heds[0].text.strip()], 'href':['http://www.nbcnews.com'+panel[0].a['href']]}
-
-    #get secondary stories - div class panel
-    h2s = soup.find_all('div', 'story-link_default-height')
-    story_heds=[]
-    for item in h2s:
-        story_heds.append(item.h3.text.strip())
-    h2_heds, h2_href = getHeds(h2s)
-    for i in range(len(h2_href)):
-        h2_href[i]='http://www.nbcnews.com'+h2_href[i]
-    stories['h2']={'heds':story_heds[:3], 'href':h2_href[:3]}
-
-    #get tertiary stories - div class story-link
-    stories['h3']={'heds':story_heds[3:], 'href':h2_href[3:]}
-
-    return stories
-
-def getTwitterCard(url):
-    card={}
-
-    #wget.download(url, out="card.html")
-    cmd='wget --no-check-certificate '+url+' -O card.html'
-    print(cmd)
-    ret=os.system(cmd)
-    print(ret)
-    soup = BeautifulSoup(open("card.html", encoding="utf8"), "lxml")
-    #os.remove('card.html')
-
-    hed=soup.find_all('meta', {'name' : 'twitter:title'})
-    card['hed']=hed[0]['content']
-
-    img=soup.find_all('meta', {'name' : 'twitter:image'})
-    card['img']=img[0]['content']
-
-    return card
-
-
-
-'''  
-links=get_nbc()
-card=getTwitterCard(links['h2']['href'][0])
-print(card)
-
-
-nyt=get_nyt()
-fox=get_fox()
-nbc=get_nbc()
-'''
diff --git a/legacy_py/update_stories.py b/legacy_py/update_stories.py
deleted file mode 100755
index 4340148..0000000
--- a/legacy_py/update_stories.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from get_sources import *
-
-nyt=get_nyt()
-#fox=get_fox()
-#nbc=get_nbc()
-
-url=nyt['h2']['href'][0]
-print(url)
-card=getTwitterCard("https://www.nytimes.com/2017/01/30/us/politics/trump-immigration-ban-memo.html")
-#print(card)