#!/usr/bin/env python3 from unbiasedObjects import * from unbiasedFunctions import buildArticle import os def buildNYT(): url='http://www.nytimes.com' #download file os.system('wget -q -O scratch/temp1.html --no-check-certificate '+url) #read file f=open('scratch/temp1.html', 'r')#, encoding="utf8") content=f.read() f.close() #get main headline #this will likely need if/else logic h1=content #This is with a large headline over a and b columns h1=h1.split('story theme-summary banner', 1)[1] h1=h1.split('', 1)[1] h2=h2.split('', 1)[0] #remove "collection" sets while '
' in h2: arr=h2.split('
', 1) h2=arr[0]+arr[1].split('', 1)[1] #Grab the remaining URLs while '', 1)[1] h2=h2.split('', 1)[0] #remove "collection" sets while '