From 38483987b2389b92ca06ac1b409f358ecd4fa991 Mon Sep 17 00:00:00 2001
From: sstvinc2 <sstvinc2@gmail.com>
Date: Wed, 15 Feb 2017 16:14:38 -0600
Subject: Changed randomization algorithm for H2 and H3; fully implemented H3

---
 html_template/template.html | 32 +++++++++++----
 main.py                     | 17 ++------
 parser.py                   |  1 -
 unbiasedFunctions.py        | 96 +++++++++++++--------------------------------
 4 files changed, 55 insertions(+), 91 deletions(-)
diff --git a/html_template/template.html b/html_template/template.html
index befaaff..1c2e858 100644
--- a/html_template/template.html
+++ b/html_template/template.html
@@ -109,35 +109,51 @@
   
   <div id="bottom-stories">
     <div class="bottom-story">
-      <a target="_blank" href="redirects/h3-1.html">xxTitle3-1xx</a>
+      <a target="_blank" href="" onclick="window.open('xxURL3-1xx', '_blank')">xxTitle3-1xx</a>
     </div>
 
     <div class="bottom-story">
-      <a target="_blank" href="redirects/h3-2.html">xxTitle3-2xx</a>
+      <a target="_blank" href="" onclick="window.open('xxURL3-2xx', '_blank')">xxTitle3-2xx</a>
     </div>
 
     <div class="bottom-story">
-      <a target="_blank" href="redirects/h3-3.html">xxTitle3-3xx</a>
+      <a target="_blank" href="" onclick="window.open('xxURL3-3xx', '_blank')">xxTitle3-3xx</a>
     </div>
 
     <div class="bottom-story">
-      <a target="_blank" href="redirects/h3-4.html">xxTitle3-4xx</a>
+      <a target="_blank" href="" onclick="window.open('xxURL3-4xx', '_blank')">xxTitle3-4xx</a>
     </div>
 
     <div class="bottom-story">
-      <a target="_blank" href="redirects/h3-5.html">xxTitle3-5xx</a>
+      <a target="_blank" href="" onclick="window.open('xxURL3-5xx', '_blank')">xxTitle3-5xx</a>
     </div>
 
     <div class="bottom-story">
-      <a target="_blank" href="redirects/h3-6.html">xxTitle3-6xx</a>
+      <a target="_blank" href="" onclick="window.open('xxURL3-6xx', '_blank')">xxTitle3-6xx</a>
     </div>
 
     <div class="bottom-story">
-      <a target="_blank" href="redirects/h3-7.html">xxTitle3-7xx</a>
+      <a target="_blank" href="" onclick="window.open('xxURL3-7xx', '_blank')">xxTitle3-7xx</a>
     </div>
 
     <div class="bottom-story">
-      <a target="_blank" href="redirects/h3-8.html">xxTitle3-8xx</a>
+      <a target="_blank" href="" onclick="window.open('xxURL3-8xx', '_blank')">xxTitle3-8xx</a>
+    </div>
+
+    <div class="bottom-story">
+      <a target="_blank" href="" onclick="window.open('xxURL3-9xx', '_blank')">xxTitle3-9xx</a>
+    </div>
+
+    <div class="bottom-story">
+      <a target="_blank" href="" onclick="window.open('xxURL3-10xx', '_blank')">xxTitle3-10xx</a>
+    </div>
+
+    <div class="bottom-story">
+      <a target="_blank" href="" onclick="window.open('xxURL3-11xx', '_blank')">xxTitle3-11xx</a>
+    </div>
+
+    <div class="bottom-story">
+      <a target="_blank" href="" onclick="window.open('xxURL3-12xx', '_blank')">xxTitle3-12xx</a>
     </div>
 </div>
 
diff --git a/main.py b/main.py
index cf68d01..92f96ae 100644
--- a/main.py
+++ b/main.py
@@ -27,23 +27,11 @@ def run():
     cbs=buildCBS()
     sourceList.append(cbs)
 
-
-    '''
-    sourceList.append(NewsSource('The Blaze',
-                                 'http://theblaze.com',
-                                 ['<a class="gallery-link" href="'],
-                                 ['</figure>\n\n<figure class="gallery-item">', 'href="'],
-                                 [],
-                                 '<!-- home -->', '<!-- loop-home -->',
-                                 '<!-- home -->', '<!-- loop-home -->',
-                                 None, None))
-    '''
-
     wkl=buildWeeklyStandard()
     sourceList.append(wkl)
 
-    nyt=buildNYT()
-    sourceList.append(nyt)
+    #nyt=buildNYT()
+    #sourceList.append(nyt)
 
     fox=buildFoxNews()
     sourceList.append(fox)
@@ -53,6 +41,7 @@ def run():
 
     #build the output file HTML
     outputHTML=buildOutput(newsSourceArr)
+
     #print the output file HTML
     printOutputHTML(outputHTML, '/var/www/html/index.html')
 
diff --git a/parser.py b/parser.py
index b9a05b9..53b3261 100644
--- a/parser.py
+++ b/parser.py
@@ -178,7 +178,6 @@ def blazeFixDesc(articleArr):
         desc=TAG_RE.sub('', desc)
         desc=desc.replace('\n', ' ')
         desc=desc[:144]
-        print(desc+'\n\n')
         articleArr[i].description=desc
 
     return articleArr
diff --git a/unbiasedFunctions.py b/unbiasedFunctions.py
index 733d6ba..46723cd 100644
--- a/unbiasedFunctions.py
+++ b/unbiasedFunctions.py
@@ -54,58 +54,6 @@ def buildArticle(url, sourceName):#, titleDelStart, titleDelEnd, imgDelStart, im
         return None
 
 
-#take in a read main source file (e.g. from nytimes.com) and return lists of the urls for stories
-def extractURLs(content, source):
-    h1s=[]
-    h2s=[]
-    h3s=[]
-
-    try:
-        h1=content
-        if source.h1SectionDividerStart!=None:
-            h1=h1.split(source.h1SectionDividerStart)[1]
-        if source.h1SectionDividerEnd!=None:
-            h1=h1.split(source.h1SectionDividerEnd)[0]
-        for delim in source.h1DelStart:
-            h1=h1.split(delim)[1]
-        h1=h1.split(source.h1DelEnd)[0]
-        if '.com' not in h1:
-            if source.stubURL!=None:
-                h1=source.stubURL+h1
-            else:
-                h1=source.url+h1
-        h1s.append(h1)
-    except:
-        print("Parse error in extractURLs: "+source.name+" h1")
-        h1s=None
-        
-    try:
-        h2=content
-        if source.h2SectionDividerStart!=None:
-            h2=h2.split(source.h2SectionDividerStart, 1)[1]
-        if source.h2SectionDividerEnd!=None:
-            h2=h2.split(source.h2SectionDividerEnd, 1)[0]
-
-        while source.h2DelStart[0] in h2:
-            x=h2
-            for delim in source.h2DelStart:
-                x=x.split(delim)[1]
-                h2=h2.split(delim, 1)[1]
-            x=x.split(source.h2DelEnd)[0]
-            h2=h2.split(source.h2DelEnd, 1)[1]
-            if '.com' not in x:
-                if source.stubURL!=None:
-                    x=source.stubURL+x
-                else:
-                    x=source.url+x
-            h2s.append(x)
-    except:
-        print("Parse error in extractURLs: "+source.name+" h2")
-        h2s=None
-
-    return h1s, h2s, h3s
-
-
 def buildOutput(newsSourceArr):
     #read in the template html file
     f=open('html_template/template.html', 'r')
@@ -114,7 +62,23 @@ def buildOutput(newsSourceArr):
     
     #set the random order for sources
     h1RandomSources=random.sample(range(len(newsSourceArr)), 4)
-    h2RandomSources=random.sample(range(len(newsSourceArr)), 6)
+    #For h2s and h3s, select N random sources (can repeat), then
+    #a non-repetitive random article from within that source
+    h2RandomPairs=[]
+    while len(h2RandomPairs) < 6:
+        x=random.sample(range(len(newsSourceArr)), 1)[0]
+        y=random.sample(range(len(newsSourceArr[x].h2Arr)), 1)[0]
+        pair=[x,y]
+        if not pair in h2RandomPairs:
+            h2RandomPairs.append(pair)
+    h3RandomPairs=[]
+    while len(h3RandomPairs) < 12:
+        x=random.sample(range(len(newsSourceArr)), 1)[0]
+        y=random.sample(range(len(newsSourceArr[x].h3Arr)), 1)[0]
+        pair=[x,y]
+        if not pair in h3RandomPairs:
+            h3RandomPairs.append(pair)
+
 
     #replace html template locations with data from newsSourceArr
     for i in range(len(h1RandomSources)):
@@ -122,11 +86,6 @@ def buildOutput(newsSourceArr):
         randomArticle=random.sample(range(len(source.h1Arr)), 1)[0]
         article=source.h1Arr[randomArticle]
         template=template.replace('xxURL1-'+str(i+1)+'xx', article.url)
-        '''
-        r=open('/var/www/html/redirects/h1-'+str(i+1)+'.html', 'w')
-        r.write('<html><head><script type="text/javascript">window.location="'+article.url+'"</script></head></html>')
-        r.close()
-        '''
         template=template.replace('xxTitle1-'+str(i+1)+'xx', article.title)
         template=template.replace('xxImg1-'+str(i+1)+'xx', article.img)
         desc=article.description
@@ -136,20 +95,21 @@ def buildOutput(newsSourceArr):
             desc=' '.join(desc)+' ...'
         template=template.replace('xxDesc1-'+str(i+1)+'xx', desc)
 
-
-    for i in range(len(h2RandomSources)):
-        source=newsSourceArr[h2RandomSources[i]]
-        randomArticle=random.sample(range(len(source.h2Arr)), 1)[0]
-        article=source.h2Arr[randomArticle]
+    for i in range(len(h2RandomPairs)):
+        pair=h2RandomPairs[i]
+        article=newsSourceArr[pair[0]].h2Arr[pair[1]]
         template=template.replace('xxURL2-'+str(i+1)+'xx', article.url)
-        '''
-        r=open('/var/www/html/redirects/h2-'+str(i+1)+'.html', 'w')
-        r.write('<html><head><script type="text/javascript">window.location="'+article.url+'"</script></head></html>')
-        r.close()
-        '''
         template=template.replace('xxTitle2-'+str(i+1)+'xx', article.title)
         template=template.replace('xxImg2-'+str(i+1)+'xx', article.img)
 
+    for i in range(len(h3RandomPairs)):
+        pair=h3RandomPairs[i]
+        article=newsSourceArr[pair[0]].h3Arr[pair[1]]
+        template=template.replace('xxURL3-'+str(i+1)+'xx', article.url)
+        template=template.replace('xxTitle3-'+str(i+1)+'xx', article.title)
+        template=template.replace('xxImg3-'+str(i+1)+'xx', article.img)
+
+
     sourcesStr=''
     for i in range(len(newsSourceArr)-1):
         sourcesStr+=newsSourceArr[i].name+', '
-- 
cgit v1.2.3