summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Singleton <matt@xcolour.net>2022-01-14 10:30:10 -0600
committerMatt Singleton <matt@xcolour.net>2022-01-16 13:04:41 -0600
commit09c7b7abbdbf5b12f908d4a88fe5c0098094b2c7 (patch)
treec6b45c831eef7fd4f31ad48a0e0525fad6ab3a08
parent5499319fa66d677d2ec0d39b077c4f42af4d1296 (diff)
switch to fsm implementation of gem2html
-rw-r--r--fsm.py86
-rw-r--r--gemini.py81
2 files changed, 29 insertions, 138 deletions
diff --git a/fsm.py b/fsm.py
index 40f9c2d..901e114 100644
--- a/fsm.py
+++ b/fsm.py
@@ -1,4 +1,6 @@
import sys
+import urllib.parse
+
class StackFSM(object):
"""
@@ -25,6 +27,7 @@ class StackFSM(object):
def pop_state(self):
return self._state_stack.pop()
+
class Parser(object):
def __init__(self, document, output=None):
@@ -38,29 +41,23 @@ class Parser(object):
def parse(self):
self._fsm.push_state(self.text_state)
- while self._fsm._current_state() is not None:
+ while self._fsm._current_state() is not None and len(self._document) > self._offset:
self._fsm.update()
def text_state(self):
- if len(self._document) <= self._offset:
- self._fsm.pop_state()
- return
line = self._document[self._offset]
if line.strip() == '':
self._blanks += 1
else:
self._blanks = 0
if line.strip() == '```':
- self._fsm.pop_state()
self._fsm.push_state(self.pre_state)
self._output.write('<pre>\n')
self._offset += 1
elif line.startswith('* '):
- self._fsm.pop_state()
self._fsm.push_state(self.list_state)
self._output.write('<ul>\n')
elif line.startswith('=>'):
- self._fsm.pop_state()
self._fsm.push_state(self.link_state)
self._output.write('<ul>\n')
else:
@@ -80,95 +77,40 @@ class Parser(object):
self._offset += 1
def pre_state(self):
- if len(self._document) < self._offset:
- self.pop_state()
- return
line = self._document[self._offset]
if line.strip() == '```':
self._fsm.pop_state()
- self._fsm.push_state(self.text_state)
self._output.write('</pre>\n')
self._offset += 1
- elif line.startswith('* '):
- self._fsm.pop_state()
- self._fsm.push_state(self.list_state)
- self._output.write('<ul>\n')
- elif line.startswith('=>'):
- self._fsm.pop_state()
- self._fsm.push_state(self.link_state)
- self._output.write('<ul>\n')
else:
self._output.write(line + '\n')
self._offset += 1
def list_state(self):
- if len(self._document) < self._offset:
- self.pop_state()
- return
line = self._document[self._offset]
if line.startswith('* '):
self._output.write('<li>{}</li>\n'.format(line[2:]))
self._offset += 1
else:
self._fsm.pop_state()
- self._fsm.push_state(self.text_state)
self._output.write('</ul>\n')
def link_state(self):
- if len(self._document) < self._offset:
- self.pop_state()
- return
line = self._document[self._offset]
if line.startswith('=>'):
- parts = line[2:].split(None, 2)
+ parts = line[2:].split(None, 1)
+ url = parts[0]
+ url_parts = urllib.parse.urlsplit(url)
+ if url_parts.scheme in ('gemini', ''):
+ external = ''
+ else:
+ external = ' class="external"'
if len(parts) == 1:
- self._output.write('<li><a href="{}">{}</a></li>\n'.format(parts[0], parts[0]))
+ text = url
else:
- self._output.write('<li><a href="{}">{}</a></li>\n'.format(parts[0], parts[1]))
+ text = parts[1]
+ self._output.write('<li{}><a href="{}">{}</a></li>\n'.format(external, url, text))
self._offset += 1
else:
self._fsm.pop_state()
- self._fsm.push_state(self.text_state)
self._output.write('</ul>\n')
-
-document = """
-# h1
-hello
-hello
-
-## h2
-```
-code
-code
-```
-### h3
-hello
-
-
-hello
-
-### lists
-* hello
-* two
-* three
-
-text
-
-* one
-* two
-* three
-
-### links
-=>https://example.com hello
-=> https://example.com two
-=> https://example.com three
-
-text
-
-=>https://example.com
-=> https://example.com
-=> https://example.com
-"""
-
-p = Parser(document.split('\n'))
-p.parse()
diff --git a/gemini.py b/gemini.py
index 560b01f..cbc2eca 100644
--- a/gemini.py
+++ b/gemini.py
@@ -1,80 +1,26 @@
+import io
import re
import socket
import ssl
import urllib.parse
-import pyphen
-dic = pyphen.Pyphen(lang='en_US')
+import fsm
+
def htmlescape(text: str) -> str:
return text.replace('<', '&lt;').replace('>', '&gt;')
-def gem2html(gem: str) -> str:
- html = []
- html.append('<html>\n<body>\n<div id="root">')
- state = 'text'
- blanklines = 0
- for line in gem.split('\n'):
- if line.startswith('```'):
- if state == 'pre':
- newstate = 'text'
- blanklines = 0
- else:
- newstate = 'pre'
- elif state == 'pre':
- newstate = 'pre'
- elif line.startswith('=>'):
- newstate = 'links'
- elif line.startswith('* '):
- newstate = 'list'
- else:
- newstate = 'text'
-
- if state != 'pre':
- if len(line.strip()) == 0:
- blanklines += 1
- if blanklines > 1:
- html.append('<br/>')
- continue
- blanklines = 0
-
- if state != newstate:
- if state in ('links', 'list'):
- html.append('</ul>')
- elif state == 'pre':
- html.append('</pre>')
- if newstate in ('links', 'list'):
- html.append('<ul>')
- elif newstate == 'pre':
- html.append('<pre>')
- state = newstate
+def gem2html(gem: str) -> str:
+ html = io.StringIO()
+ html.write('<html>\n<head>\n<style type="text/css">\n')
+ html.write(open('style.css').read())
+ html.write('</style>\n</head>\n<body>\n<div id="root">')
+ parser = fsm.Parser(gem.split('\n'), html)
+ parser.parse()
+ html.write('</div>\n</body>\n</html>')
+ return html.getvalue()
- if line.startswith('```'):
- pass
- elif state == 'links':
- tokens = line.split(None, 2)
- url = tokens[1]
- text = None if len(tokens) < 3 else tokens[2]
- url_parts = urllib.parse.urlsplit(url)
- external = ' class="external"' if url_parts.scheme not in ('gemini', '') else ''
- html.append('<li{external}><a href="{url}">{text}</a></li>'.format(url=url, text=text or url, external=external))
- elif state == 'list':
- html.append('<li>{}</li>'.format(line[2:]))
- elif state == 'pre':
- html.append(line)
- else:
- if line.startswith('###'):
- html.append('<p class="h3">{}</p>'.format(line[3:].lstrip()))
- elif line.startswith('##'):
- html.append('<p class="h2">{}</p>'.format(line[2:].lstrip()))
- elif line.startswith('#'):
- html.append('<p class="h1">{}</p>'.format(line[1:].lstrip()))
- else:
- ' '.join([dic.inserted(word, "\u00AD") for word in line.split()])
- html.append('<p>{}</p>'.format(htmlescape(line)))
- html.append('</div>\n</body>\n</html>')
- return '\n'.join(html)
def urljoin(base: str, url: str) -> str:
if base is None:
@@ -83,6 +29,7 @@ def urljoin(base: str, url: str) -> str:
url = re.sub('^gemini:', 'http:', url)
return re.sub('^http:', 'gemini:', urllib.parse.urljoin(base, url))
+
def get(url: str, follow_redirects: bool = True) -> dict:
response = _get(url)
if follow_redirects is True:
@@ -95,6 +42,7 @@ def get(url: str, follow_redirects: bool = True) -> dict:
response = _get(response['meta'])
return response
+
def _parse_meta(meta: str) -> dict:
mime, _, params_text = meta.lower().strip().partition(';')
params = {}
@@ -105,6 +53,7 @@ def _parse_meta(meta: str) -> dict:
params['mime'] = mime.strip()
return params
+
def _get(url: str) -> dict:
url_parts = urllib.parse.urlsplit(url)
if len(url_parts.path) == 0: