diff options
author | Matt Singleton <matt@xcolour.net> | 2022-01-14 10:30:10 -0600 |
---|---|---|
committer | Matt Singleton <matt@xcolour.net> | 2022-01-16 13:04:41 -0600 |
commit | 09c7b7abbdbf5b12f908d4a88fe5c0098094b2c7 (patch) | |
tree | c6b45c831eef7fd4f31ad48a0e0525fad6ab3a08 | |
parent | 5499319fa66d677d2ec0d39b077c4f42af4d1296 (diff) |
switch to fsm implementation of gem2html
-rw-r--r-- | fsm.py | 86 | ||||
-rw-r--r-- | gemini.py | 81 |
2 files changed, 29 insertions, 138 deletions
````diff
diff --git a/fsm.py b/fsm.py
--- a/fsm.py
+++ b/fsm.py
@@ -1,4 +1,6 @@
 import sys
+import urllib.parse
+
 
 class StackFSM(object):
     """
@@ -25,6 +27,7 @@ class StackFSM(object):
     def pop_state(self):
         return self._state_stack.pop()
 
+
 class Parser(object):
 
     def __init__(self, document, output=None):
@@ -38,29 +41,23 @@ class Parser(object):
 
     def parse(self):
         self._fsm.push_state(self.text_state)
-        while self._fsm._current_state() is not None:
+        while self._fsm._current_state() is not None and len(self._document) > self._offset:
             self._fsm.update()
 
     def text_state(self):
-        if len(self._document) <= self._offset:
-            self._fsm.pop_state()
-            return
         line = self._document[self._offset]
         if line.strip() == '':
             self._blanks += 1
         else:
             self._blanks = 0
         if line.strip() == '```':
-            self._fsm.pop_state()
             self._fsm.push_state(self.pre_state)
             self._output.write('<pre>\n')
             self._offset += 1
         elif line.startswith('* '):
-            self._fsm.pop_state()
             self._fsm.push_state(self.list_state)
             self._output.write('<ul>\n')
         elif line.startswith('=>'):
-            self._fsm.pop_state()
             self._fsm.push_state(self.link_state)
             self._output.write('<ul>\n')
         else:
@@ -80,95 +77,40 @@ class Parser(object):
             self._offset += 1
 
     def pre_state(self):
-        if len(self._document) < self._offset:
-            self.pop_state()
-            return
         line = self._document[self._offset]
         if line.strip() == '```':
             self._fsm.pop_state()
-            self._fsm.push_state(self.text_state)
             self._output.write('</pre>\n')
             self._offset += 1
-        elif line.startswith('* '):
-            self._fsm.pop_state()
-            self._fsm.push_state(self.list_state)
-            self._output.write('<ul>\n')
-        elif line.startswith('=>'):
-            self._fsm.pop_state()
-            self._fsm.push_state(self.link_state)
-            self._output.write('<ul>\n')
         else:
             self._output.write(line + '\n')
             self._offset += 1
 
     def list_state(self):
-        if len(self._document) < self._offset:
-            self.pop_state()
-            return
         line = self._document[self._offset]
         if line.startswith('* '):
             self._output.write('<li>{}</li>\n'.format(line[2:]))
             self._offset += 1
         else:
             self._fsm.pop_state()
-            self._fsm.push_state(self.text_state)
             self._output.write('</ul>\n')
 
     def link_state(self):
-        if len(self._document) < self._offset:
-            self.pop_state()
-            return
         line = self._document[self._offset]
         if line.startswith('=>'):
-            parts = line[2:].split(None, 2)
+            parts = line[2:].split(None, 1)
+            url = parts[0]
+            url_parts = urllib.parse.urlsplit(url)
+            if url_parts.scheme in ('gemini', ''):
+                external = ''
+            else:
+                external = ' class="external"'
             if len(parts) == 1:
-                self._output.write('<li><a href="{}">{}</a></li>\n'.format(parts[0], parts[0]))
+                text = url
             else:
-                self._output.write('<li><a href="{}">{}</a></li>\n'.format(parts[0], parts[1]))
+                text = parts[1]
+            self._output.write('<li{}><a href="{}">{}</a></li>\n'.format(external, url, text))
             self._offset += 1
         else:
             self._fsm.pop_state()
-            self._fsm.push_state(self.text_state)
             self._output.write('</ul>\n')
-
-document = """
-# h1
-hello
-hello
-
-## h2
-```
-code
-code
-```
-### h3
-hello
-
-
-hello
-
-### lists
-* hello
-* two
-* three
-
-text
-
-* one
-* two
-* three
-
-### links
-=>https://example.com hello
-=> https://example.com two
-=> https://example.com three
-
-text
-
-=>https://example.com
-=> https://example.com
-=> https://example.com
-"""
-
-p = Parser(document.split('\n'))
-p.parse()
diff --git a/gemini.py b/gemini.py
--- a/gemini.py
+++ b/gemini.py
@@ -1,80 +1,26 @@
+import io
 import re
 import socket
 import ssl
 import urllib.parse
 
-import pyphen
-dic = pyphen.Pyphen(lang='en_US')
+import fsm
+
 
 def htmlescape(text: str) -> str:
     return text.replace('<', '&lt;').replace('>', '&gt;')
 
 
-def gem2html(gem: str) -> str:
-    html = []
-    html.append('<html>\n<body>\n<div id="root">')
-    state = 'text'
-    blanklines = 0
-    for line in gem.split('\n'):
-        if line.startswith('```'):
-            if state == 'pre':
-                newstate = 'text'
-                blanklines = 0
-            else:
-                newstate = 'pre'
-        elif state == 'pre':
-            newstate = 'pre'
-        elif line.startswith('=>'):
-            newstate = 'links'
-        elif line.startswith('* '):
-            newstate = 'list'
-        else:
-            newstate = 'text'
-
-        if state != 'pre':
-            if len(line.strip()) == 0:
-                blanklines += 1
-                if blanklines > 1:
-                    html.append('<br/>')
-                continue
-            blanklines = 0
-
-        if state != newstate:
-            if state in ('links', 'list'):
-                html.append('</ul>')
-            elif state == 'pre':
-                html.append('</pre>')
-            if newstate in ('links', 'list'):
-                html.append('<ul>')
-            elif newstate == 'pre':
-                html.append('<pre>')
-            state = newstate
+def gem2html(gem: str) -> str:
+    html = io.StringIO()
+    html.write('<html>\n<head>\n<style type="text/css">\n')
+    html.write(open('style.css').read())
+    html.write('</style>\n</head>\n<body>\n<div id="root">')
+    parser = fsm.Parser(gem.split('\n'), html)
+    parser.parse()
+    html.write('</div>\n</body>\n</html>')
+    return html.getvalue()
 
-        if line.startswith('```'):
-            pass
-        elif state == 'links':
-            tokens = line.split(None, 2)
-            url = tokens[1]
-            text = None if len(tokens) < 3 else tokens[2]
-            url_parts = urllib.parse.urlsplit(url)
-            external = ' class="external"' if url_parts.scheme not in ('gemini', '') else ''
-            html.append('<li{external}><a href="{url}">{text}</a></li>'.format(url=url, text=text or url, external=external))
-        elif state == 'list':
-            html.append('<li>{}</li>'.format(line[2:]))
-        elif state == 'pre':
-            html.append(line)
-        else:
-            if line.startswith('###'):
-                html.append('<p class="h3">{}</p>'.format(line[3:].lstrip()))
-            elif line.startswith('##'):
-                html.append('<p class="h2">{}</p>'.format(line[2:].lstrip()))
-            elif line.startswith('#'):
-                html.append('<p class="h1">{}</p>'.format(line[1:].lstrip()))
-            else:
-                ' '.join([dic.inserted(word, "\u00AD") for word in line.split()])
-                html.append('<p>{}</p>'.format(htmlescape(line)))
-    html.append('</div>\n</body>\n</html>')
-    return '\n'.join(html)
 
 def urljoin(base: str, url: str) -> str:
     if base is None:
@@ -83,6 +29,7 @@ def urljoin(base: str, url: str) -> str:
     url = re.sub('^gemini:', 'http:', url)
     return re.sub('^http:', 'gemini:', urllib.parse.urljoin(base, url))
 
+
 def get(url: str, follow_redirects: bool = True) -> dict:
     response = _get(url)
     if follow_redirects is True:
@@ -95,6 +42,7 @@ def get(url: str, follow_redirects: bool = True) -> dict:
             response = _get(response['meta'])
     return response
 
+
 def _parse_meta(meta: str) -> dict:
     mime, _, params_text = meta.lower().strip().partition(';')
     params = {}
@@ -105,6 +53,7 @@ def _parse_meta(meta: str) -> dict:
     params['mime'] = mime.strip()
     return params
 
+
 def _get(url: str) -> dict:
     url_parts = urllib.parse.urlsplit(url)
     if len(url_parts.path) == 0:
````