From 09c7b7abbdbf5b12f908d4a88fe5c0098094b2c7 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Fri, 14 Jan 2022 10:30:10 -0600 Subject: switch to fsm implementation of gem2html --- fsm.py | 86 +++++++++++---------------------------------------------------- gemini.py | 81 +++++++++++------------------------------------------------ 2 files changed, 29 insertions(+), 138 deletions(-) diff --git a/fsm.py b/fsm.py index 40f9c2d..901e114 100644 --- a/fsm.py +++ b/fsm.py @@ -1,4 +1,6 @@ import sys +import urllib.parse + class StackFSM(object): """ @@ -25,6 +27,7 @@ class StackFSM(object): def pop_state(self): return self._state_stack.pop() + class Parser(object): def __init__(self, document, output=None): @@ -38,29 +41,23 @@ class Parser(object): def parse(self): self._fsm.push_state(self.text_state) - while self._fsm._current_state() is not None: + while self._fsm._current_state() is not None and len(self._document) > self._offset: self._fsm.update() def text_state(self): - if len(self._document) <= self._offset: - self._fsm.pop_state() - return line = self._document[self._offset] if line.strip() == '': self._blanks += 1 else: self._blanks = 0 if line.strip() == '```': - self._fsm.pop_state() self._fsm.push_state(self.pre_state) self._output.write('
\n')
             self._offset += 1
         elif line.startswith('* '):
-            self._fsm.pop_state()
             self._fsm.push_state(self.list_state)
             self._output.write('
\n') self._offset += 1 - elif line.startswith('* '): - self._fsm.pop_state() - self._fsm.push_state(self.list_state) - self._output.write('\n') - -document = """ -# h1 -hello -hello - -## h2 -``` -code -code -``` -### h3 -hello - - -hello - -### lists -* hello -* two -* three - -text - -* one -* two -* three - -### links -=>https://example.com hello -=> https://example.com two -=> https://example.com three - -text - -=>https://example.com -=> https://example.com -=> https://example.com -""" - -p = Parser(document.split('\n')) -p.parse() diff --git a/gemini.py b/gemini.py index 560b01f..cbc2eca 100644 --- a/gemini.py +++ b/gemini.py @@ -1,80 +1,26 @@ +import io import re import socket import ssl import urllib.parse -import pyphen -dic = pyphen.Pyphen(lang='en_US') +import fsm + def htmlescape(text: str) -> str: return text.replace('<', '<').replace('>', '>') -def gem2html(gem: str) -> str: - html = [] - html.append('\n\n
') - state = 'text' - blanklines = 0 - for line in gem.split('\n'): - if line.startswith('```'): - if state == 'pre': - newstate = 'text' - blanklines = 0 - else: - newstate = 'pre' - elif state == 'pre': - newstate = 'pre' - elif line.startswith('=>'): - newstate = 'links' - elif line.startswith('* '): - newstate = 'list' - else: - newstate = 'text' - - if state != 'pre': - if len(line.strip()) == 0: - blanklines += 1 - if blanklines > 1: - html.append('
') - continue - blanklines = 0 - - if state != newstate: - if state in ('links', 'list'): - html.append('') - elif state == 'pre': - html.append('') - if newstate in ('links', 'list'): - html.append('
\n\n') - return '\n'.join(html) def urljoin(base: str, url: str) -> str: if base is None: @@ -83,6 +29,7 @@ def urljoin(base: str, url: str) -> str: url = re.sub('^gemini:', 'http:', url) return re.sub('^http:', 'gemini:', urllib.parse.urljoin(base, url)) + def get(url: str, follow_redirects: bool = True) -> dict: response = _get(url) if follow_redirects is True: @@ -95,6 +42,7 @@ def get(url: str, follow_redirects: bool = True) -> dict: response = _get(response['meta']) return response + def _parse_meta(meta: str) -> dict: mime, _, params_text = meta.lower().strip().partition(';') params = {} @@ -105,6 +53,7 @@ def _parse_meta(meta: str) -> dict: params['mime'] = mime.strip() return params + def _get(url: str) -> dict: url_parts = urllib.parse.urlsplit(url) if len(url_parts.path) == 0: -- cgit v1.2.3