Rob Sutherland’s Musings on Life, Code, and Anything Else

The Python files that make this site work.

I wanted all of the content to be written in Markdown, without having to sprinkle in HTML. To help with that I wrote several Marko plugins; they are shown below as well.

mdsite.py

from os import PathLike
from pathlib import Path
from shutil import copy, rmtree
from timeit import default_timer
import re
from tkinter import W
from footnote import Footnote
from byline import ByLine
from comment import Comment
from bible_reference import BibleReference
from margin_note import MarginNote
from helpers import strip_html_comments

# The lookup module is treated as optional: if importing it fails for any
# reason the error is printed and convert is set to None, which makes the
# "if convert:" check later in this file skip the books.csv conversion.
try:
    from lookup import convert
    from lookup import load_books
except Exception as e:
    print("Could not load convert or load_books from .lookup")
    print(e)
    convert = None

# from marko import Markdown
# markdown = Markdown(extensions=[Footnote, ByLine, BibleReference, Comment])

from marko.ext.gfm import gfm

# Register the custom extensions on the shared gfm instance that
# generate_html_from_template_and_md uses to convert markdown.
gfm.use(Footnote)
gfm.use(ByLine)
gfm.use(BibleReference)
gfm.use(Comment)
gfm.use(MarginNote)

# Runs at import time, but only when the optional lookup import succeeded
# (convert is None otherwise).
if convert:
    print("Converting ./books.json to ./static/books.csv")
    books = load_books("./books.json")
    convert(books, "./static/books.csv")


def read_file_content(p: PathLike) -> str:
    """
    Reads the content of the file at Path(p) as UTF-8 text.

    p may be a Path, a plain string or any other os.PathLike object.
    If the file does not exist an empty string is returned.
    """
    # os.PathLike only promises __fspath__; wrap it so that exists() and
    # read_text() are available for strings as well.
    path = Path(p)
    if not path.exists():
        return ""

    return path.read_text(encoding="UTF8")


def write_file_content(p: PathLike, content: str) -> None:
    """
    Writes the text in content to the file at Path(p) as UTF-8.

    p may be a Path, a plain string or any other os.PathLike object.
    An existing file is overwritten.
    """
    # os.PathLike only promises __fspath__; wrap it so write_text() is
    # available for strings as well.
    Path(p).write_text(content, encoding="UTF8")


def copy_recursive(src: PathLike, dest: PathLike) -> None:
    """
    Mirrors the directory tree below src into dest.

    dest (and any missing parents) is created first. Every file directly
    inside src is copied, and every sub-directory is mirrored by a
    recursive call.
    """

    dest.mkdir(parents=True, exist_ok=True)

    for entry in src.glob("*"):
        mirrored = dest / entry.name
        if entry.is_file():
            copy(entry, mirrored)
        if entry.is_dir():
            copy_recursive(entry, mirrored)


def find_includes(content, rel=Path("./")):
    """
    Looks for any !{somefile.ext} in content
    and replaces that token with the contents of the
    file found at somefile.ext. It uses the Path in rel
    as the starting point to find the file to include.

    Several tokens may appear on the same line; each one is resolved
    separately. A token whose file does not exist is replaced with an
    empty string, and an empty token (!{}) is left untouched.
    """
    # [^}\n]+ stops at the first closing brace, so "!{a.md} and !{b.md}" is
    # seen as two tokens. A greedy ".*" would capture "a.md} and !{b.md".
    tags = re.findall(r"!\{([^}\n]+)\}", content)
    includes = [(tag, read_file_content(rel.joinpath(tag))) for tag in tags]

    for tag, tag_content in includes:
        content = content.replace("!{" + tag + "}", tag_content)

    return content


def should_include_file(p: PathLike):
    """
    Tells whether p is a markdown page that should be converted: an
    existing file with the .md suffix whose name does not start with
    an underscore.
    """
    if not p.is_file():
        return False
    if p.suffix != ".md":
        return False
    return not p.stem.startswith("_")


def create_default_index(src: PathLike, dest: PathLike, template: str = "<content />"):
    """
    Creates a list of links to files and subfolders within a folder that
    doesn't have an index.md file.

    src is the content folder to list, dest the output folder that
    receives the generated index.html (created if missing), and template
    the page template containing the <content /> placeholder.
    """

    dest.mkdir(exist_ok=True, parents=True)

    target = dest.joinpath("index.html")
    lines = [
        "# Related Pages",
        # Each item needs its own comma: without it the empty string is
        # concatenated onto the next literal and the blank line is lost.
        "",
        "This page was created automatically as a placeholder. It links to any sub-pages. If you, as the content author wants to create a specific page for this content add the index.md file.",
        "",
    ]

    # sorted() keeps the link order stable from one build to the next;
    # glob() on its own makes no ordering promise.
    for f in sorted(src.glob("*")):
        if should_include_file(f):
            lines.append(f"- [{f.stem.replace('_', ' ')}]({f.name})")
        if f.is_dir():
            lines.append(f"- [{f.name.replace('_', ' ')}]({f.name}/index.md)")

    lines.append("")
    # The *.md link targets written above are turned into *.html by
    # generate_html_from_template_and_md.
    h = generate_html_from_template_and_md(template, "\n".join(lines), src.parent)
    write_file_content(target, h)


def generate_html_from_template_and_md(
    template: str, content: str, rel: PathLike
) -> str:
    """
    Converts the content (markdown) to html and performs some
    standard processing.

    Steps, in order: !{file} includes are expanded relative to rel, the
    markdown is rendered with gfm, the result replaces the <content />
    placeholder in template, and every href ending in .md is rewritten
    to end in .html. Returns the finished page.
    """
    h = find_includes(content, rel=rel)
    # h = markdown.convert(h)
    h = gfm(h)
    h = template.replace(
        "<content />", h
    )  # put the generated content into the template
    # convert *.md links to *.html
    # The value may not contain a quote, so every href is matched on its
    # own. A greedy ".*" spans from the first href on a line to the last
    # ".md" and leaves the earlier links unconverted.
    h = re.sub(r"""href=(["'])([^"']*)\.md\1""", r'href="\2.html"', h)
    return h


def process_markdown(
    src: PathLike, dest: PathLike, template: str = "<content />"
) -> None:
    """
    Walks src and writes one html file into dest for every markdown
    file that should be included, keeping the folder structure.

    Sub-folders are processed recursively; a sub-folder without an
    index.md first gets an automatically generated index page.
    """

    dest.mkdir(parents=True, exist_ok=True)

    for entry in src.glob("*"):
        if should_include_file(entry):
            markdown_text = read_file_content(entry)
            page = generate_html_from_template_and_md(
                template, markdown_text, entry.parent
            )
            write_file_content(dest.joinpath(entry.stem + ".html"), page)
        if entry.is_dir():
            sub_dest = dest.joinpath(entry.name)
            if not entry.joinpath("index.md").exists():
                print("index.md not found in", entry)
                create_default_index(entry, sub_dest, template)
            process_markdown(entry, sub_dest, template)


def build(
    template_path: PathLike,
    static_root: PathLike,
    content_root: PathLike,
    destination: PathLike,
) -> None:
    """
    Rebuilds the whole site into destination and prints how long it took.

    The destination folder is deleted and recreated, the static files
    are copied to [destination]/static, ./CNAME is copied to the root of
    destination and every markdown file below content_root is rendered
    with the template read from template_path.
    """

    started = default_timer()
    print("building", end="...")

    # always start from an empty output folder
    if destination.exists():
        rmtree(destination)
    destination.mkdir(exist_ok=True, parents=True)

    # copy static to [www]/static
    copy_recursive(static_root, destination.joinpath("static"))

    # The CNAME file must sit at the root of the published site but is
    # kept at the root of the repo, so it has to be copied again after
    # every clean of the destination dir.
    copy("./CNAME", destination.joinpath("CNAME"))

    template = strip_html_comments(read_file_content(template_path))

    # convert markdown files in [content]/**/*.md
    # to html in [www]/**/*.html
    process_markdown(content_root, destination, template)

    elapsed = default_timer() - started
    print("completed in {:0.2f} s".format(elapsed))


def serve(source: PathLike) -> None:
    """
    Serves the files below source over HTTP on port 8000 until the
    process is interrupted with Ctrl-C.
    """
    import http.server
    import socketserver

    PORT = 8000

    class Handler(http.server.SimpleHTTPRequestHandler):
        # Request handler bound to the source folder instead of the
        # current working directory.
        def __init__(self, *args, **kwargs):
            super().__init__(*args, directory=str(source), **kwargs)

    # make sure .js files are sent as javascript
    Handler.extensions_map.update({".js": "application/javascript"})

    with socketserver.TCPServer(("", PORT), Handler) as httpd:
        print("serving at port", PORT)
        print("Ctrl-C to stop")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            pass
        httpd.shutdown()


def publish() -> None:
    """
    Calls git add, git commit, and git push (to origin main) and prints
    how long the three commands took.

    The exit status of the git commands is not checked; a failing step
    (for instance a commit with nothing to commit) does not stop the
    following ones.
    """

    import subprocess

    start = default_timer()
    print("publishing", end="...")
    subprocess.run(["git", "add", "."])
    # The arguments are passed to git directly, without a shell, so the
    # message must not carry its own quotes. They would end up verbatim
    # in the commit message.
    subprocess.run(["git", "commit", "-m", "Publishing new version of web site"])
    subprocess.run(["git", "push", "origin", "main"])
    print("completed in {:0.2f} s".format(default_timer() - start))


def watch(paths, on_change) -> None:
    """
    Starts a watchdog observer on every path in paths (recursively) and
    calls on_change whenever something is moved, created, deleted or
    modified. Returns right after the observer has been started.
    """
    from watchdog.observers import Observer
    from watchdog.events import LoggingEventHandler
    import logging

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    class OnChangeEventHandler(LoggingEventHandler):
        """Logs every captured event, then runs the stored action."""

        def __init__(self, action, logger=None):
            super().__init__(logger=logger)
            self.action = action

        def on_moved(self, event):
            super().on_moved(event)
            self.action()

        def on_created(self, event):
            super().on_created(event)
            self.action()

        def on_deleted(self, event):
            super().on_deleted(event)
            self.action()

        def on_modified(self, event):
            super().on_modified(event)
            self.action()

    watcher = Observer()
    change_handler = OnChangeEventHandler(on_change)

    # one shared handler serves all watched paths
    for watched in paths:
        print("watching", watched, "for changes")
        watcher.schedule(change_handler, watched, recursive=True)

    watcher.start()

def try_log(a):
    """
    Calls a() and, instead of letting an Exception propagate, prints it.
    The return value of a is discarded.
    """
    try:
        a()
    except Exception as error:
        print(error)


if __name__ == "__main__":
    # Command line entry point: python mdsite.py [build|serve|publish]
    # With no argument the site is only built.

    import sys

    template_path = Path("./templates")
    template_name = "basic"
    content_root = Path("./content")
    static_root = Path("./static")
    destination = Path("./docs")

    def build_action():
        build(
            template_path.joinpath(f"{template_name}.html"),
            static_root,
            content_root,
            destination,
        )

    # Each action is a list of steps that are executed in order.
    actions = {
        "build": [build_action],
        "serve": [
            build_action,
            # try_log needs a callable. Passing the *result* of watch(...)
            # would start the watcher unprotected and then make try_log
            # print "'NoneType' object is not callable".
            lambda: try_log(
                lambda: watch([template_path, content_root, static_root], build_action)
            ),
            lambda: serve(destination),
        ],
        "publish": [
            build_action,
            lambda: publish(),
        ],
    }

    if len(sys.argv) < 2:
        action = "build"
    else:
        action = sys.argv[1]

    if action not in actions:
        print(
            f"Sorry, I don't know how to '{action}'. I can only {', '.join(actions.keys())}."
        )
        # sys.exit is always available (the bare exit() helper comes from
        # the site module); a non-zero status reports the failure.
        sys.exit(1)

    for step in actions[action]:
        step()

helpers.py

A few simple helper functions to avoid putting everything in mdsite.py

import re


def strip_html_comments(s):
    """
    Returns s with every <!-- ... --> html comment removed. Comments may
    span several lines; an opening marker that is never closed is kept.
    """
    return re.sub(r"<!--.*?-->", "", s, flags=re.DOTALL)

comment.py

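I wanted to be able to write all of the content in markdown. I had a need to comment out some sections of the markdown while working on the content, so this plugin treats any line that starts with `//` as a comment and leaves it out of the generated HTML. For example, take this markdown: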


Here's a paragraph.

// this is a comment

And here's another.

Running this will generate the following HTML.

<p>Here's a paragraph.</p>
<p>And here's another.</p>
import re
from marko import block, inline

"""
Comment extension
~~~~~~~~~~~~~~~~~

Enables comments in a markdown file rendered in Marko. 

Usage::

    from comment import Comment

    text = '''
Here's some text

// with a comment line
// and another line

With more text following

will render

Here's some text

With more text following

"""

class CommentLine(block.BlockElement):
    """
    Block element for a comment: text that starts with // followed by
    the rest of the line. The element exists only so the renderer can
    drop it from the output.
    """

    pattern = re.compile(r"//(.*)")
    priority = 6
    parse_children = False

    def __init__(self, match):
        # escaped text of the matched comment, stored under the _prefix
        # attribute name that the other block elements in this project
        # also set
        self._prefix = re.escape(match.group())

    @classmethod
    def match(cls, source):
        """Returns the result of matching the comment pattern against source."""
        return source.expect_re(cls.pattern)

    @classmethod
    def parse(cls, source):
        """Builds the element from the last match and parses what follows under it."""
        state = cls(source.match)
        with source.under_state(state):
            state.children = inline.parser.parse(source)
        return state

class CommentRenderMixin:
    """Renderer mixin that makes comment lines disappear from the output."""

    def __init__(self):
        super().__init__()

    def render_comment_line(self, element):
        """Renders a comment line as an empty string."""
        return ""  # should ignore the output

class Comment:
    """
    Extension bundle for comment lines: the element to parse and the
    renderer mixin that hides it.
    """

    elements = [CommentLine]
    # Named renderer_mixins like in the Footnote and BibleReference
    # extensions, so that the mixin is picked up the same way.
    renderer_mixins = [CommentRenderMixin]
    # Earlier spelling of the attribute, kept so existing references work.
    render_mixins = renderer_mixins


## footnote.py

This is based on other Footnote plugins. Mine only allows for numerical identifiers.

```python
import re
from marko import block, inline

"""
Footnotes extension
~~~~~~~~~~~~~~~~~~~

Enable footnotes parsing and renderering in Marko.

Usage::

    from markdown import markdown

    text = 'Foo[^1]\\n\\n[^1]: This is a footnote.\\n'
    markdown = Markdown(extensions=['footnote'])
    print(markdown(text))

    Will only convert numerical footnotes. [^2] will work, 
    but [^bob] will not). 

"""


class Document(block.Document):
    """
    Document root that additionally carries a footnotes dict.

    FootnoteDefinition.parse stores each definition in it under the
    footnote id; FootnoteReference.find and the renderer read it back.
    """

    def __init__(self, text):
        # Created before the base initializer runs - presumably because
        # that call already parses text and needs the dict; TODO confirm.
        self.footnotes = {}
        super().__init__(text)


class FootnoteDefinition(block.BlockElement):
    """
    Block element for a footnote definition such as "[^1]: some text".
    Only numerical identifiers are recognised.
    """

    pattern = re.compile(r"\[\^(\d+)\]:[^\n\S]*(?=\S| {4})")
    priority = 6

    def __init__(self, match):
        # the digits between "[^" and "]"
        self.id = match.group(1)
        self._prefix = re.escape(match.group())
        self._second_prefix = r" {1,4}"

    @classmethod
    def match(cls, source):
        """Returns the result of matching the definition pattern against source."""
        return source.expect_re(cls.pattern)

    @classmethod
    def parse(cls, source):
        """
        Builds the definition from the last match, parses its content
        and registers it on the document root under its id.
        """
        definition = cls(source.match)
        with source.under_state(definition):
            definition.children = inline.parser.parse(source)
        source.root.footnotes[definition.id] = definition
        return definition


class FootnoteReference(inline.InlineElement):
    """
    Inline element for a footnote reference such as "[^1]". A reference
    is only recognised when a definition with the same id is present on
    the document root.
    """

    pattern = re.compile(r"\[\^(\d+)\]")
    priority = 6

    def __init__(self, match):
        # the digits between "[^" and "]"
        self.id = match.group(1)

    @classmethod
    def find(cls, text):
        """Yields the matches found by the base class whose id has a definition."""
        for candidate in super().find(text):
            if candidate.group(1) in inline._root_node.footnotes:
                yield candidate


class FootnoteRendererMixin:
    """
    Renderer mixin for footnotes: references become superscript links
    and the referenced definitions are collected into one container
    that is appended to the end of the rendered document.
    """

    def __init__(self):
        super().__init__()
        # ids of the footnotes referenced so far, in order of appearance
        self.footnotes = []

    def render_footnote_reference(self, element):
        """Returns the superscript link for a reference and remembers its id."""
        ref_id = element.id
        if ref_id not in self.footnotes:
            self.footnotes.append(ref_id)
        return (
            f'<sup class="fn_ref" id="fnr_{ref_id}">'
            f'<a class="fn_ref_link" href="#fn_{ref_id}">{ref_id}</a></sup>'
        )

    def render_footnote_definition(self, element):
        """Definitions produce nothing in place; render_document emits them."""
        return ""

    def _render_footnote_definition(self, element):
        """Returns the html block for one definition, linking back to its reference."""
        body = self.render_children(element).rstrip()
        # hack to remove any paragraph tags from the definition.
        # All inline elements should be processed, but not the outer paragraph.
        body = re.sub("</?p>", "", body)
        return (
            f'<div id="fn_{element.id}" class="fn_def">'
            f'<sup><a href="#fnr_{element.id}">{element.id}</a></sup>'
            f"{body}</div>"
        )

    def render_document(self, element):
        """
        Renders the document and, when footnotes were referenced, appends
        their definitions in a fn_container div. The list of referenced
        ids is emptied afterwards.
        """
        text = self.render_children(element)
        if not self.footnotes:
            return text
        # Look the definitions up before rendering any of them: rendering
        # may register further references on self.footnotes.
        definitions = [self.root_node.footnotes[ref_id] for ref_id in self.footnotes]
        rendered = "".join(
            self._render_footnote_definition(definition) for definition in definitions
        )
        self.footnotes = []
        return text + f'<div class="fn_container">{rendered}</div>'


class Footnote:
    """
    Extension bundle for footnotes: the Document root, the definition
    and reference elements, and the renderer mixin that outputs them.
    """

    elements = [Document, FootnoteDefinition, FootnoteReference]
    renderer_mixins = [FootnoteRendererMixin]

bible_reference.py

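Since I reference Scripture often, I wanted to be able to link to a passage with a short inline token such as `[& JHN 3:16-17]` instead of writing out the full bible.com URL every time.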

import re
from marko import inline

"""
Bible Reference extension
~~~~~~~~~~~~~~~~~~~

Enables adding simple Biblical References to a document using a named-link like format

e.g. [& JHN 3:16-17]

Uses bible.com as the source. Will default to HCSB version.
To specify a version put the version number in the reference.

Usage::

    from bible_reference import BibleReference

    text = 'Additional text [& JHN 3:16-17]\\n'
    markdown = Markdown(extensions=[BibleReference])
    print(markdown(text))

    <p>Additional text <a href="https://www.bible.com/bible/72/JHN.3.16-17" class="bible_reference">JHN 3:16-17</a></p>

"""

# Translation abbreviation -> numeric version id placed in the bible.com URL.
version_index = {
    "HCSB": 72,  # Holman Christian Standard Bible
    "ESV": 59,  # English Standard Version
    "CJB": 1275,  # Complete Jewish Bible
    "KJV": 1,  # King James Version
    "MSG": 97,  # The Message
    "TPT": 1849,  # The Passion Translation
}

# Translation used when a reference does not name one.
default_translation = "HCSB"


def create_uri(translation_id, reference):
    """
    Returns the bible.com address of reference (e.g. "JHN.3.16-17") in
    the translation identified by translation_id.
    """
    uri = f"https://www.bible.com/bible/{translation_id}/{reference}"
    return uri


def get_translation_id(t):
    """
    Returns the translation id for the reference text t.

    t looks like "JHN 3:16-17" or "JHN 3:16-17 ESV". When a third part
    is present the last part names the translation, otherwise the
    default translation is used. Known abbreviations are mapped through
    version_index; anything else is returned unchanged.
    """
    # split() without an argument ignores leading, trailing and repeated
    # blanks; split(" ") would turn "JHN 3:16 " into a third, empty part
    # and yield an empty translation.
    parts = t.split()
    trans_text = parts[-1] if len(parts) > 2 else default_translation
    return version_index.get(trans_text, trans_text)


def convert_to_reference(t):
    """
    Converts the reference text t into the dotted form used in the URL,
    e.g. "JHN 3:16-17 ESV" -> "JHN.3.16-17". Only the first two parts
    (book and chapter:verse) are used.
    """
    # split() without an argument ignores leading, trailing and repeated
    # blanks, which split(" ") would turn into empty parts.
    book_and_verse = t.split()[0:2]
    return ".".join(book_and_verse).replace(":", ".")


def get_text(t):
    """
    Returns the link text for the reference text t: book and
    chapter:verse, followed by the third part in parentheses when
    there is one, e.g. "JHN 3:16-17 ESV" -> "JHN 3:16-17 (ESV)".
    """
    # split() without an argument ignores leading, trailing and repeated
    # blanks; split(" ") would make "JHN 3:16 " render as "JHN 3:16 ()".
    parts = t.split()
    text = " ".join(parts[0:2])
    if len(parts) > 2:
        text = f"{text} ({parts[2]})"
    return text


class BibleReferenceElement(inline.InlineElement):
    """
    Inline element for a Bible reference written as "[& JHN 3:16-17]",
    optionally followed by a translation, e.g. "[& JHN 3:16-17 ESV]".
    """

    pattern = re.compile(r"\[& *([^]]+) *\]")
    priority = 6
    parse_children = False

    def __init__(self, match):
        captured = match.group(1)
        # The capture group is greedy and therefore keeps any blanks that
        # precede the closing bracket; remove them before splitting the
        # reference into its parts.
        reference = captured.strip()

        self.reference = convert_to_reference(reference)
        self.translation_id = get_translation_id(reference)
        self.text = get_text(reference)
        self.raw_content = captured
        self._prefix = re.escape(match.group())

    @classmethod
    def match(cls, source):
        """Returns the result of matching the reference pattern against source."""
        return source.expect_re(cls.pattern)

    @classmethod
    def parse(cls, source):
        """Builds the element from the last match and parses what follows under it."""
        state = cls(source.match)
        with source.under_state(state):
            state.children = inline.parser.parse(source)
        return state


class BibleReferenceRenderMixin:
    """Renderer mixin that turns a Bible reference element into a link."""

    def __init__(self):
        super().__init__()

    def render_bible_reference_element(self, element):
        """Returns an anchor to the passage, with the reference text as its label."""
        href = create_uri(element.translation_id, element.reference)
        label = element.text
        return f'<a href="{href}" class="bible_reference">{label}</a>'


class BibleReference:
    """
    Extension bundle for Bible references: the inline element to parse
    and the renderer mixin that outputs the link.
    """

    elements = [BibleReferenceElement]
    renderer_mixins = [BibleReferenceRenderMixin]