← Back to docs index

HLI: PKMS — Pseudocode & Implementation Plan

Build order: llm_service.py → wiki_store.py → operations.py → server.py → nginx → systemd


Component 1: llm_service.py

Pure functions. No filesystem, no network calls except call_llm(). All I/O is explicit in parameters and return values.

Data shapes

from dataclasses import dataclass, field

@dataclass(frozen=True)
class IngestInput:
    """Everything the LLM needs to fold one source document into the wiki."""
    source_path: str              # e.g. "raw/paper.pdf", relative to pkms root
    source_content: str           # extracted plain text of the source
    wiki_pages: dict[str, str]   # path → content
    schema: str                   # AGENTS.md content

@dataclass(frozen=True)
class IngestOutput:
    """Parsed result of an ingest LLM call (see parse_ingest_response)."""
    pages: dict[str, str]        # path → content for new/updated pages
    log_entry: str               # body of wiki/log.md if the LLM returned one, else ""

@dataclass(frozen=True)
class QueryInput:
    """A question plus the full wiki corpus to answer it from."""
    question: str
    wiki_pages: dict[str, str]   # path → content, the only allowed knowledge source

@dataclass(frozen=True)
class QueryOutput:
    """LLM answer with the [[wikilink]] citations extracted from it."""
    answer: str                   # raw (stripped) answer text
    citations: list[str]          # sorted, de-duplicated page paths cited via [[...]]

@dataclass(frozen=True)
class LintInput:
    """Wiki corpus to audit for health issues."""
    wiki_pages: dict[str, str]

@dataclass(frozen=True)
class LintOutput:
    """Structured wiki-health report (see parse_lint_response)."""
    contradictions: list[dict]   # [{"pages": ["a.md","b.md"], "issue": "..."}]
    orphans: list[str]           # pages nothing links to
    missing_concepts: list[str]  # concepts mentioned but lacking a page
    stale: list[str]             # pages referencing outdated info

Functions (pseudocode)

def build_ingest_prompt(inp: IngestInput) -> str:
    """Construct the system + user prompt for source ingestion."""
    # Serialize the current wiki in deterministic (sorted) order.
    corpus = "\n\n".join(
        f"### FILE: {page_path}\n{body}"
        for page_path, body in sorted(inp.wiki_pages.items())
    )

    sections = [
        # System half: maintainer persona + the AGENTS.md rules.
        f"""You are a disciplined wiki maintainer. Follow these rules:

{inp.schema}

Respond with updated/new wiki pages using ### FILE: markers.""",
        # User half: the new source plus the existing wiki state.
        f"""Ingest this source: {inp.source_path}

Source content:
{inp.source_content}

Existing wiki:
{corpus}

Return ALL wiki pages that need updating (including index.md and log.md).
Use ### FILE: wiki/path.md before each page.""",
    ]
    return "\n\n".join(sections)


def parse_ingest_response(response: str) -> IngestOutput:
    """Parse LLM response with ### FILE: markers into structured pages.

    wiki/log.md is routed to log_entry instead of pages. Lines before the
    first marker are ignored.

    Fixes vs. the first draft: the flush logic was duplicated (once inside
    the loop, once after it) — it is now a single closure; and the marker is
    stripped with removeprefix so only the leading "### FILE:" is removed,
    not any later occurrence in the path text.
    """
    pages: dict[str, str] = {}
    log_entry = ""
    current_path = None
    current_lines: list[str] = []

    def flush() -> None:
        """Commit the accumulated lines to pages (or log_entry for log.md)."""
        nonlocal log_entry
        if current_path is None:
            return
        content = "\n".join(current_lines).strip()
        if current_path == "wiki/log.md":
            log_entry = content
        else:
            pages[current_path] = content

    for line in response.split("\n"):
        if line.startswith("### FILE:"):
            flush()
            current_path = line.removeprefix("### FILE:").strip()
            current_lines = []
        elif current_path:
            current_lines.append(line)
    flush()  # commit the final file

    return IngestOutput(pages=pages, log_entry=log_entry)


def build_query_prompt(inp: QueryInput) -> str:
    """Construct prompt for answering a question from wiki."""
    # Anti-hallucination guard: the model must answer only from the dump below.
    instructions = ("Answer using ONLY the wiki pages below. Cite with [[page path]]. "
                    "If the wiki lacks the answer, say so.")
    rows = []
    for page_path, body in sorted(inp.wiki_pages.items()):
        rows.append(f"### {page_path}\n{body}")
    wiki_dump = "\n\n".join(rows)
    return f"{instructions}\n\n{wiki_dump}\n\nQuestion: {inp.question}"


def parse_query_response(response: str) -> QueryOutput:
    """Extract answer and [[citations]] from LLM response."""
    import re
    # De-duplicate and sort the [[...]] targets for stable output.
    unique_refs = sorted(set(re.findall(r'\[\[([^\]]+)\]\]', response)))
    return QueryOutput(answer=response.strip(), citations=unique_refs)


def build_lint_prompt(inp: LintInput) -> str:
    """Construct prompt for wiki health audit."""
    # Fixed four-point rubric; parse_lint_response keys off these headings.
    rubric = """Audit this wiki. Report:
1. CONTRADICTIONS: pages that conflict
2. ORPHANS: pages with no inbound [[links]]
3. MISSING: important concepts mentioned but lacking a page
4. STALE: pages referencing outdated info"""

    dump = []
    for page_path, body in sorted(inp.wiki_pages.items()):
        dump.append(f"### {page_path}\n{body}")
    return rubric + "\n\n" + "\n\n".join(dump)


def parse_lint_response(response: str) -> LintOutput:
    """Parse lint report into structured output.

    Expects four sections announced by CONTRADICTION/ORPHAN/MISSING/STALE
    keywords, each followed by "- item" bullet lines.

    Fixes vs. the first draft:
    - bullet lines are classified as items BEFORE the header-keyword check,
      so a bullet such as "- missing page about stale caches" no longer
      accidentally switches the current section;
    - contradiction bullets split on the FIRST ":" only, so issue text that
      itself contains colons is kept intact;
    - each page name in a contradiction bullet is whitespace-stripped.
    """
    contradictions: list[dict] = []
    orphans: list[str] = []
    missing: list[str] = []
    stale: list[str] = []
    simple_buckets = {"orphans": orphans, "missing": missing, "stale": stale}
    section = None

    for raw in response.split("\n"):
        line = raw.strip()
        if not line:
            continue
        if line.startswith("-") and section is not None:
            item = line[1:].strip()
            if section == "contradictions":
                pages_part, _, issue = item.partition(":")
                contradictions.append({
                    "pages": [p.strip() for p in pages_part.split(",")],
                    "issue": issue.strip(),
                })
            else:
                simple_buckets[section].append(item)
            continue
        upper = line.upper()
        if "CONTRADICTION" in upper:
            section = "contradictions"
        elif "ORPHAN" in upper:
            section = "orphans"
        elif "MISSING" in upper:
            section = "missing"
        elif "STALE" in upper:
            section = "stale"

    return LintOutput(contradictions=contradictions, orphans=orphans,
                      missing_concepts=missing, stale=stale)


def call_llm(prompt: str) -> str:
    """Call litellm with deepseek. Reads DEEPSEEK_API_KEY from env.

    The single prompt string is split back into system/user messages at
    blank-line gaps: the text before the first gap becomes the system
    message, everything after it the user message.

    NOTE(review): this split is fragile — build_ingest_prompt's system
    section itself contains blank lines, so only its first paragraph ends
    up as the system message. Confirm this is acceptable, or have the
    builders return (system, user) pairs instead.
    """
    import os, litellm
    response = litellm.completion(
        model="deepseek/deepseek-chat",
        messages=[
            {"role": "system", "content": prompt.split("\n\n")[0]},
            {"role": "user", "content": "\n\n".join(prompt.split("\n\n")[1:])}
        ],
        api_key=os.getenv("DEEPSEEK_API_KEY"),  # None if unset; litellm fails at call time
        max_tokens=16000,
        temperature=0.3,  # low temperature for consistent wiki edits
    )
    return response.choices[0].message.content

Tests (pseudocode)

test_build_ingest_prompt_includes_source_and_wiki:
    inp = IngestInput(source_path="raw/test.pdf", source_content="Hello",
                       wiki_pages={"wiki/index.md": "# Index"}, schema="Be concise.")
    prompt = build_ingest_prompt(inp)
    assert "Hello" in prompt
    assert "wiki/index.md" in prompt
    assert "Be concise" in prompt
    assert "### FILE:" in prompt

test_parse_ingest_response_extracts_pages:
    response = "### FILE: wiki/new.md\n# New Page\n\nContent here.\n### FILE: wiki/index.md\nUpdated index"
    out = parse_ingest_response(response)
    assert out.pages == {"wiki/new.md": "# New Page\n\nContent here.", "wiki/index.md": "Updated index"}

test_parse_ingest_response_log_entry:
    response = "### FILE: wiki/summary.md\nSummary\n### FILE: wiki/log.md\nIngested test."
    out = parse_ingest_response(response)
    assert out.log_entry == "Ingested test."
    assert "wiki/log.md" not in out.pages

test_parse_query_response_extracts_citations:
    response = "Sutton argues [[concepts/search.md]] and Karpathy [[concepts/software-2-0.md]] agree."
    out = parse_query_response(response)
    assert "concepts/search.md" in out.citations
    assert "concepts/software-2-0.md" in out.citations

test_build_query_prompt_no_hallucination_guard:
    inp = QueryInput(question="What is X?", wiki_pages={})
    prompt = build_query_prompt(inp)
    assert "say so" in prompt.lower()

Component 2: wiki_store.py

Filesystem operations. No LLM dependency.

Data shapes

@dataclass(frozen=True)
class WikiPage:
    """A parsed wiki page.

    NOTE(review): no function in this plan constructs WikiPage — the store
    passes plain {path: content} dicts. Confirm it is needed or drop it.
    """
    path: str           # relative to wiki/ e.g. "concepts/search.md"
    title: str          # extracted from first # heading
    content_raw: str    # raw markdown

@dataclass(frozen=True)
class Source:
    """Metadata for one uploaded file in raw/ (produced by list_sources)."""
    filename: str       # basename within raw/
    size: int           # file size in bytes (st_size)
    uploaded_at: str    # ISO-8601 string derived from the file's mtime

Functions (pseudocode)

# Absolute data roots for this single-user deployment (service runs as this user).
WIKI_DIR = Path("/home/pankaj/pkms/wiki")          # generated wiki pages
RAW_DIR = Path("/home/pankaj/pkms/raw")            # uploaded source documents
SCHEMA_PATH = Path("/home/pankaj/pkms/AGENTS.md")  # wiki-maintenance rules fed to the LLM


def load_wiki_pages() -> dict[str, str]:
    """Read all .md files from wiki/. Returns {relative_path: content}."""
    if not WIKI_DIR.exists():
        return {}
    # Keys are relative to the pkms root, e.g. "wiki/concepts/search.md".
    base = WIKI_DIR.parent
    return {
        str(md.relative_to(base)): md.read_text()
        for md in sorted(WIKI_DIR.rglob("*.md"))
    }


def write_wiki_pages(pages: dict[str, str]):
    """Write pages to disk. Creates parent dirs. Paths relative to pkms root."""
    base = WIKI_DIR.parent
    for rel_path, body in pages.items():
        target = base / rel_path
        # Ensure intermediate directories exist before writing.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(body)


def append_log(entry: str):
    """Append timestamped entry to wiki/log.md."""
    from datetime import datetime
    WIKI_DIR.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Each entry becomes its own "## <timestamp>" section in the log.
    with (WIKI_DIR / "log.md").open("a") as fh:
        fh.write(f"\n## {stamp}\n\n{entry}\n")


def load_schema() -> str:
    """Read AGENTS.md. Return empty string if not found."""
    return SCHEMA_PATH.read_text() if SCHEMA_PATH.exists() else ""


def ensure_dirs():
    """Create raw/ and wiki/ if they don't exist."""
    for directory in (RAW_DIR, WIKI_DIR):
        directory.mkdir(parents=True, exist_ok=True)


def list_sources() -> list[Source]:
    """List files in raw/ with metadata, sorted by name.

    Returns one Source per regular file; subdirectories are skipped.

    Fix: the first draft used `datetime` without importing it here
    (append_log's import is function-local), so this raised NameError at
    runtime. Also stats each file once instead of twice.
    """
    from datetime import datetime
    sources = []
    if RAW_DIR.exists():
        for f in sorted(RAW_DIR.iterdir()):
            if f.is_file():
                st = f.stat()
                sources.append(Source(
                    filename=f.name,
                    size=st.st_size,
                    uploaded_at=datetime.fromtimestamp(st.st_mtime).isoformat(),
                ))
    return sources


def save_upload(filename: str, content: bytes) -> str:
    """Save uploaded file to raw/. Returns saved path relative to pkms root.

    Security fix: the filename comes from an HTTP upload (see server.py's
    /upload handler), so it is reduced to its basename first — a name like
    "../../etc/cron.d/x" must not escape RAW_DIR (path traversal).

    Raises:
        ValueError: if the filename has no usable basename.
    """
    safe_name = Path(filename).name
    if not safe_name:
        raise ValueError(f"Invalid upload filename: {filename!r}")
    RAW_DIR.mkdir(parents=True, exist_ok=True)
    path = RAW_DIR / safe_name
    path.write_bytes(content)
    return str(path.relative_to(RAW_DIR.parent))


def extract_text(filepath: str) -> str:
    """Extract text from a file. Supports .txt, .md, .pdf.

    filepath is relative to the pkms root (e.g. "raw/paper.pdf").

    Fix: the PDF document handle is now closed in a finally block, so it is
    released even if text extraction raises mid-way (the first draft leaked
    the handle on error).
    """
    full = Path("/home/pankaj/pkms") / filepath
    if full.suffix.lower() != ".pdf":
        # .txt / .md (and anything else): treat as plain text.
        return full.read_text()
    import fitz  # pymupdf
    doc = fitz.open(str(full))
    try:
        return "\n".join(page.get_text() for page in doc)
    finally:
        doc.close()

Tests (pseudocode)

test_load_wiki_pages_returns_dict(tmp_path):
    write tmp/wiki/test.md with "content"
    pages = load_wiki_pages()
    assert "wiki/test.md" in pages
    assert pages["wiki/test.md"] == "content"

test_write_wiki_pages_creates_dirs(tmp_path):
    write_wiki_pages({"wiki/deep/nested/page.md": "hello"})
    assert (tmp_path / "wiki/deep/nested/page.md").read_text() == "hello"

test_extract_text_txt():
    text = extract_text("raw/test.txt")  # plain text
    assert "expected content" in text

test_list_sources_includes_metadata():
    save_upload("test.txt", b"hello")
    sources = list_sources()
    assert len(sources) == 1
    assert sources[0].filename == "test.txt"
    assert sources[0].size == 5

Component 3: operations.py

Wires llm_service + wiki_store. Orchestrates the three operations.

Functions (pseudocode)

def ingest_source(filename: str) -> str:
    """Ingest a source from raw/ into the wiki. Returns summary message.

    Pipeline: extract text → load wiki + schema → LLM → write pages → log.

    Fix: the filename interpolations were garbled to the literal string
    "(unknown)" in paths, the FileNotFoundError message, the fallback log
    entry, and the return value — restored to f-string `{filename}`.

    Raises:
        FileNotFoundError: if raw/<filename> does not exist.
    """
    source_path = f"raw/{filename}"
    full_path = RAW_DIR / filename
    if not full_path.exists():
        raise FileNotFoundError(f"Source not found: {filename}")

    # 1. Extract text
    source_content = extract_text(source_path)

    # 2. Load wiki + schema
    wiki_pages = load_wiki_pages()
    schema = load_schema()

    # 3. Call LLM
    inp = IngestInput(source_path=source_path, source_content=source_content,
                       wiki_pages=wiki_pages, schema=schema)
    prompt = build_ingest_prompt(inp)
    response = call_llm(prompt)
    out = parse_ingest_response(response)

    # 4. Write pages
    write_wiki_pages(out.pages)

    # 5. Append log (fall back to a generic entry if the LLM omitted log.md)
    if out.log_entry:
        append_log(out.log_entry)
    else:
        append_log(f"Ingested {filename} — created/updated {len(out.pages)} pages")

    return f"Ingested {filename}: {len(out.pages)} pages updated."


def query_wiki(question: str) -> QueryOutput:
    """Answer a question using the wiki. No side effects."""
    pages = load_wiki_pages()
    # Empty wiki: short-circuit without spending an LLM call.
    if not pages:
        return QueryOutput(
            answer="No wiki pages exist yet. Ingest a source first.",
            citations=[]
        )
    prompt = build_query_prompt(QueryInput(question=question, wiki_pages=pages))
    return parse_query_response(call_llm(prompt))


def lint_wiki() -> LintOutput:
    """Audit the wiki for health issues. No side effects."""
    pages = load_wiki_pages()
    # Nothing to audit yet: return an all-clear report without an LLM call.
    if not pages:
        return LintOutput(contradictions=[], orphans=[], missing_concepts=[], stale=[])
    prompt = build_lint_prompt(LintInput(wiki_pages=pages))
    return parse_lint_response(call_llm(prompt))

Tests (pseudocode)

# Integration test — mock call_llm to return known response

test_ingest_source_creates_pages(monkeypatch, tmp_path):
    # Set up raw/test.txt with known content
    # Mock call_llm to return "### FILE: wiki/new.md\n# Test\n\nContent."
    # Call ingest_source("test.txt")
    # Assert wiki/new.md was created with correct content
    # Assert log.md was appended

test_query_wiki_empty_wiki():
    result = query_wiki("test question")
    assert "No wiki pages exist" in result.answer

test_lint_wiki_empty():
    result = lint_wiki()
    assert result.contradictions == []

Component 4: server.py

Flask app on port 8890. Jinja2 templates (inheritance from base.html). HTMX + Alpine.js for interactivity. Sync throughout — no async, no overhead.

Endpoints (pseudocode)

from flask import Flask, render_template, request

app = Flask(__name__)              # bound to 127.0.0.1:8890, fronted by nginx
WIKI_ROOT = Path("/home/pankaj/pkms")  # pkms data root (holds wiki/, raw/, AGENTS.md)


@app.route("/")
def index():
    """Wiki browser homepage. Shows index if wiki exists, else welcome."""
    pages = load_wiki_pages()
    if not pages:
        # Fresh install: no wiki yet, show the upload call-to-action.
        return render_template("welcome.html")
    # index.md is the landing content; the full page dict feeds the sidebar tree.
    home_md = pages.get("wiki/index.md", "")
    home_html = render_wikilinks(home_md)
    return render_template("wiki_browser.html", content=home_html, pages=pages)


@app.route("/wiki/<path:path>")
def view_page(path):
    """View a single wiki page. [[links]] become clickable."""
    wiki_pages = load_wiki_pages()
    key = f"wiki/{path}"
    try:
        raw_md = wiki_pages[key]
    except KeyError:
        return render_template("404.html"), 404
    # Rewrite [[wikilinks]] first, then render the markdown to HTML.
    html = mistune_html(render_wikilinks(raw_md))
    return render_template("wiki_page.html",
                           path=path, content=html,
                           backlinks=find_backlinks(key, wiki_pages),
                           pages=wiki_pages)


@app.route("/raw")
def raw_sources():
    """Upload page — list sources with ingest buttons."""
    return render_template("raw.html", sources=list_sources())


@app.route("/upload", methods=["POST"])
def upload():
    """Handle file upload. Returns updated source list HTML fragment."""
    # Skip empty form parts (browsers submit one even with no file chosen).
    for part in request.files.getlist("files"):
        if part.filename:
            save_upload(part.filename, part.read())
    return render_template("_source_list.html", sources=list_sources())


@app.route("/ingest/<filename>", methods=["POST"])
def ingest(filename):
    """Trigger ingest. Returns status HTML fragment."""
    try:
        message = ingest_source(filename)
    except Exception as exc:  # boundary: surface any failure in the UI fragment
        return render_template("_ingest_result.html", success=False, error=str(exc))
    return render_template("_ingest_result.html", success=True, summary=message)


@app.route("/query", methods=["POST"])
def query_ask():
    """Ask a question. Returns answer HTML fragment with citations."""
    question = request.form.get("question", "").strip()
    if not question:
        # Blank submission: short error fragment, no LLM call.
        return '<div class="error">Enter a question.</div>'
    outcome = query_wiki(question)
    return render_template("_query_result.html",
                           answer=mistune_html(outcome.answer),
                           citations=outcome.citations)


@app.route("/lint", methods=["POST"])
def lint():
    """Run lint audit. Returns report HTML fragment."""
    return render_template("_lint_report.html", result=lint_wiki())

Jinja2 templates (in templates/ directory)

templates/
├── base.html              # Dark theme layout, nav bar, HTMX + Alpine CDN
├── welcome.html           # "No wiki yet" hero with upload CTA
├── wiki_browser.html      # Sidebar (page tree) + main content area
├── wiki_page.html         # Single page view with backlinks
├── raw.html               # Upload form + source list
├── 404.html               # Not found
├── _source_list.html      # HTMX fragment: source table after upload
├── _ingest_result.html    # HTMX fragment: success/error after ingest
├── _query_result.html     # HTMX fragment: answer + citations
└── _lint_report.html      # HTMX fragment: contradictions/orphans/missing/stale

Template inheritance means the dark theme CSS, nav, and layout live in base.html once. All pages extend it. HTMX fragments (prefixed _) don't extend base — they're injected into existing pages.

[[wikilink]] handling

def render_wikilinks(content: str) -> str:
    """Convert [[page path]] to markdown links before mistune rendering."""
    import re

    def to_md_link(match):
        # [[target]] -> [target](/wiki/target)
        target = match.group(1)
        return f"[{target}](/wiki/{target})"

    return re.sub(r'\[\[([^\]]+)\]\]', to_md_link, content)


def find_backlinks(current: str, pages: dict[str, str]) -> list[str]:
    """Find pages that [[link]] to the current page.

    `current` is a pkms-root-relative path ("wiki/concepts/search.md");
    wikilinks cite wiki-relative paths ("concepts/search.md").

    Fix: only the leading "wiki/" is stripped (removeprefix) — the old
    str.replace removed EVERY occurrence, so a path like
    "wiki/sub/wiki/page.md" was searched as "[[sub/page.md]]".
    """
    target = f"[[{current.removeprefix('wiki/')}]]"
    return sorted(
        path for path, content in pages.items()
        if path != current and target in content
    )

Tests (pseudocode)

test_index_no_wiki_returns_welcome(client):
    rv = client.get("/")
    assert rv.status_code == 200
    assert "upload" in rv.data.decode().lower()

test_view_page_renders_wikilinks(client, tmp_path):
    write wiki/test.md with "See [[concepts/ai.md]] for more."
    rv = client.get("/wiki/test.md")
    assert 'href="/wiki/concepts/ai.md"' in rv.data.decode()

test_upload_returns_fragment(client):
    data = {"files": (BytesIO(b"hello"), "test.txt")}
    rv = client.post("/upload", data=data, content_type="multipart/form-data")
    assert rv.status_code == 200
    assert b"test.txt" in rv.data

test_query_empty_returns_error(client):
    rv = client.post("/query", data={"question": ""})
    assert b"Enter a question" in rv.data

test_lint_empty_wiki(client):
    rv = client.post("/lint")
    assert rv.status_code == 200

[[wikilink]] handling

Client-side: render [[page path]] as <a href="/wiki/page path">page path</a>. Server-side: mistune renders markdown; a post-processing step converts [[...]] patterns in the raw markdown before rendering, or we use a custom mistune plugin.

Simplest approach: regex-replace [[...]] → markdown links BEFORE passing to mistune.

def render_wikilinks(content: str) -> str:
    """Convert [[page path]] to [page path](/wiki/page path)."""
    import re
    wikilink = re.compile(r'\[\[([^\]]+)\]\]')
    return wikilink.sub(r'[\1](/wiki/\1)', content)

Tests (pseudocode)

test_index_no_wiki_returns_welcome():
    response = client.get("/")
    assert response.status_code == 200
    assert "upload" in response.text.lower()

test_view_page_renders_wikilinks():
    write wiki/test.md with "See [[concepts/ai.md]] for more."
    response = client.get("/wiki/test.md")
    assert '<a href="/wiki/concepts/ai.md">' in response.text

test_upload_returns_200():
    response = client.post("/upload", files={"files": ("test.txt", b"hello")})
    assert response.status_code == 200
    assert "test.txt" in response.text

test_query_empty_returns_error():
    response = client.post("/query", data={"question": ""})
    assert "Enter a question" in response.text

test_lint_empty_wiki():
    response = client.post("/lint")
    assert response.status_code == 200

Component 5: nginx config

Update /etc/nginx/sites-enabled/pkms.hermesbillpay from static file server to reverse proxy:

server {
    listen 443 ssl;
    server_name pkms.hermesbillpay.com;
    # ... SSL cert lines (certbot-managed) ...

    location / {
        proxy_pass http://127.0.0.1:8890;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        client_max_body_size 100M;
    }
}

Component 6: systemd service

[Unit]
Description=PKMS Server
After=network.target

[Service]
Type=simple
User=pankaj
WorkingDirectory=/home/pankaj/pkms
ExecStart=/home/pankaj/commerce-agent/.venv/bin/python server.py
Restart=on-failure
RestartSec=5
EnvironmentFile=/home/pankaj/.hermes/.env

[Install]
WantedBy=multi-user.target

Note: EnvironmentFile loads API keys from Hermes config so DEEPSEEK_API_KEY is available.