HLI: PKMS — Pseudocode & Implementation Plan
Build order: llm_service.py → wiki_store.py → operations.py → server.py → nginx → systemd
Component 1: llm_service.py
Pure functions. No filesystem, no network calls except call_llm(). All I/O is explicit in parameters and return values.
Data shapes
from dataclasses import dataclass, field
@dataclass(frozen=True)
class IngestInput:
    """Immutable bundle of everything the ingest prompt is built from."""

    source_path: str  # path of the source being ingested, e.g. "raw/test.pdf"
    source_content: str  # text of that source
    wiki_pages: dict[str, str]  # path → content
    schema: str  # AGENTS.md content
@dataclass(frozen=True)
class IngestOutput:
    """Immutable result of parsing an ingest response."""

    pages: dict[str, str]  # path → content for new/updated pages
    log_entry: str  # text destined for the wiki log; "" when none was returned
@dataclass(frozen=True)
class QueryInput:
    """Immutable bundle of a question plus the wiki pages it is answered from."""

    question: str  # the user's question, as typed
    wiki_pages: dict[str, str]  # path → content
@dataclass(frozen=True)
class QueryOutput:
    """Immutable answer to a wiki query."""

    answer: str  # answer text
    citations: list[str]  # targets of the [[...]] citations found in the answer
@dataclass(frozen=True)
class LintInput:
    """Immutable bundle of the wiki pages to audit."""

    wiki_pages: dict[str, str]  # path → content
@dataclass(frozen=True)
class LintOutput:
    """Immutable, structured result of a wiki health audit."""

    contradictions: list[dict]  # [{"pages": ["a.md","b.md"], "issue": "..."}]
    orphans: list[str]  # entries reported under the ORPHANS section
    missing_concepts: list[str]  # entries reported under the MISSING section
    stale: list[str]  # entries reported under the STALE section
Functions (pseudocode)
def build_ingest_prompt(inp: IngestInput) -> str:
    """Construct the system + user prompt for source ingestion.

    The result is ``system + "\\n\\n" + user``. Existing wiki pages are listed
    in sorted path order, each introduced by a ``### FILE: <path>`` line, which
    is the same marker the model is asked to use in its reply.

    Args:
        inp: Source path/content, current wiki pages and schema text.

    Returns:
        The combined prompt string.
    """
    system = (
        "You are a disciplined wiki maintainer. Follow these rules:\n"
        f"{inp.schema}\n"
        "Respond with updated/new wiki pages using ### FILE: markers."
    )
    wiki_text = "\n\n".join(
        f"### FILE: {path}\n{content}"
        for path, content in sorted(inp.wiki_pages.items())
    )
    # The wiki/log.md section of the reply is *appended* to the existing log
    # (parse_ingest_response routes it to log_entry, ingest_source passes it
    # to append_log). Asking for the whole file, as before, would duplicate
    # every earlier entry on each ingest, so request only the new entry.
    user = (
        f"Ingest this source: {inp.source_path}\n"
        "Source content:\n"
        f"{inp.source_content}\n"
        "Existing wiki:\n"
        f"{wiki_text}\n"
        "Return ALL wiki pages that need updating (including index.md).\n"
        "For wiki/log.md, return ONLY the new entry for this ingest; it is "
        "appended to the existing log, so do not repeat earlier entries.\n"
        "Use ### FILE: wiki/path.md before each page."
    )
    return system + "\n\n" + user
def parse_ingest_response(response: str) -> IngestOutput:
    """Parse LLM response with ### FILE: markers into structured pages.

    Each line starting with ``### FILE:`` opens a new section; the lines that
    follow (up to the next marker) are that file's content, stripped of
    leading/trailing whitespace. Text before the first marker is ignored.
    The ``wiki/log.md`` section is returned as ``log_entry`` instead of being
    stored in ``pages``. If a path appears more than once, the last wins.

    Args:
        response: Raw model output.

    Returns:
        IngestOutput with the parsed pages and log entry ("" if absent).
    """
    marker = "### FILE:"
    pages: dict[str, str] = {}
    log_entry = ""

    def store(path, lines) -> None:
        """Record one finished section under its path (or as the log entry)."""
        nonlocal log_entry
        if not path:
            return
        content = "\n".join(lines).strip()
        if path == "wiki/log.md":
            log_entry = content
        else:
            pages[path] = content

    current_path = None
    current_lines: list[str] = []
    for line in response.split("\n"):
        if line.startswith(marker):
            store(current_path, current_lines)
            # Drop only the leading marker (replace() would also eat a marker
            # string occurring later in the line), then tolerate the path
            # being wrapped in backticks.
            current_path = line[len(marker):].strip().strip("`").strip()
            current_lines = []
        elif current_path:
            current_lines.append(line)
    store(current_path, current_lines)  # flush the last section

    return IngestOutput(pages=pages, log_entry=log_entry)
def build_query_prompt(inp: QueryInput) -> str:
    """Build the question-answering prompt.

    Layout: instruction line, then every wiki page (sorted by path, each
    under a ``### <path>`` heading), then the question, with blank lines
    between the three parts.
    """
    instructions = "Answer using ONLY the wiki pages below. Cite with [[page path]]. If the wiki lacks the answer, say so."
    sections = [
        f"### {page_path}\n{body}"
        for page_path, body in sorted(inp.wiki_pages.items())
    ]
    corpus = "\n\n".join(sections)
    return "\n\n".join((instructions, corpus, f"Question: {inp.question}"))
def parse_query_response(response: str) -> QueryOutput:
    """Wrap an LLM reply as a QueryOutput.

    The answer is the reply with surrounding whitespace removed; citations
    are the distinct ``[[...]]`` targets found in it, in sorted order.
    """
    import re

    unique_targets = set(re.findall(r'\[\[([^\]]+)\]\]', response))
    return QueryOutput(
        answer=response.strip(),
        citations=sorted(unique_targets),
    )
def build_lint_prompt(inp: LintInput) -> str:
    """Construct prompt for wiki health audit.

    Besides naming the four report categories, the instructions spell out
    the line format parse_lint_response relies on (section heading lines
    followed by "- " bullets; contradictions as "pages: issue"). Without
    that, the model is free to answer in prose the parser cannot read.

    Args:
        inp: The wiki pages to audit.

    Returns:
        Instructions, a blank line, then every page (sorted by path) under a
        ``### <path>`` heading.
    """
    # Keep the instructions free of blank lines: call_llm treats everything
    # up to the first blank line as the system message.
    system = (
        "Audit this wiki. Report:\n"
        "1. CONTRADICTIONS: pages that conflict\n"
        "2. ORPHANS: pages with no inbound [[links]]\n"
        "3. MISSING: important concepts mentioned but lacking a page\n"
        "4. STALE: pages referencing outdated info\n"
        "Format: one heading line per section (CONTRADICTIONS, ORPHANS, "
        "MISSING, STALE), each followed by one '- ' bullet per finding.\n"
        "CONTRADICTIONS bullets: '- page-a.md, page-b.md: what conflicts'.\n"
        "All other bullets: '- <page path or concept>'.\n"
        "Leave a section without bullets when it has no findings."
    )
    wiki_text = "\n\n".join(
        f"### {path}\n{content}"
        for path, content in sorted(inp.wiki_pages.items())
    )
    return f"{system}\n\n{wiki_text}"
def parse_lint_response(response: str) -> LintOutput:
    """Parse lint report into structured output. Expects sections.

    A section starts at a heading line mentioning CONTRADICTION, ORPHAN,
    MISSING or STALE (case-insensitive). Bullet lines ("-" or "* ") below a
    heading are that section's findings. Contradiction bullets are read as
    "page, page: issue".

    Differences from a naive "keyword anywhere" scan:
      * A bullet only counts as a heading when its text *begins* with a
        section keyword, so findings such as "- a.md: contains stale info"
        are kept instead of silently switching section.
      * The issue text keeps everything after the first colon.
      * Page names are individually stripped; empty names are dropped.

    Args:
        response: Raw model output.

    Returns:
        LintOutput; every list is empty when nothing could be parsed.
    """
    keywords = (
        ("CONTRADICTION", "contradictions"),
        ("ORPHAN", "orphans"),
        ("MISSING", "missing"),
        ("STALE", "stale"),
    )
    contradictions: list[dict] = []
    simple_items: dict[str, list[str]] = {"orphans": [], "missing": [], "stale": []}
    section = None

    for raw_line in response.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        is_bullet = line.startswith("-") or line.startswith("* ")
        if is_bullet:
            item = line[1:].strip()
            # Ignore markdown emphasis/heading decoration when probing for a
            # bulleted heading such as "- **ORPHANS**".
            probe = item.lstrip("*_#` ").upper()
            header = next(
                (name for word, name in keywords if probe.startswith(word)), None
            )
        else:
            item = ""
            upper = line.upper()
            header = next((name for word, name in keywords if word in upper), None)

        if header is not None:
            section = header
        elif not is_bullet or section is None:
            continue  # prose line, or a bullet before any heading
        elif section == "contradictions":
            pages_part, _, issue = item.partition(":")
            contradictions.append({
                "pages": [p.strip() for p in pages_part.split(",") if p.strip()],
                "issue": issue.strip(),
            })
        else:
            simple_items[section].append(item)

    return LintOutput(
        contradictions=contradictions,
        orphans=simple_items["orphans"],
        missing_concepts=simple_items["missing"],
        stale=simple_items["stale"],
    )
def call_llm(prompt: str, *, system: "str | None" = None) -> str:
    """Call litellm with deepseek. Reads DEEPSEEK_API_KEY from env.

    Args:
        prompt: The prompt text. When ``system`` is not given, the text up to
            the first blank line is sent as the system message and the rest
            as the user message.
        system: Optional explicit system message. When provided, ``prompt``
            is sent unchanged as the user message. Prefer this when the
            system text may itself contain blank lines (e.g. an embedded
            schema), because the blank-line heuristic would cut it short.

    Returns:
        The text of the first completion choice ("" if the model returned no
        text content).
    """
    import os

    import litellm

    if system is None:
        # Split once, at the first blank line.
        system, _, user = prompt.partition("\n\n")
        if not user:
            # No blank line: send everything as the user message rather than
            # a system message followed by an empty user message.
            system, user = "", prompt
    else:
        user = prompt

    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": user})

    response = litellm.completion(
        model="deepseek/deepseek-chat",
        messages=messages,
        api_key=os.getenv("DEEPSEEK_API_KEY"),
        max_tokens=16000,
        temperature=0.3,
    )
    # Callers feed the result straight into string parsers, so never hand
    # them None.
    return response.choices[0].message.content or ""
Tests (pseudocode)
test_build_ingest_prompt_includes_source_and_wiki:
inp = IngestInput(source_path="raw/test.pdf", source_content="Hello",
wiki_pages={"wiki/index.md": "# Index"}, schema="Be concise.")
prompt = build_ingest_prompt(inp)
assert "Hello" in prompt
assert "wiki/index.md" in prompt
assert "Be concise" in prompt
assert "### FILE:" in prompt
test_parse_ingest_response_extracts_pages:
response = "### FILE: wiki/new.md\n# New Page\n\nContent here.\n### FILE: wiki/index.md\nUpdated index"
out = parse_ingest_response(response)
assert out.pages == {"wiki/new.md": "# New Page\n\nContent here.", "wiki/index.md": "Updated index"}
test_parse_ingest_response_log_entry:
response = "### FILE: wiki/summary.md\nSummary\n### FILE: wiki/log.md\nIngested test."
out = parse_ingest_response(response)
assert out.log_entry == "Ingested test."
assert "wiki/log.md" not in out.pages
test_parse_query_response_extracts_citations:
response = "Sutton argues [[concepts/search.md]] and Karpathy [[concepts/software-2-0.md]] agree."
out = parse_query_response(response)
assert "concepts/search.md" in out.citations
assert "concepts/software-2-0.md" in out.citations
test_build_query_prompt_no_hallucination_guard:
inp = QueryInput(question="What is X?", wiki_pages={})
prompt = build_query_prompt(inp)
assert "say so" in prompt.lower()
Component 2: wiki_store.py
Filesystem operations. No LLM dependency.
Data shapes
@dataclass(frozen=True)
class WikiPage:
path: str # relative to wiki/ e.g. "concepts/search.md"
title: str # extracted from first # heading
content_raw: str # raw markdown
@dataclass(frozen=True)
class Source:
filename: str
size: int
uploaded_at: str
Functions (pseudocode)
from pathlib import Path

# Single root for the data tree so the three locations cannot drift apart.
PKMS_ROOT = Path("/home/pankaj/pkms")
WIKI_DIR = PKMS_ROOT / "wiki"  # generated wiki pages (*.md)
RAW_DIR = PKMS_ROOT / "raw"  # uploaded source files
SCHEMA_PATH = PKMS_ROOT / "AGENTS.md"  # rules text passed to the ingest prompt
def load_wiki_pages() -> dict[str, str]:
    """Read all .md files from wiki/. Returns {relative_path: content}.

    Keys are relative to the parent of WIKI_DIR and always use forward
    slashes, e.g. "wiki/concepts/search.md". Files are read as UTF-8 so the
    result does not depend on the process locale. Returns {} when WIKI_DIR
    does not exist.
    """
    if not WIKI_DIR.is_dir():
        return {}
    root = WIKI_DIR.parent
    return {
        md_file.relative_to(root).as_posix(): md_file.read_text(encoding="utf-8")
        for md_file in sorted(WIKI_DIR.rglob("*.md"))
        if md_file.is_file()  # skip directories that happen to end in .md
    }
def write_wiki_pages(pages: dict[str, str]) -> None:
    """Write pages to disk. Creates parent dirs. Paths relative to pkms root.

    The paths come from parsed LLM output, so they are treated as untrusted:
    every path must resolve to a location strictly inside WIKI_DIR. All paths
    are validated before anything is written, so a bad path cannot leave a
    half-applied update behind.

    Args:
        pages: Mapping of path (e.g. "wiki/concepts/search.md") to content.

    Raises:
        ValueError: If any path is absolute, uses ".." to escape, or
            otherwise resolves outside WIKI_DIR.
    """
    root = WIKI_DIR.parent
    wiki_root = WIKI_DIR.resolve()

    targets = []
    for path, content in pages.items():
        full = (root / path).resolve()
        if full == wiki_root or not full.is_relative_to(wiki_root):
            raise ValueError(f"Refusing to write outside wiki/: {path!r}")
        targets.append((full, content))

    for full, content in targets:
        full.parent.mkdir(parents=True, exist_ok=True)
        full.write_text(content, encoding="utf-8")
def append_log(entry: str):
    """Append timestamped entry to wiki/log.md.

    The entry is written under a "## YYYY-MM-DD HH:MM:SS" heading (local
    time). The file is opened as UTF-8 so non-ASCII entries are written the
    same way regardless of the process locale.
    """
    from datetime import datetime

    WIKI_DIR.mkdir(parents=True, exist_ok=True)
    log_path = WIKI_DIR / "log.md"
    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(log_path, "a", encoding="utf-8") as f:
        f.write(f"\n## {ts}\n\n{entry}\n")
def load_schema() -> str:
    """Read AGENTS.md. Return empty string if not found.

    Reads as UTF-8. The file is simply opened and a missing file is handled,
    rather than checking for existence first, so a file removed between the
    check and the read cannot cause an error.
    """
    try:
        return SCHEMA_PATH.read_text(encoding="utf-8")
    except FileNotFoundError:
        return ""
def ensure_dirs():
    """Make sure the raw/ and wiki/ directories exist (parents included)."""
    for directory in (RAW_DIR, WIKI_DIR):
        directory.mkdir(parents=True, exist_ok=True)
def list_sources() -> list[Source]:
    """List files in raw/ with metadata.

    Returns one Source per regular file directly inside RAW_DIR, sorted by
    path, with its size in bytes and its modification time as an ISO-8601
    string (local time). Returns [] when RAW_DIR is not a directory.
    """
    # Imported here because nothing else brings ``datetime`` into this
    # function's scope (append_log's import is local to append_log).
    from datetime import datetime

    if not RAW_DIR.is_dir():
        return []

    sources = []
    for entry in sorted(RAW_DIR.iterdir()):
        if not entry.is_file():
            continue
        info = entry.stat()  # one stat call serves both size and mtime
        sources.append(Source(
            filename=entry.name,
            size=info.st_size,
            uploaded_at=datetime.fromtimestamp(info.st_mtime).isoformat(),
        ))
    return sources
def save_upload(filename: str, content: bytes) -> str:
    """Save uploaded file to raw/. Returns saved path.

    Only the final path component of ``filename`` is used, so a
    client-supplied name such as "../../x" or "C:\\dir\\x" cannot place the
    file outside RAW_DIR.

    Args:
        filename: Name supplied with the upload.
        content: Raw file bytes.

    Returns:
        The saved path relative to the parent of RAW_DIR, e.g. "raw/test.txt".

    Raises:
        ValueError: If no usable file name remains (empty, "." or "..").
    """
    # Normalise Windows separators before taking the basename.
    safe_name = Path(filename.replace("\\", "/")).name
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload filename: {filename!r}")

    RAW_DIR.mkdir(parents=True, exist_ok=True)
    path = RAW_DIR / safe_name
    path.write_bytes(content)
    return str(path.relative_to(RAW_DIR.parent))
def extract_text(filepath: str) -> str:
    """Extract text from a file. Supports .txt, .md, .pdf.

    Args:
        filepath: Path relative to the pkms root (the parent of WIKI_DIR),
            e.g. "raw/test.txt".

    Returns:
        For .pdf: the text of every page joined with newlines. For any other
        suffix: the file decoded as UTF-8.
    """
    # Same root the other wiki_store functions derive from WIKI_DIR, instead
    # of a second hard-coded copy of the path.
    full = WIKI_DIR.parent / filepath
    if full.suffix.lower() == ".pdf":
        import fitz  # pymupdf

        doc = fitz.open(str(full))
        try:
            return "\n".join(page.get_text() for page in doc)
        finally:
            doc.close()  # release the handle even if extraction fails
    return full.read_text(encoding="utf-8")
Tests (pseudocode)
test_load_wiki_pages_returns_dict(tmp_path):
monkeypatch WIKI_DIR to tmp_path / "wiki" (load_wiki_pages reads the module constant), then write tmp_path/wiki/test.md with "content"
pages = load_wiki_pages()
assert "wiki/test.md" in pages
assert pages["wiki/test.md"] == "content"
test_write_wiki_pages_creates_dirs(tmp_path):
write_wiki_pages({"wiki/deep/nested/page.md": "hello"})
assert (tmp_path / "wiki/deep/nested/page.md").read_text() == "hello"
test_extract_text_txt():
text = extract_text("raw/test.txt") # plain text
assert "expected content" in text
test_list_sources_includes_metadata():
save_upload("test.txt", b"hello")
sources = list_sources()
assert len(sources) == 1
assert sources[0].filename == "test.txt"
assert sources[0].size == 5
Component 3: operations.py
Wires llm_service + wiki_store. Orchestrates the three operations.
Functions (pseudocode)
def ingest_source(filename: str) -> str:
    """Ingest a source from raw/ into the wiki. Returns summary message.

    Steps: extract the source text, load the current wiki and schema, ask the
    LLM for updated pages, write them, and append a log entry (the LLM's own
    entry if it returned one, otherwise a generated line).

    Args:
        filename: Name of a file inside RAW_DIR.

    Returns:
        A one-line summary with the number of pages written.

    Raises:
        FileNotFoundError: If ``filename`` is not a regular file in RAW_DIR
            (this also rejects directory names such as "..").
    """
    source_path = f"raw/{filename}"
    if not (RAW_DIR / filename).is_file():
        raise FileNotFoundError(f"Source not found: {filename}")

    # 1. Extract text
    source_content = extract_text(source_path)

    # 2. Load wiki + schema
    inp = IngestInput(
        source_path=source_path,
        source_content=source_content,
        wiki_pages=load_wiki_pages(),
        schema=load_schema(),
    )

    # 3. Call LLM
    out = parse_ingest_response(call_llm(build_ingest_prompt(inp)))

    # 4. Write pages
    write_wiki_pages(out.pages)
    page_count = len(out.pages)

    # 5. Append log
    append_log(
        out.log_entry
        or f"Ingested {filename} — created/updated {page_count} pages"
    )
    return f"Ingested {filename}: {page_count} pages updated."
def query_wiki(question: str) -> QueryOutput:
    """Answer ``question`` from the current wiki pages.

    With an empty wiki no LLM call is made; a fixed hint is returned instead.
    """
    pages = load_wiki_pages()
    if pages:
        prompt = build_query_prompt(QueryInput(question=question, wiki_pages=pages))
        return parse_query_response(call_llm(prompt))
    return QueryOutput(
        answer="No wiki pages exist yet. Ingest a source first.",
        citations=[],
    )
def lint_wiki() -> LintOutput:
    """Run the health audit over the current wiki pages.

    With an empty wiki no LLM call is made; an all-empty report is returned.
    """
    pages = load_wiki_pages()
    if pages:
        prompt = build_lint_prompt(LintInput(wiki_pages=pages))
        return parse_lint_response(call_llm(prompt))
    return LintOutput(contradictions=[], orphans=[], missing_concepts=[], stale=[])
Tests (pseudocode)
# Integration test — mock call_llm to return known response
test_ingest_source_creates_pages(monkeypatch, tmp_path):
# Set up raw/test.txt with known content
# Mock call_llm to return "### FILE: wiki/new.md\n# Test\n\nContent."
# Call ingest_source("test.txt")
# Assert wiki/new.md was created with correct content
# Assert log.md was appended
test_query_wiki_empty_wiki():
result = query_wiki("test question")
assert "No wiki pages exist" in result.answer
test_lint_wiki_empty():
result = lint_wiki()
assert result.contradictions == []
Component 4: server.py
Flask app on port 8890. Jinja2 templates (inheritance from base.html). HTMX + Alpine.js for interactivity. Sync throughout — no async, no overhead.
Endpoints (pseudocode)
from flask import Flask, render_template, request
# Flask application object; the @app.route handlers below register on it.
app = Flask(__name__)
# Root of the PKMS data tree.
# NOTE(review): no handler in this section reads WIKI_ROOT — confirm it is
# still needed, or whether the wiki_store constants should be used instead.
WIKI_ROOT = Path("/home/pankaj/pkms")
@app.route("/")
def index():
    """Wiki browser homepage. Shows index if wiki exists, else welcome.

    The index page goes through the same pipeline as view_page
    ([[wikilinks]] → markdown links → HTML), so the template receives
    rendered HTML rather than raw markdown. A wiki without wiki/index.md
    renders with empty content.
    """
    pages = load_wiki_pages()
    if not pages:
        return render_template("welcome.html")

    # Pass index.md content + page tree for sidebar
    index_markdown = pages.get("wiki/index.md", "")
    return render_template(
        "wiki_browser.html",
        content=mistune_html(render_wikilinks(index_markdown)),
        pages=pages,
    )
@app.route("/wiki/<path:path>")
def view_page(path):
    """Render one wiki page as HTML, with clickable [[links]] and backlinks.

    Responds with the 404 template (status 404) when no page exists at
    ``wiki/<path>``.
    """
    all_pages = load_wiki_pages()
    page_key = f"wiki/{path}"
    markdown_src = all_pages.get(page_key)
    if markdown_src is None:
        return render_template("404.html"), 404

    rendered = mistune_html(render_wikilinks(markdown_src))
    # Pages whose content links to this one.
    linking_pages = find_backlinks(page_key, all_pages)
    return render_template(
        "wiki_page.html",
        path=path,
        content=rendered,
        backlinks=linking_pages,
        pages=all_pages,
    )
@app.route("/raw")
def raw_sources():
    """Render the upload page with the current list of sources."""
    return render_template("raw.html", sources=list_sources())
@app.route("/upload", methods=["POST"])
def upload():
    """Handle file upload. Returns updated source list HTML fragment.

    The client-supplied filename is reduced to its final path component
    before it is handed to save_upload, so names such as "../../x" cannot
    address anything outside the upload directory. Parts with no usable
    name are skipped.
    """
    from pathlib import PurePosixPath

    for f in request.files.getlist("files"):
        # Normalise Windows separators, then keep only the basename.
        name = PurePosixPath((f.filename or "").replace("\\", "/")).name
        if name and name not in (".", ".."):
            save_upload(name, f.read())
    return render_template("_source_list.html", sources=list_sources())
@app.route("/ingest/<filename>", methods=["POST"])
def ingest(filename):
    """Trigger ingest. Returns status HTML fragment.

    Any failure is reported inside the fragment (success=False plus the
    error text) instead of propagating, and is also logged with its
    traceback so the cause is not lost.
    """
    try:
        summary = ingest_source(filename)
    except Exception as e:  # request boundary: report to the user, keep the trace
        app.logger.exception("Ingest failed for %s", filename)
        return render_template("_ingest_result.html", success=False, error=str(e))
    return render_template("_ingest_result.html", success=True, summary=summary)
@app.route("/query", methods=["POST"])
def query_ask():
    """Ask a question. Returns answer HTML fragment with citations.

    The answer is passed through render_wikilinks before markdown rendering,
    like wiki pages are, so the [[page path]] citations the query prompt asks
    for become links instead of literal brackets. An empty question returns
    a short error fragment without calling the LLM.
    """
    question = request.form.get("question", "").strip()
    if not question:
        return '<div class="error">Enter a question.</div>'

    result = query_wiki(question)
    answer_html = mistune_html(render_wikilinks(result.answer))
    return render_template(
        "_query_result.html",
        answer=answer_html,
        citations=result.citations,
    )
@app.route("/lint", methods=["POST"])
def lint():
    """Run the wiki audit and render its report fragment."""
    return render_template("_lint_report.html", result=lint_wiki())
Jinja2 templates (in templates/ directory)
templates/
├── base.html # Dark theme layout, nav bar, HTMX + Alpine CDN
├── welcome.html # "No wiki yet" hero with upload CTA
├── wiki_browser.html # Sidebar (page tree) + main content area
├── wiki_page.html # Single page view with backlinks
├── raw.html # Upload form + source list
├── 404.html # Not found
├── _source_list.html # HTMX fragment: source table after upload
├── _ingest_result.html # HTMX fragment: success/error after ingest
├── _query_result.html # HTMX fragment: answer + citations
└── _lint_report.html # HTMX fragment: contradictions/orphans/missing/stale
Template inheritance means the dark theme CSS, nav, and layout live in base.html once. All pages extend it. HTMX fragments (prefixed _) don't extend base — they're injected into existing pages.
[[wikilink]] handling
def render_wikilinks(content: str) -> str:
    """Convert [[page path]] to markdown links before mistune rendering.

    ``[[concepts/ai.md]]`` becomes ``[concepts/ai.md](/wiki/concepts/ai.md)``.
    The link text is kept exactly as written; the URL is normalised:

      * a leading "wiki/" is dropped, because page paths are shown to the
        LLM with that prefix while the /wiki/<path> route adds it again;
      * the path is percent-encoded, since a raw space or parenthesis in the
        URL part would break the markdown link.
    """
    import re
    from urllib.parse import quote

    def to_markdown_link(match):
        label = match.group(1)
        target = label.strip().removeprefix("wiki/")
        return f"[{label}](/wiki/{quote(target)})"

    return re.sub(r'\[\[([^\]]+)\]\]', to_markdown_link, content)
def find_backlinks(current: str, pages: dict[str, str]) -> list[str]:
    """Find pages that [[link]] to the current page.

    Args:
        current: Key of the page being viewed, e.g. "wiki/concepts/ai.md".
        pages: Mapping of page key to markdown content.

    Returns:
        Sorted keys of the other pages whose content contains a wikilink to
        ``current``, written either without the leading "wiki/"
        (``[[concepts/ai.md]]``) or with it (``[[wiki/concepts/ai.md]]``).
    """
    # Strip only the leading prefix; replace() would also remove a "wiki/"
    # segment occurring deeper in the path.
    short_name = current.removeprefix("wiki/")
    needles = (f"[[{short_name}]]", f"[[wiki/{short_name}]]")
    return sorted(
        path
        for path, content in pages.items()
        if path != current and any(needle in content for needle in needles)
    )
Tests (pseudocode)
test_index_no_wiki_returns_welcome(client):
rv = client.get("/")
assert rv.status_code == 200
assert "upload" in rv.data.decode().lower()
test_view_page_renders_wikilinks(client, tmp_path):
write wiki/test.md with "See [[concepts/ai.md]] for more."
rv = client.get("/wiki/test.md")
assert 'href="/wiki/concepts/ai.md"' in rv.data.decode()
test_upload_returns_fragment(client):
data = {"files": (BytesIO(b"hello"), "test.txt")}
rv = client.post("/upload", data=data, content_type="multipart/form-data")
assert rv.status_code == 200
assert b"test.txt" in rv.data
test_query_empty_returns_error(client):
rv = client.post("/query", data={"question": ""})
assert b"Enter a question" in rv.data
test_lint_empty_wiki(client):
rv = client.post("/lint")
assert rv.status_code == 200
[[wikilink]] handling
Client-side: render [[page path]] as <a href="/wiki/page path">page path</a>.
Server-side: mistune renders markdown; a post-processing step converts [[...]] patterns in the raw markdown before rendering, or we use a custom mistune plugin.
Simplest approach: regex-replace [[...]] → markdown links BEFORE passing to mistune.
def render_wikilinks(content: str) -> str:
    """Convert [[page path]] to [page path](/wiki/page path).

    The link text is kept exactly as written; the URL is normalised:

      * a leading "wiki/" is dropped, because page paths are shown to the
        LLM with that prefix while the /wiki/<path> route adds it again;
      * the path is percent-encoded, since a raw space or parenthesis in the
        URL part would break the markdown link.
    """
    import re
    from urllib.parse import quote

    def to_markdown_link(match):
        label = match.group(1)
        target = label.strip().removeprefix("wiki/")
        return f"[{label}](/wiki/{quote(target)})"

    return re.sub(r'\[\[([^\]]+)\]\]', to_markdown_link, content)
Tests (pseudocode — same cases as the Flask test-client tests above, written against `response.text`)
test_index_no_wiki_returns_welcome():
response = client.get("/")
assert response.status_code == 200
assert "upload" in response.text.lower()
test_view_page_renders_wikilinks():
write wiki/test.md with "See [[concepts/ai.md]] for more."
response = client.get("/wiki/test.md")
assert '<a href="/wiki/concepts/ai.md">' in response.text
test_upload_returns_200():
response = client.post("/upload", files={"files": ("test.txt", b"hello")})
assert response.status_code == 200
assert "test.txt" in response.text
test_query_empty_returns_error():
response = client.post("/query", data={"question": ""})
assert "Enter a question" in response.text
test_lint_empty_wiki():
response = client.post("/lint")
assert response.status_code == 200
Component 5: nginx config
Update /etc/nginx/sites-enabled/pkms.hermesbillpay from static file server to reverse proxy:
server {
    listen 443 ssl;
    server_name pkms.hermesbillpay.com;
    # ... SSL cert lines (certbot-managed) ...
    location / {
        proxy_pass http://127.0.0.1:8890;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        client_max_body_size 100M;
        # /ingest, /query and /lint block on an LLM completion (up to 16000
        # output tokens) before responding. nginx's default 60s read timeout
        # would answer 504 while the app is still working.
        proxy_read_timeout 300s;
        proxy_send_timeout 300s;
    }
}
Component 6: systemd service
[Unit]
Description=PKMS Server
After=network.target
[Service]
Type=simple
User=pankaj
# server.py is given as a relative path, so it is resolved against this directory.
WorkingDirectory=/home/pankaj/pkms
# NOTE(review): the interpreter comes from the commerce-agent virtualenv, not a
# PKMS-specific one — confirm that environment has this app's dependencies.
ExecStart=/home/pankaj/commerce-agent/.venv/bin/python server.py
# Restart after a failed exit, waiting 5 seconds between attempts.
Restart=on-failure
RestartSec=5
# Supplies DEEPSEEK_API_KEY, which call_llm reads from the environment.
EnvironmentFile=/home/pankaj/.hermes/.env
[Install]
WantedBy=multi-user.target
Note: EnvironmentFile loads API keys from Hermes config so DEEPSEEK_API_KEY is available.