a802f59f55
- cmd/fn/doctor.go - cmd/fn/main.go - cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt - cpp/apps/primitives_gallery/playground/tables/data_table.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.h - cpp/apps/primitives_gallery/playground/tables/self_test.cpp - cpp/apps/primitives_gallery/playground/tables/tql.cpp - cpp/apps/primitives_gallery/playground/tables/viz.cpp - cpp/apps/primitives_gallery/playground/tables/viz.h - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
143 lines
4.3 KiB
Python
143 lines
4.3 KiB
Python
"""vault_knowledge_parse — Parsea un Markdown del vault y persiste en knowledge_docs."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
import sqlite3
|
|
import time
|
|
from pathlib import Path
|
|
|
|
|
|
def _parse_frontmatter(text: str) -> tuple[dict, str]:
    """Split a leading YAML frontmatter block from the Markdown body.

    The frontmatter must open on the very first line with ``---`` and is
    closed by a later line that consists solely of ``---`` (trailing spaces
    or tabs are tolerated).

    Args:
        text: Full contents of the Markdown file.

    Returns:
        A ``(frontmatter, body)`` tuple. ``frontmatter`` is the parsed YAML
        mapping, or ``{}`` when the block is empty, fails to parse, is not a
        mapping, or PyYAML cannot be imported. ``body`` is the text after the
        closing fence with leading line breaks removed. When no complete
        frontmatter block is found the result is ``({}, text)``.
    """
    if not text.startswith(("---\n", "---\r\n")):
        return {}, text

    # Search from index 3 (the line break ending the opening fence) so an
    # empty block ("---\n---\n") is recognised. The fence must fill its whole
    # line, otherwise "----" rules or "---foo" text would end the block early.
    closing = re.compile(r"\r?\n---[ \t]*(?=\r?\n|\Z)").search(text, 3)
    if closing is None:
        return {}, text

    yaml_block = text[3:closing.start()].strip()
    body = text[closing.end():].lstrip("\n\r")

    try:
        # Imported inside the try: a missing PyYAML is caught below and
        # simply yields an empty frontmatter.
        import yaml

        fm = yaml.safe_load(yaml_block) or {}
        if not isinstance(fm, dict):
            fm = {}
    except Exception:
        # Deliberate best effort: malformed YAML must not abort parsing of
        # the document body.
        fm = {}

    return fm, body
|
|
|
|
|
|
def _extract_headings(body: str) -> list[dict]:
    """Collect ATX Markdown headings (``#`` through ``######``) from *body*.

    Lines inside fenced code blocks (opened by three or more backticks or
    tildes) are skipped, so shell or Python comments in code samples are not
    mistaken for headings.

    Args:
        body: Markdown text, normally with the frontmatter already removed.

    Returns:
        One ``{"level": int, "text": str}`` dict per heading, in document
        order. ``level`` is the number of ``#`` characters and ``text`` is
        the rest of the line with surrounding whitespace stripped.
    """
    headings = []
    open_fence = ""  # marker run that opened the current code fence; "" outside
    for line in body.splitlines():
        fence = re.match(r"\s*(`{3,}|~{3,})(.*)", line)
        if open_fence:
            # Inside a fence only a bare run of the same character, at least
            # as long as the opener, closes it; everything else is code.
            if (
                fence
                and fence.group(1)[0] == open_fence[0]
                and len(fence.group(1)) >= len(open_fence)
                and not fence.group(2).strip()
            ):
                open_fence = ""
            continue
        if fence:
            open_fence = fence.group(1)
            continue

        m = re.match(r"^(#{1,6})\s+(.*)", line)
        if m:
            headings.append({"level": len(m.group(1)), "text": m.group(2).strip()})
    return headings
|
|
|
|
|
|
def _extract_title(frontmatter: dict, body: str, basename: str) -> str:
    """Pick the document title: frontmatter ``title`` > first H1 > *basename*.

    Args:
        frontmatter: Parsed frontmatter mapping; a truthy ``title`` entry wins
            and is converted with ``str()``.
        body: Markdown text searched for the first non-empty level-1 heading.
            Lines inside fenced code blocks (three or more backticks or
            tildes) are ignored.
        basename: Fallback used when neither source yields a title.

    Returns:
        The chosen title string.
    """
    if frontmatter.get("title"):
        return str(frontmatter["title"])

    open_fence = ""  # marker run that opened the current code fence; "" outside
    for line in body.splitlines():
        fence = re.match(r"\s*(`{3,}|~{3,})(.*)", line)
        if open_fence:
            # Only a bare run of the same character, at least as long as the
            # opener, closes the fence; other lines are code, not headings.
            if (
                fence
                and fence.group(1)[0] == open_fence[0]
                and len(fence.group(1)) >= len(open_fence)
                and not fence.group(2).strip()
            ):
                open_fence = ""
            continue
        if fence:
            open_fence = fence.group(1)
            continue

        m = re.match(r"^#\s+(.*)", line)
        if m:
            heading = m.group(1).strip()
            # An empty "# " line carries no title; keep looking so the
            # basename fallback can still apply.
            if heading:
                return heading
    return basename
|
|
|
|
|
|
def vault_knowledge_parse(
|
|
vault_path: str,
|
|
rel_path: str,
|
|
db_path: str | None = None,
|
|
) -> dict:
|
|
"""Parsea un archivo Markdown del vault: extrae frontmatter, título, headings y cuerpo.
|
|
|
|
Args:
|
|
vault_path: Ruta absoluta a la raiz del vault.
|
|
rel_path: Ruta relativa al archivo Markdown dentro del vault.
|
|
db_path: Override opcional de la ruta a vault_index.db.
|
|
|
|
Returns:
|
|
Dict con: rel_path, title, frontmatter, headings, content_text, persisted.
|
|
|
|
Raises:
|
|
RuntimeError: Si el archivo no existe o no se puede leer.
|
|
"""
|
|
vault = Path(vault_path)
|
|
md_file = vault / rel_path
|
|
if not md_file.exists():
|
|
raise RuntimeError(f"vault_knowledge_parse: archivo no encontrado: {md_file}")
|
|
|
|
db = Path(db_path) if db_path else vault / "vault_index.db"
|
|
|
|
try:
|
|
text = md_file.read_text(encoding="utf-8")
|
|
except UnicodeDecodeError:
|
|
text = md_file.read_text(encoding="latin-1", errors="replace")
|
|
|
|
frontmatter, body = _parse_frontmatter(text)
|
|
headings = _extract_headings(body)
|
|
basename = md_file.stem
|
|
title = _extract_title(frontmatter, body, basename)
|
|
content_text = body
|
|
|
|
# Persistir en vault_index.db
|
|
persisted = False
|
|
if db.exists():
|
|
conn = sqlite3.connect(str(db))
|
|
try:
|
|
now = int(time.time())
|
|
conn.execute(
|
|
"""
|
|
INSERT INTO knowledge_docs(rel_path, title, frontmatter_json, headings_json, parsed_at)
|
|
VALUES (?, ?, ?, ?, ?)
|
|
ON CONFLICT(rel_path) DO UPDATE SET
|
|
title=excluded.title,
|
|
frontmatter_json=excluded.frontmatter_json,
|
|
headings_json=excluded.headings_json,
|
|
parsed_at=excluded.parsed_at
|
|
""",
|
|
(
|
|
rel_path,
|
|
title,
|
|
json.dumps(frontmatter, ensure_ascii=False),
|
|
json.dumps(headings, ensure_ascii=False),
|
|
now,
|
|
),
|
|
)
|
|
# Actualizar files_fts (rowid debe coincidir con files)
|
|
conn.execute("DELETE FROM files_fts WHERE rel_path = ?", (rel_path,))
|
|
conn.execute(
|
|
"""
|
|
INSERT INTO files_fts(rowid, rel_path, content_text)
|
|
VALUES ((SELECT rowid FROM files WHERE rel_path = ?), ?, ?)
|
|
""",
|
|
(rel_path, rel_path, content_text),
|
|
)
|
|
conn.commit()
|
|
persisted = True
|
|
except Exception:
|
|
conn.rollback()
|
|
raise
|
|
finally:
|
|
conn.close()
|
|
|
|
return {
|
|
"rel_path": rel_path,
|
|
"title": title,
|
|
"frontmatter": frontmatter,
|
|
"headings": headings,
|
|
"content_text": content_text,
|
|
"persisted": persisted,
|
|
}
|