chore: auto-commit (95 archivos)
- cmd/fn/doctor.go - cmd/fn/main.go - cpp/apps/primitives_gallery/playground/tables/CMakeLists.txt - cpp/apps/primitives_gallery/playground/tables/data_table.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.cpp - cpp/apps/primitives_gallery/playground/tables/data_table_logic.h - cpp/apps/primitives_gallery/playground/tables/self_test.cpp - cpp/apps/primitives_gallery/playground/tables/tql.cpp - cpp/apps/primitives_gallery/playground/tables/viz.cpp - cpp/apps/primitives_gallery/playground/tables/viz.h - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,142 @@
|
||||
"""vault_knowledge_parse — Parsea un Markdown del vault y persiste en knowledge_docs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import sqlite3
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _parse_frontmatter(text: str) -> tuple[dict, str]:
    """Split a leading YAML frontmatter block from the rest of a Markdown text.

    The frontmatter must open with a ``---`` line at the very start of *text*
    and close with a later line consisting only of ``---`` (trailing spaces or
    tabs are tolerated). A closing line that immediately follows the opening
    one (empty frontmatter) is accepted.

    Args:
        text: Full contents of the Markdown file.

    Returns:
        ``(frontmatter, body)``. ``frontmatter`` is the parsed mapping, or
        ``{}`` when there is no frontmatter, it is unterminated, it does not
        parse to a mapping, or YAML parsing fails for any reason (including
        the ``yaml`` module not being importable). ``body`` is the text after
        the closing delimiter with leading newlines removed, or *text*
        unchanged when no frontmatter block was recognised.
    """
    if not text.startswith(("---\n", "---\r\n")):
        return {}, text

    # Start at index 3 (the line break that ends the opening "---") so that an
    # empty frontmatter, whose closing line follows immediately, is found.
    # The lookahead requires the delimiter to be a whole line, so "----" or
    # "---foo" inside the YAML block are not mistaken for the terminator.
    closing = re.compile(r"\r?\n---[ \t]*(?=\r?\n|\Z)").search(text, 3)
    if closing is None:
        return {}, text

    yaml_block = text[3:closing.start()].strip()
    body = text[closing.end():].lstrip("\n\r")

    # Deliberately best-effort: a missing PyYAML install or malformed YAML
    # must not prevent the body from being returned.
    try:
        import yaml

        fm = yaml.safe_load(yaml_block) or {}
        if not isinstance(fm, dict):
            fm = {}
    except Exception:
        fm = {}

    return fm, body
|
||||
|
||||
|
||||
def _extract_headings(body: str) -> list[dict]:
    """Collect the ATX headings (``#`` through ``######``) found in *body*.

    Lines inside fenced code blocks (delimited by three or more backticks or
    tildes) are skipped, so shell or Python comments in code samples are not
    reported as headings. An unterminated fence hides everything after it.

    Args:
        body: Markdown text without frontmatter.

    Returns:
        One ``{"level": int, "text": str}`` dict per heading, in document
        order. ``level`` is the number of leading ``#`` characters and
        ``text`` is the remainder of the line with surrounding whitespace
        removed.
    """
    heading_re = re.compile(r"^(#{1,6})\s+(.*)")
    fence_re = re.compile(r"^ {0,3}(`{3,}|~{3,})")

    headings = []
    open_fence = None  # (fence character, fence length) while inside a block
    for line in body.splitlines():
        fence = fence_re.match(line)
        if open_fence is not None:
            # A fence closes only on the same character, at least as long as
            # the opener, with nothing but whitespace after it.
            if (
                fence
                and fence.group(1)[0] == open_fence[0]
                and len(fence.group(1)) >= open_fence[1]
                and not line[fence.end():].strip()
            ):
                open_fence = None
            continue
        if fence:
            marker = fence.group(1)
            open_fence = (marker[0], len(marker))
            continue

        m = heading_re.match(line)
        if m:
            headings.append({"level": len(m.group(1)), "text": m.group(2).strip()})
    return headings
|
||||
|
||||
|
||||
def _extract_title(frontmatter: dict, body: str, basename: str) -> str:
    """Resolve the title of a document.

    Precedence: a truthy ``frontmatter["title"]`` (converted with ``str``),
    then the text of the first line in *body* that is a level-1 heading,
    then *basename*.

    Args:
        frontmatter: Parsed frontmatter mapping.
        body: Markdown text without frontmatter.
        basename: Fallback title.

    Returns:
        The resolved title string.
    """
    declared = frontmatter.get("title")
    if declared:
        return str(declared)

    # Lazily test each line; stop at the first one shaped like "# text".
    candidates = (re.match(r"^#\s+(.*)", row) for row in body.splitlines())
    first_h1 = next((hit for hit in candidates if hit), None)
    if first_h1 is None:
        return basename
    return first_h1.group(1).strip()
|
||||
|
||||
|
||||
def _frontmatter_json_default(value: object) -> object:
    """Encode values a YAML loader may produce that ``json`` rejects.

    Dates, datetimes and times become ISO-8601 strings, sets become lists
    ordered by their string form, and bytes are decoded as UTF-8 with
    replacement. Any other type raises ``TypeError``, as ``json`` itself does.
    """
    import datetime

    if isinstance(value, (datetime.date, datetime.time)):
        return value.isoformat()
    if isinstance(value, (set, frozenset)):
        return sorted(value, key=str)
    if isinstance(value, (bytes, bytearray)):
        return bytes(value).decode("utf-8", errors="replace")
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


def vault_knowledge_parse(
    vault_path: str,
    rel_path: str,
    db_path: str | None = None,
) -> dict:
    """Parse one Markdown file of the vault and persist its metadata.

    Extracts frontmatter, title, headings and body. When the index database
    file exists, upserts a row into ``knowledge_docs`` and refreshes the
    file's entry in ``files_fts``; when it does not exist, nothing is
    persisted and ``persisted`` is ``False``.

    Args:
        vault_path: Path to the vault root.
        rel_path: Path of the Markdown file relative to the vault root.
        db_path: Optional override for the database path; defaults to
            ``vault_index.db`` inside the vault root.

    Returns:
        Dict with the keys ``rel_path``, ``title``, ``frontmatter``,
        ``headings``, ``content_text`` and ``persisted``.

    Raises:
        RuntimeError: If the file does not exist or cannot be read.
        Exception: Any error raised while writing to the database is
            re-raised after the transaction is rolled back.
    """
    vault = Path(vault_path)
    md_file = vault / rel_path
    if not md_file.exists():
        raise RuntimeError(f"vault_knowledge_parse: archivo no encontrado: {md_file}")

    db = Path(db_path) if db_path else vault / "vault_index.db"

    try:
        try:
            text = md_file.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8: latin-1 maps every byte, so this cannot fail
            # on decoding.
            text = md_file.read_text(encoding="latin-1", errors="replace")
    except OSError as err:
        # Honour the documented contract: unreadable paths (directories,
        # permission problems, ...) surface as RuntimeError.
        raise RuntimeError(
            f"vault_knowledge_parse: no se pudo leer el archivo: {md_file}"
        ) from err

    frontmatter, body = _parse_frontmatter(text)
    headings = _extract_headings(body)
    basename = md_file.stem
    title = _extract_title(frontmatter, body, basename)
    content_text = body

    # Persist into the index database, only if it already exists.
    persisted = False
    if db.exists():
        conn = sqlite3.connect(str(db))
        try:
            now = int(time.time())
            conn.execute(
                """
                INSERT INTO knowledge_docs(rel_path, title, frontmatter_json, headings_json, parsed_at)
                VALUES (?, ?, ?, ?, ?)
                ON CONFLICT(rel_path) DO UPDATE SET
                    title=excluded.title,
                    frontmatter_json=excluded.frontmatter_json,
                    headings_json=excluded.headings_json,
                    parsed_at=excluded.parsed_at
                """,
                (
                    rel_path,
                    title,
                    # Frontmatter may hold dates or other YAML-native values
                    # that plain json.dumps cannot encode.
                    json.dumps(
                        frontmatter,
                        ensure_ascii=False,
                        default=_frontmatter_json_default,
                    ),
                    json.dumps(headings, ensure_ascii=False),
                    now,
                ),
            )
            # Refresh files_fts; the new row reuses the rowid of the matching
            # row in files.
            conn.execute("DELETE FROM files_fts WHERE rel_path = ?", (rel_path,))
            conn.execute(
                """
                INSERT INTO files_fts(rowid, rel_path, content_text)
                VALUES ((SELECT rowid FROM files WHERE rel_path = ?), ?, ?)
                """,
                (rel_path, rel_path, content_text),
            )
            conn.commit()
            persisted = True
        except Exception:
            conn.rollback()
            raise
        finally:
            conn.close()

    return {
        "rel_path": rel_path,
        "title": title,
        "frontmatter": frontmatter,
        "headings": headings,
        "content_text": content_text,
        "persisted": persisted,
    }
|
||||
Reference in New Issue
Block a user