feat(infra): auto-commit con 88 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,138 @@
|
||||
"""Full-text substring search across the notes of an Obsidian vault."""
|
||||
|
||||
import os
|
||||
|
||||
from obsidian import parse_obsidian_frontmatter
|
||||
|
||||
# Folder names that belong to Obsidian itself rather than to the user's notes.
# Any directory with one of these names is pruned from the vault walk, so
# nothing underneath it is ever searched.
_EXCLUDED_DIRS = {
    ".trash",
    ".obsidian",
}
|
||||
|
||||
|
||||
def search_obsidian_notes(
    vault_dir: str,
    query: str,
    in_body: bool = True,
    in_frontmatter: bool = True,
) -> list:
    """Search a case-insensitive substring across every note of a vault.

    Walks ``vault_dir`` recursively (pruning every directory whose name is in
    ``_EXCLUDED_DIRS``, i.e. ``.obsidian/`` and ``.trash/``), reads every file
    whose name ends in ``.md`` (any letter case) and looks for ``query`` as a
    case-insensitive substring. Each line that contains the query is reported
    together with its 1-based line number.

    The ``in_body`` and ``in_frontmatter`` flags control which part of a note
    is searched. The frontmatter is delimited with
    ``parse_obsidian_frontmatter``: its raw lines (from the opening to the
    closing ``---``) are searched when ``in_frontmatter`` is True, and the
    remaining lines when ``in_body`` is True. Line numbers are always relative
    to the full file so they map directly onto the note on disk. When both
    flags are False nothing can match, so an empty list is returned without
    touching any note.

    Impure: it reads the filesystem.

    Args:
        vault_dir: Path to the vault root.
        query: Substring to look for (matched case-insensitively).
        in_body: Search the note body when True.
        in_frontmatter: Search the note frontmatter block when True.

    Returns:
        A list of dicts ``{"path": str, "matches": list}`` (one per matching
        note, ``path`` being absolute), sorted by path. Each match is
        ``{"line": int, "text": str}``.

    Raises:
        ValueError: If ``query`` is empty.
        FileNotFoundError: If ``vault_dir`` does not exist.
        NotADirectoryError: If ``vault_dir`` exists but is not a directory.
    """
    if not query:
        raise ValueError("query must be a non-empty string")

    root = os.path.abspath(vault_dir)
    if not os.path.exists(root):
        raise FileNotFoundError(f"vault path does not exist: {root}")
    if not os.path.isdir(root):
        raise NotADirectoryError(f"vault path is not a directory: {root}")

    # With both sections switched off no line can ever match. Bail out after
    # validation (so the documented exceptions still fire) instead of walking
    # the vault and reading every note for a guaranteed-empty result.
    if not (in_body or in_frontmatter):
        return []

    needle = query.lower()
    results: list[dict] = []

    for dirpath, dirnames, filenames in os.walk(root):
        # Slice-assign in place: os.walk only skips a directory when it is
        # removed from the very list object it handed out.
        dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
        for filename in filenames:
            if not filename.lower().endswith(".md"):
                continue
            full = os.path.abspath(os.path.join(dirpath, filename))
            matches = _search_note(full, needle, in_body, in_frontmatter)
            if matches:
                results.append({"path": full, "matches": matches})

    # os.walk order is filesystem-dependent; sort for a deterministic result.
    results.sort(key=lambda r: r["path"])
    return results
|
||||
|
||||
|
||||
def _frontmatter_line_count(content: str) -> int:
    """Count the full-file lines taken up by the leading frontmatter block.

    The count covers the opening ``---``, every line between the delimiters
    and the closing ``---``. Whether the note has frontmatter at all is
    decided by ``parse_obsidian_frontmatter``: if its result has no truthy
    ``"frontmatter"`` entry the answer is 0. The answer is also 0 when the
    first line is not ``---`` or no closing ``---`` line is found.

    Args:
        content: Raw text of the note, with any line-ending convention.

    Returns:
        The number of lines occupied by the frontmatter block, or 0 if none.
    """
    # NOTE(review): a frontmatter block that is present but parses to an empty
    # value is falsy here and therefore counts as 0 lines -- confirm that this
    # is the intended treatment.
    parsed = parse_obsidian_frontmatter(content)
    if not parsed.get("frontmatter"):
        return 0

    # Normalise CRLF / CR so the line indexes agree with the caller's split.
    rows = content.replace("\r\n", "\n").replace("\r", "\n").split("\n")
    if not rows or rows[0].strip() != "---":
        return 0

    # Index of the first delimiter line after the opening one, if any.
    closing = next(
        (pos for pos, row in enumerate(rows[1:], start=1) if row.strip() == "---"),
        None,
    )
    if closing is None:
        return 0
    return closing + 1  # inclusive of both delimiters
|
||||
|
||||
|
||||
def _search_note(
    note_path: str, needle: str, in_body: bool, in_frontmatter: bool
) -> list:
    """Return the matching lines (with 1-based line numbers) inside one note.

    The note is read as UTF-8 (undecodable bytes are replaced rather than
    raising) and split into lines after normalising CRLF / CR line endings.
    A line matches when it contains ``needle`` caselessly; both sides are
    compared through ``str.casefold`` so that equivalents such as "Straße"
    and "strasse" match. The frontmatter boundary is only computed when
    exactly one of the two sections is searched, because it is irrelevant
    otherwise.

    Args:
        note_path: Path of the note to read.
        needle: Substring to look for; the caller passes it lower-cased.
        in_body: Search the lines after the frontmatter block when True.
        in_frontmatter: Search the lines of the frontmatter block when True.

    Returns:
        A list of ``{"line": int, "text": str}`` dicts in file order, with
        ``line`` counted from 1 over the whole file. The list is empty when
        nothing matches, when both flags are False, or when the file cannot
        be opened or read (``OSError``).
    """
    # Nothing is searchable: do not even open the file.
    if not (in_body or in_frontmatter):
        return []

    try:
        with open(note_path, "r", encoding="utf-8", errors="replace") as handle:
            content = handle.read()
    except OSError:
        # Best effort: an unreadable note simply contributes no matches.
        return []

    lines = content.replace("\r\n", "\n").replace("\r", "\n").split("\n")

    if in_body and in_frontmatter:
        # Every line is searchable, so the frontmatter boundary (and the
        # parse needed to find it) can be skipped.
        first, last = 0, len(lines)
    else:
        fm_lines = _frontmatter_line_count(content)
        first, last = (0, fm_lines) if in_frontmatter else (fm_lines, len(lines))

    # casefold() is the caseless-matching form of lower().
    folded_needle = needle.casefold()
    return [
        {"line": number, "text": text}
        for number, text in enumerate(lines[first:last], start=first + 1)
        if folded_needle in text.casefold()
    ]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import tempfile

    # Smoke test on a throw-away vault: one real note (frontmatter + body)
    # and one decoy note inside ``.obsidian`` that must never be reported.
    with tempfile.TemporaryDirectory() as vault:
        config_dir = os.path.join(vault, ".obsidian")
        os.makedirs(config_dir)
        with open(os.path.join(config_dir, "noise.md"), "w") as out:
            out.write("ALPHA hidden in obsidian config")
        with open(os.path.join(vault, "note.md"), "w") as out:
            out.write("---\ntitle: Alpha note\n---\nfirst line\nsecond ALPHA line\n")

        # Default flags: frontmatter (line 2) and body (line 5) both hit.
        found = search_obsidian_notes(vault, "alpha")
        assert len(found) == 1, found  # .obsidian note excluded
        assert found[0]["path"].endswith("note.md")
        hit_lines = [match["line"] for match in found[0]["matches"]]
        assert 2 in hit_lines and 5 in hit_lines, found  # frontmatter + body

        # Body only: the frontmatter hit disappears.
        without_frontmatter = search_obsidian_notes(
            vault, "alpha", in_frontmatter=False
        )
        assert [
            match["line"] for match in without_frontmatter[0]["matches"]
        ] == [5], without_frontmatter

        # Frontmatter only: the body hit disappears.
        without_body = search_obsidian_notes(vault, "alpha", in_body=False)
        assert [
            match["line"] for match in without_body[0]["matches"]
        ] == [2], without_body

        print("OK")
|
||||
Reference in New Issue
Block a user