eb8dbf66a1
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
139 lines
5.2 KiB
Python
139 lines
5.2 KiB
Python
"""Full-text substring search across the notes of an Obsidian vault."""
|
|
|
|
import os
|
|
|
|
from obsidian import parse_obsidian_frontmatter
|
|
|
|
# Directories that are part of Obsidian's machinery, never user notes.
|
|
# Compared against bare directory names while the vault is walked, so a folder
# carrying one of these names is pruned at any depth, not only at the root.
_EXCLUDED_DIRS = {".obsidian", ".trash"}
|
|
|
|
|
|
def search_obsidian_notes(
    vault_dir: str,
    query: str,
    in_body: bool = True,
    in_frontmatter: bool = True,
) -> list:
    """Find every line containing ``query`` in the Markdown notes of a vault.

    The tree under ``vault_dir`` is walked recursively. Directories whose name
    is listed in ``_EXCLUDED_DIRS`` (``.obsidian/`` and ``.trash/``) are pruned
    and only files whose name ends in ``.md`` (any letter case) are read. The
    query is lowercased once and handed to ``_search_note``, which reports each
    line containing it together with that line's 1-based number.

    ``in_frontmatter`` and ``in_body`` select which part of each note is
    searched: the leading frontmatter block, the rest of the note, or both.
    Line numbers always refer to the full file, so they map directly onto the
    note on disk.

    Impure: it reads the filesystem.

    Args:
        vault_dir: Path to the vault root.
        query: Substring to look for (matched case-insensitively).
        in_body: Search the note body when True.
        in_frontmatter: Search the note frontmatter block when True.

    Returns:
        A list of dicts ``{"path": str, "matches": list}``, one per note with
        at least one matching line, sorted by path. ``path`` is absolute and
        each match is ``{"line": int, "text": str}``.

    Raises:
        ValueError: If ``query`` is empty.
        FileNotFoundError: If ``vault_dir`` does not exist.
        NotADirectoryError: If ``vault_dir`` exists but is not a directory.
    """
    if not query:
        raise ValueError("query must be a non-empty string")

    vault_root = os.path.abspath(vault_dir)
    if not os.path.exists(vault_root):
        raise FileNotFoundError(f"vault path does not exist: {vault_root}")
    if not os.path.isdir(vault_root):
        raise NotADirectoryError(f"vault path is not a directory: {vault_root}")

    lowered_query = query.lower()
    found: list[dict] = []

    for current_dir, subdirs, files in os.walk(vault_root):
        # Slice-assign so the very list os.walk handed out is mutated; that is
        # what stops the walk from descending into the excluded directories.
        subdirs[:] = [name for name in subdirs if name not in _EXCLUDED_DIRS]

        markdown_files = (name for name in files if name.lower().endswith(".md"))
        for name in markdown_files:
            note_path = os.path.abspath(os.path.join(current_dir, name))
            note_matches = _search_note(
                note_path, lowered_query, in_body, in_frontmatter
            )
            if note_matches:
                found.append({"path": note_path, "matches": note_matches})

    # os.walk order depends on the filesystem; sorting makes output stable.
    return sorted(found, key=lambda entry: entry["path"])
|
|
|
|
|
|
def _frontmatter_line_count(content: str) -> int:
    """Return how many leading lines of ``content`` form its frontmatter block.

    The count includes the opening ``---`` line, the lines after it, and the
    first following line that is ``---`` once surrounding whitespace is
    stripped. 0 is returned when ``parse_obsidian_frontmatter`` yields no (or
    an empty) ``"frontmatter"`` value, when the first line is not a ``---``
    delimiter, or when no closing delimiter is found.
    """
    parsed = parse_obsidian_frontmatter(content)
    if not parsed.get("frontmatter"):
        return 0

    # Fold CRLF and bare CR into LF so every line-ending style counts alike.
    text = content.replace("\r\n", "\n").replace("\r", "\n")
    opening, *remaining = text.split("\n")
    if opening.strip() != "---":
        return 0

    # ``remaining`` starts at file line 2, so the number of the closing
    # delimiter is also the size of the block, both delimiters included.
    for line_number, line in enumerate(remaining, start=2):
        if line.strip() == "---":
            return line_number
    return 0
|
|
|
|
|
|
def _search_note(
    note_path: str, needle: str, in_body: bool, in_frontmatter: bool
) -> list:
    """Collect the lines of a single note that contain ``needle``.

    Each line is lowercased before the comparison, so ``needle`` has to be
    lowercase already for the match to be case-insensitive. The first
    ``_frontmatter_line_count(...)`` lines of the file count as frontmatter and
    every later line as body; ``in_frontmatter`` and ``in_body`` decide which
    of the two groups is searched.

    Args:
        note_path: Path of the note to read (decoded as UTF-8, undecodable
            bytes replaced).
        needle: Lowercase substring to look for.
        in_body: Search the lines after the frontmatter block when True.
        in_frontmatter: Search the lines of the frontmatter block when True.

    Returns:
        A list of ``{"line": int, "text": str}`` dicts in file order, where
        ``line`` is the 1-based line number and ``text`` the unmodified line.
        The list is empty when the file cannot be opened or read.
    """
    try:
        with open(note_path, "r", encoding="utf-8", errors="replace") as note_file:
            raw = note_file.read()
    except OSError:
        # Best effort: a note that cannot be read contributes no matches.
        return []

    frontmatter_end = _frontmatter_line_count(raw)
    note_lines = raw.replace("\r\n", "\n").replace("\r", "\n").split("\n")

    hits: list[dict] = []
    for line_no, line in enumerate(note_lines, start=1):
        # Lines 1..frontmatter_end are frontmatter, everything after is body.
        section_enabled = in_frontmatter if line_no <= frontmatter_end else in_body
        if section_enabled and needle in line.lower():
            hits.append({"line": line_no, "text": line})
    return hits
|
|
|
|
|
|
if __name__ == "__main__":
    import tempfile

    # Self-check against a throwaway vault: one real note with a frontmatter
    # hit (line 2) and a body hit (line 5), plus a decoy note inside
    # ``.obsidian`` that must never be reported.
    with tempfile.TemporaryDirectory() as vault:
        config_dir = os.path.join(vault, ".obsidian")
        os.makedirs(config_dir)
        with open(os.path.join(config_dir, "noise.md"), "w") as out:
            out.write("ALPHA hidden in obsidian config")
        with open(os.path.join(vault, "note.md"), "w") as out:
            out.write("---\ntitle: Alpha note\n---\nfirst line\nsecond ALPHA line\n")

        # Both sections searched.
        everything = search_obsidian_notes(vault, "alpha")
        assert len(everything) == 1, everything  # .obsidian note excluded
        assert everything[0]["path"].endswith("note.md")
        all_line_numbers = {match["line"] for match in everything[0]["matches"]}
        assert {2, 5} <= all_line_numbers, everything  # frontmatter + body

        # Body only: the frontmatter title must not be reported.
        body_only = search_obsidian_notes(vault, "alpha", in_frontmatter=False)
        body_line_numbers = [match["line"] for match in body_only[0]["matches"]]
        assert body_line_numbers == [5], body_only

        # Frontmatter only: the body line must not be reported.
        fm_only = search_obsidian_notes(vault, "alpha", in_body=False)
        fm_line_numbers = [match["line"] for match in fm_only[0]["matches"]]
        assert fm_line_numbers == [2], fm_only

    print("OK")
|