"""Full-text substring search across the notes of an Obsidian vault.""" import os from obsidian import parse_obsidian_frontmatter # Directories that are part of Obsidian's machinery, never user notes. _EXCLUDED_DIRS = {".obsidian", ".trash"} def search_obsidian_notes( vault_dir: str, query: str, in_body: bool = True, in_frontmatter: bool = True, ) -> list: """Search a case-insensitive substring across every note of a vault. Walks ``vault_dir`` recursively (pruning ``.obsidian/`` and ``.trash/``), reads every ``.md`` note and looks for ``query`` as a case-insensitive substring. Each line that contains the query is reported together with its 1-based line number. The ``in_body`` and ``in_frontmatter`` flags control which part of a note is searched. The frontmatter is delimited with ``parse_obsidian_frontmatter``: its raw lines (between the opening and closing ``---``) are searched when ``in_frontmatter`` is True, and the body lines when ``in_body`` is True. Line numbers are always relative to the full file so they map directly onto the note on disk. Impure: it reads the filesystem. Raises ``ValueError`` if ``query`` is empty, ``FileNotFoundError`` if the vault does not exist and ``NotADirectoryError`` if it is not a directory. Args: vault_dir: Path to the vault root. query: Substring to look for (matched case-insensitively). in_body: Search the note body when True. in_frontmatter: Search the note frontmatter block when True. Returns: A list of dicts ``{"path": str, "matches": list}`` (one per matching note), sorted by path. Each match is ``{"line": int, "text": str}``. """ if not query: raise ValueError("query must be a non-empty string") root = os.path.abspath(vault_dir) if not os.path.exists(root): raise FileNotFoundError(f"vault path does not exist: {root}") if not os.path.isdir(root): raise NotADirectoryError(f"vault path is not a directory: {root}") needle = query.lower() results: list[dict] = [] for dirpath, dirnames, filenames in os.walk(root): dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS] for filename in filenames: if not filename.lower().endswith(".md"): continue full = os.path.abspath(os.path.join(dirpath, filename)) matches = _search_note(full, needle, in_body, in_frontmatter) if matches: results.append({"path": full, "matches": matches}) results.sort(key=lambda r: r["path"]) return results def _frontmatter_line_count(content: str) -> int: """Number of full-file lines occupied by the frontmatter block (0 if none). Counts the opening ``---``, the YAML lines and the closing ``---``. Returns 0 when the note has no valid frontmatter (per ``parse_obsidian_frontmatter``). """ if parse_obsidian_frontmatter(content).get("frontmatter"): normalized = content.replace("\r\n", "\n").replace("\r", "\n") lines = normalized.split("\n") if lines and lines[0].strip() == "---": for i in range(1, len(lines)): if lines[i].strip() == "---": return i + 1 # inclusive of both delimiters return 0 def _search_note( note_path: str, needle: str, in_body: bool, in_frontmatter: bool ) -> list: """Return the matching lines (with 1-based line numbers) inside one note.""" try: with open(note_path, "r", encoding="utf-8", errors="replace") as handle: content = handle.read() except OSError: return [] normalized = content.replace("\r\n", "\n").replace("\r", "\n") lines = normalized.split("\n") fm_lines = _frontmatter_line_count(content) matches: list[dict] = [] for idx, text in enumerate(lines): is_frontmatter = idx < fm_lines if is_frontmatter and not in_frontmatter: continue if not is_frontmatter and not in_body: continue if needle in text.lower(): matches.append({"line": idx + 1, "text": text}) return matches if __name__ == "__main__": import tempfile with tempfile.TemporaryDirectory() as tmp: os.makedirs(os.path.join(tmp, ".obsidian")) with open(os.path.join(tmp, ".obsidian", "noise.md"), "w") as f: f.write("ALPHA hidden in obsidian config") with open(os.path.join(tmp, "note.md"), "w") as f: f.write("---\ntitle: Alpha note\n---\nfirst line\nsecond ALPHA line\n") hits = search_obsidian_notes(tmp, "alpha") assert len(hits) == 1, hits # .obsidian note excluded assert hits[0]["path"].endswith("note.md") lines = [m["line"] for m in hits[0]["matches"]] assert 2 in lines and 5 in lines, hits # frontmatter + body body_only = search_obsidian_notes(tmp, "alpha", in_frontmatter=False) body_lines = [m["line"] for m in body_only[0]["matches"]] assert body_lines == [5], body_only fm_only = search_obsidian_notes(tmp, "alpha", in_body=False) fm_lines = [m["line"] for m in fm_only[0]["matches"]] assert fm_lines == [2], fm_only print("OK")