"""Extract wikilink targets from the body of an Obsidian note.""" import re # Matches both plain wikilinks [[...]] and embeds ![[...]]. # The captured group is everything between the double brackets. _WIKILINK_RE = re.compile(r"!?\[\[([^\[\]]+?)\]\]") def extract_obsidian_wikilinks(body: str) -> list: """Extract the note targets from the wikilinks in a note body. Recognizes both plain links `[[...]]` and embeds `![[...]]` (Obsidian treats embeds as links too). Each target is normalized to the bare note name: [[note|alias]] -> "note" [[note#heading]] -> "note" [[note#^blockid]] -> "note" [[note]] -> "note" ![[image.png]] -> "image.png" The alias (after `|`), the heading/block anchor (after `#`) and surrounding whitespace are stripped. Targets are deduplicated while preserving the order of first appearance. Pure and deterministic: no I/O, no mutation. Args: body: The Markdown body of an Obsidian note (without frontmatter). Returns: A list of unique target note names (strings), in order of appearance. """ if not body: return [] seen = set() targets = [] for match in _WIKILINK_RE.finditer(body): inner = match.group(1) # Drop the alias part after the first pipe. target = inner.split("|", 1)[0] # Drop the heading / block anchor after the first hash. target = target.split("#", 1)[0] target = target.strip() if not target: continue if target in seen: continue seen.add(target) targets.append(target) return targets if __name__ == "__main__": body = ( "See [[Note A]] and [[Note B|the second]] plus [[Note A#Section]] " "and [[Note C#^block123]]. Embed: ![[diagram.png]]. Repeat [[Note A]]." ) links = extract_obsidian_wikilinks(body) assert links == ["Note A", "Note B", "Note C", "diagram.png"], links assert extract_obsidian_wikilinks("") == [] assert extract_obsidian_wikilinks("no links here") == [] assert extract_obsidian_wikilinks("[[ spaced |alias]]") == ["spaced"] print("OK")