"""Parse the YAML frontmatter of an Obsidian note treated as plain Markdown.""" import yaml def parse_obsidian_frontmatter(content: str) -> dict: """Split an Obsidian note into its YAML frontmatter and body. The frontmatter is the YAML block delimited by `---` lines at the very start of the file. It is parsed with `yaml.safe_load`. If there is no valid frontmatter block at the start of the content (no leading `---`, no closing `---`, or the YAML does not parse into a mapping), the whole content is returned as the body and the frontmatter is an empty dict. Supports both `\\n` and `\\r\\n` line endings. Pure and deterministic: no I/O, no mutation of the input. Args: content: Full text of an Obsidian/Markdown note. Returns: A dict with two keys: - "frontmatter": the parsed YAML mapping (dict), or {} if absent. - "body": the note body after the frontmatter block, or the full content when there is no valid frontmatter. """ if not content: return {"frontmatter": {}, "body": content} # Normalize line endings for splitting without mutating the original body. normalized = content.replace("\r\n", "\n").replace("\r", "\n") lines = normalized.split("\n") # Frontmatter must start on the very first line with an exact `---`. if not lines or lines[0].strip() != "---": return {"frontmatter": {}, "body": content} # Find the closing `---` delimiter. closing_index = None for i in range(1, len(lines)): if lines[i].strip() == "---": closing_index = i break if closing_index is None: return {"frontmatter": {}, "body": content} yaml_block = "\n".join(lines[1:closing_index]) body = "\n".join(lines[closing_index + 1:]) try: parsed = yaml.safe_load(yaml_block) except yaml.YAMLError: return {"frontmatter": {}, "body": content} if not isinstance(parsed, dict): return {"frontmatter": {}, "body": content} return {"frontmatter": parsed, "body": body} if __name__ == "__main__": note = "---\ntitle: My Note\ntags:\n - a\n - b\n---\n\nHello [[other]]." result = parse_obsidian_frontmatter(note) assert result["frontmatter"] == {"title": "My Note", "tags": ["a", "b"]} assert result["body"] == "\nHello [[other]]." # CRLF line endings. crlf = "---\r\ntitle: X\r\n---\r\nbody line" assert parse_obsidian_frontmatter(crlf)["frontmatter"] == {"title": "X"} # No frontmatter -> body is the full content. plain = "just a body, no frontmatter" assert parse_obsidian_frontmatter(plain) == {"frontmatter": {}, "body": plain} # Unterminated frontmatter -> treated as plain body. broken = "---\ntitle: X\nno closing delimiter" assert parse_obsidian_frontmatter(broken) == {"frontmatter": {}, "body": broken} print("OK")