eb8dbf66a1
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
81 lines
2.9 KiB
Python
81 lines
2.9 KiB
Python
"""Parse the YAML frontmatter of an Obsidian note treated as plain Markdown."""
|
|
|
|
import re

import yaml
|
|
|
|
|
|
# The only line terminators recognised when splitting a note into lines.
# `\r\n` is listed first so a CRLF pair is consumed as a single break.
_LINE_BREAK = re.compile(r"\r\n|\r|\n")


def _iter_lines(text: str):
    """Yield `(line, next_offset)` for every line of `text`.

    `line` excludes its terminator; `next_offset` is the index in `text`
    where the following line starts (or `len(text)` for the last line).
    """
    start = 0
    for line_break in _LINE_BREAK.finditer(text):
        yield text[start:line_break.start()], line_break.end()
        start = line_break.end()
    yield text[start:], len(text)


def parse_obsidian_frontmatter(content: str) -> dict:
    """Split an Obsidian note into its YAML frontmatter and body.

    The frontmatter is the YAML block delimited by `---` lines at the very
    start of the file. Both delimiters must begin in the first column
    (trailing whitespace is tolerated), so an indented `---` inside a YAML
    block scalar does not end the block. The block is parsed with
    `yaml.safe_load`.

    If there is no valid frontmatter block at the start of the content (no
    leading `---`, no closing `---`, the YAML fails to load, or it does not
    load into a mapping -- which includes an empty block), the whole content
    is returned as the body and the frontmatter is an empty dict.

    Supports `\\n`, `\\r\\n` and lone `\\r` line endings. The returned body
    is a verbatim slice of `content`, so its line endings are preserved.
    Pure and deterministic: no I/O, no mutation of the input.

    Args:
        content: Full text of an Obsidian/Markdown note.

    Returns:
        A dict with two keys:
        - "frontmatter": the parsed YAML mapping (dict), or {} if absent.
        - "body": the note body after the closing delimiter line, or the
          full content when there is no valid frontmatter.
    """
    no_frontmatter = {"frontmatter": {}, "body": content}
    if not content:
        return no_frontmatter

    lines = _iter_lines(content)

    # Frontmatter must open on the very first line, starting in column 0.
    first_line, _ = next(lines)
    if first_line.rstrip() != "---":
        return no_frontmatter

    # Collect YAML lines up to the closing delimiter. The body is sliced
    # from the original text so its line endings are left untouched.
    yaml_lines = []
    for line, next_offset in lines:
        if line.rstrip() == "---":
            body = content[next_offset:]
            break
        yaml_lines.append(line)
    else:
        # Ran out of lines without finding a closing `---`.
        return no_frontmatter

    try:
        parsed = yaml.safe_load("\n".join(yaml_lines))
    except (yaml.YAMLError, ValueError):
        # ValueError: PyYAML's timestamp constructor raises it (not a
        # YAMLError) for date-shaped but invalid scalars such as 2023-13-45.
        return no_frontmatter

    if not isinstance(parsed, dict):
        return no_frontmatter

    return {"frontmatter": parsed, "body": body}
|
|
|
|
|
|
if __name__ == "__main__":
    # Smoke checks, run only when the module is executed as a script.

    # A well-formed note yields both the parsed mapping and the trailing body.
    sample = "---\ntitle: My Note\ntags:\n - a\n - b\n---\n\nHello [[other]]."
    parsed_sample = parse_obsidian_frontmatter(sample)
    assert parsed_sample["frontmatter"] == {"title": "My Note", "tags": ["a", "b"]}
    assert parsed_sample["body"] == "\nHello [[other]]."

    # Windows (CRLF) line endings are accepted around the delimiters.
    windows_note = "---\r\ntitle: X\r\n---\r\nbody line"
    assert parse_obsidian_frontmatter(windows_note)["frontmatter"] == {"title": "X"}

    # Notes without usable frontmatter come back unchanged as the body:
    # first one has no frontmatter at all, second one never closes the block.
    for untouched in (
        "just a body, no frontmatter",
        "---\ntitle: X\nno closing delimiter",
    ):
        assert parse_obsidian_frontmatter(untouched) == {"frontmatter": {}, "body": untouched}

    print("OK")
|