Files
fn_registry/python/functions/obsidian/parse_obsidian_frontmatter.py
T
egutierrez eb8dbf66a1 feat(infra): auto-commit con 88 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-11 00:16:46 +02:00

81 lines
2.9 KiB
Python

"""Parse the YAML frontmatter of an Obsidian note treated as plain Markdown."""
import yaml
def parse_obsidian_frontmatter(content: str) -> dict:
    """Split an Obsidian note into its YAML frontmatter and body.

    The frontmatter is the YAML block delimited by `---` lines at the very
    start of the file. A delimiter line must begin with `---` in the first
    column; trailing whitespace is tolerated, leading whitespace is not, so
    an indented `---` (for example inside a YAML block scalar) is never
    mistaken for a delimiter. The block is parsed with `yaml.safe_load`.

    If there is no valid frontmatter block at the start of the content (no
    leading `---`, no closing `---`, or the YAML does not parse into a
    mapping, which includes an empty block), the whole content is returned
    as the body and the frontmatter is an empty dict.

    Supports `\\n`, `\\r\\n` and lone `\\r` line endings. The returned body is
    sliced out of the original text, so its line endings are preserved
    exactly as given. Pure and deterministic: no I/O, no mutation of the
    input.

    Args:
        content: Full text of an Obsidian/Markdown note.

    Returns:
        A dict with two keys:
        - "frontmatter": the parsed YAML mapping (dict), or {} if absent.
        - "body": the note body after the frontmatter block, or the full
          content when there is no valid frontmatter.
    """
    if not content:
        return {"frontmatter": {}, "body": content}

    # Work on a normalized copy so every line ending is a single "\n" while
    # scanning; the original text is kept for slicing out the body later.
    normalized = content.replace("\r\n", "\n").replace("\r", "\n")
    lines = normalized.split("\n")

    # The opening delimiter must be the first line and start in column 0.
    if lines[0].rstrip() != "---":
        return {"frontmatter": {}, "body": content}

    # The closing delimiter is the next line that is `---` in column 0.
    # Only trailing whitespace is stripped: an indented `---` can be YAML
    # content (e.g. a line of a block scalar) and must not end the block.
    closing_index = next(
        (
            index
            for index, line in enumerate(lines[1:], start=1)
            if line.rstrip() == "---"
        ),
        None,
    )
    if closing_index is None:
        return {"frontmatter": {}, "body": content}

    try:
        parsed = yaml.safe_load("\n".join(lines[1:closing_index]))
    except yaml.YAMLError:
        return {"frontmatter": {}, "body": content}
    if not isinstance(parsed, dict):
        return {"frontmatter": {}, "body": content}

    # Locate the end of the closing delimiter line in the ORIGINAL text.
    # Normalization only rewrote terminators, so each header line has the
    # same characters in both texts, followed by a 1- or 2-character ending.
    body_start = 0
    for line in lines[: closing_index + 1]:
        body_start += len(line)
        if content.startswith("\r\n", body_start):
            body_start += 2
        elif body_start < len(content):
            body_start += 1

    return {"frontmatter": parsed, "body": content[body_start:]}
if __name__ == "__main__":
    # Smoke checks executed only when the module is run as a script.

    # LF note carrying a scalar and a list: both parts must be extracted.
    sample = "---\ntitle: My Note\ntags:\n - a\n - b\n---\n\nHello [[other]]."
    parsed_sample = parse_obsidian_frontmatter(sample)
    assert parsed_sample["frontmatter"] == {"title": "My Note", "tags": ["a", "b"]}
    assert parsed_sample["body"] == "\nHello [[other]]."

    # Windows (CRLF) line endings must still yield the mapping.
    windows_note = "---\r\ntitle: X\r\n---\r\nbody line"
    assert parse_obsidian_frontmatter(windows_note)["frontmatter"] == {"title": "X"}

    # Inputs without a usable frontmatter block (none at all, then one that
    # is never closed) come back unchanged as the body.
    for untouched in (
        "just a body, no frontmatter",
        "---\ntitle: X\nno closing delimiter",
    ):
        assert parse_obsidian_frontmatter(untouched) == {
            "frontmatter": {},
            "body": untouched,
        }

    print("OK")