"""Enricher: Extract text from a document file.""" import sys import json import os sys.path.insert(0, os.path.join(os.environ.get("FN_REGISTRY_ROOT", ""), "python", "functions", "core")) from extract_text_from_file import extract_text_from_file def main(): entity = json.load(sys.stdin) file_path = (entity.get("metadata") or {}).get("file_path", "") if not file_path: json.dump({"error": "No file_path in entity metadata"}, sys.stdout) return if not os.path.exists(file_path): json.dump({"error": f"File not found: {file_path}"}, sys.stdout) return text = extract_text_from_file(file_path) result = { "entities": [ { "name": f"Text: {os.path.basename(file_path)}", "type_ref": "text", "description": f"Text extracted from {os.path.basename(file_path)}", "tags": ["extracted"], "metadata": { "content_preview": text[:500], "source": file_path, "char_count": len(text), "full_content": text, }, "notes": "", } ], "relations": [ { "name": "extracted_from", "from_entity": "__NEW_0__", "to_entity": "__SOURCE__", "description": f"Text extracted from document", "weight": 1.0, "tags": [], "notes": "", } ], } json.dump(result, sys.stdout, ensure_ascii=False) if __name__ == "__main__": main()