63a9cb5273
Datascience: aggregate_by_group, deduplicate_entities/relations, detect_drift, diff_entities/relations, extract_entities/relations_llm, hotness_score, melt, merge_graphs, pivot, build_entity/relation_schema_prompt. Finance: avellaneda_stoikov_quotes, generate_gbm_prices, generate_taker_order, hawkes_intensity + módulo finance.py. Cybersecurity: envelope_encrypt/decrypt + módulo cybersecurity.py. Pipelines: extraction_pipeline, monte_carlo_market, run_market_sim. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
44 lines
1.5 KiB
Python
44 lines
1.5 KiB
Python
"""Genera la seccion del system prompt que describe los entity types disponibles para extraccion."""
|
|
|
|
|
|
def build_entity_schema_prompt(entity_presets: list[dict]) -> str:
    """Build the system-prompt section listing the entity types available for extraction.

    Formats the registry presets into human-readable text that tells the LLM
    which entity types it may extract and which attributes each one has.

    Args:
        entity_presets: List of presets with the keys 'label', 'type_ref' and,
            optionally, 'metadata_fields'. Example:
            [{"type_ref": "osint_person_go_cybersecurity",
              "label": "Person",
              "metadata_fields": ["full_name", "alias"]}]
            A missing or ``None`` 'label' is rendered as "Unknown"; a missing
            or ``None`` 'type_ref' is rendered as an empty string. A bare
            string in 'metadata_fields' is treated as a single attribute name.

    Returns:
        The formatted prompt section: a header line followed by one numbered
        entry per preset, entries separated by a blank line. Returns an empty
        string when ``entity_presets`` is empty.
    """
    if not entity_presets:
        return ""

    lines = ["Entity types available for extraction:"]

    for i, preset in enumerate(entity_presets, start=1):
        # dict.get(key, default) only falls back when the key is absent, so an
        # explicit None value has to be replaced by hand.
        label = preset.get("label")
        if label is None:
            label = "Unknown"

        type_ref = preset.get("type_ref")
        if type_ref is None:
            type_ref = ""

        metadata_fields = preset.get("metadata_fields") or []
        if isinstance(metadata_fields, str):
            # A lone string would otherwise be joined character by character.
            metadata_fields = [metadata_fields]

        # Emit the separator *before* each entry so no trailing blank line
        # ever needs to be stripped afterwards.
        lines.append("")
        lines.append(f"{i}. {label} (type_ref: {type_ref})")

        if metadata_fields:
            # str() keeps join from raising on non-string attribute names.
            attrs = ", ".join(str(field) for field in metadata_fields)
            lines.append(f" Attributes: {attrs}")

    return "\n".join(lines)
|