Files
presupuestos_callcenter/notebooks/.ipynb_checkpoints/03_regeneracion-checkpoint.ipynb
T
2026-05-21 18:26:30 +02:00

368 lines
13 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "b090f346",
"metadata": {},
"source": [
"# 03 — Regeneración de presupuestos\n",
"\n",
"**Hipótesis:** un mismo cliente (`customer_id` + `vehicle_id`) recibe N quotes antes de convertir. El centro \"regenera\" el presupuesto cuando descarta el de call_center y abre uno nuevo en el TPV local.\n",
"\n",
"Definición operativa de regeneración:\n",
"- Existe un quote call_center previo (Q0) para el par cliente+vehículo.\n",
"- Existe un quote posterior (Q1...Qn) en un terminal de un centro NO call_center, dentro de una ventana de D días (`REGEN_WINDOW_DAYS`).\n",
"- Q1 puede tener distinto `order_id` que Q0 (regenera de cero) o el mismo (reescribe — menos común). Las consultas de este notebook solo cuentan el primer caso, porque exigen `qn_order != q0_order`.\n",
"\n",
"Métricas pedidas:\n",
"1. Centros que MÁS regeneran (número absoluto de regeneraciones y % sobre los quotes call_center recibidos).\n",
"2. Quotes call_center con regeneración vs sin regeneración."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5626c2cd",
"metadata": {},
"outputs": [],
"source": [
"from google.cloud import bigquery\n",
"import pandas as pd\n",
"\n",
"# BigQuery project/dataset that every query in this notebook reads from.\n",
"PROJECT = \"autingo-159109\"\n",
"DATASET = \"psql_dcpublic\"\n",
"bq = bigquery.Client(project=PROJECT)\n",
"\n",
"WINDOW_DAYS = 90        # look-back window (days) applied to call_center quotes (Q0)\n",
"REGEN_WINDOW_DAYS = 60  # days after Q0 in which a later quote counts as a regeneration\n",
"\n",
"\n",
"def q(sql: str) -> pd.DataFrame:\n",
"    \"\"\"Run `sql` on the shared BigQuery client `bq` and return the result as a DataFrame.\"\"\"\n",
"    job = bq.query(sql)\n",
"    return job.to_dataframe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "17cff6ce",
"metadata": {},
"outputs": [],
"source": [
"# Per-centre view: how many call_center quotes (Q0) were followed by a quote on a\n",
"# different order, for the same customer+vehicle, on a non-call_center terminal.\n",
"# The SQL has no LIMIT so that len(df_centros) is the real number of centres;\n",
"# only the displayed table is truncated to the top 30.\n",
"SQL_REGEN = f\"\"\"\n",
"DECLARE win INT64 DEFAULT {WINDOW_DAYS};\n",
"DECLARE regen_win INT64 DEFAULT {REGEN_WINDOW_DAYS};\n",
"DECLARE t_start TIMESTAMP DEFAULT TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL win DAY);\n",
"\n",
"WITH\n",
"cc_users AS (\n",
"  SELECT DISTINCT tpvuser_id AS user_id\n",
"  FROM `{PROJECT}.{DATASET}.tpv_authorization_tpvuser_centers`\n",
"  WHERE dccenter_id IN (159, 162)\n",
"),\n",
"-- Q0: quotes generados por call_center\n",
"q0 AS (\n",
"  SELECT\n",
"    q.id AS q0_id,\n",
"    q.order_id AS q0_order,\n",
"    q.created_at AS q0_ts,\n",
"    o.customer_id,\n",
"    o.vehicle_id\n",
"  FROM `{PROJECT}.{DATASET}.tpv_orders_quote` q\n",
"  JOIN cc_users cc ON q.created_by_id = cc.user_id\n",
"  JOIN `{PROJECT}.{DATASET}.tpv_orders_order` o ON q.order_id = o.id\n",
"  WHERE q.created_at >= t_start\n",
"    AND q.deleted_at IS NULL\n",
"    AND o.customer_id IS NOT NULL\n",
"    AND o.vehicle_id IS NOT NULL\n",
"),\n",
"-- Q1..Qn: quotes posteriores para mismo cliente+vehículo, en centro NO call_center\n",
"qN AS (\n",
"  SELECT\n",
"    q.id AS qn_id,\n",
"    q.order_id AS qn_order,\n",
"    q.created_at AS qn_ts,\n",
"    q.created_by_id,\n",
"    o.customer_id,\n",
"    o.vehicle_id,\n",
"    o.terminal_id,\n",
"    t.center_id\n",
"  FROM `{PROJECT}.{DATASET}.tpv_orders_quote` q\n",
"  JOIN `{PROJECT}.{DATASET}.tpv_orders_order` o ON q.order_id = o.id\n",
"  -- Inner join: the center_id filters below already discard orders without a terminal.\n",
"  JOIN `{PROJECT}.{DATASET}.tpv_terminals` t ON o.terminal_id = t.id\n",
"  WHERE q.deleted_at IS NULL\n",
"    AND t.center_id IS NOT NULL\n",
"    AND t.center_id NOT IN (159, 162)\n",
"),\n",
"-- Empareja Q0 con Q1+ dentro de regen_win días\n",
"regen AS (\n",
"  SELECT\n",
"    q0.q0_id,\n",
"    q0.q0_order,\n",
"    q0.customer_id,\n",
"    q0.vehicle_id,\n",
"    qN.qn_id,\n",
"    qN.qn_order,\n",
"    qN.center_id AS regen_center,\n",
"    TIMESTAMP_DIFF(qN.qn_ts, q0.q0_ts, HOUR) / 24 AS dias_entre\n",
"  FROM q0\n",
"  JOIN qN\n",
"    ON q0.customer_id = qN.customer_id\n",
"    AND q0.vehicle_id = qN.vehicle_id\n",
"    AND qN.qn_ts > q0.q0_ts\n",
"    AND qN.qn_ts <= TIMESTAMP_ADD(q0.q0_ts, INTERVAL regen_win DAY)\n",
"    AND qN.qn_order != q0.q0_order\n",
"),\n",
"-- Para cada Q0, ¿hay al menos UNA regeneración?\n",
"-- (This CTE is not referenced by the final SELECT below.)\n",
"q0_has_regen AS (\n",
"  SELECT q0_id, COUNT(*) AS regen_count,\n",
"         MIN(dias_entre) AS dias_a_regen,\n",
"         -- Centre of the earliest regeneration; qn_id breaks ties on dias_entre.\n",
"         ARRAY_AGG(regen_center ORDER BY dias_entre, qn_id LIMIT 1)[OFFSET(0)] AS first_regen_center\n",
"  FROM regen\n",
"  GROUP BY q0_id\n",
")\n",
"\n",
"-- Vista por centro: cuántos Q0 regenera cada centro\n",
"SELECT\n",
"  c.id AS center_id,\n",
"  c.name AS center_name,\n",
"  COUNT(DISTINCT r.q0_id) AS q0_regenerados_aqui,\n",
"  COUNT(*) AS regen_events,\n",
"  ROUND(AVG(r.dias_entre), 1) AS dias_avg_regen\n",
"FROM regen r\n",
"JOIN `{PROJECT}.{DATASET}.centers` c ON r.regen_center = c.id\n",
"GROUP BY c.id, c.name\n",
"ORDER BY q0_regenerados_aqui DESC\n",
"\"\"\"\n",
"\n",
"df_centros = q(SQL_REGEN)\n",
"print(f\"Centros con eventos de regeneración: {len(df_centros)}\")\n",
"df_centros.head(30)"
]
},
{
"cell_type": "markdown",
"id": "43add847",
"metadata": {},
"source": [
"## Totales: Q0 con regeneración vs sin regeneración"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "736158ba",
"metadata": {},
"outputs": [],
"source": [
"# Overall split of call_center quotes (Q0): regenerated vs not regenerated.\n",
"# NOTE(review): Q0 created less than REGEN_WINDOW_DAYS ago have not yet had a full\n",
"# regeneration window, so they can only be under-counted as regenerated. Confirm\n",
"# whether the cohort should be restricted to Q0 with a complete window.\n",
"SQL_TOT = f\"\"\"\n",
"DECLARE win INT64 DEFAULT {WINDOW_DAYS};\n",
"DECLARE regen_win INT64 DEFAULT {REGEN_WINDOW_DAYS};\n",
"DECLARE t_start TIMESTAMP DEFAULT TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL win DAY);\n",
"\n",
"WITH\n",
"cc_users AS (\n",
"  SELECT DISTINCT tpvuser_id AS user_id\n",
"  FROM `{PROJECT}.{DATASET}.tpv_authorization_tpvuser_centers`\n",
"  WHERE dccenter_id IN (159, 162)\n",
"),\n",
"q0 AS (\n",
"  SELECT q.id AS q0_id, q.order_id AS q0_order, q.created_at AS q0_ts,\n",
"         o.customer_id, o.vehicle_id\n",
"  FROM `{PROJECT}.{DATASET}.tpv_orders_quote` q\n",
"  JOIN cc_users cc ON q.created_by_id = cc.user_id\n",
"  JOIN `{PROJECT}.{DATASET}.tpv_orders_order` o ON q.order_id = o.id\n",
"  WHERE q.created_at >= t_start AND q.deleted_at IS NULL\n",
"    AND o.customer_id IS NOT NULL AND o.vehicle_id IS NOT NULL\n",
"),\n",
"qN AS (\n",
"  SELECT q.order_id AS qn_order, q.created_at AS qn_ts,\n",
"         o.customer_id, o.vehicle_id, t.center_id\n",
"  FROM `{PROJECT}.{DATASET}.tpv_orders_quote` q\n",
"  JOIN `{PROJECT}.{DATASET}.tpv_orders_order` o ON q.order_id = o.id\n",
"  -- Inner join: the center_id filters below already discard orders without a terminal.\n",
"  JOIN `{PROJECT}.{DATASET}.tpv_terminals` t ON o.terminal_id = t.id\n",
"  WHERE q.deleted_at IS NULL\n",
"    AND t.center_id IS NOT NULL AND t.center_id NOT IN (159,162)\n",
"),\n",
"-- One row per Q0 that has at least one matching later quote.\n",
"regen AS (\n",
"  SELECT DISTINCT q0.q0_id\n",
"  FROM q0\n",
"  JOIN qN\n",
"    ON q0.customer_id = qN.customer_id\n",
"    AND q0.vehicle_id = qN.vehicle_id\n",
"    AND qN.qn_ts > q0.q0_ts\n",
"    AND qN.qn_ts <= TIMESTAMP_ADD(q0.q0_ts, INTERVAL regen_win DAY)\n",
"    AND qN.qn_order != q0.q0_order\n",
")\n",
"SELECT\n",
"  COUNT(*) AS q0_total,\n",
"  COUNT(DISTINCT r.q0_id) AS q0_regenerados,\n",
"  COUNT(*) - COUNT(DISTINCT r.q0_id) AS q0_no_regenerados,\n",
"  ROUND(SAFE_DIVIDE(COUNT(DISTINCT r.q0_id), COUNT(*)), 4) AS pct_regenerados\n",
"FROM q0\n",
"LEFT JOIN regen r USING (q0_id)\n",
"\"\"\"\n",
"df_totales = q(SQL_TOT)\n",
"df_totales"
]
},
{
"cell_type": "markdown",
"id": "c183a653",
"metadata": {},
"source": [
"## Distribución días hasta regeneración"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aab452ca",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# One row per (Q0, later quote) pair with the elapsed time in days\n",
"# (whole hours divided by 24).\n",
"SQL_DIAS = f\"\"\"\n",
"DECLARE win INT64 DEFAULT {WINDOW_DAYS};\n",
"DECLARE regen_win INT64 DEFAULT {REGEN_WINDOW_DAYS};\n",
"DECLARE t_start TIMESTAMP DEFAULT TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL win DAY);\n",
"\n",
"WITH\n",
"cc_users AS (\n",
"  SELECT DISTINCT tpvuser_id AS user_id\n",
"  FROM `{PROJECT}.{DATASET}.tpv_authorization_tpvuser_centers`\n",
"  WHERE dccenter_id IN (159, 162)\n",
"),\n",
"q0 AS (\n",
"  SELECT q.id AS q0_id, q.order_id AS q0_order, q.created_at AS q0_ts,\n",
"         o.customer_id, o.vehicle_id\n",
"  FROM `{PROJECT}.{DATASET}.tpv_orders_quote` q\n",
"  JOIN cc_users cc ON q.created_by_id = cc.user_id\n",
"  JOIN `{PROJECT}.{DATASET}.tpv_orders_order` o ON q.order_id = o.id\n",
"  WHERE q.created_at >= t_start AND q.deleted_at IS NULL\n",
"    AND o.customer_id IS NOT NULL AND o.vehicle_id IS NOT NULL\n",
"),\n",
"qN AS (\n",
"  SELECT q.order_id AS qn_order, q.created_at AS qn_ts,\n",
"         o.customer_id, o.vehicle_id, t.center_id\n",
"  FROM `{PROJECT}.{DATASET}.tpv_orders_quote` q\n",
"  JOIN `{PROJECT}.{DATASET}.tpv_orders_order` o ON q.order_id = o.id\n",
"  -- Inner join: the center_id filters below already discard orders without a terminal.\n",
"  JOIN `{PROJECT}.{DATASET}.tpv_terminals` t ON o.terminal_id = t.id\n",
"  WHERE q.deleted_at IS NULL\n",
"    AND t.center_id IS NOT NULL AND t.center_id NOT IN (159,162)\n",
")\n",
"SELECT\n",
"  TIMESTAMP_DIFF(qN.qn_ts, q0.q0_ts, HOUR)/24 AS dias_entre\n",
"FROM q0\n",
"JOIN qN\n",
"  ON q0.customer_id = qN.customer_id\n",
"  AND q0.vehicle_id = qN.vehicle_id\n",
"  AND qN.qn_ts > q0.q0_ts\n",
"  AND qN.qn_ts <= TIMESTAMP_ADD(q0.q0_ts, INTERVAL regen_win DAY)\n",
"  AND qN.qn_order != q0.q0_order\n",
"\"\"\"\n",
"dias = q(SQL_DIAS)\n",
"display(dias.describe())\n",
"\n",
"# Cap at the same window the SQL uses instead of a hard-coded 60, so the\n",
"# histogram stays correct if REGEN_WINDOW_DAYS is changed.\n",
"fig, ax = plt.subplots()\n",
"dias[\"dias_entre\"].clip(upper=REGEN_WINDOW_DAYS).hist(bins=30, ax=ax)\n",
"ax.set_xlabel(\"Días entre Q0 (call_center) y Q1 (centro)\")\n",
"ax.set_ylabel(\"# eventos\")\n",
"ax.set_title(\"Distribución de regeneración temporal\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "0feaa9c1",
"metadata": {},
"source": [
"## Cruzar regeneración con conversión a factura\n",
"\n",
"¿Los Q0 regenerados convierten MENOS que los Q0 no regenerados? (Hipótesis: el cliente prefiere lo que negocia el centro). Aquí \"convertir\" significa que el pedido del propio Q0 (`q0_order`) tiene al menos una factura en `tpv_orders_invoice`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "376502d8",
"metadata": {},
"outputs": [],
"source": [
"# Conversion of Q0's own order to invoice, split by regenerated / not regenerated.\n",
"# NOTE(review): Q0 created less than REGEN_WINDOW_DAYS ago have not yet had a full\n",
"# regeneration window, so some of them may sit in the 'no_regenerado' bucket only\n",
"# because their window is still open. Confirm whether the cohort should be restricted.\n",
"SQL_CONV = f\"\"\"\n",
"DECLARE win INT64 DEFAULT {WINDOW_DAYS};\n",
"DECLARE regen_win INT64 DEFAULT {REGEN_WINDOW_DAYS};\n",
"DECLARE t_start TIMESTAMP DEFAULT TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL win DAY);\n",
"\n",
"WITH\n",
"cc_users AS (\n",
"  SELECT DISTINCT tpvuser_id AS user_id\n",
"  FROM `{PROJECT}.{DATASET}.tpv_authorization_tpvuser_centers`\n",
"  WHERE dccenter_id IN (159, 162)\n",
"),\n",
"q0 AS (\n",
"  SELECT q.id AS q0_id, q.order_id AS q0_order, q.created_at AS q0_ts,\n",
"         o.customer_id, o.vehicle_id\n",
"  FROM `{PROJECT}.{DATASET}.tpv_orders_quote` q\n",
"  JOIN cc_users cc ON q.created_by_id = cc.user_id\n",
"  JOIN `{PROJECT}.{DATASET}.tpv_orders_order` o ON q.order_id = o.id\n",
"  WHERE q.created_at >= t_start AND q.deleted_at IS NULL\n",
"    AND o.customer_id IS NOT NULL AND o.vehicle_id IS NOT NULL\n",
"),\n",
"qN AS (\n",
"  SELECT q.order_id AS qn_order, q.created_at AS qn_ts,\n",
"         o.customer_id, o.vehicle_id, t.center_id\n",
"  FROM `{PROJECT}.{DATASET}.tpv_orders_quote` q\n",
"  JOIN `{PROJECT}.{DATASET}.tpv_orders_order` o ON q.order_id = o.id\n",
"  -- Inner join: the center_id filters below already discard orders without a terminal.\n",
"  JOIN `{PROJECT}.{DATASET}.tpv_terminals` t ON o.terminal_id = t.id\n",
"  WHERE q.deleted_at IS NULL\n",
"    AND t.center_id IS NOT NULL AND t.center_id NOT IN (159,162)\n",
"),\n",
"regen AS (\n",
"  SELECT DISTINCT q0.q0_id\n",
"  FROM q0\n",
"  JOIN qN\n",
"    ON q0.customer_id = qN.customer_id\n",
"    AND q0.vehicle_id = qN.vehicle_id\n",
"    AND qN.qn_ts > q0.q0_ts\n",
"    AND qN.qn_ts <= TIMESTAMP_ADD(q0.q0_ts, INTERVAL regen_win DAY)\n",
"    AND qN.qn_order != q0.q0_order\n",
"),\n",
"-- Exactly one row per Q0. Invoices are de-duplicated by order first so that an\n",
"-- order with several invoices cannot multiply its Q0 rows and inflate the counts.\n",
"q0_inv AS (\n",
"  SELECT q0.q0_id,\n",
"         CASE WHEN i.order_id IS NOT NULL THEN 1 ELSE 0 END AS q0_factura\n",
"  FROM q0\n",
"  LEFT JOIN (\n",
"    SELECT DISTINCT order_id\n",
"    FROM `{PROJECT}.{DATASET}.tpv_orders_invoice`\n",
"  ) i ON i.order_id = q0.q0_order\n",
")\n",
"SELECT\n",
"  CASE WHEN r.q0_id IS NOT NULL THEN 'regenerado' ELSE 'no_regenerado' END AS bucket,\n",
"  COUNT(*) AS q0_total,\n",
"  SUM(qi.q0_factura) AS q0_convertido_propio,\n",
"  ROUND(SAFE_DIVIDE(SUM(qi.q0_factura), COUNT(*)), 4) AS conv_q0_propio\n",
"FROM q0_inv qi\n",
"LEFT JOIN regen r USING (q0_id)\n",
"GROUP BY bucket\n",
"\"\"\"\n",
"df_conv = q(SQL_CONV)\n",
"df_conv"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}