Primer commit
This commit is contained in:
@@ -0,0 +1,181 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "36832aa6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1.2.2\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
"# Report which DuckDB build this kernel is using.\n",
"import duckdb\n",
"\n",
"print(duckdb.__version__)"
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "c357d895",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<duckdb.duckdb.DuckDBPyConnection at 0x1cda135ac30>"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
"# No earlier cell assigns `conn`, so open the on-disk database here\n",
"# before using it; otherwise a fresh kernel fails with NameError.\n",
"conn = duckdb.connect(\"textos_vss.duckdb\")\n",
"\n",
"# Install (first run only) and load the vector similarity search extension.\n",
"conn.execute(\"INSTALL vss; LOAD vss;\")\n"
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "4a7a3ffb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "BinderException",
|
||||
"evalue": "Binder Error: HNSW indexes can only be created in in-memory databases, or when the configuration option 'hnsw_enable_experimental_persistence' is set to true.",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mBinderException\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[13]\u001b[39m\u001b[32m, line 38\u001b[39m\n\u001b[32m 35\u001b[39m conn.execute(\u001b[33m\"\u001b[39m\u001b[33mINSERT INTO textos VALUES (?, ?, ?)\u001b[39m\u001b[33m\"\u001b[39m, (i, texto, vector))\n\u001b[32m 37\u001b[39m \u001b[38;5;66;03m# Crear el índice HNSW en la columna embedding\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m38\u001b[39m \u001b[43mconn\u001b[49m\u001b[43m.\u001b[49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mCREATE INDEX vss_idx ON textos USING HNSW (embedding);\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 40\u001b[39m \u001b[38;5;66;03m# Realizar una búsqueda de los 3 textos más similares a una consulta\u001b[39;00m\n\u001b[32m 41\u001b[39m consulta = \u001b[33m\"\u001b[39m\u001b[33minteligencia artificial en Python\u001b[39m\u001b[33m\"\u001b[39m\n",
|
||||
"\u001b[31mBinderException\u001b[39m: Binder Error: HNSW indexes can only be created in in-memory databases, or when the configuration option 'hnsw_enable_experimental_persistence' is set to true."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
"import duckdb\n",
"import numpy as np\n",
"from sentence_transformers import SentenceTransformer\n",
"\n",
"# Open (or create) the on-disk database.\n",
"conn = duckdb.connect(\"textos_vss.duckdb\")\n",
"\n",
"# Install and load the VSS extension.\n",
"conn.execute(\"INSTALL vss;\")\n",
"conn.execute(\"LOAD vss;\")\n",
"\n",
"# HNSW indexes are rejected on a file-backed database unless this option is\n",
"# enabled. It must be set after LOAD vss, because the extension defines it.\n",
"# NOTE: the feature is experimental; see the DuckDB VSS docs for its caveats.\n",
"conn.execute(\"SET hnsw_enable_experimental_persistence = true;\")\n",
"\n",
"# Embedding model.\n",
"model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
"\n",
"# HNSW indexes and array_distance need a fixed-size FLOAT[N] column,\n",
"# so take N from the model instead of using a variable-length FLOAT[].\n",
"embedding_dim = int(model.get_sentence_embedding_dimension())\n",
"\n",
"# Rebuild the table on every run so the cell is re-runnable: this avoids\n",
"# primary-key collisions on re-insert and replaces any table left over\n",
"# from an earlier run that used the FLOAT[] schema.\n",
"conn.execute(f\"\"\"\n",
"CREATE OR REPLACE TABLE textos (\n",
"    id INTEGER PRIMARY KEY,\n",
"    texto TEXT,\n",
"    embedding FLOAT[{embedding_dim}]\n",
");\n",
"\"\"\")\n",
"\n",
"# Insert the texts together with their embeddings.\n",
"extractos = [\n",
"    \"El rápido zorro marrón salta sobre el perro perezoso.\",\n",
"    \"La inteligencia artificial está revolucionando el mundo.\",\n",
"    \"DuckDB permite análisis embebidos rápidos en Python.\",\n",
"    \"Los modelos de lenguaje pueden entender consultas humanas.\",\n",
"    \"Python es un lenguaje poderoso para ciencia de datos.\"\n",
"]\n",
"\n",
"for i, texto in enumerate(extractos):\n",
"    vector = model.encode(texto).astype(np.float32).tolist()\n",
"    # The Python list binds as a variable-length list; cast it to the\n",
"    # fixed-size array type of the column.\n",
"    conn.execute(\n",
"        f\"INSERT INTO textos VALUES (?, ?, ?::FLOAT[{embedding_dim}])\",\n",
"        (i, texto, vector),\n",
"    )\n",
"\n",
"# Create the HNSW index on the embedding column.\n",
"conn.execute(\"CREATE INDEX vss_idx ON textos USING HNSW (embedding);\")\n",
"\n",
"# Find the 3 texts closest to the query.\n",
"consulta = \"inteligencia artificial en Python\"\n",
"vector_consulta = model.encode(consulta).astype(np.float32).tolist()\n",
"# array_distance needs both arguments as arrays of the same fixed size,\n",
"# hence the cast on the bound parameter.\n",
"resultados = conn.execute(f\"\"\"\n",
"    SELECT id, texto, array_distance(embedding, ?::FLOAT[{embedding_dim}]) AS distancia\n",
"    FROM textos\n",
"    ORDER BY distancia ASC\n",
"    LIMIT 3;\n",
"\"\"\", (vector_consulta,)).fetchall()\n",
"\n",
"# Show the results.\n",
"for id_, texto, distancia in resultados:\n",
"    print(f\"[ID: {id_}] Distancia: {distancia:.4f} → {texto}\")"
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "2f6059bd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<duckdb.duckdb.DuckDBPyConnection at 0x1cda32d9730>"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
"# Reopen the database file.\n",
"conn = duckdb.connect(\"textos_vss.duckdb\")\n",
"\n",
"# Enable the experimental option that allows HNSW indexes on a\n",
"# file-backed (non in-memory) database.\n",
"conn.execute(\"SET hnsw_enable_experimental_persistence=true;\")"
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "449bebc1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<duckdb.duckdb.DuckDBPyConnection at 0x1cda322a5b0>"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
"# Remove every row from `textos`; the table itself is kept.\n",
"conn.execute(\"DELETE FROM textos;\")\n"
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user