Files
Transcripcion_audio_texto/audio_a_texto.ipynb
T

193 lines
8.1 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Comienza a hablar...\n",
"analiza este comprador y comprueba que tan verídico puede ser \n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[8], line 26\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mComienza a hablar...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 25\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m---> 26\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[43mstream\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mBUFFER\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexception_on_overflow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Lee el flujo de audio del micrófono\u001b[39;00m\n\u001b[0;32m 28\u001b[0m \u001b[38;5;66;03m# Si no se recibe audio, sale del bucle\u001b[39;00m\n\u001b[0;32m 29\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(data) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
"File \u001b[1;32mc:\\Users\\Tr4shhh\\.conda\\envs\\data\\Lib\\site-packages\\pyaudio\\__init__.py:570\u001b[0m, in \u001b[0;36mPyAudio.Stream.read\u001b[1;34m(self, num_frames, exception_on_overflow)\u001b[0m\n\u001b[0;32m 567\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_input:\n\u001b[0;32m 568\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNot input stream\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 569\u001b[0m paCanNotReadFromAnOutputOnlyStream)\n\u001b[1;32m--> 570\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpa\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_stream\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_stream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_frames\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 571\u001b[0m \u001b[43m \u001b[49m\u001b[43mexception_on_overflow\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"import sys\n",
"import os\n",
"import json\n",
"import pyaudio\n",
"from vosk import Model, KaldiRecognizer\n",
"import keyboard  # Used below to type the recognized text via keyboard.write()\n",
"\n",
"\n",
"# Live dictation: reads microphone audio, feeds it to a Vosk recognizer and,\n",
"# for every finished utterance, prints the text and types it with `keyboard`.\n",
"\n",
"SAMPLE_RATE = 16000  # Passed to both the microphone stream and the recognizer\n",
"BUFFER = 500  # Number of audio frames requested per stream.read() call\n",
"\n",
"# Load the Vosk model (Spanish)\n",
"model = Model(\"vosk-model-small-es-0.42\")\n",
"\n",
"# Initialize PyAudio\n",
"p = pyaudio.PyAudio()\n",
"\n",
"# Configure the microphone (audio input): 16-bit mono\n",
"stream = p.open(format=pyaudio.paInt16, channels=1, rate=SAMPLE_RATE, input=True, frames_per_buffer=4000)\n",
"stream.start_stream()\n",
"\n",
"# Initialize the recognizer\n",
"recognizer = KaldiRecognizer(model, SAMPLE_RATE)\n",
"\n",
"print(\"Comienza a hablar...\")\n",
"\n",
"try:\n",
"    while True:\n",
"        # Read the next chunk of audio from the microphone\n",
"        data = stream.read(BUFFER, exception_on_overflow=False)\n",
"\n",
"        # If no audio is received, leave the loop\n",
"        if len(data) == 0:\n",
"            break\n",
"\n",
"        # AcceptWaveform() returning a truthy value means a result is ready for this utterance\n",
"        if recognizer.AcceptWaveform(data):\n",
"            result = recognizer.Result()\n",
"            # The result is parsed as JSON with json.loads; eval() would execute\n",
"            # arbitrary expressions and fail on JSON literals such as null/true.\n",
"            text = json.loads(result).get('text', '')\n",
"            # Skip empty transcriptions so a lone space is not typed\n",
"            if text:\n",
"                text = text + ' '  # Trailing space separates consecutive utterances\n",
"                print(text)\n",
"                keyboard.write(text)\n",
"except KeyboardInterrupt:\n",
"    # Interrupting the kernel is the normal way to stop dictation\n",
"    print(\"Finalizado.\")\n",
"finally:\n",
"    # Always release the microphone and PyAudio, even on interrupt or error\n",
"    stream.stop_stream()\n",
"    stream.close()\n",
"    p.terminate()\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"El archivo vosk-model-small-es-0.42.zip ha sido descomprimido.\n"
]
}
],
"source": [
"import zipfile\n",
"\n",
"# Zip archive to unpack, given relative to the working directory\n",
"zip_filename = r'vosk-model-small-es-0.42.zip'\n",
"\n",
"# Extract every member of the archive into the current directory\n",
"with zipfile.ZipFile(zip_filename, mode='r') as archive:\n",
"    archive.extractall(path='.')\n",
"\n",
"print(f\"El archivo {zip_filename} ha sido descomprimido.\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ID: 0, Nombre: Microsoft Sound Mapper - Input, Canales de entrada: 2\n",
"ID: 1, Nombre: Microphone (RAGEX40), Canales de entrada: 1\n",
"ID: 2, Nombre: Microsoft Sound Mapper - Output, Canales de entrada: 0\n",
"ID: 3, Nombre: Speakers (2- High Definition Au, Canales de entrada: 0\n",
"ID: 4, Nombre: Speakers (RAGEX40), Canales de entrada: 0\n",
"ID: 5, Nombre: Primary Sound Capture Driver, Canales de entrada: 2\n",
"ID: 6, Nombre: Microphone (RAGEX40), Canales de entrada: 1\n",
"ID: 7, Nombre: Primary Sound Driver, Canales de entrada: 0\n",
"ID: 8, Nombre: Speakers (2- High Definition Audio Device), Canales de entrada: 0\n",
"ID: 9, Nombre: Speakers (RAGEX40), Canales de entrada: 0\n",
"ID: 10, Nombre: Speakers (RAGEX40), Canales de entrada: 0\n",
"ID: 11, Nombre: Speakers (2- High Definition Audio Device), Canales de entrada: 0\n",
"ID: 12, Nombre: Microphone (RAGEX40), Canales de entrada: 1\n",
"ID: 13, Nombre: Output (NVIDIA High Definition Audio), Canales de entrada: 0\n",
"ID: 14, Nombre: Output (NVIDIA High Definition Audio), Canales de entrada: 0\n",
"ID: 15, Nombre: Speakers (HD Audio Speaker), Canales de entrada: 0\n",
"ID: 16, Nombre: SPDIF Out (HD Audio SPDIF out), Canales de entrada: 0\n",
"ID: 17, Nombre: Speakers (RAGEX40), Canales de entrada: 0\n",
"ID: 18, Nombre: Microphone (RAGEX40), Canales de entrada: 1\n"
]
}
],
"source": [
"import pyaudio\n",
"\n",
"# Print the index, name and input-channel count of every audio device that\n",
"# PyAudio reports, so the microphone to use can be identified.\n",
"p = pyaudio.PyAudio()\n",
"try:\n",
"    for i in range(p.get_device_count()):\n",
"        info = p.get_device_info_by_index(i)\n",
"        print(f\"ID: {i}, Nombre: {info['name']}, Canales de entrada: {info['maxInputChannels']}\")\n",
"finally:\n",
"    # Release this PyAudio instance once the listing is done, even on error\n",
"    p.terminate()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Presiona una tecla para ver su nombre (Ctrl+C para salir):\n",
"Tecla presionada: alt gr\n",
"Tecla presionada: alt gr\n",
"Tecla presionada: alt gr\n",
"Tecla presionada: ñ\n",
"Tecla presionada: alt gr\n",
"Tecla presionada: ñ\n"
]
}
],
"source": [
"import keyboard\n",
"\n",
"# Key-name probe: prints the name reported for each key-down event until interrupted.\n",
"print(\"Presiona una tecla para ver su nombre (Ctrl+C para salir):\")\n",
"\n",
"try:\n",
"    while True:\n",
"        event = keyboard.read_event()\n",
"        # Anything that is not a key-down event is skipped\n",
"        if event.event_type != keyboard.KEY_DOWN:\n",
"            continue\n",
"        print(f\"Tecla presionada: {event.name}\")\n",
"except KeyboardInterrupt:\n",
"    # Ctrl+C / kernel interrupt ends the probe\n",
"    print(\"Finalizado.\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "data",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}