Compare commits

5 Commits

10 changed files with 609 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
# Local Python virtual environment
venv
# Local environment file holding credentials; must never be committed
.env
+27
View File
@@ -0,0 +1,27 @@
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
# Root of the site being queried and the lookup page that is requested.
BASE_URL = "https://www.cpubenchmark.net"
LOOKUP_URL = f"{BASE_URL}/cpu_lookup.php"

# Upper bound, in seconds, for the lookup request. Without a timeout a
# stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 30

# Browser-like headers sent with the lookup request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Referer": "https://www.cpubenchmark.net/",
    "Accept-Language": "en-US,en;q=0.9",
    # NOTE(review): "br" responses can presumably only be decoded when a
    # Brotli package is installed next to requests -- confirm, or drop "br".
    "Accept-Encoding": "gzip, deflate, br",
    "DNT": "1",  # Do Not Track
    "Connection": "keep-alive",
}

# Fetch the lookup page that contains the CPU list.
response = requests.get(LOOKUP_URL, headers=headers, timeout=REQUEST_TIMEOUT)
if response.status_code != 200:
    # Show the response body for diagnosis, then stop with a non-zero exit
    # status. SystemExit is used instead of exit(), which is a helper meant
    # for the interactive interpreter and reports success (status 0).
    print(response.text)
    raise SystemExit(
        f"Request to {LOOKUP_URL} failed with HTTP {response.status_code}"
    )

soup = BeautifulSoup(response.text, "html.parser")

# Get the list of CPUs
+419
View File
@@ -0,0 +1,419 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import pandas as pd\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def get_tables_html(html):\n",
" soup = BeautifulSoup(html, 'html.parser')\n",
" tables = soup.find_all('table') # Extrae todas las tablas\n",
" return [str(table) for table in tables] # Devuelve el HTML de cada tabla en una lista\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def html_table_to_dataframe(html):\n",
" soup = BeautifulSoup(html, 'html.parser')\n",
" table = soup.find('table')\n",
"\n",
" # Extraer encabezados\n",
" headers = [th.text.strip() for th in table.find_all('th')]\n",
"\n",
" # Extraer filas\n",
" rows = []\n",
" for tr in table.find_all('tr')[1:]: # Omitimos la cabecera\n",
" row_data = []\n",
" row_links = {}\n",
"\n",
" for i, td in enumerate(tr.find_all('td')):\n",
" text = td.text.strip()\n",
" link = td.find('a', href=True)\n",
"\n",
" row_data.append(text)\n",
"\n",
" # Si hay un enlace, agregamos una columna con \"_link\" en el nombre\n",
" if link:\n",
" link_col_name = f\"{headers[i]}_link\" if headers else f\"column_{i}_link\"\n",
" row_links[link_col_name] = link['href']\n",
"\n",
" # Unir los datos con los enlaces en la fila\n",
" row_data.extend(row_links.values())\n",
" rows.append(row_data)\n",
"\n",
" # Crear encabezados finales (incluyendo las columnas de enlaces)\n",
" final_headers = headers + list(row_links.keys()) if headers else None\n",
"\n",
" # Crear DataFrame\n",
" df = pd.DataFrame(rows, columns=final_headers)\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Página cargada correctamente\n"
]
}
],
"source": [
"\n",
"BASE_URL = \"https://www.cpubenchmark.net\"\n",
"LOOKUP_URL = f\"{BASE_URL}/cpu_lookup.php\"\n",
"\n",
"headers = {\n",
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36\",\n",
" \"Referer\": f\"{BASE_URL}/\",\n",
" \"Accept-Language\": \"en-US,en;q=0.9\",\n",
" \"Accept-Encoding\": \"gzip, deflate, br\",\n",
" \"DNT\": \"1\", # No rastrear\n",
" \"Connection\": \"keep-alive\",\n",
"}\n",
"\n",
"# Obtener la lista de CPUs desde la página principal\n",
"response = requests.get(LOOKUP_URL, headers=headers)\n",
"\n",
"# Verificar si la solicitud fue exitosa\n",
"if response.status_code == 200:\n",
" print(\"Página cargada correctamente\")\n",
" page_html = response.text # Aquí tienes todo el HTML de la página\n",
"else:\n",
" print(f\"Error al obtener la página: {response.status_code}\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Se encontraron 3 tablas en la página\n"
]
}
],
"source": [
"tablas = get_tables_html(page_html)\n",
"\n",
"print(f\"Se encontraron {len(tablas)} tablas en la página\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Add other CPU:</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0\n",
"0 Add other CPU:"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CPU Name</th>\n",
" <th>CPU Mark(higher is better)</th>\n",
" <th>Rank(lower is better)</th>\n",
" <th>CPU Value(higher is better)</th>\n",
" <th>Price(USD)</th>\n",
" <th>CPU Name_link</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>AArch64</td>\n",
" <td>833</td>\n",
" <td>4068</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>cpu_lookup.php?cpu=AArch64&amp;id=5934</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>AArch64 rev 2 (aarch64)</td>\n",
" <td>2,409</td>\n",
" <td>2853</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>cpu_lookup.php?cpu=AArch64+rev+2+%28aarch64%29...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>AArch64 rev 4 (aarch64)</td>\n",
" <td>1,813</td>\n",
" <td>3225</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>cpu_lookup.php?cpu=AArch64+rev+4+%28aarch64%29...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>AC8257V/WAB</td>\n",
" <td>788</td>\n",
" <td>4119</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>cpu_lookup.php?cpu=AC8257V%2FWAB&amp;id=3980</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>AC8259V/WAB</td>\n",
" <td>910</td>\n",
" <td>3980</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>cpu_lookup.php?cpu=AC8259V%2FWAB&amp;id=5947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4984</th>\n",
" <td>ZHAOXIN KaiXian KX-U6780A@2.7GHz</td>\n",
" <td>3,466</td>\n",
" <td>2354</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>cpu_lookup.php?cpu=ZHAOXIN+KaiXian+KX-U6780A%4...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4985</th>\n",
" <td>ZHAOXIN KaiXian ZX-C+ C4700@2.0GHz</td>\n",
" <td>1,547</td>\n",
" <td>3407</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>cpu_lookup.php?cpu=ZHAOXIN+KaiXian+ZX-C%2B+C47...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4986</th>\n",
" <td>ZHAOXIN KaiXian ZX-D D4600@2.0GHz</td>\n",
" <td>1,492</td>\n",
" <td>3453</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>cpu_lookup.php?cpu=ZHAOXIN+KaiXian+ZX-D+D4600%...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4987</th>\n",
" <td>ZHAOXIN Z3-6540M@2.1+GHz</td>\n",
" <td>1,378</td>\n",
" <td>3535</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>cpu_lookup.php?cpu=ZHAOXIN+Z3-6540M%402.1%2BGH...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4988</th>\n",
" <td>天玑900</td>\n",
" <td>4,259</td>\n",
" <td>2132</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>cpu_lookup.php?cpu=%E5%A4%A9%E7%8E%91900&amp;id=5209</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4989 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" CPU Name CPU Mark(higher is better) \\\n",
"0 AArch64 833 \n",
"1 AArch64 rev 2 (aarch64) 2,409 \n",
"2 AArch64 rev 4 (aarch64) 1,813 \n",
"3 AC8257V/WAB 788 \n",
"4 AC8259V/WAB 910 \n",
"... ... ... \n",
"4984 ZHAOXIN KaiXian KX-U6780A@2.7GHz 3,466 \n",
"4985 ZHAOXIN KaiXian ZX-C+ C4700@2.0GHz 1,547 \n",
"4986 ZHAOXIN KaiXian ZX-D D4600@2.0GHz 1,492 \n",
"4987 ZHAOXIN Z3-6540M@2.1+GHz 1,378 \n",
"4988 天玑900 4,259 \n",
"\n",
" Rank(lower is better) CPU Value(higher is better) Price(USD) \\\n",
"0 4068 NA NA \n",
"1 2853 NA NA \n",
"2 3225 NA NA \n",
"3 4119 NA NA \n",
"4 3980 NA NA \n",
"... ... ... ... \n",
"4984 2354 NA NA \n",
"4985 3407 NA NA \n",
"4986 3453 NA NA \n",
"4987 3535 NA NA \n",
"4988 2132 NA NA \n",
"\n",
" CPU Name_link \n",
"0 cpu_lookup.php?cpu=AArch64&id=5934 \n",
"1 cpu_lookup.php?cpu=AArch64+rev+2+%28aarch64%29... \n",
"2 cpu_lookup.php?cpu=AArch64+rev+4+%28aarch64%29... \n",
"3 cpu_lookup.php?cpu=AC8257V%2FWAB&id=3980 \n",
"4 cpu_lookup.php?cpu=AC8259V%2FWAB&id=5947 \n",
"... ... \n",
"4984 cpu_lookup.php?cpu=ZHAOXIN+KaiXian+KX-U6780A%4... \n",
"4985 cpu_lookup.php?cpu=ZHAOXIN+KaiXian+ZX-C%2B+C47... \n",
"4986 cpu_lookup.php?cpu=ZHAOXIN+KaiXian+ZX-D+D4600%... \n",
"4987 cpu_lookup.php?cpu=ZHAOXIN+Z3-6540M%402.1%2BGH... \n",
"4988 cpu_lookup.php?cpu=%E5%A4%A9%E7%8E%91900&id=5209 \n",
"\n",
"[4989 rows x 6 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"for tabla in tablas:\n",
" df = html_table_to_dataframe(tabla)\n",
" display(df)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
+9
View File
@@ -0,0 +1,9 @@
@echo off
REM Start a Prefect worker that talks to the self-hosted Prefect server.

REM Self-hosted server configuration. The quoted form keeps any trailing
REM whitespace on this line out of the variable's value.
set "PREFECT_API_URL=http://10.8.0.6:4200/api"

REM Activate the Python virtual environment (if you use one). When it is
REM missing, say so explicitly and fall through to whatever "prefect" is on PATH.
if exist "E:\Proyects\Workers_data\venv\Scripts\activate.bat" (
    call "E:\Proyects\Workers_data\venv\Scripts\activate.bat"
) else (
    >&2 echo WARNING: virtual environment not found, using the prefect command found on PATH.
)

REM Start the Prefect worker for the "Worker_lucas" work pool.
prefect worker start --pool "Worker_lucas"
+61
View File
@@ -0,0 +1,61 @@
import random
from prefect import flow, task, get_run_logger
from prefect.filesystems import LocalFileSystem
from pathlib import Path
@task
def generar_mensaje_random():
    """Return one greeting chosen at random from a fixed set of four."""
    opciones = ("Hola", "Adios", "Buenos días", "Buenas noches")
    return random.choice(opciones)
@flow
def otro_flow():
    """Return the constant result of (324 * 2 + 1) / 2."""
    numerador = 324 * 2 + 1
    return numerador / 2
@flow
def my_flow():
    """Run ``otro_flow``, draw three random messages and log each of them."""
    registro = get_run_logger()

    otro_flow()

    # Three independent draws, evaluated in order while unpacking.
    primero, segundo, tercero = (generar_mensaje_random() for _ in range(3))

    registro.info(f"Mensaje 1: {primero}")
    registro.info(f"Mensaje 2: {segundo}")
    registro.info(f"Mensaje 3: {tercero}")
# Configure local storage.
# Raw string: the Windows path is full of backslashes, and sequences such as
# "\P" or "\W" are unrecognized escapes that trigger a SyntaxWarning on
# Python 3.12+. The resulting value is identical to the original literal.
local_storage = LocalFileSystem(basepath=r"E:\Proyects\Workers_data\prueba_prefect")
# Variant for use with Git
# # Configure and deploy the flow
# if __name__ == "__main__":
# my_flow.from_source(
# source="http://localhost:3123/egutierrez/prefect_automatizations.git",
# entrypoint="ejemplo_automatizacion_prefect.py:my_flow"
# ).deploy(
# name="Deployment_ejemplo",
# work_pool_name="Workers_pc_torre_Lucas",
# cron="*/1 * * * *"
# )
# Variant for use with local storage.
# Configure and deploy the flow when this file is executed as a script.
if __name__ == "__main__":
    this_file = Path(__file__)
    my_flow.from_source(
        source=str(this_file.parent),  # code stored in local directory
        # The entrypoint is resolved relative to `source`, so only the file
        # name is needed. This replaces an absolute, machine-specific Windows
        # path whose backslashes formed invalid escape sequences.
        entrypoint=f"{this_file.name}:my_flow",
    ).deploy(
        name="Deployment_ejemplo",
        work_pool_name="Worker_lucas",
    )
+3
View File
@@ -0,0 +1,3 @@
from primera_prueba import otro_flow
# Run the imported flow once; its return value is discarded.
otro_flow()
+1
View File
@@ -0,0 +1 @@
# SECURITY: a real-looking API key was committed on this line. Revoke and rotate
# that key, and keep the actual value only in an untracked local copy of this file.
OPENAI_API=replace-with-your-openai-api-key
+86
View File
@@ -0,0 +1,86 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Error en la API: 401, {\n \"error\": {\n \"message\": \"Your request to GET /v1/dashboard/billing/subscription must be made with a session key (that is, it can only be made from the browser). You made it with the following key type: .\",\n \"type\": \"invalid_request_error\",\n \"param\": null,\n \"code\": null\n }\n}\n",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[3], line 40\u001b[0m\n\u001b[0;32m 36\u001b[0m saldo_restante \u001b[38;5;241m=\u001b[39m total_grant \u001b[38;5;241m-\u001b[39m total_usage\n\u001b[0;32m 37\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m saldo_restante\n\u001b[1;32m---> 40\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mget_saldo_restante_api\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m)\n",
"Cell \u001b[1;32mIn[3], line 17\u001b[0m, in \u001b[0;36mget_saldo_restante_api\u001b[1;34m()\u001b[0m\n\u001b[0;32m 15\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mBASE_URL\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/subscription\u001b[39m\u001b[38;5;124m\"\u001b[39m, headers\u001b[38;5;241m=\u001b[39mheaders)\n\u001b[0;32m 16\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m200\u001b[39m:\n\u001b[1;32m---> 17\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError en la API: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mtext\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 19\u001b[0m data \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mjson()\n\u001b[0;32m 20\u001b[0m total_grant \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhard_limit_usd\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m0\u001b[39m) \u001b[38;5;66;03m# Límite total en USD\u001b[39;00m\n",
"\u001b[1;31mValueError\u001b[0m: Error en la API: 401, {\n \"error\": {\n \"message\": \"Your request to GET /v1/dashboard/billing/subscription must be made with a session key (that is, it can only be made from the browser). You made it with the following key type: .\",\n \"type\": \"invalid_request_error\",\n \"param\": null,\n \"code\": null\n }\n}\n"
]
}
],
"source": [
"import requests\n",
"from dotenv import load_dotenv\n",
"import os\n",
"from datetime import datetime, timedelta\n",
"\n",
"\n",
"load_dotenv()\n",
"API_KEY = os.getenv(\"OPENAI_API\")\n",
"BASE_URL = \"https://api.openai.com/v1/dashboard/billing\"\n",
"\n",
"def get_saldo_restante_api():\n",
" headers = {\"Authorization\": f\"Bearer {API_KEY}\"}\n",
"\n",
" # Obtener información de la suscripción\n",
" response = requests.get(f\"{BASE_URL}/subscription\", headers=headers)\n",
" if response.status_code != 200:\n",
" raise ValueError(f\"Error en la API: {response.status_code}, {response.text}\")\n",
" \n",
" data = response.json()\n",
" total_grant = data.get(\"hard_limit_usd\", 0) # Límite total en USD\n",
"\n",
" # Obtener el consumo de los últimos 30 días\n",
" end_date = datetime.utcnow().strftime(\"%Y-%m-%d\")\n",
" start_date = (datetime.utcnow() - timedelta(days=30)).strftime(\"%Y-%m-%d\")\n",
"\n",
" usage_response = requests.get(\n",
" f\"{BASE_URL}/usage?start_date={start_date}&end_date={end_date}\",\n",
" headers=headers\n",
" )\n",
" if usage_response.status_code != 200:\n",
" raise ValueError(f\"Error en la API de uso: {usage_response.status_code}, {usage_response.text}\")\n",
"\n",
" usage_data = usage_response.json()\n",
" total_usage = usage_data.get(\"total_usage\", 0) / 100 # OpenAI da el uso en centavos\n",
"\n",
" saldo_restante = total_grant - total_usage\n",
" return saldo_restante\n",
"\n",
"\n",
"print(get_saldo_restante_api())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
+2
View File
@@ -0,0 +1,2 @@
' Launch the worker batch file through cmd.exe in a hidden window and
' return immediately without waiting for it to finish.
Const HIDDEN_WINDOW = 0
Const WAIT_FOR_EXIT = False

Dim shell
Set shell = CreateObject("WScript.Shell")
shell.Run "cmd.exe /c E:\Proyects\Workers_data\generar_worker.bat", HIDDEN_WINDOW, WAIT_FOR_EXIT