feat: add marquez-cli tool for OpenLineage/Marquez management

Añadido binario CLI en Go para gestionar datasets, jobs y runs en Marquez.

Características:
- Enviar eventos OpenLineage (START, RUNNING, COMPLETE, FAIL)
- Registrar y consultar datasets
- Registrar y consultar jobs y runs
- Consultar lineage de datasets con formato texto/JSON
- Listar recursos (namespaces, jobs, datasets)
- Sin dependencias externas (solo Go stdlib)
- Binario estático compilado de ~5MB

Archivos:
- tools/marquez-cli/main.go: CLI principal con comandos
- tools/marquez-cli/openlineage.go: Cliente HTTP y estructuras OpenLineage
- tools/marquez-cli/go.mod: Módulo de Go
- tools/marquez-cli/Makefile: Build automation
- tools/marquez-cli/README.md: Documentación completa
- tools/marquez-cli/QUICKSTART.md: Guía rápida de uso

Instalación: ejecutar `make install`, que instala el binario en ~/.local/bin/marquez-cli
This commit is contained in:
2026-03-23 23:40:55 +01:00
parent ea84a8e1f8
commit 5f3bc84696
7 changed files with 1748 additions and 0 deletions
+230
View File
@@ -0,0 +1,230 @@
package main
import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"time"
)
// Event type names for OpenLineage events. Each constant is an untyped
// string whose value is the upper-case name itself.
const (
	// EventTypeStart is the "START" event type.
	EventTypeStart = "START"
	// EventTypeRunning is the "RUNNING" event type.
	EventTypeRunning = "RUNNING"
	// EventTypeComplete is the "COMPLETE" event type.
	EventTypeComplete = "COMPLETE"
	// EventTypeFail is the "FAIL" event type.
	EventTypeFail = "FAIL"
	// EventTypeAbort is the "ABORT" event type.
	EventTypeAbort = "ABORT"
)
// Dataset is the JSON shape of an OpenLineage dataset reference. It is the
// element type of OpenLineageEvent.Inputs and OpenLineageEvent.Outputs.
type Dataset struct {
	// Namespace is serialized under the "namespace" key.
	Namespace string `json:"namespace"`
	// Name is serialized under the "name" key.
	Name string `json:"name"`
	// Facets holds free-form facet data; the "facets" key is omitted from
	// the JSON output when the map is empty or nil.
	Facets map[string]interface{} `json:"facets,omitempty"`
}
// Job is the JSON shape of an OpenLineage job reference, embedded in
// OpenLineageEvent under the "job" key.
type Job struct {
	// Namespace is serialized under the "namespace" key.
	Namespace string `json:"namespace"`
	// Name is serialized under the "name" key.
	Name string `json:"name"`
	// Facets holds free-form facet data; the "facets" key is omitted from
	// the JSON output when the map is empty or nil.
	Facets map[string]interface{} `json:"facets,omitempty"`
}
// Run is the JSON shape of an OpenLineage run reference, embedded in
// OpenLineageEvent under the "run" key.
type Run struct {
	// RunID is serialized under the "runId" key.
	RunID string `json:"runId"`
	// Facets holds free-form facet data; the "facets" key is omitted from
	// the JSON output when the map is empty or nil.
	Facets map[string]interface{} `json:"facets,omitempty"`
}
// OpenLineageEvent is the JSON payload that SendEvent posts to the lineage
// endpoint. Field order here is the key order of the marshaled JSON.
type OpenLineageEvent struct {
	// EventType is serialized under "eventType"; see the EventType*
	// constants for the names declared in this file.
	EventType string `json:"eventType"`
	// EventTime is serialized under "eventTime". SendEvent fills it with
	// the current UTC time in RFC 3339 (nanosecond) form when left empty.
	EventTime string `json:"eventTime"`
	// Producer is serialized under "producer".
	Producer string `json:"producer"`
	// SchemaURL is serialized under "schemaURL" and omitted when empty.
	// SendEvent fills in a default when left empty.
	SchemaURL string `json:"schemaURL,omitempty"`
	// Job is serialized under "job".
	Job Job `json:"job"`
	// Run is serialized under "run".
	Run Run `json:"run"`
	// Inputs is serialized under "inputs" and omitted when empty.
	Inputs []Dataset `json:"inputs,omitempty"`
	// Outputs is serialized under "outputs" and omitted when empty.
	Outputs []Dataset `json:"outputs,omitempty"`
}
// MarquezClient issues HTTP requests against a Marquez server.
type MarquezClient struct {
	// BaseURL is prepended verbatim to every API path (for example
	// BaseURL + "/api/v1/lineage").
	BaseURL string
	// HTTPClient performs the requests; the methods dereference it, so it
	// must be non-nil.
	HTTPClient *http.Client
}
// NewMarquezClient returns a client that targets baseURL and uses a
// dedicated http.Client with a 10-second timeout. baseURL is stored as
// given, without validation or normalization.
func NewMarquezClient(baseURL string) *MarquezClient {
	httpClient := &http.Client{Timeout: 10 * time.Second}

	client := new(MarquezClient)
	client.BaseURL = baseURL
	client.HTTPClient = httpClient
	return client
}
// SendEvent posts event as JSON to the lineage endpoint
// (BaseURL + "/api/v1/lineage").
//
// Before sending, empty fields on event are filled in place: SchemaURL gets
// the default OpenLineage schema URL and EventTime gets the current UTC time
// formatted as RFC 3339 with nanoseconds. The caller's event is therefore
// mutated.
//
// It returns an error when event is nil, when marshaling or the HTTP round
// trip fails, or when the server answers with a status outside 2xx (the
// response body is included in the error text).
func (c *MarquezClient) SendEvent(event *OpenLineageEvent) error {
	// Guard against a nil pointer instead of panicking on the field
	// accesses below.
	if event == nil {
		return fmt.Errorf("event is nil")
	}

	// Set default schema URL if not provided.
	if event.SchemaURL == "" {
		event.SchemaURL = "https://openlineage.io/spec/1-0-5/OpenLineage.json"
	}

	// Set event time if not provided.
	if event.EventTime == "" {
		event.EventTime = time.Now().UTC().Format(time.RFC3339Nano)
	}

	jsonData, err := json.Marshal(event)
	if err != nil {
		return fmt.Errorf("failed to marshal event: %w", err)
	}

	endpoint := fmt.Sprintf("%s/api/v1/lineage", c.BaseURL)
	req, err := http.NewRequest(http.MethodPost, endpoint, bytes.NewReader(jsonData))
	if err != nil {
		return fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.HTTPClient.Do(req)
	if err != nil {
		return fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Best effort: if the body cannot be read the error simply
		// carries whatever was read before the failure.
		body, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body))
	}
	return nil
}
// GetLineage fetches the lineage graph around a dataset.
//
// It issues GET BaseURL/api/v1/lineage with two query parameters: nodeId,
// built as "dataset:<namespace>:<datasetName>", and depth. The nodeId value
// is query-escaped so that namespaces or names containing characters such as
// '&', '#', '+' or spaces are transmitted intact rather than corrupting the
// query string.
//
// On HTTP 200 the response body is decoded as a JSON object and returned.
// Any other status yields an error that includes the status code and the
// response body.
func (c *MarquezClient) GetLineage(namespace, datasetName string, depth int) (map[string]interface{}, error) {
	nodeID := fmt.Sprintf("dataset:%s:%s", namespace, datasetName)
	endpoint := fmt.Sprintf("%s/api/v1/lineage?nodeId=%s&depth=%d",
		c.BaseURL, url.QueryEscape(nodeID), depth)

	resp, err := c.HTTPClient.Get(endpoint)
	if err != nil {
		return nil, fmt.Errorf("failed to get lineage: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: a failed read only shortens the error text.
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body))
	}

	var result map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}
	return result, nil
}
// GetNamespaces lists the namespaces known to the server.
//
// It issues GET BaseURL/api/v1/namespaces and, on HTTP 200, returns the
// array found under the "namespaces" key of the JSON response. Any other
// status yields an error carrying the status code and response body.
func (c *MarquezClient) GetNamespaces() ([]map[string]interface{}, error) {
	resp, err := c.HTTPClient.Get(c.BaseURL + "/api/v1/namespaces")
	if err != nil {
		return nil, fmt.Errorf("failed to get namespaces: %w", err)
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		// Best effort: a failed read only shortens the error text.
		raw, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("API error (status %d): %s", code, string(raw))
	}

	var payload struct {
		Namespaces []map[string]interface{} `json:"namespaces"`
	}
	decoder := json.NewDecoder(resp.Body)
	if decodeErr := decoder.Decode(&payload); decodeErr != nil {
		return nil, fmt.Errorf("failed to decode response: %w", decodeErr)
	}
	return payload.Namespaces, nil
}
// GetJobs lists the jobs in a namespace.
//
// It issues GET BaseURL/api/v1/namespaces/<namespace>/jobs. The namespace is
// path-escaped so that values containing '/', '?', '#' or spaces (for example
// "s3://bucket") stay a single path segment instead of altering the route.
//
// On HTTP 200 it returns the array found under the "jobs" key of the JSON
// response. Any other status yields an error carrying the status code and
// response body.
func (c *MarquezClient) GetJobs(namespace string) ([]map[string]interface{}, error) {
	endpoint := fmt.Sprintf("%s/api/v1/namespaces/%s/jobs", c.BaseURL, url.PathEscape(namespace))

	resp, err := c.HTTPClient.Get(endpoint)
	if err != nil {
		return nil, fmt.Errorf("failed to get jobs: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: a failed read only shortens the error text.
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body))
	}

	var result struct {
		Jobs []map[string]interface{} `json:"jobs"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}
	return result.Jobs, nil
}
// GetDatasets lists the datasets in a namespace.
//
// It issues GET BaseURL/api/v1/namespaces/<namespace>/datasets. The namespace
// is path-escaped so that values containing '/', '?', '#' or spaces (for
// example "s3://bucket") stay a single path segment instead of altering the
// route.
//
// On HTTP 200 it returns the array found under the "datasets" key of the
// JSON response. Any other status yields an error carrying the status code
// and response body.
func (c *MarquezClient) GetDatasets(namespace string) ([]map[string]interface{}, error) {
	endpoint := fmt.Sprintf("%s/api/v1/namespaces/%s/datasets", c.BaseURL, url.PathEscape(namespace))

	resp, err := c.HTTPClient.Get(endpoint)
	if err != nil {
		return nil, fmt.Errorf("failed to get datasets: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: a failed read only shortens the error text.
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body))
	}

	var result struct {
		Datasets []map[string]interface{} `json:"datasets"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}
	return result.Datasets, nil
}
// GetJobRuns lists the runs of one job.
//
// It issues GET BaseURL/api/v1/namespaces/<namespace>/jobs/<jobName>/runs.
// Both namespace and jobName are path-escaped so that values containing '/',
// '?', '#' or spaces each stay a single path segment instead of altering the
// route.
//
// On HTTP 200 it returns the array found under the "runs" key of the JSON
// response. Any other status yields an error carrying the status code and
// response body.
func (c *MarquezClient) GetJobRuns(namespace, jobName string) ([]map[string]interface{}, error) {
	endpoint := fmt.Sprintf("%s/api/v1/namespaces/%s/jobs/%s/runs",
		c.BaseURL, url.PathEscape(namespace), url.PathEscape(jobName))

	resp, err := c.HTTPClient.Get(endpoint)
	if err != nil {
		return nil, fmt.Errorf("failed to get job runs: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: a failed read only shortens the error text.
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body))
	}

	var result struct {
		Runs []map[string]interface{} `json:"runs"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}
	return result.Runs, nil
}