Files

27 lines
948 B
Python

# embedder_nomic.py
from transformers import AutoTokenizer, AutoModel
import torch
class NomicEmbedder:
    """Text embedder backed by a tokenizer/model pair loaded via transformers.

    :meth:`embed` turns one string into a single vector by averaging the
    model's last hidden state over the token axis. Use :meth:`get_instance`
    to share one loaded model between callers; calling the constructor
    directly loads a fresh tokenizer and model every time.
    """

    # Cached instance, filled in lazily by get_instance().
    _instance: "NomicEmbedder | None" = None

    def __init__(self, model_path: str = ".model/nomic-embed-text-v1.5"):
        """Load the tokenizer and model found at *model_path*.

        Args:
            model_path: Location handed unchanged to ``from_pretrained``.
        """
        # NOTE(security): trust_remote_code=True allows transformers to run
        # Python code shipped with the checkpoint, so model_path must point
        # at a source you trust.
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        self.model = AutoModel.from_pretrained(model_path, trust_remote_code=True)

    @classmethod
    def get_instance(cls) -> "NomicEmbedder":
        """Return the cached embedder, constructing it on the first call.

        The instance is built with the constructor's default arguments and
        stored on the class the method was called on.
        """
        if cls._instance is None:
            # Build through cls rather than the hard-coded base class so a
            # subclass calling get_instance() receives an instance of itself.
            cls._instance = cls()
        return cls._instance

    def embed(self, text: str) -> list[float]:
        """Generate an embedding vector for *text*.

        Args:
            text: The string to embed. Input beyond 8192 tokens is truncated
                by the tokenizer.

        Returns:
            The token-averaged last hidden state as a flat list of floats.
        """
        inputs = self.tokenizer(
            [text], return_tensors="pt", padding=True, truncation=True, max_length=8192
        )
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            hidden = self.model(**inputs).last_hidden_state
        # Average over the token axis, then pick the single batch row.
        # Indexing (instead of an argument-less squeeze()) removes only the
        # batch axis, so the result is always a 1-D vector.
        return hidden.mean(dim=1)[0].tolist()