# embedder_nomic.py
from transformers import AutoTokenizer, AutoModel
import torch


class NomicEmbedder:
    """Wrap a Hugging Face tokenizer/model pair to turn text into a vector.

    Constructing the class directly loads the tokenizer and model every
    time. Use :meth:`get_instance` to share one lazily created instance
    across callers.
    """

    # Cached shared instance, populated on the first get_instance() call.
    _instance = None

    def __init__(self, model_path: str = ".model/nomic-embed-text-v1.5"):
        """Load the tokenizer and model found at *model_path*.

        Args:
            model_path: Location handed unchanged to ``from_pretrained``
                for both the tokenizer and the model.
        """
        # NOTE(review): trust_remote_code=True lets the checkpoint execute
        # its own Python code on load; only point model_path at a trusted
        # source.
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path, trust_remote_code=True
        )
        self.model = AutoModel.from_pretrained(
            model_path, trust_remote_code=True
        )

    @classmethod
    def get_instance(cls) -> "NomicEmbedder":
        """Return the shared embedder, creating it on first use.

        The instance is built with the default ``model_path``. ``cls()``
        is used rather than the hard-coded class name so that a subclass
        calling this method receives an instance of itself.
        """
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def embed(self, text: str) -> list[float]:
        """Generate an embedding for *text*.

        The text is tokenized as a batch of one (truncated to 8192
        tokens), run through the model without gradient tracking, and the
        last hidden state is averaged over the token axis.

        Args:
            text: The string to embed.

        Returns:
            The mean-pooled hidden state as a flat list of floats. No
            normalization is applied.
        """
        inputs = self.tokenizer(
            [text],
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=8192,
        )
        with torch.no_grad():
            hidden = self.model(**inputs).last_hidden_state
            # Average over the token axis, then take the single batch row
            # explicitly; a bare squeeze() would drop every size-1 axis.
            embedding = hidden.mean(dim=1)[0]
        return embedding.tolist()