import os
import gradio as gr
from llm2vec import LLM2Vec
from transformers import AutoTokenizer, AutoModel, AutoConfig
from peft import PeftModel
import torch
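# Disable the fused scaled-dot-product attention kernels; these can conflict
# with the bidirectional attention patch that LLM2Vec applies to the LLaMA model.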
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)
# Read tokens from environment variables
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
HF_TOKEN = os.getenv('HF_TOKEN')
if not GROQ_API_KEY or not HF_TOKEN:
    raise ValueError("GROQ_API_KEY and HF_TOKEN must be set as environment variables.")
os.environ['GROQ_API_KEY'] = GROQ_API_KEY
os.environ['HF_TOKEN'] = HF_TOKEN
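# The tokens are re-exported so downstream libraries (e.g. huggingface_hub) can
# read them; note that GROQ_API_KEY is not used directly in this script.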
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp")
config = AutoConfig.from_pretrained("McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp", trust_remote_code=True)
model = AutoModel.from_pretrained(
    "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp",
    trust_remote_code=True,
    config=config,
    torch_dtype=torch.bfloat16,
    device_map="cuda" if torch.cuda.is_available() else "cpu",
)
model = PeftModel.from_pretrained(model, "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp")
model = model.merge_and_unload()
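# merge_and_unload() folds the MNTP LoRA weights into the base model so the
# SimCSE adapter below can be loaded on top of the merged weights.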
# Load unsupervised SimCSE model
model = PeftModel.from_pretrained(model, "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse")
# Wrapper for encoding and pooling operations
l2v = LLM2Vec(model, tokenizer, pooling_mode="mean", max_length=512)
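# With pooling_mode="mean" the wrapper mean-pools token embeddings into one
# vector per text; inputs longer than 512 tokens are truncated.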
def encode_texts(input_texts):
    # The textbox delivers a single string; split it into one text per line
    texts = [t.strip() for t in input_texts.split("\n") if t.strip()]
    # Encode all texts and convert the resulting tensor to lists for JSON output
    embeddings = l2v.encode(texts)
    return embeddings.tolist()
# Define Gradio interface
iface = gr.Interface(
    fn=encode_texts,
    inputs=gr.Textbox(lines=5, placeholder="Enter texts separated by newlines..."),
    outputs=gr.JSON(),
)
# Launch Gradio app
iface.launch(share=True)