# example_LLM2Vec/app.py
import os
import gradio as gr
from llm2vec import LLM2Vec
from transformers import AutoTokenizer, AutoModel, AutoConfig
from peft import PeftModel
import torch
# Disable fused SDP kernels (a common workaround for models with patched
# attention, such as LLM2Vec's bidirectional variant)
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)
# Read tokens from environment variables
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
HF_TOKEN = os.getenv('HF_TOKEN')
if not GROQ_API_KEY or not HF_TOKEN:
    raise ValueError("GROQ_API_KEY and HF_TOKEN must be set as environment variables.")
os.environ['GROQ_API_KEY'] = GROQ_API_KEY
os.environ['HF_TOKEN'] = HF_TOKEN
# Load tokenizer and base model (bidirectional Sheared-LLaMA from LLM2Vec)
tokenizer = AutoTokenizer.from_pretrained("McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp")
config = AutoConfig.from_pretrained(
    "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp", trust_remote_code=True
)
model = AutoModel.from_pretrained(
    "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp",
    trust_remote_code=True,
    config=config,
    torch_dtype=torch.bfloat16,
    device_map="cuda" if torch.cuda.is_available() else "cpu",
)
# Apply the MNTP LoRA weights and merge them into the base model
model = PeftModel.from_pretrained(model, "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp")
model = model.merge_and_unload()
# Load the unsupervised SimCSE LoRA adapter on top of the merged model
model = PeftModel.from_pretrained(model, "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse")
# Wrapper for encoding and pooling operations
l2v = LLM2Vec(model, tokenizer, pooling_mode="mean", max_length=512)
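# Sketch of the wrapper's API (per the LLM2Vec README): encode() also accepts
# [instruction, text] pairs for instruction-conditioned retrieval queries.
# Illustrative only; the app below uses plain strings:
#   queries = [["Retrieve relevant passages:", "what is machine learning?"]]
#   query_reps = l2v.encode(queries)  # -> torch.Tensor, one row per query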
def encode_texts(input_texts):
    # The Textbox passes a single string; split on newlines to get one text per entry
    texts = [t for t in input_texts.split("\n") if t.strip()]
    # l2v.encode takes a list of texts and returns a tensor; convert to nested lists for gr.JSON
    return l2v.encode(texts).tolist()
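# Illustrative helper (an assumption, not part of the original app): cosine
# similarity is the usual way to compare LLM2Vec embeddings.
def cosine_similarity(a, b):
    # a, b: 1-D embedding vectors (tensors or lists convertible to tensors)
    a, b = torch.as_tensor(a), torch.as_tensor(b)
    return torch.nn.functional.cosine_similarity(a, b, dim=0).item()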
# Define Gradio interface
iface = gr.Interface(
    fn=encode_texts,
    inputs=gr.Textbox(lines=5, placeholder="Enter texts separated by newlines..."),
    outputs=gr.JSON(),
)
# Launch Gradio app
iface.launch(share=True)
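
# Querying the running app from another process (a sketch using gradio_client;
# the local URL is an assumption, substitute the printed share URL as needed):
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict("first text\nsecond text", api_name="/predict")
#   print(result)  # JSON list with one embedding vector per input line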