import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


def load_model_and_tokenizer(model_dir):
    """Load the fine-tuned causal LM and its tokenizer, in fp16 on GPU or fp32 on CPU."""
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_dir)
        model = AutoModelForCausalLM.from_pretrained(
            model_dir,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        )
        # Move the model to the GPU when one is available, otherwise keep it on the CPU.
        model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
        model.eval()  # inference only
    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None
    return model, tokenizer


# Load the model once at startup so every Gradio request reuses the same instance.
model_dir = "KJX123/Llama2-7b-finetune"
model, tokenizer = load_model_and_tokenizer(model_dir)
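

# The prompt template below ("Query / GitHub Code / YouTube Code") is assumed to match the
# format this checkpoint was fine-tuned on; adjust it if your data used a different template.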
def generate_code(query):
    if model is None or tokenizer is None:
        return "Model or tokenizer not loaded properly."

    prompt = f"Query: {query}\nGitHub Code:\nYouTube Code:"
    inputs = tokenizer(prompt, return_tensors='pt')

    # Keep the inputs on the same device as the model.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    inputs = inputs.to(device)

    outputs = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=600,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,  # Llama tokenizers define no pad token
    )
    generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_code


def gradio_interface(query):
    code = generate_code(query)
    return code


iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
    outputs=gr.Textbox(lines=20, placeholder="Generated code will appear here..."),
    title="Code Generator",
    description="Enter a programming task or query to generate code using the fine-tuned model.",
)


if __name__ == "__main__":
    iface.launch()
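    # Note: launch() serves the app locally by default; launch(share=True) also creates
    # a temporary public URL, which is handy on Colab or a remote machine.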