import gc

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


def clear_memory():
    """Release unused memory before each generation."""
    gc.collect()
    if torch.cuda.is_available():  # guard: the model runs on CPU, so CUDA may be absent
        torch.cuda.empty_cache()


model_name = "GIGAParviz/Firooze_test"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    device_map="cpu",  # load weights directly onto the CPU
)
# Gradient checkpointing trades compute for memory during training; it has no effect on inference.
model.gradient_checkpointing_enable()

# max_new_tokens bounds only the generated continuation; max_length would also count the prompt.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,
)


def generate_response(prompt):
    clear_memory()
    # Alpaca-style instruction template; the prompt prefix is sliced off the returned text.
    instruction = f"### Instruction:\n{prompt}\n\n### Response:\n"
    result = pipe(instruction)
    return result[0]["generated_text"][len(instruction):]


with gr.Blocks() as demo:
    gr.Markdown("# 🔮 Persian LLM made by A.M.Parviz")
    prompt_input = gr.Textbox(label="Enter Prompt", placeholder="Type your prompt here...", lines=2)
    generate_button = gr.Button("Generate Response")
    response_output = gr.Textbox(label="Generated Response", lines=5)
    generate_button.click(fn=generate_response, inputs=prompt_input, outputs=response_output)
    clear_button = gr.ClearButton([prompt_input, response_output])

demo.launch()