1024m committed on
Commit afab4ab · verified · 1 parent: 58a71ea

Update app.py

Files changed (1): app.py +13 -70
app.py CHANGED
@@ -8,23 +8,10 @@ from datetime import datetime
 print("Loading model and tokenizer...")
 model_name = "large-traversaal/Phi-4-Hindi"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.bfloat16,
-    device_map="auto"
-)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
 print("Model and tokenizer loaded successfully!")
-option_mapping = {
-    "translation": "### TRANSLATION ###",
-    "mcq": "### MCQ ###",
-    "nli": "### NLI ###",
-    "summarization": "### SUMMARIZATION ###",
-    "long response": "### LONG RESPONSE ###",
-    "short response": "### SHORT RESPONSE ###",
-    "direct response": "### DIRECT RESPONSE ###",
-    "paraphrase": "### PARAPHRASE ###",
-    "code": "### CODE ###"
-}
+option_mapping = {"translation": "### TRANSLATION ###", "mcq": "### MCQ ###", "nli": "### NLI ###", "summarization": "### SUMMARIZATION ###",
+                  "long response": "### LONG RESPONSE ###", "direct response": "### DIRECT RESPONSE ###", "paraphrase": "### PARAPHRASE ###", "code": "### CODE ###"}
 def generate_response(message, temperature, max_new_tokens, top_p, task):
     append_text = option_mapping.get(task, "")
     prompt = f"INPUT : {message} {append_text} RESPONSE : "
@@ -32,14 +19,7 @@ def generate_response(message, temperature, max_new_tokens, top_p, task):
     start_time = time.time()
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
-    gen_kwargs = {
-        "input_ids": inputs["input_ids"],
-        "streamer": streamer,
-        "temperature": temperature,
-        "max_new_tokens": max_new_tokens,
-        "top_p": top_p,
-        "do_sample": True if temperature > 0 else False,
-    }
+    gen_kwargs = {"input_ids": inputs["input_ids"], "streamer": streamer, "temperature": temperature, "max_new_tokens": max_new_tokens, "top_p": top_p, "do_sample": True if temperature > 0 else False,}
     thread = Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
     result = []
@@ -60,59 +40,22 @@ with gr.Blocks() as demo:
     gr.Markdown("# Phi-4-Hindi Demo")
     with gr.Row():
         with gr.Column():
-            input_text = gr.Textbox(
-                label="Input",
-                placeholder="Enter your text here...",
-                lines=5
-            )
-            task_dropdown = gr.Dropdown(
-                choices=["translation", "mcq", "nli", "summarization", "long response", "short response", "direct response", "paraphrase", "code"],
-                value="long response",
-                label="Task"
-            )
+            input_text = gr.Textbox(label="Input", placeholder="Enter your text here...", lines=5)
+            task_dropdown = gr.Dropdown(choices=["translation", "mcq", "nli", "summarization", "long response", "direct response", "paraphrase", "code"], value="long response", label="Task")
             with gr.Row():
                 with gr.Column():
-                    temperature = gr.Slider(
-                        minimum=0.0,
-                        maximum=1.0,
-                        value=0.1,
-                        step=0.01,
-                        label="Temperature"
-                    )
+                    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Temperature")
                 with gr.Column():
-                    max_new_tokens = gr.Slider(
-                        minimum=50,
-                        maximum=1000,
-                        value=400,
-                        step=10,
-                        label="Max New Tokens"
-                    )
+                    max_new_tokens = gr.Slider(minimum=50, maximum=1000, value=400, step=10, label="Max New Tokens")
                 with gr.Column():
-                    top_p = gr.Slider(
-                        minimum=0.0,
-                        maximum=1.0,
-                        value=0.1,
-                        step=0.01,
-                        label="Top P"
-                    )
+                    top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Top P")
             with gr.Row():
                 clear_btn = gr.Button("Clear")
                 send_btn = gr.Button("Send", variant="primary")
         with gr.Column():
-            output_text = gr.Textbox(
-                label="Output",
-                lines=15
-            )
-    send_btn.click(
-        fn=generate_response,
-        inputs=[input_text, temperature, max_new_tokens, top_p, task_dropdown],
-        outputs=output_text
-    )
-    clear_btn.click(
-        fn=lambda: ("", ""),
-        inputs=None,
-        outputs=[input_text, output_text]
-    )
+            output_text = gr.Textbox(label="Output", lines=15)
+    send_btn.click(fn=generate_response, inputs=[input_text, temperature, max_new_tokens, top_p, task_dropdown], outputs=output_text)
+    clear_btn.click(fn=lambda: ("", ""), inputs=None, outputs=[input_text, output_text])
 if __name__ == "__main__":
     demo.queue().launch()
 """
@@ -138,7 +81,7 @@ def generate_response(message, temperature, max_new_tokens, top_p, task):
     inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(input_ids=inputs, max_new_tokens=max_new_tokens, use_cache=True, temperature=temperature, min_p=top_p, pad_token_id=tokenizer.eos_token_id)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    processed_response = response.split("### RESPONSE :assistant")[-1].strip()
+    processed_response = response.split("RESPONSE :assistant")[-1].strip()
     end_time = time.time()
     time_taken = end_time - start_time
     print(f"Output: {processed_response}")
 