HaitameLaf committed on
Commit
7c0b176
1 Parent(s): 3716c94

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -0
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import sys
3
+ import os
4
+
5
+ # Fonction pour installer un package si non présent
6
def install_package(package_name):
    """Install ``package_name`` with pip, using the interpreter running this script.

    pip is invoked as ``<sys.executable> -m pip install <package_name>``.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install"]
    pip_command.append(package_name)
    subprocess.run(pip_command, check=True)
8
+
9
+ # Vérifiez si torch est installé, sinon installez-le
10
+ try:
11
+ import torch
12
+ except ImportError:
13
+ print("Torch n'est pas installé. Installation de torch...")
14
+ install_package("torch")
15
+ import torch
16
+
17
+ # Vérifiez si transformers est installé, sinon installez-le
18
+ try:
19
+ from transformers import (
20
+ AutoModelForCausalLM,
21
+ AutoTokenizer,
22
+ TextIteratorStreamer,
23
+ )
24
+ except ImportError:
25
+ print("Transformers n'est pas installé. Installation de transformers...")
26
+ install_package("transformers")
27
+ from transformers import (
28
+ AutoModelForCausalLM,
29
+ AutoTokenizer,
30
+ TextIteratorStreamer,
31
+ )
32
+
33
# Install flash-attn at startup.
# The install is best-effort: the exit status is deliberately not checked, so a
# failed install does not prevent the app from starting.
# The child environment extends os.environ instead of replacing it; a bare
# dict passed as `env` would strip PATH, HOME, proxy settings, etc. from pip.
# pip is run through the current interpreter (no shell) so the package lands in
# the same environment this script is running in.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
)
39
+
40
+ import gradio as gr
41
+ from threading import Thread
42
+
43
# Read the Hugging Face access token from the environment.
# Startup is aborted when HF_TOKEN is missing or empty.
token = os.environ.get("HF_TOKEN")
if token is None or token == "":
    raise ValueError("Le token d'authentification HF_TOKEN n'est pas défini.")
47
+
48
# Load the model weights first, then the tokenizer, authenticating with `token`.
# NOTE(review): the tokenizer is loaded from a different repository than the
# model — confirm that the two share the same vocabulary and chat template.
model = AutoModelForCausalLM.from_pretrained(
    "CampAIgn/Phi-3-mini_16bit", trust_remote_code=True, token=token
)
tok = AutoTokenizer.from_pretrained(
    "HaitameLaf/Phi3-Game16bit",
    token=token,
)

# Token ids treated as end-of-sequence; only the tokenizer's EOS id is listed.
eos_id = tok.eos_token_id
terminators = [eos_id]
57
+
58
# Pick CUDA when it is available, otherwise fall back to the CPU, report the
# choice on stdout, and move the model onto the selected device.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
if use_cuda:
    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
else:
    print("Using CPU")

model = model.to(device)
67
+
68
+ # Fonction de chat
69
def _build_conversation(message, history):
    """Flatten the chat history plus the new message into role/content dicts, in turn order."""
    conversation = []
    for turn in history:
        if isinstance(turn, dict):
            # Already a role/content message; keep only the fields the template needs.
            conversation.append({"role": turn["role"], "content": turn["content"]})
            continue
        # Pair format: index 0 is the user text, index 1 the assistant reply.
        conversation.append({"role": "user", "content": turn[0]})
        if turn[1]:
            # Replies that are missing or empty are skipped.
            conversation.append({"role": "assistant", "content": turn[1]})
    conversation.append({"role": "user", "content": message})
    return conversation


def _sampling_kwargs(temperature, do_sample):
    """Return the sampling-related generate() arguments, using greedy decoding when temperature <= 0."""
    if do_sample and temperature > 0:
        return {"do_sample": True, "temperature": temperature}
    # Sampling needs a strictly positive temperature; otherwise decode greedily
    # and leave the temperature out so it is not reported as an unused setting.
    return {"do_sample": False}


def chat(message, history, temperature, do_sample, max_tokens):
    """Stream the model's reply to ``message`` given the previous conversation.

    Args:
        message: The new user message.
        history: Previous turns, either ``[user, assistant]`` pairs or
            ``{"role": ..., "content": ...}`` dicts.
        temperature: Sampling temperature; values <= 0 select greedy decoding.
        do_sample: Whether to sample instead of decoding greedily.
        max_tokens: Upper bound on the number of newly generated tokens.

    Yields:
        The reply accumulated so far, growing with each streamed chunk.
    """
    conversation = _build_conversation(message, history)
    prompt = tok.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    model_inputs = tok([prompt], return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tok, timeout=20.0, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = {
        "input_ids": model_inputs.input_ids,
        "streamer": streamer,
        # The UI slider may deliver a float; generate() expects an integer count.
        "max_new_tokens": int(max_tokens),
        "eos_token_id": terminators,
    }
    generate_kwargs.update(_sampling_kwargs(temperature, do_sample))

    # Generation runs in a background thread so this generator can relay text
    # from the streamer as soon as it is produced.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text

    # Final yield guarantees at least one value even if the stream was empty.
    yield partial_text
95
+
96
# Gradio UI: a chat interface around `chat`, with the generation parameters
# tucked into a collapsed accordion. The components are created in the same
# order as they are handed to ChatInterface: accordion, then the three inputs.
parameters_accordion = gr.Accordion(label="⚙️ Parameters", open=False, render=False)
temperature_slider = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.9, label="Temperature")
sampling_checkbox = gr.Checkbox(label="Sampling", value=True)
max_tokens_slider = gr.Slider(minimum=128, maximum=4096, step=1, value=512, label="Max new tokens")

demo = gr.ChatInterface(
    fn=chat,
    examples=[["Write me a poem about Machine Learning."]],
    additional_inputs_accordion=parameters_accordion,
    # Order matches chat's extra parameters: temperature, do_sample, max_tokens.
    additional_inputs=[temperature_slider, sampling_checkbox, max_tokens_slider],
    stop_btn="Stop Generation",
    title="Chat With LLMs",
    description="Now Running [CampAIgn/Phi-3-mini_16bit](https://huggingface.co/CampAIgn/Phi-3-mini_16bit)",
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()