Spaces: Running on Zero

Update app.py
app.py CHANGED
@@ -8,6 +8,7 @@ import logging
 from unsloth import FastLanguageModel
 import subprocess
 
+# Set up logging for debugging
 logging.basicConfig(
     level=logging.DEBUG, # Set the logging level to DEBUG to capture all messages
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
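Note: logging.basicConfig is a no-op when the root logger already has handlers, which can happen if a Space re-executes app.py in the same process. A minimal sketch (assuming Python 3.8+, which added the force flag) that makes the DEBUG configuration apply unconditionally:

import logging

# force=True removes any handlers already attached to the root logger,
# so this call configures logging even if it already ran once.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    force=True,
)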
@@ -17,7 +18,11 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 logger.info("HELLO WORLD...")
+
+# Get environment variable for Hugging Face access
 READ_HF = os.environ["read_hf"]
+
+# Alpaca prompt template
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
 ### Instruction:
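Note: os.environ["read_hf"] raises a bare KeyError when app.py starts if the Space secret is missing. A sketch of a friendlier failure mode (the error message is illustrative, not from this commit):

import os

READ_HF = os.environ.get("read_hf")
if READ_HF is None:
    # Fail fast with an actionable message instead of a KeyError traceback.
    raise RuntimeError("Secret 'read_hf' is not set; add it in the Space settings.")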
@@ -28,6 +33,8 @@ alpaca_prompt = """Below is an instruction that describes a task, paired with an
 
 ### Response:
 {}"""
+
+# Inventory management instructions
 string = '''
 You are an AI assistant tasked with managing inventory based on user instructions. You must meticulously analyze each user request to determine the appropriate action and execute it with the correct parameters.
 
@@ -72,6 +79,7 @@ You are an AI assistant tasked with managing inventory based on user instruction
 
 Category List : ["Dairy & Eggs", "Beverages & Snacks", "Cleaning & Hygiene", "Grains & Staples", "Personal Care", "Other"]
 '''
+
 @spaces.GPU()
 def chunk_it(inventory_list, user_input_text):
     # Check for CUDA and NVIDIA-related errors
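Note: on ZeroGPU hardware, @spaces.GPU() allocates a GPU only while the decorated function runs. If model loading plus generation can exceed the default window, the decorator accepts a duration hint; a sketch (the 120-second value is illustrative, not from this commit):

import spaces

@spaces.GPU(duration=120)  # request up to ~120 s of GPU time per call
def chunk_it(inventory_list, user_input_text):
    ...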
@@ -98,46 +106,42 @@ def chunk_it(inventory_list, user_input_text):
             token = READ_HF
         )
         logger.info("Model and tokenizer loaded.")
+
+        # ... (rest of your code)
+
+        formatted_prompt = alpaca_prompt.format(
+            string + inventory_list, # instruction
+            user_input_text, # input
+            "", # output - leave this blank for generation!
+        )
+        logger.debug(f"Formatted prompt: {formatted_prompt}")
+        try:
+            inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
+            logger.debug(f"Tokenized inputs: {inputs}")
+        except Exception as e:
+            logger.error(f"Failed to tokenize inputs: {e}")
+            raise
+        logger.info("Generating output...")
+        try:
+            outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
+            logger.info("Output generated.")
+        except Exception as e:
+            logger.error(f"Failed to generate output: {e}")
+            raise
+        try:
+            reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+            logger.debug(f"Decoded output: {reply}")
+        except Exception as e:
+            logger.error(f"Failed to decode output: {e}")
+            raise
+        logger.debug(f"Final reply: {reply}")
+        return reply
+
     except Exception as e:
-        logger.error(f"
-
-
-
-        FastLanguageModel.for_inference(model)
-        logger.info("Inference enabled.")
-    except Exception as e:
-        logger.error(f"Failed to enable native inference: {e}")
-        raise
-    formatted_prompt = alpaca_prompt.format(
-        string + inventory_list, # instruction
-        user_input_text, # input
-        "", # output - leave this blank for generation!
-    )
-    logger.debug(f"Formatted prompt: {formatted_prompt}")
-    try:
-        inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
-        logger.debug(f"Tokenized inputs: {inputs}")
-    except Exception as e:
-        logger.error(f"Failed to tokenize inputs: {e}")
-        raise
-    logger.info("Generating output...")
-    try:
-        outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
-        logger.info("Output generated.")
-    except Exception as e:
-        logger.error(f"Failed to generate output: {e}")
-        raise
-    try:
-        reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
-        logger.debug(f"Decoded output: {reply}")
-    except Exception as e:
-        logger.error(f"Failed to decode output: {e}")
-        raise
-    # pattern = r"### Response:\n(.*?)<\|end_of_text\|>"
-    # match = re.search(pattern, reply[0], re.DOTALL)
-    # reply = match.group(1).strip()
-    logger.debug(f"Final reply: {reply}")
-    return reply
+        logger.error(f"Error loading model or CUDA issues: {e}")
+        return "There seems to be an issue with CUDA or the model. Please check the Hugging Face Spaces environment."
+
+
 # Interface for inputs
 iface = gr.Interface(
     fn=chunk_it,
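Note: tokenizer.batch_decode returns the prompt and the completion together, so reply[0] still begins with the full instruction text; the commented-out regex that this commit deletes was meant to strip it. Because the code decodes with skip_special_tokens=True, the <|end_of_text|> marker in that pattern would already be removed, which may be why it never matched. A sketch that extracts only the generated answer under that assumption:

import re

# Match from the "### Response:" header to the end of the decoded string;
# the end-of-text token is stripped by skip_special_tokens=True.
match = re.search(r"### Response:\n(.*)", reply[0], re.DOTALL)
if match:
    reply = match.group(1).strip()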
@@ -148,6 +152,15 @@ iface = gr.Interface(
     outputs=gr.Textbox(label="output", lines=23),
     title="Testing",
 )
+
+# Set up logging to display in Gradio
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG) # Set the logging level
+ch = logging.StreamHandler(gr.Log()) # Create a StreamHandler and send logs to gr.Log
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ch.setFormatter(formatter)
+logger.addHandler(ch)
+
 logger.info("Launching Gradio interface...")
 try:
     iface.launch(inline=False)
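Note: logging.StreamHandler expects a file-like object with write() and flush() methods, and gr.Log does not appear to be a core Gradio component (a Log component exists as the third-party gradio_log package), so this handler will likely fail at runtime. A safer sketch that keeps the DEBUG output visible in the Space's container logs instead:

import logging
import sys

handler = logging.StreamHandler(sys.stdout)  # the Space UI surfaces stdout in its container logs
handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
logging.getLogger(__name__).addHandler(handler)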