Update app.py
app.py
CHANGED
@@ -2,13 +2,11 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer  # Import the tokenizer

-# Import the tokenizer
+# Import the tokenizer - No need to import twice, remove the second import
 tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
 # Define a maximum context length (tokens). Check your model's documentation!
 MAX_CONTEXT_LENGTH = 4096  # Example: Adjust this based on your model!
-
 default_nvc_prompt_template = r"""<|system|>You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help users translate their stories or judgments into feelings and needs, and work together to identify a clear request. Follow these steps:
 1. **Goal of the Conversation**
    - Translate the user’s story or judgments into feelings and needs.
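The hardcoded MAX_CONTEXT_LENGTH deserves the "check your model's documentation" caveat it carries. A minimal sketch of reading the limit from the model config instead, assuming config.max_position_embeddings reflects the model's positional limit (for Zephyr-7B's Mistral base it is larger than 4096, so the min() keeps the conservative default):

from transformers import AutoConfig

config = AutoConfig.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
# Treat the positional limit as an upper bound; keep the conservative
# 4096 default unless the serving endpoint is known to allow more.
MAX_CONTEXT_LENGTH = min(4096, config.max_position_embeddings)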
@@ -74,13 +72,9 @@ default_nvc_prompt_template = r"""<|system|>You are Roos, an NVC (Nonviolent Com
     - “I sense some frustration. Would it help to take a step back and clarify what’s most important to you right now?”
 13. **Ending the Conversation**
    - If the user indicates they want to end the conversation, thank them for sharing and offer to continue later:
    - “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help.”</s>"""
-
 def count_tokens(text: str) -> int:
     """Counts the number of tokens in a given string."""
     return len(tokenizer.encode(text))
-
 def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
     """Truncates the conversation history to fit within the maximum token limit.

     Args:
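One caveat on count_tokens as defined in the hunk above: tokenizer.encode() typically prepends special tokens such as BOS, so summing per-message counts slightly overestimates the final prompt length. A stricter variant, if that ever matters here, is a one-line change:

def count_tokens(text: str) -> int:
    """Counts tokens as raw content, without the special tokens
    the tokenizer would add around a full sequence."""
    return len(tokenizer.encode(text, add_special_tokens=False))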
@@ -94,22 +88,17 @@ def truncate_history(history: list[tuple[str, str]], system_message: str, max_le
     truncated_history = []
     system_message_tokens = count_tokens(system_message)
     current_length = system_message_tokens
-
     # Iterate backwards through the history (newest to oldest)
     for user_msg, assistant_msg in reversed(history):
         user_tokens = count_tokens(user_msg) if user_msg else 0
         assistant_tokens = count_tokens(assistant_msg) if assistant_msg else 0
         turn_tokens = user_tokens + assistant_tokens
-
         if current_length + turn_tokens <= max_length:
             truncated_history.insert(0, (user_msg, assistant_msg))  # Add to the beginning
             current_length += turn_tokens
         else:
             break  # Stop adding turns if we exceed the limit
-
     return truncated_history
-
 def respond(
     message,
     history: list[tuple[str, str]],
     system_message,  # System message is now an argument
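truncate_history walks the history newest-to-oldest and keeps whole (user, assistant) turns until the token budget is spent, so the oldest turns drop first. A quick usage sketch; the 512-token headroom reserved for the model's reply is an assumption for illustration, not part of the diff:

history = [
    ("Hi", "Hello! What would you like to explore today?"),
    ("My colleague never listens to me.", "It sounds like you may be feeling unheard..."),
]
kept = truncate_history(
    history,
    system_message=default_nvc_prompt_template,
    max_length=MAX_CONTEXT_LENGTH - 512,  # reserve headroom for the reply
)
# kept equals history unless the budget is exceeded; with a tiny
# max_length only the newest turn would survive.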
@@ -118,7 +107,6 @@ def respond(
     top_p,
 ):
     """Responds to a user message, maintaining conversation history, using special tokens and message list."""
-
     if message.lower() == "clear memory":  # Check for the clear memory command
         return "", []  # Return empty message and empty history to reset the chat

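A caution on the "clear memory" branch: because respond yields further down, Python treats it as a generator, so return "", [] simply ends the generator without sending anything to the UI; and gr.ChatInterface expects the chat function to return or yield the reply string, not a (message, history) tuple. A sketch that stays within those semantics, leaving history clearing to ChatInterface's built-in clear control:

    if message.strip().lower() == "clear memory":
        yield "Memory cleared. Send a new message to start fresh."
        return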
@@ -128,11 +116,10 @@ def respond(
     messages = [{"role": "system", "content": formatted_system_message}]  # Start with system message as before
     for user_msg, assistant_msg in truncated_history:
         if user_msg:
-            messages.append({"role": "user", "content": f"<|user|>\n{user_msg}</s>"})
+            messages.append({"role": "user", "content": user_msg})  # Format history user message - Removed extra tags
         if assistant_msg:
-            messages.append({"role": "assistant", "content": f"<|assistant|>\n{assistant_msg}</s>"})
+            messages.append({"role": "assistant", "content": assistant_msg})  # Format history assistant message - Removed extra tags
-
-    messages.append({"role": "user", "content": f"<|user|>\n{message}</s>"})  # Format current user message
+    messages.append({"role": "user", "content": message})  # Format current user message - Removed extra tags

     response = ""
     try:
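With the extra <|user|>/<|assistant|> tags removed, messages now holds plain role/content dicts, which is what the chat endpoint expects; the server applies Zephyr's chat template on its side. Should the prompt ever need to be built locally instead, the tokenizer carries the same template. A sketch of that local path:

prompt = tokenizer.apply_chat_template(
    messages,                    # the same [{"role": ..., "content": ...}] list
    tokenize=False,              # return a string rather than token ids
    add_generation_prompt=True,  # append the assistant header for the model to complete
)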
@@ -145,11 +132,15 @@ def respond(
     ):
         token = chunk.choices[0].delta.content
         response += token
-        yield response
+        # Post-processing to remove prefixes (example - add to your existing yield) - Solution 3 (Fallback)
+        processed_response = response.replace("User:", "").replace("Assistant:", "").replace("Roos:", "").lstrip()
+        yield processed_response
+
     except Exception as e:
         print(f"An error occurred: {e}")  # It's good practice to add a try-except block
         yield "I'm sorry, I encountered an error. Please try again."

+
 # --- Gradio Interface ---
 demo = gr.ChatInterface(
     respond,
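For orientation, the hunk above is the tail of the streaming loop; a minimal sketch of the full call it iterates over, assuming huggingface_hub's chat_completion API (delta.content can arrive as None on some chunks, hence the guard the diff does not yet have):

for chunk in client.chat_completion(
    messages,
    max_tokens=max_tokens,
    stream=True,
    temperature=temperature,
    top_p=top_p,
):
    token = chunk.choices[0].delta.content
    if token:  # skip None/empty deltas
        response += token
        processed_response = response.replace("User:", "").replace("Assistant:", "").replace("Roos:", "").lstrip()
        yield processed_response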
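The diff ends mid-call inside gr.ChatInterface(...). For completeness, a typical wiring for this respond signature looks like the following; the labels, ranges, and defaults are assumptions for illustration, not part of the commit:

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=default_nvc_prompt_template, label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()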