Update app.py
app.py
CHANGED
@@ -2,13 +2,11 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer  # Import the tokenizer

-# Import the tokenizer
+# Import the tokenizer - No need to import twice, remove the second import
 tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
 # Define a maximum context length (tokens). Check your model's documentation!
 MAX_CONTEXT_LENGTH = 4096  # Example: Adjust this based on your model!
-
 default_nvc_prompt_template = r"""<|system|>You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help users translate their stories or judgments into feelings and needs, and work together to identify a clear request. Follow these steps:
 1. **Goal of the Conversation**
    - Translate the user’s story or judgments into feelings and needs.
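The hardcoded MAX_CONTEXT_LENGTH deserves the "check your model's documentation" caveat it carries. A minimal sketch of reading the limit from the model config instead, assuming config.max_position_embeddings reflects the model's positional limit (for Zephyr-7B's Mistral base it is larger than 4096, so the min() keeps the conservative default):

from transformers import AutoConfig

config = AutoConfig.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
# Treat the positional limit as an upper bound; keep the conservative
# 4096 default unless the serving endpoint is known to allow more.
MAX_CONTEXT_LENGTH = min(4096, config.max_position_embeddings)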
@@ -74,13 +72,9 @@ default_nvc_prompt_template = r"""<|system|>You are Roos, an NVC (Nonviolent Com
     - “I sense some frustration. Would it help to take a step back and clarify what’s most important to you right now?”
 13. **Ending the Conversation**
    - If the user indicates they want to end the conversation, thank them for sharing and offer to continue later:
    - “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help.”</s>"""
-
 def count_tokens(text: str) -> int:
     """Counts the number of tokens in a given string."""
     return len(tokenizer.encode(text))
-
 def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
     """Truncates the conversation history to fit within the maximum token limit.

     Args:
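One caveat on count_tokens as defined in the hunk above: tokenizer.encode() typically prepends special tokens such as BOS, so summing per-message counts slightly overestimates the final prompt length. A stricter variant, if that ever matters here, is a one-line change:

def count_tokens(text: str) -> int:
    """Counts tokens as raw content, without the special tokens
    the tokenizer would add around a full sequence."""
    return len(tokenizer.encode(text, add_special_tokens=False))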
@@ -94,22 +88,17 @@ def truncate_history(history: list[tuple[str, str]], system_message: str, max_le
     truncated_history = []
     system_message_tokens = count_tokens(system_message)
     current_length = system_message_tokens
-
     # Iterate backwards through the history (newest to oldest)
     for user_msg, assistant_msg in reversed(history):
         user_tokens = count_tokens(user_msg) if user_msg else 0
         assistant_tokens = count_tokens(assistant_msg) if assistant_msg else 0
         turn_tokens = user_tokens + assistant_tokens
-
         if current_length + turn_tokens <= max_length:
             truncated_history.insert(0, (user_msg, assistant_msg))  # Add to the beginning
             current_length += turn_tokens
         else:
             break  # Stop adding turns if we exceed the limit
-
     return truncated_history
-
 def respond(
     message,
     history: list[tuple[str, str]],
     system_message,  # System message is now an argument
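truncate_history walks the history newest-to-oldest and keeps whole (user, assistant) turns until the token budget is spent, so the oldest turns drop first. A quick usage sketch; the 512-token headroom reserved for the model's reply is an assumption for illustration, not part of the diff:

history = [
    ("Hi", "Hello! What would you like to explore today?"),
    ("My colleague never listens to me.", "It sounds like you may be feeling unheard..."),
]
kept = truncate_history(
    history,
    system_message=default_nvc_prompt_template,
    max_length=MAX_CONTEXT_LENGTH - 512,  # reserve headroom for the reply
)
# kept equals history unless the budget is exceeded; with a tiny
# max_length only the newest turn would survive.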
@@ -118,7 +107,6 @@ def respond(
     top_p,
 ):
     """Responds to a user message, maintaining conversation history, using special tokens and message list."""
-
     if message.lower() == "clear memory":  # Check for the clear memory command
         return "", []  # Return empty message and empty history to reset the chat

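A caution on the "clear memory" branch: because respond yields further down, Python treats it as a generator, so return "", [] simply ends the generator without sending anything to the UI; and gr.ChatInterface expects the chat function to return or yield the reply string, not a (message, history) tuple. A sketch that stays within those semantics, leaving history clearing to ChatInterface's built-in clear control:

    if message.strip().lower() == "clear memory":
        yield "Memory cleared. Send a new message to start fresh."
        return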
@@ -128,11 +116,10 @@ def respond(
     messages = [{"role": "system", "content": formatted_system_message}]  # Start with system message as before
     for user_msg, assistant_msg in truncated_history:
         if user_msg:
-            messages.append({"role": "user", "content": f"<|user|>\n{user_msg}</s>"})
+            messages.append({"role": "user", "content": user_msg})  # Format history user message - Removed extra tags
         if assistant_msg:
-            messages.append({"role": "assistant", "content": f"<|assistant|>\n{assistant_msg}</s>"})
+            messages.append({"role": "assistant", "content": assistant_msg})  # Format history assistant message - Removed extra tags
-
-    messages.append({"role": "user", "content": f"<|user|>\n{message}</s>"})  # Format current user message
+    messages.append({"role": "user", "content": message})  # Format current user message - Removed extra tags

     response = ""
     try:
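With the extra <|user|>/<|assistant|> tags removed, messages now holds plain role/content dicts, which is what the chat endpoint expects; the server applies Zephyr's chat template on its side. Should the prompt ever need to be built locally instead, the tokenizer carries the same template. A sketch of that local path:

prompt = tokenizer.apply_chat_template(
    messages,                    # the same [{"role": ..., "content": ...}] list
    tokenize=False,              # return a string rather than token ids
    add_generation_prompt=True,  # append the assistant header for the model to complete
)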
@@ -145,11 +132,15 @@ def respond(
     ):
         token = chunk.choices[0].delta.content
         response += token
-        yield response
+        # Post-processing to remove prefixes (example - add to your existing yield) - Solution 3 (Fallback)
+        processed_response = response.replace("User:", "").replace("Assistant:", "").replace("Roos:", "").lstrip()
+        yield processed_response
+
     except Exception as e:
         print(f"An error occurred: {e}")  # It's good practice to add a try-except block
         yield "I'm sorry, I encountered an error. Please try again."

+
 # --- Gradio Interface ---
 demo = gr.ChatInterface(
     respond,
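For orientation, the hunk above is the tail of the streaming loop; a minimal sketch of the full call it iterates over, assuming huggingface_hub's chat_completion API (delta.content can arrive as None on some chunks, hence the guard the diff does not yet have):

for chunk in client.chat_completion(
    messages,
    max_tokens=max_tokens,
    stream=True,
    temperature=temperature,
    top_p=top_p,
):
    token = chunk.choices[0].delta.content
    if token:  # skip None/empty deltas
        response += token
        processed_response = response.replace("User:", "").replace("Assistant:", "").replace("Roos:", "").lstrip()
        yield processed_response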
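The diff ends mid-call inside gr.ChatInterface(...). For completeness, a typical wiring for this respond signature looks like the following; the labels, ranges, and defaults are assumptions for illustration, not part of the commit:

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=default_nvc_prompt_template, label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()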