ruslanmv committed on
Commit f2b4cb5 · verified · 1 Parent(s): c6caff5

Update app.py

Files changed (1)
  1. app.py +12 -21
app.py CHANGED
@@ -2,13 +2,11 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer  # Import the tokenizer
 
-# Import the tokenizer
+# Import the tokenizer - No need to import twice, remove the second import
 tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
 # Define a maximum context length (tokens). Check your model's documentation!
 MAX_CONTEXT_LENGTH = 4096  # Example: Adjust this based on your model!
-
 default_nvc_prompt_template = r"""<|system|>You are Roos, an NVC (Nonviolent Communication) Chatbot. Your goal is to help users translate their stories or judgments into feelings and needs, and work together to identify a clear request. Follow these steps:
 1. **Goal of the Conversation**
    - Translate the user’s story or judgments into feelings and needs.
@@ -74,13 +72,9 @@ default_nvc_prompt_template = r"""<|system|>You are Roos, an NVC (Nonviolent Com
    - “I sense some frustration. Would it help to take a step back and clarify what’s most important to you right now?”
 13. **Ending the Conversation**
    - If the user indicates they want to end the conversation, thank them for sharing and offer to continue later:
-   - “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help.”</s>"""
-
-def count_tokens(text: str) -> int:
+   - “Thank you for sharing with me. If you’d like to continue this conversation later, I’m here to help.”</s>"""def count_tokens(text: str) -> int:
     """Counts the number of tokens in a given string."""
-    return len(tokenizer.encode(text))
-
-def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
+    return len(tokenizer.encode(text))def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
     """Truncates the conversation history to fit within the maximum token limit.
 
     Args:
@@ -94,22 +88,17 @@ def truncate_history(history: list[tuple[str, str]], system_message: str, max_le
     truncated_history = []
     system_message_tokens = count_tokens(system_message)
     current_length = system_message_tokens
-
     # Iterate backwards through the history (newest to oldest)
     for user_msg, assistant_msg in reversed(history):
         user_tokens = count_tokens(user_msg) if user_msg else 0
         assistant_tokens = count_tokens(assistant_msg) if assistant_msg else 0
         turn_tokens = user_tokens + assistant_tokens
-
         if current_length + turn_tokens <= max_length:
             truncated_history.insert(0, (user_msg, assistant_msg))  # Add to the beginning
             current_length += turn_tokens
         else:
             break  # Stop adding turns if we exceed the limit
-
-    return truncated_history
-
-def respond(
+    return truncated_historydef respond(
     message,
     history: list[tuple[str, str]],
     system_message,  # System message is now an argument
@@ -118,7 +107,6 @@ def respond(
     top_p,
 ):
     """Responds to a user message, maintaining conversation history, using special tokens and message list."""
-
     if message.lower() == "clear memory":  # Check for the clear memory command
         return "", []  # Return empty message and empty history to reset the chat
 
@@ -128,11 +116,10 @@ def respond(
     messages = [{"role": "system", "content": formatted_system_message}]  # Start with system message as before
     for user_msg, assistant_msg in truncated_history:
         if user_msg:
-            messages.append({"role": "user", "content": f"<|user|>\n{user_msg}</s>"})  # Format history user message
+            messages.append({"role": "user", "content": user_msg})  # Format history user message - Removed extra tags
         if assistant_msg:
-            messages.append({"role": "assistant", "content": f"<|assistant|>\n{assistant_msg}</s>"})  # Format history assistant message
-
-    messages.append({"role": "user", "content": f"<|user|>\n{message}</s>"})  # Format current user message
+            messages.append({"role": "assistant", "content": assistant_msg})  # Format history assistant message - Removed extra tags
+    messages.append({"role": "user", "content": message})  # Format current user message - Removed extra tags
 
     response = ""
     try:
@@ -145,11 +132,15 @@
         ):
             token = chunk.choices[0].delta.content
             response += token
-            yield response
+            # Post-processing to remove prefixes (example - add to your existing yield) - Solution 3 (Fallback)
+            processed_response = response.replace("User:", "").replace("Assistant:", "").replace("Roos:", "").lstrip()
+            yield processed_response
+
     except Exception as e:
         print(f"An error occurred: {e}")  # It's a good practice add a try-except block
         yield "I'm sorry, I encountered an error. Please try again."
 
+
 # --- Gradio Interface ---
 demo = gr.ChatInterface(
     respond,
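Note that in the committed file, new lines 75, 77, and 101 each run the end of one statement directly into the next def (for example return len(tokenizer.encode(text))def truncate_history(...)), which is a syntax error in Python. Below is a minimal sketch of the two helpers with the line breaks restored, assuming plain module-level definitions were intended:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

def count_tokens(text: str) -> int:
    """Counts the number of tokens in a given string."""
    return len(tokenizer.encode(text))

def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
    """Keeps the newest turns that fit under max_length alongside the system message."""
    truncated_history = []
    current_length = count_tokens(system_message)
    # Walk the history newest-to-oldest and keep whole turns while they fit.
    for user_msg, assistant_msg in reversed(history):
        turn_tokens = (count_tokens(user_msg) if user_msg else 0) + (
            count_tokens(assistant_msg) if assistant_msg else 0
        )
        if current_length + turn_tokens > max_length:
            break  # Budget exhausted; drop this turn and everything older.
        truncated_history.insert(0, (user_msg, assistant_msg))  # Oldest kept turn goes first.
        current_length += turn_tokens
    return truncated_history

The insert(0, ...) keeps the surviving turns in chronological order even though the scan runs newest first.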
 
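The @@ -145,11 +132,15 @@ hunk opens mid-statement: only the closing ): of the streaming call is visible. Here is a sketch of the call shape that the chunk handling implies, using huggingface_hub's InferenceClient.chat_completion; the surrounding respond() scope (client, messages, max_tokens, temperature, top_p) is assumed from the rest of the diff, and the exact committed wiring may differ:

response = ""
try:
    # Stream the completion; each chunk's delta carries the newly generated text.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""  # delta.content may be None
        response += token
        # Fallback post-processing from the diff: strip role prefixes the model
        # sometimes echoes before yielding the partial text.
        yield response.replace("User:", "").replace("Assistant:", "").replace("Roos:", "").lstrip()
except Exception as e:
    print(f"An error occurred: {e}")
    yield "I'm sorry, I encountered an error. Please try again."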
 
 
 
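The diff also cuts off inside the gr.ChatInterface(...) call. The wiring below is a sketch in the standard Gradio ChatInterface pattern; the additional_inputs labels, defaults, and ranges are assumptions chosen to match respond()'s signature, not the committed values:

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # Editable system prompt, defaulting to the NVC template above.
        gr.Textbox(value=default_nvc_prompt_template, label="System message", lines=10),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()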