daniel-cerebras committed on
Commit
8e74de2
1 Parent(s): a836b76

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -32
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- import time
3
  import json
4
  from cerebras.cloud.sdk import Cerebras
5
  from typing import List, Dict, Tuple, Any, Generator
@@ -24,14 +24,25 @@ def make_api_call(api_key: str, messages: List[Dict[str, str]], max_tokens: int,
24
 
25
  content = json.loads(response.choices[0].message.content)
26
 
27
- # Calculate tokens per second
28
- total_tokens = response.usage.total_tokens
29
- elapsed_time = end_time - start_time
30
- tokens_per_second = total_tokens / elapsed_time if elapsed_time > 0 else 0
 
 
 
 
 
 
 
31
 
32
  content['token_info'] = {
33
- 'total_tokens': total_tokens,
34
- 'tokens_per_second': tokens_per_second
 
 
 
 
35
  }
36
 
37
  return content
@@ -55,51 +66,50 @@ def generate_response(api_key: str, prompt: str) -> Generator[Tuple[List[Tuple[s
55
  ]
56
 
57
  steps = []
58
- step_count = 1
59
  total_thinking_time = 0
60
- total_tokens = 0
61
- total_tokens_per_second = 0
62
 
63
  while True:
64
- start_time = time.time()
65
  step_data = make_api_call(api_key, messages, 300)
66
- thinking_time = time.time() - start_time
67
- total_thinking_time += thinking_time
68
 
69
- token_info = step_data.pop('token_info', {'total_tokens': 0, 'tokens_per_second': 0})
70
- total_tokens += token_info['total_tokens']
71
- total_tokens_per_second += token_info['tokens_per_second']
72
 
 
73
  step_title = f"Step {step_count}: {step_data['title']}"
74
- step_content = f"{step_data['content']}\n\n**Cerebras LLM Call Duration: {thinking_time:.2f} seconds**\n**Tokens: {token_info['total_tokens']}, Tokens/s: {token_info['tokens_per_second']:.2f}**"
75
  steps.append((step_title, step_content))
76
  messages.append({"role": "assistant", "content": json.dumps(step_data)})
77
 
78
- # Yield the current conversation, total thinking time, total tokens, and average tokens per second
79
- yield steps, total_thinking_time, total_tokens, total_tokens_per_second / step_count if step_count > 0 else 0
 
 
 
80
 
81
  if step_data.get('next_action') == 'final_answer':
82
  break
83
-
84
- step_count += 1
85
 
86
  # Request the final answer
87
  messages.append({"role": "user", "content": "Please provide the final answer based on your reasoning above."})
88
 
89
- start_time = time.time()
90
  final_data = make_api_call(api_key, messages, 200, is_final_answer=True)
91
- thinking_time = time.time() - start_time
92
- total_thinking_time += thinking_time
93
 
94
- token_info = final_data.pop('token_info', {'total_tokens': 0, 'tokens_per_second': 0})
95
- total_tokens += token_info['total_tokens']
96
- total_tokens_per_second += token_info['tokens_per_second']
97
 
98
- final_content = f"{final_data.get('content', 'No final answer provided.')}\n\n**Final answer thinking time: {thinking_time:.2f} seconds**\n**Tokens: {token_info['total_tokens']}, Tokens/s: {token_info['tokens_per_second']:.2f}**"
99
  steps.append(("Final Answer", final_content))
100
 
101
- # Yield the final conversation, total thinking time, total tokens, and average tokens per second
102
- yield steps, total_thinking_time, total_tokens, total_tokens_per_second / (step_count + 1)
 
 
 
103
 
104
  def respond(api_key: str, message: str, history: List[Tuple[str, str]]) -> Generator[Tuple[List[Tuple[str, str]], str], None, None]:
105
  """
@@ -112,14 +122,14 @@ def respond(api_key: str, message: str, history: List[Tuple[str, str]]) -> Gener
112
  # Initialize the generator
113
  response_generator = generate_response(api_key, message)
114
 
115
- for steps, total_time, total_tokens, avg_tokens_per_second in response_generator:
116
  conversation = history.copy()
117
  for title, content in steps[len(conversation):]:
118
  if title.startswith("Step") or title == "Final Answer":
119
  conversation.append((title, content))
120
  else:
121
  conversation.append((title, content))
122
- yield conversation, f"**Total thinking time:** {total_time:.2f} seconds\n**Total tokens:** {total_tokens}\n**Average tokens/s:** {avg_tokens_per_second:.2f}"
123
 
124
  def main():
125
  with gr.Blocks() as demo:
 
1
  import gradio as gr
2
+ import time
3
  import json
4
  from cerebras.cloud.sdk import Cerebras
5
  from typing import List, Dict, Tuple, Any, Generator
 
24
 
25
  content = json.loads(response.choices[0].message.content)
26
 
27
+ # Access time_info attributes directly
28
+ queue_time = response.time_info.queue_time
29
+ prompt_time = response.time_info.prompt_time
30
+ completion_time = response.time_info.completion_time
31
+ total_time = response.time_info.total_time
32
+
33
+ # Use the provided usage information
34
+ completion_tokens = response.usage.completion_tokens
35
+
36
+ # Calculate tokens per second using completion tokens
37
+ tokens_per_second = completion_tokens / total_time if total_time > 0 else 0
38
 
39
  content['token_info'] = {
40
+ 'completion_tokens': completion_tokens,
41
+ 'tokens_per_second': tokens_per_second,
42
+ 'queue_time': queue_time,
43
+ 'prompt_time': prompt_time,
44
+ 'completion_time': completion_time,
45
+ 'total_time': total_time # Use total_time as the 'duration'
46
  }
47
 
48
  return content
 
66
  ]
67
 
68
  steps = []
69
+ step_count = 0
70
  total_thinking_time = 0
71
+ total_completion_tokens = 0
 
72
 
73
  while True:
 
74
  step_data = make_api_call(api_key, messages, 300)
75
+ token_info = step_data.pop('token_info', {'completion_tokens': 0, 'tokens_per_second': 0, 'duration': step_data.get('total_time', 0)})
 
76
 
77
+ # Use total_time from token_info as the duration
78
+ total_thinking_time += token_info.get('total_time', 0)
79
+ total_completion_tokens += token_info['completion_tokens']
80
 
81
+ step_count += 1
82
  step_title = f"Step {step_count}: {step_data['title']}"
83
+ step_content = f"{step_data['content']}\n\n**API Call Duration: {token_info['total_time']:.2f} seconds**\n**Completion Tokens: {token_info['completion_tokens']}, Tokens/s: {token_info['tokens_per_second']:.2f}**"
84
  steps.append((step_title, step_content))
85
  messages.append({"role": "assistant", "content": json.dumps(step_data)})
86
 
87
+ # Calculate the overall average tokens per second using completion tokens
88
+ overall_tokens_per_second = total_completion_tokens / total_thinking_time if total_thinking_time > 0 else 0
89
+
90
+ # Yield the current conversation, total thinking time, total completion tokens, and overall average tokens per second
91
+ yield steps, total_thinking_time, total_completion_tokens, overall_tokens_per_second
92
 
93
  if step_data.get('next_action') == 'final_answer':
94
  break
 
 
95
 
96
  # Request the final answer
97
  messages.append({"role": "user", "content": "Please provide the final answer based on your reasoning above."})
98
 
 
99
  final_data = make_api_call(api_key, messages, 200, is_final_answer=True)
100
+ token_info = final_data.pop('token_info', {'completion_tokens': 0, 'tokens_per_second': 0, 'duration': final_data.get('total_time', 0)})
 
101
 
102
+ total_thinking_time += token_info.get('total_time', 0)
103
+ total_completion_tokens += token_info['completion_tokens']
 
104
 
105
+ final_content = f"{final_data.get('content', 'No final answer provided.')}\n\n**Final answer API call duration: {token_info['total_time']:.2f} seconds**\n**Completion Tokens: {token_info['completion_tokens']}, Tokens/s: {token_info['tokens_per_second']:.2f}**"
106
  steps.append(("Final Answer", final_content))
107
 
108
+ # Calculate the final overall average tokens per second using completion tokens
109
+ overall_tokens_per_second = total_completion_tokens / total_thinking_time if total_thinking_time > 0 else 0
110
+
111
+ # Yield the final conversation, total thinking time, total completion tokens, and overall average tokens per second
112
+ yield steps, total_thinking_time, total_completion_tokens, overall_tokens_per_second
113
 
114
  def respond(api_key: str, message: str, history: List[Tuple[str, str]]) -> Generator[Tuple[List[Tuple[str, str]], str], None, None]:
115
  """
 
122
  # Initialize the generator
123
  response_generator = generate_response(api_key, message)
124
 
125
+ for steps, total_time, total_completion_tokens, avg_tokens_per_second in response_generator:
126
  conversation = history.copy()
127
  for title, content in steps[len(conversation):]:
128
  if title.startswith("Step") or title == "Final Answer":
129
  conversation.append((title, content))
130
  else:
131
  conversation.append((title, content))
132
+ yield conversation, f"**Total API call time:** {total_time:.2f} seconds\n**Completion tokens:** {total_completion_tokens}\n**Overall average tokens/s:** {avg_tokens_per_second:.2f}"
133
 
134
  def main():
135
  with gr.Blocks() as demo: