Saiteja Solleti committed on
Commit
585864a
·
1 Parent(s): 754ca59
Files changed (2) hide show
  1. generationhelper.py +33 -0
  2. requirements.txt +2 -1
generationhelper.py CHANGED
@@ -1,5 +1,7 @@
1
  import os
2
  from groq import Groq
 
 
3
 
4
  groq_token = os.getenv("GROQ_TOKEN")
5
 
@@ -7,6 +9,35 @@ groq_client = Groq(
7
  api_key = groq_token
8
  )
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  def GenerateAnswer(query, top_documents, prompt_model, timeout_seconds: int = 30):
@@ -28,6 +59,8 @@ def GenerateAnswer(query, top_documents, prompt_model, timeout_seconds: int = 30
28
  Answer:
29
  """
30
 
 
 
31
  # Call Groq API (Llama 3.3-70B)
32
  completion = groq_client.chat.completions.create(
33
  model=prompt_model,
 
1
  import os
2
  from groq import Groq
3
+ import time
4
+ import tiktoken
5
 
6
  groq_token = os.getenv("GROQ_TOKEN")
7
 
 
9
  api_key = groq_token
10
  )
11
 
# Rolling-window accounting for the token rate limiter: how many tokens
# have been consumed in the current one-minute window, and the wall-clock
# time at which that window began.
tokens_used = 0
start_time = time.time()
15
+
# Cache of tiktoken encoders keyed by model name. encoding_for_model builds
# a comparatively expensive encoder object, so reuse it across calls instead
# of reconstructing it for every prompt.
_ENCODINGS = {}


def Count_tokens(text: str, model: str = "gpt-3.5-turbo") -> int:
    """Count the tokens in *text* using tiktoken.

    Args:
        text: The string to tokenize.
        model: Model name whose tokenizer to use. NOTE(review): this is an
            OpenAI tokenizer used as an approximation for the Groq-served
            model; confirm the counts are close enough for rate limiting.

    Returns:
        Number of tokens in ``text``.

    Raises:
        KeyError: If ``model`` is unknown to ``tiktoken.encoding_for_model``.
    """
    enc = _ENCODINGS.get(model)
    if enc is None:
        enc = _ENCODINGS[model] = tiktoken.encoding_for_model(model)
    return len(enc.encode(text))
20
def Enforce_token_limit(prompt, max_tokens_per_minute=6000):
    """Block until *prompt* fits inside the per-minute token budget.

    Maintains a rolling one-minute window in the module globals
    ``tokens_used`` / ``start_time``. If adding this prompt's tokens would
    exceed ``max_tokens_per_minute`` within the current window, sleeps for
    the remainder of the minute and then starts a fresh window.

    Args:
        prompt: The text about to be sent to the API.
        max_tokens_per_minute: Token budget allowed per 60-second window.
    """
    global tokens_used, start_time

    tokens = Count_tokens(prompt)
    elapsed_time = time.time() - start_time

    if elapsed_time >= 60:
        # The previous window has expired: start a new one. (Previously the
        # window was only reset when the limit was hit, so tokens_used grew
        # without bound across minutes and triggered spurious sleeps.)
        tokens_used = 0
        start_time = time.time()
    elif tokens_used + tokens > max_tokens_per_minute:
        # Budget exhausted mid-window: wait out the rest of the minute.
        sleep_time = 60 - elapsed_time
        print(f"Rate limit reached! Sleeping for {sleep_time:.2f} seconds...")
        time.sleep(sleep_time)

        # Reset counter after sleeping
        tokens_used = 0
        start_time = time.time()

    # Charge this prompt against the (possibly freshly reset) window.
    tokens_used += tokens
41
 
42
 
43
  def GenerateAnswer(query, top_documents, prompt_model, timeout_seconds: int = 30):
 
59
  Answer:
60
  """
61
 
62
+ Enforce_token_limit(prompt)
63
+
64
  # Call Groq API (Llama 3.3-70B)
65
  completion = groq_client.chat.completions.create(
66
  model=prompt_model,
requirements.txt CHANGED
@@ -5,4 +5,5 @@ huggingface_hub
5
  pymilvus
6
  nltk
7
  sentence-transformers
8
- Groq
 
 
5
  pymilvus
6
  nltk
7
  sentence-transformers
8
+ Groq
9
+ tiktoken