Saiteja Solleti committed · Commit 55915f0
1 Parent(s): 14249c4

Revert "tokening"

This reverts commit 585864a7c72456302949a2de8f4ae04376afd17f.
- generationhelper.py +0 -33
- requirements.txt +1 -2
generationhelper.py
CHANGED
@@ -1,7 +1,5 @@
 import os
 from groq import Groq
-import time
-import tiktoken
 
 groq_token = os.getenv("GROQ_TOKEN")
 
@@ -9,35 +7,6 @@ groq_client = Groq(
     api_key = groq_token
 )
 
-# Initialize token counter and timestamp
-tokens_used = 0
-start_time = time.time()
-
-def Count_tokens(text: str, model="gpt-3.5-turbo"):
-    """Counts tokens in the given text using tiktoken."""
-    enc = tiktoken.encoding_for_model(model)
-    return len(enc.encode(text))
-
-def Enforce_token_limit(prompt, max_tokens_per_minute=6000):
-    """Ensures that token usage stays within the allowed rate limit."""
-    global tokens_used, start_time
-
-    tokens = Count_tokens(prompt)
-    elapsed_time = time.time() - start_time
-
-    # If the token limit is exceeded, wait until the reset
-    if tokens_used + tokens > max_tokens_per_minute:
-        if elapsed_time < 60:
-            sleep_time = 60 - elapsed_time
-            print(f"Rate limit reached! Sleeping for {sleep_time:.2f} seconds...")
-            time.sleep(sleep_time)
-
-        # Reset counter after sleeping
-        tokens_used = 0
-        start_time = time.time()
-
-    # Update token count
-    tokens_used += tokens
 
 
 def GenerateAnswer(query, top_documents, prompt_model, timeout_seconds: int = 30):
@@ -59,8 +28,6 @@ def GenerateAnswer(query, top_documents, prompt_model, timeout_seconds: int = 30
     Answer:
    """
 
-    Enforce_token_limit(prompt)
-
     # Call Groq API (Llama 3.3-70B)
     completion = groq_client.chat.completions.create(
         model=prompt_model,
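For reference, the client-side throttling this revert strips out can be kept without reintroducing the tiktoken dependency by approximating token counts from character length. The sketch below is an illustration of that alternative, not part of the repository: the ~4 characters-per-token heuristic and the helper names are assumptions; only the 6000 tokens/minute budget mirrors the default in the reverted code.

    # Hypothetical, dependency-free variant of the removed rate limiter.
    # Assumption: ~4 characters per token, a common rough heuristic.
    import time

    _tokens_used = 0
    _window_start = time.time()

    def approx_token_count(text: str) -> int:
        """Estimate tokens from text length; avoids the tiktoken dependency."""
        return max(1, len(text) // 4)

    def enforce_token_limit(prompt: str, max_tokens_per_minute: int = 6000) -> None:
        """Sleep out the rest of the minute if the budget would be exceeded."""
        global _tokens_used, _window_start
        tokens = approx_token_count(prompt)
        elapsed = time.time() - _window_start
        if _tokens_used + tokens > max_tokens_per_minute:
            if elapsed < 60:
                wait = 60 - elapsed
                print(f"Rate limit reached! Sleeping for {wait:.2f} seconds...")
                time.sleep(wait)
            # Start a fresh one-minute window after waiting (or if it lapsed).
            _tokens_used = 0
            _window_start = time.time()
        _tokens_used += tokens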
requirements.txt
CHANGED
@@ -5,5 +5,4 @@ huggingface_hub
 pymilvus
 nltk
 sentence-transformers
-Groq
-tiktoken
+Groq
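After the revert, generationhelper.py calls Groq directly with no throttling. A minimal sketch of that surviving code path is below, assuming the standard Groq Python SDK; the message structure, prompt layout, and helper name are assumptions, since only groq_client.chat.completions.create(model=prompt_model, ...) appears in the diff above.

    # Hypothetical usage of the remaining GenerateAnswer flow.
    import os
    from groq import Groq

    groq_client = Groq(api_key=os.getenv("GROQ_TOKEN"))

    def generate_answer_sketch(query: str, top_documents: list[str], prompt_model: str) -> str:
        """Build a prompt from retrieved documents and ask the Groq chat API."""
        context = "\n".join(top_documents)
        prompt = f"Context:\n{context}\n\nQuestion: {query}\n\nAnswer:"
        completion = groq_client.chat.completions.create(
            model=prompt_model,  # e.g. a Llama 3.3-70B model, per the comment in the diff
            messages=[{"role": "user", "content": prompt}],
        )
        return completion.choices[0].message.content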