eikarna committed · Commit 971fbd1 · Parent(s): 6563cfd

Major Update

Files changed:
- app-api.py +152 -0
- app.py +70 -98
- packages.txt +0 -2
- requirements.txt +4 -13
app-api.py
ADDED
@@ -0,0 +1,152 @@
+import streamlit as st
+import requests
+import logging
+from typing import Optional, Dict, Any
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Constants
+DEFAULT_SYSTEM_PROMPT = """You are a friendly Assistant. Provide clear, accurate, and brief answers.
+Keep responses polite, engaging, and to the point. If unsure, politely suggest alternatives."""
+
+MODEL_OPTIONS = [
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+    "deepseek-ai/DeepSeek-R1"
+]
+API_BASE_URL = "https://api-inference.huggingface.co/models/"
+
+# Page configuration
+st.set_page_config(
+    page_title="DeepSeek-AI R1",
+    page_icon="🤖",
+    layout="centered"
+)
+
+def initialize_session_state():
+    """Initialize all session state variables"""
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+    if "api_failures" not in st.session_state:
+        st.session_state.api_failures = 0
+
+def configure_sidebar() -> Dict[str, Any]:
+    """Create sidebar components and return settings"""
+    with st.sidebar:
+        st.header("Model Configuration")
+        st.markdown("[Get HuggingFace Token](https://huggingface.co/settings/tokens)")
+
+        return {
+            "model": st.selectbox("Select Model", MODEL_OPTIONS, index=0),
+            "system_message": st.text_area(
+                "System Message",
+                value=DEFAULT_SYSTEM_PROMPT,
+                height=100
+            ),
+            "max_tokens": st.slider("Max Tokens", 10, 4000, 100),
+            "temperature": st.slider("Temperature", 0.1, 4.0, 0.3),
+            "top_p": st.slider("Top-p", 0.1, 1.0, 0.6),
+            "debug_chat": st.toggle("Return Full Text (Debugging Only)")
+        }
+
+def format_deepseek_prompt(system_message: str, user_input: str) -> str:
+    """Format the prompt according to DeepSeek's required structure"""
+    return f"""System: {system_message}
+<|User|>{user_input}<|Assistant|>"""
+
+def query_hf_api(payload: Dict[str, Any], api_url: str) -> Optional[Dict[str, Any]]:
+    """Handle API requests with improved error handling"""
+    headers = {"Authorization": f"Bearer {st.secrets['HF_TOKEN']}"}
+
+    try:
+        response = requests.post(
+            api_url,
+            headers=headers,
+            json=payload,
+            timeout=30
+        )
+        response.raise_for_status()
+        return response.json()
+    except requests.exceptions.HTTPError as e:
+        logger.error(f"HTTP Error: {e.response.status_code} - {e.response.text}")
+        st.error(f"API Error: {e.response.status_code} - {e.response.text[:200]}")
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Request failed: {str(e)}")
+        st.error("Connection error. Please check your internet connection.")
+    return None
+
+def handle_chat_interaction(settings: Dict[str, Any]):
+    """Manage chat input/output and API communication"""
+    if prompt := st.chat_input("Type your message..."):
+        # Add user message to history
+        st.session_state.messages.append({"role": "user", "content": prompt})
+
+        with st.chat_message("user"):
+            st.markdown(prompt)
+
+        try:
+            with st.spinner("Generating response..."):
+                # Format prompt according to model requirements
+                full_prompt = format_deepseek_prompt(
+                    system_message=settings["system_message"],
+                    user_input=prompt
+                )
+
+                payload = {
+                    "inputs": full_prompt,
+                    "parameters": {
+                        "max_new_tokens": settings["max_tokens"],
+                        "temperature": settings["temperature"],
+                        "top_p": settings["top_p"],
+                        "return_full_text": settings["debug_chat"],
+                    }
+                }
+
+                api_url = f"{API_BASE_URL}{settings['model']}"
+                output = query_hf_api(payload, api_url)
+
+                if output and isinstance(output, list):
+                    if 'generated_text' in output[0]:
+                        response_text = output[0]['generated_text'].strip()
+                        # Remove any remaining special tokens
+                        response_text = response_text.split("\n</think>\n")[0].strip()
+
+                        # Display and store response
+                        with st.chat_message("assistant"):
+                            st.markdown(response_text)
+                        st.session_state.messages.append(
+                            {"role": "assistant", "content": response_text}
+                        )
+                        return
+
+                # Handle failed responses
+                st.session_state.api_failures += 1
+                if st.session_state.api_failures > 2:
+                    st.error("Persistent API failures. Please check your API token and model selection.")
+
+        except Exception as e:
+            logger.error(f"Unexpected error: {str(e)}", exc_info=True)
+            st.error("An unexpected error occurred. Please try again.")
+
+def display_chat_history():
+    """Render chat message history"""
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+
+def main():
+    """Main application flow"""
+    initialize_session_state()
+    settings = configure_sidebar()
+
+    st.title("🤖 DeepSeek Chatbot")
+    st.caption(f"Current Model: {settings['model']}")
+    st.caption("Powered by Hugging Face Inference API - Configure in sidebar")
+
+    display_chat_history()
+    handle_chat_interaction(settings)
+
+if __name__ == "__main__":
+    main()
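Note on app-api.py: the token is read via st.secrets["HF_TOKEN"], so a deployment needs that key in .streamlit/secrets.toml. The request and response shape the file depends on (a JSON payload with "inputs" and "parameters", answered by a list of {"generated_text": ...} dicts) can be sanity-checked outside Streamlit. The sketch below is illustrative, not part of the commit, and assumes the token is exported as an HF_TOKEN environment variable:

import os
import requests

# Same endpoint pattern as API_BASE_URL + model name in app-api.py.
API_URL = "https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}  # assumption: token in env, not st.secrets
payload = {
    "inputs": "System: You are a friendly Assistant.\n<|User|>Hello!<|Assistant|>",
    "parameters": {"max_new_tokens": 50, "temperature": 0.3, "top_p": 0.6},
}

resp = requests.post(API_URL, headers=headers, json=payload, timeout=30)
resp.raise_for_status()
# The app checks for this list-of-dicts shape before reading 'generated_text'.
print(resp.json()[0]["generated_text"])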
app.py
CHANGED
@@ -1,22 +1,14 @@
 import streamlit as st
-import requests
-import logging
-from typing import Optional, Dict, Any
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from typing import Dict, Any
 
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-# Constants
+# Configure model (updated for local execution)
 DEFAULT_SYSTEM_PROMPT = """You are a friendly Assistant. Provide clear, accurate, and brief answers.
 Keep responses polite, engaging, and to the point. If unsure, politely suggest alternatives."""
 
-MODEL_OPTIONS = [
-    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
-    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-    "deepseek-ai/DeepSeek-R1"
-]
-API_BASE_URL = "https://api-inference.huggingface.co/models/"
+MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"  # Directly specify model
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Page configuration
 st.set_page_config(
@@ -29,58 +21,67 @@ def initialize_session_state():
     """Initialize all session state variables"""
     if "messages" not in st.session_state:
         st.session_state.messages = []
-    if "api_failures" not in st.session_state:
-        st.session_state.api_failures = 0
+    if "model_loaded" not in st.session_state:
+        st.session_state.update({
+            "model_loaded": False,
+            "model": None,
+            "tokenizer": None
+        })
+
+def load_model():
+    """Load model and tokenizer with quantization"""
+    if not st.session_state.model_loaded:
+        with st.spinner("Loading model (this may take a minute)..."):
+            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+            model = AutoModelForCausalLM.from_pretrained(
+                MODEL_NAME,
+                trust_remote_code=True,
+                torch_dtype=torch.bfloat16,
+                device_map="auto"
+            )
+
+            st.session_state.update({
+                "model": model,
+                "tokenizer": tokenizer,
+                "model_loaded": True
+            })
 
 def configure_sidebar() -> Dict[str, Any]:
-    """Create sidebar components and return settings"""
+    """Create sidebar components"""
     with st.sidebar:
-        st.header("Model Configuration")
-        st.markdown("[Get HuggingFace Token](https://huggingface.co/settings/tokens)")
-
+        st.header("Configuration")
         return {
-            "model": st.selectbox("Select Model", MODEL_OPTIONS, index=0),
-            "system_message": st.text_area(
-                "System Message",
-                value=DEFAULT_SYSTEM_PROMPT,
-                height=100
-            ),
-            "max_tokens": st.slider("Max Tokens", 10, 4000, 100),
-            "temperature": st.slider("Temperature", 0.1, 4.0, 0.3),
-            "top_p": st.slider("Top-p", 0.1, 1.0, 0.6),
-            "debug_chat": st.toggle("Return Full Text (Debugging Only)")
+            "system_message": st.text_area("System Message", value=DEFAULT_SYSTEM_PROMPT, height=100),
+            "max_tokens": st.slider("Max Tokens", 10, 4000, 512),
+            "temperature": st.slider("Temperature", 0.1, 1.0, 0.7),
+            "top_p": st.slider("Top-p", 0.1, 1.0, 0.9)
         }
 
-def format_deepseek_prompt(system_message: str, user_input: str) -> str:
-    """Format the prompt according to DeepSeek's required structure"""
-    return f"""System: {system_message}
-<|User|>{user_input}<|Assistant|>"""
+def format_prompt(system_message: str, user_input: str) -> str:
+    """Format prompt according to model's required template"""
+    return f"""<|begin▁of▁sentence|>System: {system_message}
+User: {user_input}
+Assistant: """
 
-def query_hf_api(payload: Dict[str, Any], api_url: str) -> Optional[Dict[str, Any]]:
-    """Handle API requests with improved error handling"""
-    headers = {"Authorization": f"Bearer {st.secrets['HF_TOKEN']}"}
-
-    try:
-        response = requests.post(
-            api_url,
-            headers=headers,
-            json=payload,
-            timeout=30
-        )
-        response.raise_for_status()
-        return response.json()
-    except requests.exceptions.HTTPError as e:
-        logger.error(f"HTTP Error: {e.response.status_code} - {e.response.text}")
-        st.error(f"API Error: {e.response.status_code} - {e.response.text[:200]}")
-    except requests.exceptions.RequestException as e:
-        logger.error(f"Request failed: {str(e)}")
-        st.error("Connection error. Please check your internet connection.")
-    return None
+def generate_response(prompt: str, settings: Dict[str, Any]) -> str:
+    """Generate response using local model"""
+    inputs = st.session_state.tokenizer(prompt, return_tensors="pt").to(DEVICE)
+
+    outputs = st.session_state.model.generate(
+        inputs.input_ids,
+        max_new_tokens=settings["max_tokens"],
+        temperature=settings["temperature"],
+        top_p=settings["top_p"],
+        do_sample=True,
+        pad_token_id=st.session_state.tokenizer.eos_token_id
+    )
+
+    response = st.session_state.tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response.split("Assistant:")[-1].strip()
 
 def handle_chat_interaction(settings: Dict[str, Any]):
-    """Manage chat input/output and API communication"""
+    """Manage chat interactions"""
    if prompt := st.chat_input("Type your message..."):
-        # Add user message to history
         st.session_state.messages.append({"role": "user", "content": prompt})
 
         with st.chat_message("user"):
@@ -88,62 +89,33 @@ def handle_chat_interaction(settings: Dict[str, Any]):
 
         try:
             with st.spinner("Generating response..."):
-                # Format prompt according to model requirements
-                full_prompt = format_deepseek_prompt(
-                    system_message=settings["system_message"],
-                    user_input=prompt
+                full_prompt = format_prompt(
+                    settings["system_message"],
+                    prompt
                 )
-
-                payload = {
-                    "inputs": full_prompt,
-                    "parameters": {
-                        "max_new_tokens": settings["max_tokens"],
-                        "temperature": settings["temperature"],
-                        "top_p": settings["top_p"],
-                        "return_full_text": settings["debug_chat"],
-                    }
-                }
-
-                api_url = f"{API_BASE_URL}{settings['model']}"
-                output = query_hf_api(payload, api_url)
-
-                if output and isinstance(output, list):
-                    if 'generated_text' in output[0]:
-                        response_text = output[0]['generated_text'].strip()
-                        # Remove any remaining special tokens
-                        response_text = response_text.split("\n</think>\n")[0].strip()
-
-                        # Display and store response
-                        with st.chat_message("assistant"):
-                            st.markdown(response_text)
-                        st.session_state.messages.append(
-                            {"role": "assistant", "content": response_text}
-                        )
-                        return
-
-                # Handle failed responses
-                st.session_state.api_failures += 1
-                if st.session_state.api_failures > 2:
-                    st.error("Persistent API failures. Please check your API token and model selection.")
+
+                response = generate_response(full_prompt, settings)
+
+                with st.chat_message("assistant"):
+                    st.markdown(response)
+                st.session_state.messages.append({"role": "assistant", "content": response})
 
         except Exception as e:
-            logger.error(f"Unexpected error: {str(e)}", exc_info=True)
-            st.error("An unexpected error occurred. Please try again.")
+            st.error(f"Generation error: {str(e)}")
 
 def display_chat_history():
-    """Render chat message history"""
+    """Display chat history"""
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
 
 def main():
-    """Main application flow"""
     initialize_session_state()
+    load_model()  # Load model before anything else
     settings = configure_sidebar()
 
-    st.title("🤖 DeepSeek Chatbot")
-    st.caption(f"Current Model: {settings['model']}")
-    st.caption("Powered by Hugging Face Inference API - Configure in sidebar")
+    st.title("🤖 DeepSeek Chatbot (Local)")
+    st.caption(f"Running {MODEL_NAME} directly on {DEVICE.upper()}")
 
     display_chat_history()
     handle_chat_interaction(settings)
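The rewritten app.py replaces the hosted API with local generation: load_model() caches the tokenizer and model in st.session_state, format_prompt() builds the chat template, and generate_response() samples from model.generate(). Reduced to a standalone script, the same path looks roughly like the sketch below (illustrative, not part of the commit; assumes enough memory for the 1.5B checkpoint). One detail: with device_map="auto" the weights may be sharded across devices, so the sketch moves inputs to model.device rather than the module-level DEVICE string the app uses.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Same loading arguments as load_model() in the new app.py.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

prompt = "System: You are a friendly Assistant.\nUser: Hello!\nAssistant: "
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(
    inputs.input_ids,
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)

# Mirrors generate_response(): decode, then keep only the assistant turn.
text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(text.split("Assistant:")[-1].strip())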
packages.txt
DELETED
@@ -1,2 +0,0 @@
-tesseract-ocr
-tesseract-ocr-eng
requirements.txt
CHANGED
@@ -1,13 +1,4 @@
-
-
-
-
-PyMuPDF==1.23.8
-sentence-transformers==2.2.2
-numpy==1.24.3
-python-multipart==0.0.6
-opencv-python-headless==4.8.1.78
-scikit-learn==1.3.2
-protobuf==4.25.1
-huggingface_hub==0.25.2
-transformers
+torch>=2.0.0
+transformers>=4.40.0
+streamlit>=1.32.0
+accelerate>=0.27.0
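The slimmed requirements track the new local path: torch and transformers for inference, streamlit for the UI, and accelerate, which transformers requires for the device_map="auto" loading used in app.py. A quick import check (hypothetical helper, not in the repo) confirms an environment satisfies them:

import torch
import transformers
import streamlit
import accelerate

# Print the versions the rewritten app.py depends on, plus GPU availability.
print(f"torch {torch.__version__} (CUDA: {torch.cuda.is_available()})")
print(f"transformers {transformers.__version__}")
print(f"streamlit {streamlit.__version__}")
print(f"accelerate {accelerate.__version__}")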