import streamlit as st
from llama_cpp import Llama
st.set_page_config(page_title="Chat with AI", page_icon="🤖", layout="wide")
# Hook for custom CSS styling (style rules omitted)
st.markdown("""
""", unsafe_allow_html=True)
@st.cache_resource
def load_model():
    # Download (and cache) the quantized GGUF model from the Hugging Face Hub
    return Llama.from_pretrained(
        repo_id="Mykes/med_phi3-mini-4k-GGUF",
        filename="*Q4_K_M.gguf",
        verbose=False,
        n_ctx=512,     # context window (tokens)
        n_batch=256,   # prompt-processing batch size
        n_threads=4,   # CPU threads for inference
    )
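# A minimal sketch of the equivalent explicit load, assuming the glob above
# resolves to a file named "med_phi3-mini-4k.Q4_K_M.gguf" (filename assumed):
#   llm = Llama(model_path="./med_phi3-mini-4k.Q4_K_M.gguf", n_ctx=512)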
llm = load_model()
def format_context(messages):
    """Render chat history as alternating "Human:"/"Assistant:" lines."""
    context = ""
    for message in messages:
        if message["role"] == "user":
            context += f"Human: {message['content']}\n"
        else:
            context += f"Assistant: {message['content']}\n"
    return context
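# For example:
#   format_context([{"role": "user", "content": "Hi"},
#                   {"role": "assistant", "content": "Hello!"}])
# returns "Human: Hi\nAssistant: Hello!\n"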
# Sidebar
st.sidebar.title("Chat with AI")
st.sidebar.markdown("This is a simple chat interface using Streamlit and an AI model.")
# Add useful information to the sidebar
st.sidebar.header("How to use")
st.sidebar.markdown("""
1. Type your question in the chat input box at the bottom of the screen.
2. Press Enter or click the Send button to submit your question.
3. The AI will generate a response based on your input.
4. You can have a continuous conversation by asking follow-up questions.
""")
st.sidebar.header("Model Information")
st.sidebar.markdown("""
- Model: Mykes/med_phi3-mini-4k-GGUF (Q4_K_M quantization)
- Context length: 512 tokens (as configured via `n_ctx`)
- This model is specialized in medical knowledge.
""")
st.sidebar.header("Tips")
st.sidebar.markdown("""
- Be clear and specific in your questions.
- For medical queries, provide relevant details.
- Remember that responses are AI-generated and may not always be accurate.
""")
# Main chat interface
st.title("Chat with AI")
# Initialize chat history
if "messages" not in st.session_state:
st.session_state.messages = []
# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# React to user input
if prompt := st.chat_input("What is your question?"):
    # Display user message in chat message container
    st.chat_message("user").markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Format the context with the last 5 messages; this slice already
    # includes the prompt appended above, so it ends with a "Human:" line
    context = format_context(st.session_state.messages[-5:])
    # Prepare the model input: cue the next assistant turn (the latest
    # "Human:" line is already in the context, so don't repeat it)
    model_input = f"{context}Assistant:"
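    # After one prior exchange, model_input would look like:
    #   "Human: Hi\nAssistant: Hello!\nHuman: What is aspirin?\nAssistant:"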
    # Display assistant response in chat message container, streaming tokens
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        for chunk in llm(
            model_input,
            max_tokens=None,             # generate until a stop sequence or the context is full
            stop=["Human:", "<|end|>"],  # "<|end|>" is Phi-3's end-of-turn token (assumed here)
            echo=False,                  # don't echo the prompt back into the stream
            stream=True,
        ):
            # Each streamed chunk is an OpenAI-style completion dict
            full_response += chunk["choices"][0]["text"]
            message_placeholder.markdown(full_response + "▌")
        assistant_response = full_response.strip()
        message_placeholder.markdown(assistant_response)
    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": assistant_response})
# Add a button to clear the chat history
if st.sidebar.button("Clear Chat History"):
    st.session_state.messages = []
    st.rerun()  # replaces the deprecated st.experimental_rerun()
# Display the number of messages in the current conversation
st.sidebar.markdown(f"Current conversation length: {len(st.session_state.messages)} messages")
# Add a footer
st.sidebar.markdown("---")
st.sidebar.markdown("Created with ❤️ using Streamlit and Llama.cpp")