Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
from huggingface_hub import login
|
4 |
+
from datasets import load_dataset
|
5 |
+
from llama_cpp import Llama
|
6 |
+
from huggingface_hub import hf_hub_download
|
7 |
+
import chromadb
|
8 |
+
from sentence_transformers import SentenceTransformer
|
9 |
+
|
10 |
+
# Load the Hugging Face token from the environment and authenticate.
# Guard against a missing token: calling login(None) fails (or falls back
# to an interactive prompt) on Spaces where HF_TOKEN is not configured.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(hf_token)

# Load the recipe dataset that will be indexed into the vector store.
dataset = load_dataset("Maryem2025/final_dataset")

# Initialize the Llama model from a quantized GGUF checkpoint downloaded
# from the Hub. n_ctx=2048 bounds the combined prompt + completion window.
llm = Llama(
    model_path=hf_hub_download(
        repo_id="TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
        filename="capybarahermes-2.5-mistral-7b.Q2_K.gguf",
    ),
    n_ctx=2048,
)
|
25 |
+
|
26 |
+
# Initialize ChromaDB
|
27 |
+
class VectorStore:
    """In-memory ChromaDB collection backed by SentenceTransformer embeddings."""

    def __init__(self, collection_name):
        """Create a fresh, empty collection named *collection_name*.

        Any pre-existing collection with the same name is dropped so repeated
        app restarts always start from a clean index.
        """
        self.embedding_model = SentenceTransformer(
            'sentence-transformers/multi-qa-MiniLM-L6-cos-v1'
        )
        self.chroma_client = chromadb.Client()
        # NOTE(review): list_collections() returns Collection objects in older
        # chromadb releases and plain name strings in >=0.6, so a direct
        # `name in list_collections()` membership test is version-fragile and
        # can silently never match. Attempting the delete and ignoring
        # "not found" works on both.
        try:
            self.chroma_client.delete_collection(collection_name)
        except Exception:
            pass  # collection did not exist yet
        self.collection = self.chroma_client.create_collection(name=collection_name)

    def populate_vectors(self, dataset):
        """Embed up to the first 2000 recipes of *dataset* and index them.

        Expects a datasets.DatasetDict with a 'train' split containing the
        columns referenced below (title, servings, total_time, course,
        sections, instructions, cuisine, calories).
        """
        titles = dataset['train']['title'][:2000]
        servings = dataset['train']['servings'][:2000]
        total_times = dataset['train']['total_time'][:2000]
        courses = dataset['train']['course'][:2000]
        sections = dataset['train']['sections'][:2000]
        instructions = dataset['train']['instructions'][:2000]
        cuisines = dataset['train']['cuisine'][:2000]
        calories = dataset['train']['calories'][:2000]

        texts = [
            f"Title: {title}. Servings: {serving}. Total Time: {total_time} minutes. "
            f"Course: {course}. Sections: {section}. Instructions: {instruction}. "
            f"Cuisine: {cuisine}. Calories: {calorie}."
            for title, serving, total_time, course, section, instruction, cuisine, calorie
            in zip(titles, servings, total_times, courses, sections, instructions, cuisines, calories)
        ]

        # Batch-encode all documents in one call: SentenceTransformer.encode
        # accepts a list of sentences, which is far faster than 2000
        # single-item encode() calls, and add everything to the collection
        # in a single request. Stored documents and ids are unchanged.
        embeddings = self.embedding_model.encode(texts).tolist()
        self.collection.add(
            embeddings=embeddings,
            documents=texts,
            ids=[str(i) for i in range(len(texts))],
        )

    def search_context(self, query, n_results=1):
        """Return the documents nearest to *query*.

        Chroma returns one list of documents per query embedding, so the
        result is a list of lists (here: one inner list, since a single
        query is sent).
        """
        query_embedding = self.embedding_model.encode([query]).tolist()
        results = self.collection.query(query_embeddings=query_embedding, n_results=n_results)
        return results['documents']
61 |
+
|
62 |
+
# Initialize and populate vector store
# Built once at module import: on a Streamlit rerun the VectorStore
# constructor drops and recreates the collection before re-indexing
# the first 2000 recipes, so this is idempotent but not cheap.
vector_store = VectorStore("embedding_vector")
vector_store.populate_vectors(dataset)
65 |
+
|
66 |
+
# Define function for generating text
|
67 |
+
def generate_text(message):
    """Generate a recipe for *message*, grounded in the nearest dataset match.

    Retrieves the most similar indexed recipe, injects it into the system
    prompt, and returns the model's cleaned completion as a string.
    """
    context_results = vector_store.search_context(message, n_results=1)
    # Chroma's query() returns a list of lists (one inner list of documents
    # per query embedding), so context_results[0] is itself a list. Join it
    # into plain text instead of interpolating a Python list repr into the
    # prompt, and fall back to an empty context when nothing was retrieved.
    if context_results and context_results[0]:
        context = " ".join(context_results[0])
    else:
        context = ""

    prompt_template = (
        f"SYSTEM: You are a recipe generating bot.\n"
        f"SYSTEM: {context}\n"
        f"USER: {message}\n"
        f"ASSISTANT:\n"
    )

    # Low temperature keeps output focused; max_tokens bounds generation cost.
    output = llm(
        prompt_template,
        temperature=0.3,
        top_p=0.95,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=600,
    )

    input_string = output['choices'][0]['text'].strip()
    # Strip stray list-repr punctuation and unescape literal "\n" sequences
    # that the model sometimes emits.
    cleaned_text = input_string.strip("[]'").replace('\\n', '\n')
    return cleaned_text
90 |
+
|
91 |
+
# Streamlit UI
st.title("JOSHI's Kitchen 🍽️")
st.write("Generate recipes using AI powered by Hugging Face and ChromaDB!")

user_input = st.text_area("Enter ingredients or ask for a recipe:", "")

if st.button("Generate Recipe"):
    # Treat whitespace-only input as empty so we don't spend a generation
    # call on a blank prompt.
    if user_input.strip():
        with st.spinner("Generating recipe... 🍲"):
            response = generate_text(user_input)
            st.subheader("Generated Recipe:")
            st.write(response)
    else:
        st.warning("Please enter a message.")