# NOTE(review): the lines that originally appeared here were Hugging Face
# Spaces page chrome ("Spaces: Sleeping", a file-size readout, commit hashes,
# and a line-number gutter) captured by the scrape — not Python source.
# They are preserved as this comment so the module parses.
import os
from huggingface_hub import login, hf_hub_download
import pandas as pd
import gradio as gr
from llama_cpp import Llama
import chromadb
from sentence_transformers import SentenceTransformer
from deep_translator import GoogleTranslator # Changed from googletrans to deep_translator
import re
import requests # Import the requests library
# Charger le token depuis les secrets (HF_TOKEN is set in the Space's secrets).
hf_token = os.getenv("HF_TOKEN")
login(token=hf_token)

# Charger le dataset depuis un fichier CSV local.
# NOTE(review): "/content/..." is a Colab-style path; on a Spaces deployment
# the CSV normally lives in the repo root — confirm this path is correct.
csv_file = "/content/indian_food (1).csv"
try:
    df = pd.read_csv(csv_file)
    print("Dataset chargé avec succès depuis le fichier CSV local.")
except FileNotFoundError:
    print(f"Erreur: Fichier CSV non trouvé à l'emplacement: {csv_file}")
    # exit() is the site-module interactive helper and exits with status 0;
    # raise SystemExit(1) signals failure explicitly and needs no import.
    raise SystemExit(1)
except Exception as e:
    print(f"Erreur lors du chargement du CSV: {e}")
    raise SystemExit(1)
# Initialisation du modèle Llama ------------------------------------------
# `llm` stays None when loading fails; generate_text() checks for that.
llm = None
try:
    # Download the quantised GGUF weights into /tmp, the writable scratch
    # directory available inside a Hugging Face Space.
    _model_file = hf_hub_download(
        repo_id="TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
        filename="capybarahermes-2.5-mistral-7b.Q2_K.gguf",
        cache_dir="/tmp",
    )
    llm = Llama(model_path=_model_file, n_ctx=2048)
    print("Llama model loaded successfully.")
except Exception as e:
    print(f"Error loading Llama model: {e}")
# Initialisation de ChromaDB Vector Store
class VectorStore:
    """In-memory ChromaDB collection of recipe descriptions, embedded with a
    SentenceTransformer model and used for retrieval-augmented generation."""

    # Cap on the number of rows embedded, to bound startup time and memory.
    MAX_ROWS = 2000

    def __init__(self, collection_name):
        """Create (recreating if necessary) an empty collection *collection_name*."""
        self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
        self.chroma_client = chromadb.Client()
        # BUG FIX: on chromadb < 0.6, list_collections() returns Collection
        # objects, not names, so `collection_name in list_collections()` was
        # comparing a str against objects and silently never matched.
        # Normalise to names so the stale-collection cleanup actually runs.
        existing_names = [
            c.name if hasattr(c, "name") else c
            for c in self.chroma_client.list_collections()
        ]
        if collection_name in existing_names:
            self.chroma_client.delete_collection(collection_name)
        self.collection = self.chroma_client.create_collection(name=collection_name)

    def populate_vectors(self, df):
        """Embed up to MAX_ROWS recipes from *df* and add them to the collection.

        Expects columns: name, ingredients, diet, prep_time; the nutritional
        columns (calories, sugar, gluten) are optional and default to 'None'.
        """
        rows = df.head(self.MAX_ROWS)
        n = len(rows)
        titles = rows['name'].tolist()
        ingredients = rows['ingredients'].tolist()
        diets = rows['diet'].tolist()
        prep_times = rows['prep_time'].tolist()

        # Nutritional columns may be missing from the CSV; fall back to 'None'.
        def _col(name):
            return rows[name].astype(str).tolist() if name in rows else ['None'] * n

        calories = _col('calories')
        sugars = _col('sugar')
        glutens = _col('gluten')

        texts = [
            f"Recipe: {title}. Ingredients: {ingredient}. Diet: {diet}. "
            f"Prep Time: {prep_time} minutes. Calories: {calorie}. "
            f"Sugar: {sugar}. Gluten: {gluten}."
            for title, ingredient, diet, prep_time, calorie, sugar, gluten
            in zip(titles, ingredients, diets, prep_times, calories, sugars, glutens)
        ]
        if not texts:
            return
        # Encode everything in one batched call and insert with a single
        # add() — far faster than the previous per-document encode/add loop.
        embeddings = self.embedding_model.encode(texts).tolist()
        self.collection.add(
            embeddings=embeddings,
            documents=texts,
            ids=[str(i) for i in range(len(texts))],
        )

    def search_context(self, query, n_results=1):
        """Return ChromaDB's raw 'documents' result for *query*.

        Note: this is a list of lists — one inner list per query embedding.
        """
        query_embedding = self.embedding_model.encode([query]).tolist()
        results = self.collection.query(query_embeddings=query_embedding, n_results=n_results)
        return results['documents']
# Initialisation du store de vecteurs et peuplement
vector_store = None # Initialize to None
try:
vector_store = VectorStore("indian_food_embedding")
vector_store.populate_vectors(df)
print("Vector store initialized and populated.")
except Exception as e:
print(f"Error initializing or populating vector store: {e}")
# Replace the translate_text function with this new version
def translate_text(text, target_language='en'):
    """Translates the given text to the target language.

    Inbound user text is auto-detected and translated to English; outbound
    (English) text is translated to *target_language*. On any failure the
    original text is returned unchanged.
    """
    try:
        # Pick the language pair: auto->en for input, en->target for output.
        source, target = (
            ('auto', 'en') if target_language == 'en' else ('en', target_language)
        )
        return GoogleTranslator(source=source, target=target).translate(text)
    except Exception as e:
        print(f"Translation error: {e}")
        print(f"Detailed error: {type(e).__name__}, {e}")  # Print more details for debugging.
        return text  # Return original text if translation fails
def generate_text(message, max_tokens=600, temperature=0.3, top_p=0.95,
                  gluten_free=False, dairy_free=False, allergies="", input_language='en'):
    """Generate a recipe answer for *message* via RAG over the recipe store.

    Parameters:
        message: the user's question (any language selected below).
        max_tokens / temperature / top_p: Llama sampling settings.
        gluten_free / dairy_free: fold dietary restrictions into retrieval.
        allergies: comma-separated ingredients to avoid.
        input_language: 'en' or a deep_translator language code; non-English
            input is translated to English and the answer translated back.

    Returns a user-facing string (either the answer or an error message).
    """
    # Fail fast if the module-level model / store failed to initialise.
    if llm is None:
        return "Error: Llama model could not be loaded. Please check the console for errors."
    if vector_store is None:
        return "Error: Vector store could not be initialized. Please check the console for errors."

    # Translate the input message to English for retrieval and prompting.
    message_en = message
    if input_language != 'en':
        try:
            message_en = translate_text(message, target_language='en')
        except Exception as e:
            print(f"Error translating input message: {e}")
            return "Error translating input. Please try again in English."

    # Build the retrieval query, folding dietary restrictions into it.
    query = message_en
    if gluten_free:
        query += " gluten-free"
    if dairy_free:
        query += " dairy-free"
    if allergies:
        query += f" avoid ingredients: {allergies}"

    context = ""
    try:
        context_results = vector_store.search_context(query, n_results=1)
        # BUG FIX: search_context returns ChromaDB's 'documents' field, which
        # is a list of lists (one inner list per query embedding). The old
        # code took only the outer element, leaving `context` a *list*, so
        # the isinstance(context, str) gluten check below could never fire.
        # Unwrap both levels to obtain the document string.
        if context_results and isinstance(context_results, list):
            first = context_results[0]
            if isinstance(first, list):
                context = first[0] if first else ""
            elif isinstance(first, str):
                context = first
        if not context:
            print("Warning: No context found or invalid context format.")
    except Exception as e:
        return f"Error searching vector store: {e}"

    prompt_template = (
        f"SYSTEM: You are a helpful recipe generating bot specializing in Indian cuisine, assisting with dietary restrictions.\n"
        f"SYSTEM: Here is some context:\n{context}\n"
        f"USER: {message_en}\n"  # Use the English translated message
        f"ASSISTANT:\n"
    )

    try:
        output = llm(
            prompt_template,
            temperature=temperature,
            top_p=top_p,
            top_k=40,
            repeat_penalty=1.1,
            max_tokens=max_tokens,
        )
        raw_text = output['choices'][0]['text'].strip()
        # Strip stray list-repr artifacts and unescape literal "\n" sequences.
        # (Removed the old '\n'.join(text.split('\n')) step — it was a no-op.)
        english_text = raw_text.strip("[]'").replace('\\n', '\n')

        # Translate the output back to the user's language if needed.
        output_text = english_text
        if input_language != 'en':
            try:
                output_text = translate_text(english_text, target_language=input_language)
            except Exception as e:
                print(f"Error translating output message: {e}")
                output_text = "Error translating output. Here is the English version:\n\n" + english_text

        # Append a gluten advisory based on the retrieved recipe's metadata
        # (populate_vectors embeds "Gluten: <value>" into each document).
        if context and isinstance(context, str):
            context_lower = context.lower()
            if "gluten: yes" in context_lower:
                output_text += "\n\nWarning: This recipe contains gluten."
            elif "gluten: no" in context_lower:
                output_text += "\n\nGood news! This recipe is gluten-free."
        return output_text
    except Exception as e:
        return f"Error generating text: {e}"
# --- Gradio UI -------------------------------------------------------------
# One widget per generate_text parameter, in the same positional order.
_inputs = [
    gr.Textbox(lines=2, placeholder="Enter your message here...", label="Message"),
    gr.Slider(minimum=50, maximum=1000, value=600, step=50, label="Max Tokens"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.7, maximum=1.0, value=0.95, step=0.05, label="Top P"),
    gr.Checkbox(label="Gluten-Free"),
    gr.Checkbox(label="Dairy-Free"),
    gr.Textbox(lines=1, placeholder="e.g., peanuts, shellfish", label="Allergies (comma-separated)"),
    gr.Dropdown(choices=['en', 'hi'], value='en', label="Input Language (en=English, hi=Hindi/Hinglish)"),
]

# Example rows match the positional order of _inputs.
_examples = [
    ["mujhe chawal aur dal hai, main kya bana sakta hoon jo gluten-free ho?", 600, 0.3, 0.95, True, False, "", 'hi'],
    ["Suggest a vegetarian dish with spinach and no nuts.", 600, 0.3, 0.95, False, False, "nuts", 'en'],
]

demo = gr.Interface(
    fn=generate_text,
    inputs=_inputs,
    outputs=gr.Textbox(label="Generated Text"),
    title="Indian Recipe Bot",
    description="Running LLM with context retrieval from ChromaDB. Supports dietary restrictions, allergies, and Hinglish input/output!",
    examples=_examples,
    cache_examples=False,
)

if __name__ == "__main__":
    demo.launch()