File size: 8,837 Bytes
a3c5f6f
65c65cd
6945642
 
a3c5f6f
 
 
6945642
 
 
a3c5f6f
 
6945642
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3c5f6f
 
6945642
 
 
 
 
 
 
 
 
 
a3c5f6f
6945642
a3c5f6f
 
 
6945642
 
 
 
a3c5f6f
 
6945642
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3c5f6f
 
6945642
 
a3c5f6f
 
 
 
 
 
 
 
 
6945642
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3c5f6f
 
6945642
 
 
a3c5f6f
 
 
6945642
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3c5f6f
6945642
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import os
from huggingface_hub import login, hf_hub_download
import pandas as pd
import gradio as gr
from llama_cpp import Llama
import chromadb
from sentence_transformers import SentenceTransformer
from deep_translator import GoogleTranslator  # Changed from googletrans to deep_translator
import re
import requests  # Import the requests library


# Load the Hugging Face token from the environment (e.g. Space secrets).
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    # login(token=None) falls back to an interactive prompt, which would
    # block a headless deployment; skip it and let the download below be
    # attempted anonymously.
    print("HF_TOKEN is not set; skipping Hugging Face login.")

# Load the recipe dataset from a local CSV file.
csv_file = "/content/indian_food (1).csv"
try:
    df = pd.read_csv(csv_file)
    print("Dataset chargé avec succès depuis le fichier CSV local.")
except FileNotFoundError:
    print(f"Erreur: Fichier CSV non trouvé à l'emplacement: {csv_file}")
    # Exit with a non-zero status so the failure is visible to the launcher
    # (the site-provided exit() would report success).
    raise SystemExit(1)
except Exception as e:
    print(f"Erreur lors du chargement du CSV: {e}")
    raise SystemExit(1)

# Load the Llama model; `llm` stays None when the download or load fails,
# which generate_text checks before answering.
llm = None
try:
    # Use /tmp as the cache location for the model within Hugging Face Spaces.
    model_path = hf_hub_download(
        cache_dir="/tmp",
        filename="capybarahermes-2.5-mistral-7b.Q2_K.gguf",
        repo_id="TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
    )
    llm = Llama(n_ctx=2048, model_path=model_path)
except Exception as e:
    print(f"Error loading Llama model: {e}")
else:
    print("Llama model loaded successfully.")

# ChromaDB-backed vector store for recipe descriptions.
class VectorStore:
    """Embed recipe rows and retrieve the closest ones for a query.

    Each recipe is rendered as one descriptive sentence, embedded with a
    SentenceTransformer model and stored in a Chroma collection.
    """

    # Number of documents embedded and sent to Chroma per add() call.
    _ADD_BATCH_SIZE = 256

    def __init__(self, collection_name):
        """Create a fresh collection called *collection_name*.

        An existing collection with the same name is deleted first so that
        repeated initialisation starts from an empty store.
        """
        self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
        self.chroma_client = chromadb.Client()

        # Depending on the chromadb release, list_collections() yields either
        # Collection objects or plain names; normalise to names so the
        # membership test works in both cases.
        existing_names = {
            getattr(entry, "name", entry)
            for entry in self.chroma_client.list_collections()
        }
        if collection_name in existing_names:
            self.chroma_client.delete_collection(collection_name)
        self.collection = self.chroma_client.create_collection(name=collection_name)

    def populate_vectors(self, df, limit=2000):
        """Embed the first *limit* rows of *df* and add them to the collection.

        Args:
            df: DataFrame with 'name', 'ingredients', 'diet' and 'prep_time'
                columns; 'calories', 'sugar' and 'gluten' are optional and
                rendered as 'None' when the column is absent.
            limit: Maximum number of rows to index (None indexes every row).
        """
        rows = df if limit is None else df.head(limit)
        row_count = len(rows)

        def column_as_text(column_name):
            # Optional nutrition columns: stringify when present, otherwise
            # fill with the placeholder 'None'.
            if column_name in rows:
                return rows[column_name].astype(str).tolist()
            return ['None'] * row_count

        texts = [
            f"Recipe: {title}. Ingredients: {ingredient_list}. Diet: {diet}. "
            f"Prep Time: {prep_time} minutes. Calories: {calorie_info}. "
            f"Sugar: {sugar_info}. Gluten: {gluten_info}."
            for title, ingredient_list, diet, prep_time, calorie_info, sugar_info, gluten_info in zip(
                rows['name'].tolist(),
                rows['ingredients'].tolist(),
                rows['diet'].tolist(),
                rows['prep_time'].tolist(),
                column_as_text('calories'),
                column_as_text('sugar'),
                column_as_text('gluten'),
            )
        ]

        # Encode and insert in batches instead of one row per call; ids are
        # the row positions as strings.
        for start in range(0, len(texts), self._ADD_BATCH_SIZE):
            batch = texts[start:start + self._ADD_BATCH_SIZE]
            vectors = self.embedding_model.encode(batch).tolist()
            self.collection.add(
                embeddings=vectors,
                documents=batch,
                ids=[str(start + offset) for offset in range(len(batch))],
            )

    def search_context(self, query, n_results=1):
        """Return the stored documents closest to *query*.

        The value is the 'documents' entry of the Chroma query result: one
        inner list of documents per query, i.e. ``[[doc, ...]]`` here.
        """
        query_embedding = self.embedding_model.encode([query]).tolist()
        results = self.collection.query(query_embeddings=query_embedding, n_results=n_results)
        return results['documents']

# Build the vector store and fill it with the recipe dataset. The name is
# bound to None first so generate_text can detect a failed construction.
vector_store = None
try:
    vector_store = VectorStore("indian_food_embedding")
    vector_store.populate_vectors(df)
except Exception as e:
    print(f"Error initializing or populating vector store: {e}")
else:
    print("Vector store initialized and populated.")


# Translation helper backed by deep_translator's GoogleTranslator.
def translate_text(text, target_language='en'):
    """Translate *text* into *target_language*, falling back to the input.

    When the target is English the source language is auto-detected; for any
    other target the text is treated as English. Any error is printed and
    the original text is returned unchanged.
    """
    source_language = 'auto' if target_language == 'en' else 'en'
    try:
        return GoogleTranslator(source=source_language, target=target_language).translate(text)
    except Exception as e:
        print(f"Translation error: {e}")
        # Second line includes the exception type to ease debugging.
        print(f"Detailed error: {type(e).__name__}, {e}")
        return text

def _first_context_document(context_results):
    """Return the top retrieved document as a plain string ('' if none)."""
    # The vector store returns one inner list of documents per query
    # ([[doc, ...]]); unwrap nested sequences until a string is reached.
    candidate = context_results
    while isinstance(candidate, (list, tuple)):
        if not candidate:
            return ""
        candidate = candidate[0]
    return candidate if isinstance(candidate, str) else ""


def generate_text(message, max_tokens=600, temperature=0.3, top_p=0.95,
                  gluten_free=False, dairy_free=False, allergies="", input_language='en'):
    """Answer a recipe request using retrieved context and the local LLM.

    Args:
        message: The user's request, written in *input_language*.
        max_tokens: Upper bound on generated tokens (coerced to int).
        temperature: Sampling temperature passed to the model.
        top_p: Nucleus-sampling threshold passed to the model.
        gluten_free: Add a gluten-free requirement to retrieval and prompt.
        dairy_free: Add a dairy-free requirement to retrieval and prompt.
        allergies: Free-text list of ingredients to avoid.
        input_language: Language code of *message*; non-English input is
            translated to English first and the answer is translated back.

    Returns:
        The generated answer, or a string starting with "Error" when the
        model, the vector store or an intermediate step is unavailable.
    """
    if llm is None:
        return "Error: Llama model could not be loaded. Please check the console for errors."

    if vector_store is None:
        return "Error: Vector store could not be initialized. Please check the console for errors."

    if not message or not message.strip():
        return "Error: Please enter a message."

    # Translate the input message to English.
    message_en = message
    if input_language != 'en':
        try:
            # Fall back to the raw message if the translator yields nothing.
            message_en = translate_text(message, target_language='en') or message
        except Exception as e:
            print(f"Error translating input message: {e}")
            return "Error translating input.  Please try again in English."

    # Collect dietary requirements once so that both the retrieval query and
    # the prompt see them.
    restrictions = []
    if gluten_free:
        restrictions.append("gluten-free")
    if dairy_free:
        restrictions.append("dairy-free")
    allergy_text = allergies.strip() if allergies else ""
    if allergy_text:
        restrictions.append(f"avoid ingredients: {allergy_text}")

    query = " ".join([message_en, *restrictions])

    try:
        context_results = vector_store.search_context(query, n_results=1)
    except Exception as e:
        return f"Error searching vector store: {e}"

    # Flatten the nested result to the document text itself; previously the
    # inner list was used, so its repr ended up in the prompt and the gluten
    # check below never ran.
    context = _first_context_document(context_results)
    if not context:
        print("Warning: No context found or invalid context format.")

    restriction_line = (
        f"SYSTEM: The user has these dietary requirements: {'; '.join(restrictions)}.\n"
        if restrictions else ""
    )
    prompt_template = (
        f"SYSTEM: You are a helpful recipe generating bot specializing in Indian cuisine, assisting with dietary restrictions.\n"
        f"{restriction_line}"
        f"SYSTEM: Here is some context:\n{context}\n"
        f"USER: {message_en}\n"  # Use the English translated message
        f"ASSISTANT:\n"
    )

    try:
        output = llm(
            prompt_template,
            temperature=temperature,
            top_p=top_p,
            top_k=40,
            repeat_penalty=1.1,
            max_tokens=int(max_tokens),  # UI sliders may deliver a float
        )

        raw_text = output['choices'][0]['text'].strip()
        # Trim stray list/quote characters and turn literal "\n" sequences
        # into real newlines.
        continuous_text = raw_text.strip("[]'").replace('\\n', '\n')

        # Translate the output back to the input language.
        output_text = continuous_text
        if input_language != 'en':
            try:
                output_text = translate_text(continuous_text, target_language=input_language)
            except Exception as e:
                print(f"Error translating output message: {e}")
                output_text = "Error translating output.  Here is the English version:\n\n" + continuous_text

        # Gluten note derived from the retrieved context document.
        context_lower = context.lower()
        if "gluten: yes" in context_lower:
            output_text += "\n\nWarning: This recipe contains gluten."
        elif "gluten: no" in context_lower:
            output_text += "\n\nGood news! This recipe is gluten-free."

        return output_text

    except Exception as e:
        return f"Error generating text: {e}"

# Input widgets, listed in the positional order of generate_text's parameters.
_demo_inputs = [
    gr.Textbox(lines=2, placeholder="Enter your message here...", label="Message"),
    gr.Slider(minimum=50, maximum=1000, value=600, step=50, label="Max Tokens"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.7, maximum=1.0, value=0.95, step=0.05, label="Top P"),
    gr.Checkbox(label="Gluten-Free"),
    gr.Checkbox(label="Dairy-Free"),
    gr.Textbox(lines=1, placeholder="e.g., peanuts, shellfish", label="Allergies (comma-separated)"),
    gr.Dropdown(choices=['en', 'hi'], value='en', label="Input Language (en=English, hi=Hindi/Hinglish)"),
]

# Example rows; each supplies one value per input widget above.
_demo_examples = [
    ["mujhe chawal aur dal hai, main kya bana sakta hoon jo gluten-free ho?", 600, 0.3, 0.95, True, False, "", 'hi'],
    ["Suggest a vegetarian dish with spinach and no nuts.", 600, 0.3, 0.95, False, False, "nuts", 'en'],
]

# Gradio front end wiring the widgets to generate_text.
demo = gr.Interface(
    fn=generate_text,
    inputs=_demo_inputs,
    outputs=gr.Textbox(label="Generated Text"),
    title="Indian Recipe Bot",
    description="Running LLM with context retrieval from ChromaDB.  Supports dietary restrictions, allergies, and Hinglish input/output!",
    examples=_demo_examples,
    cache_examples=False,
)

if __name__ == "__main__":
    demo.launch()