import os

# Install runtime dependencies
os.system('pip install curl_cffi tqdm bitsandbytes tiktoken g4f pinecone-client pandas datasets sentence-transformers')

# Setup and load your keys
from g4f import ChatCompletion
#from google.colab import userdata
from pinecone import Pinecone
import pandas as pd
from datasets import Dataset
from sentence_transformers import SentenceTransformer
import gradio as gr

model_name = "BAAI/bge-m3"
# Personal API keys
#PINECONE_ENVIRONMENT = us-east-1
#PINECONE_API_KEY = 3a3e9022-381d-436e-84cb-ba93464d283e
#os.environ["PINECONE_ENVIRONMENT"] = "us-east-1"
#os.environ["PINECONE_API_KEY"] = "3a3e9022-381d-436e-84cb-ba93464d283e"

# Pinecone credentials (hardcoded here; in production these would come from the Space secrets)
PINECONE_API_KEY = "3a3e9022-381d-436e-84cb-ba93464d283e"
PINECONE_ENVIRONMENT = "us-east-1"

# Initialize Pinecone with the API key
pc = Pinecone(api_key=PINECONE_API_KEY)
# Global variables to store the selected model and dimensions
EMBED_MODEL = 'BGE_M3-1024'
DIMENSIONS = 1024

# Confirm selection automatically
print(f"Model selected: {EMBED_MODEL}")
print(f"Dimensions set as: {DIMENSIONS}")

# Function to print the current selection (can be used in other cells)
def print_current_selection():
    print(f"Currently selected model: {EMBED_MODEL}")
    print(f"Dimensions: {DIMENSIONS}")
# Set the index name automatically
INDEX_NAME = 'neonatos'

# Get the Pinecone API key
#PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')

def connect_to_pinecone(index_name):
    global INDEX_NAME
    try:
        pc = Pinecone(api_key=PINECONE_API_KEY)
        index = pc.Index(index_name)
        # Make sure the connection is actually established
        index_stats = index.describe_index_stats()
        print(f"Successfully connected to Pinecone index '{index_name}'!")
        print("Index Stats:", index_stats)
        # Update the global INDEX_NAME variable
        INDEX_NAME = index_name
        print(f"Global INDEX_NAME updated to: {INDEX_NAME}")
    except Exception as e:
        print(f"Failed to connect to Pinecone index '{index_name}':", str(e))

# Automatically connect to the "neonatos" index
connect_to_pinecone(INDEX_NAME)
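# Note: describe_index_stats() typically reports fields such as 'dimension',
# 'namespaces' and 'total_vector_count' (the exact shape depends on the pinecone
# client version); the 'dimension' field is reused further below.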
# Function to print the current index name (can be used in other cells)
def print_current_index():
    print(f"Current index name: {INDEX_NAME}")
# Check that the required global variables are set
if 'INDEX_NAME' not in globals() or INDEX_NAME is None:
    raise ValueError("INDEX_NAME is not set. Please set the index name first.")
if 'EMBED_MODEL' not in globals() or EMBED_MODEL is None:
    raise ValueError("EMBED_MODEL is not set. Please select an embedding model first.")

# Initialize the Pinecone client
#PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')
pc = Pinecone(api_key=PINECONE_API_KEY)

# Initialize the Pinecone index
index = pc.Index(INDEX_NAME)

# Get the dimension of the index
index_stats = index.describe_index_stats()
vector_dim = index_stats['dimension']
print(f"Index dimension: {vector_dim}")
# Manually define the context and link fields
CONTEXT_FIELDS = ['Etiqueta', 'Pregunta 1', 'Pregunta 2', 'Pregunta 3', 'Respuesta Combinada']
LINK_FIELDS = ['Etiqueta', 'Respuesta Combinada']

# Print confirmation of the selected fields
print(f"Context fields set to: {CONTEXT_FIELDS}")
print(f"Link fields set to: {LINK_FIELDS}")

# Function to get the current field selections (can be used in other cells)
def get_field_selections():
    return {
        "CONTEXT_FIELDS": CONTEXT_FIELDS,
        "LINK_FIELDS": LINK_FIELDS
    }
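# Assumption (inferred from the field names above): each vector's metadata in the
# 'neonatos' index is expected to look roughly like
#   {'Etiqueta': 'rId101', 'Pregunta 1': '...', 'Pregunta 2': '...',
#    'Pregunta 3': '...', 'Respuesta Combinada': '...'}
# The illustrative values here are hypothetical.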
#####################################

# Check if required global variables are set
if 'EMBED_MODEL' not in globals() or EMBED_MODEL is None:
    raise ValueError("EMBED_MODEL is not set. Please select an embedding model first.")
if 'INDEX_NAME' not in globals() or INDEX_NAME is None:
    raise ValueError("INDEX_NAME is not set. Please create or select an index first.")
if 'CONTEXT_FIELDS' not in globals() or 'LINK_FIELDS' not in globals():
    raise ValueError("CONTEXT_FIELDS and LINK_FIELDS are not set. Please run the field selection cell first.")

# Initialize the Sentence-Transformer model
embedding_model = SentenceTransformer(model_name)

# Initialize Pinecone with the API key and connect to the index
pinecone_client = Pinecone(api_key=PINECONE_API_KEY)
index = pinecone_client.Index(INDEX_NAME)

# Constants
LIMIT = 3750  # Approximate character budget for the assembled prompt
def vector_search(query):
    # Generate an embedding using the Sentence-Transformer model
    xq = embedding_model.encode(query)
    # Perform vector search on the Pinecone index
    res = index.query(vector=xq.tolist(), top_k=3, include_metadata=True)
    if res['matches']:
        return [
            {
                'content': ' '.join(f"{k}: {v}" for k, v in match['metadata'].items() if k in CONTEXT_FIELDS and k != 'Etiqueta'),
                'metadata': match['metadata']
            }
            for match in res['matches']
            if 'metadata' in match
        ]
    return []
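# Usage sketch (the query below is a made-up example, not taken from the source data):
#   vector_search("¿Cada cuánto debe comer un recién nacido?")
# would return up to three dicts of the form
#   {'content': 'Pregunta 1: ... Respuesta Combinada: ...', 'metadata': {...}}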
def create_prompt(query, contexts):
    prompt_start = "\n\nContexto:\n"
    prompt_end = f"\n\nPregunta: {query}\nRespuesta:"
    current_contexts = "\n\n---\n\n".join([context['content'] for context in contexts])
    if len(prompt_start + current_contexts + prompt_end) >= LIMIT:
        # Truncate the contexts if they exceed the limit
        available_space = LIMIT - len(prompt_start) - len(prompt_end)
        truncated_contexts = current_contexts[:available_space]
        return prompt_start + truncated_contexts + prompt_end
    else:
        return prompt_start + current_contexts + prompt_end
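# The assembled prompt looks roughly like this (illustrative, with hypothetical context text):
#
#   Contexto:
#   Pregunta 1: ... Respuesta Combinada: ...
#
#   ---
#
#   Pregunta 1: ... Respuesta Combinada: ...
#
#   Pregunta: <user question>
#   Respuesta: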
def complete(prompt):
    # Placeholder completion: returns a fixed greeting instead of calling an LLM
    return ["Hola"]
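# A possible way to wire in the imported g4f ChatCompletion instead of the fixed
# reply above -- kept commented out because model names and behaviour vary between
# g4f versions (this is an assumption, not the author's implementation):
# def complete(prompt):
#     response = ChatCompletion.create(
#         model="gpt-3.5-turbo",
#         messages=[{"role": "user", "content": prompt}],
#     )
#     return [str(response)]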
def check_image_exists(filepath):
    return os.path.exists(filepath)
def chat_function(message, history):
    # Perform vector search
    search_results = vector_search(message)
    # Create a prompt with the relevant contexts
    query_with_contexts = create_prompt(message, search_results)
    # Generate the response
    response = complete(query_with_contexts)
    partial_message = response[0].split("\n")[0]  # Only keep the first line of the response
    # Handle the logic for processing tags and images internally
    relevant_links = [result['metadata'].get(field) for result in search_results for field in LINK_FIELDS if field in result['metadata']]
    full_response = partial_message
    image_url = None
    tags_detected = []
    filtered_links = []
    # Document tags (Etiqueta values) that have an associated image under images/
    image_tags = [
        "rId101", "rId105", "rId109", "rId113", "rId117",
        "rId121", "rId125", "rId129", "rId133", "rId136",
        "rId139", "rId142", "rId145", "rId149", "rId153",
        "rId157", "rId161", "rId165", "rId169", "rId173",
        "rId177", "rId180", "rId184", "rId187", "rId192",
        "rId195", "rId40", "rId44", "rId47", "rId50",
        "rId53", "rId56", "rId59", "rId63", "rId66",
        "rId70", "rId73", "rId76", "rId80", "rId83",
        "rId87", "rId90", "rId94", "rId97",
    ]
    if relevant_links:
        for link in relevant_links:
            if any(tag in link for tag in image_tags):
                tags_detected.append(link)  # Save the tag but don't display it
            else:
                filtered_links.append(link)

    # Add the first relevant link under a single "Respuestas relevantes" section
    if filtered_links:
        full_response += ".\n\nTe detallamos nuestro contenido a continuación:\n" + filtered_links[0]

    # Now handle the images based on the detected tags
    tags_to_images = {tag: f"images/{tag}.png" for tag in image_tags}
    for tag in tags_detected:
        for key, path in tags_to_images.items():
            if key in tag and check_image_exists(path):
                image_url = path
                break

    return full_response, image_url
def update_image(image_url):
    if image_url:
        return image_url
    else:
        return None

# Gradio layout setup
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            chatbot_input = gr.Textbox(label="Tu mensaje")
            chatbot_output = gr.Chatbot(label="ChatBot")
            chatbot_history = gr.State(value=[])
            image_url = gr.State(value=None)
            submit_button = gr.Button("Enviar")
        with gr.Column(scale=1):
            image_output = gr.Image(label="Imagen asociada")

    def process_input(message, history):
        full_response, image = chat_function(message, history)
        history.append((message, full_response))
        return history, history, image

    submit_button.click(process_input, inputs=[chatbot_input, chatbot_history], outputs=[chatbot_output, chatbot_history, image_url])
    image_url.change(fn=update_image, inputs=image_url, outputs=image_output)

# Launch the interface
demo.launch(debug=True)