import os

# Install runtime dependencies
os.system('pip install curl_cffi tqdm bitsandbytes tiktoken g4f pinecone-client pandas datasets sentence-transformers')

# Setup and load your keys
from g4f import ChatCompletion
#from google.colab import userdata
from pinecone import Pinecone
import pandas as pd
from datasets import Dataset
from sentence_transformers import SentenceTransformer
import gradio as gr

model_name = "BAAI/bge-m3"
# Personal API keys
#PINECONE_ENVIRONMENT = us-east-1
#PINECONE_API_KEY = 3a3e9022-381d-436e-84cb-ba93464d283e
#os.environ["PINECONE_ENVIRONMENT"] = "us-east-1"
#os.environ["PINECONE_API_KEY"] = "3a3e9022-381d-436e-84cb-ba93464d283e"

# Pinecone credentials (hardcoded here; in production these would come from the Space secrets)
PINECONE_API_KEY = "3a3e9022-381d-436e-84cb-ba93464d283e"
PINECONE_ENVIRONMENT = "us-east-1"

# Initialize Pinecone with the API key
pc = Pinecone(api_key=PINECONE_API_KEY)
# Global variables to store the selected model and dimensions
EMBED_MODEL = 'BGE_M3-1024'
DIMENSIONS = 1024

# Confirm selection automatically
print(f"Model selected: {EMBED_MODEL}")
print(f"Dimensions set as: {DIMENSIONS}")

# Function to print the current selection (can be used in other cells)
def print_current_selection():
    print(f"Currently selected model: {EMBED_MODEL}")
    print(f"Dimensions: {DIMENSIONS}")
# Set the index name automatically
INDEX_NAME = 'neonatos'

# Get the Pinecone API key
#PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')

def connect_to_pinecone(index_name):
    global INDEX_NAME
    try:
        pc = Pinecone(api_key=PINECONE_API_KEY)
        index = pc.Index(index_name)
        # Make sure the connection is actually established
        index_stats = index.describe_index_stats()
        print(f"Successfully connected to Pinecone index '{index_name}'!")
        print("Index Stats:", index_stats)
        # Update the global INDEX_NAME variable
        INDEX_NAME = index_name
        print(f"Global INDEX_NAME updated to: {INDEX_NAME}")
    except Exception as e:
        print(f"Failed to connect to Pinecone index '{index_name}':", str(e))

# Automatically connect to the "neonatos" index
connect_to_pinecone(INDEX_NAME)
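# Note: describe_index_stats() typically reports fields such as 'dimension',
# 'namespaces' and 'total_vector_count' (the exact shape depends on the pinecone
# client version); the 'dimension' field is reused further below.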
# Function to print the current index name (can be used in other cells)
def print_current_index():
    print(f"Current index name: {INDEX_NAME}")
# Check that the required global variables are set
if 'INDEX_NAME' not in globals() or INDEX_NAME is None:
    raise ValueError("INDEX_NAME is not set. Please set the index name first.")
if 'EMBED_MODEL' not in globals() or EMBED_MODEL is None:
    raise ValueError("EMBED_MODEL is not set. Please select an embedding model first.")

# Initialize the Pinecone client
#PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')
pc = Pinecone(api_key=PINECONE_API_KEY)

# Initialize the Pinecone index
index = pc.Index(INDEX_NAME)

# Get the dimension of the index
index_stats = index.describe_index_stats()
vector_dim = index_stats['dimension']
print(f"Index dimension: {vector_dim}")
# Manually define the context and link fields
CONTEXT_FIELDS = ['Etiqueta', 'Pregunta 1', 'Pregunta 2', 'Pregunta 3', 'Respuesta Combinada']
LINK_FIELDS = ['Etiqueta', 'Respuesta Combinada']

# Print confirmation of the selected fields
print(f"Context fields set to: {CONTEXT_FIELDS}")
print(f"Link fields set to: {LINK_FIELDS}")

# Function to get the current field selections (can be used in other cells)
def get_field_selections():
    return {
        "CONTEXT_FIELDS": CONTEXT_FIELDS,
        "LINK_FIELDS": LINK_FIELDS
    }
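# Assumption (inferred from the field names above): each vector's metadata in the
# 'neonatos' index is expected to look roughly like
#   {'Etiqueta': 'rId101', 'Pregunta 1': '...', 'Pregunta 2': '...',
#    'Pregunta 3': '...', 'Respuesta Combinada': '...'}
# The illustrative values here are hypothetical.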
#####################################

# Check if required global variables are set
if 'EMBED_MODEL' not in globals() or EMBED_MODEL is None:
    raise ValueError("EMBED_MODEL is not set. Please select an embedding model first.")
if 'INDEX_NAME' not in globals() or INDEX_NAME is None:
    raise ValueError("INDEX_NAME is not set. Please create or select an index first.")
if 'CONTEXT_FIELDS' not in globals() or 'LINK_FIELDS' not in globals():
    raise ValueError("CONTEXT_FIELDS and LINK_FIELDS are not set. Please run the field selection cell first.")

# Initialize the Sentence-Transformer model
embedding_model = SentenceTransformer(model_name)

# Initialize Pinecone with the API key and connect to the index
pinecone_client = Pinecone(api_key=PINECONE_API_KEY)
index = pinecone_client.Index(INDEX_NAME)

# Constants
LIMIT = 3750  # Approximate character budget for the assembled prompt
def vector_search(query):
    # Generate an embedding using the Sentence-Transformer model
    xq = embedding_model.encode(query)
    # Perform vector search on the Pinecone index
    res = index.query(vector=xq.tolist(), top_k=3, include_metadata=True)
    if res['matches']:
        return [
            {
                'content': ' '.join(f"{k}: {v}" for k, v in match['metadata'].items() if k in CONTEXT_FIELDS and k != 'Etiqueta'),
                'metadata': match['metadata']
            }
            for match in res['matches']
            if 'metadata' in match
        ]
    return []
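# Usage sketch (the query below is a made-up example, not taken from the source data):
#   vector_search("¿Cada cuánto debe comer un recién nacido?")
# would return up to three dicts of the form
#   {'content': 'Pregunta 1: ... Respuesta Combinada: ...', 'metadata': {...}}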
def create_prompt(query, contexts):
    prompt_start = "\n\nContexto:\n"
    prompt_end = f"\n\nPregunta: {query}\nRespuesta:"
    current_contexts = "\n\n---\n\n".join([context['content'] for context in contexts])
    if len(prompt_start + current_contexts + prompt_end) >= LIMIT:
        # Truncate the contexts if they exceed the limit
        available_space = LIMIT - len(prompt_start) - len(prompt_end)
        truncated_contexts = current_contexts[:available_space]
        return prompt_start + truncated_contexts + prompt_end
    else:
        return prompt_start + current_contexts + prompt_end
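# The assembled prompt looks roughly like this (illustrative, with hypothetical context text):
#
#   Contexto:
#   Pregunta 1: ... Respuesta Combinada: ...
#
#   ---
#
#   Pregunta 1: ... Respuesta Combinada: ...
#
#   Pregunta: <user question>
#   Respuesta: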
def complete(prompt):
    # Placeholder completion: returns a fixed greeting instead of calling an LLM
    return ["Hola"]
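# A possible way to wire in the imported g4f ChatCompletion instead of the fixed
# reply above -- kept commented out because model names and behaviour vary between
# g4f versions (this is an assumption, not the author's implementation):
# def complete(prompt):
#     response = ChatCompletion.create(
#         model="gpt-3.5-turbo",
#         messages=[{"role": "user", "content": prompt}],
#     )
#     return [str(response)]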
def check_image_exists(filepath):
    return os.path.exists(filepath)
def chat_function(message, history):
    # Perform vector search
    search_results = vector_search(message)
    # Create a prompt with the relevant contexts
    query_with_contexts = create_prompt(message, search_results)
    # Generate the response
    response = complete(query_with_contexts)
    partial_message = response[0].split("\n")[0]  # Only keep the first line of the response
    # Handle the logic for processing tags and images internally
    relevant_links = [result['metadata'].get(field) for result in search_results for field in LINK_FIELDS if field in result['metadata']]
    full_response = partial_message
    image_url = None
    tags_detected = []
    filtered_links = []
    # Document tags (Etiqueta values) that have an associated image under images/
    image_tags = [
        "rId101", "rId105", "rId109", "rId113", "rId117",
        "rId121", "rId125", "rId129", "rId133", "rId136",
        "rId139", "rId142", "rId145", "rId149", "rId153",
        "rId157", "rId161", "rId165", "rId169", "rId173",
        "rId177", "rId180", "rId184", "rId187", "rId192",
        "rId195", "rId40", "rId44", "rId47", "rId50",
        "rId53", "rId56", "rId59", "rId63", "rId66",
        "rId70", "rId73", "rId76", "rId80", "rId83",
        "rId87", "rId90", "rId94", "rId97",
    ]
    if relevant_links:
        for link in relevant_links:
            if any(tag in link for tag in image_tags):
                tags_detected.append(link)  # Save the tag but don't display it
            else:
                filtered_links.append(link)

    # Add the first relevant link under a single "Respuestas relevantes" section
    if filtered_links:
        full_response += ".\n\nTe detallamos nuestro contenido a continuación:\n" + filtered_links[0]

    # Now handle the images based on the detected tags
    tags_to_images = {tag: f"images/{tag}.png" for tag in image_tags}
    for tag in tags_detected:
        for key, path in tags_to_images.items():
            if key in tag and check_image_exists(path):
                image_url = path
                break

    return full_response, image_url
def update_image(image_url):
    if image_url:
        return image_url
    else:
        return None

# Gradio layout setup
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            chatbot_input = gr.Textbox(label="Tu mensaje")
            chatbot_output = gr.Chatbot(label="ChatBot")
            chatbot_history = gr.State(value=[])
            image_url = gr.State(value=None)
            submit_button = gr.Button("Enviar")
        with gr.Column(scale=1):
            image_output = gr.Image(label="Imagen asociada")

    def process_input(message, history):
        full_response, image = chat_function(message, history)
        history.append((message, full_response))
        return history, history, image

    submit_button.click(process_input, inputs=[chatbot_input, chatbot_history], outputs=[chatbot_output, chatbot_history, image_url])
    image_url.change(fn=update_image, inputs=image_url, outputs=image_output)

# Launch the interface
demo.launch(debug=True)