import html
from urllib.parse import quote_plus

import faiss
import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
# Sentence encoder used to embed incoming search queries.
model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects, which
# can execute code. Only load these files from a trusted source; if they hold
# plain numeric arrays, allow_pickle=False would be sufficient -- TODO confirm.
embeddings_hotels = np.load("normalized_embeddings_hotels_multilingual.npy", allow_pickle=True)
embeddings_ar = np.load("normalized_embeddings_ar_multilingual.npy", allow_pickle=True)

df_hotels = pd.read_csv("hotel_dataset_processed.csv")
df_ar = pd.read_csv("df_ar_1.csv")

# Hotels first, then the second dataset -- the same order is used for the
# embeddings and for the rows, so FAISS position i corresponds to
# df_combined.iloc[i].
# NOTE(review): this assumes each .npy file has exactly one vector per CSV row,
# in CSV order -- confirm against the script that produced the embeddings.
# FAISS operates on float32 matrices, so cast once here (no copy is made when
# the data is already float32), mirroring the cast applied to query vectors.
embeddings_combined = np.vstack((embeddings_hotels, embeddings_ar)).astype(
    "float32", copy=False
)
df_combined = pd.concat([df_hotels, df_ar], ignore_index=True)

# Exact (brute-force) L2 index over every stored embedding.
dimension = embeddings_combined.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings_combined)
def search_in_faiss(query_text, model, k=5):
    """Return the rows of ``df_combined`` closest to *query_text*.

    Args:
        query_text: Text to embed and search for.
        model: Encoder exposing ``encode(text)``; its output is reshaped to a
            single float32 row before being handed to FAISS.
        k: Maximum number of neighbours to request from the index.

    Returns:
        A DataFrame slice of ``df_combined`` holding the matched rows in the
        order FAISS returned them. It has fewer than *k* rows when the index
        cannot supply *k* neighbours.
    """
    query_embedding = model.encode(query_text).reshape(1, -1).astype("float32")
    _, indices = index.search(query_embedding, k)

    # FAISS pads the label row with -1 when it finds fewer than k neighbours.
    # Passing -1 to iloc would silently select the LAST row of the frame, so
    # drop the padding before the positional lookup.
    hits = indices[0]
    hits = hits[hits >= 0]
    return df_combined.iloc[hits]
_MAPS_SEARCH_URL = "https://www.google.com/maps/search/?api=1&query="


def _field_text(row, column):
    """Return ``row[column]`` as HTML-escaped text, or ``"N/A"`` if absent/NaN."""
    value = row.get(column)
    if value is None or pd.isna(value):
        return "N/A"
    return html.escape(str(value))


def _maps_link(row, column):
    """Return an HTML anchor to a Google Maps search for ``row[column]``."""
    value = row.get(column)
    if value is None or pd.isna(value):
        # Nothing to search for; a link querying "N/A" would be misleading.
        return "N/A"
    # quote_plus encodes spaces as '+' and also escapes '&', '#', non-ASCII
    # text, etc., which would otherwise truncate or corrupt the query string.
    href = _MAPS_SEARCH_URL + quote_plus(str(value))
    return (
        f'<a href="{html.escape(href)}" target="_blank" '
        f'rel="noopener noreferrer">View on Maps</a>'
    )


def format_results(results):
    """Render search hits as an HTML fragment.

    Rows whose ``hotel_name`` is not NaN are rendered with the hotel/review
    layout; all other rows use the room-listing layout. Every line ends with
    ``<br>`` plus a newline and records are separated by one extra ``<br>``,
    because plain newlines collapse when the text is rendered as HTML.

    Missing or NaN cells are shown as ``N/A`` and all cell text is
    HTML-escaped so dataset content cannot inject markup.

    Args:
        results: DataFrame of matched rows.

    Returns:
        The HTML fragment as a string; an empty string for an empty frame.
    """
    records = []
    for _, row in results.iterrows():
        # A missing column falls back to "" (not NaN), which selects the hotel
        # layout -- same branching rule as before.
        if not pd.isna(row.get("hotel_name", "")):
            lines = [
                f"Hotel Name: {_field_text(row, 'hotel_name')}",
                f"Description: {_field_text(row, 'hotel_description')}",
                f"Review Title: {_field_text(row, 'review_title')}",
                f"Review Text: {_field_text(row, 'review_text')}",
                f"Rating: {_field_text(row, 'rate')}",
                f"Trip Date: {_field_text(row, 'tripdate')}",
                f"Price Range: {_field_text(row, 'price_range')}",
                f"Location: {_field_text(row, 'locality')}, {_field_text(row, 'country')}",
                # NOTE(review): the website link target and the image source
                # are not present in this code (markup presumably lost);
                # the visible text is kept unchanged until the intended
                # columns are confirmed.
                "Hotel Website URL: Link",
                f"Google Maps: {_maps_link(row, 'hotel_name')}",
                "Image:",
            ]
        else:
            lines = [
                f"Name: {_field_text(row, 'name')}",
                f"Location: {_field_text(row, 'location')}",
                f"Price: {_field_text(row, 'price')}",
                f"Price For: {_field_text(row, 'price_for')}",
                f"Room Type: {_field_text(row, 'room_type')}",
                f"Beds: {_field_text(row, 'beds')}",
                f"Rating: {_field_text(row, 'rating')}",
                f"Rating Title: {_field_text(row, 'rating_title')}",
                f"Google Maps: {_maps_link(row, 'name')}",
                f"Number of Ratings: {_field_text(row, 'number_of_ratings')}",
                # NOTE(review): link target not present in this code; see the
                # matching note in the hotel layout.
                "Hotel Website URL: Link",
                f"Additional Info: {_field_text(row, 'cm')}",
            ]
        records.append("".join(f"{line}<br>\n" for line in lines))
    return "<br>\n".join(records)
def search_interface(query_text):
    """Gradio callback: run the search for *query_text* and return HTML.

    Args:
        query_text: Raw text from the query box.

    Returns:
        The formatted top-7 matches, or a short prompt when the query is
        empty, whitespace-only or ``None``.
    """
    # Embedding an empty string would still return seven arbitrary neighbours,
    # so ask for input instead of showing unrelated results.
    if query_text is None or not query_text.strip():
        return "Please enter a search query."
    results = search_in_faiss(query_text, model, 7)
    return format_results(results)
# Gradio front-end: a single free-text query box in, an HTML panel out.
query_box = gr.Textbox(label="Enter your search query")
results_panel = gr.HTML(label="Search Results")

iface = gr.Interface(
    fn=search_interface,
    inputs=query_box,
    outputs=results_panel,
    title="Hotel and Arabic Data Search",
    description="Enter a query to search for hotels or Arabic data. The results will show the top matches based on similarity and provide a Google Maps URL for hotel locations.",
    examples=["Riyadh", "Deluxe Room"],
)

# Start the Gradio app.
iface.launch()