"""Gradio app that runs a semantic search over two tabular datasets.

A query is embedded with a multilingual SentenceTransformer model and compared
(cosine similarity) against two precomputed embedding matrices: one for
``df_hotels`` and one for ``df_ar`` (described in the UI as "Arabic data").
The best matches from both datasets are merged, ranked by similarity and
rendered as HTML.
"""
import html
from urllib.parse import quote_plus

import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

# NOTE(security): allow_pickle=True makes np.load unpickle the file, which can
# execute arbitrary code. Only ever point these paths at trusted, locally
# produced files; prefer a plain .npy dump if the files can be regenerated.
embeddings_hotels = np.load("normalized_embeddings.pkl", allow_pickle=True)
embeddings_ar = np.load("normalized_embeddings_ar.pkl", allow_pickle=True)

# NOTE(review): the code assumes row i of each embedding matrix corresponds to
# row i of the matching data frame - confirm against the script that built them.
df_hotels = pd.read_csv("hotel_dataset_processed.csv")
df_ar = pd.read_csv("df_ar_1.csv")

_MAPS_SEARCH_URL = "https://www.google.com/maps/search/?api=1&query="
_LINE_BREAK = "<br>"  # the output component is gr.HTML, where "\n" is not rendered
_MISSING_TEXT = "N/A"


def search_in_combined(query_text, model, k=5):
    """Return the ``k`` rows most similar to ``query_text`` across both datasets.

    Args:
        query_text: Free-text query to embed.
        model: Object whose ``encode`` method returns the query embedding
            (the module-level SentenceTransformer in this file).
        k: Number of candidates taken from each dataset and the number of
            rows returned after merging.

    Returns:
        A DataFrame with the columns of both datasets plus a ``similarity``
        column, sorted by descending similarity and truncated to ``k`` rows.
    """
    query_embedding = (
        model.encode(query_text, convert_to_tensor=True).cpu().numpy().reshape(1, -1)
    )

    similarities_hotels = cosine_similarity(query_embedding, embeddings_hotels).flatten()
    similarities_ar = cosine_similarity(query_embedding, embeddings_ar).flatten()

    # Indices of the k highest scores in each dataset, best first.
    top_indices_hotels = np.argsort(similarities_hotels)[::-1][:k]
    top_indices_ar = np.argsort(similarities_ar)[::-1][:k]

    top_hotels = df_hotels.iloc[top_indices_hotels].copy()
    top_ar = df_ar.iloc[top_indices_ar].copy()
    top_hotels["similarity"] = similarities_hotels[top_indices_hotels]
    top_ar["similarity"] = similarities_ar[top_indices_ar]

    # Merge the two candidate lists and keep the overall best k.
    combined_top_results = pd.concat([top_hotels, top_ar], ignore_index=True)
    combined_top_results = combined_top_results.sort_values(by="similarity", ascending=False)
    return combined_top_results.head(k)


def _is_missing(value):
    """Return True when ``value`` is None or a NaN-like scalar."""
    if value is None:
        return True
    try:
        return bool(pd.isna(value))
    except (TypeError, ValueError):
        # Non-scalar values (e.g. lists) are present by definition.
        return False


def _field(row, column):
    """Return the HTML-escaped text of ``row[column]``, or "N/A" when absent."""
    value = row.get(column)
    if _is_missing(value):
        return _MISSING_TEXT
    return html.escape(str(value))


def _maps_link(row, column):
    """Return an HTML anchor that searches Google Maps for ``row[column]``."""
    name = row.get(column)
    query = _MISSING_TEXT if _is_missing(name) else str(name)
    # quote_plus encodes spaces as "+" and also escapes "&", "#", etc., which
    # would otherwise truncate or corrupt the query string.
    url = _MAPS_SEARCH_URL + quote_plus(query)
    return (
        f'<a href="{html.escape(url, quote=True)}" target="_blank" '
        f'rel="noopener noreferrer">View on Maps</a>'
    )


def _format_hotel_row(row):
    """Render one row that carries a ``hotel_name`` as an HTML fragment."""
    lines = [
        f"Hotel Name: {_field(row, 'hotel_name')}",
        f"Description: {_field(row, 'hotel_description')}",
        f"Review Title: {_field(row, 'review_title')}",
        f"Review Text: {_field(row, 'review_text')}",
        f"Rating: {_field(row, 'rate')}",
        f"Trip Date: {_field(row, 'tripdate')}",
        f"Price Range: {_field(row, 'price_range')}",
        f"Location: {_field(row, 'locality')}, {_field(row, 'country')}",
        # NOTE(review): no column for the website URL or the image is visible
        # in this file, so these two lines stay plain text - wire them up once
        # the source columns are known.
        "Hotel Website URL: Link",
        f"Google Maps: {_maps_link(row, 'hotel_name')}",
        "Image:",
    ]
    return "".join(line + _LINE_BREAK for line in lines)


def _format_listing_row(row):
    """Render one row without a ``hotel_name`` as an HTML fragment."""
    lines = [
        f"Name: {_field(row, 'name')}",
        f"Location: {_field(row, 'location')}",
        f"Price: {_field(row, 'price')}",
        f"Price For: {_field(row, 'price_for')}",
        f"Room Type: {_field(row, 'room_type')}",
        f"Beds: {_field(row, 'beds')}",
        f"Rating: {_field(row, 'rating')}",
        f"Rating Title: {_field(row, 'rating_title')}",
        f"Google Maps: {_maps_link(row, 'name')}",
        f"Number of Ratings: {_field(row, 'number_of_ratings')}",
        # NOTE(review): website URL column not visible in this file.
        "Hotel Website URL: Link",
        f"Additional Info: {_field(row, 'cm')}",
    ]
    return "".join(line + _LINE_BREAK for line in lines)


def format_results(results):
    """Render search results as a single HTML string.

    Rows with a non-missing ``hotel_name`` use the hotel layout; all other
    rows use the listing layout. Every value taken from the data is
    HTML-escaped, and missing values are shown as "N/A".

    Args:
        results: DataFrame as returned by ``search_in_combined``.

    Returns:
        The HTML fragments of all rows, separated by a blank line
        (``<br><br>``). An empty DataFrame yields an empty string.
    """
    formatted_results = []
    for _, row in results.iterrows():
        if _is_missing(row.get("hotel_name")):
            formatted_results.append(_format_listing_row(row))
        else:
            formatted_results.append(_format_hotel_row(row))
    return (_LINE_BREAK * 2).join(formatted_results)


def search_interface(query_text):
    """Gradio callback: search for ``query_text`` and return the results as HTML."""
    if not query_text or not query_text.strip():
        # Nothing meaningful to embed; avoid ranking against an empty query.
        return "Please enter a search query."
    results = search_in_combined(query_text, model, 7)
    return format_results(results)


iface = gr.Interface(
    fn=search_interface,
    inputs=gr.Textbox(label="Enter your search query"),
    outputs=gr.HTML(label="Search Results"),
    title="Hotel and Arabic Data Search",
    description="Enter a query to search for hotels or Arabic data. The results will show the top matches based on similarity and provide a Google Maps URL for hotel locations.",
    examples=["Riyadh", "Deluxe Room"],
)

if __name__ == "__main__":
    # Only start the web server when run as a script, not on import.
    iface.launch()