hatim00101
committed on
Commit
•
879567b
1
Parent(s):
84f0885
Update app.py
Browse files
app.py
CHANGED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from sentence_transformers import SentenceTransformer
|
5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
6 |
+
|
7 |
+
# Sentence-embedding model used to encode incoming search queries.
model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

# Precomputed embeddings for the two datasets.
# presumably row i of each array corresponds to row i of the matching CSV
# loaded below (search_in_combined indexes both with the same positions) --
# TODO confirm against the script that produced these files.
# NOTE(review): allow_pickle=True unpickles the file contents, which can run
# arbitrary code if the file is tampered with; only load trusted artifacts.
embeddings_hotels = np.load("normalized_embeddings.pkl", allow_pickle=True)
embeddings_ar = np.load("normalized_embeddings_ar.pkl", allow_pickle=True)

# Tabular records that the search results are drawn from.
df_hotels = pd.read_csv("hotel_dataset_processed.csv")
df_ar = pd.read_csv("df_ar_1.csv")
|
13 |
+
|
14 |
+
def search_in_combined(query_text, model, k=5):
    """Return the ``k`` best matches for a query across both datasets.

    The query is embedded with ``model`` and scored by cosine similarity
    against the module-level ``embeddings_hotels`` and ``embeddings_ar``
    arrays. The ``k`` highest-scoring rows of ``df_hotels`` and ``df_ar`` are
    each tagged with a ``similarity`` column, stacked into one frame, sorted
    by similarity (highest first) and cut down to ``k`` rows.

    Args:
        query_text: Text to search for.
        model: Encoder exposing ``encode(text, convert_to_tensor=True)``.
        k: Number of rows taken per dataset and returned overall.

    Returns:
        A DataFrame with the columns of both datasets plus ``similarity``.
    """
    query_vector = (
        model.encode(query_text, convert_to_tensor=True).cpu().numpy().reshape(1, -1)
    )

    ranked_frames = []
    for frame, vectors in ((df_hotels, embeddings_hotels), (df_ar, embeddings_ar)):
        scores = cosine_similarity(query_vector, vectors).flatten()
        # argsort is ascending, so reverse it before taking the first k.
        best = np.argsort(scores)[::-1][:k]
        top_rows = frame.iloc[best].copy()
        top_rows["similarity"] = scores[best]
        ranked_frames.append(top_rows)

    merged = pd.concat(ranked_frames, ignore_index=True)
    return merged.sort_values(by="similarity", ascending=False).head(k)
|
27 |
+
|
28 |
+
def format_results(results):
    """Render search results as an HTML fragment.

    Each row becomes one block of ``<b>Label</b>: value<br>`` lines. Rows with
    a non-missing ``hotel_name`` use the hotel layout; all other rows use the
    layout built from ``name``, ``location``, ``price`` and related columns.
    Blocks are joined with ``<br><br>``.

    Compared with a plain f-string dump of the row, this function:
      * shows ``N/A`` for absent columns *and* for NaN cells (``row.get`` only
        falls back to its default when the column itself is absent);
      * HTML-escapes every value, so quotes or angle brackets in the data
        cannot break the markup or the single-quoted attributes;
      * URL-encodes the Google Maps query and tolerates missing or
        non-string names instead of raising ``AttributeError``.

    Args:
        results: DataFrame of search hits (may be empty).

    Returns:
        The HTML string, or ``""`` when ``results`` has no rows.
    """
    # Imported locally so this block needs no new file-level imports.
    from html import escape
    from urllib.parse import quote_plus

    def text(row, column):
        # HTML-safe cell text; "N/A" for an absent column or a null cell.
        value = row.get(column)
        return "N/A" if pd.isna(value) else escape(str(value), quote=True)

    def maps_url(row, column):
        # Google Maps search link for the place name held in ``column``.
        value = row.get(column)
        place = "N/A" if pd.isna(value) else str(value)
        return f"https://www.google.com/maps/search/?api=1&query={quote_plus(place)}"

    formatted_results = []
    for _, row in results.iterrows():
        # A missing column yields None, which pd.isna treats as missing, so a
        # frame without ``hotel_name`` is not mistaken for hotel rows.
        if not pd.isna(row.get("hotel_name")):
            google_maps_url = maps_url(row, "hotel_name")
            result = (
                f"<b>Hotel Name</b>: {text(row, 'hotel_name')}<br>"
                f"<b>Description</b>: {text(row, 'hotel_description')}<br>"
                f"<b>Review Title</b>: {text(row, 'review_title')}<br>"
                f"<b>Review Text</b>: {text(row, 'review_text')}<br>"
                f"<b>Rating</b>: {text(row, 'rate')}<br>"
                f"<b>Trip Date</b>: {text(row, 'tripdate')}<br>"
                f"<b>Price Range</b>: {text(row, 'price_range')}<br>"
                f"<b>Location</b>: {text(row, 'locality')}, {text(row, 'country')}<br>"
                f"<b>Hotel Website URL</b>: <a href='{text(row, 'hotel_url')}' target='_blank'>Link</a><br>"
                f"<b>Google Maps</b>: <a href='{google_maps_url}' target='_blank'>View on Maps</a><br>"
                f"<b>Image</b>: <img src='{text(row, 'hotel_image')}' width='200' /><br>"
            )
        else:
            google_maps_url = maps_url(row, "name")
            result = (
                f"<b>Name</b>: {text(row, 'name')}<br>"
                f"<b>Location</b>: {text(row, 'location')}<br>"
                f"<b>Price</b>: {text(row, 'price')}<br>"
                f"<b>Price For</b>: {text(row, 'price_for')}<br>"
                f"<b>Room Type</b>: {text(row, 'room_type')}<br>"
                f"<b>Beds</b>: {text(row, 'beds')}<br>"
                f"<b>Rating</b>: {text(row, 'rating')}<br>"
                f"<b>Rating Title</b>: {text(row, 'rating_title')}<br>"
                f"<b>Google Maps</b>: <a href='{google_maps_url}' target='_blank'>View on Maps</a><br>"
                f"<b>Number of Ratings</b>: {text(row, 'number_of_ratings')}<br>"
                f"<b>Hotel Website URL</b>: <a href='{text(row, 'url')}' target='_blank'>Link</a><br>"
                f"<b>Additional Info</b>: {text(row, 'cm')}<br>"
            )
        formatted_results.append(result)
    return "<br><br>".join(formatted_results)
|
64 |
+
|
65 |
+
def search_interface(query_text):
    """Gradio callback: run the combined search and return it as HTML.

    Fetches the top 7 matches for ``query_text`` using the module-level
    ``model`` and renders them with ``format_results``.
    """
    return format_results(search_in_combined(query_text, model, 7))
|
68 |
+
|
69 |
+
# Widgets for the single text query and the rendered HTML results.
query_box = gr.Textbox(label="Enter your search query")
results_panel = gr.HTML(label="Search Results")

# Web UI that passes the query to search_interface and shows its HTML output.
iface = gr.Interface(
    fn=search_interface,
    inputs=query_box,
    outputs=results_panel,
    title="Hotel and Arabic Data Search",
    description="Enter a query to search for hotels or Arabic data. The results will show the top matches based on similarity and provide a Google Maps URL for hotel locations.",
    examples=["Riyadh", "Deluxe Room"],
)

iface.launch()
|