Bofandra committed on
Commit
7e487e9
·
verified ·
1 Parent(s): 45dddff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -5,17 +5,18 @@ import pandas as pd
5
  import torch
6
  from pathlib import Path
7
  import time
 
8
 
9
  model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
10
  print("load model")
11
- print(time.time())
12
 
13
  hadiths = pd.read_csv('all_hadiths_clean.csv', delimiter=",")
14
  document_embeddings = torch.load('encoded_hadiths_multilingual-e5-large-instruct (1).sav',map_location ='cpu')
15
  #file = open('encoded_hadiths_multilingual-e5-large-instruct (1).sav','rb')
16
  #document_embeddings = pickle.load(file)
17
  print("load hadiths")
18
- print(time.time())
19
 
20
  # Define the function to categorize sources
21
  def categorize_source(source):
@@ -44,21 +45,21 @@ def find(query):
44
  get_detailed_instruct(task, query)
45
  ]
46
  print("start")
47
- print(time.time())
48
 
49
  query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
50
  print("embed query")
51
- print(time.time())
52
 
53
  scores = (query_embeddings @ document_embeddings.T) * 100
54
  print("consine similarity")
55
- print(time.time())
56
 
57
  # insert the similarity value to dataframe & sort it
58
  hadiths['similarity'] = scores.tolist()[0]
59
  sorted_hadiths = hadiths.sort_values(by='similarity', ascending=False)
60
  print("sort hadiths")
61
- print(time.time())
62
 
63
  results = sorted_hadiths.head(3).drop(columns=['id', 'hadith_id', 'chain_indx'])
64
  results['source_cat'] = results['source'].apply(categorize_source)
@@ -72,7 +73,7 @@ def find(query):
72
  results = results.drop(columns=['source', 'chapter_no', 'hadith_no', 'chapter', 'similarity', 'text_ar', 'text_en'])
73
 
74
  print("prepare results")
75
- print(time.time())
76
 
77
  #return sorted_quran
78
  #filepath = Path(query+'.csv')
 
5
  import torch
6
  from pathlib import Path
7
  import time
8
+ from datetime import datetime
9
 
10
  model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
11
  print("load model")
12
+ print(datetime.fromtimestamp(time.time()))
13
 
14
  hadiths = pd.read_csv('all_hadiths_clean.csv', delimiter=",")
15
  document_embeddings = torch.load('encoded_hadiths_multilingual-e5-large-instruct (1).sav',map_location ='cpu')
16
  #file = open('encoded_hadiths_multilingual-e5-large-instruct (1).sav','rb')
17
  #document_embeddings = pickle.load(file)
18
  print("load hadiths")
19
+ print(datetime.fromtimestamp(time.time()))
20
 
21
  # Define the function to categorize sources
22
  def categorize_source(source):
 
45
  get_detailed_instruct(task, query)
46
  ]
47
  print("start")
48
+ print(datetime.fromtimestamp(time.time()))
49
 
50
  query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
51
  print("embed query")
52
+ print(datetime.fromtimestamp(time.time()))
53
 
54
  scores = (query_embeddings @ document_embeddings.T) * 100
55
  print("consine similarity")
56
+ print(datetime.fromtimestamp(time.time()))
57
 
58
  # insert the similarity value to dataframe & sort it
59
  hadiths['similarity'] = scores.tolist()[0]
60
  sorted_hadiths = hadiths.sort_values(by='similarity', ascending=False)
61
  print("sort hadiths")
62
+ print(datetime.fromtimestamp(time.time()))
63
 
64
  results = sorted_hadiths.head(3).drop(columns=['id', 'hadith_id', 'chain_indx'])
65
  results['source_cat'] = results['source'].apply(categorize_source)
 
73
  results = results.drop(columns=['source', 'chapter_no', 'hadith_no', 'chapter', 'similarity', 'text_ar', 'text_en'])
74
 
75
  print("prepare results")
76
+ print(datetime.fromtimestamp(time.time()))
77
 
78
  #return sorted_quran
79
  #filepath = Path(query+'.csv')