shajmaan committed on
Commit
4bec5b2
1 Parent(s): af75911

Update app.py

Browse files

use fuzzy search with difflib for better results

Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import pandas as pd
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  from sklearn.metrics.pairwise import cosine_similarity
 
5
 
6
  df = pd.read_csv("movies.csv")
7
  features = ["keywords", "cast", "genres", "director"]
@@ -21,7 +22,8 @@ vector_matrix.toarray()
21
  cosine_sim = cosine_similarity(vector_matrix)
22
 
23
  def get_index_from_title(title):
24
- return df[df.title == title]["index"].values[0]
 
25
 
26
  def get_title_from_index(index):
27
  return df[df.index == index]["title"].values[0]
 
2
  import pandas as pd
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  from sklearn.metrics.pairwise import cosine_similarity
5
+ import difflib
6
 
7
  df = pd.read_csv("movies.csv")
8
  features = ["keywords", "cast", "genres", "director"]
 
22
  cosine_sim = cosine_similarity(vector_matrix)
23
 
24
  def get_index_from_title(title):
25
+ search = difflib.get_close_matches(title, df['title'])[0]
26
+ return df[df.title == search]["index"].values[0]
27
 
28
  def get_title_from_index(index):
29
  return df[df.index == index]["title"].values[0]