File size: 1,553 Bytes
5c6823a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie catalogue from the CSV file that ships next to this script.
df = pd.read_csv("movies.csv")

# Text columns that are merged into a single document per movie.
features = ["keywords", "cast", "genres", "director"]

# Missing values would break string concatenation later on, so blank them
# out in all feature columns at once.
df[features] = df[features].fillna('')

def combined_features(row):
    """Return the row's keywords, cast, genres and director as one string.

    The four fields are read from *row* by key and separated by single
    spaces, in that order.
    """
    parts = (row['keywords'], row['cast'], row['genres'], row['director'])
    return " ".join(parts)

# One text document per movie, built from the feature columns.
df["combined_features"] = df.apply(combined_features, axis=1)

# Turn the documents into TF-IDF weighted term vectors.
Tfidf_vect = TfidfVectorizer()
vector_matrix = Tfidf_vect.fit_transform(df["combined_features"])

# Pairwise similarity between every pair of movies. The TF-IDF matrix is
# handed over as-is: converting it to a dense array first is unnecessary and
# only costs memory, so no `.toarray()` call is made here.
cosine_sim = cosine_similarity(vector_matrix)

def get_index_from_title(title):
    """Return the "index" column value of the first row titled *title*.

    The comparison is an exact equality test against the ``title`` column.
    Raises ``IndexError`` when no row matches.
    """
    matches = df.loc[df["title"] == title, "index"]
    return matches.values[0]

def get_title_from_index(index):
    """Return the title of the first row whose DataFrame index equals *index*.

    Raises ``IndexError`` when no row matches.
    """
    matches = df.loc[df.index == index, "title"]
    return matches.values[0]

def check_movie(m_name):
    """Return the suggestion text for the movie titled *m_name*.

    Looks up the movie's row in the similarity matrix, ranks every movie by
    its similarity score (highest first) and hands the ranking to
    ``get_suggestions`` for formatting.
    """
    row = get_index_from_title(m_name)

    # Pair each similarity score with the position of the movie it belongs to.
    ranked = [(position, score) for position, score in enumerate(cosine_sim[row])]

    # Stable in-place sort, best match first.
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    return get_suggestions(ranked)

def get_suggestions(sorted_similar_movies, limit=11):
    """Format the leading entries of a ranked similarity list as titles.

    Args:
        sorted_similar_movies: Iterable of entries ordered best-first. Only
            element ``[0]`` of each entry is read; it is passed to
            ``get_title_from_index`` to obtain the title.
        limit: Maximum number of titles to include. Defaults to 11, the
            number this function has always emitted for long lists.

    Returns:
        A string holding one title per line, each followed by a newline
        character. When fewer than ``limit`` entries are supplied, all of
        them are returned (an empty input yields an empty string) instead of
        ``None``.
    """
    from itertools import islice

    # NOTE(review): the top-ranked entry is presumably the queried movie
    # itself (self-similarity) - confirm whether it should be skipped.
    titles = [
        get_title_from_index(movie[0])
        for movie in islice(sorted_similar_movies, limit)
    ]
    movies = "".join(title + "\n" for title in titles)

    # Echo the result to stdout, as before, for console visibility.
    print(movies)
    return movies

def check(enter_movie_name):
    """Return movie suggestions for the title typed into the Gradio text box.

    Args:
        enter_movie_name: Title entered by the user. Surrounding whitespace
            is ignored.

    Returns:
        The suggestion text produced by ``check_movie``, or a short
        explanatory message when the title cannot be found.
    """
    title = enter_movie_name.strip()
    try:
        return check_movie(title)
    except IndexError:
        # The title lookup indexes into an empty result when nothing matches;
        # report that to the user instead of surfacing a raw error in the UI.
        return f"No movie titled {title!r} was found. Please check the spelling and try again."


# Wrap `check` in a simple text-in / text-out Gradio interface.
movie = gr.Interface(
    fn=check,
    inputs="text",
    outputs="text",
)

# Start the web app; share=True additionally requests a public share link.
movie.launch(share=True)