{ "cells": [ { "cell_type": "code", "execution_count": 4, "id": "0b2455be", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from sklearn.feature_extraction.text import TfidfVectorizer" ] }, { "cell_type": "code", "execution_count": 5, "id": "1a400539", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"C:\\\\Users\\\\Immortal\\\\OneDrive\\\\Desktop\\\\aiml\\\\Datasets\\\\movies.csv\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "ee8e5c57", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | index | \n", "budget | \n", "genres | \n", "homepage | \n", "id | \n", "keywords | \n", "original_language | \n", "original_title | \n", "overview | \n", "popularity | \n", "... | \n", "runtime | \n", "spoken_languages | \n", "status | \n", "tagline | \n", "title | \n", "vote_average | \n", "vote_count | \n", "cast | \n", "crew | \n", "director | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "237000000 | \n", "Action Adventure Fantasy Science Fiction | \n", "http://www.avatarmovie.com/ | \n", "19995 | \n", "culture clash future space war space colony so... | \n", "en | \n", "Avatar | \n", "In the 22nd century, a paraplegic Marine is di... | \n", "150.437577 | \n", "... | \n", "162.0 | \n", "[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... | \n", "Released | \n", "Enter the World of Pandora. | \n", "Avatar | \n", "7.2 | \n", "11800 | \n", "Sam Worthington Zoe Saldana Sigourney Weaver S... | \n", "[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd... | \n", "James Cameron | \n", "
1 | \n", "1 | \n", "300000000 | \n", "Adventure Fantasy Action | \n", "http://disney.go.com/disneypictures/pirates/ | \n", "285 | \n", "ocean drug abuse exotic island east india trad... | \n", "en | \n", "Pirates of the Caribbean: At World's End | \n", "Captain Barbossa, long believed to be dead, ha... | \n", "139.082615 | \n", "... | \n", "169.0 | \n", "[{\"iso_639_1\": \"en\", \"name\": \"English\"}] | \n", "Released | \n", "At the end of the world, the adventure begins. | \n", "Pirates of the Caribbean: At World's End | \n", "6.9 | \n", "4500 | \n", "Johnny Depp Orlando Bloom Keira Knightley Stel... | \n", "[{'name': 'Dariusz Wolski', 'gender': 2, 'depa... | \n", "Gore Verbinski | \n", "
2 | \n", "2 | \n", "245000000 | \n", "Action Adventure Crime | \n", "http://www.sonypictures.com/movies/spectre/ | \n", "206647 | \n", "spy based on novel secret agent sequel mi6 | \n", "en | \n", "Spectre | \n", "A cryptic message from Bond’s past sends him o... | \n", "107.376788 | \n", "... | \n", "148.0 | \n", "[{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},... | \n", "Released | \n", "A Plan No One Escapes | \n", "Spectre | \n", "6.3 | \n", "4466 | \n", "Daniel Craig Christoph Waltz L\\u00e9a Seydoux ... | \n", "[{'name': 'Thomas Newman', 'gender': 2, 'depar... | \n", "Sam Mendes | \n", "
3 | \n", "3 | \n", "250000000 | \n", "Action Crime Drama Thriller | \n", "http://www.thedarkknightrises.com/ | \n", "49026 | \n", "dc comics crime fighter terrorist secret ident... | \n", "en | \n", "The Dark Knight Rises | \n", "Following the death of District Attorney Harve... | \n", "112.312950 | \n", "... | \n", "165.0 | \n", "[{\"iso_639_1\": \"en\", \"name\": \"English\"}] | \n", "Released | \n", "The Legend Ends | \n", "The Dark Knight Rises | \n", "7.6 | \n", "9106 | \n", "Christian Bale Michael Caine Gary Oldman Anne ... | \n", "[{'name': 'Hans Zimmer', 'gender': 2, 'departm... | \n", "Christopher Nolan | \n", "
4 | \n", "4 | \n", "260000000 | \n", "Action Adventure Science Fiction | \n", "http://movies.disney.com/john-carter | \n", "49529 | \n", "based on novel mars medallion space travel pri... | \n", "en | \n", "John Carter | \n", "John Carter is a war-weary, former military ca... | \n", "43.926995 | \n", "... | \n", "132.0 | \n", "[{\"iso_639_1\": \"en\", \"name\": \"English\"}] | \n", "Released | \n", "Lost in our world, found in another. | \n", "John Carter | \n", "6.1 | \n", "2124 | \n", "Taylor Kitsch Lynn Collins Samantha Morton Wil... | \n", "[{'name': 'Andrew Stanton', 'gender': 2, 'depa... | \n", "Andrew Stanton | \n", "
5 rows × 24 columns
\n", "\n", " | index | \n", "budget | \n", "genres | \n", "homepage | \n", "id | \n", "keywords | \n", "original_language | \n", "original_title | \n", "overview | \n", "popularity | \n", "... | \n", "spoken_languages | \n", "status | \n", "tagline | \n", "title | \n", "vote_average | \n", "vote_count | \n", "cast | \n", "crew | \n", "director | \n", "combined_features | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
4798 | \n", "4798 | \n", "220000 | \n", "Action Crime Thriller | \n", "NaN | \n", "9367 | \n", "united states\\u2013mexico barrier legs arms pa... | \n", "es | \n", "El Mariachi | \n", "El Mariachi just wants to play his guitar and ... | \n", "14.269792 | \n", "... | \n", "[{\"iso_639_1\": \"es\", \"name\": \"Espa\\u00f1ol\"}] | \n", "Released | \n", "He didn't come looking for trouble, but troubl... | \n", "El Mariachi | \n", "6.6 | \n", "238 | \n", "Carlos Gallardo Jaime de Hoyos Peter Marquardt... | \n", "[{'name': 'Robert Rodriguez', 'gender': 0, 'de... | \n", "Robert Rodriguez | \n", "united states\\u2013mexico barrier legs arms pa... | \n", "
4799 | \n", "4799 | \n", "9000 | \n", "Comedy Romance | \n", "NaN | \n", "72766 | \n", "\n", " | en | \n", "Newlyweds | \n", "A newlywed couple's honeymoon is upended by th... | \n", "0.642552 | \n", "... | \n", "[] | \n", "Released | \n", "A newlywed couple's honeymoon is upended by th... | \n", "Newlyweds | \n", "5.9 | \n", "5 | \n", "Edward Burns Kerry Bish\\u00e9 Marsha Dietlein ... | \n", "[{'name': 'Edward Burns', 'gender': 2, 'depart... | \n", "Edward Burns | \n", "Edward Burns Kerry Bish\\u00e9 Marsha Dietlein... | \n", "
4800 | \n", "4800 | \n", "0 | \n", "Comedy Drama Romance TV Movie | \n", "http://www.hallmarkchannel.com/signedsealeddel... | \n", "231617 | \n", "date love at first sight narration investigati... | \n", "en | \n", "Signed, Sealed, Delivered | \n", "\"Signed, Sealed, Delivered\" introduces a dedic... | \n", "1.444476 | \n", "... | \n", "[{\"iso_639_1\": \"en\", \"name\": \"English\"}] | \n", "Released | \n", "NaN | \n", "Signed, Sealed, Delivered | \n", "7.0 | \n", "6 | \n", "Eric Mabius Kristin Booth Crystal Lowe Geoff G... | \n", "[{'name': 'Carla Hetland', 'gender': 0, 'depar... | \n", "Scott Smith | \n", "date love at first sight narration investigati... | \n", "
4801 | \n", "4801 | \n", "0 | \n", "\n", " | http://shanghaicalling.com/ | \n", "126186 | \n", "\n", " | en | \n", "Shanghai Calling | \n", "When ambitious New York attorney Sam is sent t... | \n", "0.857008 | \n", "... | \n", "[{\"iso_639_1\": \"en\", \"name\": \"English\"}] | \n", "Released | \n", "A New Yorker in Shanghai | \n", "Shanghai Calling | \n", "5.7 | \n", "7 | \n", "Daniel Henney Eliza Coupe Bill Paxton Alan Ruc... | \n", "[{'name': 'Daniel Hsia', 'gender': 2, 'departm... | \n", "Daniel Hsia | \n", "Daniel Henney Eliza Coupe Bill Paxton Alan Ru... | \n", "
4802 | \n", "4802 | \n", "0 | \n", "Documentary | \n", "NaN | \n", "25975 | \n", "obsession camcorder crush dream girl | \n", "en | \n", "My Date with Drew | \n", "Ever since the second grade when he first saw ... | \n", "1.929883 | \n", "... | \n", "[{\"iso_639_1\": \"en\", \"name\": \"English\"}] | \n", "Released | \n", "NaN | \n", "My Date with Drew | \n", "6.3 | \n", "16 | \n", "Drew Barrymore Brian Herzlinger Corey Feldman ... | \n", "[{'name': 'Clark Peterson', 'gender': 2, 'depa... | \n", "Brian Herzlinger | \n", "obsession camcorder crush dream girl Drew Barr... | \n", "
5 rows × 25 columns
\n", "\n", " | 0 | \n", "1 | \n", "2 | \n", "3 | \n", "4 | \n", "5 | \n", "6 | \n", "7 | \n", "8 | \n", "9 | \n", "... | \n", "4793 | \n", "4794 | \n", "4795 | \n", "4796 | \n", "4797 | \n", "4798 | \n", "4799 | \n", "4800 | \n", "4801 | \n", "4802 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1.000000 | \n", "0.027039 | \n", "0.044791 | \n", "0.007896 | \n", "0.114058 | \n", "0.071865 | \n", "0.0 | \n", "0.041171 | \n", "0.024869 | \n", "0.030587 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.020007 | \n", "0.000000 | \n", "0.041200 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.000000 | \n", "
1 | \n", "0.027039 | \n", "1.000000 | \n", "0.015052 | \n", "0.006808 | \n", "0.014699 | \n", "0.114475 | \n", "0.0 | \n", "0.014261 | \n", "0.036782 | \n", "0.026371 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.005007 | \n", "0.000000 | \n", "0.016538 | \n", "0.00000 | \n", "0.000000 | \n", "
2 | \n", "0.044791 | \n", "0.015052 | \n", "1.000000 | \n", "0.062943 | \n", "0.075407 | \n", "0.044188 | \n", "0.0 | \n", "0.088494 | \n", "0.041202 | \n", "0.051567 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.014666 | \n", "0.000000 | \n", "0.000000 | \n", "0.06576 | \n", "0.000000 | \n", "
3 | \n", "0.007896 | \n", "0.006808 | \n", "0.062943 | \n", "1.000000 | \n", "0.007961 | \n", "0.043877 | \n", "0.0 | \n", "0.007723 | \n", "0.036850 | \n", "0.112528 | \n", "... | \n", "0.015547 | \n", "0.008505 | \n", "0.028597 | \n", "0.038129 | \n", "0.013734 | \n", "0.030554 | \n", "0.000000 | \n", "0.003731 | \n", "0.00000 | \n", "0.000000 | \n", "
4 | \n", "0.114058 | \n", "0.014699 | \n", "0.075407 | \n", "0.007961 | \n", "1.000000 | \n", "0.151449 | \n", "0.0 | \n", "0.075335 | \n", "0.010052 | \n", "0.050359 | \n", "... | \n", "0.000000 | \n", "0.030153 | \n", "0.000000 | \n", "0.050553 | \n", "0.000000 | \n", "0.005855 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.000000 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
4798 | \n", "0.041200 | \n", "0.005007 | \n", "0.014666 | \n", "0.030554 | \n", "0.005855 | \n", "0.005730 | \n", "0.0 | \n", "0.018688 | \n", "0.000000 | \n", "0.005664 | \n", "... | \n", "0.000000 | \n", "0.006256 | \n", "0.015159 | \n", "0.004299 | \n", "0.010101 | \n", "1.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.000000 | \n", "
4799 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.005761 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "0.012605 | \n", "0.00000 | \n", "0.000000 | \n", "
4800 | \n", "0.000000 | \n", "0.016538 | \n", "0.000000 | \n", "0.003731 | \n", "0.000000 | \n", "0.018924 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.002987 | \n", "0.005371 | \n", "0.003100 | \n", "0.002718 | \n", "0.000000 | \n", "0.000000 | \n", "0.012605 | \n", "1.000000 | \n", "0.00000 | \n", "0.025548 | \n", "
4801 | \n", "0.000000 | \n", "0.000000 | \n", "0.065760 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.065673 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.00000 | \n", "0.000000 | \n", "
4802 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.025548 | \n", "0.00000 | \n", "1.000000 | \n", "
4803 rows × 4803 columns
\n", "