jrno committed on
Commit
0bd1550
1 Parent(s): 22854cc

Only recommend tracks that model has seen

Browse files
recommendation-api/recommender.py CHANGED
@@ -5,7 +5,7 @@ from tracks import get_unlistened_tracks_for_user, predictions_to_tracks
5
 
6
  def get_recommendations_for_user(learn: Learner, user_id: str, limit: int = 5):
7
  not_listened_tracks = get_unlistened_tracks_for_user(user_id)
8
-
9
  # Get predictions for the tracks user hasn't listened yet
10
  input_dataframe = pd.DataFrame({'user_id': [user_id] * len(not_listened_tracks), 'entry': not_listened_tracks})
11
  test_dl = learn.dls.test_dl(input_dataframe)
 
5
 
6
  def get_recommendations_for_user(learn: Learner, user_id: str, limit: int = 5):
7
  not_listened_tracks = get_unlistened_tracks_for_user(user_id)
8
+
9
  # Get predictions for the tracks user hasn't listened yet
10
  input_dataframe = pd.DataFrame({'user_id': [user_id] * len(not_listened_tracks), 'entry': not_listened_tracks})
11
  test_dl = learn.dls.test_dl(input_dataframe)
recommendation-api/tracks.py CHANGED
@@ -1,21 +1,26 @@
1
  import pandas as pd
2
 
3
- # Read the CSV files
4
  tracks_df = pd.read_csv('data/music_info.csv')
5
  tracks_df.fillna('', inplace=True)
6
  tracks_df["entry"] = tracks_df["name"] + ", " + tracks_df["artist"] + ", " + tracks_df["year"].astype(str)
7
- track_interactions_df = pd.read_csv('data/model.csv')[['user_id', 'track_id']]
8
 
9
- # Merge data on those two csvs
10
- dataframe = pd.merge(tracks_df, track_interactions_df, on='track_id', how='left')
11
- # Convert all columns to string type
12
- dataframe = dataframe.astype(str)
13
- # Create a history lookup dictionary by 'user_id'
 
 
14
  user_to_track_history_dict = {user_id: group.drop('user_id', axis=1).to_dict('records')
15
- for user_id, group in dataframe.groupby('user_id')}
 
 
 
 
16
 
17
  def get_users_with_track_interactions(ascending=False, limit=10):
18
- playcount_summary = track_interactions_df.groupby('user_id').size().reset_index(name='track_interactions')
19
  playcount_summary.sort_values(by='track_interactions', ascending=ascending, inplace=True)
20
  if limit is not None:
21
  playcount_summary = playcount_summary.head(limit)
@@ -28,10 +33,10 @@ def get_top_tracks_for_user(user_id: str, limit=10):
28
  sorted_tracks = sorted_tracks[:limit]
29
  return sorted_tracks
30
 
31
- def get_unlistened_tracks_for_user(user_id:str):
32
- all_tracks = tracks_df['entry'].tolist()
33
  listened_tracks = [track['entry'] for track in user_to_track_history_dict.get(user_id, [])]
34
- return list(set(all_tracks) - set(listened_tracks))
35
 
36
  def predictions_to_tracks(entries_and_predictions):
37
  tracks = []
 
1
  import pandas as pd
2
 
3
+ # Read track infos and build the entry representation
4
  tracks_df = pd.read_csv('data/music_info.csv')
5
  tracks_df.fillna('', inplace=True)
6
  tracks_df["entry"] = tracks_df["name"] + ", " + tracks_df["artist"] + ", " + tracks_df["year"].astype(str)
 
7
 
8
+ # Raw dataframe from the training set
9
+ model_df = pd.read_csv('data/model.csv')
10
+ model_interactions_df = model_df[['user_id', 'track_id']]
11
+ model_tracks_df = model_df[['entry']].drop_duplicates()
12
+
13
+ # Create a dictionary where user_id is the key and full track history value
14
+ user_to_track_history_df = pd.merge(tracks_df, model_interactions_df, on='track_id', how='left').astype(str)
15
  user_to_track_history_dict = {user_id: group.drop('user_id', axis=1).to_dict('records')
16
+ for user_id, group in user_to_track_history_df.groupby('user_id')}
17
+
18
+ print("Count of tracks:", tracks_df.shape[0])
19
+ print("Count of interactions (model):", model_interactions_df.shape[0])
20
+ print("Count of tracks (model):", model_tracks_df.shape[0])
21
 
22
  def get_users_with_track_interactions(ascending=False, limit=10):
23
+ playcount_summary = model_interactions_df.groupby('user_id').size().reset_index(name='track_interactions')
24
  playcount_summary.sort_values(by='track_interactions', ascending=ascending, inplace=True)
25
  if limit is not None:
26
  playcount_summary = playcount_summary.head(limit)
 
33
  sorted_tracks = sorted_tracks[:limit]
34
  return sorted_tracks
35
 
36
+ def get_unlistened_tracks_for_user(user_id: str):
37
+ possible_tracks = model_tracks_df['entry'].tolist()
38
  listened_tracks = [track['entry'] for track in user_to_track_history_dict.get(user_id, [])]
39
+ return list(set(possible_tracks) - set(listened_tracks))
40
 
41
  def predictions_to_tracks(entries_and_predictions):
42
  tracks = []