jrno committed on
Commit
af5cf7c
1 Parent(s): 9c3a55c

add csv data and endpoints to show user track history from it

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .idea
2
+ __pycache__
data/music_info.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d930430f811ba3c77f217b3f456f2b6271c238b828d6d9ad76e889b5d725f187
3
+ size 14985870
data/user_listening_history_10k.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47a82d52ec512f00bf1a3416ecbf153aaa478266e87f6d3c0c4bff85ce4e1d4a
3
+ size 620427
recommender.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastai.learner import Learner
2
+ import pandas as pd
3
+
4
def get_recommendations_for_user(learn: Learner, user_id: str, limit: int = 5):
    """Score a set of candidate songs for ``user_id`` with the given learner.

    Args:
        learn: Learner used to build the test dataloader and compute predictions.
        user_id: Identifier of the user the predictions are computed for.
        limit: Requested number of recommendations. It is currently only echoed
            back in the response; the candidate list is not yet ranked or
            truncated by it.

    Returns:
        A dict with the keys ``"user_id"``, ``"limit"`` and ``"recommendations"``,
        where ``"recommendations"`` is the first element of ``learn.get_preds``
        converted to a plain Python list (one entry per candidate song, in the
        same order as the candidate list).
    """
    # TODO: Fetch list of not listened songs as entries
    not_listened_songs = ["Revelry, Kings of Leon, 2008", "Gears, Miss May I, 2010", "Sexy Bitch, David Guetta, 2009"]

    # Pair every candidate song with the *requested* user. Previously a fixed
    # placeholder id was used here, so every caller got that user's scores.
    # NOTE(review): how ids unseen during training are handled depends on the
    # learner's category processing - confirm before exposing arbitrary ids.
    input_dataframe = pd.DataFrame({
        'user_id': [user_id] * len(not_listened_songs),
        'entry': not_listened_songs,
    })
    test_dl = learn.dls.test_dl(input_dataframe)
    predictions = learn.get_preds(dl=test_dl)

    # TODO: Return recommendations in track format
    return {
        "user_id": user_id,
        "limit": limit,
        "recommendations": predictions[0].numpy().tolist()
    }
server.py CHANGED
@@ -1,12 +1,14 @@
1
  from fastai.collab import load_learner
2
  from fastapi import FastAPI, Query
3
  from fastapi.middleware.cors import CORSMiddleware
4
- from custom_models import DotProductBias
5
  import asyncio
6
  import uvicorn
7
- import pandas as pd
8
  import os
9
 
 
 
 
10
  # FastAPI app
11
  app = FastAPI()
12
 
@@ -35,28 +37,18 @@ async def startup_event():
35
  tasks = [asyncio.ensure_future(setup_learner())] # assign some task
36
  learn = (await asyncio.gather(*tasks))[0]
37
 
38
- @app.get('/user/{user_id}/history')
39
- async def get_user_track_history(user_id: str):
40
- return {
41
- "user_id": user_id,
42
- "history": [
43
- {"track_id": "1", "genre": "Rock", "year": "2008", "artist": "Kings of Leon", "name": "Revelry"},
44
- {"track_id": "2", "genre": "Metalcore", "year": "2010", "artist": "Miss May I", "name": "Gears"},
45
- {"track_id": "3", "genre": "Electro", "year": "2009", "artist": "David Guetta", "name": "Sexy Bitch"}
46
- ]
47
- }
48
 
49
  @app.get("/recommend/{user_id}")
50
- async def get_recommendations_for_user(user_id: str, num_recommendations: int = Query(5)):
51
- print(num_recommendations)
52
- print(user_id)
53
- not_listened_songs = ["Revelry, Kings of Leon, 2008", "Gears, Miss May I, 2010", "Sexy Bitch, David Guetta, 2009"]
54
- input_dataframe = pd.DataFrame({'user_id': ["440abe26940ae9d9268157222a4a3d5735d44ed8"] * len(not_listened_songs), 'entry': not_listened_songs})
55
- test_dl = learn.dls.test_dl(input_dataframe)
56
- predictions = learn.get_preds(dl=test_dl)
57
- print(predictions)
58
- #pred = learn.predict(file)
59
- return {"result": predictions[0].numpy().tolist()}
60
 
61
  if __name__ == "__main__":
62
  uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
 
1
  from fastai.collab import load_learner
2
  from fastapi import FastAPI, Query
3
  from fastapi.middleware.cors import CORSMiddleware
4
+ from custom_models import DotProductBias
5
  import asyncio
6
  import uvicorn
 
7
  import os
8
 
9
+ from tracks import get_top_tracks_for_user, get_users_with_track_interactions
10
+ from recommender import get_recommendations_for_user
11
+
12
  # FastAPI app
13
  app = FastAPI()
14
 
 
37
  tasks = [asyncio.ensure_future(setup_learner())] # assign some task
38
  learn = (await asyncio.gather(*tasks))[0]
39
 
40
@app.get("/users")
async def get_users(limit: int = Query(10)):
    """Return per-user track-interaction counts, capped at ``limit`` entries."""
    users = get_users_with_track_interactions(limit=limit)
    return users
43
+
44
@app.get('/users/{user_id}')
async def get_user_track_history(user_id: str, limit: int = Query(5)):
    """Return up to ``limit`` tracks from the listening history of ``user_id``.

    The response echoes the requested ``user_id`` next to the ``history`` list
    produced by ``get_top_tracks_for_user``.
    """
    return {
        "user_id": user_id,
        "history": get_top_tracks_for_user(user_id, limit),
    }
 
 
48
 
49
@app.get("/recommend/{user_id}")
async def get_recommendations(user_id: str, num_recommendations: int = Query(5)):
    """Delegate to the recommender using the module-level ``learn`` object.

    ``num_recommendations`` is forwarded as the recommender's ``limit``.
    """
    recommendations = get_recommendations_for_user(learn, user_id, num_recommendations)
    return recommendations
 
 
 
 
 
 
 
 
52
 
53
  if __name__ == "__main__":
54
  uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
tracks.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
# Read the CSV files
print("Reading music info csv ...")
tracks_df = pd.read_csv('data/music_info.csv')

print("Reading user listening history ...")
# Only the first 1000 interaction rows are loaded.
track_interactions_df = pd.read_csv('data/user_listening_history_10k.csv', nrows=1000)

# Merge the dataframes on 'track_id', keeping only tracks that have at least
# one interaction. A left join from the track catalogue would also keep every
# never-played track with NaN in the interaction columns, which
#   * turns numeric interaction columns into floats, so they are later
#     stringified as e.g. '5.0' instead of '5', and
#   * creates a bogus '' user group containing the whole unplayed catalogue.
dataframe = pd.merge(tracks_df, track_interactions_df, on='track_id', how='inner')

# Convert remaining NaN values (missing track metadata) to empty strings and
# all columns to string type so the records are uniformly string-valued.
dataframe = dataframe.fillna('').astype(str)

# Group by 'user_id' and then create a list of record dictionaries for each
# user; the 'user_id' column itself is dropped from the records because it is
# already the key of the lookup table.
lookup_table = {user_id: group.drop('user_id', axis=1).to_dict('records')
                for user_id, group in dataframe.groupby('user_id')}
20
+
21
def get_users_with_track_interactions(ascending=False, limit=10):
    """Return users together with their number of interaction rows.

    Args:
        ascending: Sort direction for the ``track_interactions`` count;
            the default lists the most active users first.
        limit: Maximum number of users to return, or ``None`` for all of them.

    Returns:
        A list of dicts with the keys ``user_id`` and ``track_interactions``.
    """
    # One row per user with the size of that user's group as the count,
    # ordered by that count in the requested direction.
    per_user = (
        track_interactions_df
        .groupby('user_id')
        .size()
        .reset_index(name='track_interactions')
        .sort_values(by='track_interactions', ascending=ascending)
    )
    ranked = per_user if limit is None else per_user.head(limit)
    return ranked.to_dict(orient='records')
31
+
32
def _playcount_as_int(track):
    """Return the track's 'playcount' as an int, or 0 when missing or unparsable."""
    try:
        # Go through float() so that float-formatted strings such as '5.0'
        # are still read as 5; str.isdigit() would reject them.
        return int(float(track.get('playcount', '')))
    except (TypeError, ValueError, OverflowError):
        return 0


def get_top_tracks_for_user(user_id: str, limit=20):
    """Return the user's tracks ordered by play count, most played first.

    Args:
        user_id: Key into the module-level ``lookup_table``.
        limit: Maximum number of tracks to return, or ``None`` for all of them.

    Returns:
        A list of track record dicts; empty when the user is unknown.
    """
    # Retrieve the user's track list from the lookup table or an empty list if not found
    track_list = lookup_table.get(user_id, [])
    # 'playcount' is stored as a string; sort on its numeric value, descending.
    # sorted() is stable, so tracks with equal counts keep their original order.
    sorted_tracks = sorted(track_list, key=_playcount_as_int, reverse=True)
    # Apply the limit if specified
    if limit is not None:
        sorted_tracks = sorted_tracks[:limit]
    return sorted_tracks