Spaces:
Sleeping
Sleeping
File size: 1,234 Bytes
5845aec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import json
import datasketch
from datasketch import MinHash
class SongGuesser:
    """Guess which song a piece of text comes from via MinHash similarity."""

    @staticmethod
    def guess_song(query):
        """Return the name of the song whose lyrics best match ``query``.

        The song list is read from ``./swedish_christmas_songs.json`` on every
        call. Each entry is accessed through its ``'lyrics'`` and ``'name'``
        keys. Query and lyrics are both lower-cased, cut into 5-character
        shingles and compared through their MinHash-estimated Jaccard
        similarity.

        Args:
            query: Text to look up (a string).

        Returns:
            The ``'name'`` of the best-scoring song. When several songs share
            the top score the earliest one in the file wins. An empty string
            is returned when no song scores above zero.
        """
        with open("./swedish_christmas_songs.json", "r", encoding='utf-8') as f:
            songs = json.load(f)

        shingle_size = 5
        # Lyrics are lower-cased below, so the query has to be normalised the
        # same way; otherwise any capital letter in the query would keep its
        # shingles from ever matching.
        query_shingles = get_shingles(query.lower(), shingle_size)
        query_minhash = create_minhash(query_shingles)

        max_sim = 0
        max_name = ""
        for song in songs:
            song_lyrics = song['lyrics'].lower()
            song_shingles = get_shingles(song_lyrics, shingle_size)
            song_minhash = create_minhash(song_shingles)
            estimated_jaccard = query_minhash.jaccard(song_minhash)
            # Strict comparison: ties keep the song seen first.
            if estimated_jaccard > max_sim:
                max_sim = estimated_jaccard
                max_name = song['name']
        return max_name
def get_shingles(text, shingle_size):
    """Return the set of distinct character shingles found in ``text``.

    A shingle is a run of ``shingle_size`` consecutive characters. One is
    taken at every start offset, so neighbouring shingles overlap.

    Args:
        text: String to cut into shingles.
        shingle_size: Length of each shingle; must be at least 1.

    Returns:
        A set of substrings, each exactly ``shingle_size`` characters long.
        The set is empty when ``text`` is shorter than ``shingle_size``.

    Raises:
        ValueError: If ``shingle_size`` is smaller than 1.
    """
    # A size of 0 would yield {''} and a negative size would yield
    # variable-length slices; neither is a meaningful shingle set.
    if shingle_size < 1:
        raise ValueError(f"shingle_size must be at least 1, got {shingle_size}")
    last_start = len(text) - shingle_size
    return {text[i:i + shingle_size] for i in range(last_start + 1)}
def create_minhash(shingles, num_perm=128):
    """Build a MinHash signature from an iterable of string shingles.

    Args:
        shingles: Iterable of strings; each one is UTF-8 encoded and fed to
            the signature.
        num_perm: Value forwarded to ``MinHash(num_perm=...)``.

    Returns:
        The ``MinHash`` object after it has been updated with every shingle.
    """
    signature = MinHash(num_perm=num_perm)
    encoded_shingles = (item.encode('utf8') for item in shingles)
    for payload in encoded_shingles:
        signature.update(payload)
    return signature
|