File size: 2,867 Bytes
ac6138f afff708 ac6138f afff708 ac6138f afff708 ac6138f 500010a ac6138f afff708 ac6138f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import os
import json
from sentence_transformers import SentenceTransformer
# Sentence-embedding model used by the script section of this file to encode
# text. It is instantiated at module level, so it is constructed on import as
# well as when the file is run as a script.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# Values that are never collected as filter options. Kept as a module-level
# frozenset so it is built once and membership tests are O(1).
_EXCLUDED_FILTER_VALUES = frozenset({
    'Boys Love', 'Erotica', 'Girls Love', 'Hentai', 'Ecchi', 'Gore',
    'Crossdressing', 'Magical Sex Shift', 'Rx - Hentai', 'R+ - Mild Nudity',
})


def update_filters(filters, data):
    """Collect the filterable values of one record into ``filters``.

    Args:
        filters: Mapping of field name to a ``set`` of values seen so far.
            The sets are updated in place.
        data: Mapping for a single record. For every key of ``filters`` the
            corresponding entry may be a single string (always the case for
            ``'rating'``) or an iterable of values.

    Returns:
        The same ``filters`` mapping that was passed in.

    Missing, ``None`` and empty entries are skipped instead of raising, and
    values listed in ``_EXCLUDED_FILTER_VALUES`` are never added.
    """
    for key, collected in filters.items():
        raw = data.get(key)
        if not raw:
            # Nothing to collect (missing key, None, '' or empty list).
            continue
        # A string is one value, not a sequence of characters to iterate.
        if key == 'rating' or isinstance(raw, str):
            candidates = [raw]
        else:
            candidates = raw
        collected.update(
            item for item in candidates
            if item and item not in _EXCLUDED_FILTER_VALUES
        )
    return filters
def clean_filters(filters):
    """Turn every collected filter set into a JSON-serialisable list.

    Each value of ``filters`` is replaced by a list containing its elements
    plus the catch-all option ``'ALL'``.

    Args:
        filters: Mapping of field name to an iterable (normally a ``set``)
            of collected values. The mapping is updated in place; the
            iterables it held are left unmodified.

    Returns:
        The same ``filters`` mapping, now holding sorted lists.
    """
    for key in filters:
        # Work on a copy so the caller's sets are not mutated and so the
        # function can safely be applied to already-cleaned lists.
        options = set(filters[key])
        options.add('ALL')
        # Set iteration order varies between runs; sorting keeps the written
        # file reproducible. key=str keeps the sort total even if a
        # non-string value was collected.
        filters[key] = sorted(options, key=str)
    return filters
if __name__ == '__main__':
    # Script entry point: read every JSON record in ./anime, encode one
    # "objective" text built from its metadata plus one embedding per review,
    # and write everything together with the filter options to
    # ./embeddings/data.json.
    print('Embedding Started')
    filters = {
        'genres': set(),
        'themes': set(),
        'rating': set()
    }
    embeddings = {}
    # sorted() makes the processing order, and therefore the key order of the
    # output file, independent of the arbitrary order os.listdir returns.
    for filename in sorted(os.listdir('./anime')):
        if not filename.endswith('.json'):
            # Skip stray entries (sub-directories, OS metadata files, ...)
            # that json.load would fail on.
            continue
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(f"./anime/{filename}", 'r', encoding='utf-8') as file:
            data = json.load(file)
        if not data:
            continue
        filters = update_filters(filters, data)
        name = filename.replace('.json', '')
        data['image'] = f"./images/{name}.jpg"
        # NOTE: this exact text (including its line breaks) is what gets
        # embedded, so any change to it changes the resulting vectors.
        text = f'''
This anime has {data['episodes']} Episodes |
This anime premiered on {data['premiered']} |
This anime was broadcasted on: {data['broadcast']} |
This anime was produced by {' '.join(data['producers'])} |
This anime was licensed by Licensors: {' '.join(data['licensors'])} |
The studios in charge of this anime was {' '.join(data['studios'])} |
The source of this anime was {' '.join(data['source'])} |
The genres of this anime are {' '.join(data['genres'])} |
The themes of this anime are {' '.join(data['themes'])} |
The demographic of this anime is {data['demographic']} |
The duration of this anime is {data['duration']} |
The rating of this anime is {data['rating']} |
The description of this anime is {data['description']}'''
        entry = data.copy()
        # Stored as a one-element list of vectors.
        entry['objective_embedding'] = [model.encode(text).tolist()]
        # One vector per review; a record without reviews gets an empty list.
        entry['subjective_embeddings'] = [
            model.encode(review['text']).tolist()
            for review in entry.get('reviews') or []
        ]
        embeddings[name] = entry
    filters = clean_filters(filters)
    # Make sure the output directory exists before writing into it.
    os.makedirs('./embeddings', exist_ok=True)
    with open('./embeddings/data.json', 'w', encoding='utf-8') as f:
        json.dump({'embeddings': embeddings, 'filters': filters}, f)
    print('Embedding Complete')
|