|
from transformers import GPT2LMHeadModel, GPT2Tokenizer |
|
import torch |
|
|
|
|
|
# Load the pretrained GPT-2 tokenizer and LM head model once at import time.
# NOTE(review): from_pretrained downloads weights on first use — this hits the
# network/disk cache when the module is imported, not when predict() is called.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

model = GPT2LMHeadModel.from_pretrained("gpt2")
|
|
|
|
|
# Hard-coded overrides for known character names (keys are upper-cased).
# predict_gender_aggregated consults this first and skips the model entirely
# on a hit, so entries here are both faster and deterministic.
character_gender_mapping = {

    "NARRATOR": "neutral",

    "FATHER": "male",

    "HARPER": "female"

}
|
|
|
def predict_gender_aggregated(character, lines):
    """Predict a character's gender ('male' / 'female' / 'neutral' / 'unknown').

    Known characters are resolved via ``character_gender_mapping``; otherwise
    the aggregated dialogue is fed to GPT-2 as a fill-in prompt and the
    generated continuation is scanned for gender keywords.

    Args:
        character: Character name (matched case-insensitively in the mapping).
        lines: Iterable of dialogue strings spoken by the character.

    Returns:
        One of 'male', 'female', 'neutral' (mapping only), or 'unknown'.
    """
    key = character.upper()
    if key in character_gender_mapping:
        return character_gender_mapping[key]

    aggregated_text = " ".join(lines)
    input_text = f"Character: {character}. Dialogue: {aggregated_text}. Gender:"
    input_ids = tokenizer.encode(input_text, return_tensors='pt')

    # Explicit integer mask (no padding, so all ones); generate() expects
    # an integer/bool attention mask.
    attention_mask = torch.ones(input_ids.shape, dtype=torch.long)

    # max_new_tokens instead of max_length=60: max_length counts the prompt,
    # so any dialogue longer than ~60 tokens left no room to generate.
    # pad_token_id is set because GPT-2 has no pad token by default.
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=5,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the generated continuation — the prompt itself may contain
    # "male"/"female" (in the dialogue or name) and would corrupt the check.
    continuation = tokenizer.decode(
        output[0][input_ids.shape[1]:], skip_special_tokens=True
    ).lower()

    # Check 'female' first: "male" is a substring of "female", so the original
    # order classified every "female" answer as 'male'.
    if 'female' in continuation:
        return 'female'
    if 'male' in continuation:
        return 'male'
    return 'unknown'
|
|
|
|
|
def predict(input_data):
    """Predict the gender for a character described by *input_data*.

    Args:
        input_data: Mapping with keys "character" (str) and "lines"
            (list of dialogue strings).

    Returns:
        ``{"character": ..., "predicted_gender": ...}`` on success, or
        ``{"error": ...}`` when either required field is missing/empty.
    """
    character = input_data.get("character")
    lines = input_data.get("lines")

    # Guard clause: both fields must be present and non-empty.
    if character and lines:
        return {
            "character": character,
            "predicted_gender": predict_gender_aggregated(character, lines),
        }
    return {"error": "Missing character or lines in the input"}
|
|
|
|
|
if __name__ == "__main__":
    # Smoke test: FATHER is in character_gender_mapping, so this resolves
    # from the hard-coded table without invoking the model.
    sample = {
        "character": "FATHER",
        "lines": ["I am very proud of you, son."],
    }
    print(predict(sample))
|
|