# tps_gender_prediction / inference.py
# ddiddu's picture
# Create inference.py
# 1209aa3
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
# Load the pretrained "gpt2" tokenizer and language-model head once, at module
# import time. predict_gender_aggregated() reads these module-level names on
# every call instead of reloading them.
# NOTE(review): from_pretrained("gpt2") presumably fetches/caches the weights
# on first use — confirm this is acceptable for the deployment environment.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
# Fixed gender labels for known characters, keyed by the upper-case character
# name. Characters found here are answered from this table rather than by the
# language model.
character_gender_mapping = dict(
    NARRATOR="neutral",
    FATHER="male",
    HARPER="female",
)
def predict_gender_aggregated(character, lines):
    """Predict a character's gender from its name and dialogue.

    Characters present in ``character_gender_mapping`` (matched
    case-insensitively) get their fixed label. For any other character the
    name and dialogue are placed in a prompt ending in ``Gender:``, GPT-2
    samples a continuation, and the first gender word in that continuation
    is returned.

    Args:
        character: Character name.
        lines: Iterable of dialogue strings spoken by the character. A single
            string is treated as one line.

    Returns:
        The mapped label for a known character, otherwise ``"male"``,
        ``"female"`` or ``"unknown"`` when the continuation names neither.
    """
    import re

    # Known characters short-circuit the model entirely.
    known_label = character_gender_mapping.get(character.upper())
    if known_label is not None:
        return known_label

    # A bare string would otherwise be joined character by character.
    if isinstance(lines, str):
        lines = [lines]
    aggregated_text = " ".join(lines)
    input_text = f"Character: {character}. Dialogue: {aggregated_text}. Gender:"
    input_ids = tokenizer.encode(input_text, return_tensors='pt')

    # Budget the *generated* tokens explicitly: the previous max_length=60
    # also counted the prompt, leaving nothing to generate for long dialogue.
    max_new_tokens = 20

    # Keep the prompt inside the model's context window. Drop tokens from the
    # front so the trailing "Gender:" cue is always preserved.
    context_limit = getattr(model.config, "n_positions", 1024)
    max_prompt_tokens = context_limit - max_new_tokens
    if input_ids.shape[-1] > max_prompt_tokens:
        input_ids = input_ids[:, -max_prompt_tokens:]

    # Single un-padded sequence: every position is a real token.
    attention_mask = torch.ones_like(input_ids)
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        # GPT-2 has no pad token; reuse EOS so generate() does not warn.
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the continuation. The output echoes the prompt, and gender
    # words inside the name or dialogue must not decide the prediction.
    prompt_length = input_ids.shape[-1]
    continuation = tokenizer.decode(
        output[0][prompt_length:], skip_special_tokens=True
    )

    # Whole-word match: a plain substring test for 'male' also matches
    # 'female', which made the 'female' outcome unreachable.
    found = re.search(r"\b(female|male)\b", continuation.lower())
    if found is None:
        return 'unknown'  # The model did not name a gender explicitly.
    return found.group(1)
# This function will be called for inference
def predict(input_data):
    """Run gender inference for one request payload.

    Args:
        input_data: Mapping with a ``"character"`` name and the character's
            dialogue under ``"lines"``.

    Returns:
        ``{"character": ..., "predicted_gender": ...}`` on success, or
        ``{"error": "Missing character or lines in the input"}`` when the
        payload is not a mapping or either field is missing or empty.
    """
    missing_input = {"error": "Missing character or lines in the input"}

    try:
        character = input_data.get("character")
        lines = input_data.get("lines")
    except AttributeError:
        # Payload is not a mapping (None, list, str, ...): report it the same
        # way as any other unusable input instead of crashing.
        return missing_input

    # Error handling for missing or empty fields.
    if not character or not lines:
        return missing_input

    gender_prediction = predict_gender_aggregated(character, lines)
    return {"character": character, "predicted_gender": gender_prediction}
# Example input format for testing locally
if __name__ == "__main__":
    # Local smoke test: build one sample request and print the prediction.
    sample_request = dict(
        character="FATHER",
        lines=["I am very proud of you, son."],
    )
    prediction = predict(sample_request)
    print(prediction)