MCK-02 committed on
Commit
0c47c63
1 Parent(s): eb2e963

fix indentations

Browse files
Files changed (1) hide show
  1. app.py +25 -26
app.py CHANGED
def load_data():
    """Evaluate one fine-tuned seq2seq model per dataset and collect scores.

    For every dataset in ``all_datasets``: load its model and tokenizer,
    tokenize the evaluation split, generate predictions, split each
    prediction/reference into a feedback text and a verdict label, then
    compute text-similarity metrics on the feedback and classification
    metrics on the labels.

    Returns:
        pd.DataFrame: one row per dataset with columns
        ['Model', 'Dataset', 'SacreBLEU', 'ROUGE-2', 'METEOR', 'BERTScore',
         'Accuracy', 'Weighted F1', 'Macro F1'].
    """
    columns = ['Model', 'Dataset', 'SacreBLEU', 'ROUGE-2', 'METEOR',
               'BERTScore', 'Accuracy', 'Weighted F1', 'Macro F1']
    df = pd.DataFrame(columns=columns)
    for ds in all_datasets:
        split = get_split(ds)
        model = AutoModelForSeq2SeqLM.from_pretrained(get_model(ds))
        # NOTE(review): `tokenizer` is never referenced below — presumably
        # `preprocess_function` reads it as a module-level global; confirm.
        tokenizer = AutoTokenizer.from_pretrained(get_tokenizer(ds))

        processed_dataset = split.map(
            preprocess_function,
            batched=True,
            remove_columns=split.column_names
        )
        processed_dataset.set_format('torch')

        dataloader = DataLoader(processed_dataset, batch_size=4)

        predictions, labels = get_predictions_labels(model, dataloader)

        predicted_feedback = extract_feedback(predictions)
        predicted_labels = extract_labels(predictions)

        # Each reference string is "<label> Feedback: <text>"; split once on
        # the marker so extra occurrences of "Feedback:" stay in the text.
        reference_feedback = [x.split('Feedback:', 1)[1].strip() for x in labels]
        reference_labels = [x.split('Feedback:', 1)[0].strip() for x in labels]

        rouge_score = rouge.compute(predictions=predicted_feedback, references=reference_feedback)['rouge2']
        # sacreBLEU expects a list of reference lists, one list per prediction.
        bleu_score = sacrebleu.compute(predictions=predicted_feedback, references=[[x] for x in reference_feedback])['score']
        meteor_score = meteor.compute(predictions=predicted_feedback, references=reference_feedback)['meteor']
        # NOTE(review): `bertscore.compute` returns a dict of per-sentence
        # lists ('precision'/'recall'/'f1'), so this stores a dict in the
        # table cell — consider reducing (e.g. mean of 'f1'). TODO confirm.
        bert_score = bertscore.compute(predictions=predicted_feedback, references=reference_feedback, lang='de', model_type='bert-base-multilingual-cased', rescale_with_baseline=True)

        reference_labels_np = np.array(reference_labels)
        accuracy_value = accuracy_score(reference_labels_np, predicted_labels)
        f1_weighted_value = f1_score(reference_labels_np, predicted_labels, average='weighted')
        f1_macro_value = f1_score(reference_labels_np, predicted_labels, average='macro',
                                  labels=['Incorrect', 'Partially correct', 'Correct'])

        # Bug fix: the original called pd.Dataframe("Model" : ...), which is
        # both the wrong attribute name (DataFrame) and a SyntaxError (bare
        # key : value pairs inside call parentheses). Build the row from a
        # one-element list of dicts instead.
        new_row = pd.DataFrame([{
            "Model": get_model(ds),
            "Dataset": ds,
            "SacreBLEU": bleu_score,
            "ROUGE-2": rouge_score,
            "METEOR": meteor_score,
            "BERTScore": bert_score,
            "Accuracy": accuracy_value,
            "Weighted F1": f1_weighted_value,
            "Macro F1": f1_macro_value,
        }])

        # ignore_index=True keeps the result indexed 0..n-1 instead of
        # repeating index 0 for every appended row.
        df = pd.concat([df, new_row], ignore_index=True)
    return df


dataframe = load_data()