giskard-evaluator

Sleeping

App Files Files Community

200

giskard-evaluator / app.py

inoki-giskard

Add features, label mapping in text classification

85095eb about 1 year ago

raw

history blame

14.8 kB

	import gradio as gr
	import datasets
	import huggingface_hub
	import sys
	import os
	import time
	from pathlib import Path

	import json
	import logging

	import pandas as pd

	from transformers.pipelines import TextClassificationPipeline


	# Names of the environment variables that try_submit looks up through
	# os.environ.get(). Note that HF_SPACE_ID refers to the variable called
	# 'SPACE_ID', not 'HF_SPACE_ID'.
	HF_REPO_ID = 'HF_REPO_ID'
	HF_SPACE_ID = 'SPACE_ID'
	HF_WRITE_TOKEN = 'HF_WRITE_TOKEN'


	# Gradio theme handed to gr.Blocks(theme=theme) when the interface is built.
	theme = gr.themes.Soft(
	primary_hue="green",
	)

	def check_model(model_id):
	    """Look up a model on the Hugging Face Hub and try to build its pipeline.

	    Returns a ``(model_id, pipeline_or_error)`` pair:

	    * ``(None, None)`` when the model info cannot be fetched;
	    * ``(model_id, exception)`` when the pipeline cannot be created;
	    * ``(model_id, pipeline)`` on success.
	    """
	    try:
	        model_details = huggingface_hub.model_info(model_id)
	        pipeline_task = model_details.pipeline_tag
	    except Exception:
	        return None, None

	    try:
	        # Imported lazily so that an import failure is reported to the
	        # caller the same way as a pipeline construction failure.
	        from transformers import pipeline as build_pipeline
	        loaded_pipeline = build_pipeline(task=pipeline_task, model=model_id)
	    except Exception as load_error:
	        return model_id, load_error

	    return model_id, loaded_pipeline


	def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
	    """Check that a dataset, its config and its split can be resolved.

	    Returns a ``(dataset_id, config, split)`` triple in which a problem is
	    signalled through the shape of the values:

	    * ``(None, dataset_config, dataset_split)``: config names could not be
	      listed (the dataset may not exist);
	    * ``(dataset_id, [configs...], dataset_split)``: the requested config is
	      not one of the available configs;
	    * ``(dataset_id, None, [splits...])``: the requested split is missing
	      from the loaded ``DatasetDict``;
	    * ``(dataset_id, None, None)``: the loaded object is neither a
	      ``DatasetDict`` nor a ``Dataset``;
	    * ``(dataset_id, dataset_config, dataset_split)``: everything resolved.
	    """
	    try:
	        available_configs = datasets.get_dataset_config_names(dataset_id)
	    except Exception:
	        # Dataset may not exist
	        return None, dataset_config, dataset_split

	    if dataset_config not in available_configs:
	        # Need to choose dataset subset (config)
	        return dataset_id, available_configs, dataset_split

	    loaded = datasets.load_dataset(dataset_id, dataset_config)

	    if isinstance(loaded, datasets.DatasetDict):
	        if dataset_split in loaded.keys():
	            return dataset_id, dataset_config, dataset_split
	        # Need to choose dataset split
	        return dataset_id, None, list(loaded.keys())

	    if isinstance(loaded, datasets.Dataset):
	        return dataset_id, dataset_config, dataset_split

	    # Unknown type
	    return dataset_id, None, None


	def text_classificaiton_match_label_case_unsensative(id2label_mapping, label):
	    """Find the model label that equals ``label`` when case is ignored.

	    Returns ``(model_label, label)`` for the first key of
	    ``id2label_mapping`` whose upper-cased form equals the upper-cased
	    ``label``, or ``(None, label)`` when no key matches.
	    """
	    matched_label = next(
	        (
	            candidate
	            for candidate in id2label_mapping
	            if candidate.upper() == label.upper()
	        ),
	        None,
	    )
	    return matched_label, label


	def text_classification_map_model_and_dataset_labels(id2label, dataset_features):
	    """Pair each model label with a dataset label where one can be found.

	    Only ``datasets.ClassLabel`` features with as many names as the model
	    has labels are considered. A dataset label is matched to a model label
	    first by exact equality and then case-insensitively.

	    Returns ``(id2label_mapping, dataset_labels)`` where ``id2label_mapping``
	    maps every model label to its dataset label (``None`` when unmatched)
	    and ``dataset_labels`` is the ``names`` list of the last feature that
	    was considered (``None`` when there was none).
	    """
	    id2label_mapping = dict.fromkeys(id2label.values())
	    expected_label_count = len(id2label_mapping)
	    dataset_labels = None

	    for feature in dataset_features.values():
	        usable = (
	            isinstance(feature, datasets.ClassLabel)
	            and len(feature.names) == expected_label_count
	        )
	        if not usable:
	            continue

	        dataset_labels = feature.names

	        # Try to match labels
	        for dataset_label in feature.names:
	            if dataset_label in id2label_mapping:
	                matched_model_label = dataset_label
	            else:
	                # Fall back to a case-insensitive comparison
	                matched_model_label, _ = text_classificaiton_match_label_case_unsensative(
	                    id2label_mapping, dataset_label
	                )
	            if matched_model_label is not None:
	                id2label_mapping[matched_model_label] = dataset_label

	    return id2label_mapping, dataset_labels


	def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split):
	    """Complete and check a column mapping for a text classification model.

	    Args:
	        column_mapping: dict that may hold a ``"text"`` entry (name of the
	            dataset column fed to the model) and a ``"label"`` entry (dict
	            from model label id, as a string key, to dataset label). It is
	            updated in place.
	        ppl: the loaded pipeline; ``ppl.model.config.id2label`` is read and
	            the pipeline is called once on the first dataset row.
	        d_id, config, split: dataset id, config name and split name passed
	            to ``datasets.load_dataset``.

	    Returns:
	        ``(column_mapping, prediction_result, id2label_df)``:

	        * ``(None, None, None)`` when the dataset exposes no ``features``;
	        * ``(column_mapping, None, None)`` when no text column can be found
	          or the test prediction fails;
	        * ``(column_mapping, prediction_result, None)`` when the labels
	          cannot be mapped;
	        * otherwise the third item is a ``pandas.DataFrame`` with the
	          columns ``ID``, ``Model labels`` and ``Dataset labels``.
	    """
	    # We assume dataset is ok here
	    ds = datasets.load_dataset(d_id, config)[split]

	    try:
	        dataset_features = ds.features
	    except AttributeError:
	        # Dataset does not have features, need to provide everything
	        return None, None, None

	    # Check whether we need to infer the text input column
	    infer_text_input_column = True
	    if "text" in column_mapping:
	        dataset_text_column = column_mapping["text"]
	        if dataset_text_column in dataset_features:
	            infer_text_input_column = False
	        else:
	            logging.warning(f"Provided {dataset_text_column} is not in Dataset columns")

	    if infer_text_input_column:
	        # Try to retrieve one. Not every feature type exposes ``dtype``, so
	        # read it defensively instead of failing on the first such column.
	        candidates = [
	            name
	            for name, feature in dataset_features.items()
	            if getattr(feature, "dtype", None) == "string"
	        ]
	        if not candidates:
	            # Not found a text feature
	            return column_mapping, None, None
	        logging.debug(f"Candidates are {candidates}")
	        column_mapping["text"] = candidates[0]

	    # Retrieve all labels
	    id2label = ppl.model.config.id2label
	    label2id = {v: k for k, v in id2label.items()}
	    try:
	        # Use the first item to test prediction. Only row 0 is read, so the
	        # split is not converted to a DataFrame for this.
	        first_text = ds[0][column_mapping["text"]]
	        results = ppl({"text": first_text}, top_k=None)
	        prediction_result = {
	            f'{result["label"]}({label2id[result["label"]]})': result["score"] for result in results
	        }
	    except Exception:
	        # Pipeline prediction failed, need to provide labels
	        return column_mapping, None, None

	    # Infer labels
	    id2label_mapping, dataset_labels = text_classification_map_model_and_dataset_labels(id2label, dataset_features)
	    if "label" in column_mapping:
	        provided_labels = column_mapping["label"]
	        try:
	            # dataset_labels is None when no matching ClassLabel feature was
	            # found; treat that as "no labels" instead of crashing in set().
	            labels_match = (
	                isinstance(provided_labels, dict)
	                and set(provided_labels.values()) == set(dataset_labels or ())
	            )
	        except TypeError:
	            # Unhashable values (e.g. nested lists) can never be labels
	            labels_match = False

	        if not labels_match:
	            logging.warning(f'Provided {column_mapping["label"]} does not match labels in Dataset')
	            return column_mapping, prediction_result, None

	        try:
	            # Keys are strings because the mapping comes from JSON
	            for model_label in id2label_mapping:
	                id2label_mapping[model_label] = provided_labels[str(label2id[model_label])]
	        except KeyError as missing_id:
	            logging.warning(f"Provided label mapping has no entry for model label id {missing_id}")
	            return column_mapping, prediction_result, None
	    elif None in id2label_mapping.values():
	        # Some model labels could not be matched: hand back a template the
	        # user can fill in.
	        column_mapping["label"] = {
	            i: None for i in id2label.keys()
	        }
	        return column_mapping, prediction_result, None

	    id2label_df = pd.DataFrame({
	        "ID": list(id2label.keys()),
	        "Model labels": [id2label[label] for label in id2label.keys()],
	        "Dataset labels": [id2label_mapping[id2label[label]] for label in id2label.keys()],
	    })
	    if "label" not in column_mapping:
	        column_mapping["label"] = {
	            i: id2label_mapping[id2label[i]] for i in id2label.keys()
	        }

	    return column_mapping, prediction_result, id2label_df


	def _validation_blocked(config, split, prediction_result=None, column_mapping=None):
	    """Build the try_validate outputs that keep the submit button disabled.

	    The prediction preview is shown only when ``prediction_result`` is given;
	    the column mapping box is refreshed only when ``column_mapping`` is given.
	    """
	    if prediction_result is None:
	        prediction_preview = gr.update(visible=False)
	    else:
	        prediction_preview = gr.update(value=prediction_result, visible=True)

	    if column_mapping is None:
	        column_mapping_box = gr.update(visible=True)
	    else:
	        column_mapping_box = gr.update(value=column_mapping, visible=True, interactive=True)

	    return (
	        config, split,
	        gr.update(interactive=False),  # Submit button
	        prediction_preview,  # Model prediction preview
	        gr.update(visible=False),  # Label mapping preview
	        column_mapping_box,  # Column mapping
	    )


	def try_validate(model_id, dataset_id, dataset_config, dataset_split, column_mapping):
	    """Validate the model, the dataset and the column mapping.

	    Args:
	        model_id: Hugging Face model id.
	        dataset_id: Hugging Face dataset id.
	        dataset_config: dataset config (subset) name.
	        dataset_split: dataset split name.
	        column_mapping: column mapping as a JSON string; anything that does
	            not decode to a JSON object is treated as an empty mapping.

	    Returns:
	        A 6-tuple of values/updates, in this order: dataset config, dataset
	        split, submit button, model prediction preview, label mapping
	        preview, column mapping. The submit button is made interactive only
	        when every check passed.
	    """
	    # Validate model
	    m_id, ppl = check_model(model_id=model_id)
	    if m_id is None:
	        gr.Warning(f'Model "{model_id}" is not accessible. Please set your HF_TOKEN if it is a private model.')
	        return _validation_blocked(dataset_config, dataset_split)
	    if isinstance(ppl, Exception):
	        gr.Warning(f'Failed to load "{model_id}" model: {ppl}')
	        return _validation_blocked(dataset_config, dataset_split)

	    # Validate dataset
	    d_id, config, split = check_dataset(dataset_id=dataset_id, dataset_config=dataset_config, dataset_split=dataset_split)

	    dataset_ok = False
	    if d_id is None:
	        gr.Warning(f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.')
	    elif isinstance(config, list):
	        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.')
	        config = gr.update(choices=config, value=config[0])
	    elif isinstance(split, list):
	        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.')
	        split = gr.update(choices=split, value=split[0])
	    elif config is None or split is None:
	        # check_dataset could not recognise the type of the loaded object
	        gr.Warning(f'Dataset "{dataset_id}" could not be loaded as a supported dataset type.')
	    else:
	        dataset_ok = True

	    if not dataset_ok:
	        # check_dataset uses None for a value it could not confirm; keep the
	        # user's current selection rather than blanking the dropdown.
	        if config is None:
	            config = dataset_config
	        if split is None:
	            split = dataset_split
	        return _validation_blocked(config, split)

	    # TODO: Validate column mapping by running once
	    prediction_result = None
	    id2label_df = None
	    if isinstance(ppl, TextClassificationPipeline):
	        try:
	            column_mapping = json.loads(column_mapping)
	        except Exception:
	            column_mapping = {}
	        if not isinstance(column_mapping, dict):
	            # Valid JSON that is not an object (e.g. "null" or a list)
	            # cannot be used as a mapping.
	            column_mapping = {}

	        column_mapping, prediction_result, id2label_df = \
	            text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split)

	        # The helper returns None when it cannot work with the dataset; show
	        # an empty object rather than the literal "null".
	        column_mapping = json.dumps({} if column_mapping is None else column_mapping, indent=2)

	    del ppl

	    if prediction_result is None:
	        gr.Warning('The model failed to predict with the first row in the dataset. Please provide column mappings in "Advance" settings.')
	        return _validation_blocked(config, split, column_mapping=column_mapping)
	    if id2label_df is None:
	        gr.Warning('The prediction result does not conform the labels in the dataset. Please provide label mappings in "Advance" settings.')
	        return _validation_blocked(
	            config, split,
	            prediction_result=prediction_result,
	            column_mapping=column_mapping,
	        )

	    gr.Info("Model and dataset validations passed. You can submit the evaluation task.")

	    return (
	        config, split,
	        gr.update(interactive=True),  # Submit button
	        gr.update(value=prediction_result, visible=True),  # Model prediction preview
	        gr.update(value=id2label_df, visible=True),  # Label mapping preview
	        gr.update(value=column_mapping, visible=True, interactive=True),  # Column mapping
	    )


	def try_submit(m_id, d_id, config, split, local):
	    """Run the evaluation in this process and store/publish the report.

	    Nothing happens when ``local`` is falsy. Otherwise the scan is run
	    through ``giskard_cicd``'s ``PipelineRunner``, the HTML report is written
	    to ``output/<m_id>/<d_id>/<config>/<split>/report.html`` and, when a
	    target repo and a write token are both configured through environment
	    variables, a markdown report is posted with
	    ``create_discussion_detailed``.

	    Args:
	        m_id: Hugging Face model id.
	        d_id: Hugging Face dataset id.
	        config: dataset config (subset) name.
	        split: dataset split name.
	        local: whether to run the evaluation in this process.
	    """
	    if not local:
	        return

	    if "cicd" not in sys.path:
	        sys.path.append("cicd")
	    from giskard_cicd.loaders import HuggingFaceLoader
	    from giskard_cicd.pipeline.runner import PipelineRunner

	    from automation import create_discussion_detailed
	    supported_loaders = {
	        "huggingface": HuggingFaceLoader(),
	    }

	    runner = PipelineRunner(loaders=supported_loaders)

	    runner_kwargs = {
	        "loader_id": "huggingface",
	        "model": m_id,
	        "dataset": d_id,
	        "scan_config": None,
	        "dataset_split": split,
	        "dataset_config": config,
	    }

	    eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
	    start = time.time()
	    print(f"Start local evaluation on {eval_str}")

	    report = runner.run(**runner_kwargs)

	    # TODO: Publish it with given repo id/model id
	    # Publishing needs BOTH a target repo and a write token. These are
	    # resolved separately because `a or b and c` parses as `a or (b and c)`,
	    # which would publish without a token whenever HF_REPO_ID is set.
	    repo = os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID)
	    write_token = os.environ.get(HF_WRITE_TOKEN)
	    if repo and write_token:
	        rendered_report = report.to_markdown(template="github")
	        create_discussion_detailed(repo, m_id, d_id, config, split, write_token, rendered_report)

	    # Cache locally
	    # NOTE(review): the path is built from user-supplied ids; confirm that
	    # values such as "../x" cannot escape the output directory.
	    rendered_report = report.to_html()
	    output_dir = Path(f"output/{m_id}/{d_id}/{config}/{split}/")
	    output_dir.mkdir(parents=True, exist_ok=True)
	    report_path = output_dir / "report.html"
	    # Explicit encoding so the result does not depend on the platform default
	    with open(report_path, "w", encoding="utf-8") as f:
	        print(f'Writing to {report_path}')
	        f.write(rendered_report)

	    print(f"Finished local evaluation on {eval_str}: {time.time() - start:.2f}s")


	# Build the Gradio interface: model inputs, dataset inputs, an "Advance"
	# accordion and the validate/submit buttons.
	# NOTE(review): the indentation of this section is not visible here, so the
	# exact nesting of the `with gr.Row()` / `with gr.Column()` blocks (i.e.
	# which component lives in which container) has to be confirmed against the
	# original file.
	with gr.Blocks(theme=theme) as iface:
	with gr.Row():
	with gr.Column():
	# Model selection
	model_id_input = gr.Textbox(
	label="Hugging Face model id",
	placeholder="cardiffnlp/twitter-roberta-base-sentiment-latest",
	)

	# TODO: Add supported model pairs: Text Classification - text-classification
	# model_type is not wired to any callback in this section.
	model_type = gr.Dropdown(
	label="Hugging Face model type",
	choices=[
	("Auto-detect", 0),
	("Text Classification", 1),
	],
	value=0,
	)
	# Hidden until try_validate returns a test prediction for it.
	example_labels = gr.Label(label='Model pipeline test prediction result', visible=False)

	with gr.Column():
	# Dataset selection
	dataset_id_input = gr.Textbox(
	label="Hugging Face dataset id",
	placeholder="tweet_eval",
	)

	# try_validate may replace the choices of the two dropdowns below when
	# the requested config or split does not exist.
	dataset_config_input = gr.Dropdown(
	label="Hugging Face dataset subset",
	choices=[
	"default",
	],
	allow_custom_value=True,
	value="default",
	)

	dataset_split_input = gr.Dropdown(
	label="Hugging Face dataset split",
	choices=[
	"test",
	],
	allow_custom_value=True,
	value="test",
	)

	# Hidden until try_validate returns a label mapping table for it.
	id2label_mapping_dataframe = gr.DataFrame(visible=False)

	with gr.Row():
	with gr.Accordion("Advance", open=False):
	# Passed to try_submit as its `local` argument.
	run_local = gr.Checkbox(value=True, label="Run in this Space")
	# JSON text; try_validate parses it and writes the completed mapping back.
	column_mapping_input = gr.Textbox(
	value="",
	lines=5,
	label="Column mapping",
	placeholder="Description of mapping of columns in model to dataset, in json format, e.g.:\n"
	'{\n'
	' "text": "context",\n'
	' "label": {0: "Positive", 1: "Negative"}\n'
	'}',
	)

	with gr.Row():
	validate_btn = gr.Button("Validate model and dataset", variant="primary")
	# Starts disabled; try_validate enables it once validation has passed.
	run_btn = gr.Button(
	"Submit evaluation task",
	variant="primary",
	interactive=False,
	)
	# The order of `outputs` must match the 6-tuple returned by try_validate:
	# config, split, submit button, prediction preview, label mapping preview,
	# column mapping.
	validate_btn.click(
	try_validate,
	inputs=[
	model_id_input,
	dataset_id_input,
	dataset_config_input,
	dataset_split_input,
	column_mapping_input,
	],
	outputs=[
	dataset_config_input,
	dataset_split_input,
	run_btn,
	example_labels,
	id2label_mapping_dataframe,
	column_mapping_input,
	],
	)
	# The order of `inputs` matches try_submit(m_id, d_id, config, split, local);
	# no outputs are registered for this click.
	run_btn.click(
	try_submit,
	inputs=[
	model_id_input,
	dataset_id_input,
	dataset_config_input,
	dataset_split_input,
	run_local,
	],
	)

	# Enable queuing with max_size=20, then start the app.
	iface.queue(max_size=20)
	iface.launch()