import numpy as np
import tensorflow as tf
from datasets import load_dataset
from huggingface_hub import create_repo, from_pretrained_keras, push_to_hub_keras
from tensorflow import keras
from tensorflow.keras import layers

# Create (or reuse) the dataset repos and the model repo on the Hub
labeled_samples_repo_id = create_repo("actlearn_labeled_samples", exist_ok=True, repo_type="dataset").repo_id
unlabeled_samples_repo_id = create_repo("actlearn_unlabeled_samples", exist_ok=True, repo_type="dataset").repo_id
to_label_samples_repo_id = create_repo("actlearn_to_label_samples", exist_ok=True, repo_type="dataset").repo_id
test_dataset_repo_id = create_repo("actlearn_test_mnist", exist_ok=True, repo_type="dataset").repo_id
model_repo_id = create_repo("actlearn_mnist_model", exist_ok=True).repo_id
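# How the repos above are used in this script:
# - actlearn_labeled_samples: current labeled pool, used as training data
# - actlearn_unlabeled_samples: pool of unlabeled samples the model scores
# - actlearn_to_label_samples: the least-confident samples, queued for manual labeling
# - actlearn_test_mnist: held-out test set used for evaluation
# - actlearn_mnist_model: destination for the retrained Keras model
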
def to_numpy(examples):
    # Convert the PIL images to binarized (1-bit) numpy arrays
    examples["pixel_values"] = [np.array(image.convert("1")) for image in examples["image"]]
    return examples


def preprocess():
    # Load the labeled training samples and the held-out test set from the Hub
    train_dataset = load_dataset(labeled_samples_repo_id)["train"]
    train_dataset = train_dataset.map(to_numpy, batched=True)
    test_dataset = load_dataset(test_dataset_repo_id)["test"]
    test_dataset = test_dataset.map(to_numpy, batched=True)

    x_train = train_dataset["pixel_values"]
    y_train = train_dataset["label"]
    x_test = test_dataset["pixel_values"]
    y_test = test_dataset["label"]

    # Add a channel dimension: (28, 28) -> (28, 28, 1)
    x_train = np.expand_dims(x_train, -1)
    x_test = np.expand_dims(x_test, -1)

    # One-hot encode the labels
    num_classes = 10
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    return x_train, y_train, x_test, y_test

def train():
    input_shape = (28, 28, 1)
    x_train, y_train, x_test, y_test = preprocess()
    num_classes = 10

    # Small convolutional classifier for MNIST
    model = keras.Sequential(
        [
            keras.Input(shape=input_shape),
            layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Flatten(),
            layers.Dropout(0.5),
            layers.Dense(num_classes, activation="softmax"),
        ]
    )
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    model.fit(x_train, y_train, batch_size=128, epochs=4, validation_split=0.1)

    # Evaluate on the held-out test set
    score = model.evaluate(x_test, y_test, verbose=0)
    print("Test loss:", score[0])
    print("Test accuracy:", score[1])

    # Push the retrained model to the Hub
    push_to_hub_keras(model, model_repo_id)

def find_samples_to_label():
    loaded_model = from_pretrained_keras(model_repo_id)
    loaded_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

    unlabeled_data = load_dataset(unlabeled_samples_repo_id)["train"]
    processed_data = unlabeled_data.map(to_numpy, batched=True)
    processed_data = processed_data["pixel_values"]
    processed_data = tf.expand_dims(processed_data, -1)

    # Get predictions for all unlabeled samples,
    # then pick the 5 samples with the lowest prediction confidence
    preds = loaded_model.predict(processed_data)
    top_pred_confs = 1 - np.max(preds, axis=1)
    idx_to_label = np.argpartition(top_pred_confs, -5)[-5:]

    # Upload those samples to the dataset awaiting labels
    to_label_data = unlabeled_data.select(idx_to_label)
    to_label_data.push_to_hub(to_label_samples_repo_id)

    # Remove them from the pool of unlabeled samples
    idx_set = set(idx_to_label)
    unlabeled_data = unlabeled_data.select(i for i in range(len(unlabeled_data)) if i not in idx_set)
    unlabeled_data.push_to_hub(unlabeled_samples_repo_id)

def main():
    train()
    find_samples_to_label()


if __name__ == "__main__":
    main()
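# Each run of this script performs one active-learning iteration: retrain the model
# on the current labeled set, evaluate it on the test set, and queue the five
# least-confident unlabeled samples for labeling.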