---
license: mit
base_model:
- distilbert/distilbert-base-uncased
---

Deepest apologies for how fucked up this is, but:

```python
import os
import sys
import json
import torch
from huggingface_hub import hf_hub_download
import importlib.util

# Repository ID and the files this example needs from it.
repo_id = "dgaff/bsky_user_classifier"
files_to_download = {
    "model_weights": "multioutput_regressor.pth",
    "train_script": "train.py",
    "data_processing": "data_processing.py",
    "utils": "utils.py",
    "label_mappings": "label_mappings.json",
}

# Download every file once; hf_hub_download returns the local path of each.
paths = {
    key: hf_hub_download(repo_id=repo_id, filename=filename)
    for key, filename in files_to_download.items()
}

# Make the directories holding data_processing.py and utils.py importable
# before train.py is executed (presumably train.py imports them -- verify
# against the repository).
for path in (paths["data_processing"], paths["utils"]):
    dir_path = os.path.dirname(path)
    if dir_path not in sys.path:
        sys.path.append(dir_path)

# Load train.py as a module.
# NOTE: this executes code downloaded from the Hub; only run it if you trust
# the repository.
spec = importlib.util.spec_from_file_location("train_module", paths["train_script"])
train_module = importlib.util.module_from_spec(spec)
sys.modules["train_module"] = train_module
spec.loader.exec_module(train_module)

# Load label mappings (the "id2label" table is keyed by stringified indices).
with open(paths["label_mappings"], encoding="utf-8") as f:
    label_mappings = json.load(f)
id2label = label_mappings["id2label"]

# Initialize the model.
hidden_size = 768  # Ensure this matches your model's configuration
num_outputs = 23  # Update if different
model = train_module.MultiOutputRegressor(hidden_size=hidden_size, num_outputs=num_outputs)

# Load weights and set the model to evaluation mode.
# torch.load is pickle-based; weights_only=True restricts unpickling to
# tensors and plain containers, which is all a state_dict should contain.
state_dict = torch.load(
    paths["model_weights"],
    map_location=torch.device("cpu"),
    weights_only=True,
)
model.load_state_dict(state_dict)
model.eval()

# Move the model to the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Prepare input sentences and generate embeddings.
new_sentences = [
    "This is a test sentence.",
    "Another example of a sentence to predict.",
]
embedder = train_module.EmbeddingGenerator()
new_embeddings = embedder.generate_embeddings(new_sentences)
new_embeddings_tensor = torch.tensor(new_embeddings, dtype=torch.float).to(device)

# Generate predictions without tracking gradients.
with torch.no_grad():
    predictions = model(new_embeddings_tensor).cpu().numpy()

# Map each output index to its label name and print the results.
for sentence, pred in zip(new_sentences, predictions):
    label_pred_dict = {id2label[str(i)]: float(value) for i, value in enumerate(pred)}
    print(f"Sentence: {sentence}")
    print("Predictions:")
    for label, value in label_pred_dict.items():
        print(f" {label}: {value}")
    print()
```

I'll do better next time