---
license: mit
base_model:
- distilbert/distilbert-base-uncased
---
Apologies for how rough this repo is, but here's how to download everything from the Hub and run the classifier:
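The snippet imports `torch` and `huggingface_hub` directly; the repo's `train.py` presumably also needs `transformers` for the DistilBERT embeddings (the base model above), so have that installed as well.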
```python
import os
import sys
import json
import importlib.util

import torch
from huggingface_hub import hf_hub_download

# Repository ID and filenames
repo_id = "dgaff/bsky_user_classifier"
files_to_download = {
    "model_weights": "multioutput_regressor.pth",
    "train_script": "train.py",
    "data_processing": "data_processing.py",
    "utils": "utils.py",
    "label_mappings": "label_mappings.json",
}

# Download necessary files
model_weights_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["model_weights"])
train_script_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["train_script"])
data_processing_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["data_processing"])
util_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["utils"])
label_mappings_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["label_mappings"])

# Update sys.path to include dependencies
for path in [data_processing_path, util_path]:
    dir_path = os.path.dirname(path)
    if dir_path not in sys.path:
        sys.path.append(dir_path)

# Load train.py as a module
spec = importlib.util.spec_from_file_location("train_module", train_script_path)
train_module = importlib.util.module_from_spec(spec)
sys.modules["train_module"] = train_module
spec.loader.exec_module(train_module)

# Load label mappings
with open(label_mappings_path) as f:
    label_mappings = json.load(f)

# Initialize the model
hidden_size = 768  # Ensure this matches your model's configuration
num_outputs = 23   # Update if different
model = train_module.MultiOutputRegressor(hidden_size=hidden_size, num_outputs=num_outputs)

# Load weights and set model to evaluation mode
model.load_state_dict(torch.load(model_weights_path, map_location=torch.device('cpu')))
model.eval()

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Prepare input sentences and generate embeddings
new_sentences = [
    "This is a test sentence.",
    "Another example of a sentence to predict."
]
embedder = train_module.EmbeddingGenerator()
new_embeddings = embedder.generate_embeddings(new_sentences)
new_embeddings_tensor = torch.tensor(new_embeddings, dtype=torch.float).to(device)

# Generate predictions
with torch.no_grad():
    predictions = model(new_embeddings_tensor).cpu().numpy()

# Map predictions to labels and print results
for sentence, pred in zip(new_sentences, predictions):
    label_pred_dict = {label_mappings["id2label"][str(i)]: float(pred[i]) for i in range(len(pred))}
    print(f"Sentence: {sentence}")
    print("Predictions:")
    for label, value in label_pred_dict.items():
        print(f"  {label}: {value}")
    print()
```
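The final loop prints all 23 label scores for every sentence. If you only care about the strongest signals, a small post-processing pass over the same `predictions` array does it; this is just an illustrative sketch (the `top_k` cutoff is arbitrary), not something shipped with the repo:

```python
# Illustrative only: keep each sentence's top few labels, highest score first
top_k = 5  # arbitrary cutoff, adjust to taste
for sentence, pred in zip(new_sentences, predictions):
    scored = {label_mappings["id2label"][str(i)]: float(p) for i, p in enumerate(pred)}
    best = sorted(scored.items(), key=lambda kv: kv[1], reverse=True)[:top_k]
    print(sentence)
    for label, value in best:
        print(f"  {label}: {value:.3f}")
```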
I'll do better next time.