File size: 2,886 Bytes
12ba572
 
 
 
cb65d6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
---
license: mit
base_model:
- distilbert/distilbert-base-uncased
---

Deepest apologies for how convoluted the loading process is, but this is how to download the model and run inference with it:

```python
import os
import sys
import json
import torch
from huggingface_hub import hf_hub_download
import importlib.util

# End-to-end inference example: fetch the checkpoint and its supporting
# source files from the Hub, rebuild the model, and print per-label scores
# for a couple of sample sentences.

# Repository ID and filenames
repo_id = "dgaff/bsky_user_classifier"
files_to_download = {
    "model_weights": "multioutput_regressor.pth",
    "train_script": "train.py",
    "data_processing": "data_processing.py",
    "utils": "utils.py",
    "label_mappings": "label_mappings.json",
}

# Download necessary files; each variable holds the path returned by
# hf_hub_download for the corresponding file.
model_weights_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["model_weights"])
train_script_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["train_script"])
data_processing_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["data_processing"])
util_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["utils"])
label_mappings_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["label_mappings"])

# Make the directories holding the helper modules importable.
# NOTE(review): presumably train.py imports data_processing and utils as
# top-level modules -- confirm against train.py.
for path in [data_processing_path, util_path]:
    dir_path = os.path.dirname(path)
    if dir_path not in sys.path:
        sys.path.append(dir_path)

# Load train.py as a module.
# SECURITY: exec_module runs the downloaded train.py with full privileges;
# only use this with a repository you trust.
spec = importlib.util.spec_from_file_location("train_module", train_script_path)
train_module = importlib.util.module_from_spec(spec)
# Register before executing so the module can be resolved by name while its
# own top-level code runs.
sys.modules["train_module"] = train_module
spec.loader.exec_module(train_module)

# Load label mappings (explicit encoding so the result does not depend on
# the platform's default locale).
with open(label_mappings_path, encoding="utf-8") as f:
    label_mappings = json.load(f)

# Pick the device once so the checkpoint is loaded straight onto it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model
hidden_size = 768  # Ensure this matches your model's configuration
num_outputs = 23   # Update if different
model = train_module.MultiOutputRegressor(hidden_size=hidden_size, num_outputs=num_outputs)

# Load weights and set model to evaluation mode.
# weights_only=True restricts unpickling to tensors and plain containers, so
# a tampered checkpoint cannot execute arbitrary code while being loaded.
state_dict = torch.load(model_weights_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.to(device)
model.eval()

# Prepare input sentences and generate embeddings
new_sentences = [
    "This is a test sentence.",
    "Another example of a sentence to predict."
]
embedder = train_module.EmbeddingGenerator()
new_embeddings = embedder.generate_embeddings(new_sentences)
new_embeddings_tensor = torch.tensor(new_embeddings, dtype=torch.float).to(device)

# Generate predictions; no_grad skips autograd bookkeeping during inference.
with torch.no_grad():
    predictions = model(new_embeddings_tensor).cpu().numpy()

# Map predictions to labels and print results.
# id2label is read from JSON, so its keys are strings rather than ints.
id2label = label_mappings["id2label"]
for sentence, pred in zip(new_sentences, predictions):
    label_pred_dict = {id2label[str(i)]: float(value) for i, value in enumerate(pred)}
    print(f"Sentence: {sentence}")
    print("Predictions:")
    for label, value in label_pred_dict.items():
        print(f"  {label}: {value}")
    print()

```

I'll do better next time