---
license: mit
base_model:
- distilbert/distilbert-base-uncased
---
Apologies for how rough this repo is, but here's how to download everything from the Hub and run the classifier:
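The snippet imports `torch` and `huggingface_hub` directly; the repo's `train.py` presumably also needs `transformers` for the DistilBERT embeddings (the base model above), so have that installed as well.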
```python
import os
import sys
import json
import importlib.util

import torch
from huggingface_hub import hf_hub_download

# Repository ID and filenames
repo_id = "dgaff/bsky_user_classifier"
files_to_download = {
    "model_weights": "multioutput_regressor.pth",
    "train_script": "train.py",
    "data_processing": "data_processing.py",
    "utils": "utils.py",
    "label_mappings": "label_mappings.json",
}

# Download necessary files
model_weights_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["model_weights"])
train_script_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["train_script"])
data_processing_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["data_processing"])
util_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["utils"])
label_mappings_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["label_mappings"])

# Update sys.path to include dependencies
for path in [data_processing_path, util_path]:
    dir_path = os.path.dirname(path)
    if dir_path not in sys.path:
        sys.path.append(dir_path)

# Load train.py as a module
spec = importlib.util.spec_from_file_location("train_module", train_script_path)
train_module = importlib.util.module_from_spec(spec)
sys.modules["train_module"] = train_module
spec.loader.exec_module(train_module)

# Load label mappings
with open(label_mappings_path) as f:
    label_mappings = json.load(f)

# Initialize the model
hidden_size = 768  # Ensure this matches your model's configuration
num_outputs = 23   # Update if different
model = train_module.MultiOutputRegressor(hidden_size=hidden_size, num_outputs=num_outputs)

# Load weights and set model to evaluation mode
model.load_state_dict(torch.load(model_weights_path, map_location=torch.device('cpu')))
model.eval()

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Prepare input sentences and generate embeddings
new_sentences = [
    "This is a test sentence.",
    "Another example of a sentence to predict."
]
embedder = train_module.EmbeddingGenerator()
new_embeddings = embedder.generate_embeddings(new_sentences)
new_embeddings_tensor = torch.tensor(new_embeddings, dtype=torch.float).to(device)

# Generate predictions
with torch.no_grad():
    predictions = model(new_embeddings_tensor).cpu().numpy()

# Map predictions to labels and print results
for sentence, pred in zip(new_sentences, predictions):
    label_pred_dict = {label_mappings["id2label"][str(i)]: float(pred[i]) for i in range(len(pred))}
    print(f"Sentence: {sentence}")
    print("Predictions:")
    for label, value in label_pred_dict.items():
        print(f"  {label}: {value}")
    print()
```
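The final loop prints all 23 label scores for every sentence. If you only care about the strongest signals, a small post-processing pass over the same `predictions` array does it; this is just an illustrative sketch (the `top_k` cutoff is arbitrary), not something shipped with the repo:

```python
# Illustrative only: keep each sentence's top few labels, highest score first
top_k = 5  # arbitrary cutoff, adjust to taste
for sentence, pred in zip(new_sentences, predictions):
    scored = {label_mappings["id2label"][str(i)]: float(p) for i, p in enumerate(pred)}
    best = sorted(scored.items(), key=lambda kv: kv[1], reverse=True)[:top_k]
    print(sentence)
    for label, value in best:
        print(f"  {label}: {value:.3f}")
```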
I'll do better next time.