Update README.md
README.md CHANGED
````diff
@@ -26,44 +26,38 @@ import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 from joblib import load
 
-
-model = AutoModelForSequenceClassification.from_pretrained(
-tokenizer = AutoTokenizer.from_pretrained(
+MODEL_PATH = "littleworth/esm2_t6_8M_UR50D_pep2rec_cppp"
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
 
-
-label_encoder = load(
+LABEL_ENCODER_PATH = f"{MODEL_PATH}/label_encoder.joblib"
+label_encoder = load(LABEL_ENCODER_PATH)
 
-# Define the input peptide sequence
-input_sequence = "GNLIVVGRVIMS" # Example peptide sequence
 
-
+input_sequence = "GNLIVVGRVIMS"
+
 inputs = tokenizer(input_sequence, return_tensors="pt", truncation=True, padding=True)
 
-# Make the prediction
 with torch.no_grad():
     outputs = model(**inputs)
-
-
-predicted_class_idx = torch.argmax(probabilities, dim=1).item()
+probabilities = torch.softmax(outputs.logits, dim=1)
+predicted_class_idx = probabilities.argmax(dim=1).item()
 
-# Decode the predicted class index to the original label
 predicted_class = label_encoder.inverse_transform([predicted_class_idx])[0]
 
-# Get the probabilities for each class
 class_probabilities = probabilities.squeeze().tolist()
 class_labels = label_encoder.inverse_transform(range(len(class_probabilities)))
 
-
-
-
-sorted_class_probabilities = [class_probabilities[i] for i in sorted_indices]
+sorted_indices = torch.argsort(probabilities, descending=True).squeeze()
+sorted_class_labels = [class_labels[i] for i in sorted_indices.tolist()]
+sorted_class_probabilities = probabilities.squeeze()[sorted_indices].tolist()
 
-# Print the predicted class and probabilities
 print(f"Predicted Receptor Class: {predicted_class}")
 print("Top 10 Class Probabilities:")
 for label, prob in zip(sorted_class_labels[:10], sorted_class_probabilities[:10]):
     print(f"{label}: {prob:.4f}")
 
+
 ```
 
 Which gives this output:
````
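One caveat about the updated snippet: `from_pretrained` resolves `MODEL_PATH` against the Hugging Face Hub, but `joblib.load` only reads local files, so `f"{MODEL_PATH}/label_encoder.joblib"` works only when the repository has already been cloned into a local directory of that name. A minimal sketch of fetching the file from the Hub first, assuming the repo ships `label_encoder.joblib` as the snippet implies:

```python
# Minimal sketch (assumes label_encoder.joblib is a file in the Hub repo).
from huggingface_hub import hf_hub_download
from joblib import load

MODEL_PATH = "littleworth/esm2_t6_8M_UR50D_pep2rec_cppp"

# Download the single file into the local HF cache and get its path.
encoder_path = hf_hub_download(repo_id=MODEL_PATH, filename="label_encoder.joblib")
label_encoder = load(encoder_path)
```

`hf_hub_download` caches the file locally, so repeated runs do not re-download it.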
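For readers puzzled by the `inverse_transform` calls: the serialized object is presumably a scikit-learn `LabelEncoder`, which maps receptor class names to integer indices at training time and back again at inference. A hypothetical sketch of how such a file is produced (the class names here are placeholders, not the model's real labels):

```python
# Hypothetical sketch of producing label_encoder.joblib during training.
from joblib import dump
from sklearn.preprocessing import LabelEncoder

receptor_classes = ["ReceptorA", "ReceptorB", "ReceptorC"]  # placeholder labels

label_encoder = LabelEncoder()
label_encoder.fit(receptor_classes)  # assigns each class name an integer index

# inverse_transform maps indices back to names, which is what the README
# snippet uses to decode the argmax output and to label the probabilities.
print(label_encoder.inverse_transform([0, 1, 2]))

dump(label_encoder, "label_encoder.joblib")
```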