File size: 3,370 Bytes
3cf6ff9
 
 
 
 
 
 
 
 
 
 
 
2994578
3cf6ff9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import matplotlib.image as mpimg
import streamlit as st
import torch
import torchvision.transforms as transforms
from PIL import Image
from matplotlib import pyplot as plt
import pickle
from sklearn.preprocessing import LabelEncoder

# Inference device: the GPU when CUDA is available, otherwise the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the pre-trained model straight onto DEVICE so that the weights and the
# inputs end up on the same device; pinning the weights to the CPU while
# DEVICE is "cuda" leads to a device-mismatch error at inference time.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
model = torch.load('model.pt', map_location=torch.device(DEVICE))
model.eval()

# Preprocessing applied to every input image: resize to 64x64, convert to a
# tensor, then normalize with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load the label encoder used to map class indices back to their labels.
# NOTE: pickle.load executes arbitrary code from the file - only load a
# trusted 'label_encoder.pkl'.
with open('label_encoder.pkl', 'rb') as f:
    lbl_enc = pickle.load(f)

def decode_predictions(preds, encoder):
    """Turn raw model outputs into one raw prediction string per sample.

    Args:
        preds: 3-D tensor of per-class scores. The first two axes are swapped
            before decoding, so every returned string corresponds to one
            index along the original second axis, and the highest-scoring
            class is picked along the last axis.
        encoder: Object exposing ``inverse_transform``; it is called with a
            one-element list holding the class index shifted down by one.

    Returns:
        A list of strings. Class index 0 is rendered as ``"-"``; every other
        index ``i`` is rendered as ``encoder.inverse_transform([i - 1])[0]``.
    """
    scores = torch.softmax(preds.permute(1, 0, 2), 2)
    class_ids = torch.argmax(scores, 2).detach().cpu().numpy()

    def _symbol(class_id):
        # Index 0 is the placeholder symbol; the encoder's labels start at 1.
        shifted = class_id - 1
        if shifted == -1:
            return "-"
        return encoder.inverse_transform([shifted])[0]

    return ["".join([_symbol(class_id) for class_id in row]) for row in class_ids]

def predict_function(model, data):
    """Run a single forward pass and return the batch predictions.

    Args:
        model: Module invoked as ``model(**data)``; it must return a 2-tuple
            whose first element is the batch predictions.
        data: Mapping from keyword-argument name to tensor. The mapping is
            not modified; device-moved copies are passed to the model.

    Returns:
        A one-element list containing the batch predictions.
    """
    model.eval()

    # Send the inputs to wherever the model's weights actually live, so a
    # model loaded on the CPU still works when DEVICE is "cuda". Fall back to
    # DEVICE for a model that has no parameters.
    first_param = next(iter(model.parameters()), None)
    target_device = first_param.device if first_param is not None else DEVICE

    with torch.no_grad():
        # Build a new dict instead of overwriting the caller's entries.
        inputs = {name: tensor.to(target_device) for name, tensor in data.items()}
        batch_preds, _ = model(**inputs)
    return [batch_preds]

def clean_decoded_predictions(unclean_predictions):
    """Strip placeholder symbols and collapse repeats in a raw prediction.

    Every ``"-"`` item is dropped first. If exactly 10 characters remain the
    result is returned unchanged; otherwise runs of identical consecutive
    characters are reduced to a single character.

    NOTE(review): because placeholders are removed before repeats are
    collapsed, a genuinely doubled character is also merged unless the
    stripped text is exactly 10 characters long - confirm this is intended.

    Args:
        unclean_predictions: Iterable of characters (typically a string).

    Returns:
        The cleaned prediction string.
    """
    stripped = "".join([symbol for symbol in unclean_predictions if symbol != "-"])
    if len(stripped) == 10:
        return stripped

    last_kept = "-"
    deduped = []
    for symbol in stripped:
        if symbol != last_kept:
            deduped.append(symbol)
            last_kept = symbol
    return "".join(deduped)

def predict_captcha(model, image_path):
    """Predict the text of a single captcha image.

    Args:
        model: Network forwarded through ``predict_function``.
        image_path: Either an already-preprocessed image tensor (one image,
            or a batch of which only the first prediction is returned), or a
            path string / sequence whose first element is the path of the
            image file to read.

    Returns:
        The cleaned prediction string for the first image.
    """
    if torch.is_tensor(image_path):
        # Already preprocessed (this is what the Streamlit app passes in).
        images = image_path
        if images.dim() == 3:
            # A single image without a batch axis: add one.
            images = images.unsqueeze(0)
    else:
        path = image_path if isinstance(image_path, str) else image_path[0]

        # Preview the image being decoded, titled with its file name.
        plt.figure(figsize=(15, 5))
        plt.title(path.split("/")[-1])
        plt.imshow(mpimg.imread(path))

        # The model needs the same preprocessing as uploaded images, not the
        # raw array returned by mpimg.imread.
        with Image.open(path) as raw_image:
            images = transform(raw_image).unsqueeze(0)

    # predict_function calls model(**data), so the batch must be wrapped in a
    # dict keyed by the forward() argument name.
    # NOTE(review): assumes the model's forward takes the batch as the
    # keyword "images" - confirm against the model definition.
    batch_outputs = predict_function(model, {"images": images})

    # predict_function returns a one-element list; decode the tensor inside.
    decoded = decode_predictions(batch_outputs[0], lbl_enc)
    return clean_decoded_predictions(decoded[0])

# Define the Streamlit app
def app():
    """Render the Streamlit page: upload a captcha image and show its text."""
    st.title("Captcha Breaker Project")
    st.write("by - Pushkar Ambastha")
    st.write("Upload an image of a captcha to recognize the text")

    # Allow the user to upload an image
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is None:
        # Nothing uploaded yet; only the header is shown.
        return

    # Load the image and apply the module-level preprocessing
    img = Image.open(uploaded_file)
    img = transform(img)

    # Make a prediction with the model on a batch of one image
    with torch.no_grad():
        prediction = predict_captcha(model, img.unsqueeze(0))

    # predict_captcha already returns the decoded text, so show it as-is.
    # Converting each character with chr(int(x)) raised on letters and turned
    # digits into control characters.
    captcha_text = prediction
    st.write(f"The captcha text is: {captcha_text}")