nhradek committed on
Commit
8cab145
1 Parent(s): c633b5a

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. cgi_classification_app.py +46 -180
cgi_classification_app.py CHANGED
@@ -8,197 +8,63 @@ Original file is located at
8
  """
9
 
10
  import gradio as gr
11
-
12
- from sklearn.model_selection import train_test_split
13
- from sklearn.metrics import (
14
- accuracy_score,
15
- f1_score,
16
- confusion_matrix,
17
- ConfusionMatrixDisplay,
18
- )
19
- from sklearn.preprocessing import StandardScaler
20
- from sklearn.decomposition import PCA
21
- import umap
22
- import pywt
23
- import os
24
  from PIL import Image
25
- import matplotlib.pyplot as plt
26
  import numpy as np
 
 
 
27
  from xgboost import XGBClassifier
28
- from sklearn.model_selection import cross_val_score, KFold
29
- from sklearn.dummy import DummyClassifier
30
- from sklearn.ensemble import RandomForestClassifier
31
- from sklearn.svm import SVC
32
- from sklearn.neighbors import KNeighborsClassifier
33
- from sklearn.model_selection import train_test_split
34
- from sklearn.metrics import classification_report
35
- import plotly.express as px
36
- import pandas as pd
37
- import joblib
38
- from tqdm import tqdm
39
- import lzma
40
-
41
-
42
- class WaveletClassifier:
43
- def __init__(
44
- self,
45
- wavelets=["db4", "db10"],
46
- umap_n_neighbors=16,
47
- umap_n_components=32,
48
- random_state=42,
49
- ):
50
- self.wavelets = wavelets
51
- self.umap_n_neighbors = umap_n_neighbors
52
- self.umap_n_components = umap_n_components
53
- self.random_state = random_state
54
- self.reducer = umap.UMAP(
55
- n_neighbors=self.umap_n_neighbors,
56
- n_components=self.umap_n_components,
57
- random_state=self.random_state,
58
- )
59
- self.classifier = KNeighborsClassifier(n_neighbors=7) # Default classifier
60
-
61
- def load_images_from_folder(self, folder):
62
- images = []
63
- labels = []
64
- print(f"Loading images from {folder}")
65
- for filename in tqdm(os.listdir(folder)):
66
- if not (
67
- filename.endswith(".jpg")
68
- or filename.endswith(".png")
69
- or filename.endswith("jpeg")
70
- or filename.endswith("webp")
71
- ):
72
- continue
73
- img = Image.open(os.path.join(folder, filename))
74
- img = img.resize((512, 512))
75
- if img is not None:
76
- images.append(img)
77
- labels.append(
78
- 1 if "CGI" in folder else 0
79
- ) # Label 1 when the folder path contains "CGI", otherwise 0
80
- return images, labels
81
-
82
- def extract_wavelet_features(self, images):
83
- all_features = []
84
- for img in images:
85
- img_gray = img.convert("L")
86
- img_array = np.array(img_gray)
87
- features = []
88
- for wavelet in self.wavelets:
89
- cA, cD = pywt.dwt(img_array, wavelet)
90
- features.extend(cD.flatten())
91
- all_features.append(features)
92
- return np.array(all_features)
93
-
94
- def fit(self, train_folder1, train_folder2):
95
- # Load images and extract features
96
- images1, labels1 = self.load_images_from_folder(train_folder1)
97
- images2, labels2 = self.load_images_from_folder(train_folder2)
98
-
99
- min_length = min(len(images1), len(images2))
100
- images1 = images1[:min_length]
101
- images2 = images2[:min_length]
102
- labels1 = labels1[:min_length]
103
- labels2 = labels2[:min_length]
104
-
105
- images = images1 + images2
106
- labels = labels1 + labels2
107
- features = self.extract_wavelet_features(images)
108
-
109
- # Apply UMAP dimensionality reduction
110
- embeddings = self.reducer.fit_transform(features)
111
- X_train, X_test, y_train, y_test = train_test_split(
112
- embeddings, labels, test_size=0.2, random_state=42
113
- )
114
-
115
- # Train the classifier
116
- self.classifier.fit(X_train, y_train)
117
-
118
- acc = self.classifier.score(X_test, y_test)
119
- y_pred = self.classifier.predict(X_test)
120
- print(f"Classifier accuracy = {acc}")
121
-
122
- f1 = f1_score(y_test, y_pred)
123
- print(f"Classifier F1 = {f1}")
124
- print(classification_report(y_test, y_pred))
125
-
126
- def predict(self, images):
127
- # Load images and extract features
128
- features = self.extract_wavelet_features(images)
129
-
130
- # Apply UMAP dimensionality reduction
131
- embeddings = self.reducer.transform(features)
132
-
133
- # Make predictions
134
- return self.classifier.predict(embeddings)
135
-
136
- def predict_proba(self, images):
137
- # Load images and extract features
138
- features = self.extract_wavelet_features(images)
139
-
140
- # Apply UMAP dimensionality reduction
141
- embeddings = self.reducer.transform(features)
142
-
143
- # Make predictions
144
- return self.classifier.predict_proba(embeddings)
145
-
146
- def score(self, test_folder):
147
- # Load images and extract features
148
- images, labels = self.load_images_from_folder(test_folder)
149
- features = self.extract_wavelet_features(images)
150
-
151
- # Apply UMAP dimensionality reduction
152
- embeddings = self.reducer.transform(features)
153
-
154
- # Evaluate the classifier
155
- return self.classifier.score(embeddings, labels)
156
-
157
- def cross_val_score(self, folder1, folder2, n_splits=5):
158
- # Load images and extract features
159
- # Load images and extract features
160
- images1, labels1 = self.load_images_from_folder(folder1)
161
- images2, labels2 = self.load_images_from_folder(folder2)
162
-
163
- min_length = min(len(images1), len(images2))
164
- images1 = images1[:min_length]
165
- images2 = images2[:min_length]
166
- labels1 = labels1[:min_length]
167
- labels2 = labels2[:min_length]
168
-
169
- images = images1 + images2
170
- labels = labels1 + labels2
171
- features = self.extract_wavelet_features(images)
172
-
173
- # Apply UMAP dimensionality reduction
174
- embeddings = self.reducer.fit_transform(features)
175
- # Perform k-fold cross-validation (k = n_splits, default 5)
176
- kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
177
- scores = cross_val_score(
178
- self.classifier, embeddings, labels, cv=kfold, scoring="accuracy"
179
- )
180
-
181
- # Print the cross-validation scores
182
- print("Cross-validation scores:", scores)
183
- print("Average cross-validation score:", scores.mean())
184
-
185
- def save_model(self, filename):
186
- joblib.dump(self, filename, compress=("lzma", 9))
187
 
188
- @staticmethod
189
- def load_model(filename):
190
- return joblib.load(filename)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
 
 
 
 
192
 
193
- model = WaveletClassifier.load_model("wavelet_classifier.pkl")
194
 
195
 
196
  def classify_image(image):
197
- image = Image.fromarray(image)
198
- probabilities = model.predict_proba([image.resize((512, 512))])
199
  # Convert to 2D array for model input
 
200
  labels = ["Photo", "CGI"]
201
- return {f"{labels[i]}": prob for i, prob in enumerate(probabilities[0])}
202
 
203
 
204
  interface = gr.Interface(
 
8
  """
9
 
10
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  from PIL import Image
 
12
  import numpy as np
13
+ from PIL import Image
14
+ from scipy.fftpack import fft2
15
+ from tensorflow.keras.models import load_model, Model
16
  from xgboost import XGBClassifier
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
+ # classifier
19
+ xgb_clf = XGBClassifier()
20
+ xgb_clf.load_model("xgb_cgi_classifier.json")
21
+
22
+
23
+ # Function to apply Fourier transform
24
+ def apply_fourier_transform(image):
25
+ image = np.array(image)
26
+ fft_image = fft2(image)
27
+ return np.abs(fft_image)
28
+
29
+
30
+ def preprocess_image(image):
31
+ try:
32
+ image = Image.fromarray(image)
33
+ image = image.convert("L")
34
+ image = image.resize((256, 256))
35
+ image = apply_fourier_transform(image)
36
+ image = np.expand_dims(
37
+ image, axis=-1
38
+ ) # Expand dimensions to match model input shape
39
+ image = np.expand_dims(image, axis=0) # Expand to add batch dimension
40
+ return image
41
+ except Exception as e:
42
+ print(f"Error processing image: {e}")
43
+ return None
44
+
45
+
46
+ # Function to load embedding model and calculate embeddings
47
+ def calculate_embeddings(image, model_path="embedding_modelv2.keras"):
48
+ # Load the trained model
49
+ model = load_model(model_path)
50
+
51
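+ # Wrap the loaded model as the embedding model. NOTE(review): outputs=model.output keeps the final layer, so nothing is removed here; presumably the saved model is already the embedding network - confirm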
52
+ embedding_model = Model(inputs=model.input, outputs=model.output)
53
 
54
+ # Preprocess the image
55
+ preprocessed_image = preprocess_image(image)
56
+ # Calculate embeddings
57
+ embeddings = embedding_model.predict(preprocessed_image)
58
 
59
+ return embeddings
60
 
61
 
62
  def classify_image(image):
63
+ embeddings = calculate_embeddings(image)
 
64
  # Convert to 2D array for model input
65
+ probabilities = xgb_clf.predict_proba(embeddings)[0]
66
  labels = ["Photo", "CGI"]
67
+ return {f"{labels[i]}": prob for i, prob in enumerate(probabilities)}
68
 
69
 
70
  interface = gr.Interface(