nhradek committed on
Commit
c633b5a
1 Parent(s): 94ca3b5

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. .DS_Store +0 -0
  2. cgi_classification_app.py +180 -46
  3. wavelet_classifier.pkl +3 -0
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
cgi_classification_app.py CHANGED
@@ -8,63 +8,197 @@ Original file is located at
8
  """
9
 
10
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  from PIL import Image
 
12
  import numpy as np
13
- from PIL import Image
14
- from scipy.fftpack import fft2
15
- from tensorflow.keras.models import load_model, Model
16
  from xgboost import XGBClassifier
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- # classifier
19
- xgb_clf = XGBClassifier()
20
- xgb_clf.load_model("xgb_cgi_classifier.json")
21
-
22
-
23
- # Function to apply Fourier transform
24
- def apply_fourier_transform(image):
25
- image = np.array(image)
26
- fft_image = fft2(image)
27
- return np.abs(fft_image)
28
-
29
-
30
- def preprocess_image(image):
31
- try:
32
- image = Image.fromarray(image)
33
- image = image.convert("L")
34
- image = image.resize((256, 256))
35
- image = apply_fourier_transform(image)
36
- image = np.expand_dims(
37
- image, axis=-1
38
- ) # Expand dimensions to match model input shape
39
- image = np.expand_dims(image, axis=0) # Expand to add batch dimension
40
- return image
41
- except Exception as e:
42
- print(f"Error processing image: {e}")
43
- return None
44
-
45
-
46
- # Function to load embedding model and calculate embeddings
47
- def calculate_embeddings(image, model_path="embedding_modelv2.keras"):
48
- # Load the trained model
49
- model = load_model(model_path)
50
-
51
- # Remove the final classification layer to get embeddings
52
- embedding_model = Model(inputs=model.input, outputs=model.output)
53
 
54
- # Preprocess the image
55
- preprocessed_image = preprocess_image(image)
56
- # Calculate embeddings
57
- embeddings = embedding_model.predict(preprocessed_image)
58
 
59
- return embeddings
60
 
61
 
62
  def classify_image(image):
63
- embeddings = calculate_embeddings(image)
 
64
  # Convert to 2D array for model input
65
- probabilities = xgb_clf.predict_proba(embeddings)[0]
66
  labels = ["Photo", "CGI"]
67
- return {f"{labels[i]}": prob for i, prob in enumerate(probabilities)}
68
 
69
 
70
  interface = gr.Interface(
 
8
  """
9
 
10
  import gradio as gr
11
+
12
+ from sklearn.model_selection import train_test_split
13
+ from sklearn.metrics import (
14
+ accuracy_score,
15
+ f1_score,
16
+ confusion_matrix,
17
+ ConfusionMatrixDisplay,
18
+ )
19
+ from sklearn.preprocessing import StandardScaler
20
+ from sklearn.decomposition import PCA
21
+ import umap
22
+ import pywt
23
+ import os
24
  from PIL import Image
25
+ import matplotlib.pyplot as plt
26
  import numpy as np
 
 
 
27
  from xgboost import XGBClassifier
28
+ from sklearn.model_selection import cross_val_score, KFold
29
+ from sklearn.dummy import DummyClassifier
30
+ from sklearn.ensemble import RandomForestClassifier
31
+ from sklearn.svm import SVC
32
+ from sklearn.neighbors import KNeighborsClassifier
33
+ from sklearn.model_selection import train_test_split
34
+ from sklearn.metrics import classification_report
35
+ import plotly.express as px
36
+ import pandas as pd
37
+ import joblib
38
+ from tqdm import tqdm
39
+ import lzma
40
+
41
+
42
class WaveletClassifier:
    """Binary image classifier (label 1 = "CGI" folder, 0 = anything else).

    Pipeline: every image is converted to greyscale, ``pywt.dwt`` is run on
    the pixel array once per configured wavelet, the detail coefficients are
    flattened and concatenated into one feature vector, the vectors are
    reduced with UMAP, and a k-nearest-neighbours classifier is trained on
    the resulting embeddings.

    Instances are persisted whole with joblib (see ``save_model``), so the
    set of instance attributes assigned in ``__init__`` is kept stable.
    """

    # Lower-case file-name suffixes accepted by load_images_from_folder.
    _IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png", ".webp")

    def __init__(
        self,
        wavelets=("db4", "db10"),
        umap_n_neighbors=16,
        umap_n_components=32,
        random_state=42,
    ):
        """Configure the feature extractor, the reducer and the classifier.

        Args:
            wavelets: Iterable of PyWavelets wavelet names; one DWT is run
                per name and the detail coefficients are concatenated.
            umap_n_neighbors: ``n_neighbors`` passed to ``umap.UMAP``.
            umap_n_components: ``n_components`` passed to ``umap.UMAP``.
            random_state: Seed used for UMAP, the train/test split and the
                cross-validation shuffling.
        """
        # Immutable default + private copy: no list is shared between
        # instances or with the caller.
        self.wavelets = list(wavelets)
        self.umap_n_neighbors = umap_n_neighbors
        self.umap_n_components = umap_n_components
        self.random_state = random_state
        self.reducer = self._new_reducer()
        self.classifier = KNeighborsClassifier(n_neighbors=7)  # Default classifier

    def _new_reducer(self):
        """Return an unfitted UMAP reducer built from this instance's settings."""
        return umap.UMAP(
            n_neighbors=self.umap_n_neighbors,
            n_components=self.umap_n_components,
            random_state=self.random_state,
        )

    def load_images_from_folder(self, folder):
        """Load every supported image in ``folder``, resized to 512x512.

        Args:
            folder: Directory path. The label is 1 when the path contains
                the substring ``"CGI"`` and 0 otherwise.

        Returns:
            ``(images, labels)``: a list of PIL images and a same-length
            list of integer labels.
        """
        images = []
        print(f"Loading images from {folder}")
        # Sorted so that the truncation done when balancing classes does not
        # depend on the arbitrary order returned by os.listdir.
        for filename in tqdm(sorted(os.listdir(folder))):
            if not filename.lower().endswith(self._IMAGE_SUFFIXES):
                continue
            # resize() decodes the pixels and returns a new image, so the
            # underlying file can be closed as soon as it has been resized.
            with Image.open(os.path.join(folder, filename)) as img:
                images.append(img.resize((512, 512)))
        label = 1 if "CGI" in folder else 0
        return images, [label] * len(images)

    def extract_wavelet_features(self, images):
        """Turn PIL images into a 2-D array of wavelet detail coefficients.

        Args:
            images: Iterable of PIL images. They must share the same size for
                the rows to stack into a rectangular array.

        Returns:
            ``np.ndarray`` with one row per image; an empty 1-D array when
            ``images`` is empty.
        """
        all_features = []
        for img in images:
            img_array = np.array(img.convert("L"))
            # pywt.dwt returns (approximation, detail); only the detail
            # coefficients are kept.
            details = [
                pywt.dwt(img_array, wavelet)[1].ravel() for wavelet in self.wavelets
            ]
            all_features.append(np.concatenate(details) if details else np.empty(0))
        return np.array(all_features)

    def _load_balanced_features(self, folder1, folder2):
        """Load both folders, truncate to equal size, and extract features."""
        images1, labels1 = self.load_images_from_folder(folder1)
        images2, labels2 = self.load_images_from_folder(folder2)

        # Keep the same number of samples from each folder.
        min_length = min(len(images1), len(images2))
        images = images1[:min_length] + images2[:min_length]
        labels = labels1[:min_length] + labels2[:min_length]
        return self.extract_wavelet_features(images), labels

    def _embed(self, images):
        """Project images through the feature extractor and the fitted reducer."""
        return self.reducer.transform(self.extract_wavelet_features(images))

    def fit(self, train_folder1, train_folder2):
        """Fit the reducer and the classifier, then print hold-out metrics.

        Args:
            train_folder1: Folder with images of one class.
            train_folder2: Folder with images of the other class.

        Returns:
            ``self``, to allow call chaining.
        """
        features, labels = self._load_balanced_features(train_folder1, train_folder2)

        # NOTE(review): the reducer is fitted on all samples before the
        # split, so the metrics printed below are measured on points the
        # reducer has already seen.
        embeddings = self.reducer.fit_transform(features)
        X_train, X_test, y_train, y_test = train_test_split(
            embeddings, labels, test_size=0.2, random_state=self.random_state
        )

        self.classifier.fit(X_train, y_train)

        acc = self.classifier.score(X_test, y_test)
        y_pred = self.classifier.predict(X_test)
        print(f"Classifier accuracy = {acc}")

        f1 = f1_score(y_test, y_pred)
        print(f"Classifier F1 = {f1}")
        print(classification_report(y_test, y_pred))
        return self

    def predict(self, images):
        """Return the predicted label for each PIL image in ``images``."""
        return self.classifier.predict(self._embed(images))

    def predict_proba(self, images):
        """Return the classifier's per-class probabilities for each image.

        One row per image, as produced by the underlying classifier's
        ``predict_proba``.
        """
        return self.classifier.predict_proba(self._embed(images))

    def score(self, test_folder):
        """Return the classifier's accuracy on the images in ``test_folder``.

        Labels are derived from the folder name exactly as in
        ``load_images_from_folder``.
        """
        images, labels = self.load_images_from_folder(test_folder)
        return self.classifier.score(self._embed(images), labels)

    def cross_val_score(self, folder1, folder2, n_splits=5):
        """Print and return k-fold cross-validation accuracy.

        Args:
            folder1: Folder with images of one class.
            folder2: Folder with images of the other class.
            n_splits: Number of folds.

        Returns:
            Array of per-fold accuracy scores.
        """
        features, labels = self._load_balanced_features(folder1, folder2)

        # A throw-away reducer is used so that evaluating does not refit
        # self.reducer and desynchronise it from an already trained
        # classifier.
        embeddings = self._new_reducer().fit_transform(features)

        kfold = KFold(n_splits=n_splits, shuffle=True, random_state=self.random_state)
        # Inside a method body this name resolves to the module-level
        # sklearn function, not to this method.
        scores = cross_val_score(
            self.classifier, embeddings, labels, cv=kfold, scoring="accuracy"
        )

        print("Cross-validation scores:", scores)
        print("Average cross-validation score:", scores.mean())
        return scores

    def save_model(self, filename):
        """Serialise this whole instance to ``filename`` with LZMA level 9."""
        joblib.dump(self, filename, compress=("lzma", 9))

    @staticmethod
    def load_model(filename):
        """Load and return an object previously written by ``save_model``.

        SECURITY: joblib files are pickle-based and can execute arbitrary
        code when loaded; only load files from a trusted source.
        """
        return joblib.load(filename)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
 
 
 
 
192
 
193
+ model = WaveletClassifier.load_model("wavelet_classifier.pkl")
194
 
195
 
196
def classify_image(image):
    """Classify one image array and return a label -> probability mapping.

    The array is wrapped in a PIL image, resized to 512x512, and passed as a
    single-item batch to the module-level ``model``. The first (only) row of
    the returned probabilities is paired positionally with "Photo" and "CGI".
    """
    resized = Image.fromarray(image).resize((512, 512))
    scores = model.predict_proba([resized])[0]
    class_names = ["Photo", "CGI"]
    return {class_names[idx]: score for idx, score in enumerate(scores)}
202
 
203
 
204
  interface = gr.Interface(
wavelet_classifier.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db91c7f1d1841b5820ef7d9a043dfebf4ecf89568279fd1f79c490a13a206c0d
3
+ size 465142385