nhradek committed on
Commit
ca87569
1 Parent(s): 47ea9aa

Upload AI_Image_Classification.ipynb

Browse files
Files changed (1) hide show
  1. AI_Image_Classification.ipynb +856 -0
AI_Image_Classification.ipynb ADDED
@@ -0,0 +1,856 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "private_outputs": true,
7
+ "provenance": [],
8
+ "machine_shape": "hm"
9
+ },
10
+ "kernelspec": {
11
+ "name": "python3",
12
+ "display_name": "Python 3"
13
+ },
14
+ "language_info": {
15
+ "name": "python"
16
+ }
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": null,
22
+ "metadata": {
23
+ "id": "CSC6_ShCp6h9"
24
+ },
25
+ "outputs": [],
26
+ "source": [
27
+ "!unzip AI.zip\n",
28
+ "!unzip Photo.zip"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "source": [
34
+ "!pip install umap-learn\n",
35
+ "!pip install PyWavelets"
36
+ ],
37
+ "metadata": {
38
+ "id": "N6CWTCziLMbf"
39
+ },
40
+ "execution_count": null,
41
+ "outputs": []
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "source": [
46
+ "from sklearn.model_selection import train_test_split\n",
47
+ "from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay\n",
48
+ "from sklearn.preprocessing import StandardScaler\n",
49
+ "from sklearn.decomposition import PCA\n",
50
+ "import umap\n",
51
+ "import pywt"
52
+ ],
53
+ "metadata": {
54
+ "id": "53ZvG8NbATlR"
55
+ },
56
+ "execution_count": null,
57
+ "outputs": []
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "source": [
62
+ "# prompt: Create a function to load all the files in a folder as images.\n",
63
+ "\n",
64
+ "import os\n",
65
+ "from PIL import Image\n",
66
+ "def load_images_from_folder(folder):\n",
67
+ " images = []\n",
68
+ " labels = []\n",
69
+ " for filename in os.listdir(folder):\n",
70
+ " if not filename.endswith('.jpg') and not filename.endswith('.png') \\\n",
71
+ " and not filename.endswith('jpeg') and not filename.endswith('webp'):\n",
72
+ " continue\n",
73
+ " img = Image.open(os.path.join(folder,filename))\n",
74
+ " img = img.resize((512, 512))\n",
75
+ " if img is not None:\n",
76
+ " images.append(img)\n",
77
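+ " labels.append(1 if os.path.basename(os.path.normpath(folder)) == \"AI\" else 0)  # compare the last path component so 'Validation/AI' is labelled AI too\n",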
78
+ " return images, labels"
79
+ ],
80
+ "metadata": {
81
+ "id": "BH6bOWUXsi_D"
82
+ },
83
+ "execution_count": null,
84
+ "outputs": []
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "source": [
89
+ "# prompt: Can you write a function that can implement the discrete wavelet transform and display the wavelets given in an array for the image? The function should take in an image_path and a list of wavelets and perform the dwt and display the wavelets.\n",
90
+ "\n",
91
+ "import matplotlib.pyplot as plt\n",
92
+ "import numpy as np\n",
93
+ "def apply_wavelet_transform_and_display_multiple(image_path, wavelets):\n",
94
+ " # Load the image\n",
95
+ " img = Image.open(image_path).convert('L')\n",
96
+ "\n",
97
+ " # Convert image to numpy array\n",
98
+ " img_array = np.array(img)\n",
99
+ "\n",
100
+ " num_wavelets = len(wavelets)\n",
101
+ " fig, axes = plt.subplots(1, num_wavelets + 1, figsize=(5 * (num_wavelets + 1), 5))\n",
102
+ "\n",
103
+ " # Display the original image\n",
104
+ " axes[0].imshow(img_array, cmap='gray')\n",
105
+ " axes[0].set_title('Original Image')\n",
106
+ "\n",
107
+ " # Apply DWT and display wavelets\n",
108
+ " for i, wavelet in enumerate(wavelets):\n",
109
+ " cA, cD = pywt.dwt(img_array, wavelet)\n",
110
+ " axes[i + 1].imshow(cD, cmap='gray')\n",
111
+ " axes[i + 1].set_title(f'Detail Coefficients ({wavelet})')  # cD is the detail band, not the approximation\n",
112
+ "\n",
113
+ " plt.tight_layout()\n",
114
+ " plt.show()\n"
115
+ ],
116
+ "metadata": {
117
+ "id": "sBRFYk0C2nfX"
118
+ },
119
+ "execution_count": null,
120
+ "outputs": []
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "source": [
125
+ "apply_wavelet_transform_and_display_multiple('kiri-in-high-resolution-love-her-3-v0-ezejx6try3va1.webp', ['db1', 'db6', 'db10', 'db12', 'db16'])"
126
+ ],
127
+ "metadata": {
128
+ "id": "KfY3qSfkxJnS"
129
+ },
130
+ "execution_count": null,
131
+ "outputs": []
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "source": [
136
+ "# prompt: Can you write a function that given a list of images from PIL can convert them to grayscale and apply a set of wavelets using dwt and then combined them into one feature vector?\n",
137
+ "\n",
138
+ "import numpy as np\n",
139
+ "def extract_wavelet_features(images, wavelets):\n",
140
+ " all_features = []\n",
141
+ " for img in images:\n",
142
+ " img_gray = img.convert('L')\n",
143
+ " img_array = np.array(img_gray)\n",
144
+ " features = []\n",
145
+ " for wavelet in wavelets:\n",
146
+ " cA, cD = pywt.dwt(img_array, wavelet)\n",
147
+ " features.extend(cD.flatten())\n",
148
+ " all_features.append(features)\n",
149
+ " return np.array(all_features)\n"
150
+ ],
151
+ "metadata": {
152
+ "id": "ufMhM7_86IbC"
153
+ },
154
+ "execution_count": null,
155
+ "outputs": []
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "source": [
160
+ "# prompt: Apply the Fourier transform to the images from the load_images_from_folder function.\n",
161
+ "\n",
162
+ "import numpy as np\n",
163
+ "\n",
164
+ "\n",
165
+ "# Example usage (assuming 'folder_path' contains your images)\n",
166
+ "ai_images, ai_labels = load_images_from_folder('AI')\n",
167
+ "photo_images, photo_labels = load_images_from_folder('Photo')\n",
168
+ "min_length = min(len(ai_images), len(photo_images))\n",
169
+ "ai_images = ai_images[:min_length]\n",
170
+ "photo_images = photo_images[:min_length]\n",
171
+ "ai_labels = ai_labels[:min_length]\n",
172
+ "photo_labels = photo_labels[:min_length]\n",
173
+ "\n",
174
+ "print(f\"Number of AI images: {len(ai_images)}\")\n",
175
+ "print(f\"Number of Photo images: {len(photo_images)}\")\n",
176
+ "images = ai_images + photo_images\n",
177
+ "labels = ai_labels + photo_labels\n",
178
+ "features = np.array(extract_wavelet_features(images, [\"db4\", \"db10\"]))"
179
+ ],
180
+ "metadata": {
181
+ "id": "7Pfn_0-QswSh"
182
+ },
183
+ "execution_count": null,
184
+ "outputs": []
185
+ },
186
+ {
187
+ "cell_type": "code",
188
+ "source": [
189
+ "reducer = umap.UMAP(n_neighbors=16, n_components=32, random_state=42)\n",
190
+ "embeddings = reducer.fit_transform(features)"
191
+ ],
192
+ "metadata": {
193
+ "id": "xc_1hAuTLdUj"
194
+ },
195
+ "execution_count": null,
196
+ "outputs": []
197
+ },
198
+ {
199
+ "cell_type": "code",
200
+ "source": [
201
+ "reducer.embedding_.dtype"
202
+ ],
203
+ "metadata": {
204
+ "id": "qprQSJTCaPpv"
205
+ },
206
+ "execution_count": null,
207
+ "outputs": []
208
+ },
209
+ {
210
+ "cell_type": "code",
211
+ "source": [
212
+ "X_train, X_test, y_train, y_test = train_test_split(embeddings, labels, test_size=0.2, random_state=42)"
213
+ ],
214
+ "metadata": {
215
+ "id": "dFQYuL3MbJLj"
216
+ },
217
+ "execution_count": null,
218
+ "outputs": []
219
+ },
220
+ {
221
+ "cell_type": "code",
222
+ "source": [
223
+ "from xgboost import XGBClassifier"
224
+ ],
225
+ "metadata": {
226
+ "id": "HoySyJJ4cL3n"
227
+ },
228
+ "execution_count": null,
229
+ "outputs": []
230
+ },
231
+ {
232
+ "cell_type": "code",
233
+ "source": [
234
+ "xgb_clf = XGBClassifier(n_estimators=200, eval_metric=\"logloss\", learning_rate=0.01,\n",
235
+ " reg_lambda=0.8, max_depth=5, gamma=1.0, subsample=0.5,\n",
236
+ " colsample_bytree=0.5, min_child_weight=10)\n",
237
+ "xgb_clf.fit(X_train, y_train, eval_set=[(X_test, y_test)],\n",
238
+ " verbose=True)\n",
239
+ "\n",
240
+ "xgb_clf_pred = xgb_clf.predict(X_test)\n",
241
+ "score = xgb_clf.score(X_test, y_test)\n",
242
+ "print(f\"Accuracy: {score}\")\n",
243
+ "\n",
244
+ "print(f\"F1 score: {f1_score(y_test, xgb_clf_pred)}\")"
245
+ ],
246
+ "metadata": {
247
+ "id": "vP5jesFXJHcY"
248
+ },
249
+ "execution_count": null,
250
+ "outputs": []
251
+ },
252
+ {
253
+ "cell_type": "code",
254
+ "source": [
255
+ "# prompt: Calculate the training accuracy\n",
256
+ "\n",
257
+ "xgb_clf_pred_train = xgb_clf.predict(X_train)\n",
258
+ "score = xgb_clf.score(X_train, y_train)\n",
259
+ "print(f\"Training Accuracy: {score}\")\n",
260
+ "\n",
261
+ "score = xgb_clf.score(X_test, y_test)\n",
262
+ "print(f\"Test Accuracy: {score}\")"
263
+ ],
264
+ "metadata": {
265
+ "id": "IljcJVxVVlgI"
266
+ },
267
+ "execution_count": null,
268
+ "outputs": []
269
+ },
270
+ {
271
+ "cell_type": "code",
272
+ "source": [
273
+ "# prompt: Can you perform four fold cross validation on the xgboost model?\n",
274
+ "\n",
275
+ "from sklearn.model_selection import cross_val_score, KFold\n",
276
+ "# Perform four-fold cross-validation\n",
277
+ "kfold = KFold(n_splits=4, shuffle=True, random_state=42)\n",
278
+ "scores = cross_val_score(xgb_clf, embeddings, labels, cv=kfold, scoring='accuracy')\n",
279
+ "\n",
280
+ "# Print the cross-validation scores\n",
281
+ "print(\"Cross-validation scores:\", scores)\n",
282
+ "print(\"Average cross-validation score:\", scores.mean())"
283
+ ],
284
+ "metadata": {
285
+ "id": "peofLwk78-mE"
286
+ },
287
+ "execution_count": null,
288
+ "outputs": []
289
+ },
290
+ {
291
+ "cell_type": "code",
292
+ "source": [
293
+ "ConfusionMatrixDisplay.from_estimator(xgb_clf, X_test, y_test)"
294
+ ],
295
+ "metadata": {
296
+ "id": "5GvVgOoXcbJ-"
297
+ },
298
+ "execution_count": null,
299
+ "outputs": []
300
+ },
301
+ {
302
+ "cell_type": "code",
303
+ "source": [
304
+ "xgb_clf.save_model(\"xgb_flux_detection_model.json\")"
305
+ ],
306
+ "metadata": {
307
+ "id": "5TZsByCxQqbU"
308
+ },
309
+ "execution_count": null,
310
+ "outputs": []
311
+ },
312
+ {
313
+ "cell_type": "code",
314
+ "source": [
315
+ "# prompt: A random classifier\n",
316
+ "\n",
317
+ "from sklearn.dummy import DummyClassifier\n",
318
+ "\n",
319
+ "# Initialize a random classifier\n",
320
+ "dummy_clf = DummyClassifier(strategy='uniform') # Predicts randomly\n",
321
+ "\n",
322
+ "# Fit the classifier (not really necessary for a random classifier)\n",
323
+ "dummy_clf.fit(X_train, y_train)\n",
324
+ "\n",
325
+ "# Make predictions\n",
326
+ "dummy_pred = dummy_clf.predict(X_test)\n",
327
+ "\n",
328
+ "# Evaluate the performance\n",
329
+ "score = dummy_clf.score(X_test, y_test)\n",
330
+ "print(f\"Accuracy: {score}\")\n",
331
+ "print(f\"F1 score: {f1_score(y_test, dummy_pred)}\")\n",
332
+ "\n",
333
+ "ConfusionMatrixDisplay.from_estimator(dummy_clf, X_test, y_test)"
334
+ ],
335
+ "metadata": {
336
+ "id": "X7qkISlS4QjW"
337
+ },
338
+ "execution_count": null,
339
+ "outputs": []
340
+ },
341
+ {
342
+ "cell_type": "code",
343
+ "source": [
344
+ "# prompt: random forests with pruning\n",
345
+ "\n",
346
+ "from sklearn.ensemble import RandomForestClassifier\n",
347
+ "\n",
348
+ "# Initialize the RandomForestClassifier with pruning parameters\n",
349
+ "rf_clf = RandomForestClassifier(n_estimators=100, # Number of trees in the forest\n",
350
+ " max_depth=5, # Maximum depth of each tree (pruning)\n",
351
+ " min_samples_split=5, # Minimum samples required to split a node (pruning)\n",
352
+ " random_state=42) # Random seed for reproducibility\n",
353
+ "\n",
354
+ "# Fit the classifier to the training data\n",
355
+ "rf_clf.fit(X_train, y_train)\n",
356
+ "\n",
357
+ "# Make predictions on the test data\n",
358
+ "rf_pred = rf_clf.predict(X_test)\n",
359
+ "\n",
360
+ "# Evaluate the performance\n",
361
+ "score = rf_clf.score(X_test, y_test)\n",
362
+ "print(f\"Accuracy: {score}\")\n",
363
+ "\n",
364
+ "print(f\"F1 score: {f1_score(y_test, rf_pred)}\")\n",
365
+ "\n",
366
+ "ConfusionMatrixDisplay.from_estimator(rf_clf, X_test, y_test)"
367
+ ],
368
+ "metadata": {
369
+ "id": "3qJFLsYT3xmi"
370
+ },
371
+ "execution_count": null,
372
+ "outputs": []
373
+ },
374
+ {
375
+ "cell_type": "code",
376
+ "source": [
377
+ "# prompt: Can you perform four fold cross validation on the rf model?\n",
378
+ "\n",
379
+ "from sklearn.model_selection import cross_val_score, KFold\n",
380
+ "# Perform four-fold cross-validation\n",
381
+ "kfold = KFold(n_splits=4, shuffle=True, random_state=42)\n",
382
+ "scores = cross_val_score(rf_clf, embeddings, labels, cv=kfold, scoring='accuracy')\n",
383
+ "\n",
384
+ "# Print the cross-validation scores\n",
385
+ "print(\"Cross-validation scores:\", scores)\n",
386
+ "print(\"Average cross-validation score:\", scores.mean())"
387
+ ],
388
+ "metadata": {
389
+ "id": "-gDc0KvD9_Yp"
390
+ },
391
+ "execution_count": null,
392
+ "outputs": []
393
+ },
394
+ {
395
+ "cell_type": "code",
396
+ "source": [
397
+ "# prompt: SVC classifier\n",
398
+ "\n",
399
+ "from sklearn.svm import SVC\n",
400
+ "\n",
401
+ "# Initialize the SVC classifier\n",
402
+ "svc_clf = SVC()\n",
403
+ "\n",
404
+ "# Fit the classifier to the training data\n",
405
+ "svc_clf.fit(X_train, y_train)\n",
406
+ "\n",
407
+ "# Make predictions on the test data\n",
408
+ "svc_pred = svc_clf.predict(X_test)\n",
409
+ "\n",
410
+ "# Evaluate the performance\n",
411
+ "score = svc_clf.score(X_test, y_test)\n",
412
+ "print(f\"Accuracy: {score}\")\n",
413
+ "\n",
414
+ "print(f\"F1 score: {f1_score(y_test, svc_pred)}\")\n",
415
+ "\n",
416
+ "ConfusionMatrixDisplay.from_estimator(svc_clf, X_test, y_test)\n"
417
+ ],
418
+ "metadata": {
419
+ "id": "1sQjrGeZ8Ir3"
420
+ },
421
+ "execution_count": null,
422
+ "outputs": []
423
+ },
424
+ {
425
+ "cell_type": "code",
426
+ "source": [
427
+ "# prompt: classify with KNN and K=7\n",
428
+ "\n",
429
+ "from sklearn.neighbors import KNeighborsClassifier\n",
430
+ "# Initialize the KNeighborsClassifier with K=7\n",
431
+ "knn_clf = KNeighborsClassifier(n_neighbors=7)\n",
432
+ "\n",
433
+ "# Fit the classifier to the training data\n",
434
+ "knn_clf.fit(X_train, y_train)\n",
435
+ "\n",
436
+ "# Make predictions on the test data\n",
437
+ "knn_pred = knn_clf.predict(X_test)\n",
438
+ "\n",
439
+ "# Evaluate the performance\n",
440
+ "score = knn_clf.score(X_test, y_test)\n",
441
+ "print(f\"Accuracy: {score}\")\n",
442
+ "\n",
443
+ "print(f\"F1 score: {f1_score(y_test, knn_pred)}\")\n",
444
+ "\n",
445
+ "ConfusionMatrixDisplay.from_estimator(knn_clf, X_test, y_test)\n"
446
+ ],
447
+ "metadata": {
448
+ "id": "vU8SRYsZ72Sr"
449
+ },
450
+ "execution_count": null,
451
+ "outputs": []
452
+ },
453
+ {
454
+ "cell_type": "code",
455
+ "source": [
456
+ "# prompt: Can you perform four fold cross validation on the KNN model?\n",
457
+ "\n",
458
+ "from sklearn.model_selection import cross_val_score, KFold\n",
459
+ "# Perform four-fold cross-validation\n",
460
+ "kfold = KFold(n_splits=4, shuffle=True, random_state=42)\n",
461
+ "scores = cross_val_score(knn_clf, embeddings, labels, cv=kfold, scoring='accuracy')\n",
462
+ "\n",
463
+ "# Print the cross-validation scores\n",
464
+ "print(\"Cross-validation scores:\", scores)\n",
465
+ "print(\"Average cross-validation score:\", scores.mean())"
466
+ ],
467
+ "metadata": {
468
+ "id": "1X9_4kAKRlSm"
469
+ },
470
+ "execution_count": null,
471
+ "outputs": []
472
+ },
473
+ {
474
+ "cell_type": "code",
475
+ "source": [
476
+ "import plotly.express as px\n",
477
+ "# Initialize UMAP with desired parameters\n",
478
+ "reducer_2d = umap.UMAP(n_components=2, random_state=42)\n",
479
+ "\n",
480
+ "# Reduce the features to 2-D for plotting only; a separate name keeps the 32-D `reducer` used by the classifiers intact\n",
481
+ "embedding = reducer_2d.fit_transform(features)\n",
482
+ "import pandas as pd\n",
483
+ "\n",
484
+ "# Create a DataFrame for Plotly\n",
485
+ "embedding_df = pd.DataFrame(embedding, columns=['UMAP1', 'UMAP2'])\n",
486
+ "embedding_df['label'] = labels\n",
487
+ "# Create a scatter plot\n",
488
+ "fig = px.scatter(\n",
489
+ " embedding_df,\n",
490
+ " x='UMAP1',\n",
491
+ " y='UMAP2',\n",
492
+ " color='label',\n",
493
+ " title='UMAP Dimensionality Reduction',\n",
494
+ " labels={'color': 'Label'}\n",
495
+ ")\n",
496
+ "\n",
497
+ "# Show the plot\n",
498
+ "fig.show()"
499
+ ],
500
+ "metadata": {
501
+ "id": "wMEQoDF2Goj-"
502
+ },
503
+ "execution_count": null,
504
+ "outputs": []
505
+ },
506
+ {
507
+ "cell_type": "code",
508
+ "source": [
509
+ "# prompt: Save the knn classifier as a file\n",
510
+ "\n",
511
+ "import joblib\n",
512
+ "\n",
513
+ "# Save the knn classifier to a file\n",
514
+ "filename = 'knn_model.pkl'\n",
515
+ "joblib.dump(knn_clf, filename)\n"
516
+ ],
517
+ "metadata": {
518
+ "id": "I-Myacr4zsVy"
519
+ },
520
+ "execution_count": null,
521
+ "outputs": []
522
+ },
523
+ {
524
+ "cell_type": "code",
525
+ "source": [
526
+ "# prompt: load the knn model\n",
527
+ "\n",
528
+ "# Load the knn classifier from the file\n",
529
+ "filename = 'knn_model.pkl'\n",
530
+ "loaded_knn_clf = joblib.load(filename)"
531
+ ],
532
+ "metadata": {
533
+ "id": "yayMkQELAbZO"
534
+ },
535
+ "execution_count": null,
536
+ "outputs": []
537
+ },
538
+ {
539
+ "cell_type": "code",
540
+ "source": [
541
+ "# prompt: load the validation images and apply the wavelet transforms\n",
542
+ "\n",
543
+ "# Assuming 'validation_folder' contains your validation images\n",
544
+ "validation_images, validation_labels = load_images_from_folder('validation_folder')\n",
545
+ "\n",
546
+ "# Extract wavelet features from validation images\n",
547
+ "validation_features = extract_wavelet_features(validation_images, [\"db4\", \"db10\"])\n",
548
+ "\n",
549
+ "# Reduce dimensionality of validation features using the same UMAP reducer\n",
550
+ "validation_embeddings = reducer.transform(validation_features)\n",
551
+ "\n",
552
+ "# Now you have 'validation_embeddings' and 'validation_labels' for further use\n",
553
+ "# (e.g., evaluating your trained models on validation data)\n"
554
+ ],
555
+ "metadata": {
556
+ "id": "GKCz35S8E9jn"
557
+ },
558
+ "execution_count": null,
559
+ "outputs": []
560
+ },
561
+ {
562
+ "cell_type": "markdown",
563
+ "source": [
564
+ "### Validation"
565
+ ],
566
+ "metadata": {
567
+ "id": "nrcTRu_ilEGk"
568
+ }
569
+ },
570
+ {
571
+ "cell_type": "code",
572
+ "source": [
573
+ "!unzip Validation.zip"
574
+ ],
575
+ "metadata": {
576
+ "id": "Yajcb-E5lDgl"
577
+ },
578
+ "execution_count": null,
579
+ "outputs": []
580
+ },
581
+ {
582
+ "cell_type": "code",
583
+ "source": [
584
+ "# prompt: load the validation images\n",
585
+ "\n",
586
+ "# Assuming 'Validation' is the folder containing your validation images\n",
587
+ "ai_validation_images, ai_validation_labels = load_images_from_folder('Validation/AI')\n",
588
+ "photo_validation_images, photo_validation_labels = load_images_from_folder('Validation/Photo')\n",
589
+ "\n",
590
+ "\n",
591
+ "# Now you have 'validation_images' and 'validation_labels' for further use\n",
592
+ "print(f\"Number of AI Validation images: {len(ai_validation_images)}\")\n",
593
+ "print(f\"Number of Photo Validation images: {len(photo_validation_images)}\")"
594
+ ],
595
+ "metadata": {
596
+ "id": "mS8hzT-TlGER"
597
+ },
598
+ "execution_count": null,
599
+ "outputs": []
600
+ },
601
+ {
602
+ "cell_type": "code",
603
+ "source": [
604
+ "# prompt: Combine both validation datasets and extract the wavelet features.\n",
605
+ "\n",
606
+ "# Combine validation datasets\n",
607
+ "validation_images = ai_validation_images + photo_validation_images\n",
608
+ "validation_labels = ai_validation_labels + photo_validation_labels\n",
609
+ "\n",
610
+ "# Extract wavelet features from validation images\n",
611
+ "validation_features = extract_wavelet_features(validation_images, [\"db4\", \"db10\"])"
612
+ ],
613
+ "metadata": {
614
+ "id": "iTeZUqEblbu1"
615
+ },
616
+ "execution_count": null,
617
+ "outputs": []
618
+ },
619
+ {
620
+ "cell_type": "code",
621
+ "source": [
622
+ "# prompt: apply the reducer to find the validation embeddings\n",
623
+ "\n",
624
+ "# Reduce dimensionality of validation features using the same UMAP reducer\n",
625
+ "validation_embeddings = reducer.transform(validation_features)"
626
+ ],
627
+ "metadata": {
628
+ "id": "jdUbmE4Hltng"
629
+ },
630
+ "execution_count": null,
631
+ "outputs": []
632
+ },
633
+ {
634
+ "cell_type": "code",
635
+ "source": [
636
+ "# prompt: find the accuracy and f1 score on the knn classifier for validation features\n",
637
+ "\n",
638
+ "# Make predictions on the validation data\n",
639
+ "knn_pred_validation = knn_clf.predict(validation_embeddings)\n",
640
+ "\n",
641
+ "# Evaluate the performance on validation data\n",
642
+ "score_validation = knn_clf.score(validation_embeddings, validation_labels)\n",
643
+ "print(f\"Validation Accuracy: {score_validation}\")\n",
644
+ "\n",
645
+ "print(f\"Validation F1 score: {f1_score(validation_labels, knn_pred_validation)}\")\n"
646
+ ],
647
+ "metadata": {
648
+ "id": "ls2ij5VxlyOX"
649
+ },
650
+ "execution_count": null,
651
+ "outputs": []
652
+ },
653
+ {
654
+ "cell_type": "code",
655
+ "source": [
656
+ "# prompt: Can you combine the entire pipeline into one class?\n",
657
+ "\n",
658
+ "from sklearn.model_selection import train_test_split\n",
659
+ "from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay\n",
660
+ "from sklearn.preprocessing import StandardScaler\n",
661
+ "from sklearn.decomposition import PCA\n",
662
+ "import umap\n",
663
+ "import pywt\n",
664
+ "import os\n",
665
+ "from PIL import Image\n",
666
+ "import matplotlib.pyplot as plt\n",
667
+ "import numpy as np\n",
668
+ "from xgboost import XGBClassifier\n",
669
+ "from sklearn.model_selection import cross_val_score, KFold\n",
670
+ "from sklearn.dummy import DummyClassifier\n",
671
+ "from sklearn.ensemble import RandomForestClassifier\n",
672
+ "from sklearn.svm import SVC\n",
673
+ "from sklearn.neighbors import KNeighborsClassifier\n",
674
+ "from sklearn.model_selection import train_test_split\n",
675
+ "from sklearn.metrics import classification_report\n",
676
+ "import plotly.express as px\n",
677
+ "import pandas as pd\n",
678
+ "import joblib\n",
679
+ "from tqdm import tqdm\n",
680
+ "import lzma\n",
681
+ "\n",
682
+ "class FluxClassifier:\n",
683
+ " def __init__(self, wavelets=[\"db4\", \"db10\"], umap_n_neighbors=16, umap_n_components=32, random_state=42):\n",
684
+ " self.wavelets = list(wavelets)  # copy so the shared default list is never aliased\n",
685
+ " self.umap_n_neighbors = umap_n_neighbors\n",
686
+ " self.umap_n_components = umap_n_components\n",
687
+ " self.random_state = random_state\n",
688
+ " self.reducer = umap.UMAP(n_neighbors=self.umap_n_neighbors,\n",
689
+ " n_components=self.umap_n_components,\n",
690
+ " random_state=self.random_state)\n",
691
+ " self.classifier = KNeighborsClassifier(n_neighbors=7) # Default classifier\n",
692
+ "\n",
693
+ " def load_images_from_folder(self, folder):\n",
694
+ " images = []\n",
695
+ " labels = []\n",
696
+ " print(f\"Loading images from {folder}\")\n",
697
+ " for filename in tqdm(os.listdir(folder)):\n",
698
+ " if not (filename.endswith('.jpg') or filename.endswith('.png') or\n",
699
+ " filename.endswith('jpeg') or filename.endswith('webp')):\n",
700
+ " continue\n",
701
+ " img = Image.open(os.path.join(folder, filename))\n",
702
+ " img = img.resize((512, 512))\n",
703
+ " if img is not None:\n",
704
+ " images.append(img)\n",
705
+ " labels.append(1 if \"AI\" in folder else 0) # Assuming folder names contain \"AI\" or not\n",
706
+ " return images, labels\n",
707
+ "\n",
708
+ " def extract_wavelet_features(self, images):\n",
709
+ " all_features = []\n",
710
+ " for img in images:\n",
711
+ " img_gray = img.convert('L')\n",
712
+ " img_array = np.array(img_gray)\n",
713
+ " features = []\n",
714
+ " for wavelet in self.wavelets:\n",
715
+ " cA, cD = pywt.dwt(img_array, wavelet)\n",
716
+ " features.extend(cD.flatten())\n",
717
+ " all_features.append(features)\n",
718
+ " return np.array(all_features)\n",
719
+ "\n",
720
+ " def fit(self, train_folder1, train_folder2):\n",
721
+ " # Load images and extract features\n",
722
+ " images1, labels1 = self.load_images_from_folder(train_folder1)\n",
723
+ " images2, labels2 = self.load_images_from_folder(train_folder2)\n",
724
+ "\n",
725
+ " min_length = min(len(images1), len(images2))\n",
726
+ " images1 = images1[:min_length]\n",
727
+ " images2 = images2[:min_length]\n",
728
+ " labels1 = labels1[:min_length]\n",
729
+ " labels2 = labels2[:min_length]\n",
730
+ "\n",
731
+ " images = images1 + images2\n",
732
+ " labels = labels1 + labels2\n",
733
+ " features = self.extract_wavelet_features(images)\n",
734
+ "\n",
735
+ " # Apply UMAP dimensionality reduction\n",
736
+ " embeddings = self.reducer.fit_transform(features)\n",
737
+ " X_train, X_test, y_train, y_test = train_test_split(embeddings, labels, test_size=0.2, random_state=42)\n",
738
+ "\n",
739
+ " # Train the classifier\n",
740
+ " self.classifier.fit(X_train, y_train)\n",
741
+ "\n",
742
+ " acc = self.classifier.score(X_test, y_test)\n",
743
+ " y_pred = self.classifier.predict(X_test)\n",
744
+ " print(f\"Classifier accuracy = {acc}\")\n",
745
+ "\n",
746
+ " f1 = f1_score(y_test, y_pred)\n",
747
+ " print(f\"Classifier F1 = {f1}\")\n",
748
+ " print(classification_report(y_test, y_pred))\n",
749
+ "\n",
750
+ "\n",
751
+ " def predict(self, images):\n",
752
+ " # Load images and extract features\n",
753
+ " features = self.extract_wavelet_features(images)\n",
754
+ "\n",
755
+ " # Apply UMAP dimensionality reduction\n",
756
+ " embeddings = self.reducer.transform(features)\n",
757
+ "\n",
758
+ " # Make predictions\n",
759
+ " return self.classifier.predict(embeddings)\n",
760
+ "\n",
761
+ " def predict_proba(self, images):\n",
762
+ " # Load images and extract features\n",
763
+ " features = self.extract_wavelet_features(images)\n",
764
+ "\n",
765
+ " # Apply UMAP dimensionality reduction\n",
766
+ " embeddings = self.reducer.transform(features)\n",
767
+ "\n",
768
+ " # Make predictions\n",
769
+ " return self.classifier.predict_proba(embeddings)\n",
770
+ "\n",
771
+ " def score(self, test_folder):\n",
772
+ " # Load images and extract features\n",
773
+ " images, labels = self.load_images_from_folder(test_folder)\n",
774
+ " features = self.extract_wavelet_features(images)\n",
775
+ "\n",
776
+ " # Apply UMAP dimensionality reduction\n",
777
+ " embeddings = self.reducer.transform(features)\n",
778
+ "\n",
779
+ " # Evaluate the classifier\n",
780
+ " return self.classifier.score(embeddings, labels)\n",
781
+ "\n",
782
+ " def save_model(self, filename):\n",
783
+ " joblib.dump(self, filename, compress=('zlib', 9))\n",
784
+ "\n",
785
+ " @staticmethod\n",
786
+ " def load_model(filename):\n",
787
+ " return joblib.load(filename)"
788
+ ],
789
+ "metadata": {
790
+ "id": "V8NO_N4QteQK"
791
+ },
792
+ "execution_count": null,
793
+ "outputs": []
794
+ },
795
+ {
796
+ "cell_type": "code",
797
+ "source": [
798
+ "classifier = FluxClassifier()\n",
799
+ "classifier.fit(\"AI\", \"Photo\")"
800
+ ],
801
+ "metadata": {
802
+ "id": "sFYjKz1L6xgg"
803
+ },
804
+ "execution_count": null,
805
+ "outputs": []
806
+ },
807
+ {
808
+ "cell_type": "code",
809
+ "source": [
810
+ "classifier.save_model(\"flux_classifier.pkl\")"
811
+ ],
812
+ "metadata": {
813
+ "id": "tiLVrOTF_ZGM"
814
+ },
815
+ "execution_count": null,
816
+ "outputs": []
817
+ },
818
+ {
819
+ "cell_type": "code",
820
+ "source": [
821
+ "# prompt: save the model to my google drive.\n",
822
+ "\n",
823
+ "from google.colab import drive\n",
824
+ "drive.mount('/content/drive')\n",
825
+ "!cp flux_classifier.pkl /content/drive/MyDrive"
826
+ ],
827
+ "metadata": {
828
+ "id": "sXo1mHFSADuS"
829
+ },
830
+ "execution_count": null,
831
+ "outputs": []
832
+ },
833
+ {
834
+ "cell_type": "code",
835
+ "source": [
836
+ "images = [Image.open(\"pDGQUK1BYaJYhrFB5ouQU.jpeg\").resize((512, 512)), Image.open(\"jenta2.jpeg\").resize((512, 512))]  # same 512x512 size the training images were resized to\n",
837
+ "predictions = classifier.predict_proba(images)\n",
838
+ "print(predictions)"
839
+ ],
840
+ "metadata": {
841
+ "id": "cNVwQ7Oq6vWa"
842
+ },
843
+ "execution_count": null,
844
+ "outputs": []
845
+ },
846
+ {
847
+ "cell_type": "code",
848
+ "source": [],
849
+ "metadata": {
850
+ "id": "98TbK3uH-_CD"
851
+ },
852
+ "execution_count": null,
853
+ "outputs": []
854
+ }
855
+ ]
856
+ }