StandardCAS-NSTID committed on
Commit
9ba80f9
1 Parent(s): d8922f1

Create 1c3a.py

Browse files
Files changed (1) hide show
  1. 1c3a.py +237 -0
1c3a.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Added options to retrain all clusters or only those from the new folder
2
+ import os
3
+ import cv2
4
+ import numpy as np
5
+ from sklearn.cluster import KMeans
6
+ from tensorflow.keras.models import load_model
7
+ from sklearn.svm import SVC
8
+ from sklearn.model_selection import train_test_split
9
+ from joblib import dump, load
10
+ from sklearn.cluster import KMeans
11
+ from keras.models import Sequential
12
+ from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
13
+ import tensorflow as tf
14
+
15
+ # Target size every loaded image is resized to; also used for the CNN input shape
16
+ img_size = (1000, 1000)
17
+
18
def load_images_from_folder(folder):
    """
    Recursively load every image under ``folder`` as a resized grayscale array.

    Sub-directories are descended into. Files are selected by extension
    (``.png``, ``.jpg``, ``.jpeg``, compared case-insensitively), read in
    grayscale mode and resized to the module-level ``img_size``. Directory
    entries are visited in sorted order so the returned lists are reproducible
    between runs.

    :param folder: The path to the folder containing the images to load.
    :return: A tuple ``(images, image_paths)`` of two parallel lists: the
        resized grayscale images and the file paths they were read from.
        Files that cannot be decoded are skipped and appear in neither list.
    """
    images = []
    image_paths = []
    for filename in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, filename)
        if os.path.isdir(file_path):
            subfolder_images, subfolder_image_paths = load_images_from_folder(file_path)
            images.extend(subfolder_images)
            image_paths.extend(subfolder_image_paths)
        elif filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread reports an unreadable or corrupt file by returning
                # None instead of raising; resizing None would crash the scan.
                print(f'Skipping unreadable image: {file_path}')
                continue
            images.append(cv2.resize(img, img_size))
            image_paths.append(file_path)
    return images, image_paths
39
+
40
def train_model(folder, model_file):
    """
    Train a small CNN on the images found under ``folder`` and save it.

    Images are loaded (recursively) with ``load_images_from_folder``, scaled
    to the range [0, 1] and fed to a three-convolution network with a single
    sigmoid output. If the folder contains no images, nothing is trained and
    no file is written. Any error raised while fitting or saving is printed
    and swallowed so that a failure in one folder does not stop the caller.

    NOTE(review): every sample is given the label 1 (``np.ones``), so the
    network is fitted on a single class - confirm this is intended.

    :param folder: The path to the folder containing the training data.
    :param model_file: The path to the file where the trained model will be saved.
    """
    images, _ = load_images_from_folder(folder)
    if not images:
        return

    # Stack directly into one float32 array. Going through dtype=object would
    # box every pixel as a Python object before the cast.
    samples = np.asarray(images, dtype=np.float32) / 255.0
    samples = samples.reshape(len(images), img_size[0], img_size[1], 1)
    labels = np.ones(len(images))

    # Create CNN model
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(img_size[0], img_size[1], 1)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # Compile CNN model using SGD optimizer from tf.keras.optimizers.legacy
    opt = tf.keras.optimizers.legacy.SGD()
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

    # Best effort: report the problem and carry on rather than abort the
    # whole recursive training run.
    try:
        model.fit(samples, labels, epochs=2, batch_size=150)
        print(model_file, 'here')
        model.save(model_file)
    except Exception as e:
        print(e)
82
+
83
+
84
+
85
+
86
def classify_images(folder, model_folder, n_clusters=5, new_only=False):
    """
    Score the images under ``folder`` with its trained model and cluster the scores.

    The model is read from ``<folder>/<basename of folder>.h5``. When that
    file is missing, when ``new_only`` is set but there is no ``new``
    sub-folder, or when no images are found, an empty list is returned
    instead of raising, so a recursive caller can keep going.

    :param folder: The path to the folder containing the images to classify.
    :param model_folder: Currently unused; the model path is derived from
        ``folder``. Kept so existing callers keep working.
    :param n_clusters: The number of clusters to form using the k-means algorithm.
    :param new_only: Whether to classify only images in a subfolder named "new".
    :return: A 2D list of image file paths, one inner list per cluster. If
        there are fewer images than ``n_clusters`` a single cluster holding
        all paths is returned; if there is nothing to classify the list is empty.
    """
    model_file = os.path.join(folder, os.path.basename(folder) + '.h5')
    if not os.path.isfile(model_file):
        # train_model writes no file for image-less folders or failed training.
        print(f'No trained model at {model_file}; skipping {folder}')
        return []

    if new_only:
        folder = os.path.join(folder, 'new')
        if not os.path.isdir(folder):
            return []

    images, image_paths = load_images_from_folder(folder)
    if not images:
        return []

    model = load_model(model_file)

    # Stack directly into float32 (no dtype=object detour) and scale to [0, 1].
    batch = np.asarray(images, dtype=np.float32) / 255.0
    batch = batch.reshape(len(images), img_size[0], img_size[1], 1)

    # Obtain classification scores for each image
    scores = model.predict(batch, batch_size=200)

    # k-means needs at least as many samples as clusters.
    if len(scores) < n_clusters:
        return [image_paths]

    kmeans = KMeans(n_clusters=n_clusters, n_init=20)
    kmeans.fit(scores)

    clusters = [[] for _ in range(n_clusters)]
    for path, label in zip(image_paths, kmeans.labels_):
        clusters[label].append(path)
    return clusters
126
+
127
+
128
+
129
+
130
def remove_empty_folders_recursively(directory):
    """
    Delete every empty folder below ``directory``, deepest folders first.

    A folder that becomes empty because all of its sub-folders were removed
    is removed as well. ``directory`` itself is never deleted. Symbolic links
    are left untouched: they are neither followed nor removed, because
    ``os.rmdir`` cannot delete a link and following one could leave the tree
    or loop forever.

    :param directory: The path to the directory to remove empty folders from.
    :raises OSError: If ``directory`` cannot be listed or a folder cannot be removed.
    """
    # Collect the sub-folders first so the scandir handle is closed before
    # anything beneath it is deleted.
    with os.scandir(directory) as entries:
        subfolders = [entry.path for entry in entries if entry.is_dir(follow_symlinks=False)]

    for folder_path in subfolders:
        # Clean the children first; that may be what empties this folder.
        remove_empty_folders_recursively(folder_path)
        if not os.listdir(folder_path):
            os.rmdir(folder_path)
144
+
145
def train_model_recursively(folder, model_folder, max_depth=None, depth=0):
    """
    Train one model for ``folder`` and one for each of its sub-directories.

    The model for ``folder`` is saved as ``<model_folder>/<basename of folder>.h5``;
    ``model_folder`` is created first if it does not exist. Each sub-directory
    is then trained recursively and its model is stored inside that
    sub-directory itself.

    :param folder: The path to the folder containing the training data.
    :param model_folder: The path to the folder where the model for ``folder`` will be saved.
    :param max_depth: The maximum depth of recursion. If None, recursion will continue until all subdirectories have been processed.
    :param depth: The current depth of recursion.
    """
    if model_folder:
        # Without the target directory the save inside train_model fails and
        # the error is only printed there.
        os.makedirs(model_folder, exist_ok=True)
    model_file = os.path.join(model_folder, os.path.basename(folder) + '.h5')
    train_model(folder, model_file)

    if max_depth is not None and depth >= max_depth:
        return

    for subfolder in sorted(os.listdir(folder)):
        subfolder_path = os.path.join(folder, subfolder)
        if not os.path.isdir(subfolder_path):
            continue
        # Each sub-directory keeps its own model next to its images; use a
        # separate local name instead of overwriting the model_folder parameter.
        sub_model_folder = subfolder_path
        print(sub_model_folder, subfolder_path)
        train_model_recursively(subfolder_path, sub_model_folder, max_depth, depth + 1)
167
+
168
+
169
def classify_images_recursively(folder, model_folder, n_clusters=5, max_depth=None, depth=0):
    """
    Cluster the images of ``folder`` and of every sub-directory below it.

    ``classify_images`` is run for ``folder`` first; the function then calls
    itself on each sub-directory until ``max_depth`` is reached, merging all
    results into a single mapping.

    :param folder: The path to the folder containing the images to classify.
    :param model_folder: The path to the folder containing the trained models; passed through unchanged to ``classify_images``.
    :param n_clusters: The number of clusters to form using the k-means algorithm.
    :param max_depth: The maximum depth of recursion. If None, every sub-directory is processed.
    :param depth: The current depth of recursion.
    :return: A dictionary mapping each visited folder path to its 2D list of clustered image file paths.
    """
    result = {folder: classify_images(folder, model_folder, n_clusters)}

    # Stop here once the depth limit has been reached.
    may_descend = max_depth is None or depth < max_depth
    if not may_descend:
        return result

    for entry in os.listdir(folder):
        child = os.path.join(folder, entry)
        if not os.path.isdir(child):
            continue
        nested = classify_images_recursively(child, model_folder, n_clusters, max_depth, depth + 1)
        result.update(nested)

    return result
193
+
194
+
195
+
196
def _move_clusters(clusters_by_folder):
    """Move each clustered image into a ``cluster_<i>`` sub-folder of its folder."""
    for folder, clusters in clusters_by_folder.items():
        for i, cluster in enumerate(clusters):
            cluster_folder = os.path.join(folder, f'cluster_{i}')
            try:
                os.makedirs(cluster_folder, exist_ok=True)
            except OSError as e:
                print(e)
                continue
            for image_path in cluster:
                new_image_path = os.path.join(cluster_folder, os.path.basename(image_path))
                if os.path.normpath(image_path) == os.path.normpath(new_image_path):
                    # Already sitting in the right cluster folder.
                    continue
                if not os.path.exists(image_path):
                    # Images are collected recursively, so the same file is
                    # listed under every ancestor folder and may already have
                    # been moved while an ancestor was processed.
                    continue
                if os.path.exists(new_image_path):
                    # os.rename may silently replace the destination; keep both files.
                    print(f'Not overwriting existing file: {new_image_path}')
                    continue
                try:
                    os.rename(image_path, new_image_path)
                except OSError as e:
                    # One bad file must not abort the remaining moves.
                    print(e)


def main():
    """
    Run the whole pipeline for the ``textcv`` and ``buttoncv`` folders.

    Steps: train a model per folder and sub-folder, delete empty sub-folders,
    cluster the images of every folder, then move each image into a
    ``cluster_<i>`` sub-folder of the folder it was clustered for.
    """
    # Train models for textcv and buttoncv folders and their subdirectories
    train_model_recursively('textcv', 'textcv')
    train_model_recursively('buttoncv', 'buttoncv')

    # Check for and remove empty subfolders
    remove_empty_folders_recursively('textcv')
    remove_empty_folders_recursively('buttoncv')

    # Classify both trees before moving anything, as the original did.
    text_clusters = classify_images_recursively('textcv', 'models')
    button_clusters = classify_images_recursively('buttoncv', 'models')

    _move_clusters(text_clusters)
    _move_clusters(button_clusters)
231
+
232
+
233
+
234
+
235
+ if __name__ == '__main__':  # run the pipeline only when executed as a script, not on import
236
+ main()
237
+