Sadashiv committed on
Commit
17c5137
1 Parent(s): 1e9452c

Upload 146 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .github/workflows/sync-to-huggingface.yml +21 -0
  2. .gitignore +164 -0
  3. .gitmodules +9 -0
  4. .vscode/extensions.json +10 -0
  5. .vscode/settings.json +8 -0
  6. .vscode/tasks.json +15 -0
  7. Fertilizer-Recommendation/.gitignore +166 -0
  8. Fertilizer-Recommendation/.vscode/extensions.json +13 -0
  9. Fertilizer-Recommendation/.vscode/settings.json +8 -0
  10. Fertilizer-Recommendation/.vscode/tasks.json +15 -0
  11. Fertilizer-Recommendation/LICENSE +21 -0
  12. Fertilizer-Recommendation/README.md +43 -0
  13. Fertilizer-Recommendation/fertilizer-prediction/Fertilizer Prediction.csv +100 -0
  14. Fertilizer-Recommendation/main.py +8 -0
  15. Fertilizer-Recommendation/notebook/fertilizer-prediction.ipynb +736 -0
  16. Fertilizer-Recommendation/requirements.txt +58 -0
  17. Fertilizer-Recommendation/saved_models/0/model/model.pkl +3 -0
  18. Fertilizer-Recommendation/saved_models/0/target_encoder/target_encoder.pkl +3 -0
  19. Fertilizer-Recommendation/saved_models/0/transformer/transformer.pkl +3 -0
  20. Fertilizer-Recommendation/src/__init__.py +0 -0
  21. Fertilizer-Recommendation/src/app.py +0 -0
  22. Fertilizer-Recommendation/src/components/__init__.py +0 -0
  23. Fertilizer-Recommendation/src/components/data_ingestion.py +69 -0
  24. Fertilizer-Recommendation/src/components/data_transformation.py +116 -0
  25. Fertilizer-Recommendation/src/components/data_validation.py +145 -0
  26. Fertilizer-Recommendation/src/components/model_evaluation.py +108 -0
  27. Fertilizer-Recommendation/src/components/model_pusher.py +71 -0
  28. Fertilizer-Recommendation/src/components/model_trainer.py +102 -0
  29. Fertilizer-Recommendation/src/config.py +23 -0
  30. Fertilizer-Recommendation/src/entity/__init__.py +0 -0
  31. Fertilizer-Recommendation/src/entity/artifact_entity.py +40 -0
  32. Fertilizer-Recommendation/src/entity/config_entity.py +120 -0
  33. Fertilizer-Recommendation/src/exception.py +21 -0
  34. Fertilizer-Recommendation/src/logger.py +22 -0
  35. Fertilizer-Recommendation/src/pipeline/__init__.py +0 -0
  36. Fertilizer-Recommendation/src/pipeline/training_pipeline.py +85 -0
  37. Fertilizer-Recommendation/src/predictor.py +118 -0
  38. Fertilizer-Recommendation/src/setup.py +0 -0
  39. Fertilizer-Recommendation/src/utils.py +115 -0
  40. Fertilizer-Recommendation/template.py +49 -0
  41. README.md +93 -6
  42. __pycache__/app.cpython-38.pyc +0 -0
  43. __pycache__/artifacts.cpython-311.pyc +0 -0
  44. __pycache__/artifacts.cpython-38.pyc +0 -0
  45. __pycache__/config.cpython-311.pyc +0 -0
  46. __pycache__/config.cpython-38.pyc +0 -0
  47. __pycache__/utils.cpython-311.pyc +0 -0
  48. __pycache__/utils.cpython-38.pyc +0 -0
  49. app.py +152 -0
  50. artifacts.py +31 -0
.github/workflows/sync-to-huggingface.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ with:
15
+ submodules: recursive
16
+ fetch-depth: 0
17
+ lfs: true
18
+ - name: Push to hub
19
+ env:
20
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
21
+ run: git push --force https://Sadashiv:$HF_TOKEN@huggingface.co/spaces/Sadashiv/CropGaurd main
.gitignore ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+ main.py
163
+ extra_notebook
164
+ notebok
.gitmodules ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [submodule "Fertilizer-Recommendation"]
2
+ path = Fertilizer-Recommendation
3
+ url = https://github.com/07Sada/Fertilizer-Recommendation.git
4
+ [submodule "crop-recommendation"]
5
+ path = crop-recommendation
6
+ url = https://github.com/07Sada/crop-recommendation.git
7
+ [submodule "plant-diseases-classifier"]
8
+ path = plant-diseases-classifier
9
+ url = https://github.com/07Sada/plant-diseases-classifier.git
.vscode/extensions.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "recommendations": [
3
+ "mongodb.mongodb-vscode",
4
+ "ms-python.python",
5
+ "ms-toolsai.jupyter",
6
+ "ms-toolsai.jupyter-keymap",
7
+ "ms-toolsai.jupyter-renderers",
8
+ "formulahendry.code-runner"
9
+ ]
10
+ }
.vscode/settings.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "workbench.colorTheme": "Cobalt2",
3
+ "workbench.preferredDarkColorTheme": "Default Dark+",
4
+ "task.allowAutomaticTasks": "on",
5
+ "workbench.editorAssociations": {
6
+ "*.md": "vscode.markdown.preview.editor"
7
+ }
8
+ }
.vscode/tasks.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "2.0.0",
3
+ "tasks": [
4
+ {
5
+ "label": "Installing extensions and dependencies...",
6
+ "type": "shell",
7
+ "command": "code-server --install-extension mongodb.mongodb-vscode --install-extension ms-python.python --install-extension formulahendry.code-runner && pip install -r requirements.txt",
8
+ "presentation": {
9
+ "reveal": "always",
10
+ "panel": "new"
11
+ },
12
+ "runOptions": { "runOn": "folderOpen" }
13
+ }
14
+ ]
15
+ }
Fertilizer-Recommendation/.gitignore ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+ data_dump.py
162
+ dataset_download.py
163
+ demo.ipynb
164
+
165
+ artifact
166
+ catboost_info
Fertilizer-Recommendation/.vscode/extensions.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "recommendations": [
3
+ "mongodb.mongodb-vscode",
4
+ "ms-python.python",
5
+ "ms-toolsai.jupyter",
6
+ "ms-toolsai.jupyter-keymap",
7
+ "ms-toolsai.jupyter-renderers",
8
+ "formulahendry.code-runner",
9
+ "wesbos.theme-cobalt2",
10
+ "PKief.material-icon-theme",
11
+ "wesbos.theme-cobalt2"
12
+ ]
13
+ }
Fertilizer-Recommendation/.vscode/settings.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "workbench.colorTheme": "Cobalt2",
3
+ "workbench.preferredDarkColorTheme": "Cobalt2",
4
+ "task.allowAutomaticTasks": "on",
5
+ "workbench.editorAssociations": {
6
+ "*.md": "vscode.markdown.preview.editor"
7
+ }
8
+ }
Fertilizer-Recommendation/.vscode/tasks.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "2.0.0",
3
+ "tasks": [
4
+ {
5
+ "label": "Installing extensions and dependencies...",
6
+ "type": "shell",
7
+ "command": "code-server --install-extension mongodb.mongodb-vscode --install-extension ms-python.python --install-extension formulahendry.code-runner && pip install -r requirements.txt",
8
+ "presentation": {
9
+ "reveal": "always",
10
+ "panel": "new"
11
+ },
12
+ "runOptions": { "runOn": "folderOpen" }
13
+ }
14
+ ]
15
+ }
Fertilizer-Recommendation/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Sadashiv Nandanikar
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
Fertilizer-Recommendation/README.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Fertilizer Recommendation
2
+ ### Powered by machine learning models, this tool evaluates input factors to provide precise fertilizer recommendations, enhancing crop health and productivity.
3
+
4
+ ## Demo
5
+ ### Input Interface
6
+ <img src="https://github.com/07Sada/Fertilizer-Recommendation/assets/112761379/7623ab94-c7c8-4343-814d-863849e67850" alt="Image 1" width="800">
7
+
8
+ ### Output Interface
9
+ <img src="https://github.com/07Sada/Fertilizer-Recommendation/assets/112761379/45e3dee7-f21d-4570-8df8-56e8cf2a9538" alt="Image 2" width="800">
10
+
11
+ ## Data Source
12
+ This dataset contains information about the soil, environmental conditions, crop type, and fertilizer use for different crops. The dataset includes the following columns:
13
+
14
+ - `Temparature`: The temperature in degrees Celsius.
15
+ - `Humidity`: The relative humidity in percent.
16
+ - `Moisture`: The moisture content of the soil in percent.
17
+ - `Soil Type`: The type of soil.
18
+ - `Crop Type`: The type of crop.
19
+ - `Nitrogen`: The amount of nitrogen in the soil in kilograms per hectare.
20
+ - `Potassium`: The amount of potassium in the soil in kilograms per hectare.
21
+ - `Phosphorous`: The amount of phosphorus in the soil in kilograms per hectare.
22
+ - `Fertilizer Name`: The name of the fertilizer used.
23
+
24
+ [Link](https://www.kaggle.com/datasets/gdabhishek/fertilizer-prediction) for the dataset
25
+
26
+ <details>
27
+ <summary>Supported fertilizers
28
+ </summary>
29
+
30
+ - UREA
31
+ - DAP
32
+ - 14-35-14
33
+ - 28-28
34
+ - 17-17-17
35
+ - 20-20
36
+ - 10-26-26
37
+ </details>
38
+
39
+ ## Project Details
40
+ This repository is a submodule of [CropGaurd](https://github.com/07Sada/CropGaurd.git)
41
+
42
+ ## Project PipeLine Stages
43
+ ![Project PipeLine Stages](https://user-images.githubusercontent.com/112761379/225940480-2a7381b2-6abd-4c1c-8287-0fd49099be8c.jpg)
Fertilizer-Recommendation/fertilizer-prediction/Fertilizer Prediction.csv ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Temparature,Humidity ,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
2
+ 26,52,38,Sandy,Maize,37,0,0,Urea
3
+ 29,52,45,Loamy,Sugarcane,12,0,36,DAP
4
+ 34,65,62,Black,Cotton,7,9,30,14-35-14
5
+ 32,62,34,Red,Tobacco,22,0,20,28-28
6
+ 28,54,46,Clayey,Paddy,35,0,0,Urea
7
+ 26,52,35,Sandy,Barley,12,10,13,17-17-17
8
+ 25,50,64,Red,Cotton,9,0,10,20-20
9
+ 33,64,50,Loamy,Wheat,41,0,0,Urea
10
+ 30,60,42,Sandy,Millets,21,0,18,28-28
11
+ 29,58,33,Black,Oil seeds,9,7,30,14-35-14
12
+ 27,54,28,Clayey,Pulses,13,0,40,DAP
13
+ 31,62,48,Sandy,Maize,14,15,12,17-17-17
14
+ 25,50,65,Loamy,Cotton,36,0,0,Urea
15
+ 32,62,41,Clayey,Paddy,24,0,22,28-28
16
+ 26,52,31,Red,Ground Nuts,14,0,41,DAP
17
+ 31,62,49,Black,Sugarcane,10,13,14,17-17-17
18
+ 33,64,34,Clayey,Pulses,38,0,0,Urea
19
+ 25,50,39,Sandy,Barley,21,0,19,28-28
20
+ 28,54,65,Black,Cotton,39,0,0,Urea
21
+ 29,58,52,Loamy,Wheat,13,0,36,DAP
22
+ 30,60,44,Sandy,Millets,10,0,9,20-20
23
+ 34,65,53,Loamy,Sugarcane,12,14,12,17-17-17
24
+ 35,68,33,Red,Tobacco,11,0,37,DAP
25
+ 28,54,37,Black,Millets,36,0,0,Urea
26
+ 33,64,39,Clayey,Paddy,13,0,10,20-20
27
+ 26,52,44,Sandy,Maize,23,0,20,28-28
28
+ 30,60,63,Red,Cotton,9,9,29,14-35-14
29
+ 32,62,30,Loamy,Sugarcane,38,0,0,Urea
30
+ 37,70,32,Black,Oil seeds,12,0,39,DAP
31
+ 26,52,36,Clayey,Pulses,14,0,13,20-20
32
+ 29,58,40,Red,Ground Nuts,24,0,23,28-28
33
+ 30,60,27,Loamy,Sugarcane,12,0,40,DAP
34
+ 34,65,38,Clayey,Paddy,39,0,0,Urea
35
+ 36,68,38,Sandy,Barley,7,9,30,14-35-14
36
+ 26,52,48,Loamy,Wheat,23,0,19,28-28
37
+ 28,54,35,Black,Millets,41,0,0,Urea
38
+ 30,60,61,Loamy,Cotton,8,10,31,14-35-14
39
+ 37,70,37,Clayey,Paddy,12,0,41,DAP
40
+ 25,50,26,Red,Ground Nuts,15,14,11,17-17-17
41
+ 29,58,34,Sandy,Millets,15,0,37,DAP
42
+ 27,54,30,Clayey,Pulses,13,0,13,20-20
43
+ 30,60,58,Loamy,Sugarcane,10,7,32,14-35-14
44
+ 32,62,34,Red,Tobacco,22,0,24,28-28
45
+ 34,65,60,Black,Sugarcane,35,0,0,Urea
46
+ 35,67,42,Sandy,Barley,10,0,35,DAP
47
+ 38,70,48,Loamy,Wheat,8,8,28,14-35-14
48
+ 26,52,32,Black,Oil seeds,12,0,8,20-20
49
+ 29,58,43,Clayey,Paddy,24,0,18,28-28
50
+ 30,60,29,Red,Ground Nuts,41,0,0,Urea
51
+ 33,64,51,Sandy,Maize,5,9,29,14-35-14
52
+ 34,65,31,Red,Tobacco,23,0,21,28-28
53
+ 36,68,33,Black,Oil seeds,13,0,14,20-20
54
+ 28,54,38,Clayey,Pulses,40,0,0,Urea
55
+ 30,60,47,Sandy,Barley,12,0,42,DAP
56
+ 31,62,63,Red,Cotton,11,12,15,17-17-17
57
+ 27,53,43,Black,Millets,23,0,24,28-28
58
+ 34,65,54,Loamy,Wheat,38,0,0,Urea
59
+ 29,58,37,Sandy,Millets,8,0,15,20-20
60
+ 25,50,56,Loamy,Sugarcane,11,13,15,17-17-17
61
+ 32,62,34,Red,Ground Nuts,15,0,37,DAP
62
+ 28,54,41,Clayey,Paddy,36,0,0,Urea
63
+ 30,60,49,Loamy,Wheat,13,0,9,20-20
64
+ 34,65,64,Black,Cotton,24,0,20,28-28
65
+ 28,54,47,Sandy,Barley,5,18,15,10-26-26
66
+ 27,53,35,Black,Oil seeds,37,0,0,Urea
67
+ 36,68,62,Red,Cotton,15,0,40,DAP
68
+ 34,65,57,Black,Sugarcane,9,0,13,20-20
69
+ 29,58,55,Loamy,Sugarcane,8,8,33,14-35-14
70
+ 25,50,40,Clayey,Pulses,6,19,16,10-26-26
71
+ 30,60,38,Sandy,Millets,10,0,14,20-20
72
+ 26,52,39,Clayey,Pulses,21,0,23,28-28
73
+ 31,62,32,Red,Tobacco,39,0,0,Urea
74
+ 34,65,48,Loamy,Wheat,23,0,19,28-28
75
+ 27,53,34,Black,Oil seeds,42,0,0,Urea
76
+ 33,64,31,Red,Ground Nuts,13,0,39,DAP
77
+ 29,58,42,Clayey,Paddy,9,10,22,14-35-14
78
+ 30,60,47,Sandy,Maize,22,0,21,28-28
79
+ 27,53,59,Loamy,Sugarcane,10,0,15,20-20
80
+ 26,52,36,Clayey,Pulses,7,16,20,10-26-26
81
+ 34,65,63,Red,Cotton,14,0,38,DAP
82
+ 28,54,43,Clayey,Paddy,10,8,29,14-35-14
83
+ 30,60,40,Sandy,Millets,41,0,0,Urea
84
+ 29,58,65,Black,Cotton,14,0,35,DAP
85
+ 26,52,59,Loamy,Sugarcane,11,0,9,20-20
86
+ 31,62,44,Sandy,Barley,21,0,28,28-28
87
+ 35,67,28,Clayey,Pulses,8,7,31,14-35-14
88
+ 29,58,30,Red,Tobacco,13,17,16,10-26-26
89
+ 27,53,30,Black,Millets,35,0,0,Urea
90
+ 36,68,50,Loamy,Wheat,12,18,19,10-26-26
91
+ 29,58,61,Loamy,Cotton,11,0,38,DAP
92
+ 30,60,26,Black,Oil seeds,8,9,30,14-35-14
93
+ 34,65,45,Clayey,Paddy,6,19,21,10-26-26
94
+ 36,68,41,Red,Ground Nuts,41,0,0,Urea
95
+ 28,54,25,Sandy,Maize,9,10,30,14-35-14
96
+ 25,50,32,Clayey,Pulses,24,0,19,28-28
97
+ 30,60,27,Red,Tobacco,4,17,17,10-26-26
98
+ 38,72,51,Loamy,Wheat,39,0,0,Urea
99
+ 36,60,43,Sandy,Millets,15,0,41,DAP
100
+ 29,58,57,Black,Sugarcane,12,0,10,20-20
Fertilizer-Recommendation/main.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from src.pipeline.training_pipeline import start_training_pipeline
2
+
3
+ if __name__ =="__main__":
4
+ try:
5
+ start_training_pipeline()
6
+
7
+ except Exception as e:
8
+ print(e)
Fertilizer-Recommendation/notebook/fertilizer-prediction.ipynb ADDED
@@ -0,0 +1,736 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import numpy as np \n",
11
+ "import matplotlib.pyplot as plt \n",
12
+ "import seaborn as sns\n",
13
+ "from sklearn.model_selection import train_test_split\n",
14
+ "from sklearn.preprocessing import OneHotEncoder\n",
15
+ "from sklearn.preprocessing import LabelEncoder\n",
16
+ "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score\n",
17
+ "\n",
18
+ "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier\n",
19
+ "from sklearn.tree import DecisionTreeClassifier\n",
20
+ "from sklearn.neighbors import KNeighborsClassifier\n",
21
+ "from xgboost import XGBClassifier\n",
22
+ "from catboost import CatBoostClassifier\n",
23
+ "\n",
24
+ "from sklearn.compose import ColumnTransformer\n",
25
+ "from sklearn.pipeline import Pipeline\n",
26
+ "from sklearn.preprocessing import StandardScaler\n",
27
+ "\n",
28
+ "import warnings\n",
29
+ "\n",
30
+ "# Ignore warnings\n",
31
+ "warnings.filterwarnings(\"ignore\")"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 2,
37
+ "metadata": {},
38
+ "outputs": [],
39
+ "source": [
40
+ "import os\n",
41
+ "os.chdir(\"/config/workspace\")"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": 3,
47
+ "metadata": {},
48
+ "outputs": [
49
+ {
50
+ "data": {
51
+ "text/html": [
52
+ "<div>\n",
53
+ "<style scoped>\n",
54
+ " .dataframe tbody tr th:only-of-type {\n",
55
+ " vertical-align: middle;\n",
56
+ " }\n",
57
+ "\n",
58
+ " .dataframe tbody tr th {\n",
59
+ " vertical-align: top;\n",
60
+ " }\n",
61
+ "\n",
62
+ " .dataframe thead th {\n",
63
+ " text-align: right;\n",
64
+ " }\n",
65
+ "</style>\n",
66
+ "<table border=\"1\" class=\"dataframe\">\n",
67
+ " <thead>\n",
68
+ " <tr style=\"text-align: right;\">\n",
69
+ " <th></th>\n",
70
+ " <th>Temparature</th>\n",
71
+ " <th>Humidity</th>\n",
72
+ " <th>Moisture</th>\n",
73
+ " <th>Soil Type</th>\n",
74
+ " <th>Crop Type</th>\n",
75
+ " <th>Nitrogen</th>\n",
76
+ " <th>Potassium</th>\n",
77
+ " <th>Phosphorous</th>\n",
78
+ " <th>Fertilizer Name</th>\n",
79
+ " </tr>\n",
80
+ " </thead>\n",
81
+ " <tbody>\n",
82
+ " <tr>\n",
83
+ " <th>0</th>\n",
84
+ " <td>26</td>\n",
85
+ " <td>52</td>\n",
86
+ " <td>38</td>\n",
87
+ " <td>Sandy</td>\n",
88
+ " <td>Maize</td>\n",
89
+ " <td>37</td>\n",
90
+ " <td>0</td>\n",
91
+ " <td>0</td>\n",
92
+ " <td>Urea</td>\n",
93
+ " </tr>\n",
94
+ " <tr>\n",
95
+ " <th>1</th>\n",
96
+ " <td>29</td>\n",
97
+ " <td>52</td>\n",
98
+ " <td>45</td>\n",
99
+ " <td>Loamy</td>\n",
100
+ " <td>Sugarcane</td>\n",
101
+ " <td>12</td>\n",
102
+ " <td>0</td>\n",
103
+ " <td>36</td>\n",
104
+ " <td>DAP</td>\n",
105
+ " </tr>\n",
106
+ " <tr>\n",
107
+ " <th>2</th>\n",
108
+ " <td>34</td>\n",
109
+ " <td>65</td>\n",
110
+ " <td>62</td>\n",
111
+ " <td>Black</td>\n",
112
+ " <td>Cotton</td>\n",
113
+ " <td>7</td>\n",
114
+ " <td>9</td>\n",
115
+ " <td>30</td>\n",
116
+ " <td>14-35-14</td>\n",
117
+ " </tr>\n",
118
+ " <tr>\n",
119
+ " <th>3</th>\n",
120
+ " <td>32</td>\n",
121
+ " <td>62</td>\n",
122
+ " <td>34</td>\n",
123
+ " <td>Red</td>\n",
124
+ " <td>Tobacco</td>\n",
125
+ " <td>22</td>\n",
126
+ " <td>0</td>\n",
127
+ " <td>20</td>\n",
128
+ " <td>28-28</td>\n",
129
+ " </tr>\n",
130
+ " <tr>\n",
131
+ " <th>4</th>\n",
132
+ " <td>28</td>\n",
133
+ " <td>54</td>\n",
134
+ " <td>46</td>\n",
135
+ " <td>Clayey</td>\n",
136
+ " <td>Paddy</td>\n",
137
+ " <td>35</td>\n",
138
+ " <td>0</td>\n",
139
+ " <td>0</td>\n",
140
+ " <td>Urea</td>\n",
141
+ " </tr>\n",
142
+ " </tbody>\n",
143
+ "</table>\n",
144
+ "</div>"
145
+ ],
146
+ "text/plain": [
147
+ " Temparature Humidity Moisture Soil Type Crop Type Nitrogen Potassium \\\n",
148
+ "0 26 52 38 Sandy Maize 37 0 \n",
149
+ "1 29 52 45 Loamy Sugarcane 12 0 \n",
150
+ "2 34 65 62 Black Cotton 7 9 \n",
151
+ "3 32 62 34 Red Tobacco 22 0 \n",
152
+ "4 28 54 46 Clayey Paddy 35 0 \n",
153
+ "\n",
154
+ " Phosphorous Fertilizer Name \n",
155
+ "0 0 Urea \n",
156
+ "1 36 DAP \n",
157
+ "2 30 14-35-14 \n",
158
+ "3 20 28-28 \n",
159
+ "4 0 Urea "
160
+ ]
161
+ },
162
+ "execution_count": 3,
163
+ "metadata": {},
164
+ "output_type": "execute_result"
165
+ }
166
+ ],
167
+ "source": [
168
+ "FILE_PATH =r\"fertilizer-prediction/Fertilizer Prediction.csv\"\n",
169
+ "\n",
170
+ "# Loading the dataset into pandas\n",
171
+ "df = pd.read_csv(FILE_PATH)\n",
172
+ "\n",
173
+ "df.head()"
174
+ ]
175
+ },
176
+ {
177
+ "cell_type": "code",
178
+ "execution_count": 4,
179
+ "metadata": {},
180
+ "outputs": [
181
+ {
182
+ "name": "stdout",
183
+ "output_type": "stream",
184
+ "text": [
185
+ "Shape of the dataset: (99, 9)\n"
186
+ ]
187
+ }
188
+ ],
189
+ "source": [
190
+ "print(f\"Shape of the dataset: {df.shape}\")"
191
+ ]
192
+ },
193
+ {
194
+ "cell_type": "code",
195
+ "execution_count": 5,
196
+ "metadata": {},
197
+ "outputs": [
198
+ {
199
+ "name": "stdout",
200
+ "output_type": "stream",
201
+ "text": [
202
+ "<class 'pandas.core.frame.DataFrame'>\n",
203
+ "RangeIndex: 99 entries, 0 to 98\n",
204
+ "Data columns (total 9 columns):\n",
205
+ " # Column Non-Null Count Dtype \n",
206
+ "--- ------ -------------- ----- \n",
207
+ " 0 Temparature 99 non-null int64 \n",
208
+ " 1 Humidity 99 non-null int64 \n",
209
+ " 2 Moisture 99 non-null int64 \n",
210
+ " 3 Soil Type 99 non-null object\n",
211
+ " 4 Crop Type 99 non-null object\n",
212
+ " 5 Nitrogen 99 non-null int64 \n",
213
+ " 6 Potassium 99 non-null int64 \n",
214
+ " 7 Phosphorous 99 non-null int64 \n",
215
+ " 8 Fertilizer Name 99 non-null object\n",
216
+ "dtypes: int64(6), object(3)\n",
217
+ "memory usage: 7.1+ KB\n"
218
+ ]
219
+ }
220
+ ],
221
+ "source": [
222
+ "# datatypes \n",
223
+ "df.info()"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": 6,
229
+ "metadata": {},
230
+ "outputs": [
231
+ {
232
+ "data": {
233
+ "text/plain": [
234
+ "Temparature 0\n",
235
+ "Humidity 0\n",
236
+ "Moisture 0\n",
237
+ "Soil Type 0\n",
238
+ "Crop Type 0\n",
239
+ "Nitrogen 0\n",
240
+ "Potassium 0\n",
241
+ "Phosphorous 0\n",
242
+ "Fertilizer Name 0\n",
243
+ "dtype: int64"
244
+ ]
245
+ },
246
+ "execution_count": 6,
247
+ "metadata": {},
248
+ "output_type": "execute_result"
249
+ }
250
+ ],
251
+ "source": [
252
+ "# checking for null values \n",
253
+ "df.isnull().sum()"
254
+ ]
255
+ },
256
+ {
257
+ "cell_type": "code",
258
+ "execution_count": 7,
259
+ "metadata": {},
260
+ "outputs": [
261
+ {
262
+ "data": {
263
+ "image/png": "",
264
+ "text/plain": [
265
+ "<Figure size 1000x700 with 1 Axes>"
266
+ ]
267
+ },
268
+ "metadata": {},
269
+ "output_type": "display_data"
270
+ }
271
+ ],
272
+ "source": [
273
+ "# checking the data distribution \n",
274
+ "plt.figure(figsize=(10, 7))\n",
275
+ "\n",
276
+ "sns.countplot(data=df, x='Fertilizer Name')\n",
277
+ "plt.title(\"Dataset Distribution\")\n",
278
+ "plt.show()"
279
+ ]
280
+ },
281
+ {
282
+ "cell_type": "code",
283
+ "execution_count": 8,
284
+ "metadata": {},
285
+ "outputs": [
286
+ {
287
+ "data": {
288
+ "text/plain": [
289
+ "array(['Sandy', 'Loamy', 'Black', 'Red', 'Clayey'], dtype=object)"
290
+ ]
291
+ },
292
+ "execution_count": 8,
293
+ "metadata": {},
294
+ "output_type": "execute_result"
295
+ }
296
+ ],
297
+ "source": [
298
+ "# soil type\n",
299
+ "df['Soil Type'].unique()"
300
+ ]
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": 9,
305
+ "metadata": {},
306
+ "outputs": [
307
+ {
308
+ "data": {
309
+ "text/plain": [
310
+ "array(['Maize', 'Sugarcane', 'Cotton', 'Tobacco', 'Paddy', 'Barley',\n",
311
+ " 'Wheat', 'Millets', 'Oil seeds', 'Pulses', 'Ground Nuts'],\n",
312
+ " dtype=object)"
313
+ ]
314
+ },
315
+ "execution_count": 9,
316
+ "metadata": {},
317
+ "output_type": "execute_result"
318
+ }
319
+ ],
320
+ "source": [
321
+ "df['Crop Type'].unique()"
322
+ ]
323
+ },
324
+ {
325
+ "cell_type": "code",
326
+ "execution_count": 10,
327
+ "metadata": {},
328
+ "outputs": [
329
+ {
330
+ "name": "stdout",
331
+ "output_type": "stream",
332
+ "text": [
333
+ "Shape of the training dataset: (79, 8)\n",
334
+ "Shape of the testing dataset: (20, 8)\n"
335
+ ]
336
+ }
337
+ ],
338
+ "source": [
339
+ "# splitting the dataset \n",
340
+ "X = df.drop(columns=[\"Fertilizer Name\"])\n",
341
+ "y = df[\"Fertilizer Name\"]\n",
342
+ "\n",
343
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
344
+ "\n",
345
+ "print(f\"Shape of the training dataset: {X_train.shape}\")\n",
346
+ "print(f\"Shape of the testing dataset: {X_test.shape}\")"
347
+ ]
348
+ },
349
+ {
350
+ "cell_type": "code",
351
+ "execution_count": 23,
352
+ "metadata": {},
353
+ "outputs": [
354
+ {
355
+ "name": "stdout",
356
+ "output_type": "stream",
357
+ "text": [
358
+ "['Temparature', 'Humidity ', 'Moisture', 'Nitrogen', 'Potassium', 'Phosphorous']\n"
359
+ ]
360
+ }
361
+ ],
362
+ "source": [
363
+ "# numerical columns in the dataset\n",
364
+ "print(df._get_numeric_data().columns.tolist())"
365
+ ]
366
+ },
367
+ {
368
+ "cell_type": "code",
369
+ "execution_count": 11,
370
+ "metadata": {},
371
+ "outputs": [
372
+ {
373
+ "data": {
374
+ "text/plain": [
375
+ "['Soil Type', 'Crop Type']"
376
+ ]
377
+ },
378
+ "execution_count": 11,
379
+ "metadata": {},
380
+ "output_type": "execute_result"
381
+ }
382
+ ],
383
+ "source": [
384
+ "# segregating categorical columns\n",
385
+ "categorical_columns = [i for i in df.columns if (i not in df._get_numeric_data().columns) & (i !='Fertilizer Name')]\n",
386
+ "categorical_columns"
387
+ ]
388
+ },
389
+ {
390
+ "cell_type": "markdown",
391
+ "metadata": {},
392
+ "source": [
393
+ "## Encoding"
394
+ ]
395
+ },
396
+ {
397
+ "cell_type": "code",
398
+ "execution_count": 12,
399
+ "metadata": {},
400
+ "outputs": [],
401
+ "source": [
402
+ "ohe = OneHotEncoder(drop='first')\n",
403
+ "standard_scaler = StandardScaler()\n",
404
+ "\n",
405
+ "preprocessor = ColumnTransformer(\n",
406
+ " transformers =[\n",
407
+ " ('StandaradScaling', standard_scaler, df._get_numeric_data().columns),\n",
408
+ " ('One_hot_encoding', ohe, categorical_columns)\n",
409
+ " ],\n",
410
+ " remainder='passthrough'\n",
411
+ ")\n",
412
+ "\n",
413
+ "pipeline = Pipeline([\n",
414
+ " ('preprocess', preprocessor)\n",
415
+ "])"
416
+ ]
417
+ },
418
+ {
419
+ "cell_type": "code",
420
+ "execution_count": 13,
421
+ "metadata": {},
422
+ "outputs": [],
423
+ "source": [
424
+ "X_train_transformed = pipeline.fit_transform(X_train)\n",
425
+ "X_test_transformed = pipeline.transform(X_test)"
426
+ ]
427
+ },
428
+ {
429
+ "cell_type": "code",
430
+ "execution_count": 14,
431
+ "metadata": {},
432
+ "outputs": [],
433
+ "source": [
434
+ "le = LabelEncoder()\n",
435
+ "\n",
436
+ "y_train_transformed = le.fit_transform(y_train)\n",
437
+ "y_test_transformed = le.transform(y_test)"
438
+ ]
439
+ },
440
+ {
441
+ "cell_type": "code",
442
+ "execution_count": 15,
443
+ "metadata": {},
444
+ "outputs": [],
445
+ "source": [
446
+ "def evaluate_clf(true, predicted):\n",
447
+ " '''\n",
448
+ " This function takes in true values and predicted values\n",
449
+ "    Returns: Accuracy, F1-Score, Precision, Recall\n",
450
+ " '''\n",
451
+ " acc = accuracy_score(true, predicted)\n",
452
+ " f1 = f1_score(true, predicted, average='weighted')\n",
453
+ " precision = precision_score(true, predicted, average='weighted')\n",
454
+ " recall = recall_score(true, predicted, average='weighted')\n",
455
+ " \n",
456
+ " return acc, f1, precision, recall"
457
+ ]
458
+ },
459
+ {
460
+ "cell_type": "code",
461
+ "execution_count": 16,
462
+ "metadata": {},
463
+ "outputs": [],
464
+ "source": [
465
+ "# create a function which can evaluate models and returns a report \n",
466
+ "def evaluate_model(X_train, X_test, y_train, y_test, models):\n",
467
+ " '''\n",
468
+ " This function takes X_train, X_test, y_train, y_test and models dictionary as input\n",
469
+ "    Iterates through the given models dictionary and evaluates each model's metrics\n",
470
+ "\n",
471
+ " Returns:\n",
472
+ "        List of dicts, one per model, holding its train and test metrics\n",
473
+ " '''\n",
474
+ "\n",
475
+ " model_list = []\n",
476
+ " metric_list = []\n",
477
+ "\n",
478
+ " for i in range(len(list(models))):\n",
479
+ " model = list(models.values())[i]\n",
480
+ " model.fit(X_train, y_train)\n",
481
+ "\n",
482
+ " # Make predictions\n",
483
+ " y_train_pred = model.predict(X_train)\n",
484
+ " y_test_pred = model.predict(X_test)\n",
485
+ "\n",
486
+ " # Training set performances\n",
487
+ " model_train_accuracy, model_train_f1, model_train_precision, \\\n",
488
+ " model_train_recall = evaluate_clf(y_train, y_train_pred)\n",
489
+ "\n",
490
+ "        # Test set performance\n",
491
+ " model_test_accuracy, model_test_f1, model_test_precision, \\\n",
492
+ " model_test_recall = evaluate_clf(y_test, y_test_pred)\n",
493
+ "\n",
494
+ " print(list(models.keys())[i])\n",
495
+ " model_list.append(list(models.keys())[i])\n",
496
+ "\n",
497
+ " result_dict ={'model_name':list(models.keys())[i], \n",
498
+ " \"train_accuracy\": model_train_accuracy, \"test_accuracy\": model_test_accuracy,\n",
499
+ " \"train_precision\": model_train_precision, \"test_precision\": model_test_precision,\n",
500
+ " 'train_recall': model_train_recall, \"test_recall\":model_test_recall,\n",
501
+ " \"train_f1_score\": model_train_f1, \"test_f1_score\": model_test_f1}\n",
502
+ "\n",
503
+ " metric_list.append(result_dict)\n",
504
+ "\n",
505
+ " \n",
506
+ " return metric_list\n"
507
+ ]
508
+ },
509
+ {
510
+ "cell_type": "code",
511
+ "execution_count": 17,
512
+ "metadata": {},
513
+ "outputs": [],
514
+ "source": [
515
+ "# Model Dictionary\n",
516
+ "models = {\n",
517
+ " \"Random Forest\": RandomForestClassifier(),\n",
518
+ " \"Decision Tree\": DecisionTreeClassifier(),\n",
519
+ " \"Gradient Boosting\": GradientBoostingClassifier(),\n",
520
+ " \"K-Neighbors Classifier\": KNeighborsClassifier(),\n",
521
+ " \"XGBClassifier\": XGBClassifier(), \n",
522
+ " \"CatBoosting Classifier\": CatBoostClassifier(verbose=False),\n",
523
+ " \"AdaBoost Classifier\": AdaBoostClassifier()\n",
524
+ "}"
525
+ ]
526
+ },
527
+ {
528
+ "cell_type": "code",
529
+ "execution_count": 18,
530
+ "metadata": {},
531
+ "outputs": [
532
+ {
533
+ "name": "stdout",
534
+ "output_type": "stream",
535
+ "text": [
536
+ "Random Forest\n",
537
+ "Decision Tree\n",
538
+ "Gradient Boosting\n",
539
+ "K-Neighbors Classifier\n",
540
+ "XGBClassifier\n",
541
+ "CatBoosting Classifier\n",
542
+ "AdaBoost Classifier\n"
543
+ ]
544
+ }
545
+ ],
546
+ "source": [
547
+ "resultant_metrics = evaluate_model(X_train_transformed, X_test_transformed, y_train_transformed, y_test_transformed, models)\n",
548
+ "\n",
549
+ "resultant_metrics_df = pd.DataFrame(data=resultant_metrics)"
550
+ ]
551
+ },
552
+ {
553
+ "cell_type": "code",
554
+ "execution_count": 19,
555
+ "metadata": {},
556
+ "outputs": [
557
+ {
558
+ "data": {
559
+ "text/html": [
560
+ "<div>\n",
561
+ "<style scoped>\n",
562
+ " .dataframe tbody tr th:only-of-type {\n",
563
+ " vertical-align: middle;\n",
564
+ " }\n",
565
+ "\n",
566
+ " .dataframe tbody tr th {\n",
567
+ " vertical-align: top;\n",
568
+ " }\n",
569
+ "\n",
570
+ " .dataframe thead th {\n",
571
+ " text-align: right;\n",
572
+ " }\n",
573
+ "</style>\n",
574
+ "<table border=\"1\" class=\"dataframe\">\n",
575
+ " <thead>\n",
576
+ " <tr style=\"text-align: right;\">\n",
577
+ " <th></th>\n",
578
+ " <th>model_name</th>\n",
579
+ " <th>train_accuracy</th>\n",
580
+ " <th>test_accuracy</th>\n",
581
+ " <th>train_precision</th>\n",
582
+ " <th>test_precision</th>\n",
583
+ " <th>train_recall</th>\n",
584
+ " <th>test_recall</th>\n",
585
+ " <th>train_f1_score</th>\n",
586
+ " <th>test_f1_score</th>\n",
587
+ " </tr>\n",
588
+ " </thead>\n",
589
+ " <tbody>\n",
590
+ " <tr>\n",
591
+ " <th>1</th>\n",
592
+ " <td>Decision Tree</td>\n",
593
+ " <td>1.000000</td>\n",
594
+ " <td>1.00</td>\n",
595
+ " <td>1.000000</td>\n",
596
+ " <td>1.000000</td>\n",
597
+ " <td>1.000000</td>\n",
598
+ " <td>1.00</td>\n",
599
+ " <td>1.000000</td>\n",
600
+ " <td>1.000000</td>\n",
601
+ " </tr>\n",
602
+ " <tr>\n",
603
+ " <th>4</th>\n",
604
+ " <td>XGBClassifier</td>\n",
605
+ " <td>1.000000</td>\n",
606
+ " <td>1.00</td>\n",
607
+ " <td>1.000000</td>\n",
608
+ " <td>1.000000</td>\n",
609
+ " <td>1.000000</td>\n",
610
+ " <td>1.00</td>\n",
611
+ " <td>1.000000</td>\n",
612
+ " <td>1.000000</td>\n",
613
+ " </tr>\n",
614
+ " <tr>\n",
615
+ " <th>5</th>\n",
616
+ " <td>CatBoosting Classifier</td>\n",
617
+ " <td>1.000000</td>\n",
618
+ " <td>1.00</td>\n",
619
+ " <td>1.000000</td>\n",
620
+ " <td>1.000000</td>\n",
621
+ " <td>1.000000</td>\n",
622
+ " <td>1.00</td>\n",
623
+ " <td>1.000000</td>\n",
624
+ " <td>1.000000</td>\n",
625
+ " </tr>\n",
626
+ " <tr>\n",
627
+ " <th>0</th>\n",
628
+ " <td>Random Forest</td>\n",
629
+ " <td>1.000000</td>\n",
630
+ " <td>0.95</td>\n",
631
+ " <td>1.000000</td>\n",
632
+ " <td>1.000000</td>\n",
633
+ " <td>1.000000</td>\n",
634
+ " <td>0.95</td>\n",
635
+ " <td>1.000000</td>\n",
636
+ " <td>0.966667</td>\n",
637
+ " </tr>\n",
638
+ " <tr>\n",
639
+ " <th>2</th>\n",
640
+ " <td>Gradient Boosting</td>\n",
641
+ " <td>1.000000</td>\n",
642
+ " <td>0.95</td>\n",
643
+ " <td>1.000000</td>\n",
644
+ " <td>0.975000</td>\n",
645
+ " <td>1.000000</td>\n",
646
+ " <td>0.95</td>\n",
647
+ " <td>1.000000</td>\n",
648
+ " <td>0.955556</td>\n",
649
+ " </tr>\n",
650
+ " <tr>\n",
651
+ " <th>6</th>\n",
652
+ " <td>AdaBoost Classifier</td>\n",
653
+ " <td>0.594937</td>\n",
654
+ " <td>0.70</td>\n",
655
+ " <td>0.477918</td>\n",
656
+ " <td>0.657143</td>\n",
657
+ " <td>0.594937</td>\n",
658
+ " <td>0.70</td>\n",
659
+ " <td>0.504147</td>\n",
660
+ " <td>0.662500</td>\n",
661
+ " </tr>\n",
662
+ " <tr>\n",
663
+ " <th>3</th>\n",
664
+ " <td>K-Neighbors Classifier</td>\n",
665
+ " <td>0.898734</td>\n",
666
+ " <td>0.65</td>\n",
667
+ " <td>0.904539</td>\n",
668
+ " <td>0.666667</td>\n",
669
+ " <td>0.898734</td>\n",
670
+ " <td>0.65</td>\n",
671
+ " <td>0.897599</td>\n",
672
+ " <td>0.647727</td>\n",
673
+ " </tr>\n",
674
+ " </tbody>\n",
675
+ "</table>\n",
676
+ "</div>"
677
+ ],
678
+ "text/plain": [
679
+ " model_name train_accuracy test_accuracy train_precision \\\n",
680
+ "1 Decision Tree 1.000000 1.00 1.000000 \n",
681
+ "4 XGBClassifier 1.000000 1.00 1.000000 \n",
682
+ "5 CatBoosting Classifier 1.000000 1.00 1.000000 \n",
683
+ "0 Random Forest 1.000000 0.95 1.000000 \n",
684
+ "2 Gradient Boosting 1.000000 0.95 1.000000 \n",
685
+ "6 AdaBoost Classifier 0.594937 0.70 0.477918 \n",
686
+ "3 K-Neighbors Classifier 0.898734 0.65 0.904539 \n",
687
+ "\n",
688
+ " test_precision train_recall test_recall train_f1_score test_f1_score \n",
689
+ "1 1.000000 1.000000 1.00 1.000000 1.000000 \n",
690
+ "4 1.000000 1.000000 1.00 1.000000 1.000000 \n",
691
+ "5 1.000000 1.000000 1.00 1.000000 1.000000 \n",
692
+ "0 1.000000 1.000000 0.95 1.000000 0.966667 \n",
693
+ "2 0.975000 1.000000 0.95 1.000000 0.955556 \n",
694
+ "6 0.657143 0.594937 0.70 0.504147 0.662500 \n",
695
+ "3 0.666667 0.898734 0.65 0.897599 0.647727 "
696
+ ]
697
+ },
698
+ "execution_count": 19,
699
+ "metadata": {},
700
+ "output_type": "execute_result"
701
+ }
702
+ ],
703
+ "source": [
704
+ "resultant_metrics_df = resultant_metrics_df.sort_values(by='test_f1_score', ascending=False)\n",
705
+ "resultant_metrics_df"
706
+ ]
707
+ }
708
+ ],
709
+ "metadata": {
710
+ "kernelspec": {
711
+ "display_name": "Python 3.8.10 64-bit",
712
+ "language": "python",
713
+ "name": "python3"
714
+ },
715
+ "language_info": {
716
+ "codemirror_mode": {
717
+ "name": "ipython",
718
+ "version": 3
719
+ },
720
+ "file_extension": ".py",
721
+ "mimetype": "text/x-python",
722
+ "name": "python",
723
+ "nbconvert_exporter": "python",
724
+ "pygments_lexer": "ipython3",
725
+ "version": "3.8.10"
726
+ },
727
+ "orig_nbformat": 4,
728
+ "vscode": {
729
+ "interpreter": {
730
+ "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
731
+ }
732
+ }
733
+ },
734
+ "nbformat": 4,
735
+ "nbformat_minor": 2
736
+ }
Fertilizer-Recommendation/requirements.txt ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ asttokens==2.2.1
2
+ backcall==0.2.0
3
+ bleach==6.0.0
4
+ certifi==2023.7.22
5
+ charset-normalizer==3.2.0
6
+ click==8.1.6
7
+ comm==0.1.3
8
+ debugpy==1.6.7
9
+ decorator==5.1.1
10
+ dnspython==2.4.1
11
+ executing==1.2.0
12
+ idna==3.4
13
+ importlib-metadata==6.8.0
14
+ ipykernel==6.25.0
15
+ ipython==8.12.2
16
+ jedi==0.18.2
17
+ jupyter-client==8.3.0
18
+ jupyter-core==5.3.1
19
+ kaggle==1.5.16
20
+ matplotlib-inline==0.1.6
21
+ nest-asyncio==1.5.6
22
+ numpy==1.24.4
23
+ opendatasets==0.1.22
24
+ packaging==23.1
25
+ pandas==2.0.3
26
+ parso==0.8.3
27
+ pexpect==4.8.0
28
+ pickleshare==0.7.5
29
+ platformdirs==3.9.1
30
+ prompt-toolkit==3.0.39
31
+ psutil==5.9.5
32
+ ptyprocess==0.7.0
33
+ pure-eval==0.2.2
34
+ Pygments==2.15.1
35
+ pymongo==4.4.1
36
+ python-dateutil==2.8.2
37
+ python-dotenv==1.0.0
38
+ python-slugify==8.0.1
39
+ pytz==2023.3
40
+ pyzmq==25.1.0
41
+ requests==2.31.0
42
+ six==1.16.0
43
+ stack-data==0.6.2
44
+ text-unidecode==1.3
45
+ tornado==6.3.2
46
+ tqdm==4.65.0
47
+ traitlets==5.9.0
48
+ typing-extensions==4.7.1
49
+ tzdata==2023.3
50
+ urllib3==2.0.4
51
+ wcwidth==0.2.6
52
+ webencodings==0.5.1
53
+ zipp==3.16.2
54
+ scikit-learn
55
+ matplotlib
56
+ seaborn
57
+ pyyaml
58
+ dill
Fertilizer-Recommendation/saved_models/0/model/model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c89526de70ba6f924e11e82a344eb581e001228120c575fc73372179b91297ed
3
+ size 2808
Fertilizer-Recommendation/saved_models/0/target_encoder/target_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60d651fde77ec9ae2d704442e566129721089259fef449d7e81620ac286ddc9d
3
+ size 338
Fertilizer-Recommendation/saved_models/0/transformer/transformer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:427b030f88db787db36098d667cd6fb75e6e6a1d8bb6b504d47d2124b3a10a20
3
+ size 2323
Fertilizer-Recommendation/src/__init__.py ADDED
File without changes
Fertilizer-Recommendation/src/app.py ADDED
File without changes
Fertilizer-Recommendation/src/components/__init__.py ADDED
File without changes
Fertilizer-Recommendation/src/components/data_ingestion.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.entity import config_entity
2
+ from src.entity import artifact_entity
3
+ from src.logger import logging
4
+ from src.exception import FertilizerException
5
+ from src import utils
6
+
7
+ from sklearn.model_selection import train_test_split
8
+ import numpy as np
9
+ import pandas as pd
10
+ import sys
11
+ import os
12
+
13
class DataIngestion:
    """Export the source collection to CSV and split it into train/test files."""

    def __init__(self, data_ingestion_config:config_entity.DataIngestionConfig):
        """Keep the ingestion configuration and log the stage banner.

        Raises:
            FertilizerException: wraps any error raised while initialising.
        """
        try:
            banner = f"\n\n{'>'*50} Data Ingestion {'<'*50}\n"
            logging.info(banner)
            self.data_ingestion_config = data_ingestion_config

        except Exception as e:
            raise FertilizerException(e, sys)

    def initiate_data_ingestion(self) -> artifact_entity.DataIngestionArtifact:
        """Run the ingestion stage.

        Loads the configured collection through
        ``utils.get_collection_as_dataframe``, writes it to the feature-store
        path, splits it with ``train_test_split`` (``random_state=42``) and
        writes the train and test CSV files.

        Returns:
            A ``DataIngestionArtifact`` carrying the feature-store, train and
            test file paths taken from the configuration.

        Raises:
            FertilizerException: wraps any error raised during the stage.
        """
        try:
            cfg = self.data_ingestion_config

            logging.info("Exporting collection data as pandas Dataframe ")
            raw_df: pd.DataFrame = utils.get_collection_as_dataframe(
                database_name=cfg.database_name,
                collection_name=cfg.collection_name,
            )

            logging.info("Saving data in feature store")
            os.makedirs(os.path.dirname(cfg.feature_store_file_path), exist_ok=True)

            logging.info("Saving dataframe into feature store")
            raw_df.to_csv(path_or_buf=cfg.feature_store_file_path, index=False, header=True)

            logging.info("Split the dataset into train and test")
            train_part, test_part = train_test_split(
                raw_df, test_size=cfg.test_size, random_state=42
            )

            logging.info("Create dataset directory if not available")
            os.makedirs(os.path.dirname(cfg.train_file_path), exist_ok=True)

            logging.info("Save df to feature store folder")
            # Train file is written first, then the test file.
            for frame, destination in (
                (train_part, cfg.train_file_path),
                (test_part, cfg.test_file_path),
            ):
                frame.to_csv(path_or_buf=destination, index=False, header=True)

            ingestion_artifact = artifact_entity.DataIngestionArtifact(
                feature_store_file_path=cfg.feature_store_file_path,
                train_file_path=cfg.train_file_path,
                test_file_path=cfg.test_file_path,
            )

            logging.info("Data Ingestion Completed. Artifacts saved")
            return ingestion_artifact

        except Exception as e:
            raise FertilizerException(e, sys)
Fertilizer-Recommendation/src/components/data_transformation.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.entity import config_entity
2
+ from src.entity import artifact_entity
3
+ from src.logger import logging
4
+ from src.exception import FertilizerException
5
+ from typing import Optional
6
+ from src import utils
7
+ import sys
8
+ import os
9
+ from src.config import TARGET_COLUMN
10
+ from src.config import NUMERICAL_FEATURES
11
+ from src.config import CATEGORICAL_FEATURES
12
+ from src.config import BASE_FILE_PATH
13
+
14
+ from sklearn.pipeline import Pipeline
15
+ from sklearn.preprocessing import StandardScaler
16
+ from sklearn.preprocessing import LabelEncoder
17
+ from sklearn.preprocessing import OneHotEncoder
18
+ from sklearn.compose import ColumnTransformer
19
+ import pandas as pd
20
+ import numpy as np
21
+
22
class DataTransformation:
    """Fit the feature pipeline and target encoder, then persist transformed data."""

    def __init__(
        self,
        data_transformation_config:config_entity.DataTransformationConfig,
        data_ingestion_artifact:artifact_entity.DataIngestionArtifact
    ):
        """Keep the stage configuration and the ingestion artifact.

        Raises:
            FertilizerException: wraps any error raised while initialising.
        """
        try:
            banner = f"\n\n{'>'*50} Data transformation Initiated {'<'*50}\n"
            logging.info(banner)
            self.data_transformation_config = data_transformation_config
            self.data_ingestion_artifact = data_ingestion_artifact

        except Exception as e:
            raise FertilizerException(e, sys)

    @classmethod
    def get_data_transformer_object(cls) -> Pipeline:
        """Build the (unfitted) preprocessing pipeline.

        Column positions come from ``utils.get_column_indices``; the numerical
        ones are standard-scaled, the categorical ones are one-hot encoded
        with ``drop='first'``, and any remaining column is passed through.

        Raises:
            FertilizerException: wraps any error raised while building.
        """
        try:
            numerical_indices, categorical_indices = utils.get_column_indices(
                numerical_features=NUMERICAL_FEATURES,
                categorical_features=CATEGORICAL_FEATURES,
                base_file_path=BASE_FILE_PATH,
            )

            column_steps = [
                ('StandaradScaling', StandardScaler(), numerical_indices),
                ('One_hot_encoding', OneHotEncoder(drop='first'), categorical_indices),
            ]
            preprocessor = ColumnTransformer(
                transformers=column_steps,
                remainder='passthrough',
            )

            return Pipeline([('preprocess', preprocessor)])

        except Exception as e:
            raise FertilizerException(e, sys)

    def initiate_data_transformation(self) -> artifact_entity.DataTransformationArtifact:
        """Run the transformation stage.

        Reads the train and test CSVs named by the ingestion artifact, fits a
        ``LabelEncoder`` on the training target and the preprocessing pipeline
        on the training features, transforms both splits, appends the encoded
        target as the last column and saves the arrays, the fitted pipeline
        and the fitted encoder through ``utils.save_object``.

        Returns:
            A ``DataTransformationArtifact`` with the four output paths taken
            from the configuration.

        Raises:
            FertilizerException: wraps any error raised during the stage.
        """
        try:
            cfg = self.data_transformation_config

            # Load both splits produced by the ingestion stage.
            train_df = pd.read_csv(self.data_ingestion_artifact.train_file_path)
            test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path)

            # Features are every column except the target.
            train_features = train_df.drop(TARGET_COLUMN, axis=1)
            test_features = test_df.drop(TARGET_COLUMN, axis=1)

            train_target = train_df[TARGET_COLUMN]
            test_target = test_df[TARGET_COLUMN]

            # The encoder is fitted on training labels only.
            label_encoder = LabelEncoder()
            label_encoder.fit(train_target)
            train_target_arr = label_encoder.transform(train_target)
            test_target_arr = label_encoder.transform(test_target)

            # The feature pipeline is fitted on training features only.
            transformation_pipeline = DataTransformation.get_data_transformer_object()
            transformation_pipeline.fit(train_features)
            train_feature_arr = transformation_pipeline.transform(train_features)
            test_feature_arr = transformation_pipeline.transform(test_features)

            # The encoded target becomes the last column of each array.
            train_arr = np.c_[train_feature_arr, train_target_arr]
            test_arr = np.c_[test_feature_arr, test_target_arr]

            # Persist arrays first, then the fitted pipeline and encoder.
            outputs = (
                (cfg.transformed_train_path, train_arr),
                (cfg.transformed_test_path, test_arr),
                (cfg.transform_object_path, transformation_pipeline),
                (cfg.target_encoder_path, label_encoder),
            )
            for destination, payload in outputs:
                utils.save_object(file_path=destination, obj=payload)

            transformation_artifact = artifact_entity.DataTransformationArtifact(
                transform_object_path=cfg.transform_object_path,
                transformed_train_path=cfg.transformed_train_path,
                transformed_test_path=cfg.transformed_test_path,
                target_encoder_path=cfg.target_encoder_path,
            )

            logging.info("Data transformation Completed")
            return transformation_artifact

        except Exception as e:
            raise FertilizerException(e, sys)
Fertilizer-Recommendation/src/components/data_validation.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.entity import config_entity
2
+ from src.entity import artifact_entity
3
+ from src.logger import logging
4
+ from src.exception import FertilizerException
5
+ from src.config import TARGET_COLUMN
6
+ from src import utils
7
+
8
+ from typing import Optional
9
+ from scipy.stats import ks_2samp
10
+ import pandas as pd
11
+ import numpy as np
12
+ import os
13
+ import sys
14
+
15
class DataValidation:
    """Compare the train/test splits against a base dataset and write a report."""

    def __init__(
        self,
        data_validation_config: config_entity.DataValidationConfig,
        data_ingestion_arfitact: artifact_entity.DataIngestionArtifact
    ):
        """Keep the configuration and ingestion artifact; start an empty report.

        Raises:
            FertilizerException: wraps any error raised while initialising.
        """
        try:
            banner = f"\n\n{'>'*50} Data Validation Initiated {'<'*50}\n"
            logging.info(banner)
            self.data_validation_config = data_validation_config
            self.data_ingestion_arfitact = data_ingestion_arfitact
            # Findings accumulated by the checks, keyed by report section name.
            self.validation_error = {}

        except Exception as e:
            raise FertilizerException(e, sys)

    def is_required_colums_exists(
        self,
        base_df: pd.DataFrame,
        current_df: pd.DataFrame,
        report_key_name: str
    ) -> bool:
        """Check that every column of ``base_df`` is present in ``current_df``.

        Missing columns are logged and stored in
        ``self.validation_error[report_key_name]``.

        Returns:
            ``True`` when nothing is missing, ``False`` otherwise.

        Raises:
            FertilizerException: wraps any error raised during the check.
        """
        try:
            available = current_df.columns
            missing_columns = [name for name in base_df.columns if name not in available]

            for name in missing_columns:
                logging.info(f"Column: {name} is not available")

            if not missing_columns:
                return True

            self.validation_error[report_key_name] = missing_columns
            return False

        except Exception as e:
            raise FertilizerException(e, sys)

    def data_drift(
        self,
        base_df: pd.DataFrame,
        current_df: pd.DataFrame,
        report_key_name: str
    ):
        """Run a two-sample KS test per base column and record the outcome.

        For each column of ``base_df`` the p-value of
        ``ks_2samp(base, current)`` is stored together with a flag that is
        ``True`` when the p-value is above 0.05. The per-column results are
        stored in ``self.validation_error[report_key_name]``.

        Raises:
            FertilizerException: wraps any error raised during the test.
        """
        try:
            drift_report = {}

            for column in base_df.columns:
                base_data = base_df[column]
                current_data = current_df[column]

                # Null hypothesis: both samples are drawn from the same distribution.
                logging.info(f"Hypothesis {column}: {base_data.dtype}, {current_data.dtype}")
                ks_result = ks_2samp(base_data, current_data)

                # Plain Python float/bool keep the report serialisable.
                drift_report[column] = {
                    "pvalue": float(ks_result.pvalue),
                    "same_distribution": bool(ks_result.pvalue > 0.05),
                }

            self.validation_error[report_key_name] = drift_report

        except Exception as e:
            raise FertilizerException(e, sys)

    def initiate_data_validation(self) -> artifact_entity.DataValidationArtifact:
        """Run the validation stage and write the report as YAML.

        Reads the base, train and test CSVs, passes each through
        ``utils.seperate_dependant_column`` with the target column excluded,
        checks for missing columns, runs the drift test on every split whose
        columns are complete, and writes ``self.validation_error`` to the
        configured report path.

        Returns:
            A ``DataValidationArtifact`` carrying the report file path.

        Raises:
            FertilizerException: wraps any error raised during the stage.
        """
        try:
            logging.info("Reading base dataframe")
            base_df = pd.read_csv(self.data_validation_config.base_file_path)

            logging.info("Reading train dataframe")
            train_df = pd.read_csv(self.data_ingestion_arfitact.train_file_path)

            logging.info("Reading test dataframe")
            test_df = pd.read_csv(self.data_ingestion_arfitact.test_file_path)

            exclude_column = [TARGET_COLUMN]
            base_df, train_df, test_df = (
                utils.seperate_dependant_column(df=frame, exclude_column=exclude_column)
                for frame in (base_df, train_df, test_df)
            )

            logging.info("Is all required columns present in the train_df")
            train_columns_ok = self.is_required_colums_exists(
                base_df=base_df,
                current_df=train_df,
                report_key_name='missing_columns_within_train_dataset',
            )
            test_columns_ok = self.is_required_colums_exists(
                base_df=base_df,
                current_df=test_df,
                report_key_name='missing_columns_within_test_dataset',
            )

            # Drift is only tested on a split that has every base column.
            if train_columns_ok:
                logging.info("As all colum are availabel in train_df hence detecting data drift")
                self.data_drift(
                    base_df=base_df,
                    current_df=train_df,
                    report_key_name='data_drift_within_train_dataset',
                )

            if test_columns_ok:
                logging.info("As all columns are availabel in test_df hence detecting data drift")
                self.data_drift(
                    base_df=base_df,
                    current_df=test_df,
                    report_key_name='data_drift_within_test_dataset',
                )

            logging.info("Writing report in yaml format")
            report_path = self.data_validation_config.report_file_path
            utils.write_yaml_file(file_path=report_path, data=self.validation_error)

            validation_artifact = artifact_entity.DataValidationArtifact(
                report_file_path=self.data_validation_config.report_file_path
            )

            logging.info("Data Vadidation Completed. Artifacts saved")
            return validation_artifact

        except Exception as e:
            raise FertilizerException(e, sys)
Fertilizer-Recommendation/src/components/model_evaluation.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.predictor import ModelResolver
2
+ from src.entity import config_entity
3
+ from src.entity import artifact_entity
4
+ from src.logger import logging
5
+ from src.exception import FertilizerException
6
+ from src.utils import load_object
7
+
8
+ from src.config import TARGET_COLUMN
9
+
10
+ from sklearn.metrics import f1_score
11
+ import pandas as pd
12
+ import numpy as np
13
+ import os
14
+ import sys
15
+
16
class ModelEvaluation:
    """Decide whether the newly trained model should replace the saved one."""

    def __init__(
        self,
        model_eval_config: config_entity.ModelEvaluationConfig,
        data_ingestion_artifact: artifact_entity.DataIngestionArtifact,
        data_transformation_artifact: artifact_entity.DataTransformationArtifact,
        model_trainer_artifact: artifact_entity.ModelTrainerArtifact
    ):
        """Keep the stage configuration and upstream artifacts.

        A ``ModelResolver`` built with its default arguments is used to locate
        the previously saved transformer, model and target encoder.

        Raises:
            FertilizerException: wraps any error raised while initialising.
        """
        try:
            logging.info(f"\n\n{'>'*50} Model Evaluation Initiated {'<'*50}\n")
            self.model_eval_config = model_eval_config
            self.data_ingestion_artifact = data_ingestion_artifact
            self.data_transformation_artifact = data_transformation_artifact
            self.model_trainer_artifact = model_trainer_artifact
            self.model_resolver = ModelResolver()

        except Exception as e:
            raise FertilizerException(e, sys)

    @staticmethod
    def _weighted_f1(transformer, model, target_encoder, test_df, target_df):
        """Return the weighted F1 score of one transformer/model/encoder triple on the test data."""
        feature_names = list(transformer.feature_names_in_)
        features = transformer.transform(test_df[feature_names])
        y_pred = model.predict(features)
        y_true = target_encoder.transform(target_df)
        return f1_score(y_true=y_true, y_pred=y_pred, average='weighted')

    def initiate_model_evaluation(self) -> artifact_entity.ModelEvaluationArtifact:
        """Compare the current model with the latest saved model on the test split.

        When the resolver reports no saved model directory, the current model
        is accepted immediately with ``improved_accuracy=None``. Otherwise
        both models are scored with weighted F1 on the test CSV, each one
        using its own transformer and target encoder.

        Returns:
            A ``ModelEvaluationArtifact`` with ``is_model_accepted=True`` and
            the F1 improvement of the current model over the previous one.

        Raises:
            FertilizerException: wraps any error, including the case where
                the current model does not score strictly higher than the
                previous model.
        """
        try:
            logging.info(
                "If the saved model directory contains a model, we will compare which model is best trained: "
                "the model from the saved model folder or the new model."
            )

            latest_dir_path = self.model_resolver.get_latest_dir_path()
            if latest_dir_path is None:
                # Nothing to compare against: the first trained model is accepted.
                model_eval_artifact = artifact_entity.ModelEvaluationArtifact(
                    is_model_accepted=True, improved_accuracy=None)

                logging.info(f"Model Evaluation Artifacts: {model_eval_artifact}")
                return model_eval_artifact

            # Locate the previously saved transformer, model and target encoder.
            logging.info(f"Finding location of transformer, model and target encoder")
            transformer_path = self.model_resolver.get_latest_transformer_path()
            model_path = self.model_resolver.get_latest_model_path()
            target_encoder_path = self.model_resolver.get_latest_target_encoder_path()

            # Load the previously trained objects.
            logging.info(f"Previous trained objects of transformer, model and target encoder")
            transformer = load_object(file_path=transformer_path)
            model = load_object(file_path=model_path)
            target_encoder = load_object(file_path=target_encoder_path)

            # Load the objects produced by the current pipeline run.
            logging.info(f"Currently trained model objects")
            current_transformer = load_object(
                file_path=self.data_transformation_artifact.transform_object_path)
            current_model = load_object(file_path=self.model_trainer_artifact.model_path)
            current_target_encoder = load_object(
                file_path=self.data_transformation_artifact.target_encoder_path)

            # Both models are scored on the same test split.
            test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path)
            target_df = test_df[TARGET_COLUMN]

            # The previous model is scored with its own transformer and encoder;
            # its predictions are in the label space of the encoder saved with it.
            previous_model_score = self._weighted_f1(
                transformer, model, target_encoder, test_df, target_df)

            current_model_score = self._weighted_f1(
                current_transformer, current_model, current_target_encoder, test_df, target_df)

            if current_model_score <= previous_model_score:
                logging.info(f"Current trained model is not better than previous model")
                raise Exception("Current trained model is not better than previous model")

            model_eval_artifact = artifact_entity.ModelEvaluationArtifact(
                is_model_accepted=True,
                improved_accuracy=current_model_score - previous_model_score)

            logging.info(f"Model Eval Artifacts generated")
            return model_eval_artifact

        except Exception as e:
            raise FertilizerException(e, sys)
Fertilizer-Recommendation/src/components/model_pusher.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.entity import config_entity
2
+ from src.entity import artifact_entity
3
+ from src.logger import logging
4
+ from src.exception import FertilizerException
5
+ from src.predictor import ModelResolver
6
+ from src.utils import load_object
7
+ from src.utils import save_object
8
+
9
+ from src.entity.config_entity import ModelPusherConfig
10
+
11
+ from src.entity.artifact_entity import DataTransformationArtifact
12
+ from src.entity.artifact_entity import ModelTrainerArtifact
13
+ from src.entity.artifact_entity import ModelPusherArtifact
14
+
15
+ import os
16
+ import sys
17
+
18
class ModelPusher:
    """Publish the trained transformer, model and target encoder.

    The three objects are loaded from the paths recorded in the
    data-transformation and model-trainer artifacts and written twice: into
    the run's model-pusher directory and into the next free slot of the
    saved-model registry handled by ``ModelResolver``.
    """

    def __init__(
        self,
        model_pusher_config: ModelPusherConfig,
        data_transformation_artifact: DataTransformationArtifact,
        model_trainer_artifact: ModelTrainerArtifact
    ):
        """Store the config/artifacts and build a resolver for the registry.

        Raises:
            FertilizerException: wraps any error raised during set-up.
        """
        try:
            logging.info(f"\n\n{'>'*50} Model Pusher Initiated {'<'*50}\n")
            self.model_pusher_config = model_pusher_config
            self.data_transformation_artifact = data_transformation_artifact
            self.model_trainer_artifact = model_trainer_artifact
            self.model_resolver = ModelResolver(
                model_registry=self.model_pusher_config.saved_model_dir
            )
        except Exception as e:
            raise FertilizerException(e, sys)

    def initiate_model_pusher(self) -> ModelPusherArtifact:
        """Copy the trained objects to the pusher directory and the registry.

        Returns:
            ModelPusherArtifact naming the pusher directory and the registry
            directory taken from the config.

        Raises:
            FertilizerException: wraps any error raised while loading or
                saving.
        """
        try:
            pusher_config = self.model_pusher_config
            transformation = self.data_transformation_artifact
            resolver = self.model_resolver

            logging.info("Loading transformer model and target encoder")
            transformer = load_object(file_path=transformation.transform_object_path)
            model = load_object(file_path=self.model_trainer_artifact.model_path)
            target_encoder = load_object(file_path=transformation.target_encoder_path)
            trained_objects = (transformer, model, target_encoder)

            # Copy 1: the run-specific model pusher directory.
            logging.info("Saving model into model pusher directory")
            pusher_targets = (
                pusher_config.pusher_transformer_path,
                pusher_config.pusher_model_path,
                pusher_config.pusher_target_encoder_path,
            )
            for target_path, trained_object in zip(pusher_targets, trained_objects):
                save_object(file_path=target_path, obj=trained_object)

            # Copy 2: the saved-model registry. All three destinations are
            # resolved before anything is written, because the first write
            # creates the new version directory and the resolver derives the
            # "next" slot from the directories it finds.
            logging.info("Saving model in saved model dir")
            registry_targets = (
                resolver.get_latest_save_transformer_path(),
                resolver.get_latest_save_model_path(),
                resolver.get_latest_save_target_encoder_path(),
            )
            for target_path, trained_object in zip(registry_targets, trained_objects):
                save_object(file_path=target_path, obj=trained_object)

            model_pusher_artifact = ModelPusherArtifact(
                pusher_model_dir=pusher_config.pusher_model_dir,
                saved_model_dir=pusher_config.saved_model_dir,
            )
            logging.info("Model Pusher Artifacts Generated")
            return model_pusher_artifact

        except Exception as e:
            raise FertilizerException(e, sys)
Fertilizer-Recommendation/src/components/model_trainer.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.entity import config_entity
2
+ from src.entity import artifact_entity
3
+ from src.logger import logging
4
+ from src.exception import FertilizerException
5
+ from src import utils
6
+
7
+ from typing import Optional
8
+ from sklearn.metrics import f1_score
9
+ from sklearn.tree import DecisionTreeClassifier
10
+ import os
11
+ import sys
12
+
13
class ModelTrainer:
    """Fit a decision-tree classifier and gate it on weighted F1 scores.

    The transformed train/test arrays are read from the paths in the
    data-transformation artifact; the last column of each array is used as
    the target and all preceding columns as input features.
    """

    def __init__(
        self,
        model_trainer_config: config_entity.ModelTrainerConfig,
        data_transformation_artifact: artifact_entity.DataTransformationArtifact):
        """Keep the trainer config and the upstream transformation artifact.

        Raises:
            FertilizerException: wraps any error raised during set-up.
        """
        try:
            logging.info(f"\n\n{'>'*50} Model Trainer Initiated {'<'*50}\n")
            self.model_trainer_config = model_trainer_config
            self.data_transformation_artifact = data_transformation_artifact

        except Exception as e:
            raise FertilizerException(e, sys)

    def train_model(self, X, y):
        """Return a ``DecisionTreeClassifier`` (default settings) fitted on X, y.

        Raises:
            FertilizerException: wraps any error raised while fitting.
        """
        try:
            decision_tree_classifier = DecisionTreeClassifier()
            decision_tree_classifier.fit(X, y)

            return decision_tree_classifier

        except Exception as e:
            raise FertilizerException(e, sys)

    def initial_model_trainer(self) -> artifact_entity.ModelTrainerArtifact:
        """Train, score, validate and persist the model.

        The model is rejected when its weighted F1 on the test split is below
        ``expected_score``, or when the train and test scores differ by more
        than ``overfitting_threshold``.

        Returns:
            ModelTrainerArtifact with the saved model path and both scores.

        Raises:
            FertilizerException: wraps a rejected model as well as any
                loading, training or saving error.
        """
        try:
            trainer_config = self.model_trainer_config

            logging.info(f"Loading train and test array")
            train_arr = utils.load_numpy_array_data(file_path=self.data_transformation_artifact.transformed_train_path)
            test_arr = utils.load_numpy_array_data(file_path=self.data_transformation_artifact.transformed_test_path)

            logging.info(f"Splitting the input and target feature from both train and test arr")
            X_train, y_train = train_arr[:, :-1], train_arr[:, -1]
            X_test, y_test = test_arr[:, :-1], test_arr[:, -1]

            logging.info(f"Training the model")
            model = self.train_model(X=X_train, y=y_train)

            logging.info(f"Calculating the f1 train score")
            yhat_train = model.predict(X_train)
            f1_train_score = f1_score(y_true=y_train, y_pred=yhat_train, average="weighted")

            logging.info(f"Calculating the f1 test score")
            yhat_test = model.predict(X_test)
            f1_test_score = f1_score(y_true=y_test, y_pred=yhat_test, average="weighted")

            logging.info(f"train_score : {f1_train_score} and test_score : {f1_test_score}")

            # Under-fitting gate: the test score must reach the expected score.
            # (It used to be compared against overfitting_threshold, which let
            # almost any model through.)
            logging.info(f"Checking if our model is underfitting or not")
            if f1_test_score < trainer_config.expected_score:
                raise Exception(
                    f"Model is not good, as it is not able to give "
                    f"expected accuracy: {trainer_config.expected_score}, "
                    f"model actual score: {f1_test_score}"
                )

            # Over-fitting gate: train and test scores must stay close.
            logging.info(f"Checking if our model is overfitting or not")
            diff = abs(f1_train_score - f1_test_score)
            if diff > trainer_config.overfitting_threshold:
                raise Exception(
                    f"Train and test score diff: {diff} "
                    f"is more than overfitting threshold: {trainer_config.overfitting_threshold}"
                )

            # save the trained model
            logging.info(f"Saving model object")
            utils.save_object(file_path=trainer_config.model_path, obj=model)

            # prepare the artifact
            logging.info(f"Prepare the artifact")
            model_trainer_artifact = artifact_entity.ModelTrainerArtifact(
                model_path=trainer_config.model_path,
                f1_train_score=f1_train_score,
                # The artifact field is named f2_test_score, but the value
                # stored in it is the weighted F1 on the test split.
                f2_test_score=f1_test_score)

            logging.info(f"Model Trainer Complete, Artifact Generated")

            return model_trainer_artifact

        except Exception as e:
            raise FertilizerException(e, sys)
Fertilizer-Recommendation/src/config.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pymongo
2
+ import pandas as pd
3
+ import json
4
+ from dataclasses import dataclass
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
+ load_dotenv()
9
+
10
+
11
@dataclass
class EnvironmentVariable:
    """Settings taken from the process environment."""

    # The annotation is what turns this into a real dataclass field, so it
    # takes part in the generated __init__/__repr__/__eq__; without it the
    # decorator had nothing to work with. The default is read once, when the
    # class is defined, and is None if MONGO_URL is not set. The annotation is
    # a string so that no extra typing import is required.
    mongo_db_url: "str | None" = os.getenv("MONGO_URL")
14
+
15
+
16
# Shared settings object and the MongoDB client created from its URL.
# Both are built as a side effect of importing this module.
env = EnvironmentVariable()
mongo_client = pymongo.MongoClient(env.mongo_db_url)

# Dataset column names. The spelling "Temparature" and the trailing space in
# "Humidity " are kept verbatim - presumably they mirror the CSV headers;
# verify against the dataset before "correcting" them.
TARGET_COLUMN = "Fertilizer Name"
NUMERICAL_FEATURES = [
    "Temparature",
    "Humidity ",
    "Moisture",
    "Nitrogen",
    "Potassium",
    "Phosphorous",
]
CATEGORICAL_FEATURES = ["Soil Type", "Crop Type"]

# Path of the reference CSV, relative to the working directory.
BASE_FILE_PATH = os.path.join("fertilizer-prediction/Fertilizer Prediction.csv")
Fertilizer-Recommendation/src/entity/__init__.py ADDED
File without changes
Fertilizer-Recommendation/src/entity/artifact_entity.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass
class DataIngestionArtifact:
    """Paths of the files handed over by the data-ingestion stage."""

    # presumably the full exported dataset - confirm in DataIngestion
    feature_store_file_path: str
    # train split
    train_file_path: str
    # test split; model evaluation reads it with pandas.read_csv
    test_file_path: str
9
+
10
+
11
@dataclass
class DataValidationArtifact:
    """Result handed over by the data-validation stage."""

    # location of the validation report file
    report_file_path: str
14
+
15
+
16
@dataclass
class DataTransformationArtifact:
    """Paths of the objects and arrays produced by data transformation."""

    # serialized feature transformer (read back with load_object)
    transform_object_path: str
    # transformed train array (read back with load_numpy_array_data)
    transformed_train_path: str
    # transformed test array (read back with load_numpy_array_data)
    transformed_test_path: str
    # serialized target encoder (read back with load_object)
    target_encoder_path: str
22
+
23
+
24
@dataclass
class ModelTrainerArtifact:
    """Saved model location and the scores measured by the trainer."""

    # path of the serialized model
    model_path: str
    # weighted F1 on the train split
    f1_train_score: float
    # NOTE: despite the "f2" in its name, ModelTrainer stores the weighted F1
    # of the test split here; the name is kept because callers use it as a
    # keyword argument.
    f2_test_score: float
29
+
30
+
31
@dataclass
class ModelEvaluationArtifact:
    """Outcome of comparing the new model with the previously saved one."""

    # whether the newly trained model was accepted
    is_model_accepted: bool
    # score gain over the previous model; the evaluation step fills it with
    # current score minus previous score (weighted F1)
    improved_accuracy: float
35
+
36
+
37
@dataclass
class ModelPusherArtifact:
    """Directories the model pusher wrote the trained objects to."""

    # run-specific copy inside the artifact directory
    pusher_model_dir: str
    # saved-model registry directory
    saved_model_dir: str
Fertilizer-Recommendation/src/entity/config_entity.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from src.exception import FertilizerException
4
+ from src.logger import logging
5
+ from datetime import datetime
6
+
7
# File names used by the stage configs in this module when they build
# artifact paths.

# dataset files
FILE_NAME = "fertilizer.csv"
TRAIN_FILE_NAME = "train.csv"
TEST_FILE_NAME = "test.csv"

# serialized objects
TRANSFORMER_OBJECT_FILE_NAME = "transformer.pkl"
TARGET_ENCODER_OBJECT_FILE_NAME = "target_encoder.pkl"
MODEL_FILE_NAME = "model.pkl"
13
+
14
+
15
class TrainingPipelineConfig:
    """Root settings of one training run.

    Attributes:
        artifact_dir: ``<cwd>/artifact/<MMDDYYYY__HHMMSS>``, stamped with the
            time the config object is created.
    """

    def __init__(self):
        try:
            run_stamp = datetime.now().strftime('%m%d%Y__%H%M%S')
            self.artifact_dir = os.path.join(os.getcwd(), "artifact", run_stamp)
        except Exception as e:
            raise FertilizerException(e, sys)
23
+
24
+
25
class DataIngestionConfig:
    """Settings of the data-ingestion stage.

    Holds a database/collection name pair (presumably the MongoDB source -
    confirm in DataIngestion), the output paths below
    ``<artifact_dir>/data_ingestion`` and the test split ratio.
    """

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        try:
            self.database_name = "smartcropguard"
            self.collection_name = "fertilizer"

            ingestion_root = os.path.join(
                training_pipeline_config.artifact_dir, "data_ingestion"
            )
            self.data_ingestion_dir = ingestion_root
            self.feature_store_file_path = os.path.join(
                ingestion_root, "feature_store", FILE_NAME
            )

            # train and test files share one "dataset" folder
            dataset_dir = os.path.join(ingestion_root, "dataset")
            self.train_file_path = os.path.join(dataset_dir, TRAIN_FILE_NAME)
            self.test_file_path = os.path.join(dataset_dir, TEST_FILE_NAME)

            self.test_size = 0.2
        except Exception as e:
            raise FertilizerException(e, sys)

    def to_dict(self) -> dict:
        """Return the instance's attribute dictionary (the live object, not a copy)."""
        try:
            return vars(self)
        except Exception as e:
            raise FertilizerException(e, sys)
51
+
52
+
53
class DataValidationConfig:
    """Settings of the data-validation stage.

    Attributes:
        data_validation_dir: ``<artifact_dir>/data_validation``.
        report_file_path: ``report.yaml`` inside that directory.
        missing_threshold: 0.2; presumably the tolerated share of missing
            values - confirm in DataValidation.
        base_file_path: reference CSV, relative to the working directory.
    """

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        validation_root = os.path.join(
            training_pipeline_config.artifact_dir, "data_validation"
        )
        self.data_validation_dir = validation_root
        self.report_file_path = os.path.join(validation_root, "report.yaml")
        self.missing_threshold = 0.2
        self.base_file_path = os.path.join(
            "fertilizer-prediction/Fertilizer Prediction.csv"
        )
63
+
64
+
65
class DataTransformationConfig:
    """Output paths of the data-transformation stage.

    Everything lives below ``<artifact_dir>/data_transformation``: the
    transformer object, the target encoder object and the transformed
    train/test arrays (the CSV file names with ``csv`` replaced by ``npz``).
    """

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        transformation_root = os.path.join(
            training_pipeline_config.artifact_dir, "data_transformation"
        )
        self.data_transformation_dir = transformation_root

        self.transform_object_path = os.path.join(
            transformation_root, "transformer", TRANSFORMER_OBJECT_FILE_NAME
        )

        # both arrays go into the same "transformed" folder
        transformed_dir = os.path.join(transformation_root, "transformed")
        self.transformed_train_path = os.path.join(
            transformed_dir, TRAIN_FILE_NAME.replace("csv", "npz")
        )
        self.transformed_test_path = os.path.join(
            transformed_dir, TEST_FILE_NAME.replace("csv", "npz")
        )

        self.target_encoder_path = os.path.join(
            transformation_root, "target_encoder", TARGET_ENCODER_OBJECT_FILE_NAME
        )
90
+
91
+
92
class ModelTrainerConfig:
    """Settings of the model-trainer stage.

    Attributes:
        model_trainer_dir: ``<artifact_dir>/model_trainer``.
        model_path: ``model/<MODEL_FILE_NAME>`` inside that directory.
        expected_score: 0.9, the score the trainer reports as expected.
        overfitting_threshold: 0.1, the largest train/test score gap the
            trainer accepts.
    """

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        trainer_root = os.path.join(
            training_pipeline_config.artifact_dir, "model_trainer"
        )
        self.model_trainer_dir = trainer_root
        self.model_path = os.path.join(trainer_root, "model", MODEL_FILE_NAME)
        self.expected_score = 0.9
        self.overfitting_threshold = 0.1
100
+
101
+
102
class ModelEvaluationConfig:
    """Settings of the model-evaluation stage.

    Attributes:
        change_threshold: 0.01; presumably the minimum score change of
            interest - confirm in ModelEvaluation.
    """

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        # training_pipeline_config is accepted like in the other stage
        # configs, but nothing is read from it here.
        self.change_threshold = 0.01
105
+
106
+
107
class ModelPusherConfig:
    """Destination paths of the model-pusher stage.

    Attributes:
        model_pusher_dir: ``<artifact_dir>/model_pusher``.
        saved_model_dir: ``saved_models``, relative to the working directory.
        pusher_model_dir: ``saved_models`` inside ``model_pusher_dir``.
        pusher_model_path, pusher_transformer_path,
        pusher_target_encoder_path: object files inside ``pusher_model_dir``.
    """

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        pusher_root = os.path.join(
            training_pipeline_config.artifact_dir, "model_pusher"
        )
        self.model_pusher_dir = pusher_root
        self.saved_model_dir = os.path.join("saved_models")

        run_copy_dir = os.path.join(pusher_root, "saved_models")
        self.pusher_model_dir = run_copy_dir
        self.pusher_model_path = os.path.join(run_copy_dir, MODEL_FILE_NAME)
        self.pusher_transformer_path = os.path.join(
            run_copy_dir, TRANSFORMER_OBJECT_FILE_NAME
        )
        self.pusher_target_encoder_path = os.path.join(
            run_copy_dir, TARGET_ENCODER_OBJECT_FILE_NAME
        )
Fertilizer-Recommendation/src/exception.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+
3
+
4
def error_message_detail(error, error_detail: sys):
    """Describe *error* together with the file and line where it was caught.

    Args:
        error: the exception (or message); it is rendered with ``str()``.
        error_detail: object exposing ``exc_info()`` - callers pass the
            ``sys`` module.

    Returns:
        A one-line message containing the script name, the line number and
        the error text. When no exception is currently being handled there is
        no traceback to inspect, so the name and line are reported as
        ``<unknown>`` instead of failing with an AttributeError.
    """
    _, _, exc_tb = error_detail.exc_info()

    if exc_tb is None:
        # Called outside an ``except`` block: nothing to read the location from.
        file_name = line_number = "<unknown>"
    else:
        # First traceback entry: the frame in which the exception is handled.
        file_name = exc_tb.tb_frame.f_code.co_filename
        line_number = exc_tb.tb_lineno

    error_message = "Error occurred python script name [{0}] line number [{1}] error message [{2}]".format(
        file_name, line_number, str(error)
    )

    return error_message
12
+
13
+
14
class FertilizerException(Exception):
    """Project exception whose text includes where the wrapped error was caught.

    Args:
        error_message: the original exception or message.
        error_detail: object exposing ``exc_info()`` - callers pass ``sys``.
    """

    def __init__(self, error_message, error_detail: sys):
        # Build the descriptive text once, while the traceback is available.
        described = error_message_detail(error_message, error_detail=error_detail)
        self.error_message = described

    def __str__(self):
        return self.error_message
Fertilizer-Recommendation/src/logger.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ from datetime import datetime
4
+
5
# One log file per process start, named after the start time
# (MMDDYYYY__HHMMSS.log).
LOG_FILE_NAME = datetime.now().strftime('%m%d%Y__%H%M%S') + ".log"

# Logs are collected in "<current working directory>/logs"; the folder is
# created on import if it is missing.
LOG_FILE_DIR = os.path.join(os.getcwd(), "logs")
os.makedirs(LOG_FILE_DIR, exist_ok=True)

# Full path of this run's log file.
LOG_FILE_PATH = os.path.join(LOG_FILE_DIR, LOG_FILE_NAME)

# Send INFO and above from the root logger to that file.
logging.basicConfig(
    level=logging.INFO,
    filename=LOG_FILE_PATH,
    format="[ %(asctime)s ] %(filename)s - %(lineno)d %(name)s - %(levelname)s - %(message)s",
)
Fertilizer-Recommendation/src/pipeline/__init__.py ADDED
File without changes
Fertilizer-Recommendation/src/pipeline/training_pipeline.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.logger import logging
2
+ from src.exception import FertilizerException
3
+ from src.utils import get_collection_as_dataframe
4
+ from src.entity import config_entity
5
+ from src.entity import artifact_entity
6
+ import os
7
+ import sys
8
+
9
+ from src.components.data_ingestion import DataIngestion
10
+ from src.components.data_validation import DataValidation
11
+ from src.components.data_transformation import DataTransformation
12
+ from src.components.model_trainer import ModelTrainer
13
+ from src.components.model_evaluation import ModelEvaluation
14
+ from src.components.model_pusher import ModelPusher
15
+
16
def start_training_pipeline():
    """Run the complete training workflow, one stage after another.

    Stages, in order: data ingestion, data validation, data transformation,
    model training, model evaluation and model pushing. Every stage gets a
    config derived from one shared ``TrainingPipelineConfig`` and receives
    the artifacts of the earlier stages it depends on. A stage that raises
    stops the run.

    Raises:
        FertilizerException: wraps any exception raised by a stage.
    """
    try:
        training_pipeline_config = config_entity.TrainingPipelineConfig()

        # data ingestion
        data_ingestion_config = config_entity.DataIngestionConfig(
            training_pipeline_config=training_pipeline_config)

        data_ingestion = DataIngestion(
            data_ingestion_config=data_ingestion_config)

        data_ingestion_artifact = data_ingestion.initiate_data_ingestion()
        print("Data Ingestion complete")

        # data validation
        data_validation_config = config_entity.DataValidationConfig(
            training_pipeline_config=training_pipeline_config)

        # NOTE(review): the keyword is spelled "arfitact" on purpose - it has
        # to match DataValidation's parameter name, which is defined outside
        # this function; rename both together.
        data_validation = DataValidation(
            data_validation_config=data_validation_config,
            data_ingestion_arfitact=data_ingestion_artifact)

        data_validation.initiate_data_validation()
        print("Data Validation Complete")

        # data transformation
        data_transformation_config = config_entity.DataTransformationConfig(
            training_pipeline_config=training_pipeline_config)

        data_transformation = DataTransformation(
            data_transformation_config=data_transformation_config,
            data_ingestion_artifact=data_ingestion_artifact)

        data_transformation_artifact = data_transformation.initiate_data_transformation()
        print("Data Transformation Complete")

        # model trainer
        model_trainer_config = config_entity.ModelTrainerConfig(
            training_pipeline_config=training_pipeline_config)

        model_trainer = ModelTrainer(
            model_trainer_config=model_trainer_config,
            data_transformation_artifact=data_transformation_artifact)

        model_trainer_artifact = model_trainer.initial_model_trainer()
        print("Model Trainer Complete")

        # model evaluation - called for its gate; the artifact is not needed
        # by any later stage
        model_evaluation_config = config_entity.ModelEvaluationConfig(
            training_pipeline_config=training_pipeline_config)

        model_evaluation = ModelEvaluation(
            model_eval_config=model_evaluation_config,
            data_ingestion_artifact=data_ingestion_artifact,
            data_transformation_artifact=data_transformation_artifact,
            model_trainer_artifact=model_trainer_artifact)

        model_evaluation.initiate_model_evaluation()
        print("Model Evaluation Complete")

        # model pusher
        model_pusher_config = config_entity.ModelPusherConfig(
            training_pipeline_config=training_pipeline_config)

        model_pusher = ModelPusher(
            model_pusher_config=model_pusher_config,
            data_transformation_artifact=data_transformation_artifact,
            model_trainer_artifact=model_trainer_artifact)

        # The pusher artifact used to be bound to model_trainer_artifact,
        # silently replacing the trainer's result; it is not used afterwards,
        # so it is no longer bound at all.
        model_pusher.initiate_model_pusher()
        print("Model Pusher Complete")

    except Exception as e:
        raise FertilizerException(e, sys)
85
+
Fertilizer-Recommendation/src/predictor.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.entity.config_entity import TRANSFORMER_OBJECT_FILE_NAME
2
+ from src.entity.config_entity import MODEL_FILE_NAME
3
+ from src.entity.config_entity import TARGET_ENCODER_OBJECT_FILE_NAME
4
+ from src.exception import FertilizerException
5
+ from src.logger import logging
6
+
7
+ import os
8
+ import sys
9
+ from glob import glob
10
+ from typing import Optional
11
+
12
class ModelResolver:
    """Locate versioned model files inside a saved-model registry directory.

    The registry holds one sub-directory per version, named with an integer
    ("0", "1", ...). Each version directory contains one folder per object
    kind (transformer, target encoder, model). Entries that are not
    integer-named directories (for example ``.gitkeep``) are ignored.
    """

    def __init__(
        self,
        model_registry: str = 'saved_models',
        transformer_dir_name = 'transformer',
        target_encoder_dir_name = 'target_encoder',
        model_dir_name = 'model'
    ):
        """Remember the layout names and create the registry directory if missing."""
        self.model_registry = model_registry
        os.makedirs(self.model_registry, exist_ok=True)

        self.transformer_dir_name = transformer_dir_name
        self.target_encoder_dir_name = target_encoder_dir_name
        self.model_dir_name = model_dir_name

    def get_latest_dir_path(self) -> Optional[str]:
        """Return the highest-numbered version directory, or None if there is none.

        Raises:
            FertilizerException: wraps any error raised while scanning.
        """
        try:
            # Only integer-named directories are versions; converting every
            # entry with int() used to crash on stray files in the registry.
            version_names = [
                name for name in os.listdir(self.model_registry)
                if name.isdecimal()
                and os.path.isdir(os.path.join(self.model_registry, name))
            ]
            if not version_names:
                return None

            return os.path.join(self.model_registry, max(version_names, key=int))

        except Exception as e:
            raise FertilizerException(e, sys)

    def _latest_file_path(self, sub_dir: str, file_name: str, missing_message: str) -> str:
        """Build a file path inside the latest version; raise if no version exists."""
        latest_dir = self.get_latest_dir_path()
        if latest_dir is None:
            raise Exception(missing_message)
        return os.path.join(latest_dir, sub_dir, file_name)

    def get_latest_model_path(self):
        """Return the model file path of the latest saved version."""
        try:
            return self._latest_file_path(
                self.model_dir_name, MODEL_FILE_NAME, "Model is not available")
        except Exception as e:
            raise FertilizerException(e, sys)

    def get_latest_transformer_path(self):
        """Return the transformer file path of the latest saved version."""
        try:
            return self._latest_file_path(
                self.transformer_dir_name, TRANSFORMER_OBJECT_FILE_NAME,
                "Transformer is not available")
        except Exception as e:
            raise FertilizerException(e, sys)

    def get_latest_target_encoder_path(self):
        """Return the target-encoder file path of the latest saved version."""
        try:
            return self._latest_file_path(
                self.target_encoder_dir_name, TARGET_ENCODER_OBJECT_FILE_NAME,
                "Target Encoder is not available")
        except Exception as e:
            raise FertilizerException(e, sys)

    def get_latest_save_dir_path(self):
        """Return the directory for the next version: "0" or latest + 1.

        The directory itself is not created here.
        """
        try:
            latest_dir = self.get_latest_dir_path()

            if latest_dir is None:
                return os.path.join(self.model_registry, "0")

            # Reuse the scan result instead of listing the registry again.
            next_version = int(os.path.basename(latest_dir)) + 1
            return os.path.join(self.model_registry, f"{next_version}")

        except Exception as e:
            raise FertilizerException(e, sys)

    def get_latest_save_model_path(self):
        """Return the model file path inside the next version directory."""
        try:
            return os.path.join(
                self.get_latest_save_dir_path(), self.model_dir_name, MODEL_FILE_NAME)
        except Exception as e:
            raise FertilizerException(e, sys)

    def get_latest_save_transformer_path(self):
        """Return the transformer file path inside the next version directory."""
        try:
            return os.path.join(
                self.get_latest_save_dir_path(), self.transformer_dir_name,
                TRANSFORMER_OBJECT_FILE_NAME)
        except Exception as e:
            raise FertilizerException(e, sys)

    def get_latest_save_target_encoder_path(self):
        """Return the target-encoder file path inside the next version directory."""
        try:
            return os.path.join(
                self.get_latest_save_dir_path(), self.target_encoder_dir_name,
                TARGET_ENCODER_OBJECT_FILE_NAME)
        except Exception as e:
            raise FertilizerException(e, sys)
117
+
118
+
Fertilizer-Recommendation/src/setup.py ADDED
File without changes
Fertilizer-Recommendation/src/utils.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from src.logger import logging
3
+ from src.exception import FertilizerException
4
+ from src.config import mongo_client
5
+ import os
6
+ import sys
7
+ import numpy as np
8
+ import yaml
9
+ import dill
10
+
11
def get_collection_as_dataframe(
    database_name: str, collection_name: str
) -> pd.DataFrame:
    """Load every document of a MongoDB collection into a DataFrame.

    The collection is read through the module-level ``mongo_client``; the
    ``_id`` column is removed when the result has one.

    Args:
        database_name: name of the database.
        collection_name: name of the collection inside that database.

    Returns:
        pandas DataFrame built from the documents of the collection.

    Raises:
        FertilizerException: wraps any error raised while reading.
    """
    try:
        logging.info(
            f"Reading data from database: {database_name} and collection: {collection_name}"
        )
        documents = list(mongo_client[database_name][collection_name].find())
        df = pd.DataFrame(documents)
        logging.info(f"{database_name} found in the mongodb")

        has_object_id = "_id" in df.columns
        if has_object_id:
            logging.info("Dropping column: '_id'")
            df = df.drop(columns=["_id"], axis=1)

        logging.info(f"Row and columns in df: {df.shape}")
        return df
    except Exception as e:
        raise FertilizerException(e, sys)
37
+
38
+
39
def seperate_dependant_column(df: pd.DataFrame, exclude_column: list) -> pd.DataFrame:
    """Return a new DataFrame without the columns listed in *exclude_column*.

    The input frame itself is left untouched.
    """
    return df.drop(exclude_column, axis=1)
43
+
44
+
45
def get_column_indices(numerical_features: list, categorical_features: list, base_file_path: str):
    """Look up the column positions of the given features in a CSV file.

    Args:
        numerical_features: names of the numerical columns.
        categorical_features: names of the categorical columns.
        base_file_path: CSV file whose header defines the column order.

    Returns:
        Tuple ``(numerical_indices, categorical_indices)``; each list holds
        the positions in the same order as the corresponding input list.

    Raises:
        KeyError: if a feature name is not a column of the file.
    """
    # Only the header is needed, so no data rows are parsed.
    columns = pd.read_csv(base_file_path, nrows=0).columns

    numerical_feature_indices = [columns.get_loc(feature) for feature in numerical_features]
    categorical_feature_indices = [columns.get_loc(feature) for feature in categorical_features]

    return numerical_feature_indices, categorical_feature_indices
53
+
54
+
55
def write_yaml_file(file_path, data: dict):
    """Write *data* to *file_path* as YAML, creating parent folders as needed.

    Args:
        file_path: destination file; an existing file is overwritten.
        data: content passed to ``yaml.dump``.

    Raises:
        FertilizerException: wraps any error raised while writing.
    """
    try:
        file_dir = os.path.dirname(file_path)
        # A bare file name has no directory part, and os.makedirs("") fails.
        if file_dir:
            os.makedirs(file_dir, exist_ok=True)

        with open(file_path, "w") as file_writer:
            yaml.dump(data, file_writer)
    except Exception as e:
        raise FertilizerException(e, sys)
64
+
65
+
66
def save_object(file_path: str, obj: object) -> None:
    """Serialize *obj* with dill to *file_path*, creating parent folders as needed.

    Args:
        file_path: destination file; an existing file is overwritten.
        obj: object to serialize.

    Raises:
        FertilizerException: wraps any error raised while saving.
    """
    try:
        logging.info("Entered the save object method of utils")
        parent_dir = os.path.dirname(file_path)
        # A bare file name has no directory part, and os.makedirs("") fails.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(file_path, "wb") as file_obj:
            dill.dump(obj, file_obj)
        logging.info("Exited the save object method of utils")
    except Exception as e:
        raise FertilizerException(e, sys)
75
+
76
+
77
def load_object(file_path: str) -> object:
    """Read back an object that was serialized with dill.

    Args:
        file_path: file to load.

    Returns:
        The deserialized object.

    Raises:
        FertilizerException: if the file does not exist or cannot be loaded.
    """
    try:
        if os.path.exists(file_path):
            # NOTE(security): dill, like pickle, can run arbitrary code while
            # loading - only open files produced by this project.
            with open(file_path, "rb") as file_obj:
                return dill.load(file_obj)
        raise Exception(f"The file: {file_path} is not exists")
    except Exception as e:
        raise FertilizerException(e, sys)
85
+
86
+
87
def save_numpy_array_data(file_path: str, array: np.array):
    """Save a NumPy array to *file_path*, creating parent folders as needed.

    The array is written with ``np.save`` to an already opened file, so the
    file name is used exactly as given.

    Args:
        file_path: destination file; an existing file is overwritten.
        array: data to save.

    Raises:
        FertilizerException: wraps any error raised while saving.
    """
    try:
        dir_path = os.path.dirname(file_path)
        # A bare file name has no directory part, and os.makedirs("") fails.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        # The handle was previously bound as "file_ojb" but used as
        # "file_obj", which raised NameError on every call.
        with open(file_path, "wb") as file_obj:
            np.save(file_obj, array)

    except Exception as e:
        raise FertilizerException(e, sys)
102
+
103
+
104
def load_numpy_array_data(file_path: str) -> np.array:
    """Load a NumPy array that was stored with ``np.save``.

    Args:
        file_path: file to load.

    Returns:
        The array read from the file.

    Raises:
        FertilizerException: wraps any error raised while loading.
    """
    try:
        with open(file_path, "rb") as file_obj:
            # NOTE(security): allow_pickle=True can execute code embedded in
            # the file - only load arrays produced by this project.
            return np.load(file_obj, allow_pickle=True)

    except Exception as e:
        # This used to raise CropException, a name that does not exist in
        # this module, so every failure surfaced as a NameError instead.
        raise FertilizerException(e, sys)
Fertilizer-Recommendation/template.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ from pathlib import Path
3
+ import logging
4
+
5
# Keep asking until a non-empty project name is entered.
while True:
    project_name = input("Enter your project name: ")
    if project_name != "":
        break

# Skeleton to create, e.g. for project name "src":
#   src/__init__.py
#   src/components/__init__.py
list_of_files = [
    f"{project_name}/__init__.py",
    f"{project_name}/components/__init__.py",
    f"{project_name}/components/data_ingestion.py",
    f"{project_name}/components/data_validation.py",
    f"{project_name}/components/data_transformation.py",
    f"{project_name}/components/model_trainer.py",
    f"{project_name}/components/model_evaluation.py",
    f"{project_name}/components/model_pusher.py",
    f"{project_name}/entity/__init__.py",
    f"{project_name}/entity/artifact_entity.py",
    f"{project_name}/entity/config_entity.py",
    f"{project_name}/pipeline/__init__.py",
    f"{project_name}/pipeline/training_pipeline.py",
    f"{project_name}/config.py",
    f"{project_name}/app.py",
    f"{project_name}/logger.py",
    f"{project_name}/exception.py",
    f"{project_name}/setup.py",
    f"{project_name}/utils.py",
    f"{project_name}/predictor.py",
    "main.py",
]


for filepth in list_of_files:
    filepath = Path(filepth)
    filedir, filename = os.path.split(filepath)

    if filedir != "":
        os.makedirs(filedir, exist_ok=True)

    # Create the file only when it is missing or empty, so existing content
    # is never overwritten.
    if (not os.path.exists(filepath)) or (os.path.getsize(filepath) == 0):
        with open(filepath, "w") as f:
            pass

    else:
        # The "f" prefix was missing, so the literal text "{filepath}" was
        # logged instead of the path.
        # NOTE(review): no logging configuration is visible in this script;
        # with the default WARNING level this INFO message is not shown.
        logging.info(f"file is already present at : {filepath}")
README.md CHANGED
@@ -1,10 +1,97 @@
1
  ---
2
- title: C
3
- emoji: 🐠
4
- colorFrom: pink
5
- colorTo: indigo
6
- sdk: static
 
 
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: CropGaurd
3
+ emoji: 🏢
4
+ colorFrom: indigo
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.39.0
8
+ app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ # CropGaurd
13
+ ## Agriculture and Farming Machine Learning Project
14
+
15
+ Developed a comprehensive web application that harnesses the power of machine learning to provide valuable insights and recommendations to farmers, agriculture enthusiasts, and stakeholders.
16
+
17
+ ![CropGaurd-thumbnail](https://github.com/07Sada/CropGaurd/assets/112761379/fd5f1726-7450-4758-952e-23e7f7b9da06)
18
+
19
+ # Disclaimer
20
+ This project serves as a Proof of Concept (PoC) and is not intended for making actual farming decisions. The data utilized within this project is provided without any guarantee from the creator. Therefore, it is strongly advised not to utilize the information for real-world agricultural choices. Should you choose to do so, please be aware that the creator bears no responsibility for the outcomes.
21
+
22
+ It's important to note that this project primarily demonstrates the application of Machine Learning (ML) and Deep Learning (DL) concepts within precision farming. The hypothetical scenario presented here underscores the potential benefits of deploying ML/DL techniques on a larger scale, provided that authentic and verified data sources are used.
23
+
24
+ For reliable and accurate farming decisions, always rely on verified agricultural data sources, expert advice, and industry standards.
25
+
26
+ ## Project Links
27
+
28
+ - **Application Link:** Check out the live application on Hugging Face Spaces: [Application Link](https://huggingface.co/spaces/Sadashiv/CropGaurd)
29
+
30
+ - **Demo Video:** For a visual walkthrough of the application's features, watch the demo video: [Demo Video Link](<insert_demo_video_link_here>)
31
+
32
+ ## Project Overview
33
+ The application integrates several key features to assist users in making informed decisions for their agricultural activities. These features include:
34
+
35
+ - ***Crop Recommendation System:*** Leveraging advanced machine learning techniques, the system recommends suitable crops based on various factors such as soil chemical content and climate conditions.
36
+
37
+ - ***Fertilizer Recommendation System:*** The application also offers personalized fertilizer recommendations, ensuring that crops receive the optimal nutrients for healthy growth and abundant yields.
38
+
39
+ - ***Plant Disease Classification:*** By employing cutting-edge image classification models, we have incorporated a feature that enables users to detect and diagnose diseases in plants. Users can simply upload images of their plants, and our system will accurately identify any diseases present and provide relevant information about them.
40
+
41
+ - ***Real-time Commodity Price Updates:*** To empower users with current market insights, we have integrated a government API that provides daily commodity prices across different Indian states. This information assists farmers and traders in making pricing and distribution decisions.
42
+
43
+ ## Purpose
44
+ The aim of this project is to revolutionize the agricultural sector by offering data-driven solutions that enhance productivity, reduce risks, and promote sustainable practices. By amalgamating technology and agriculture, we strive to address critical challenges faced by farmers and contribute to the growth of the farming community.
45
+
46
+ Whether you're a seasoned farmer seeking optimized strategies or an individual interested in sustainable agriculture, our application provides the tools you need to make well-informed decisions.
47
+
48
+ ## Additional Details
49
+ Here are some additional aspects of the project that contribute to its effectiveness and uniqueness:
50
+
51
+ - ***Machine Learning Models:*** We have trained our recommendation and classification models on extensive datasets specific to Indian agriculture. This ensures that the recommendations and classifications are accurate and relevant to the local context.
52
+
53
+ - ***User-Friendly Interface:*** Our web application boasts an intuitive and user-friendly interface designed to make navigation and interaction seamless, even for users with limited technological experience.
54
+
55
+ - ***Informational Insights:*** Apart from recommendations, our application provides detailed information about recommended crops, fertilizers, and identified plant diseases. This information helps users understand the rationale behind the suggestions and take well-informed actions.
56
+
57
+ - ***Scalability:*** Our project's architecture is designed to accommodate future expansions and enhancements. We are committed to continuously improving the application by incorporating user feedback and integrating emerging technologies.
58
+
59
+ ## Getting Started
60
+ - Clone or download the parent repository from [GitHub Repository Link](https://github.com/07Sada/CropGaurd)
61
+
62
+ ```
63
+ git clone --recurse-submodules https://github.com/07Sada/CropGaurd
64
+ ```
65
+ - The total project is divided into 4 repositories: one parent repository and 3 child repositories. The child repositories are dedicated to specific functionalities, namely [[crop recommendations](https://github.com/07Sada/crop-recommendation)], [[fertilizer recommendations](https://github.com/07Sada/Fertilizer-Recommendation)], and [[image classification](https://github.com/07Sada/plant-diseases-classifier)].
66
+ - The parent and child repositories are connected using Git submodules. This approach is taken to keep each recommendation system separate, as they contain their end-to-end pipelines – from data ingestion to model training and deploying the best models for inference.
67
+ - This modular structure allows us to maintain clean and organized code while efficiently managing updates and changes to each submodule.
68
+ - The data ingestion pipeline is flexible, as it is integrated with a MongoDB database. You can set up a scheduler to periodically update the training data. After new data is ingested, the models are trained and evaluated against the existing models. The best model is then pushed for inference, all of which is seamlessly automated through the pipeline, reducing the potential for errors.
69
+ - To get started, navigate to the parent repository and install the required dependencies.
70
+ - Explore each child repository for more specific details on their functionalities and pipelines.
71
+ - Launch the web application by running the following command in a terminal.
72
+
73
+ ```
74
+ python app.py
75
+ ```
76
+ - Start exploring the features and making use of the insightful recommendations provided.
77
+
78
+
79
+ ## Further Improvements
80
+
81
+ These potential improvements are not only achievable but hold the promise of elevating the application's impact and utility:
82
+
83
+ - ***Integration of Govt Policies:*** Imagine having the latest government policies and farmer-centric updates right at your fingertips. Our vision includes seamlessly integrating these critical updates, enabling you to stay informed and navigate regulatory changes with ease.
84
+
85
+ - ***Language Translation:*** Empowering users globally is within our reach. We envision breaking language barriers by adding translation capabilities. This means you can explore our insights and recommendations in your preferred language, ensuring accessibility for all.
86
+
87
+ - ***Weather Information:*** Harnessing real-time weather data can revolutionize your decision-making. Picture accessing accurate weather information directly within the application, allowing you to adapt and strategize based on changing conditions.
88
+
89
+ - ***Enhanced Recommendations with More Data:*** Our recommendation systems already provide valuable guidance, but we're not stopping there. By expanding our dataset, we're poised to fine-tune these systems to deliver recommendations that are even more personalized and effective.
90
+
91
+ - ***Market Trends Analysis:*** Envision anticipating market trends and price fluctuations effortlessly. With our potential addition of market trend analysis, you can gain insights that empower you to make informed decisions about your produce's pricing and distribution.
92
+
93
+ - ***Community Forums:*** We foresee a thriving community within the application—a space where knowledge is freely exchanged. Imagine being part of a network of farmers, sharing insights, experiences, and innovative approaches to agriculture.
94
+
95
+ - ***Automated Data Updates:*** Our dedication to keeping our models up-to-date is unwavering. The potential implementation of automated data updates ensures that you're always working with the latest insights and recommendations.
96
+
97
+
__pycache__/app.cpython-38.pyc ADDED
Binary file (5.1 kB). View file
 
__pycache__/artifacts.cpython-311.pyc ADDED
Binary file (1.84 kB). View file
 
__pycache__/artifacts.cpython-38.pyc ADDED
Binary file (1.1 kB). View file
 
__pycache__/config.cpython-311.pyc ADDED
Binary file (1.07 kB). View file
 
__pycache__/config.cpython-38.pyc ADDED
Binary file (798 Bytes). View file
 
__pycache__/utils.cpython-311.pyc ADDED
Binary file (2.76 kB). View file
 
__pycache__/utils.cpython-38.pyc ADDED
Binary file (1.37 kB). View file
 
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from config import crop_model, crop_pipeline_encoder, crop_label_encoder
2
+ from config import fertilizer_model, fertilizer_pipeline_encoder, fertilizer_label_encoder
3
+ from config import plant_diseases_classifier_model
4
+ from utils import retrieve_image_by_name_from_mongodb, retrieve_data
5
+ from flask import Flask, request, render_template, jsonify
6
+ import requests
7
+ import os
8
+ import numpy as np
9
+ import base64
10
+
11
# Flask application object shared by every route defined in this module.
app = Flask(__name__)

# Folder (relative to the working directory) that receives uploaded plant images.
app.config.update(UPLOAD_FOLDER='static/uploaded_image')
14
+
15
@app.route("/")
@app.route("/home")
def home():
    """Serve the landing page for both the root URL and ``/home``."""
    landing_template = 'index.html'
    return render_template(landing_template)
19
+
20
@app.route('/crop_recommendation', methods=['GET', 'POST'])
def crop_recommendation():
    """Render the form that collects the crop-recommendation inputs."""
    form_template = 'crop_recommendation_input.html'
    return render_template(form_template)
23
+
24
@app.route("/crop_recommendation_output", methods=['GET', 'POST'])
def crop_recommendation_output():
    """Predict a crop from the submitted readings and render the result page.

    Reads seven numeric form fields, passes them through the crop
    transformer and model, decodes the predicted label, and looks up the
    matching image and description in MongoDB.

    Returns:
        The rendered ``crop_recommendation_output.html`` page, or a JSON
        error with HTTP 400 when a field is missing or not numeric.
    """
    # Feature order is kept exactly as the original list was built:
    # N, P, K, temperature, humidity, pH, rainfall.
    field_names = ("nitrogen", "phosphorous", "potassium", "temperature",
                   "humidity", "ph", "rain_fall")
    try:
        # Parse as float: readings such as pH 6.5 are fractional, and casting
        # the raw form strings to int raised ValueError on them.
        readings = [float(request.form[name]) for name in field_names]
    except (KeyError, ValueError):
        return jsonify({'error': 'All crop inputs are required and must be numeric'}), 400

    input_array = np.array(readings).reshape(-1, 7)

    transformed_data = crop_pipeline_encoder.transform(input_array)
    model_prediction = crop_model.predict(transformed_data).astype(int)

    label = crop_label_encoder.inverse_transform(model_prediction)

    # Retrieve the crop image from the MongoDB database.
    # NOTE(review): assumes the helper returns bytes; confirm what it returns
    # when no image matches the label.
    image_data = retrieve_image_by_name_from_mongodb(
        database_name=os.getenv("CROP_DB_NAME"),
        collection_name=os.getenv("CROP_IMAGE_COLLECTION_NAME"),
        file_name=str(label[0]),
    )

    # Base64-encode the raw bytes so the template can embed the image inline.
    image_data_base64 = base64.b64encode(image_data).decode('utf-8')

    # Retrieve the descriptive text for the predicted crop from MongoDB.
    crop_details = retrieve_data(
        database_name=os.getenv("CROP_DB_NAME"),
        collection_name=os.getenv("CROP_INFO_COLLECTION_NAME"),
        search_query=label[0],
    )

    return render_template(
        'crop_recommendation_output.html',
        image_data_base64=image_data_base64,
        input_file_name=label[0],
        crop_details=crop_details,
    )
55
+
56
+
57
@app.route('/fertilizer_recommendation', methods=['GET', 'POST'])
def fertilizer_recommendation():
    """Render the form that collects the fertilizer-recommendation inputs."""
    form_template = 'fertilizer_recommendation_input.html'
    return render_template(form_template)
60
+
61
@app.route('/fertilizer_recommendation_output', methods=['GET', 'POST'])
def fertilizer_recommendation_output():
    """Predict a fertilizer from the submitted form and render the result page.

    Reads six integer fields plus the soil and crop type, passes them through
    the fertilizer transformer and model, decodes the predicted label, and
    looks up the matching image and description in MongoDB.

    Returns:
        The rendered result page, or a JSON error with HTTP 400 when a field
        is missing or a numeric field is not an integer.
    """
    numeric_fields = ("temperature", "humidity", "moisture",
                      "nitrogen", "potassium", "phosphorous")
    try:
        # Validate up front so bad input yields a 400 instead of an
        # unhandled TypeError/ValueError (HTTP 500) from int().
        numbers = {name: int(request.form[name]) for name in numeric_fields}
        soil_type = request.form["soil_type"]
        crop_type = request.form["crop_type"]
    except (KeyError, ValueError):
        return jsonify({'error': 'All fertilizer inputs are required; numeric fields must be integers'}), 400

    # Column order is kept exactly as in the original input list.
    input_data = [
        numbers["temperature"], numbers["humidity"], numbers["moisture"],
        soil_type, crop_type,
        numbers["nitrogen"], numbers["potassium"], numbers["phosphorous"],
    ]
    input_array = np.array(input_data).reshape(-1, 8)

    transformed_data = fertilizer_pipeline_encoder.transform(input_array)
    model_prediction = fertilizer_model.predict(transformed_data).astype(int)

    label = fertilizer_label_encoder.inverse_transform(model_prediction)

    # Retrieve the fertilizer image from the MongoDB database.
    # NOTE(review): assumes the helper returns bytes; confirm what it returns
    # when no image matches the label.
    image_data = retrieve_image_by_name_from_mongodb(
        database_name=os.getenv("FERTILIZER_DB_NAME"),
        collection_name=os.getenv("FERTILIZER_IMAGE_COLLECTION_NAME"),
        file_name=str(label[0]),
    )

    # Base64-encode the raw bytes so the template can embed the image inline.
    image_data_base64 = base64.b64encode(image_data).decode('utf-8')

    # Retrieve the descriptive text for the predicted fertilizer from MongoDB.
    fertilizer_details = retrieve_data(
        database_name=os.getenv("FERTILIZER_DB_NAME"),
        collection_name=os.getenv("FERTILIZER_INFO_COLLECTION_NAME"),
        search_query=label[0],
    )

    return render_template(
        'fertilizer_recommendation_ouput.html',
        image_data_base64=image_data_base64,
        label=label[0],
        fertilizer_details=fertilizer_details,
    )
93
+
94
+
95
@app.route('/image_classification', methods=['GET', 'POST'])
def image_classification():
    """Render the upload form for plant-disease image classification."""
    form_template = 'image_classification_input.html'
    return render_template(form_template)
98
+
99
@app.route('/image_classification_output', methods=['GET', 'POST'])
def image_classification_output():
    """Classify an uploaded plant image and render the disease details.

    Saves the uploaded file under ``UPLOAD_FOLDER``, runs the plant-disease
    classifier on it, picks the class with the highest probability, and looks
    up that class's description in MongoDB.

    Returns:
        The rendered ``image_classification_output.html`` page, or a JSON
        error with HTTP 400 when no image was submitted.
    """
    file = request.files.get('image_file')
    # Browsers submit an empty filename when the form is sent without a file.
    if file is None or not file.filename:
        return jsonify({'error': 'No image file was uploaded'}), 400

    upload_dir = app.config['UPLOAD_FOLDER']
    # Create the folder on first use so file.save() cannot fail on a fresh checkout.
    os.makedirs(upload_dir, exist_ok=True)

    # NOTE(review): the fixed name means concurrent uploads overwrite each
    # other; kept unchanged in case other code or templates reference this
    # exact path -- confirm before switching to unique names.
    new_filename = "plant_image.JPG"
    file_path = os.path.join(upload_dir, new_filename)
    file.save(file_path)

    # Run inference on the uploaded image.
    results = plant_diseases_classifier_model(file_path)

    # Mapping of class index to label name.
    names_dict = results[0].names

    # Probability of each class for the image.
    probs = results[0].probs.data.tolist()

    # Select the class with the maximum probability.
    model_prediction = names_dict[np.argmax(probs)]

    diseases_details = retrieve_data(
        database_name=os.getenv("DISEASE_DB_NAME"),
        collection_name=os.getenv("DISEASE_INFO_COLLECTION_NAME"),
        search_query=model_prediction,
    )

    return render_template(
        "image_classification_output.html",
        model_prediction=model_prediction,
        diseases_details=diseases_details,
    )
123
+
124
+
125
@app.route('/market_price')
def market_price():
    """Render the form used to pick a state for commodity prices."""
    form_template = "market_price_input.html"
    return render_template(form_template)
128
+
129
@app.route('/market_price_output', methods=['POST'])
def market_price_output():
    """Fetch commodity prices for the selected state and render them.

    Reads ``selected_state`` from the form, queries the data.gov.in resource
    with the key from ``COMMODITY_PRICE_API_KEY``, and renders the returned
    records.

    Returns:
        ``market_price_output.html`` with the records,
        ``market_price_no_data.html`` when there are none, or a JSON error
        with HTTP 400 when the upstream API cannot be read.
    """
    def api_failure():
        # Same error payload and status the original returned for non-200 replies.
        return jsonify({'error': 'Unable to fetch data from the API'}), 400

    # The form field is named 'selected_state'.
    user_input = request.form.get('selected_state')
    if not user_input:
        # Without a state there is nothing to filter on; requests would also
        # silently drop a None filter and return unfiltered data.
        return render_template("market_price_no_data.html")

    api_url = 'https://api.data.gov.in/resource/9ef84268-d588-465a-a308-a864a43d0070'
    # Pass values via params= so requests URL-encodes them; interpolating the
    # raw state name let characters like '&' or '=' alter the query string.
    query = {
        'api-key': os.getenv("COMMODITY_PRICE_API_KEY"),
        'format': 'json',
        'filters[state]': user_input,
    }

    try:
        # A timeout keeps a stalled upstream from blocking the worker forever.
        response = requests.get(api_url, params=query, timeout=10)
    except requests.RequestException:
        return api_failure()

    if response.status_code != 200:
        return api_failure()

    try:
        records = response.json().get('records', [])
    except ValueError:
        # The body was not valid JSON.
        return api_failure()

    if records:
        return render_template('market_price_output.html', data=records)
    return render_template("market_price_no_data.html")
150
+
151
if __name__ == "__main__":
    # Debug mode enables the Werkzeug interactive debugger, which allows
    # arbitrary code execution from the browser. Because the server binds to
    # all interfaces, it is now opt-in via FLASK_DEBUG instead of always on.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled, host="0.0.0.0", port=8000)
artifacts.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
# File names of the pickled objects stored inside each saved-model version.
MODEL_NAME = "model.pkl"
TARGET_ENCODER_OBJECT_NAME = "target_encoder.pkl"
# The misspelled name is kept because other modules may import it;
# the correctly spelled alias below is provided for new code.
TRANSFORMER_OJBCET_NAME = "transformer.pkl"
TRANSFORMER_OBJECT_NAME = TRANSFORMER_OJBCET_NAME

crop_recommendation_artifacts_path = "./crop-recommendation/saved_models"
fertilizer_recommendation_artifacts_path = "./Fertilizer-Recommendation/saved_models"

plant_diseases_classifier_model_path = "./plant-diseases-classifier/custom_model_weights/best.pt"


def _latest_version_dir(artifacts_path):
    """Return the name of the highest-numbered version folder in *artifacts_path*.

    Names are compared as integers, so "10" ranks above "9" (a plain string
    ``max`` would pick "9"), and non-numeric entries such as ``__pycache__``
    are ignored.

    Raises:
        ValueError: if the folder contains no numerically named entries.
    """
    versions = [name for name in os.listdir(artifacts_path) if name.isdigit()]
    if not versions:
        raise ValueError(f"No numbered model directories found in {artifacts_path!r}")
    return max(versions, key=int)


## crop recommendation artifacts
latest_crop_recommendation_artifacts = _latest_version_dir(crop_recommendation_artifacts_path)  # 0, 1, 2

latest_crop_recommendation_artifacts_path = os.path.join(crop_recommendation_artifacts_path, latest_crop_recommendation_artifacts)

crop_model_path = os.path.join(latest_crop_recommendation_artifacts_path, 'model', MODEL_NAME)
crop_transformer_path = os.path.join(latest_crop_recommendation_artifacts_path, 'transformer', TRANSFORMER_OJBCET_NAME)
crop_target_encoder_path = os.path.join(latest_crop_recommendation_artifacts_path, 'target_encoder', TARGET_ENCODER_OBJECT_NAME)


## fertilizer recommendation artifacts
latest_fertilizer_recommendation_artifacts = _latest_version_dir(fertilizer_recommendation_artifacts_path)  # 0, 1, 2

latest_fertilizer_recommendation_artifacts_path = os.path.join(fertilizer_recommendation_artifacts_path, latest_fertilizer_recommendation_artifacts)

fertilizer_model_path = os.path.join(latest_fertilizer_recommendation_artifacts_path, 'model', MODEL_NAME)
fertilizer_transformer_path = os.path.join(latest_fertilizer_recommendation_artifacts_path, 'transformer', TRANSFORMER_OJBCET_NAME)
fertilizer_target_encoder_path = os.path.join(latest_fertilizer_recommendation_artifacts_path, 'target_encoder', TARGET_ENCODER_OBJECT_NAME)
31
+