zapatacc committed on
Commit
800c846
·
1 Parent(s): e1a8062

initial commit

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="Python 3.9" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/nyc-taxi-model.iml" filepath="$PROJECT_DIR$/.idea/nyc-taxi-model.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/nyc-taxi-model.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# System-level setup has to run as root: apt-get and /etc/localtime are not
# writable by an unprivileged user, so this happens before switching users.
RUN apt-get update \
    && apt-get install -y --no-install-recommends procps \
    && rm -rf /var/lib/apt/lists/* \
    && pip install --no-cache-dir -U pip \
    && ln -sf /usr/share/zoneinfo/America/Mexico_City /etc/localtime

RUN useradd -m -u 1000 user

WORKDIR /code

# main.py downloads the "preprocessor" artifact into the working directory at
# start-up, so /code must be writable by the runtime user.
RUN mkdir -p /code/preprocessor \
    && chown -R user:user /code \
    && chmod -R 777 /code/preprocessor

# Everything below runs unprivileged; pip falls back to a per-user install,
# whose scripts (e.g. uvicorn) land in ~/.local/bin.
USER user
ENV PATH="/home/user/.local/bin:$PATH"

COPY --chown=user:user ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir -r ./requirements.txt

COPY --chown=user:user ./main.py /code/

EXPOSE 8000

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
main.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import mlflow
3
+ from fastapi import FastAPI
4
+ from pydantic import BaseModel
5
+ from mlflow import MlflowClient
6
+
7
+
8
# MLflow settings: tracking is served by the DagsHub-hosted MLflow endpoint.
MLFLOW_TRACKING_URI = "https://dagshub.com/zapatacc/nyc-taxi-time-prediction.mlflow"

mlflow.set_tracking_uri(uri=MLFLOW_TRACKING_URI)
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

# Only the run with the lowest RMSE is needed, so request a single result
# instead of pulling the whole experiment history.
_best_runs = mlflow.search_runs(
    order_by=["metrics.rmse ASC"],
    output_format="list",
    experiment_names=["nyc-taxi-experiment-prefect"],
    max_results=1,
)

# Fail with an explicit message rather than a bare IndexError when the
# experiment is empty or missing.
if not _best_runs:
    raise RuntimeError(
        "No runs found in MLflow experiment 'nyc-taxi-experiment-prefect'; "
        "cannot locate the preprocessor artifact."
    )

run_ = _best_runs[0]

run_id = run_.info.run_id

run_uri = f"runs:/{run_id}/preprocessor"

# Fetch the run's "preprocessor" artifact directory into the working directory.
client.download_artifacts(
    run_id=run_id,
    path="preprocessor",
    dst_path=".",
)

# NOTE(security): pickle.load executes arbitrary code from the file. This is
# only acceptable because the artifact comes from our own tracking server;
# never point this at an untrusted source.
with open("preprocessor/preprocessor.b", "rb") as f_in:
    dv = pickle.load(f_in)

# The serving model is resolved through the registry alias, so promoting a new
# "champion" changes what is loaded on the next start-up without a code change.
model_name = "nyc-taxi-model"
alias = "champion"

model_uri = f"models:/{model_name}@{alias}"

champion_model = mlflow.pyfunc.load_model(
    model_uri=model_uri
)
46
+
47
def preprocess(input_data):
    """Convert one request payload into the features passed to the model.

    The two location IDs are joined with an underscore into the ``PU_DO``
    feature, ``trip_distance`` is passed through unchanged, and the resulting
    dict is run through the module-level preprocessor ``dv`` loaded at start-up.

    Returns whatever ``dv.transform`` produces for that single record.
    """
    pickup_dropoff = "_".join(
        (input_data.PULocationID, input_data.DOLocationID)
    )
    features = {
        'PU_DO': pickup_dropoff,
        'trip_distance': input_data.trip_distance,
    }
    return dv.transform(features)
55
+
56
def predict(input_data):
    """Run the champion model on a single request payload.

    The payload is first turned into features by ``preprocess``; the return
    value is whatever ``champion_model.predict`` yields for those features.
    """
    features = preprocess(input_data)
    return champion_model.predict(features)
61
+
62
+
63
# Application object on which the HTTP routes below are registered.
app = FastAPI()


class InputData(BaseModel):
    """Request body accepted by the ``/predict`` endpoint."""

    # Location IDs are strings: they are concatenated into the "PU_DO" feature.
    PULocationID: str
    DOLocationID: str
    # Passed to the preprocessor unchanged as the "trip_distance" feature.
    trip_distance: float
69
+
70
+
71
@app.post("/predict")
def predict_endpoint(input_data: InputData):
    """Handle ``POST /predict`` for a single trip.

    Takes the first element of the model output and returns it, converted to
    a built-in ``float``, under the ``"prediction"`` key.
    """
    first_prediction = predict(input_data)[0]
    return {"prediction": float(first_prediction)}
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Runtime dependencies for the FastAPI prediction service.
fastapi==0.115.2
mlflow==2.16.1
xgboost==2.1.1
# NOTE(review): uvicorn is unpinned; pin it for reproducible image builds.
uvicorn
numpy==2.1.1
pandas==2.2.2
psutil==6.0.0
scikit-learn==1.5.2
scipy==1.14.1