diff --git a/app.py b/app.py
index fb28d2b5678de85e2c299369b25bf7a0da456a6c..c094cc687cf7a377212bbc69a52a3f8d616ee448 100644
--- a/app.py
+++ b/app.py
@@ -4,26 +4,98 @@ import plotly.graph_objects as go
 import plotly.express as px
 from sklearn.metrics import pairwise_distances
 import torch
+from facility_location import multi_eval
+import pickle
 
-def plot_from_npy(npy_data):
-    fig = go.Figure()
-    fig.add_trace(go.Scatter(x=[1, 2, 3, 4], y=[10, 11, 12, 13], mode='lines', name='New York'))
-    fig.update_layout(title_text="Facility Distribution in Cities")
-    fig.update_xaxes(title_text="Time")
-    fig.update_yaxes(title_text="Facility Count")
-
-
-    actual_fig = fig  # Replace this line with your actual_fig
-    solution_fig = fig  # Replace this line with your solution_fig
-
-    return actual_fig, solution_fig
 
 def solver_plot(data_npy, boost=False):
+    multi_eval.main(data_npy, boost)
+    with open('./facility_location/solutions.pkl', 'rb') as f:
+        all_solutions = pickle.load(f)
+
+    data = data_npy.split('\n')
+    n = len(data)
+    p = int((len(data[0].split(' ')) - 2) / 2)
+
+    positions = []
+    demands = []
+    actual_facilities = []
+    for row in data:
+        row = row.split(' ')
+        row = [x for x in row if len(x)]
+
+        positions.append([float(row[0]), float(row[1])])
+
+        demand = []
+        for i in range(2, 2 + p):
+            demand.append(float(row[i]))
+        demands.append(demand)
+
+        actual_facility = []
+        for i in range(2 + p, 2 + 2 * p):
+            actual_facility.append(bool(int(float(row[i]))))
+        actual_facilities.append(actual_facility)
+    positions = np.array(positions)
+    demands = np.array(demands)
+    actual_facilities = np.array(actual_facilities)
+    solution_facilities = np.array(all_solutions).T
+    # print(solution_facilities)
+    # print(actual_facilities)
+
     actual_fig = go.Figure()
     solution_fig = go.Figure()
-    actual_ac = 0  # Replace this line with your actual_ac
-    solution_ac = 0  # Replace this line with your solution_ac
+    for i in range(p):
+        actual_fig.add_trace(go.Scattermapbox(
+            lat=positions[actual_facilities[:, i]][:, 0],
+            lon=positions[actual_facilities[:, i]][:, 1],
+            mode='markers',
+            marker=go.scattermapbox.Marker(
+                size=10,
+                color=px.colors.qualitative.Plotly[i]
+            ),
+            name=f'Facility {i+1}'
+        ))
+        solution_fig.add_trace(go.Scattermapbox(
+            lat=positions[solution_facilities[:, i]][:, 0],
+            lon=positions[solution_facilities[:, i]][:, 1],
+            mode='markers',
+            marker=go.scattermapbox.Marker(
+                size=10,
+                color=px.colors.qualitative.Plotly[i]
+            ),
+            name=f'Facility {i+1}'
+        ))
+
+    actual_fig.update_layout(
+        mapbox=dict(
+            style='carto-positron',
+            center=dict(lat=np.mean(positions[actual_facilities[:, i]][:, 0]),
+                        lon=np.mean(positions[actual_facilities[:, i]][:, 1])),
+            zoom=11.0
+        ),
+        margin=dict(l=0, r=0, b=0, t=0))
+
+    solution_fig.update_layout(
+        mapbox=dict(
+            style='carto-positron',
+            center=dict(lat=np.mean(positions[solution_facilities[:, i]][:, 0]),
+                        lon=np.mean(positions[solution_facilities[:, i]][:, 1])),
+            zoom=11.0
+        ),
+        margin=dict(l=0, r=0, b=0, t=0))
+    # show legend
+    actual_fig.update_layout(showlegend=True)
+    solution_fig.update_layout(showlegend=True)
+
+    positions = np.deg2rad(positions)
+    dist = pairwise_distances(positions, metric='haversine') * 6371
+    actual_ac = 0
+    solution_ac = 0
+    for i in range(p):
+        ac_matrix = dist * demands[:, i][:, None]
+        actual_ac += ac_matrix[:, actual_facilities[:, i]].min(axis=-1).sum()
+        solution_ac += ac_matrix[:, solution_facilities[:, i]].min(axis=-1).sum()
+    return actual_fig, solution_fig, actual_ac, solution_ac
 
 
 def demo_plot(city, facility):
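Not part of the patch — a minimal standalone sketch of the accessibility cost computed at the end of solver_plot above: every node is assigned to its nearest open facility, with demand-weighted haversine distances in kilometres (Earth radius ≈ 6371 km). Toy coordinates, not app data.

```python
import numpy as np
from sklearn.metrics import pairwise_distances

# Toy instance: 3 nodes (lat, lon in degrees), per-node demand, open facilities.
positions = np.deg2rad(np.array([[40.71, -73.93], [40.72, -73.99], [40.65, -73.88]]))
demands = np.array([213.0, 15.0, 365.0])
facilities = np.array([False, True, True])

# Haversine distances (radians in, unit-sphere out), scaled to kilometres.
dist = pairwise_distances(positions, metric='haversine') * 6371
ac_matrix = dist * demands[:, None]                        # row i = node i's weighted distances
total_cost = ac_matrix[:, facilities].min(axis=-1).sum()   # nearest open facility per node
print(total_cost)
```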
@@ -104,7 +176,7 @@ def demo_plot(city, facility):
 
     return actual_fig, solution_fig, actual_ac, solution_ac
 
-def solver_plot(data_npy, boost=False):
+def solver_plot1(data_npy, boost=False):
     data = data_npy.split('\n')
     n = len(data)
     p = int((len(data[0].split(' '))-2) / 2)
@@ -115,7 +187,6 @@ def solver_plot(data_npy, boost=False):
     for row in data:
         row = row.split(' ')
         row = [x for x in row if len(x)]
-        print(row)
 
         positions.append([float(row[0]), float(row[1])])
 
@@ -132,7 +203,6 @@ def solver_plot(data_npy, boost=False):
     demands = np.array(demands)
     actual_facilities = np.array(actual_facilities)
     solution_facilities = ~actual_facilities
-    print(actual_facilities)
 
     actual_fig = go.Figure()
     solution_fig = go.Figure()
@@ -193,13 +263,13 @@ def solver_plot(data_npy, boost=False):
 
 def get_example():
     return [
-        ('40.71 -73.93 213 0\n40.72 -73.99 15 1\n40.65 -73.88 365 1\n40.57 -73.96 629 0\n40.70 -73.97 106 0\n40.61 -73.95 189 1'),
-        ("40.71 -73.93 213 124 0 1\n40.72 -73.99 15 43 1 0\n40.65 -73.88 365 214 1 0\n40.57 -73.96 629 431 0 1\n40.70 -73.97 106 241 0 1\n40.61 -73.95 189 264 1 0")
+        ('40.71 -73.93 213 1\n40.72 -73.99 15 1\n40.65 -73.88 365 1\n40.57 -73.96 629 0\n40.70 -73.97 106 0\n40.61 -73.95 189 1'),
+        ("40.71 -73.93 213 124 0 1\n40.72 -73.99 15 43 1 0\n40.65 -73.88 365 214 1 0\n40.57 -73.96 629 431 0 1\n40.70 -73.97 106 241 0 1\n40.60 -73.92 129 214 1 0\n40.61 -73.95 189 264 0 1\n40.63 -73.94 124 164 1 0"),
     ]
 
 
 def load_npy_file(file_obj):
-    data = np.load(file_obj.name)
+    data = np.loadtxt(file_obj.name)
     string_array = '\n'.join([' '.join(map(str, row)) for row in data])
     return string_array
 
@@ -231,8 +301,8 @@ with gr.Blocks() as demo:
         gr.Examples(
             examples=get_example(),
             inputs=[data_npy],
-            fn=plot_from_npy,
-            outputs=[actual_map, solution_map],
+            fn=solver_plot1,
+            outputs=[actual_map, solution_map, actual_ac, solution_ac],
         )
     with gr.Row():
         boost = gr.Checkbox(label="Turbo Boost (accelerate solution generation with fewer SWAP steps)", value=False)
diff --git a/facility_location/__init__.py b/facility_location/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/facility_location/__pycache__/__init__.cpython-39.pyc b/facility_location/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..17faab9c8acaf4a1fb983e92ad21c238aea52ee5
Binary files /dev/null and b/facility_location/__pycache__/__init__.cpython-39.pyc differ
diff --git a/facility_location/__pycache__/multi_eval.cpython-39.pyc b/facility_location/__pycache__/multi_eval.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f50f7bf88ce6f403575bd6872945ecfbc1aa8288
Binary files /dev/null and b/facility_location/__pycache__/multi_eval.cpython-39.pyc differ
diff --git a/facility_location/agent/__init__.py b/facility_location/agent/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..42b44bbcbc94818944f11ef41f22f6982bffb53a
--- /dev/null
+++ b/facility_location/agent/__init__.py
@@ -0,0 +1,4 @@
+from .policy import MaskedFacilityLocationActorCriticPolicy
+from .features_extractor import FacilityLocationMLPExtractor, FacilityLocationGNNExtractor, FacilityLocationAttentionGNNExtractor
+
+__all__ = ['MaskedFacilityLocationActorCriticPolicy', 'FacilityLocationMLPExtractor', 'FacilityLocationGNNExtractor', 'FacilityLocationAttentionGNNExtractor']
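For reference (an illustration, not code from the diff): the example strings in app.py above encode rows of "lat lon demand_1..demand_p flag_1..flag_p", so p = (columns - 2) / 2, and load_npy_file round-trips the same table through np.loadtxt. A quick self-contained check:

```python
import io
import numpy as np

text = "40.71 -73.93 213 124 0 1\n40.72 -73.99 15 43 1 0"
data = np.loadtxt(io.StringIO(text))    # same parser load_npy_file relies on
p = (data.shape[1] - 2) // 2            # number of facility types
positions = data[:, :2]
demands = data[:, 2:2 + p]
flags = data[:, 2 + p:].astype(bool)    # current facility placements
print(p, positions.shape, demands.shape, flags.sum(axis=0))
```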
diff --git a/facility_location/agent/__pycache__/__init__.cpython-310.pyc b/facility_location/agent/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ebfb4ce0856a3c9a27c9c0538ab21d333a0ad7b6
Binary files /dev/null and b/facility_location/agent/__pycache__/__init__.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/__init__.cpython-39.pyc b/facility_location/agent/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9580f6f26296b93f043381bab5aed0bb4f3817ba
Binary files /dev/null and b/facility_location/agent/__pycache__/__init__.cpython-39.pyc differ
diff --git a/facility_location/agent/__pycache__/features_extractor.cpython-310.pyc b/facility_location/agent/__pycache__/features_extractor.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6138bdfef58295424d518bab7b7d932edbfecedb
Binary files /dev/null and b/facility_location/agent/__pycache__/features_extractor.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/features_extractor.cpython-39.pyc b/facility_location/agent/__pycache__/features_extractor.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2e4f1ed1c4b94332931b24c42554308b8024e050
Binary files /dev/null and b/facility_location/agent/__pycache__/features_extractor.cpython-39.pyc differ
diff --git a/facility_location/agent/__pycache__/ga.cpython-310.pyc b/facility_location/agent/__pycache__/ga.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d498287ef372fd9cbe7d897c92daf1a87462c254
Binary files /dev/null and b/facility_location/agent/__pycache__/ga.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/ga.cpython-39.pyc b/facility_location/agent/__pycache__/ga.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..161a9de6b5134c84f369f9aeac3d5ae1d9506288
Binary files /dev/null and b/facility_location/agent/__pycache__/ga.cpython-39.pyc differ
diff --git a/facility_location/agent/__pycache__/heuristic.cpython-310.pyc b/facility_location/agent/__pycache__/heuristic.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..04b16d23ecd5c4d7d36ac293316abd8e603a16f5
Binary files /dev/null and b/facility_location/agent/__pycache__/heuristic.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/heuristic.cpython-39.pyc b/facility_location/agent/__pycache__/heuristic.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c00f92696ac954e72c9118693de5962aaf010a8d
Binary files /dev/null and b/facility_location/agent/__pycache__/heuristic.cpython-39.pyc differ
diff --git a/facility_location/agent/__pycache__/metaheuristic.cpython-310.pyc b/facility_location/agent/__pycache__/metaheuristic.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..490d27a6aafcff62674880b924da7c4de0bf4f97
Binary files /dev/null and b/facility_location/agent/__pycache__/metaheuristic.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/metaheuristic.cpython-39.pyc b/facility_location/agent/__pycache__/metaheuristic.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a103ca5dcb01c16ceea7dde1c39aefdf0979d46f
Binary files /dev/null and b/facility_location/agent/__pycache__/metaheuristic.cpython-39.pyc differ
diff --git a/facility_location/agent/__pycache__/policy.cpython-310.pyc b/facility_location/agent/__pycache__/policy.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b0d123558bb99a9d800e771a932a27496dc35319
Binary files /dev/null and b/facility_location/agent/__pycache__/policy.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/policy.cpython-39.pyc b/facility_location/agent/__pycache__/policy.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f68a11ebcec8f4e65c26b2de96d81bc1d13afd3e
Binary files /dev/null and b/facility_location/agent/__pycache__/policy.cpython-39.pyc differ
diff --git a/facility_location/agent/__pycache__/solver.cpython-310.pyc b/facility_location/agent/__pycache__/solver.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ef5f60ef552ab6e11cc6d8502f3547f2daf682ea
Binary files /dev/null and b/facility_location/agent/__pycache__/solver.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/solver.cpython-39.pyc b/facility_location/agent/__pycache__/solver.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..30f1b18539132a1f9a5d0d14edc21a2469bea7a8
Binary files /dev/null and b/facility_location/agent/__pycache__/solver.cpython-39.pyc differ
diff --git a/facility_location/agent/features_extractor.py b/facility_location/agent/features_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..da47bbe6ee49fecb8e40db79663428b6b4fab040
--- /dev/null
+++ b/facility_location/agent/features_extractor.py
@@ -0,0 +1,225 @@
+from collections import OrderedDict
+from typing import Tuple
+
+from gym import spaces
+import torch as th
+from torch import nn
+
+from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
+from stable_baselines3.common.type_aliases import TensorDict
+
+import time
+
+
+def mean_features(h: th.Tensor, mask: th.Tensor):
+    float_mask = mask.float()
+    mean_h = (h * float_mask.unsqueeze(-1)).sum(dim=1) / float_mask.sum(dim=1, keepdim=True)
+    return mean_h
+
+
+# def compute_state(observations: TensorDict, h_nodes: th.Tensor):
+#     node_mask = observations['node_mask'].bool()
+#     mean_h_nodes = mean_features(h_nodes, node_mask)
+
+#     old_facility_mask = observations['old_facility_mask'].bool()
+#     h_old_facility = mean_features(h_nodes, old_facility_mask)
+#     h_old_facility_repeat = h_old_facility.unsqueeze(1).expand(-1, h_nodes.shape[1], -1)
+#     state_policy_old_facility = th.cat([
+#         h_nodes,
+#         h_old_facility_repeat,
+#         h_nodes - h_old_facility_repeat,
+#         h_nodes * h_old_facility_repeat], dim=-1)
+
+#     new_facility_mask = observations['new_facility_mask'].bool()
+#     h_new_facility = mean_features(h_nodes, new_facility_mask)
+#     h_new_facility_repeat = h_new_facility.unsqueeze(1).expand(-1, h_nodes.shape[1], -1)
+#     state_policy_new_facility = th.cat([
+#         h_nodes,
+#         h_new_facility_repeat,
+#         h_nodes - h_new_facility_repeat,
+#         h_nodes * h_new_facility_repeat], dim=-1)
+
+#     state_value = th.cat([
+#         mean_h_nodes,
+#         h_old_facility,
+#         h_new_facility], dim=-1)
+
+#     return state_policy_old_facility, state_policy_new_facility, state_value, old_facility_mask, new_facility_mask
+
+def compute_state(observations: TensorDict, h_edges: th.Tensor):
+    dynamic_edge_mask = observations['dynamic_edge_mask'].bool()
+    mean_h_edges = mean_features(h_edges, dynamic_edge_mask)
+
+    state_policy_facility_pair = h_edges
+    state_value = mean_h_edges
+
+    return state_policy_facility_pair, state_value, dynamic_edge_mask
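A toy check (assumed inputs, not from the diff) of mean_features above — a masked mean over the node axis that ignores padded entries:

```python
import torch as th

h = th.arange(12.0).reshape(1, 4, 3)             # (batch, nodes, features)
mask = th.tensor([[True, True, False, False]])   # last two nodes are padding

float_mask = mask.float()
mean_h = (h * float_mask.unsqueeze(-1)).sum(dim=1) / float_mask.sum(dim=1, keepdim=True)
print(mean_h)  # tensor([[1.5, 2.5, 3.5]]) -- mean of the two real nodes only
```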
+
+
+class FacilityLocationMLPExtractor(BaseFeaturesExtractor):
+    def __init__(
+        self,
+        observation_space: spaces.Dict,
+        hidden_units: Tuple = (32, 32),
+    ) -> None:
+        super().__init__(observation_space, features_dim=1)
+
+        self.node_mlp = self.create_mlp(observation_space.spaces['node_features'].shape[1], hidden_units)
+
+    @staticmethod
+    def create_mlp(input_dim: int, hidden_units: Tuple) -> nn.Sequential:
+        layers = OrderedDict()
+        for i, units in enumerate(hidden_units):
+            if i == 0:
+                layers[f'mlp-extractor-linear_{i}'] = nn.Linear(input_dim, units)
+            else:
+                layers[f'mlp-extractor-linear_{i}'] = nn.Linear(hidden_units[i - 1], units)
+            layers[f'mlp-extractor-tanh_{i}'] = nn.Tanh()
+        return nn.Sequential(layers)
+
+    def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]:
+        node_features = observations['node_features']
+        h_nodes = self.node_mlp(node_features)
+        return compute_state(observations, h_nodes)
+
+    @staticmethod
+    def get_policy_feature_dim(node_dim: int) -> int:
+        return node_dim * 4
+
+    @staticmethod
+    def get_value_feature_dim(node_dim: int) -> int:
+        return node_dim * 3
+
+
+class FacilityLocationGNNExtractor(BaseFeaturesExtractor):
+    def __init__(
+        self,
+        observation_space: spaces.Dict,
+        num_gnn_layers: int = 2,
+        node_dim: int = 32,
+    ) -> None:
+        super().__init__(observation_space, features_dim=1)
+
+        num_node_features = observation_space.spaces['node_features'].shape[1]
+        self.node_encoder = self.create_node_encoder(num_node_features, node_dim)
+        self.gnn_layers = self.create_gnn(num_gnn_layers, node_dim)
+        self.single_gnn_layer = self.create_gnn(1, node_dim)[0]
+
+    @staticmethod
+    def create_node_encoder(num_node_features: int, node_dim: int) -> nn.Sequential:
+        node_encoder = nn.Sequential(
+            nn.Linear(num_node_features, node_dim),
+            nn.Tanh())
+        return node_encoder
+
+    @staticmethod
+    def create_gnn(num_gnn_layers: int, node_dim: int) -> nn.ModuleList:
+        layers = nn.ModuleList()
+        for i in range(num_gnn_layers):
+            gnn_layer = nn.Sequential(
+                nn.Linear(node_dim, node_dim),
+                nn.Tanh())
+            layers.append(gnn_layer)
+        return layers
+
+    @staticmethod
+    def scatter_count(h_edges, indices, edge_mask, max_num_nodes):
+        batch_size = h_edges.shape[0]
+        num_latents = h_edges.shape[2]
+
+        h_nodes = th.zeros(batch_size, max_num_nodes, num_latents).to(h_edges.device)
+        count_edge = th.zeros_like(h_nodes)
+        count = th.broadcast_to(edge_mask.unsqueeze(-1), h_edges.shape).float()
+
+        idx = indices.unsqueeze(-1).expand(-1, -1, num_latents)
+        h_nodes = h_nodes.scatter_add_(1, idx, h_edges)
+        count_edge = count_edge.scatter_add_(1, idx, count)
+        return h_nodes, count_edge
+
+    @staticmethod
+    def gather_to_edges(h_nodes, edge_index, edge_mask, gnn_layer):
+        h_nodes = gnn_layer(h_nodes)
+        h_edges_12 = th.gather(h_nodes, 1, edge_index[:, :, 0].unsqueeze(-1).expand(-1, -1, h_nodes.size(-1)))
+        h_edges_21 = th.gather(h_nodes, 1, edge_index[:, :, 1].unsqueeze(-1).expand(-1, -1, h_nodes.size(-1)))
+        mask = th.broadcast_to(edge_mask.unsqueeze(-1), h_edges_12.shape)
+        h_edges_12 = th.where(mask, h_edges_12, th.zeros_like(h_edges_12))
+        h_edges_21 = th.where(mask, h_edges_21, th.zeros_like(h_edges_21))
+        return h_edges_12, h_edges_21
+
+    @classmethod
+    def scatter_to_nodes(cls, h_edges, edge_index, edge_mask, node_mask):
+        h_edges_12, h_edges_21 = h_edges
+        max_num_nodes = node_mask.shape[1]
+        h_nodes_1, count_1 = cls.scatter_count(h_edges_21, edge_index[:, :, 0], edge_mask, max_num_nodes)
+        h_nodes_2, count_2 = cls.scatter_count(h_edges_12, edge_index[:, :, 1], edge_mask, max_num_nodes)
+
+        h_nodes_sum = h_nodes_1 + h_nodes_2
+
+        mask = th.broadcast_to(node_mask.unsqueeze(-1), h_nodes_sum.shape)
+        count = count_1 + count_2
+        count_padding = th.ones_like(count)
+        count = th.where(mask, count, count_padding)
+
+        h_nodes = h_nodes_sum / count
+        return h_nodes
+
+    def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor]:
+        t1 = time.time()
+        node_features = observations['node_features']
+        h_nodes = self.node_encoder(node_features)
+
+        edge_static_index = observations['static_adjacency_list'].long()
+        edge_dynamic_index = observations['dynamic_adjacency_list'].long()
+        node_mask = observations['node_mask'].bool()
+        static_edge_mask = observations['static_edge_mask'].bool()
+        dynamic_edge_mask = observations['dynamic_edge_mask'].bool()
+        for gnn_layer in self.gnn_layers:
+            h_edges = self.gather_to_edges(h_nodes, edge_static_index, static_edge_mask, gnn_layer)
+            h_nodes_new = self.scatter_to_nodes(h_edges, edge_static_index, static_edge_mask, node_mask)
+            h_nodes = h_nodes + h_nodes_new
+        h_edges12, h_edges21 = self.gather_to_edges(h_nodes, edge_dynamic_index, dynamic_edge_mask, self.single_gnn_layer)
+        h_edges = th.cat([h_edges12, h_edges21], dim=-1)
+
+        t2 = time.time()
+        # print('cal embedding time:', t2-t1)
+
+        return compute_state(observations, h_edges)
+
+    @staticmethod
+    def get_policy_feature_dim(node_dim: int) -> int:
+        return node_dim * 2
+
+    @staticmethod
+    def get_value_feature_dim(node_dim: int) -> int:
+        return node_dim * 2
+
+
+class FacilityLocationAttentionGNNExtractor(FacilityLocationGNNExtractor):
+    def __init__(
+        self,
+        observation_space: spaces.Dict,
+        num_gnn_layers: int = 2,
+        node_dim: int = 32,
+    ) -> None:
+        super().__init__(observation_space, num_gnn_layers, node_dim)
+
+        num_node_features = observation_space.spaces['node_features'].shape[1]
+        self.node_encoder = self.create_node_encoder(num_node_features, node_dim)
+        self.gnn_layers = self.create_gnn(num_gnn_layers, node_dim)
+        self.attention = nn.MultiheadAttention(node_dim, node_dim)
+
+    def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor]:
+        node_features = observations['node_features']
+        h_nodes = self.node_encoder(node_features)
+
+        edge_static_index = observations['static_adjacency_list'].long()
+        edge_dynamic_index = observations['dynamic_adjacency_list'].long()
+        node_mask = observations['node_mask'].bool()
+        edge_mask = observations['static_edge_mask'].bool()
+        for gnn_layer in self.gnn_layers:
+            h_edges = self.gather_to_edges(h_nodes, edge_static_index, edge_mask, gnn_layer)
+            h_nodes_new = self.scatter_to_nodes(h_edges, edge_static_index, edge_mask, node_mask)
+            h_nodes = h_nodes + h_nodes_new
+
+        h_nodes = self.attention(h_nodes, h_nodes, h_nodes)[0]
+
+        return compute_state(observations, h_nodes)
\ No newline at end of file
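The core pattern in the GNN extractor above, isolated as a sketch with toy shapes (the (batch, edges, 2) adjacency layout follows the diff; the sizes are made up): node features are gathered onto edge endpoints with th.gather, then accumulated back onto nodes with scatter_add_.

```python
import torch as th

h_nodes = th.arange(8.0).reshape(1, 4, 2)           # (batch, 4 nodes, 2 dims)
edge_index = th.tensor([[[0, 1], [1, 2], [2, 3]]])  # (batch, 3 edges, 2 endpoints)

# Gather: pull each edge's source-node features.
idx = edge_index[:, :, 0].unsqueeze(-1).expand(-1, -1, h_nodes.size(-1))
h_src = th.gather(h_nodes, 1, idx)                  # (batch, 3 edges, 2 dims)

# Scatter: sum edge messages back onto their source nodes.
h_back = th.zeros_like(h_nodes)
h_back.scatter_add_(1, idx, h_src)
print(h_src.shape, h_back[0])
```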
diff --git a/facility_location/agent/policy.py b/facility_location/agent/policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e9ac152061b30d3bd6ab273b7c5171a8f78eac1
--- /dev/null
+++ b/facility_location/agent/policy.py
@@ -0,0 +1,229 @@
+from functools import partial
+from typing import Callable, Tuple, Text, Union
+from collections import OrderedDict
+
+import numpy as np
+from gym import spaces
+import torch as th
+from torch import nn
+
+from stable_baselines3.common.policies import ActorCriticPolicy
+from stable_baselines3.common.utils import get_device
+from stable_baselines3.common.type_aliases import Schedule
+
+
+def create_mlp(head: Text, input_dim: int, hidden_units: Tuple) -> nn.Sequential:
+    layers = OrderedDict()
+    for i, units in enumerate(hidden_units):
+        if i == 0:
+            layers[f'{head}_linear_{i}'] = nn.Linear(input_dim, units)
+        else:
+            layers[f'{head}_linear_{i}'] = nn.Linear(hidden_units[i - 1], units)
+        if i != len(hidden_units) - 1:
+            layers[f'{head}_tanh_{i}'] = nn.Tanh()
+    if head.startswith('policy'):
+        layers[f'{head}_flatten'] = nn.Flatten()
+    return nn.Sequential(layers)
+
+
+class MaskedFacilityLocationNetwork(nn.Module):
+
+    def __init__(
+        self,
+        policy_feature_dim: int,
+        value_feature_dim: int,
+        policy_hidden_units: Tuple = (32, 32, 1),
+        value_hidden_units: Tuple = (32, 32, 1),
+        device: Union[th.device, Text] = "auto",
+    ):
+        super().__init__()
+        device = get_device(device)
+
+        # Policy network
+        # self.old_facility_policy_net = create_mlp('policy-old-facility',
+        #                                           policy_feature_dim,
+        #                                           policy_hidden_units).to(device)
+        # self.new_facility_policy_net = create_mlp('policy-new-facility',
+        #                                           policy_feature_dim,
+        #                                           policy_hidden_units).to(device)
+        self.pair_facility_policy_net = create_mlp('policy-pair-facility',
+                                                   policy_feature_dim,
+                                                   policy_hidden_units).to(device)
+        # Value network
+        self.value_net = create_mlp('value',
+                                    value_feature_dim,
+                                    value_hidden_units).to(device)
+
+    def forward(self,
+                features: Tuple[th.Tensor, th.Tensor, th.Tensor]) -> Tuple[th.Tensor, th.Tensor]:
+        return self.forward_actor(features), self.forward_critic(features)
+
+    # def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+    #     state_policy_old_facility, state_policy_new_facility, _, old_facility_mask, new_facility_mask = features
+
+    #     old_facility_logits = self.old_facility_policy_net(state_policy_old_facility)  # (batch_size, node_range)
+    #     old_facility_padding = th.full_like(old_facility_mask, -th.inf, dtype=th.float32)
+    #     masked_old_facility_logits = th.where(old_facility_mask, old_facility_logits, old_facility_padding)
+
+    #     new_facility_logits = self.new_facility_policy_net(state_policy_new_facility)  # (batch_size, node_range)
+    #     new_facility_padding = th.full_like(new_facility_mask, -th.inf, dtype=th.float32)
+    #     masked_new_facility_logits = th.where(new_facility_mask, new_facility_logits, new_facility_padding)
+
+    #     masked_old_new_facility_logits = th.cat([masked_old_facility_logits, masked_new_facility_logits], dim=1)
+    #     return masked_old_new_facility_logits
+
+    def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        state_policy_pair_facility, _, dynamic_edge_mask = features
+        pair_facility_logits = self.pair_facility_policy_net(state_policy_pair_facility)
+        pair_facility_padding = th.full_like(dynamic_edge_mask, -th.inf, dtype=th.float32)
+        masked_pair_facility_logits = th.where(dynamic_edge_mask, pair_facility_logits, pair_facility_padding)
+
+        return masked_pair_facility_logits
+
+    def forward_critic(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        _, state_value, _ = features
+        return self.value_net(state_value)
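How the masking in forward_actor behaves, as a standalone sketch: invalid facility pairs get -inf logits, so softmax assigns them exactly zero probability.

```python
import torch as th

logits = th.tensor([[1.0, 2.0, 3.0]])
mask = th.tensor([[True, False, True]])          # middle action is invalid

padding = th.full_like(mask, -th.inf, dtype=th.float32)
masked_logits = th.where(mask, logits, padding)
print(th.softmax(masked_logits, dim=-1))         # middle probability is exactly 0
```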
+
+
+class POPSTARMaskedFacilityLocationNetwork(nn.Module):
+
+    def __init__(
+        self,
+        policy_feature_dim: int,
+        value_feature_dim: int,
+        policy_hidden_units: Tuple = (32, 32, 1),
+        value_hidden_units: Tuple = (32, 32, 1),
+        device: Union[th.device, Text] = "auto",
+    ):
+        super().__init__()
+        device = get_device(device)
+
+        # Policy network
+        self.old_facility_policy_net = create_mlp('policy-old-facility',
+                                                  policy_feature_dim,
+                                                  policy_hidden_units).to(device)
+        self.new_facility_policy_net = create_mlp('policy-new-facility',
+                                                  policy_feature_dim,
+                                                  policy_hidden_units).to(device)
+        self.old_new_facility_policy_net = create_mlp('policy-old-new-facility',
+                                                      policy_feature_dim * 4,
+                                                      policy_hidden_units).to(device)
+
+        # Value network
+        self.value_net = create_mlp('value',
+                                    value_feature_dim,
+                                    value_hidden_units).to(device)
+
+    def forward(self,
+                features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> Tuple[th.Tensor, th.Tensor]:
+        return self.forward_actor(features), self.forward_critic(features)
+
+    def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        state_policy_old_facility, state_policy_new_facility, _, old_facility_mask, new_facility_mask = features
+
+        node_range = old_facility_mask.shape[1]
+
+        loss = self.old_facility_policy_net(state_policy_old_facility)  # (batch_size, node_range)
+        loss = loss.repeat_interleave(node_range, dim=1)
+
+        gain = self.new_facility_policy_net(state_policy_new_facility)  # (batch_size, node_range)
+        gain = gain.repeat(1, node_range)
+
+        state_policy_old_facility_expand = state_policy_old_facility.unsqueeze(2).expand(-1, -1, node_range, -1)
+        state_policy_new_facility_expand = state_policy_new_facility.unsqueeze(1).expand(-1, node_range, -1, -1)
+        state_policy_old_new_facility = th.cat(
+            [
+                state_policy_old_facility_expand,
+                state_policy_new_facility_expand,
+                state_policy_old_facility_expand - state_policy_new_facility_expand,
+                state_policy_old_facility_expand * state_policy_new_facility_expand
+            ], dim=-1
+        )
+        extra = self.old_new_facility_policy_net(state_policy_old_new_facility)  # (batch_size, node_range * node_range)
+
+        logits = gain - loss + extra
+
+        action_mask = th.logical_and(old_facility_mask.unsqueeze(2), new_facility_mask.unsqueeze(1)).flatten(start_dim=1)
+        padding = th.full_like(action_mask, -th.inf, dtype=th.float32)
+        masked_logits = th.where(action_mask, logits, padding)
+
+        return masked_logits
+
+    def forward_critic(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        _, _, state_value, _, _ = features
+        return self.value_net(state_value)
+
+
+class MaskedFacilityLocationActorCriticPolicy(ActorCriticPolicy):
+    def __init__(
+        self,
+        observation_space: spaces.Space,
+        action_space: spaces.Space,
+        lr_schedule: Callable[[float], float],
+        *args,
+        **kwargs,
+    ):
+        self.policy_feature_dim = kwargs.pop('policy_feature_dim')
+        self.value_feature_dim = kwargs.pop('value_feature_dim')
+        self.policy_hidden_units = kwargs.pop('policy_hidden_units')
+        self.value_hidden_units = kwargs.pop('value_hidden_units')
+
+        self.popstar = kwargs.pop('popstar')
+
+        super().__init__(
+            observation_space,
+            action_space,
+            lr_schedule,
+            # Pass remaining arguments to base class
+            *args,
+            **kwargs,
+        )
+
+    def _build(self, lr_schedule: Schedule) -> None:
+        self._build_mlp_extractor()
+
+        self.action_net = nn.Identity()
+        self.value_net = nn.Identity()
+
+        # Init weights: use orthogonal initialization
+        # with small initial weight for the output
+        if self.ortho_init:
+            # TODO: check for features_extractor
+            # Values from stable-baselines.
+            # features_extractor/mlp values are
+            # originally from openai/baselines (default gains/init_scales).
+            module_gains = {
+                self.features_extractor: np.sqrt(2),
+                self.mlp_extractor: np.sqrt(2),
+            }
+            # if not self.share_features_extractor:
+            #     # Note(antonin): this is to keep SB3 results
+            #     # consistent, see GH#1148
+            #     del module_gains[self.features_extractor]
+            #     module_gains[self.pi_features_extractor] = np.sqrt(2)
+            #     module_gains[self.vf_features_extractor] = np.sqrt(2)
+
+            for module, gain in module_gains.items():
+                module.apply(partial(self.init_weights, gain=gain))
+
+        # Setup optimizer with initial learning rate
+        self.optimizer = self.optimizer_class(self.parameters(), lr=lr_schedule(1), **self.optimizer_kwargs)
+
+    def _build_mlp_extractor(self) -> None:
+        if not self.popstar:
+            self.mlp_extractor = MaskedFacilityLocationNetwork(
+                self.policy_feature_dim,
+                self.value_feature_dim,
+                self.policy_hidden_units,
+                self.value_hidden_units,
+                self.device,
+            )
+        else:
+            self.mlp_extractor = POPSTARMaskedFacilityLocationNetwork(
+                self.policy_feature_dim,
+                self.value_feature_dim,
+                self.policy_hidden_units,
+                self.value_hidden_units,
+                self.device,
+            )
diff --git a/facility_location/agent/solver.py b/facility_location/agent/solver.py
new file mode 100644
index 0000000000000000000000000000000000000000..92391ad589792b88956aafa7119f890c21972bba
--- /dev/null
+++ b/facility_location/agent/solver.py
@@ -0,0 +1,33 @@
+from typing import Text
+
+import numpy as np
+import pulp
+from spopt.locate import PMedian
+
+from facility_location.env import EvalPMPEnv
+
+
+class PMPSolver:
+    def __init__(self, solver: Text, env: EvalPMPEnv):
+        if solver == 'GUROBI':
+            self._solver = pulp.GUROBI(msg=False)
+        elif solver == 'GUROBI_CMD':
+            self._solver = pulp.GUROBI_CMD(msg=False)
+        elif solver == 'PULP_CBC_CMD':
+            self._solver = pulp.PULP_CBC_CMD(msg=False)
+        elif solver == 'GLPK_CMD':
+            self._solver = pulp.GLPK_CMD(msg=False)
+        elif solver == 'MOSEK':
+            self._solver = pulp.MOSEK(msg=False)
+        else:
+            raise ValueError(f'Solver {solver} not supported.')
+
+        self.env = env
+
+    def solve(self):
+        _, demands, _, p = self.env.get_instance()
+        distance_matrix, _ = self.env.get_distance_and_cost()
+        pmedian_from_cost_matrix = PMedian.from_cost_matrix(distance_matrix, demands, p_facilities=p)
+        pmedian_from_cost_matrix = pmedian_from_cost_matrix.solve(self._solver)
+        solution = np.array([len(temp) > 0 for temp in pmedian_from_cost_matrix.fac2cli], dtype=bool)
+        return solution
diff --git a/facility_location/cfg/__init__.py b/facility_location/cfg/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
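Standalone usage of the spopt calls PMPSolver wraps, on toy data (the CBC backend here is an assumption; any installed PuLP solver works): build a p-median model from a cost matrix and recover the boolean facility vector from fac2cli, exactly as solve() does above.

```python
import numpy as np
import pulp
from spopt.locate import PMedian

cost = np.random.default_rng(0).random((6, 6))   # toy client-facility cost matrix
demand = np.ones(6)

model = PMedian.from_cost_matrix(cost, demand, p_facilities=2)
model = model.solve(pulp.PULP_CBC_CMD(msg=False))
solution = np.array([len(clients) > 0 for clients in model.fac2cli], dtype=bool)
print(solution)  # True where a facility was opened
```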
diff --git a/facility_location/cfg/plot.yaml b/facility_location/cfg/plot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..231d4c9640d28912d500e817f94f18714a437250
--- /dev/null
+++ b/facility_location/cfg/plot.yaml
@@ -0,0 +1,64 @@
+
+env_specs:
+  region:
+  min_n: 20
+  max_n: 50
+  min_p_ratio: 0.1
+  max_p_ratio: 0.4
+  max_steps_scale: 0.5
+  tabu_time: 3
+  tabu_stable_steps_scale: 0.2
+  popstar: false
+
+# evaluation
+eval_specs:
+  region:
+  seed: 12345
+  max_nodes: 2488
+  max_edges: 5000
+  val_num_cases: 100
+  test_num_cases: 1
+  val_np: !!python/tuple [50,5]
+  test_np:
+    - !!python/tuple [2214,36]
+    - !!python/tuple [2214,189]
+    - !!python/tuple [2214,425]
+# agent
+agent_specs:
+  policy_feature_dim: 32
+  value_feature_dim: 32
+  policy_hidden_units: !!python/tuple [32, 32, 1]
+  value_hidden_units: !!python/tuple [32, 32, 1]
+
+# mlp
+mlp_specs:
+  hidden_units: !!python/tuple [32, 32]
+
+gnn_specs:
+  num_gnn_layers: 2
+  node_dim: 32
+
+
+# ts
+ts_specs:
+  max_steps_scale: 2
+  stable_iterations_scale: 0.2
+
+
+# popstar
+popstar_specs:
+  graspit: 32
+  elite: 10
+
+
+# ga
+ga_specs:
+  num_generations: 100
+  num_parents_mating: 50
+  sol_per_pop: 100
+  parent_selection_type: sss
+  crossover_probability: 0.8
+  mutation_probability: 0.1
+
+
+
diff --git a/facility_location/env/__init__.py b/facility_location/env/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e684fd6ed3d6f35e99f6bce6a6e4adf44845ee2d
--- /dev/null
+++ b/facility_location/env/__init__.py
@@ -0,0 +1,3 @@
+from .pmp import PMPEnv, EvalPMPEnv, MULTIPMP
+
+__all__ = ['PMPEnv', 'EvalPMPEnv', 'MULTIPMP']
\ No newline at end of file
diff --git a/facility_location/env/__pycache__/__init__.cpython-310.pyc b/facility_location/env/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e5ea7b63c7d94ab7589ed515211679f0c6703f5d
Binary files /dev/null and b/facility_location/env/__pycache__/__init__.cpython-310.pyc differ
diff --git a/facility_location/env/__pycache__/__init__.cpython-39.pyc b/facility_location/env/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7099f2203514bcd9c36080712fd47341686fdad1
Binary files /dev/null and b/facility_location/env/__pycache__/__init__.cpython-39.pyc differ
diff --git a/facility_location/env/__pycache__/facility_location_client.cpython-310.pyc b/facility_location/env/__pycache__/facility_location_client.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2e8307acbddc9ab6f90b5fca5c6bc76b76b788f1
Binary files /dev/null and b/facility_location/env/__pycache__/facility_location_client.cpython-310.pyc differ
diff --git a/facility_location/env/__pycache__/facility_location_client.cpython-39.pyc b/facility_location/env/__pycache__/facility_location_client.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b778d958aa2238b17295c9f7bd16d7ae96a48332
Binary files /dev/null and b/facility_location/env/__pycache__/facility_location_client.cpython-39.pyc differ
diff --git a/facility_location/env/__pycache__/obs_extractor.cpython-310.pyc b/facility_location/env/__pycache__/obs_extractor.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..be78fd4333b9202aac95a38a727f51f19d130f64
Binary files /dev/null and b/facility_location/env/__pycache__/obs_extractor.cpython-310.pyc differ
diff --git a/facility_location/env/__pycache__/obs_extractor.cpython-39.pyc b/facility_location/env/__pycache__/obs_extractor.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..747801bd2a8b5ae687b19365832a076c222645eb
Binary files /dev/null and b/facility_location/env/__pycache__/obs_extractor.cpython-39.pyc differ
diff --git a/facility_location/env/__pycache__/pmp.cpython-310.pyc b/facility_location/env/__pycache__/pmp.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..95765910c5916349f8d5f15a66ec10e9c71fffb6
Binary files /dev/null and b/facility_location/env/__pycache__/pmp.cpython-310.pyc differ
diff --git a/facility_location/env/__pycache__/pmp.cpython-39.pyc b/facility_location/env/__pycache__/pmp.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4a4f2cb0b5a6b2e47c5a5ff8cfdd40b12cf0b6bc
Binary files /dev/null and b/facility_location/env/__pycache__/pmp.cpython-39.pyc differ
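plot.yaml uses !!python/tuple tags, which yaml.safe_load rejects; loading it needs a loader that constructs Python objects. A sketch under that assumption (the diff does not show the config-loading code; use only on trusted files):

```python
import yaml

doc = "val_np: !!python/tuple [50, 5]"
cfg = yaml.unsafe_load(doc)   # safe_load would raise ConstructorError here
print(cfg['val_np'])          # (50, 5)
```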
diff --git a/facility_location/env/facility_location_client.py b/facility_location/env/facility_location_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6b8f1e3f96649eb623a624fddc6502d675fab17
--- /dev/null
+++ b/facility_location/env/facility_location_client.py
@@ -0,0 +1,278 @@
+import warnings
+from typing import Tuple, Dict
+
+import networkx as nx
+import numpy as np
+from geopandas import GeoDataFrame
+from shapely.geometry import MultiPoint
+from libpysal.weights.contiguity import Voronoi as Voronoi_weights
+from sklearn.neighbors import kneighbors_graph
+from sklearn.metrics import pairwise_distances
+
+from facility_location.utils.config import Config
+import time
+
+
+class FacilityLocationClient:
+    def __init__(self, cfg: Config, rng: np.random.Generator):
+        self.cfg = cfg
+        self.rng = rng
+        self._cfg_tabu_time = cfg.env_specs['tabu_time']
+        self._t = 0
+
+    def set_instance(self, points: np.ndarray, demands: np.ndarray, n: int, p: int, real: bool) -> None:
+        self._points = points
+        self._demands = demands
+        points_geom = MultiPoint(points)
+        self._gdf = GeoDataFrame({
+            'geometry': points_geom.geoms,
+            'demand': demands,
+        })
+        self._n = n
+        self._p = p
+        self._old_facility_mask = np.zeros(self._n, dtype=bool)
+        self._new_facility_mask = np.zeros(self._n, dtype=bool)
+        self._construct_static_graph()
+
+        if real:
+            self._distance_matrix = pairwise_distances(points, metric='haversine')
+        else:
+            self._distance_matrix = pairwise_distances(points, metric='euclidean')
+        self._cost_matrix = self._distance_matrix * self._demands[:, None]
+        self._gain = np.zeros(self._n)
+        self._loss = np.zeros(self._n)
+        self._add_time = np.full(self._n, -np.inf)
+        self._drop_time = np.full(self._n, -np.inf)
+        self.reset_tabu_time()
+
+    def get_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        return self._points, self._demands, self._n, self._p
+
+    def get_distance_and_cost_matrix(self) -> Tuple[np.ndarray, np.ndarray]:
+        return self._distance_matrix, self._cost_matrix
+
+    def get_avg_distance_and_cost(self) -> Tuple[np.ndarray, np.ndarray]:
+        avg_distance = self._distance_matrix.sum(axis=-1) / (self._n - 1)
+        avg_cost = self._cost_matrix.sum(axis=-1) / (self._n - 1)
+        return avg_distance, avg_cost
+
+    def _construct_static_graph(self) -> None:
+        self._connection_matrix = kneighbors_graph(self._points, n_neighbors=3, mode="connectivity").toarray()
+        self._static_graph = nx.from_numpy_array(self._connection_matrix)
+        self._static_edges = np.array(self._static_graph.edges(), dtype=np.int64)
+
+    def _construct_dynamic_graph(self) -> None:
+        t1 = time.time()
+        try:
+            solution_distance_min = np.partition(self._distance_matrix[:, self._solution][self._solution, :], 3, axis=-1)[:, 2]
+        except ValueError:
+            raise ValueError('Too few open facilities to take the 3rd-nearest solution distance.')
+        solution_distance_matrix = np.zeros((self._n, self._n))
+        solution_distance_matrix[:, self._solution] = solution_distance_min
+        solution_knearest_matrix = np.logical_and(self._distance_matrix < solution_distance_matrix, self._distance_matrix > 0)
+        old_tabu_mask, new_tabu_mask = self.get_tabu_mask(self._t)
+        solution_matrix = np.logical_and(np.logical_and(self._solution, old_tabu_mask)[:, None], (np.logical_and(~self._solution, new_tabu_mask)[None, :]))
+        solution_matrix = np.logical_or(solution_matrix, solution_matrix.T)
+        gainloss_matrix = np.logical_and((self._gain[:, None] > self._loss[None, :]), self._loss[None, :] > 0)
+        graph_matrix = np.logical_and(solution_matrix, np.logical_or(gainloss_matrix, solution_knearest_matrix))
+
+        if not np.any(graph_matrix):
+            if np.any(solution_matrix):
+                graph_matrix = solution_matrix
+                if not np.any(graph_matrix):
+                    raise ValueError('Invalid graph_matrix')
+            else:
+                graph_matrix = self._solution[:, None] ^ self._solution[None, :]
+        self._dynamic_graph = nx.from_numpy_array(graph_matrix)
+        self._dynamic_edges = np.array(self._dynamic_graph.edges(), dtype=np.int64)
+
+        t2 = time.time()
+        # print('dynamic graph time:', t2-t1)
+
+    def get_static_adjacency_list(self) -> np.ndarray:
+        return self._static_edges
+
+    def get_dynamic_adjacency_list(self) -> np.ndarray:
+        return self._dynamic_edges
+
+    def compute_initial_solution(self) -> Tuple[float, np.ndarray]:
+        self._solution = np.zeros(self._n, dtype=bool)
+        p_0 = self._demands.argmax()
+        self._solution[p_0] = True
+        for _ in range(self._p - 1):
+            p_max_cost = self._cost_matrix[:, self._solution].min(axis=-1).argmax()
+            self._solution[p_max_cost] = True
+        self._init_gain_and_loss()
+        self._construct_dynamic_graph()
+        self._old_facility_mask = self._solution
+        self._new_facility_mask = ~self._solution
+        return self.compute_obj_value(), self._solution
+
+    def compute_obj_value(self) -> float:
+        obj_value = self._cost_matrix[:, self._solution].min(axis=-1).sum()
+        return obj_value
+
+    def compute_obj_value_from_solution(self, solution) -> float:
+        self._solution = solution
+        self._init_gain_and_loss()
+        self._construct_dynamic_graph()
+        obj_value = self.compute_obj_value()
+        return obj_value
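The objective evaluated throughout the client, as a toy worked example: costs are demand-weighted distances, and each node is served by its cheapest open facility (compute_obj_value's cost_matrix[:, solution].min(axis=-1).sum()).

```python
import numpy as np

distance = np.array([[0.0, 1.0, 4.0],
                     [1.0, 0.0, 2.0],
                     [4.0, 2.0, 0.0]])
demands = np.array([3.0, 1.0, 2.0])
cost_matrix = distance * demands[:, None]   # row i weighted by node i's demand
solution = np.array([True, False, True])    # facilities open at nodes 0 and 2

obj = cost_matrix[:, solution].min(axis=-1).sum()
print(obj)  # 1.0: nodes 0 and 2 serve themselves, node 1 pays 1 * 1.0
```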
+
+    # def swap(self, old_facility: int, new_facility: int, t: int) -> Tuple[float, np.ndarray, Dict]:
+    #     if old_facility >= self._n or not self._solution[old_facility]:
+    #         warn_msg = f'Old facility {old_facility} is not a facility of the current solution {self._solution}.'
+    #         warnings.warn(warn_msg)
+    #         old_facility = self.rng.choice(np.arange(self._n)[self._solution])
+    #     if new_facility >= self._n or self._solution[new_facility]:
+    #         warn_msg = f'New facility {new_facility} is already a facility of the current solution {self._solution}.'
+    #         warnings.warn(warn_msg)
+    #         new_facility = self.rng.choice(np.arange(self._n)[~self._solution])
+    #     self._solution[old_facility] = False
+    #     self._solution[new_facility] = True
+    #     self._drop_time[old_facility] = t
+    #     self._add_time[new_facility] = t
+    #     self._t = t
+    #     return self.compute_obj_value(), self._solution, {}
+
+    def swap(self, facility_pair_index: int, t: int) -> Tuple[float, np.ndarray, Dict]:
+        facility_pair = self._dynamic_edges[facility_pair_index]
+        facility1 = facility_pair[0]
+        facility2 = facility_pair[1]
+
+        if (not self._solution[facility1]) and (self._solution[facility2]):
+            new_facility = facility1
+            old_facility = facility2
+        elif (not self._solution[facility2]) and (self._solution[facility1]):
+            new_facility = facility2
+            old_facility = facility1
+        else:
+            raise ValueError('Facility pair must swap an open facility with a closed one.')
+
+        self._solution[old_facility] = False
+        self._solution[new_facility] = True
+        self._old_facility_mask[new_facility] = True
+        self._new_facility_mask[old_facility] = True
+        self._drop_time[old_facility] = t
+        self._add_time[new_facility] = t
+        self._t = t
+        self._update_env(new_facility, old_facility)
+        # print('st:', self._t)
+        return self.compute_obj_value(), self._solution, {}
+
+    def get_tabu_mask(self, t: int) -> Tuple[np.ndarray, np.ndarray]:
+        old_tabu_mask = self._add_time < t - self._drop_tabu_time
+        new_tabu_mask = self._drop_time < t - self._add_tabu_time
+        return old_tabu_mask, new_tabu_mask
+
+    def reset_tabu_time(self) -> None:
+        self._t = 0
+        if self._cfg_tabu_time <= 0:
+            self._add_tabu_time = 0
+            self._drop_tabu_time = 0
+        else:
+            self._add_tabu_time = self.rng.integers(0.1 * self._p, 0.5 * self._p)
+            self._drop_tabu_time = self.rng.integers(0.1 * self._p, 0.5 * self._p)
+
+    def get_current_solution(self) -> np.ndarray:
+        return self._solution
+
+    def set_solution(self, solution: np.ndarray) -> None:
+        self._solution = solution
+
+    def get_current_distance(self) -> np.ndarray:
+        dis2poi = self._distance_matrix[:, self._solution]
+        if self._p > 2:
+            dis = np.partition(dis2poi, 2, axis=-1)[:, :2]
+        else:
+            dis = dis2poi.min(axis=-1)
+            dis = np.stack([dis, dis], axis=-1)
+        return dis
+
+    def get_current_cost(self) -> np.ndarray:
+        cost2poi = self._cost_matrix[:, self._solution]
+        if self._p > 2:
+            cost = np.partition(cost2poi, 2, axis=-1)[:, :2]
+        else:
+            cost = cost2poi.min(axis=-1)
+            cost = np.stack([cost, cost], axis=-1)
+        return cost
+
+    def get_gain_and_loss(self) -> Tuple[np.ndarray, np.ndarray]:
+        return self._gain, self._loss
+
+    def get_gdf_facilities(self) -> Tuple[GeoDataFrame, np.ndarray]:
+        solution = self._solution
+        facilities = np.arange(self._n)[solution]
+        gdf = self._gdf.copy()
+        gdf['facility'] = False
+        gdf.loc[facilities, 'facility'] = True
+        node2facility = np.arange(self._n)[solution][self._cost_matrix[:, solution].argmin(axis=-1)]
+        gdf['assignment'] = node2facility
+        return gdf, facilities
+
+    def _init_env(self):
+        self._init_gain_and_loss()
+        self._construct_dynamic_graph()
+
+    def _update_env(self, insert_facility, remove_facility):
+        self._update_gain_and_loss(insert_facility, remove_facility)
+        self._construct_dynamic_graph()
+
+    def _init_gain_and_loss(self):
+        t1 = time.time()
+
+        for i in range(self._n):
+            _fake_solution = list(self._solution)
+            if self._solution[i]:
+                _fake_solution[i] = False
+                self._loss[i] = self._cost_matrix[:, _fake_solution].min(axis=-1).sum() - self._cost_matrix[:, self._solution].min(axis=-1).sum()
+                self._gain[i] = 0
+            else:
+                _fake_solution[i] = True
+                self._gain[i] = self._cost_matrix[:, self._solution].min(axis=-1).sum() - self._cost_matrix[:, _fake_solution].min(axis=-1).sum()
+                self._loss[i] = 0
+
+        self.argpartition = np.argpartition(self._distance_matrix[:, self._solution], 2, axis=-1)[:, :2]
+        t2 = time.time()
+        # print('init gainloss time:', t2-t1)
+
+    def _update_gain_and_loss(self, insert_facility, remove_facility):
+        t1 = time.time()
+
+        _pre_solution = list(self._solution)
+        _pre_solution[insert_facility] = False
+        _pre_solution[remove_facility] = True
+        pre_closest_demands2solution = self._cost_matrix[:, _pre_solution][np.arange(self._n)[:, None], self.argpartition]
+        argpartition = np.argpartition(self._distance_matrix[:, self._solution], 2, axis=-1)[:, :2]
+        closest_demands2solution = self._cost_matrix[:, self._solution][np.arange(self._n)[:, None], argpartition]
+
+        pre_solution_idx = np.where(_pre_solution)[0]
+        solution_idx = np.where(self._solution)[0]
+        for i in range(self._n):
+            if remove_facility in self.argpartition[i] or insert_facility in argpartition[i]:
+                self._loss[solution_idx[argpartition[i][0]]] += closest_demands2solution[i][1] - closest_demands2solution[i][0]
+                self._loss[pre_solution_idx[self.argpartition[i][0]]] -= pre_closest_demands2solution[i][1] - pre_closest_demands2solution[i][0]
+                # if self.argpartition[i][0] != argpartition[i][0]:
+                #     for j in range(self._n):
+                #         if self._distance_matrix[i, j] < self._distance_matrix[i, self._solution][argpartition[i][0]]:
+                #             self._gain[j] += max(0, closest_demands2solution[i][0] - self._cost_matrix[i, j])
+                #         if self._distance_matrix[i, j] < self._distance_matrix[i, self._solution][self.argpartition[i][0]]:
+                #             self._gain[j] -= max(0, pre_closest_demands2solution[i][0] - self._cost_matrix[i, j])
+
+        self._loss[remove_facility] = 0
+        self._gain[insert_facility] = 0
+
+        self.argpartition = list(argpartition)
+        # print(self._gain, self._loss)
+        t2 = time.time()
+        # print('update gainloss time:', t2-t1)
+
+    def init_facility_mask(self, old_facility, new_facility):
+        self._old_facility_mask[old_facility] = True
+        self._new_facility_mask[new_facility] = True
+
+    def get_facility_mask(self):
+        return self._old_facility_mask, self._new_facility_mask
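Reference semantics for the incremental bookkeeping above (a brute-force restatement, not the diff's optimized update): loss[i] is the objective increase from closing open facility i, gain[i] the decrease from opening closed facility i.

```python
import numpy as np

def obj(cost, sol):
    return cost[:, sol].min(axis=-1).sum()

rng = np.random.default_rng(0)
cost = rng.random((5, 5))
sol = np.array([True, False, True, False, False])

gain, loss = np.zeros(5), np.zeros(5)
for i in range(5):
    alt = sol.copy()
    alt[i] = not sol[i]                      # toggle facility i
    if sol[i]:
        loss[i] = obj(cost, alt) - obj(cost, sol)
    else:
        gain[i] = obj(cost, sol) - obj(cost, alt)
print(gain.round(3), loss.round(3))
```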
diff --git a/facility_location/env/obs_extractor.py b/facility_location/env/obs_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..d111ed1b9b980e6192b65497f674356dd9650a9b
--- /dev/null
+++ b/facility_location/env/obs_extractor.py
@@ -0,0 +1,184 @@
+from typing import Dict, Tuple, Text
+
+import numpy as np
+
+from facility_location.env.facility_location_client import FacilityLocationClient
+from facility_location.utils.config import Config
+
+
+class ObsExtractor:
+    def __init__(self, cfg: Config, flc: FacilityLocationClient, node_range: int, edge_range: int):
+        self.cfg = cfg
+        self._flc = flc
+        self._node_range = node_range
+        self._edge_range = edge_range
+
+        self._construct_virtual_node_feature()
+        self._construct_node_features()
+        self._construct_action_mask()
+
+    def _construct_virtual_node_feature(self) -> None:
+        virtual_node_facility = 0
+        virtual_node_distance_min = 0
+        virtual_node_distance_sub_min = 0
+        virtual_node_cost_min = 0
+        virtual_node_cost_sub_min = 0
+        virtual_gain = 0
+        virtual_loss = 0
+
+        virtual_node_x = 0.5
+        virtual_node_y = 0.5
+        virtual_node_demand = 1
+        virtual_node_avg_distance = 0
+        virtual_node_avg_cost = 0
+        self._virtual_dynamic_node_feature = np.array([
+            virtual_node_facility,
+            virtual_node_distance_min,
+            virtual_node_distance_sub_min,
+            virtual_node_cost_min,
+            virtual_node_cost_sub_min,
+            virtual_gain,
+            virtual_loss,
+        ], dtype=np.float32)
+        self._virtual_static_node_feature = np.array([
+            virtual_node_x,
+            virtual_node_y,
+            virtual_node_demand,
+            virtual_node_avg_distance,
+            virtual_node_avg_cost,
+        ], dtype=np.float32)
+        self._virtual_node_feature = np.concatenate([
+            self._virtual_dynamic_node_feature,
+            self._virtual_static_node_feature,
+        ], axis=-1)
+
+    def _construct_node_features(self) -> None:
+        self._node_features = np.zeros((self._node_range, self._virtual_node_feature.size), dtype=np.float32)
+
+    def _construct_action_mask(self) -> None:
+        self._old_facility_mask = np.full(self._node_range, False)
+        self._new_facility_mask = np.full(self._node_range, False)
+
+    def get_node_dim(self) -> int:
+        return self._virtual_node_feature.size
+
+    def reset(self) -> None:
+        self._compute_static_obs()
+        self._reset_node_features()
+        self._reset_action_mask()
+
+    def _compute_static_obs(self) -> None:
+        xy, demands, n, _ = self._flc.get_instance()
+        if n + 2 > self._node_range:
+            print(n, self._node_range)
+            # raise ValueError('The number of nodes exceeds the maximum limit.')
+        self._n = n
+        avg_distance, avg_cost = self._flc.get_avg_distance_and_cost()
+        avg_distance = avg_distance / np.max(avg_distance)
+        avg_cost = avg_cost / np.max(avg_cost)
+        self._static_node_features = np.stack([
+            xy[:, 0],
+            xy[:, 1],
+            demands,
+            avg_distance,
+            avg_cost,
+        ], axis=-1).astype(np.float32)
+        static_adjacency_list = self._flc.get_static_adjacency_list()
+
+        obs_node_mask = np.full(1 + n, True)
+        self._obs_node_mask = self._pad_mask(obs_node_mask, self._node_range, 'nodes')
+
+        obs_static_edge_mask = np.full(n + static_adjacency_list.shape[0], True)
+        self._obs_static_edge_mask = self._pad_mask(obs_static_edge_mask, self._edge_range, 'edges')
+
+        self._static_adjacency_list = self._pad_edge(static_adjacency_list)
+
+    def _reset_node_features(self) -> None:
+        self._node_features[:, :] = 0
+        self._node_features[0] = self._virtual_node_feature
+        self._node_features[1:self._n+1, len(self._virtual_dynamic_node_feature):] = self._static_node_features
+
+    def _reset_action_mask(self) -> None:
+        self._old_facility_mask[:] = False
+        self._new_facility_mask[:] = False
+
+    def get_obs(self, t: int) -> Dict:
+        obs_nodes, obs_static_edges, obs_dynamic_edges, \
+            obs_node_mask, obs_static_edge_mask, obs_dynamic_edges_mask = self._get_obs_graph()
+        obs = {
+            'node_features': obs_nodes,
+            'static_adjacency_list': obs_static_edges,
+            'dynamic_adjacency_list': obs_dynamic_edges,
+            'node_mask': obs_node_mask,
+            'static_edge_mask': obs_static_edge_mask,
+            'dynamic_edge_mask': obs_dynamic_edges_mask,
+        }
+
+        return obs
+
+    def _get_obs_graph(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+        facility = self._flc.get_current_solution().astype(np.float32)
+        distance = self._flc.get_current_distance().astype(np.float32)
+        distance = distance / np.max(distance)
+        cost = self._flc.get_current_cost().astype(np.float32)
+        cost = cost / np.max(cost)
+        gain, loss = self._flc.get_gain_and_loss()
+        gain = gain / np.max(gain)
+        loss = loss / np.max(loss)
+        dynamic_node_features = np.stack([
+            facility,
+            distance[:, 0],
+            distance[:, 1],
+            cost[:, 0],
+            cost[:, 1],
+            gain,
+            loss,
+        ], axis=-1)
+        self._node_features[1:self._n+1, :len(self._virtual_dynamic_node_feature)] = dynamic_node_features
+        obs_nodes = self._node_features
+        obs_static_edges = self._static_adjacency_list
+        obs_dynamic_edges = self._flc.get_dynamic_adjacency_list()
+        # print(obs_dynamic_edges.shape)
+        obs_dynamic_edge_mask = np.full(obs_dynamic_edges.shape[0], True)
+        obs_node_mask = self._obs_node_mask
+        obs_static_edge_mask = self._obs_static_edge_mask
+        obs_dynamic_edges = self._pad_edge_wo_virtual(obs_dynamic_edges)
+        obs_dynamic_edge_mask = self._pad_mask(obs_dynamic_edge_mask, self._edge_range, 'edges')
+
+        return obs_nodes, obs_static_edges, obs_dynamic_edges, obs_node_mask, obs_static_edge_mask, obs_dynamic_edge_mask
+        # return obs_nodes, obs_static_edges, obs_node_mask, obs_edge_mask
+
+    def _get_obs_action_mask(self, t: int) -> Tuple[np.ndarray, np.ndarray]:
+        old_facility_mask, new_facility_mask = self._flc.get_facility_mask()
+        old_tabu_mask, new_tabu_mask = self._flc.get_tabu_mask(t)
+        self._old_facility_mask[1:self._n+1] = np.logical_and(old_facility_mask, old_tabu_mask)
+        self._new_facility_mask[1:self._n+1] = np.logical_and(new_facility_mask, new_tabu_mask)
+        obs_old_facility_mask = self._old_facility_mask
+        obs_new_facility_mask = self._new_facility_mask
+        if not np.any(obs_old_facility_mask) or not np.any(obs_new_facility_mask):
+            raise ValueError('The action mask is empty.')
+        return obs_old_facility_mask, obs_new_facility_mask
+
+    @staticmethod
+    def _pad_mask(mask: np.ndarray, max_num: int, name: Text) -> np.ndarray:
+        pad = (0, max_num - mask.size)
+        if pad[1] < 0:
+            raise ValueError(f'The number of {name} exceeds the maximum limit.')
+        return np.pad(mask, pad, mode='constant', constant_values=False)
+
+    def _pad_edge(self, edge: np.ndarray) -> np.ndarray:
+        virtual_edge = np.stack([np.zeros(self._n), np.arange(1, self._n + 1)], axis=-1).astype(np.int32)
+        edge = np.concatenate([virtual_edge, edge + 1], axis=0)
+        pad = ((0, self._edge_range - edge.shape[0]), (0, 0))
+        if pad[0][1] < 0:
+            raise ValueError('The number of edges exceeds the maximum limit.')
+        return np.pad(edge, pad, mode='constant', constant_values=self._node_range - 1)
+
+    def _pad_edge_wo_virtual(self, edge: np.ndarray) -> np.ndarray:
+        pad = ((0, self._edge_range - edge.shape[0]), (0, 0))
+        if pad[0][1] < 0:
+            print(self._edge_range, edge.shape[0])
+            raise ValueError('The number of edges exceeds the maximum limit.')
+
+        return np.pad(edge + 1, pad, mode='constant', constant_values=self._node_range - 1)
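Behavior of the padding helpers above in isolation (toy sizes): masks are right-padded with False up to the fixed range, and overflow raises.

```python
import numpy as np

def pad_mask(mask, max_num, name):
    pad = (0, max_num - mask.size)
    if pad[1] < 0:
        raise ValueError(f'The number of {name} exceeds the maximum limit.')
    return np.pad(mask, pad, mode='constant', constant_values=False)

print(pad_mask(np.full(3, True), 6, 'nodes'))
# [ True  True  True False False False]
```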
self._tabu_stable_steps_scale = cfg.env_specs['tabu_stable_steps_scale'] + self._popstar = cfg.env_specs['popstar'] + + self._seed(cfg.seed) + + self._done = False + + self._set_node_edge_range() + + self._flc = FacilityLocationClient(cfg, self._np_random) + self._obs_extractor = ObsExtractor(cfg, self._flc, self._node_range, self._edge_range) + + self._declare_spaces() + + def _declare_spaces(self) -> None: + self.observation_space = gym.spaces.Dict({ + 'node_features': gym.spaces.Box(low=0, high=1, shape=(self._node_range, self.get_node_feature_dim())), + 'static_adjacency_list': gym.spaces.Box(low=0, high=self._node_range, shape=(self._edge_range, 2), dtype=np.int64), + 'dynamic_adjacency_list': gym.spaces.Box(low=0, high=self._node_range, shape=(self._edge_range, 2), dtype=np.int64), + 'node_mask': gym.spaces.Box(low=0, high=1, shape=(self._node_range,), dtype=np.bool), + 'static_edge_mask': gym.spaces.Box(low=0, high=1, shape=(self._edge_range,), dtype=np.bool), + 'dynamic_edge_mask': gym.spaces.Box(low=0, high=1, shape=(self._edge_range,), dtype=np.bool), + }) + if not self._popstar: + self.action_space = gym.spaces.Discrete(self._node_range ** 2) + else: + self.action_space = gym.spaces.Discrete(self._node_range ** 2) + + def _set_node_edge_range(self) -> None: + self._node_range = self._max_n + 2 + self._edge_range = int(self._max_n ** 2 * self._max_p_ratio) + + def get_node_feature_dim(self) -> int: + return self._obs_extractor.get_node_dim() + + def _seed(self, seed: int) -> None: + self._np_random = np.random.default_rng(seed) + + def get_reward(self) -> float: + reward = self._obj_value[self._t - 1] - self._obj_value[self._t] + return reward + + def _transform_action(self, action: np.ndarray) -> np.ndarray: + if self._popstar: + action = np.array(np.unravel_index(action, (self._node_range, self._node_range))) + action = action - 1 + return action + + def step(self, action: np.ndarray): + if self._done: + raise RuntimeError('Action taken after episode is done.') + obj_value, solution, info = self._flc.swap(action, self._t) + self._t += 1 + self._done = (self._t == self._max_steps) + self._obj_value[self._t] = obj_value + self._solution[self._t] = solution + reward = self.get_reward() + if obj_value < self._best_obj_value - self.EPSILON: + self._best_obj_value = obj_value + self._best_solution = solution + self._last_best_t = self._t + elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0: + self._flc.reset_tabu_time() + + # if self._done: + # print('done') + # for i in range(self._t): + # print(f'{i}:',np.where(self._solution[i])) + + return self._get_obs(self._t), reward, self._done, False, info + + def reset(self, seed = 0) -> Optional[Dict]: + if self._train_region is None: + points, demands, n, p = self._generate_new_instance() + self._flc.set_instance(points, demands, n, p, False) + else: + points, demands, n, p = self._use_real_instance() + self._flc.set_instance(points, demands, n, p, True) + + return self.prepare(n, p), {} + + def prepare(self, n: int, p: int) -> Dict: + initial_obj_value, initial_solution = self._flc.compute_initial_solution() + self._obs_extractor.reset() + self._done = False + self._t = 0 + self._max_steps = max(int(p * self._max_steps_scale), 5) + self._obj_value = np.zeros(self._max_steps + 1) + self._obj_value[0] = initial_obj_value + self._solution = np.zeros((self._max_steps + 1, n), dtype=bool) + self._solution[0] = initial_solution + self._best_solution = initial_solution + self._best_obj_value = initial_obj_value + 
+    def step(self, action: np.ndarray):
+        if self._done:
+            raise RuntimeError('Action taken after episode is done.')
+        obj_value, solution, info = self._flc.swap(action, self._t)
+        self._t += 1
+        self._done = (self._t == self._max_steps)
+        self._obj_value[self._t] = obj_value
+        self._solution[self._t] = solution
+        reward = self.get_reward()
+        if obj_value < self._best_obj_value - self.EPSILON:
+            self._best_obj_value = obj_value
+            self._best_solution = solution
+            self._last_best_t = self._t
+        elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
+            self._flc.reset_tabu_time()
+
+        return self._get_obs(self._t), reward, self._done, False, info
+
+    def reset(self, seed=0) -> Optional[Dict]:
+        if self._train_region is None:
+            points, demands, n, p = self._generate_new_instance()
+            self._flc.set_instance(points, demands, n, p, False)
+        else:
+            points, demands, n, p = self._use_real_instance()
+            self._flc.set_instance(points, demands, n, p, True)
+
+        return self.prepare(n, p), {}
+
+    def prepare(self, n: int, p: int) -> Dict:
+        initial_obj_value, initial_solution = self._flc.compute_initial_solution()
+        self._obs_extractor.reset()
+        self._done = False
+        self._t = 0
+        self._max_steps = max(int(p * self._max_steps_scale), 5)
+        self._obj_value = np.zeros(self._max_steps + 1)
+        self._obj_value[0] = initial_obj_value
+        self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
+        self._solution[0] = initial_solution
+        self._best_solution = initial_solution
+        self._best_obj_value = initial_obj_value
+        self._last_best_t = 0
+        self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
+        return self._get_obs(self._t)
+
+    def render(self, mode='human', dpi=300) -> Optional[np.ndarray]:
+        gdf, facilities = self._flc.get_gdf_facilities()
+        if len(facilities) > 10:
+            warnings.warn('Too many facilities to render. Only rendering the first 10.')
+            facilities = facilities[:10]
+
+        cm = plt.get_cmap('tab10')
+        fig, axs = plt.subplots(1, 2, figsize=(12, 6), dpi=dpi)
+        for i, f in enumerate(facilities):
+            gdf.loc[gdf['assignment'] == f].plot(ax=axs[0],
+                                                 zorder=2,
+                                                 alpha=0.7,
+                                                 edgecolor="k",
+                                                 color=cm(i))
+            gdf.loc[[f]].plot(ax=axs[0],
+                              marker='*',
+                              markersize=300,
+                              zorder=3,
+                              alpha=0.7,
+                              edgecolor="k",
+                              color=cm(i))
+        axs[0].set_title("Facility Location", fontweight="bold")
+        plot_obj_value = self._obj_value[:self._t + 1]
+        axs[1].plot(plot_obj_value, marker='.', markersize=10, color='k')
+        axs[1].set_title("Objective Value", fontweight="bold")
+        axs[1].set_xticks(np.arange(self._max_steps + 1, step=math.ceil((self._max_steps + 1) / 10)))
+        fig.tight_layout()
+
+        if mode == 'human':
+            plt.show()
+        else:
+            io_buf = io.BytesIO()
+            fig.savefig(io_buf, format='raw', dpi=dpi)
+            io_buf.seek(0)
+            img_arr = np.reshape(np.frombuffer(io_buf.getvalue(), dtype=np.uint8),
+                                 newshape=(int(fig.bbox.bounds[3]), int(fig.bbox.bounds[2]), -1))
+            io_buf.close()
+            return img_arr
+
+    def close(self):
+        plt.close()
+
+    def _generate_new_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        n = self._np_random.integers(self._min_n, self._max_n, endpoint=True)
+        p_ratio = self._np_random.uniform(self._min_p_ratio, self._max_p_ratio)
+        p = int(max(n * p_ratio, 4))
+
+        points = self._np_random.uniform(size=(n, 2))
+        while np.unique(points, axis=0).shape[0] != n:
+            points = self._np_random.uniform(size=(n, 2))
+        demands = self._np_random.random(size=(n,))
+        return points, demands, n, p
+
+    def _use_real_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        data_path = './data/{}/pkl'.format(self.cfg.eval_specs['region'])
+        files = os.listdir(data_path)
+        files = [f for f in files if f.endswith('.pkl')]
+        sample_data_path = os.path.join(data_path, files[self._np_random.integers(len(files))])
+        with open(sample_data_path, 'rb') as f:
+            np_data = pickle.load(f)
+
+        n = self._np_random.integers(self._min_n, self._max_n, endpoint=True)
+        p = max(int(n * self._np_random.uniform(self._min_p_ratio, self._max_p_ratio)), 4)
+        sample_cbgs = self._np_random.choice(list(np_data[1].keys()), n, replace=False)
+        points = []
+        demands = []
+        for cbg in sample_cbgs:
+            points.append(np_data[1][cbg]['pos'])
+            demands.append(np_data[1][cbg]['demand'])
+        points = np.array(points)
+        demands = np.array(demands)
+
+        return points, demands, n, p
+
+    def _get_obs(self, t: int) -> Dict:
+        return self._obs_extractor.get_obs(t)
+
+    def get_initial_solution(self) -> np.ndarray:
+        return self._solution[0]
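# --- Editor's sketch (not part of the diff): a plain rollout over PMPEnv. ---
# Assuming a config YAML (e.g. the 'plot' id used by multi_eval below) and the
# package's data assets are available, the env follows the usual gym loop;
# note that a real agent must respect the old/new facility action masks, so
# random sampling is only a placeholder here.
#
#   from facility_location.utils import Config
#   from facility_location.env.pmp import PMPEnv
#
#   cfg = Config('plot', 0, True, '/tmp/flp')
#   env = PMPEnv(cfg)
#   obs, _ = env.reset()
#   done = False
#   while not done:
#       action = env.action_space.sample()      # placeholder for a policy
#       obs, reward, done, _, info = env.step(action)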
+class EvalPMPEnv(PMPEnv):
+    def __init__(self,
+                 cfg: Config,
+                 positions, demands, n, p, boost=False):
+        self._eval_np = (n, p)
+        self._eval_seed = cfg.eval_specs['seed']
+        self._boost = boost
+        self.points = positions
+        self.demands = demands
+        self._n = n
+        self._p = p
+
+        super().__init__(cfg)
+
+    def _set_node_edge_range(self) -> None:
+        n, p = self._eval_np
+        self._node_range = n + 2
+        self._edge_range = n * p
+
+    def get_eval_num_cases(self) -> int:
+        return self._eval_num_cases
+
+    def get_eval_np(self) -> Tuple[int, int]:
+        return self._eval_np
+
+    def reset_instance_id(self) -> None:
+        self._instance_id = 0
+
+    def step(self, action: np.ndarray):
+        if self._done:
+            raise RuntimeError('Action taken after episode is done.')
+        obj_value, solution, info = self._flc.swap(action, self._t)
+        self._t += 1
+        self._done = (self._t == self._max_steps)
+        self._obj_value[self._t] = obj_value
+        self._solution[self._t] = solution
+        reward = self.get_reward()
+        if obj_value < self._best_obj_value - self.EPSILON:
+            self._best_obj_value = obj_value
+            self._best_solution = solution
+            self._last_best_t = self._t
+        elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
+            self._flc.reset_tabu_time()
+
+        return self._get_obs(self._t), reward, self._done, False, info
+
+    def get_reward(self) -> float:
+        if self._done:
+            reward = -np.min(self._obj_value)
+        else:
+            reward = 0.0
+        return reward
+
+    def get_best_solution(self) -> np.ndarray:
+        return self._best_solution
+
+    def reset(self, seed=0) -> Dict:
+        self._flc.set_instance(self.points, self.demands, self._n, self._p, False)
+        return self.prepare(self._n, self._p, self._boost), {}
+
+    def prepare(self, n: int, p: int, boost: bool) -> Dict:
+        initial_obj_value, initial_solution = self._flc.compute_initial_solution()
+        self._obs_extractor.reset()
+        self._done = False
+        self._t = 0
+        self._max_steps = max(int(p * self._max_steps_scale), 5)
+        if boost:
+            # Turbo Boost: cut the episode to a handful of SWAP steps.
+            self._max_steps = max(int(self._max_steps_scale / 10), 5)
+        self._obj_value = np.zeros(self._max_steps + 1)
+        self._obj_value[0] = initial_obj_value
+        self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
+        self._solution[0] = initial_solution
+        self._best_solution = initial_solution
+        self._best_obj_value = initial_obj_value
+        self._last_best_t = 0
+        self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
+        return self._get_obs(self._t)
+
+    def get_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        points, demands, n, p = self._flc.get_instance()
+        return points, demands, n, p
+
+    def get_distance_and_cost(self) -> Tuple[np.ndarray, np.ndarray]:
+        return self._flc.get_distance_and_cost_matrix()
+
+    def evaluate(self, solution: np.ndarray) -> float:
+        self._flc.set_solution(solution)
+        obj_value = self._flc.compute_obj_value()
+        return obj_value
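# --- Editor's illustration (not part of the diff): the p-median objective ---
# these environments minimise. Every demand point is served by its nearest
# open facility, and the objective is the demand-weighted sum of those
# distances; evaluate() above computes the same quantity for a fixed
# solution mask through the FacilityLocationClient.
import numpy as np

dist = np.array([[0.0, 2.0, 5.0],
                 [2.0, 0.0, 3.0],
                 [5.0, 3.0, 0.0]])          # pairwise distances for 3 nodes
demand = np.array([1.0, 2.0, 4.0])
solution = np.array([True, False, True])    # facilities open at nodes 0 and 2

obj = (dist[:, solution].min(axis=1) * demand).sum()
assert obj == 4.0                            # 1*0.0 + 2*2.0 + 4*0.0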
+class MULTIPMP(PMPEnv):
+    EPSILON = 1e-6
+
+    def __init__(self,
+                 cfg,
+                 data_npy,
+                 boost=False):
+        self.cfg = cfg
+        self.data_npy = data_npy
+        self._boost = boost
+        self._all_points, self._all_demands, self._n, self._all_p = self._load_multi_facility_data(data_npy)
+        self._all_solutions = self._load_multi_facility_solutions(boost)
+        self._final_solutions = list(self._all_solutions)
+        self._num_types = len(self._all_p)
+        self._current_type = 0
+        self._all_max_steps, self._old_mask, self._new_mask = self._get_max_steps()
+        super().__init__(cfg)
+
+    def _set_node_edge_range(self) -> None:
+        self._node_range = self._n + 2
+        self._edge_range = self._n * max(self._all_p)
+
+    def step(self, action: np.ndarray):
+        if self._done:
+            raise RuntimeError('Action taken after episode is done.')
+        obj_value, solution, info = self._flc.swap(action, self._t)
+        self._t += 1
+        self._done = (self._t == self._all_max_steps[-1] and self._current_type == len(self._all_max_steps) - 1)
+        self._obj_value[self._t] = obj_value
+        self._solution[self._t] = solution
+        reward = self.get_reward()
+        if obj_value < self._best_obj_value - self.EPSILON:
+            self._best_obj_value = obj_value
+            self._best_solution = solution
+            self._last_best_t = self._t
+        elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
+            self._flc.reset_tabu_time()
+
+        if self._t == self._all_max_steps[self._current_type] and not self._done:
+            self._t = 0
+            self._multi_obj += obj_value
+            self._final_solutions[self._current_type] = solution
+            self._update_type()
+
+        if self._done:
+            with open('./facility_location/solutions.pkl', 'wb') as f:
+                pickle.dump(self._final_solutions, f)
+
+        return self._get_obs(self._t), reward, self._done, False, info
+
+    def reset(self, seed=0) -> Optional[Dict]:
+        self._current_type = 0
+        points = self._all_points
+        demands = self._all_demands[:, 0]
+        n = self._n
+        p = self._all_p[0]
+        solution = self._all_solutions[0]
+        self._multi_obj = 0
+
+        self._flc.set_instance(points, demands, n, p, True)
+
+        return self.prepare(n, p, solution), {}
+
+    def _update_type(self):
+        if self._current_type >= self._num_types:
+            raise RuntimeError('Action taken after episode is done.')
+        self._current_type += 1
+        # Every remaining type, including the last one, needs its instance set.
+        if self._current_type < self._num_types:
+            points = self._all_points
+            demands = self._all_demands[:, self._current_type]
+            n = self._n
+            p = self._all_p[self._current_type]
+            solution = self._all_solutions[self._current_type]
+            self._flc.set_instance(points, demands, n, p, True)
+            self.prepare(n, p, solution)
+
+    def prepare(self, n: int, p: int, solution: list) -> Dict:
+        initial_solution = solution
+        initial_obj_value = self._flc.compute_obj_value_from_solution(initial_solution)
+        self._obs_extractor.reset()
+        self._done = False
+        self._t = 0
+        self._max_steps = self._all_max_steps[self._current_type]
+        self._flc.init_facility_mask(self._old_mask[self._current_type], self._new_mask[self._current_type])
+        self._obj_value = np.zeros(self._max_steps + 1)
+        self._obj_value[0] = initial_obj_value
+        self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
+        self._solution[0] = initial_solution
+        self._best_solution = initial_solution
+        self._best_obj_value = initial_obj_value
+        self._last_best_t = 0
+        self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
+        return self._get_obs(self._t)
+
+    def _get_max_steps(self) -> Tuple[List[int], List[List[int]], List[List[int]]]:
+        tmp_all_solutions = list(self._all_solutions)
+        count_true = [sum(s) for s in zip(*tmp_all_solutions)]
+        max_steps = []
+        old_idx = []
+        new_idx = []
+        for t in range(self._num_types):
+            old = [i for i in range(len(count_true)) if count_true[i] > 1 and tmp_all_solutions[t][i]]
+            new = [i for i in range(len(count_true)) if count_true[i] == 0]
+            if len(old):
+                old_idx.append(old)
+                new_idx.append(new)
+                max_steps.append(len(old))
+                for i in old:
+                    count_true[i] = count_true[i] - 1
+        return max_steps, old_idx, new_idx
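# --- Editor's illustration (not part of the diff): conflict counting in ---
# _get_max_steps. With two facility types whose single-type solutions both
# open site 1, count_true flags site 1 as double-booked and site 3 as free,
# so type 0 gets exactly one SWAP step to move its copy of site 1.
solutions = [
    [True,  True,  False, False],   # type 0 opens sites 0 and 1
    [False, True,  True,  False],   # type 1 opens sites 1 and 2
]
count_true = [sum(s) for s in zip(*solutions)]
assert count_true == [1, 2, 1, 0]            # site 1 conflicted, site 3 unused

old = [i for i, c in enumerate(count_true) if c > 1 and solutions[0][i]]
new = [i for i, c in enumerate(count_true) if c == 0]
assert old == [1] and new == [3]             # one step: move site 1 -> site 3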
+    def _load_multi_facility_data(self, data_npy) -> Tuple[np.ndarray, np.ndarray, int, np.ndarray]:
+        data = data_npy.split('\n')
+        n = len(data)
+        p = int((len(data[0].split(' ')) - 2) / 2)
+
+        positions = []
+        demands = []
+        actual_facilities = []
+        for row in data:
+            row = row.split(' ')
+            row = [x for x in row if len(x)]
+            positions.append([float(row[0]), float(row[1])])
+
+            demand = []
+            for i in range(2, 2 + p):
+                demand.append(float(row[i]))
+            demands.append(demand)
+
+            actual_facility = []
+            for i in range(2 + p, 2 + 2 * p):
+                actual_facility.append(bool(int(float(row[i]))))
+            actual_facilities.append(actual_facility)
+
+        positions = np.array(positions)
+        positions = np.deg2rad(positions)
+        demands = np.array(demands)
+        actual_facilities = np.array(actual_facilities)
+        ps = actual_facilities.sum(axis=0)
+
+        return positions, demands, n, ps
+
+    def _load_multi_facility_solutions(self, boost) -> list:
+        def load_model(positions, demands, n, p, boost):
+            eval_env = EvalPMPEnv(self.cfg, positions, demands, n, p, boost)
+            eval_env = DummyVecEnv([lambda: eval_env])
+
+            policy_kwargs = get_policy_kwargs(self.cfg)
+            test_model = PPO(MaskedFacilityLocationActorCriticPolicy,
+                             eval_env,
+                             verbose=1,
+                             policy_kwargs=policy_kwargs,
+                             device='auto')
+            train_model = PPO.load(self.cfg.load_model_path)
+            test_model.set_parameters(train_model.get_parameters())
+            return test_model, eval_env
+
+        def get_optimal_solution(model, eval_env):
+            obs = eval_env.reset()
+            done = False
+            while not done:
+                action, _ = model.predict(obs, deterministic=True)
+                obs, _, done, info = eval_env.step(action)
+            return eval_env.get_attr('_best_solution')[0]
+
+        multi_solutions = []
+        for i in range(len(self._all_p)):
+            positions = self._all_points
+            demands = self._all_demands[:, i]
+            n = self._n
+            p = self._all_p[i]
+            model, env = load_model(positions, demands, n, p, boost)
+            multi_solutions.append(get_optimal_solution(model, env))
+
+        return multi_solutions
+
+    def get_reward(self) -> float:
+        if self._done:
+            reward = np.min(self._obj_value)
+        else:
+            reward = 0.0
+        return reward
\ No newline at end of file
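# --- Editor's illustration (not part of the diff): the expected row format. ---
# _load_multi_facility_data above reads one node per line, formatted as
# "lat lon demand_1 .. demand_p facility_1 .. facility_p", so a row with
# 2 + 2*p fields describes p facility types. A made-up row with p = 2:
row = '40.00 -73.50 10 20 0 1'
fields = [x for x in row.split(' ') if x]

p = (len(fields) - 2) // 2
lat, lon = float(fields[0]), float(fields[1])
demands = [float(x) for x in fields[2:2 + p]]
facilities = [bool(int(float(x))) for x in fields[2 + p:2 + 2 * p]]

assert p == 2
assert demands == [10.0, 20.0]
assert facilities == [False, True]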
diff --git a/facility_location/multi_eval.py b/facility_location/multi_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..9234685942bb70fc8d966df5e496388e7da1864e
--- /dev/null
+++ b/facility_location/multi_eval.py
@@ -0,0 +1,96 @@
+import time
+import random
+from typing import Union, Text
+
+import numpy as np
+import torch as th
+
+import sys
+import gymnasium
+# The env code and stable-baselines3 expect the legacy ``gym`` module name;
+# alias gymnasium in its place before they are imported.
+sys.modules["gym"] = gymnasium
+
+from stable_baselines3.common.evaluation import evaluate_policy
+from stable_baselines3 import PPO
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.vec_env import DummyVecEnv, VecEnvWrapper
+
+from facility_location.agent.solver import PMPSolver
+from facility_location.env import EvalPMPEnv, MULTIPMP
+from facility_location.utils import Config
+from facility_location.agent import MaskedFacilityLocationActorCriticPolicy
+from facility_location.utils.policy import get_policy_kwargs
+
+import warnings
+warnings.filterwarnings('ignore')
+
+
+AGENT = Union[PMPSolver, PPO]
+
+
+def get_model(cfg: Config,
+              env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
+              device: str) -> PPO:
+    policy_kwargs = get_policy_kwargs(cfg)
+    model = PPO(MaskedFacilityLocationActorCriticPolicy,
+                env,
+                verbose=1,
+                policy_kwargs=policy_kwargs,
+                device=device)
+    return model
+
+
+def get_agent(cfg: Config,
+              env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
+              model_path: Text) -> AGENT:
+    if cfg.agent in ['rl-mlp', 'rl-gnn', 'rl-agnn']:
+        test_model = get_model(cfg, env, device='auto')
+        trained_model = PPO.load(model_path)
+        test_model.set_parameters(trained_model.get_parameters())
+        agent = test_model
+    else:
+        raise ValueError(f'Agent {cfg.agent} not supported.')
+    return agent
+
+
+def evaluate(agent: AGENT,
+             env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
+             num_cases: int,
+             return_episode_rewards: bool):
+    if isinstance(agent, PPO):
+        return evaluate_ppo(agent, env, num_cases, return_episode_rewards=return_episode_rewards)
+    else:
+        raise ValueError(f'Agent {agent} not supported.')
+
+
+def evaluate_ppo(agent: PPO, env: EvalPMPEnv, num_cases: int, return_episode_rewards: bool):
+    rewards, _ = evaluate_policy(agent, env, n_eval_episodes=num_cases, return_episode_rewards=return_episode_rewards)
+    return rewards
+
+
+def main(data_npy, boost=False):
+    th.manual_seed(0)
+    np.random.seed(0)
+    random.seed(0)
+    model_path = './facility_location/best_model.zip'
+
+    cfg = Config('plot', 0, False, '/data2/suhongyuan/flp', 'rl-gnn', model_path=model_path)
+
+    eval_env = MULTIPMP(cfg, data_npy, boost)
+    eval_env = Monitor(eval_env)
+    eval_env = DummyVecEnv([lambda: eval_env])
+    agent = get_agent(cfg, eval_env, model_path)
+    start_time = time.time()
+    _ = evaluate(agent, eval_env, 1, return_episode_rewards=True)
+    eval_time = time.time() - start_time
+    print(f'\t time: {eval_time}')
+
+
+if __name__ == '__main__':
+    # main() takes the raw instance text, so read it from a file path given on
+    # the command line when the module is run as a script.
+    with open(sys.argv[1]) as f:
+        main(f.read())
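# --- Editor's sketch (not part of the diff): driving multi_eval directly. ---
# main() optimises every facility type and writes the per-type solutions to
# ./facility_location/solutions.pkl for the caller to unpickle. The two-row
# instance below is made up, and a trained ./facility_location/best_model.zip
# must exist for this to run.
#
#   import pickle
#   from facility_location import multi_eval
#
#   data_npy = '40.00 -73.50 10 20 0 1\n40.10 -73.60 30 40 1 0'
#   multi_eval.main(data_npy, boost=True)    # boost trades quality for speed
#   with open('./facility_location/solutions.pkl', 'rb') as f:
#       solutions = pickle.load(f)           # one boolean mask per type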
diff --git a/facility_location/solutions.pkl b/facility_location/solutions.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..f298cba6cc1e50665bcb3367bda51d523b0398a6
Binary files /dev/null and b/facility_location/solutions.pkl differ
diff --git a/facility_location/utils/__init__.py b/facility_location/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..786c82da6ef1c869171178ca4da7cee566efa94d
--- /dev/null
+++ b/facility_location/utils/__init__.py
@@ -0,0 +1,3 @@
+from .config import Config
+
+__all__ = ["Config"]
diff --git a/facility_location/utils/__pycache__/__init__.cpython-310.pyc b/facility_location/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0974f9e34eb9f1155fd25ad77042cff379312db1
Binary files /dev/null and b/facility_location/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/facility_location/utils/__pycache__/__init__.cpython-39.pyc b/facility_location/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bfd8dffbc14cfacd47065ea3cbee35fd051a11b2
Binary files /dev/null and b/facility_location/utils/__pycache__/__init__.cpython-39.pyc differ
diff --git a/facility_location/utils/__pycache__/config.cpython-310.pyc b/facility_location/utils/__pycache__/config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c00f5636229e88527dc43f00dadf123144a79de1
Binary files /dev/null and b/facility_location/utils/__pycache__/config.cpython-310.pyc differ
diff --git a/facility_location/utils/__pycache__/config.cpython-39.pyc b/facility_location/utils/__pycache__/config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0d221ec213bdcfb8883271fafd276e7b35a24822
Binary files /dev/null and b/facility_location/utils/__pycache__/config.cpython-39.pyc differ
diff --git a/facility_location/utils/__pycache__/policy.cpython-310.pyc b/facility_location/utils/__pycache__/policy.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..671d0f69e2ecc1e07f26582029407317eea1fa58
Binary files /dev/null and b/facility_location/utils/__pycache__/policy.cpython-310.pyc differ
diff --git a/facility_location/utils/__pycache__/policy.cpython-39.pyc b/facility_location/utils/__pycache__/policy.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..436853b284fc6bbb70fc7c03ee50fd20e7c57466
Binary files /dev/null and b/facility_location/utils/__pycache__/policy.cpython-39.pyc differ
diff --git a/facility_location/utils/config.py b/facility_location/utils/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e4d0ad242fcfa0cb67217f65fae20c5cf637d87
--- /dev/null
+++ b/facility_location/utils/config.py
@@ -0,0 +1,133 @@
+import os
+from typing import Text, Dict
+
+from stable_baselines3.common.utils import get_latest_run_id
+
+import yaml
+
+
+class Config:
+
+    def __init__(self, cfg_id: Text, global_seed: int, tmp: bool, root_dir: Text,
+                 agent: Text = 'rl-gnn', reset_num_timesteps: bool = True, cfg_dict: Dict = None, model_path: Text = None):
+        self.cfg_id = cfg_id
+        self.seed = global_seed
+        if cfg_dict is not None:
+            cfg = cfg_dict
+        else:
+            file_path = './facility_location/cfg/{}.yaml'.format(self.cfg_id)
+
+            class TupleSafeLoader(yaml.SafeLoader):
+                def construct_python_tuple(self, node):
+                    return tuple(self.construct_sequence(node))
+
+            TupleSafeLoader.add_constructor(
+                u'tag:yaml.org,2002:python/tuple',
+                TupleSafeLoader.construct_python_tuple)
+
+            with open(file_path, 'r') as f:
+                cfg = yaml.load(f, Loader=TupleSafeLoader)
+
+        # create dirs
+        self.root_dir = '/tmp/flp' if tmp else root_dir
+        self.agent = agent
+        self.multi = cfg.get('multi', False)
+
+        self.tb_log_path = os.path.join(self.root_dir, 'runs')
+        self.tb_log_name = f'{cfg_id}-agent-{agent}-seed-{global_seed}'
+        latest_run_id = get_latest_run_id(self.tb_log_path, self.tb_log_name)
+        if not reset_num_timesteps:
+            # Continue training in the same directory
+            latest_run_id -= 1
+        self.cfg_dir = os.path.join(self.root_dir,
+                                    'output', f'{cfg_id}-agent-{agent}-seed-{global_seed}_{latest_run_id + 1}')
+        self.ckpt_save_path = os.path.join(self.cfg_dir, 'ckpt')
+        self.best_model_path = os.path.join(self.cfg_dir, 'best-models')
+        self.latest_model_path = os.path.join(self.cfg_dir, 'latest-models')
+        self.load_model_path = model_path
+
+        # env
+        self.env_specs = cfg.get('env_specs', dict())
+        self.reward_specs = cfg.get('reward_specs', dict())
+        self.obs_specs = cfg.get('obs_specs', dict())
+        self.eval_specs = cfg.get('eval_specs', dict())
+
+        # agent config
+        self.agent_specs = cfg.get('agent_specs', dict())
+        self.mlp_specs = cfg.get('mlp_specs', dict())
+        self.gnn_specs = cfg.get('gnn_specs', dict())
+        self.ts_specs = cfg.get('ts_specs', dict())
+        self.popstar_specs = cfg.get('popstar_specs', dict())
+        self.ga_specs = cfg.get('ga_specs', dict())
+
+        # training config
+        self.gamma = cfg.get('gamma', 0.99)
+        self.tau = cfg.get('tau', 0.95)
+        self.state_encoder_specs = cfg.get('state_encoder_specs', dict())
+        self.policy_specs = cfg.get('policy_specs', dict())
+        self.value_specs = cfg.get('value_specs', dict())
+        self.lr = cfg.get('lr', 4e-4)
+        self.weightdecay = cfg.get('weightdecay', 0.0)
+        self.eps = cfg.get('eps', 1e-5)
+        self.value_pred_coef = cfg.get('value_pred_coef', 0.5)
+        self.entropy_coef = cfg.get('entropy_coef', 0.01)
+        self.clip_epsilon = cfg.get('clip_epsilon', 0.2)
+        self.max_num_iterations = cfg.get('max_num_iterations', 1000)
+        self.num_episodes_per_iteration = cfg.get('num_episodes_per_iteration', 1000)
+        self.max_sequence_length = cfg.get('max_sequence_length', 100)
+        self.num_optim_epoch = cfg.get('num_optim_epoch', 4)
+        self.mini_batch_size = cfg.get('mini_batch_size', 1024)
+        self.save_model_interval = cfg.get('save_model_interval', 10)
+    def log(self, logger, tb_logger):
+        """Log cfg to logger and tensorboard."""
+        logger.info(f'id: {self.cfg_id}')
+        logger.info(f'seed: {self.seed}')
+        logger.info(f'env_specs: {self.env_specs}')
+        logger.info(f'reward_specs: {self.reward_specs}')
+        logger.info(f'obs_specs: {self.obs_specs}')
+        logger.info(f'agent_specs: {self.agent_specs}')
+        logger.info(f'gamma: {self.gamma}')
+        logger.info(f'tau: {self.tau}')
+        logger.info(f'state_encoder_specs: {self.state_encoder_specs}')
+        logger.info(f'policy_specs: {self.policy_specs}')
+        logger.info(f'value_specs: {self.value_specs}')
+        logger.info(f'lr: {self.lr}')
+        logger.info(f'weightdecay: {self.weightdecay}')
+        logger.info(f'eps: {self.eps}')
+        logger.info(f'value_pred_coef: {self.value_pred_coef}')
+        logger.info(f'entropy_coef: {self.entropy_coef}')
+        logger.info(f'clip_epsilon: {self.clip_epsilon}')
+        logger.info(f'max_num_iterations: {self.max_num_iterations}')
+        logger.info(f'num_episodes_per_iteration: {self.num_episodes_per_iteration}')
+        logger.info(f'max_sequence_length: {self.max_sequence_length}')
+        logger.info(f'num_optim_epoch: {self.num_optim_epoch}')
+        logger.info(f'mini_batch_size: {self.mini_batch_size}')
+        logger.info(f'save_model_interval: {self.save_model_interval}')
+
+        if tb_logger is not None:
+            tb_logger.add_hparams(
+                hparam_dict={
+                    'id': self.cfg_id,
+                    'seed': self.seed,
+                    'env_specs': str(self.env_specs),
+                    'reward_specs': str(self.reward_specs),
+                    'obs_specs': str(self.obs_specs),
+                    'agent_specs': str(self.agent_specs),
+                    'gamma': self.gamma,
+                    'tau': self.tau,
+                    'state_encoder_specs': str(self.state_encoder_specs),
+                    'policy_specs': str(self.policy_specs),
+                    'value_specs': str(self.value_specs),
+                    'lr': self.lr,
+                    'weightdecay': self.weightdecay,
+                    'eps': self.eps,
+                    'value_pred_coef': self.value_pred_coef,
+                    'entropy_coef': self.entropy_coef,
+                    'clip_epsilon': self.clip_epsilon,
+                    'max_num_iterations': self.max_num_iterations,
+                    'num_episodes_per_iteration': self.num_episodes_per_iteration,
+                    'max_sequence_length': self.max_sequence_length,
+                    'num_optim_epoch': self.num_optim_epoch,
+                    'mini_batch_size': self.mini_batch_size,
+                    'save_model_interval': self.save_model_interval},
+                metric_dict={'hparam/placeholder': 0.0})
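# --- Editor's sketch (not part of the diff): bypassing the YAML file. ---
# Config also accepts a plain dict through cfg_dict, so the keys read above
# can be supplied inline; the values below are illustrative, not the
# shipped configuration.
from facility_location.utils import Config

cfg_dict = {
    'env_specs': {
        'region': None,                # None -> synthetic training instances
        'min_n': 50, 'max_n': 100,
        'min_p_ratio': 0.05, 'max_p_ratio': 0.1,
        'max_steps_scale': 2.0,
        'tabu_stable_steps_scale': 0.2,
        'popstar': False,
    },
    'eval_specs': {'region': None, 'seed': 0},
    'gnn_specs': {'num_gnn_layers': 2, 'node_dim': 32},
}
cfg = Config('inline', global_seed=0, tmp=True, root_dir='/tmp/flp',
             cfg_dict=cfg_dict)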
diff --git a/facility_location/utils/policy.py b/facility_location/utils/policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c73b0c1cb5c38edcb88abf051a76320bce54a95
--- /dev/null
+++ b/facility_location/utils/policy.py
@@ -0,0 +1,57 @@
+from typing import Dict
+
+from facility_location.agent import FacilityLocationMLPExtractor, FacilityLocationGNNExtractor, FacilityLocationAttentionGNNExtractor
+from facility_location.utils import Config
+
+
+def get_policy_kwargs(cfg: Config) -> Dict:
+    if cfg.agent == 'rl-mlp':
+        hidden_units = cfg.mlp_specs.get('hidden_units', (32, 32))
+        node_dim = hidden_units[-1]
+        policy_feature_dim = FacilityLocationMLPExtractor.get_policy_feature_dim(node_dim)
+        value_feature_dim = FacilityLocationMLPExtractor.get_value_feature_dim(node_dim)
+        policy_kwargs = dict(
+            policy_feature_dim=policy_feature_dim,
+            value_feature_dim=value_feature_dim,
+            policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
+            value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
+            features_extractor_class=FacilityLocationMLPExtractor,
+            features_extractor_kwargs=dict(
+                hidden_units=hidden_units,),
+            popstar=cfg.env_specs.get('popstar', False),)
+
+    elif cfg.agent == 'rl-gnn':
+        num_gnn_layers = cfg.gnn_specs.get('num_gnn_layers', 2)
+        node_dim = cfg.gnn_specs.get('node_dim', 32)
+        policy_feature_dim = FacilityLocationGNNExtractor.get_policy_feature_dim(node_dim)
+        value_feature_dim = FacilityLocationGNNExtractor.get_value_feature_dim(node_dim)
+        policy_kwargs = dict(
+            policy_feature_dim=policy_feature_dim,
+            value_feature_dim=value_feature_dim,
+            policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
+            value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
+            features_extractor_class=FacilityLocationGNNExtractor,
+            features_extractor_kwargs=dict(
+                num_gnn_layers=num_gnn_layers,
+                node_dim=node_dim),
+            popstar=cfg.env_specs.get('popstar', False),)
+
+    elif cfg.agent == 'rl-agnn':
+        num_gnn_layers = cfg.gnn_specs.get('num_gnn_layers', 2)
+        node_dim = cfg.gnn_specs.get('node_dim', 32)
+        policy_feature_dim = FacilityLocationAttentionGNNExtractor.get_policy_feature_dim(node_dim)
+        value_feature_dim = FacilityLocationAttentionGNNExtractor.get_value_feature_dim(node_dim)
+        policy_kwargs = dict(
+            policy_feature_dim=policy_feature_dim,
+            value_feature_dim=value_feature_dim,
+            policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
+            value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
+            features_extractor_class=FacilityLocationAttentionGNNExtractor,
+            features_extractor_kwargs=dict(
+                num_gnn_layers=num_gnn_layers,
+                node_dim=node_dim),
+            popstar=cfg.env_specs.get('popstar', False),)
+
+    else:
+        raise NotImplementedError
+    return policy_kwargs
diff --git a/final_solutions.pkl b/final_solutions.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/model.pth b/model.pth
deleted file mode 100644
index 50dc1e548be4bddcd8c592e58084a7c66cde6f4b..0000000000000000000000000000000000000000
Binary files a/model.pth and /dev/null differ
diff --git a/model.py b/model.py
deleted file mode 100644
index 9e85f16503263823562c052855f51323613d9f18..0000000000000000000000000000000000000000
--- a/model.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class Net(nn.Module):
-    def __init__(self, input_size, hidden_size, num_classes):
-        super(Net, self).__init__()
-        self.fc1 = nn.Linear(input_size, hidden_size)
-        self.relu = nn.ReLU()
-        self.fc2 = nn.Linear(hidden_size, num_classes)
-        self.softmax = nn.Softmax(dim=1)
-
-    def forward(self, x):
-        out = self.fc1(x)
-        out = self.relu(out)
-        out = self.fc2(out)
-        out = self.softmax(out)
-        return out
-
-
-if __name__ == '__main__':
-    net = Net(100, 50, 10)
-    torch.save(net.state_dict(), 'model.pth')
diff --git a/requirements.txt b/requirements.txt
index f824cf9cfb4e0a733f67a1f12e82d36cdd27f12e..c954a422e0ad11781c25a908235e9edeacc9d147 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,22 @@ numpy >= 1.26.1
 plotly
 pandas
 scikit-learn
-scipy
\ No newline at end of file
+scipy
+absl-py>=1.1.0
+pulp>=2.7.0
+spopt>=0.4.1
+geopandas>=0.11.1
+matplotlib>=3.5.2
+shapely>=1.8.4
+gym>=0.21.0
+pygad>=2.18.1
+stable_baselines3>=1.6.2
+networkx>=2.8.4
+libpysal>=4.7.0
+torch>=1.13.1
+PyYAML>=6.0
+tqdm>=4.64.0
+rich>=13.0.1
+numba>=0.55.2
+tensorboard>=2.11.0
\ No newline at end of file
diff --git a/test_model.py b/test_model.py
deleted file mode 100644
index 4d8ff9f62948ee6387f847c86ce056b2adf6b5c6..0000000000000000000000000000000000000000
--- a/test_model.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import torch
-import numpy as np
-
-from model import Net
-
-a = np.arange(100).reshape(1, 100)
-model = Net(100, 50, 10)
-model.load_state_dict(torch.load('model.pth'))
-model.eval()
-output = model(torch.from_numpy(a).float())
-print(output)
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e5237fc36fafa4436ad75d32214f583820298e8
--- /dev/null
+++ b/utils/__init__.py
@@ -0,0 +1,8 @@
+from .load_save import load_yaml
+from .vec_check_nan import DictVecCheckNan
+from .update_val_env_callback import UpdateValEnv, UpdateValEnvAndStopTrainingOnNoModelImprovement
+from .hparam_callback import HParamCallback
+
+__all__ = ['load_yaml', 'DictVecCheckNan',
+           'UpdateValEnv', 'UpdateValEnvAndStopTrainingOnNoModelImprovement',
+           'HParamCallback']
diff --git a/utils/__pycache__/__init__.cpython-310.pyc b/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..34c3087a87fb8bdb7c4af63744d5bf7c0837de4c
Binary files /dev/null and b/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/utils/__pycache__/__init__.cpython-39.pyc b/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8a27647a0850795f07ba6001dff66277a1a07f22
Binary files /dev/null and b/utils/__pycache__/__init__.cpython-39.pyc differ
diff --git a/utils/__pycache__/hparam_callback.cpython-310.pyc b/utils/__pycache__/hparam_callback.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5f189063bfcb06a6610b3ee512e4b8725aa26a6d
Binary files /dev/null and b/utils/__pycache__/hparam_callback.cpython-310.pyc differ
diff --git a/utils/__pycache__/hparam_callback.cpython-39.pyc b/utils/__pycache__/hparam_callback.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..317a0c5d7bba5a8b816426cf2815e0c7500b6d8c
Binary files /dev/null and b/utils/__pycache__/hparam_callback.cpython-39.pyc differ
diff --git a/utils/__pycache__/load_save.cpython-310.pyc b/utils/__pycache__/load_save.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..48132a643dd7dc899b8221f577c5c0409264593f
Binary files /dev/null and b/utils/__pycache__/load_save.cpython-310.pyc differ
diff --git a/utils/__pycache__/load_save.cpython-39.pyc b/utils/__pycache__/load_save.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e967a9c51a982f636b141d7897fbaab55dbe161f
Binary files /dev/null and b/utils/__pycache__/load_save.cpython-39.pyc differ
diff --git a/utils/__pycache__/update_val_env_callback.cpython-310.pyc b/utils/__pycache__/update_val_env_callback.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1c664ea97c8bf6c4c99b101897d38d4b99909241
Binary files /dev/null and b/utils/__pycache__/update_val_env_callback.cpython-310.pyc differ
diff --git a/utils/__pycache__/update_val_env_callback.cpython-39.pyc b/utils/__pycache__/update_val_env_callback.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cbcb7b66fc6097c6044c4ee6c73fdf8c3053f7c4
Binary files /dev/null and b/utils/__pycache__/update_val_env_callback.cpython-39.pyc differ
diff --git a/utils/__pycache__/vec_check_nan.cpython-310.pyc b/utils/__pycache__/vec_check_nan.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5c36532ff2df56d42e39db64f14f31a386bb719d
Binary files /dev/null and b/utils/__pycache__/vec_check_nan.cpython-310.pyc differ
diff --git a/utils/__pycache__/vec_check_nan.cpython-39.pyc b/utils/__pycache__/vec_check_nan.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..49c1b38b89730e23903bf31cda64d9d416ff85bc
Binary files /dev/null and b/utils/__pycache__/vec_check_nan.cpython-39.pyc differ
diff --git a/utils/hparam_callback.py b/utils/hparam_callback.py
new file mode 100644
index 0000000000000000000000000000000000000000..32aaaf83d315cc8d9a7006ddd0500332eaa606d1
--- /dev/null
+++ b/utils/hparam_callback.py
@@ -0,0 +1,32 @@
+from stable_baselines3.common.callbacks import BaseCallback
+from stable_baselines3.common.logger import HParam
+
+
+class HParamCallback(BaseCallback):
+    def __init__(self):
+        super().__init__()
+
+    def _on_training_start(self) -> None:
+        hparam_dict = {
+            "algorithm": self.model.__class__.__name__,
+            "learning rate": self.model.learning_rate,
+            "steps_per_iteration": self.model.n_steps * self.model.n_envs,
+            "batch_size": self.model.batch_size,
+            "optim_epochs_per_iteration": self.model.n_epochs,
+            "gamma": self.model.gamma,
+            "gae_lambda": self.model.gae_lambda,
+            "ent_coef": self.model.ent_coef,
+            "vf_coef": self.model.vf_coef,
+        }
+        metric_dict = {
+            "eval/mean_reward": 0,
+            "train/loss": 0,
+        }
+        self.logger.record(
+            "hparams",
+            HParam(hparam_dict, metric_dict),
+            exclude=("stdout", "log", "json", "csv"),
+        )
+
+    def _on_step(self) -> bool:
+        return True
diff --git a/utils/load_save.py b/utils/load_save.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5fe95e0379985c20422c7dafbb6510988b831c2
--- /dev/null
+++ b/utils/load_save.py
@@ -0,0 +1,52 @@
+import yaml
+import glob
+import pickle
+import os
+import socket
+
+
+def get_file_path(file_path):
+    hostname = socket.gethostname()
+    if hostname == 'fib':
+        file_path = os.path.join('/home/mas/zhengyu/workspace/flp', file_path)
+    elif hostname == 'rl2':
+        file_path = os.path.join('/home/zhengyu/workspace/flp', file_path)
+    elif hostname == 'rl3':
+        file_path = os.path.join('/home/zhengyu/workspace/flp', file_path)
+    elif hostname == 'rl4':
+        file_path = os.path.join('/data2/suhongyuan/flp', file_path)
+    elif hostname == 'DL4':
+        file_path = os.path.join('/data2/zhengyu/workspace/flp', file_path)
+    elif hostname == 'S4rawBer2y.local':
+        file_path = os.path.join('/Users/zhengyu/Seafile/code/workspace/flp', file_path)
+    elif hostname == 'DESKTOP-FTK3FVH':
+        file_path = os.path.join('C:\\Users\\123\\Seafile\\code\\workspace\\flp', file_path)
+    else:
+        raise ValueError('Unknown hostname: {}'.format(socket.gethostname()))
+    return file_path
+
+
+class TupleSafeLoader(yaml.SafeLoader):
+    def construct_python_tuple(self, node):
+        return tuple(self.construct_sequence(node))
+
+
+TupleSafeLoader.add_constructor(
+    u'tag:yaml.org,2002:python/tuple',
+    TupleSafeLoader.construct_python_tuple)
+
+
+def load_yaml(file_path):
+    file_path = get_file_path(file_path)
+    files = glob.glob(file_path, recursive=True)
+    assert len(files) == 1
+    with open(files[0], 'r') as f:
+        cfg = yaml.load(f, Loader=TupleSafeLoader)
+    return cfg
+
+
+def load_pickle(file_path):
+    file_path = get_file_path(file_path)
+    files = glob.glob(file_path, recursive=True)
+    assert len(files) == 1
+    with open(files[0], 'rb') as f:
+        data = pickle.load(f)
+    return data
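# --- Editor's illustration (not part of the diff): what TupleSafeLoader ---
# adds. Plain yaml.safe_load rejects the !!python/tuple tag; the loader above
# whitelists just that one constructor, so config values can be tuples:
import yaml

from utils.load_save import TupleSafeLoader

doc = 'hidden_units: !!python/tuple [32, 32]'
cfg = yaml.load(doc, Loader=TupleSafeLoader)
assert cfg['hidden_units'] == (32, 32)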
diff --git a/utils/update_val_env_callback.py b/utils/update_val_env_callback.py
new file mode 100644
index 0000000000000000000000000000000000000000..375ee4d5acbda70e022b7afc09de538ba6e60165
--- /dev/null
+++ b/utils/update_val_env_callback.py
@@ -0,0 +1,34 @@
+from typing import Union
+
+import gym
+from stable_baselines3.common.callbacks import BaseCallback, StopTrainingOnNoModelImprovement
+from stable_baselines3.common.vec_env import VecEnv, DummyVecEnv
+
+
+class UpdateValEnv(BaseCallback):
+    def __init__(self, val_env: Union[gym.Env, VecEnv], verbose: int = 0):
+        super().__init__(verbose=verbose)
+        if not isinstance(val_env, VecEnv):
+            val_env = DummyVecEnv([lambda: val_env])
+
+        self.val_env = val_env
+
+    def _on_step(self) -> bool:
+        assert self.parent is not None, "``UpdateValEnv`` callback must be used with an ``EvalCallback``"
+
+        self.val_env.env_method('reset_instance_id')
+        return True
+
+
+class UpdateValEnvAndStopTrainingOnNoModelImprovement(StopTrainingOnNoModelImprovement):
+    def __init__(self, val_env: Union[gym.Env, VecEnv],
+                 max_no_improvement_evals: int, min_evals: int = 0, verbose: int = 0):
+        super().__init__(max_no_improvement_evals=max_no_improvement_evals, min_evals=min_evals, verbose=verbose)
+        if not isinstance(val_env, VecEnv):
+            val_env = DummyVecEnv([lambda: val_env])
+
+        self.val_env = val_env
+
+    def _on_step(self) -> bool:
+        self.val_env.env_method('reset_instance_id')
+        return super()._on_step()
diff --git a/utils/vec_check_nan.py b/utils/vec_check_nan.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a2fed032019cde2252374cbe745b2473c2ecf35
--- /dev/null
+++ b/utils/vec_check_nan.py
@@ -0,0 +1,56 @@
+import warnings
+from collections import OrderedDict
+from typing import Text
+
+import numpy as np
+from stable_baselines3.common.vec_env import VecCheckNan
+
+
+class DictVecCheckNan(VecCheckNan):
+    def _check_val(self, event: str, **kwargs) -> None:
+        # if warn and warn once and have warned once: then stop checking
+        if not self.raise_exception and self.warn_once and self._user_warned:
+            return
+
+        found = []
+
+        def check_val_np(check_name: Text, check_val: np.ndarray) -> None:
+            has_nan = np.any(np.isnan(check_val))
+            has_inf = self.check_inf and np.any(np.isinf(check_val))
+            if has_inf:
+                found.append((check_name, "inf"))
+            if has_nan:
+                found.append((check_name, "nan"))
+
+        for name, val in kwargs.items():
+            if isinstance(val, np.ndarray):
+                check_val_np(name, val)
+            elif isinstance(val, OrderedDict):
+                for inner_name, inner_val in val.items():
+                    check_val_np(f"{name}-{inner_name}", inner_val)
+            else:
+                raise ValueError(f"Unsupported observation type {type(val)}.")
+
+        if found:
+            self._user_warned = True
+            msg = ""
+            for i, (name, type_val) in enumerate(found):
+                msg += f"found {type_val} in {name}"
+                if i != len(found) - 1:
+                    msg += ", "
+
+            msg += ".\r\nOriginated from the "
+
+            if event == "reset":
+                msg += "environment observation (at reset)"
+            elif event == "step_wait":
+                msg += f"environment, Last given value was: \r\n\taction={self._actions}"
+            elif event == "step_async":
+                msg += f"RL model, Last given value was: \r\n\tobservations={self._observations}"
+            else:
+                raise ValueError("Internal error.")
+
+            if self.raise_exception:
+                raise ValueError(msg)
+            else:
+                warnings.warn(msg, UserWarning)
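# --- Editor's sketch (not part of the diff): using DictVecCheckNan. ---
# Stable-Baselines3's VecCheckNan only inspects flat array observations; the
# subclass above also walks each entry of a dict observation, so it can wrap
# the dict-observation facility location envs. make_env is a placeholder for
# any factory returning such an env.
#
#   from stable_baselines3.common.vec_env import DummyVecEnv
#   from utils import DictVecCheckNan
#
#   venv = DummyVecEnv([make_env])
#   venv = DictVecCheckNan(venv, raise_exception=True, check_inf=True)
#   obs = venv.reset()    # raises ValueError if any entry is NaN/inf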