diff --git a/app.py b/app.py
index fb28d2b5678de85e2c299369b25bf7a0da456a6c..c094cc687cf7a377212bbc69a52a3f8d616ee448 100644
--- a/app.py
+++ b/app.py
@@ -4,26 +4,98 @@ import plotly.graph_objects as go
 import plotly.express as px
 from sklearn.metrics import pairwise_distances
 import torch
+from facility_location import multi_eval
+import pickle
 
-def plot_from_npy(npy_data):
-    fig = go.Figure()
-    fig.add_trace(go.Scatter(x=[1, 2, 3, 4], y=[10, 11, 12, 13], mode='lines', name='New York'))
-    fig.update_layout(title_text="Facility Distribution in Cities")
-    fig.update_xaxes(title_text="Time")
-    fig.update_yaxes(title_text="Facility Count")
-
-
-    actual_fig = fig  # Replace this line with your actual_fig
-    solution_fig = fig  # Replace this line with your solution_fig
-
-    return actual_fig, solution_fig
 
 def solver_plot(data_npy, boost=False):
+    multi_eval.main(data_npy, boost)
+    with open('./facility_location/solutions.pkl', 'rb') as f:
+        all_solutions = pickle.load(f)
+
+    data = data_npy.split('\n')
+    n = len(data)
+    p = int((len(data[0].split(' ')) - 2) / 2)
+
+    positions = []
+    demands = []
+    actual_facilities = []
+    for row in data:
+        row = row.split(' ')
+        row = [x for x in row if len(x)]
+
+        positions.append([float(row[0]), float(row[1])])
+
+        demand = []
+        for i in range(2, 2 + p):
+            demand.append(float(row[i]))
+        demands.append(demand)
+
+        actual_facility = []
+        for i in range(2 + p, 2 + 2 * p):
+            actual_facility.append(bool(int(float(row[i]))))
+        actual_facilities.append(actual_facility)
+    positions = np.array(positions)
+    demands = np.array(demands)
+    actual_facilities = np.array(actual_facilities)
+    solution_facilities = np.array(all_solutions).T
+    # print(solution_facilities)
+    # print(actual_facilities)
+
     actual_fig = go.Figure()
     solution_fig = go.Figure()
-    actual_ac = 0  # Replace this line with your actual_ac
-    solution_ac = 0  # Replace this line with your solution_ac
+    for i in range(p):
+        actual_fig.add_trace(go.Scattermapbox(
+            lat=positions[actual_facilities[:, i]][:, 0],
+            lon=positions[actual_facilities[:, i]][:, 1],
+            mode='markers',
+            marker=go.scattermapbox.Marker(
+                size=10,
+                color=px.colors.qualitative.Plotly[i]
+            ),
+            name=f'Facility {i+1}'
+        ))
+        solution_fig.add_trace(go.Scattermapbox(
+            lat=positions[solution_facilities[:, i]][:, 0],
+            lon=positions[solution_facilities[:, i]][:, 1],
+            mode='markers',
+            marker=go.scattermapbox.Marker(
+                size=10,
+                color=px.colors.qualitative.Plotly[i]
+            ),
+            name=f'Facility {i+1}'
+        ))
+
+    actual_fig.update_layout(
+        mapbox=dict(
+            style='carto-positron',
+            center=dict(lat=np.mean(positions[actual_facilities[:, i]][:, 0]),
+                        lon=np.mean(positions[actual_facilities[:, i]][:, 1])),
+            zoom=11.0
+        ),
+        margin=dict(l=0, r=0, b=0, t=0))
+
+    solution_fig.update_layout(
+        mapbox=dict(
+            style='carto-positron',
+            center=dict(lat=np.mean(positions[solution_facilities[:, i]][:, 0]),
+                        lon=np.mean(positions[solution_facilities[:, i]][:, 1])),
+            zoom=11.0
+        ),
+        margin=dict(l=0, r=0, b=0, t=0))
+    # show legend
+    actual_fig.update_layout(showlegend=True)
+    solution_fig.update_layout(showlegend=True)
+
+    positions = np.deg2rad(positions)
+    dist = pairwise_distances(positions, metric='haversine') * 6371
+    actual_ac = 0
+    solution_ac = 0
+    for i in range(p):
+        ac_matrix = dist * demands[:, i][:, None]
+        actual_ac += ac_matrix[:, actual_facilities[:, i]].min(axis=-1).sum()
+        solution_ac += ac_matrix[:, solution_facilities[:, i]].min(axis=-1).sum()
+    return actual_fig, solution_fig, actual_ac, solution_ac
 
 
 def demo_plot(city, facility):
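Not part of the patch — a minimal standalone sketch of the accessibility cost computed at the end of solver_plot above: every node is assigned to its nearest open facility, with demand-weighted haversine distances in kilometres (Earth radius ≈ 6371 km). Toy coordinates, not app data.

```python
import numpy as np
from sklearn.metrics import pairwise_distances

# Toy instance: 3 nodes (lat, lon in degrees), per-node demand, open facilities.
positions = np.deg2rad(np.array([[40.71, -73.93], [40.72, -73.99], [40.65, -73.88]]))
demands = np.array([213.0, 15.0, 365.0])
facilities = np.array([False, True, True])

# Haversine distances (radians in, unit-sphere out), scaled to kilometres.
dist = pairwise_distances(positions, metric='haversine') * 6371
ac_matrix = dist * demands[:, None]                        # row i = node i's weighted distances
total_cost = ac_matrix[:, facilities].min(axis=-1).sum()   # nearest open facility per node
print(total_cost)
```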
@@ -104,7 +176,7 @@ def demo_plot(city, facility):
 
     return actual_fig, solution_fig, actual_ac, solution_ac
 
-def solver_plot(data_npy, boost=False):
+def solver_plot1(data_npy, boost=False):
     data = data_npy.split('\n')
     n = len(data)
     p = int((len(data[0].split(' '))-2) / 2)
@@ -115,7 +187,6 @@ def solver_plot(data_npy, boost=False):
     for row in data:
         row = row.split(' ')
         row = [x for x in row if len(x)]
-        print(row)
 
         positions.append([float(row[0]), float(row[1])])
 
@@ -132,7 +203,6 @@ def solver_plot(data_npy, boost=False):
     demands = np.array(demands)
     actual_facilities = np.array(actual_facilities)
     solution_facilities = ~actual_facilities
-    print(actual_facilities)
 
     actual_fig = go.Figure()
     solution_fig = go.Figure()
@@ -193,13 +263,13 @@ def solver_plot(data_npy, boost=False):
 
 def get_example():
     return [
-        ('40.71 -73.93 213 0\n40.72 -73.99 15 1\n40.65 -73.88 365 1\n40.57 -73.96 629 0\n40.70 -73.97 106 0\n40.61 -73.95 189 1'),
-        ("40.71 -73.93 213 124 0 1\n40.72 -73.99 15 43 1 0\n40.65 -73.88 365 214 1 0\n40.57 -73.96 629 431 0 1\n40.70 -73.97 106 241 0 1\n40.61 -73.95 189 264 1 0")
+        ('40.71 -73.93 213 1\n40.72 -73.99 15 1\n40.65 -73.88 365 1\n40.57 -73.96 629 0\n40.70 -73.97 106 0\n40.61 -73.95 189 1'),
+        ("40.71 -73.93 213 124 0 1\n40.72 -73.99 15 43 1 0\n40.65 -73.88 365 214 1 0\n40.57 -73.96 629 431 0 1\n40.70 -73.97 106 241 0 1\n40.60 -73.92 129 214 1 0\n40.61 -73.95 189 264 0 1\n40.63 -73.94 124 164 1 0"),
     ]
 
 
 def load_npy_file(file_obj):
-    data = np.load(file_obj.name)
+    data = np.loadtxt(file_obj.name)
     string_array = '\n'.join([' '.join(map(str, row)) for row in data])
     return string_array
 
@@ -231,8 +301,8 @@ with gr.Blocks() as demo:
         gr.Examples(
             examples=get_example(),
             inputs=[data_npy],
-            fn=plot_from_npy,
-            outputs=[actual_map, solution_map],
+            fn=solver_plot1,
+            outputs=[actual_map, solution_map, actual_ac, solution_ac],
         )
     with gr.Row():
         boost = gr.Checkbox(label="Turbo Boost (accelerate solution generation with fewer SWAP steps)", value=False)
diff --git a/facility_location/__init__.py b/facility_location/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/facility_location/__pycache__/__init__.cpython-39.pyc b/facility_location/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..17faab9c8acaf4a1fb983e92ad21c238aea52ee5
Binary files /dev/null and b/facility_location/__pycache__/__init__.cpython-39.pyc differ
diff --git a/facility_location/__pycache__/multi_eval.cpython-39.pyc b/facility_location/__pycache__/multi_eval.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f50f7bf88ce6f403575bd6872945ecfbc1aa8288
Binary files /dev/null and b/facility_location/__pycache__/multi_eval.cpython-39.pyc differ
diff --git a/facility_location/agent/__init__.py b/facility_location/agent/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..42b44bbcbc94818944f11ef41f22f6982bffb53a
--- /dev/null
+++ b/facility_location/agent/__init__.py
@@ -0,0 +1,4 @@
+from .policy import MaskedFacilityLocationActorCriticPolicy
+from .features_extractor import FacilityLocationMLPExtractor, FacilityLocationGNNExtractor, FacilityLocationAttentionGNNExtractor
+
+__all__ = ['MaskedFacilityLocationActorCriticPolicy', 'FacilityLocationMLPExtractor', 'FacilityLocationGNNExtractor', 'FacilityLocationAttentionGNNExtractor']
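For reference (an illustration, not code from the diff): the example strings in app.py above encode rows of "lat lon demand_1..demand_p flag_1..flag_p", so p = (columns - 2) / 2, and load_npy_file round-trips the same table through np.loadtxt. A quick self-contained check:

```python
import io
import numpy as np

text = "40.71 -73.93 213 124 0 1\n40.72 -73.99 15 43 1 0"
data = np.loadtxt(io.StringIO(text))    # same parser load_npy_file relies on
p = (data.shape[1] - 2) // 2            # number of facility types
positions = data[:, :2]
demands = data[:, 2:2 + p]
flags = data[:, 2 + p:].astype(bool)    # current facility placements
print(p, positions.shape, demands.shape, flags.sum(axis=0))
```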
diff --git a/facility_location/agent/__pycache__/__init__.cpython-310.pyc b/facility_location/agent/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ebfb4ce0856a3c9a27c9c0538ab21d333a0ad7b6
Binary files /dev/null and b/facility_location/agent/__pycache__/__init__.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/__init__.cpython-39.pyc b/facility_location/agent/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9580f6f26296b93f043381bab5aed0bb4f3817ba
Binary files /dev/null and b/facility_location/agent/__pycache__/__init__.cpython-39.pyc differ
diff --git a/facility_location/agent/__pycache__/features_extractor.cpython-310.pyc b/facility_location/agent/__pycache__/features_extractor.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6138bdfef58295424d518bab7b7d932edbfecedb
Binary files /dev/null and b/facility_location/agent/__pycache__/features_extractor.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/features_extractor.cpython-39.pyc b/facility_location/agent/__pycache__/features_extractor.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2e4f1ed1c4b94332931b24c42554308b8024e050
Binary files /dev/null and b/facility_location/agent/__pycache__/features_extractor.cpython-39.pyc differ
diff --git a/facility_location/agent/__pycache__/ga.cpython-310.pyc b/facility_location/agent/__pycache__/ga.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d498287ef372fd9cbe7d897c92daf1a87462c254
Binary files /dev/null and b/facility_location/agent/__pycache__/ga.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/ga.cpython-39.pyc b/facility_location/agent/__pycache__/ga.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..161a9de6b5134c84f369f9aeac3d5ae1d9506288
Binary files /dev/null and b/facility_location/agent/__pycache__/ga.cpython-39.pyc differ
diff --git a/facility_location/agent/__pycache__/heuristic.cpython-310.pyc b/facility_location/agent/__pycache__/heuristic.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..04b16d23ecd5c4d7d36ac293316abd8e603a16f5
Binary files /dev/null and b/facility_location/agent/__pycache__/heuristic.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/heuristic.cpython-39.pyc b/facility_location/agent/__pycache__/heuristic.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c00f92696ac954e72c9118693de5962aaf010a8d
Binary files /dev/null and b/facility_location/agent/__pycache__/heuristic.cpython-39.pyc differ
diff --git a/facility_location/agent/__pycache__/metaheuristic.cpython-310.pyc b/facility_location/agent/__pycache__/metaheuristic.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..490d27a6aafcff62674880b924da7c4de0bf4f97
Binary files /dev/null and b/facility_location/agent/__pycache__/metaheuristic.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/metaheuristic.cpython-39.pyc b/facility_location/agent/__pycache__/metaheuristic.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a103ca5dcb01c16ceea7dde1c39aefdf0979d46f
Binary files /dev/null and b/facility_location/agent/__pycache__/metaheuristic.cpython-39.pyc differ
diff --git a/facility_location/agent/__pycache__/policy.cpython-310.pyc b/facility_location/agent/__pycache__/policy.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b0d123558bb99a9d800e771a932a27496dc35319
Binary files /dev/null and b/facility_location/agent/__pycache__/policy.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/policy.cpython-39.pyc b/facility_location/agent/__pycache__/policy.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f68a11ebcec8f4e65c26b2de96d81bc1d13afd3e
Binary files /dev/null and b/facility_location/agent/__pycache__/policy.cpython-39.pyc differ
diff --git a/facility_location/agent/__pycache__/solver.cpython-310.pyc b/facility_location/agent/__pycache__/solver.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ef5f60ef552ab6e11cc6d8502f3547f2daf682ea
Binary files /dev/null and b/facility_location/agent/__pycache__/solver.cpython-310.pyc differ
diff --git a/facility_location/agent/__pycache__/solver.cpython-39.pyc b/facility_location/agent/__pycache__/solver.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..30f1b18539132a1f9a5d0d14edc21a2469bea7a8
Binary files /dev/null and b/facility_location/agent/__pycache__/solver.cpython-39.pyc differ
diff --git a/facility_location/agent/features_extractor.py b/facility_location/agent/features_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..da47bbe6ee49fecb8e40db79663428b6b4fab040
--- /dev/null
+++ b/facility_location/agent/features_extractor.py
@@ -0,0 +1,225 @@
+from collections import OrderedDict
+from typing import Tuple
+
+from gym import spaces
+import torch as th
+from torch import nn
+
+from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
+from stable_baselines3.common.type_aliases import TensorDict
+
+import time
+
+
+def mean_features(h: th.Tensor, mask: th.Tensor):
+    float_mask = mask.float()
+    mean_h = (h * float_mask.unsqueeze(-1)).sum(dim=1) / float_mask.sum(dim=1, keepdim=True)
+    return mean_h
+
+
+# def compute_state(observations: TensorDict, h_nodes: th.Tensor):
+#     node_mask = observations['node_mask'].bool()
+#     mean_h_nodes = mean_features(h_nodes, node_mask)
+
+#     old_facility_mask = observations['old_facility_mask'].bool()
+#     h_old_facility = mean_features(h_nodes, old_facility_mask)
+#     h_old_facility_repeat = h_old_facility.unsqueeze(1).expand(-1, h_nodes.shape[1], -1)
+#     state_policy_old_facility = th.cat([
+#         h_nodes,
+#         h_old_facility_repeat,
+#         h_nodes - h_old_facility_repeat,
+#         h_nodes * h_old_facility_repeat], dim=-1)
+
+#     new_facility_mask = observations['new_facility_mask'].bool()
+#     h_new_facility = mean_features(h_nodes, new_facility_mask)
+#     h_new_facility_repeat = h_new_facility.unsqueeze(1).expand(-1, h_nodes.shape[1], -1)
+#     state_policy_new_facility = th.cat([
+#         h_nodes,
+#         h_new_facility_repeat,
+#         h_nodes - h_new_facility_repeat,
+#         h_nodes * h_new_facility_repeat], dim=-1)
+
+#     state_value = th.cat([
+#         mean_h_nodes,
+#         h_old_facility,
+#         h_new_facility], dim=-1)
+
+#     return state_policy_old_facility, state_policy_new_facility, state_value, old_facility_mask, new_facility_mask
+
+def compute_state(observations: TensorDict, h_edges: th.Tensor):
+    dynamic_edge_mask = observations['dynamic_edge_mask'].bool()
+    mean_h_edges = mean_features(h_edges, dynamic_edge_mask)
+
+    state_policy_facility_pair = h_edges
+    state_value = mean_h_edges
+
+    return state_policy_facility_pair, state_value, dynamic_edge_mask
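A toy check (assumed inputs, not from the diff) of mean_features above — a masked mean over the node axis that ignores padded entries:

```python
import torch as th

h = th.arange(12.0).reshape(1, 4, 3)             # (batch, nodes, features)
mask = th.tensor([[True, True, False, False]])   # last two nodes are padding

float_mask = mask.float()
mean_h = (h * float_mask.unsqueeze(-1)).sum(dim=1) / float_mask.sum(dim=1, keepdim=True)
print(mean_h)  # tensor([[1.5, 2.5, 3.5]]) -- mean of the two real nodes only
```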
+
+
+class FacilityLocationMLPExtractor(BaseFeaturesExtractor):
+    def __init__(
+        self,
+        observation_space: spaces.Dict,
+        hidden_units: Tuple = (32, 32),
+    ) -> None:
+        super().__init__(observation_space, features_dim=1)
+
+        self.node_mlp = self.create_mlp(observation_space.spaces['node_features'].shape[1], hidden_units)
+
+    @staticmethod
+    def create_mlp(input_dim: int, hidden_units: Tuple) -> nn.Sequential:
+        layers = OrderedDict()
+        for i, units in enumerate(hidden_units):
+            if i == 0:
+                layers[f'mlp-extractor-linear_{i}'] = nn.Linear(input_dim, units)
+            else:
+                layers[f'mlp-extractor-linear_{i}'] = nn.Linear(hidden_units[i - 1], units)
+            layers[f'mlp-extractor-tanh_{i}'] = nn.Tanh()
+        return nn.Sequential(layers)
+
+    def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]:
+        node_features = observations['node_features']
+        h_nodes = self.node_mlp(node_features)
+        return compute_state(observations, h_nodes)
+
+    @staticmethod
+    def get_policy_feature_dim(node_dim: int) -> int:
+        return node_dim * 4
+
+    @staticmethod
+    def get_value_feature_dim(node_dim: int) -> int:
+        return node_dim * 3
+
+
+class FacilityLocationGNNExtractor(BaseFeaturesExtractor):
+    def __init__(
+        self,
+        observation_space: spaces.Dict,
+        num_gnn_layers: int = 2,
+        node_dim: int = 32,
+    ) -> None:
+        super().__init__(observation_space, features_dim=1)
+
+        num_node_features = observation_space.spaces['node_features'].shape[1]
+        self.node_encoder = self.create_node_encoder(num_node_features, node_dim)
+        self.gnn_layers = self.create_gnn(num_gnn_layers, node_dim)
+        self.single_gnn_layer = self.create_gnn(1, node_dim)[0]
+
+    @staticmethod
+    def create_node_encoder(num_node_features: int, node_dim: int) -> nn.Sequential:
+        node_encoder = nn.Sequential(
+            nn.Linear(num_node_features, node_dim),
+            nn.Tanh())
+        return node_encoder
+
+    @staticmethod
+    def create_gnn(num_gnn_layers: int, node_dim: int) -> nn.ModuleList:
+        layers = nn.ModuleList()
+        for i in range(num_gnn_layers):
+            gnn_layer = nn.Sequential(
+                nn.Linear(node_dim, node_dim),
+                nn.Tanh())
+            layers.append(gnn_layer)
+        return layers
+
+    @staticmethod
+    def scatter_count(h_edges, indices, edge_mask, max_num_nodes):
+        batch_size = h_edges.shape[0]
+        num_latents = h_edges.shape[2]
+
+        h_nodes = th.zeros(batch_size, max_num_nodes, num_latents).to(h_edges.device)
+        count_edge = th.zeros_like(h_nodes)
+        count = th.broadcast_to(edge_mask.unsqueeze(-1), h_edges.shape).float()
+
+        idx = indices.unsqueeze(-1).expand(-1, -1, num_latents)
+        h_nodes = h_nodes.scatter_add_(1, idx, h_edges)
+        count_edge = count_edge.scatter_add_(1, idx, count)
+        return h_nodes, count_edge
+
+    @staticmethod
+    def gather_to_edges(h_nodes, edge_index, edge_mask, gnn_layer):
+        h_nodes = gnn_layer(h_nodes)
+        h_edges_12 = th.gather(h_nodes, 1, edge_index[:, :, 0].unsqueeze(-1).expand(-1, -1, h_nodes.size(-1)))
+        h_edges_21 = th.gather(h_nodes, 1, edge_index[:, :, 1].unsqueeze(-1).expand(-1, -1, h_nodes.size(-1)))
+        mask = th.broadcast_to(edge_mask.unsqueeze(-1), h_edges_12.shape)
+        h_edges_12 = th.where(mask, h_edges_12, th.zeros_like(h_edges_12))
+        h_edges_21 = th.where(mask, h_edges_21, th.zeros_like(h_edges_21))
+        return h_edges_12, h_edges_21
+
+    @classmethod
+    def scatter_to_nodes(cls, h_edges, edge_index, edge_mask, node_mask):
+        h_edges_12, h_edges_21 = h_edges
+        max_num_nodes = node_mask.shape[1]
+        h_nodes_1, count_1 = cls.scatter_count(h_edges_21, edge_index[:, :, 0], edge_mask, max_num_nodes)
+        h_nodes_2, count_2 = cls.scatter_count(h_edges_12, edge_index[:, :, 1], edge_mask, max_num_nodes)
+
+        h_nodes_sum = h_nodes_1 + h_nodes_2
+
+        mask = th.broadcast_to(node_mask.unsqueeze(-1), h_nodes_sum.shape)
+        count = count_1 + count_2
+        count_padding = th.ones_like(count)
+        count = th.where(mask, count, count_padding)
+
+        h_nodes = h_nodes_sum / count
+        return h_nodes
+
+    def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor]:
+        t1 = time.time()
+        node_features = observations['node_features']
+        h_nodes = self.node_encoder(node_features)
+
+        edge_static_index = observations['static_adjacency_list'].long()
+        edge_dynamic_index = observations['dynamic_adjacency_list'].long()
+        node_mask = observations['node_mask'].bool()
+        static_edge_mask = observations['static_edge_mask'].bool()
+        dynamic_edge_mask = observations['dynamic_edge_mask'].bool()
+        for gnn_layer in self.gnn_layers:
+            h_edges = self.gather_to_edges(h_nodes, edge_static_index, static_edge_mask, gnn_layer)
+            h_nodes_new = self.scatter_to_nodes(h_edges, edge_static_index, static_edge_mask, node_mask)
+            h_nodes = h_nodes + h_nodes_new
+        h_edges12, h_edges21 = self.gather_to_edges(h_nodes, edge_dynamic_index, dynamic_edge_mask, self.single_gnn_layer)
+        h_edges = th.cat([h_edges12, h_edges21], dim=-1)
+
+        t2 = time.time()
+        # print('cal embedding time:', t2-t1)
+
+        return compute_state(observations, h_edges)
+
+    @staticmethod
+    def get_policy_feature_dim(node_dim: int) -> int:
+        return node_dim * 2
+
+    @staticmethod
+    def get_value_feature_dim(node_dim: int) -> int:
+        return node_dim * 2
+
+
+class FacilityLocationAttentionGNNExtractor(FacilityLocationGNNExtractor):
+    def __init__(
+        self,
+        observation_space: spaces.Dict,
+        num_gnn_layers: int = 2,
+        node_dim: int = 32,
+    ) -> None:
+        super().__init__(observation_space, num_gnn_layers, node_dim)
+
+        num_node_features = observation_space.spaces['node_features'].shape[1]
+        self.node_encoder = self.create_node_encoder(num_node_features, node_dim)
+        self.gnn_layers = self.create_gnn(num_gnn_layers, node_dim)
+        self.attention = nn.MultiheadAttention(node_dim, node_dim)
+
+    def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor]:
+        node_features = observations['node_features']
+        h_nodes = self.node_encoder(node_features)
+
+        edge_static_index = observations['static_adjacency_list'].long()
+        edge_dynamic_index = observations['dynamic_adjacency_list'].long()
+        node_mask = observations['node_mask'].bool()
+        edge_mask = observations['static_edge_mask'].bool()
+        for gnn_layer in self.gnn_layers:
+            h_edges = self.gather_to_edges(h_nodes, edge_static_index, edge_mask, gnn_layer)
+            h_nodes_new = self.scatter_to_nodes(h_edges, edge_static_index, edge_mask, node_mask)
+            h_nodes = h_nodes + h_nodes_new
+
+        h_nodes = self.attention(h_nodes, h_nodes, h_nodes)[0]
+
+        return compute_state(observations, h_nodes)
\ No newline at end of file
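The core pattern in the GNN extractor above, isolated as a sketch with toy shapes (the (batch, edges, 2) adjacency layout follows the diff; the sizes are made up): node features are gathered onto edge endpoints with th.gather, then accumulated back onto nodes with scatter_add_.

```python
import torch as th

h_nodes = th.arange(8.0).reshape(1, 4, 2)           # (batch, 4 nodes, 2 dims)
edge_index = th.tensor([[[0, 1], [1, 2], [2, 3]]])  # (batch, 3 edges, 2 endpoints)

# Gather: pull each edge's source-node features.
idx = edge_index[:, :, 0].unsqueeze(-1).expand(-1, -1, h_nodes.size(-1))
h_src = th.gather(h_nodes, 1, idx)                  # (batch, 3 edges, 2 dims)

# Scatter: sum edge messages back onto their source nodes.
h_back = th.zeros_like(h_nodes)
h_back.scatter_add_(1, idx, h_src)
print(h_src.shape, h_back[0])
```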
diff --git a/facility_location/agent/policy.py b/facility_location/agent/policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e9ac152061b30d3bd6ab273b7c5171a8f78eac1
--- /dev/null
+++ b/facility_location/agent/policy.py
@@ -0,0 +1,229 @@
+from functools import partial
+from typing import Callable, Tuple, Text, Union
+from collections import OrderedDict
+
+import numpy as np
+from gym import spaces
+import torch as th
+from torch import nn
+
+from stable_baselines3.common.policies import ActorCriticPolicy
+from stable_baselines3.common.utils import get_device
+from stable_baselines3.common.type_aliases import Schedule
+
+
+def create_mlp(head: Text, input_dim: int, hidden_units: Tuple) -> nn.Sequential:
+    layers = OrderedDict()
+    for i, units in enumerate(hidden_units):
+        if i == 0:
+            layers[f'{head}_linear_{i}'] = nn.Linear(input_dim, units)
+        else:
+            layers[f'{head}_linear_{i}'] = nn.Linear(hidden_units[i - 1], units)
+        if i != len(hidden_units) - 1:
+            layers[f'{head}_tanh_{i}'] = nn.Tanh()
+    if head.startswith('policy'):
+        layers[f'{head}_flatten'] = nn.Flatten()
+    return nn.Sequential(layers)
+
+
+class MaskedFacilityLocationNetwork(nn.Module):
+
+    def __init__(
+        self,
+        policy_feature_dim: int,
+        value_feature_dim: int,
+        policy_hidden_units: Tuple = (32, 32, 1),
+        value_hidden_units: Tuple = (32, 32, 1),
+        device: Union[th.device, Text] = "auto",
+    ):
+        super().__init__()
+        device = get_device(device)
+
+        # Policy network
+        # self.old_facility_policy_net = create_mlp('policy-old-facility',
+        #                                           policy_feature_dim,
+        #                                           policy_hidden_units).to(device)
+        # self.new_facility_policy_net = create_mlp('policy-new-facility',
+        #                                           policy_feature_dim,
+        #                                           policy_hidden_units).to(device)
+        self.pair_facility_policy_net = create_mlp('policy-pair-facility',
+                                                   policy_feature_dim,
+                                                   policy_hidden_units).to(device)
+        # Value network
+        self.value_net = create_mlp('value',
+                                    value_feature_dim,
+                                    value_hidden_units).to(device)
+
+    def forward(self,
+                features: Tuple[th.Tensor, th.Tensor, th.Tensor]) -> Tuple[th.Tensor, th.Tensor]:
+        return self.forward_actor(features), self.forward_critic(features)
+
+    # def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+    #     state_policy_old_facility, state_policy_new_facility, _, old_facility_mask, new_facility_mask = features
+
+    #     old_facility_logits = self.old_facility_policy_net(state_policy_old_facility)  # (batch_size, node_range)
+    #     old_facility_padding = th.full_like(old_facility_mask, -th.inf, dtype=th.float32)
+    #     masked_old_facility_logits = th.where(old_facility_mask, old_facility_logits, old_facility_padding)
+
+    #     new_facility_logits = self.new_facility_policy_net(state_policy_new_facility)  # (batch_size, node_range)
+    #     new_facility_padding = th.full_like(new_facility_mask, -th.inf, dtype=th.float32)
+    #     masked_new_facility_logits = th.where(new_facility_mask, new_facility_logits, new_facility_padding)
+
+    #     masked_old_new_facility_logits = th.cat([masked_old_facility_logits, masked_new_facility_logits], dim=1)
+    #     return masked_old_new_facility_logits
+
+    def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        state_policy_pair_facility, _, dynamic_edge_mask = features
+        pair_facility_logits = self.pair_facility_policy_net(state_policy_pair_facility)
+        pair_facility_padding = th.full_like(dynamic_edge_mask, -th.inf, dtype=th.float32)
+        masked_pair_facility_logits = th.where(dynamic_edge_mask, pair_facility_logits, pair_facility_padding)
+
+        return masked_pair_facility_logits
+
+    def forward_critic(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        _, state_value, _ = features
+        return self.value_net(state_value)
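How the masking in forward_actor behaves, as a standalone sketch: invalid facility pairs get -inf logits, so softmax assigns them exactly zero probability.

```python
import torch as th

logits = th.tensor([[1.0, 2.0, 3.0]])
mask = th.tensor([[True, False, True]])          # middle action is invalid

padding = th.full_like(mask, -th.inf, dtype=th.float32)
masked_logits = th.where(mask, logits, padding)
print(th.softmax(masked_logits, dim=-1))         # middle probability is exactly 0
```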
+
+
+class POPSTARMaskedFacilityLocationNetwork(nn.Module):
+
+    def __init__(
+        self,
+        policy_feature_dim: int,
+        value_feature_dim: int,
+        policy_hidden_units: Tuple = (32, 32, 1),
+        value_hidden_units: Tuple = (32, 32, 1),
+        device: Union[th.device, Text] = "auto",
+    ):
+        super().__init__()
+        device = get_device(device)
+
+        # Policy network
+        self.old_facility_policy_net = create_mlp('policy-old-facility',
+                                                  policy_feature_dim,
+                                                  policy_hidden_units).to(device)
+        self.new_facility_policy_net = create_mlp('policy-new-facility',
+                                                  policy_feature_dim,
+                                                  policy_hidden_units).to(device)
+        self.old_new_facility_policy_net = create_mlp('policy-old-new-facility',
+                                                      policy_feature_dim * 4,
+                                                      policy_hidden_units).to(device)
+
+        # Value network
+        self.value_net = create_mlp('value',
+                                    value_feature_dim,
+                                    value_hidden_units).to(device)
+
+    def forward(self,
+                features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> Tuple[th.Tensor, th.Tensor]:
+        return self.forward_actor(features), self.forward_critic(features)
+
+    def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        state_policy_old_facility, state_policy_new_facility, _, old_facility_mask, new_facility_mask = features
+
+        node_range = old_facility_mask.shape[1]
+
+        loss = self.old_facility_policy_net(state_policy_old_facility)  # (batch_size, node_range)
+        loss = loss.repeat_interleave(node_range, dim=1)
+
+        gain = self.new_facility_policy_net(state_policy_new_facility)  # (batch_size, node_range)
+        gain = gain.repeat(1, node_range)
+
+        state_policy_old_facility_expand = state_policy_old_facility.unsqueeze(2).expand(-1, -1, node_range, -1)
+        state_policy_new_facility_expand = state_policy_new_facility.unsqueeze(1).expand(-1, node_range, -1, -1)
+        state_policy_old_new_facility = th.cat(
+            [
+                state_policy_old_facility_expand,
+                state_policy_new_facility_expand,
+                state_policy_old_facility_expand - state_policy_new_facility_expand,
+                state_policy_old_facility_expand * state_policy_new_facility_expand
+            ], dim=-1
+        )
+        extra = self.old_new_facility_policy_net(state_policy_old_new_facility)  # (batch_size, node_range * node_range)
+
+        logits = gain - loss + extra
+
+        action_mask = th.logical_and(old_facility_mask.unsqueeze(2), new_facility_mask.unsqueeze(1)).flatten(start_dim=1)
+        padding = th.full_like(action_mask, -th.inf, dtype=th.float32)
+        masked_logits = th.where(action_mask, logits, padding)
+
+        return masked_logits
+
+    def forward_critic(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        _, _, state_value, _, _ = features
+        return self.value_net(state_value)
+
+
+class MaskedFacilityLocationActorCriticPolicy(ActorCriticPolicy):
+    def __init__(
+        self,
+        observation_space: spaces.Space,
+        action_space: spaces.Space,
+        lr_schedule: Callable[[float], float],
+        *args,
+        **kwargs,
+    ):
+        self.policy_feature_dim = kwargs.pop('policy_feature_dim')
+        self.value_feature_dim = kwargs.pop('value_feature_dim')
+        self.policy_hidden_units = kwargs.pop('policy_hidden_units')
+        self.value_hidden_units = kwargs.pop('value_hidden_units')
+
+        self.popstar = kwargs.pop('popstar')
+
+        super().__init__(
+            observation_space,
+            action_space,
+            lr_schedule,
+            # Pass remaining arguments to base class
+            *args,
+            **kwargs,
+        )
+
+    def _build(self, lr_schedule: Schedule) -> None:
+        self._build_mlp_extractor()
+
+        self.action_net = nn.Identity()
+        self.value_net = nn.Identity()
+
+        # Init weights: use orthogonal initialization
+        # with small initial weight for the output
+        if self.ortho_init:
+            # TODO: check for features_extractor
+            # Values from stable-baselines.
+            # features_extractor/mlp values are
+            # originally from openai/baselines (default gains/init_scales).
+            module_gains = {
+                self.features_extractor: np.sqrt(2),
+                self.mlp_extractor: np.sqrt(2),
+            }
+            # if not self.share_features_extractor:
+            #     # Note(antonin): this is to keep SB3 results
+            #     # consistent, see GH#1148
+            #     del module_gains[self.features_extractor]
+            #     module_gains[self.pi_features_extractor] = np.sqrt(2)
+            #     module_gains[self.vf_features_extractor] = np.sqrt(2)
+
+            for module, gain in module_gains.items():
+                module.apply(partial(self.init_weights, gain=gain))
+
+        # Setup optimizer with initial learning rate
+        self.optimizer = self.optimizer_class(self.parameters(), lr=lr_schedule(1), **self.optimizer_kwargs)
+
+    def _build_mlp_extractor(self) -> None:
+        if not self.popstar:
+            self.mlp_extractor = MaskedFacilityLocationNetwork(
+                self.policy_feature_dim,
+                self.value_feature_dim,
+                self.policy_hidden_units,
+                self.value_hidden_units,
+                self.device,
+            )
+        else:
+            self.mlp_extractor = POPSTARMaskedFacilityLocationNetwork(
+                self.policy_feature_dim,
+                self.value_feature_dim,
+                self.policy_hidden_units,
+                self.value_hidden_units,
+                self.device,
+            )
diff --git a/facility_location/agent/solver.py b/facility_location/agent/solver.py
new file mode 100644
index 0000000000000000000000000000000000000000..92391ad589792b88956aafa7119f890c21972bba
--- /dev/null
+++ b/facility_location/agent/solver.py
@@ -0,0 +1,33 @@
+from typing import Text
+
+import numpy as np
+import pulp
+from spopt.locate import PMedian
+
+from facility_location.env import EvalPMPEnv
+
+
+class PMPSolver:
+    def __init__(self, solver: Text, env: EvalPMPEnv):
+        if solver == 'GUROBI':
+            self._solver = pulp.GUROBI(msg=False)
+        elif solver == 'GUROBI_CMD':
+            self._solver = pulp.GUROBI_CMD(msg=False)
+        elif solver == 'PULP_CBC_CMD':
+            self._solver = pulp.PULP_CBC_CMD(msg=False)
+        elif solver == 'GLPK_CMD':
+            self._solver = pulp.GLPK_CMD(msg=False)
+        elif solver == 'MOSEK':
+            self._solver = pulp.MOSEK(msg=False)
+        else:
+            raise ValueError(f'Solver {solver} not supported.')
+
+        self.env = env
+
+    def solve(self):
+        _, demands, _, p = self.env.get_instance()
+        distance_matrix, _ = self.env.get_distance_and_cost()
+        pmedian_from_cost_matrix = PMedian.from_cost_matrix(distance_matrix, demands, p_facilities=p)
+        pmedian_from_cost_matrix = pmedian_from_cost_matrix.solve(self._solver)
+        solution = np.array([len(temp) > 0 for temp in pmedian_from_cost_matrix.fac2cli], dtype=bool)
+        return solution
diff --git a/facility_location/cfg/__init__.py b/facility_location/cfg/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
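Standalone usage of the spopt calls PMPSolver wraps, on toy data (the CBC backend here is an assumption; any installed PuLP solver works): build a p-median model from a cost matrix and recover the boolean facility vector from fac2cli, exactly as solve() does above.

```python
import numpy as np
import pulp
from spopt.locate import PMedian

cost = np.random.default_rng(0).random((6, 6))   # toy client-facility cost matrix
demand = np.ones(6)

model = PMedian.from_cost_matrix(cost, demand, p_facilities=2)
model = model.solve(pulp.PULP_CBC_CMD(msg=False))
solution = np.array([len(clients) > 0 for clients in model.fac2cli], dtype=bool)
print(solution)  # True where a facility was opened
```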
diff --git a/facility_location/cfg/plot.yaml b/facility_location/cfg/plot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..231d4c9640d28912d500e817f94f18714a437250
--- /dev/null
+++ b/facility_location/cfg/plot.yaml
@@ -0,0 +1,64 @@
+
+env_specs:
+  region:
+  min_n: 20
+  max_n: 50
+  min_p_ratio: 0.1
+  max_p_ratio: 0.4
+  max_steps_scale: 0.5
+  tabu_time: 3
+  tabu_stable_steps_scale: 0.2
+  popstar: false
+
+# evaluation
+eval_specs:
+  region:
+  seed: 12345
+  max_nodes: 2488
+  max_edges: 5000
+  val_num_cases: 100
+  test_num_cases: 1
+  val_np: !!python/tuple [50,5]
+  test_np:
+    - !!python/tuple [2214,36]
+    - !!python/tuple [2214,189]
+    - !!python/tuple [2214,425]
+# agent
+agent_specs:
+  policy_feature_dim: 32
+  value_feature_dim: 32
+  policy_hidden_units: !!python/tuple [32, 32, 1]
+  value_hidden_units: !!python/tuple [32, 32, 1]
+
+# mlp
+mlp_specs:
+  hidden_units: !!python/tuple [32, 32]
+
+gnn_specs:
+  num_gnn_layers: 2
+  node_dim: 32
+
+
+# ts
+ts_specs:
+  max_steps_scale: 2
+  stable_iterations_scale: 0.2
+
+
+# popstar
+popstar_specs:
+  graspit: 32
+  elite: 10
+
+
+# ga
+ga_specs:
+  num_generations: 100
+  num_parents_mating: 50
+  sol_per_pop: 100
+  parent_selection_type: sss
+  crossover_probability: 0.8
+  mutation_probability: 0.1
+
+
+
diff --git a/facility_location/env/__init__.py b/facility_location/env/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e684fd6ed3d6f35e99f6bce6a6e4adf44845ee2d
--- /dev/null
+++ b/facility_location/env/__init__.py
@@ -0,0 +1,3 @@
+from .pmp import PMPEnv, EvalPMPEnv, MULTIPMP
+
+__all__ = ['PMPEnv', 'EvalPMPEnv', 'MULTIPMP']
\ No newline at end of file
diff --git a/facility_location/env/__pycache__/__init__.cpython-310.pyc b/facility_location/env/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e5ea7b63c7d94ab7589ed515211679f0c6703f5d
Binary files /dev/null and b/facility_location/env/__pycache__/__init__.cpython-310.pyc differ
diff --git a/facility_location/env/__pycache__/__init__.cpython-39.pyc b/facility_location/env/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7099f2203514bcd9c36080712fd47341686fdad1
Binary files /dev/null and b/facility_location/env/__pycache__/__init__.cpython-39.pyc differ
diff --git a/facility_location/env/__pycache__/facility_location_client.cpython-310.pyc b/facility_location/env/__pycache__/facility_location_client.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2e8307acbddc9ab6f90b5fca5c6bc76b76b788f1
Binary files /dev/null and b/facility_location/env/__pycache__/facility_location_client.cpython-310.pyc differ
diff --git a/facility_location/env/__pycache__/facility_location_client.cpython-39.pyc b/facility_location/env/__pycache__/facility_location_client.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b778d958aa2238b17295c9f7bd16d7ae96a48332
Binary files /dev/null and b/facility_location/env/__pycache__/facility_location_client.cpython-39.pyc differ
diff --git a/facility_location/env/__pycache__/obs_extractor.cpython-310.pyc b/facility_location/env/__pycache__/obs_extractor.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..be78fd4333b9202aac95a38a727f51f19d130f64
Binary files /dev/null and b/facility_location/env/__pycache__/obs_extractor.cpython-310.pyc differ
diff --git a/facility_location/env/__pycache__/obs_extractor.cpython-39.pyc b/facility_location/env/__pycache__/obs_extractor.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..747801bd2a8b5ae687b19365832a076c222645eb
Binary files /dev/null and b/facility_location/env/__pycache__/obs_extractor.cpython-39.pyc differ
diff --git a/facility_location/env/__pycache__/pmp.cpython-310.pyc b/facility_location/env/__pycache__/pmp.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..95765910c5916349f8d5f15a66ec10e9c71fffb6
Binary files /dev/null and b/facility_location/env/__pycache__/pmp.cpython-310.pyc differ
diff --git a/facility_location/env/__pycache__/pmp.cpython-39.pyc b/facility_location/env/__pycache__/pmp.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4a4f2cb0b5a6b2e47c5a5ff8cfdd40b12cf0b6bc
Binary files /dev/null and b/facility_location/env/__pycache__/pmp.cpython-39.pyc differ
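plot.yaml uses !!python/tuple tags, which yaml.safe_load rejects; loading it needs a loader that constructs Python objects. A sketch under that assumption (the diff does not show the config-loading code; use only on trusted files):

```python
import yaml

doc = "val_np: !!python/tuple [50, 5]"
cfg = yaml.unsafe_load(doc)   # safe_load would raise ConstructorError here
print(cfg['val_np'])          # (50, 5)
```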
diff --git a/facility_location/env/facility_location_client.py b/facility_location/env/facility_location_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6b8f1e3f96649eb623a624fddc6502d675fab17
--- /dev/null
+++ b/facility_location/env/facility_location_client.py
@@ -0,0 +1,278 @@
+import warnings
+from typing import Tuple, Dict
+
+import networkx as nx
+import numpy as np
+from geopandas import GeoDataFrame
+from shapely.geometry import MultiPoint
+from libpysal.weights.contiguity import Voronoi as Voronoi_weights
+from sklearn.neighbors import kneighbors_graph
+from sklearn.metrics import pairwise_distances
+
+from facility_location.utils.config import Config
+import time
+
+
+class FacilityLocationClient:
+    def __init__(self, cfg: Config, rng: np.random.Generator):
+        self.cfg = cfg
+        self.rng = rng
+        self._cfg_tabu_time = cfg.env_specs['tabu_time']
+        self._t = 0
+
+    def set_instance(self, points: np.ndarray, demands: np.ndarray, n: int, p: int, real: bool) -> None:
+        self._points = points
+        self._demands = demands
+        points_geom = MultiPoint(points)
+        self._gdf = GeoDataFrame({
+            'geometry': points_geom.geoms,
+            'demand': demands,
+        })
+        self._n = n
+        self._p = p
+        self._old_facility_mask = np.zeros(self._n, dtype=bool)
+        self._new_facility_mask = np.zeros(self._n, dtype=bool)
+        self._construct_static_graph()
+
+        if real:
+            self._distance_matrix = pairwise_distances(points, metric='haversine')
+        else:
+            self._distance_matrix = pairwise_distances(points, metric='euclidean')
+        self._cost_matrix = self._distance_matrix * self._demands[:, None]
+        self._gain = np.zeros(self._n)
+        self._loss = np.zeros(self._n)
+        self._add_time = np.full(self._n, -np.inf)
+        self._drop_time = np.full(self._n, -np.inf)
+        self.reset_tabu_time()
+
+    def get_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        return self._points, self._demands, self._n, self._p
+
+    def get_distance_and_cost_matrix(self) -> Tuple[np.ndarray, np.ndarray]:
+        return self._distance_matrix, self._cost_matrix
+
+    def get_avg_distance_and_cost(self) -> Tuple[np.ndarray, np.ndarray]:
+        avg_distance = self._distance_matrix.sum(axis=-1) / (self._n - 1)
+        avg_cost = self._cost_matrix.sum(axis=-1) / (self._n - 1)
+        return avg_distance, avg_cost
+
+    def _construct_static_graph(self) -> None:
+        self._connection_matrix = kneighbors_graph(self._points, n_neighbors=3, mode="connectivity").toarray()
+        self._static_graph = nx.from_numpy_array(self._connection_matrix)
+        self._static_edges = np.array(self._static_graph.edges(), dtype=np.int64)
+
+    def _construct_dynamic_graph(self) -> None:
+        t1 = time.time()
+        try:
+            solution_distance_min = np.partition(self._distance_matrix[:, self._solution][self._solution, :], 3, axis=-1)[:, 2]
+        except ValueError:
+            raise ValueError('Too few open facilities to take the 3rd-nearest solution distance.')
+        solution_distance_matrix = np.zeros((self._n, self._n))
+        solution_distance_matrix[:, self._solution] = solution_distance_min
+        solution_knearest_matrix = np.logical_and(self._distance_matrix < solution_distance_matrix, self._distance_matrix > 0)
+        old_tabu_mask, new_tabu_mask = self.get_tabu_mask(self._t)
+        solution_matrix = np.logical_and(np.logical_and(self._solution, old_tabu_mask)[:, None], (np.logical_and(~self._solution, new_tabu_mask)[None, :]))
+        solution_matrix = np.logical_or(solution_matrix, solution_matrix.T)
+        gainloss_matrix = np.logical_and((self._gain[:, None] > self._loss[None, :]), self._loss[None, :] > 0)
+        graph_matrix = np.logical_and(solution_matrix, np.logical_or(gainloss_matrix, solution_knearest_matrix))
+
+        if not np.any(graph_matrix):
+            if np.any(solution_matrix):
+                graph_matrix = solution_matrix
+                if not np.any(graph_matrix):
+                    raise ValueError('Invalid graph_matrix')
+            else:
+                graph_matrix = self._solution[:, None] ^ self._solution[None, :]
+        self._dynamic_graph = nx.from_numpy_array(graph_matrix)
+        self._dynamic_edges = np.array(self._dynamic_graph.edges(), dtype=np.int64)
+
+        t2 = time.time()
+        # print('dynamic graph time:', t2-t1)
+
+    def get_static_adjacency_list(self) -> np.ndarray:
+        return self._static_edges
+
+    def get_dynamic_adjacency_list(self) -> np.ndarray:
+        return self._dynamic_edges
+
+    def compute_initial_solution(self) -> Tuple[float, np.ndarray]:
+        self._solution = np.zeros(self._n, dtype=bool)
+        p_0 = self._demands.argmax()
+        self._solution[p_0] = True
+        for _ in range(self._p - 1):
+            p_max_cost = self._cost_matrix[:, self._solution].min(axis=-1).argmax()
+            self._solution[p_max_cost] = True
+        self._init_gain_and_loss()
+        self._construct_dynamic_graph()
+        self._old_facility_mask = self._solution
+        self._new_facility_mask = ~self._solution
+        return self.compute_obj_value(), self._solution
+
+    def compute_obj_value(self) -> float:
+        obj_value = self._cost_matrix[:, self._solution].min(axis=-1).sum()
+        return obj_value
+
+    def compute_obj_value_from_solution(self, solution) -> float:
+        self._solution = solution
+        self._init_gain_and_loss()
+        self._construct_dynamic_graph()
+        obj_value = self.compute_obj_value()
+        return obj_value
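The objective evaluated throughout the client, as a toy worked example: costs are demand-weighted distances, and each node is served by its cheapest open facility (compute_obj_value's cost_matrix[:, solution].min(axis=-1).sum()).

```python
import numpy as np

distance = np.array([[0.0, 1.0, 4.0],
                     [1.0, 0.0, 2.0],
                     [4.0, 2.0, 0.0]])
demands = np.array([3.0, 1.0, 2.0])
cost_matrix = distance * demands[:, None]   # row i weighted by node i's demand
solution = np.array([True, False, True])    # facilities open at nodes 0 and 2

obj = cost_matrix[:, solution].min(axis=-1).sum()
print(obj)  # 1.0: nodes 0 and 2 serve themselves, node 1 pays 1 * 1.0
```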
+
+    # def swap(self, old_facility: int, new_facility: int, t: int) -> Tuple[float, np.ndarray, Dict]:
+    #     if old_facility >= self._n or not self._solution[old_facility]:
+    #         warn_msg = f'Old facility {old_facility} is not a facility of the current solution {self._solution}.'
+    #         warnings.warn(warn_msg)
+    #         old_facility = self.rng.choice(np.arange(self._n)[self._solution])
+    #     if new_facility >= self._n or self._solution[new_facility]:
+    #         warn_msg = f'New facility {new_facility} is already a facility of the current solution {self._solution}.'
+    #         warnings.warn(warn_msg)
+    #         new_facility = self.rng.choice(np.arange(self._n)[~self._solution])
+    #     self._solution[old_facility] = False
+    #     self._solution[new_facility] = True
+    #     self._drop_time[old_facility] = t
+    #     self._add_time[new_facility] = t
+    #     self._t = t
+    #     return self.compute_obj_value(), self._solution, {}
+
+    def swap(self, facility_pair_index: int, t: int) -> Tuple[float, np.ndarray, Dict]:
+        facility_pair = self._dynamic_edges[facility_pair_index]
+        facility1 = facility_pair[0]
+        facility2 = facility_pair[1]
+
+        if (not self._solution[facility1]) and (self._solution[facility2]):
+            new_facility = facility1
+            old_facility = facility2
+        elif (not self._solution[facility2]) and (self._solution[facility1]):
+            new_facility = facility2
+            old_facility = facility1
+        else:
+            raise ValueError('Facility pair must swap an open facility with a closed one.')
+
+        self._solution[old_facility] = False
+        self._solution[new_facility] = True
+        self._old_facility_mask[new_facility] = True
+        self._new_facility_mask[old_facility] = True
+        self._drop_time[old_facility] = t
+        self._add_time[new_facility] = t
+        self._t = t
+        self._update_env(new_facility, old_facility)
+        # print('st:', self._t)
+        return self.compute_obj_value(), self._solution, {}
+
+    def get_tabu_mask(self, t: int) -> Tuple[np.ndarray, np.ndarray]:
+        old_tabu_mask = self._add_time < t - self._drop_tabu_time
+        new_tabu_mask = self._drop_time < t - self._add_tabu_time
+        return old_tabu_mask, new_tabu_mask
+
+    def reset_tabu_time(self) -> None:
+        self._t = 0
+        if self._cfg_tabu_time <= 0:
+            self._add_tabu_time = 0
+            self._drop_tabu_time = 0
+        else:
+            self._add_tabu_time = self.rng.integers(0.1 * self._p, 0.5 * self._p)
+            self._drop_tabu_time = self.rng.integers(0.1 * self._p, 0.5 * self._p)
+
+    def get_current_solution(self) -> np.ndarray:
+        return self._solution
+
+    def set_solution(self, solution: np.ndarray) -> None:
+        self._solution = solution
+
+    def get_current_distance(self) -> np.ndarray:
+        dis2poi = self._distance_matrix[:, self._solution]
+        if self._p > 2:
+            dis = np.partition(dis2poi, 2, axis=-1)[:, :2]
+        else:
+            dis = dis2poi.min(axis=-1)
+            dis = np.stack([dis, dis], axis=-1)
+        return dis
+
+    def get_current_cost(self) -> np.ndarray:
+        cost2poi = self._cost_matrix[:, self._solution]
+        if self._p > 2:
+            cost = np.partition(cost2poi, 2, axis=-1)[:, :2]
+        else:
+            cost = cost2poi.min(axis=-1)
+            cost = np.stack([cost, cost], axis=-1)
+        return cost
+
+    def get_gain_and_loss(self) -> Tuple[np.ndarray, np.ndarray]:
+        return self._gain, self._loss
+
+    def get_gdf_facilities(self) -> Tuple[GeoDataFrame, np.ndarray]:
+        solution = self._solution
+        facilities = np.arange(self._n)[solution]
+        gdf = self._gdf.copy()
+        gdf['facility'] = False
+        gdf.loc[facilities, 'facility'] = True
+        node2facility = np.arange(self._n)[solution][self._cost_matrix[:, solution].argmin(axis=-1)]
+        gdf['assignment'] = node2facility
+        return gdf, facilities
+
+    def _init_env(self):
+        self._init_gain_and_loss()
+        self._construct_dynamic_graph()
+
+    def _update_env(self, insert_facility, remove_facility):
+        self._update_gain_and_loss(insert_facility, remove_facility)
+        self._construct_dynamic_graph()
+
+    def _init_gain_and_loss(self):
+        t1 = time.time()
+
+        for i in range(self._n):
+            _fake_solution = list(self._solution)
+            if self._solution[i]:
+                _fake_solution[i] = False
+                self._loss[i] = self._cost_matrix[:, _fake_solution].min(axis=-1).sum() - self._cost_matrix[:, self._solution].min(axis=-1).sum()
+                self._gain[i] = 0
+            else:
+                _fake_solution[i] = True
+                self._gain[i] = self._cost_matrix[:, self._solution].min(axis=-1).sum() - self._cost_matrix[:, _fake_solution].min(axis=-1).sum()
+                self._loss[i] = 0
+
+        self.argpartition = np.argpartition(self._distance_matrix[:, self._solution], 2, axis=-1)[:, :2]
+        t2 = time.time()
+        # print('init gainloss time:', t2-t1)
+
+    def _update_gain_and_loss(self, insert_facility, remove_facility):
+        t1 = time.time()
+
+        _pre_solution = list(self._solution)
+        _pre_solution[insert_facility] = False
+        _pre_solution[remove_facility] = True
+        pre_closest_demands2solution = self._cost_matrix[:, _pre_solution][np.arange(self._n)[:, None], self.argpartition]
+        argpartition = np.argpartition(self._distance_matrix[:, self._solution], 2, axis=-1)[:, :2]
+        closest_demands2solution = self._cost_matrix[:, self._solution][np.arange(self._n)[:, None], argpartition]
+
+        pre_solution_idx = np.where(_pre_solution)[0]
+        solution_idx = np.where(self._solution)[0]
+        for i in range(self._n):
+            if remove_facility in self.argpartition[i] or insert_facility in argpartition[i]:
+                self._loss[solution_idx[argpartition[i][0]]] += closest_demands2solution[i][1] - closest_demands2solution[i][0]
+                self._loss[pre_solution_idx[self.argpartition[i][0]]] -= pre_closest_demands2solution[i][1] - pre_closest_demands2solution[i][0]
+                # if self.argpartition[i][0] != argpartition[i][0]:
+                #     for j in range(self._n):
+                #         if self._distance_matrix[i, j] < self._distance_matrix[i, self._solution][argpartition[i][0]]:
+                #             self._gain[j] += max(0, closest_demands2solution[i][0] - self._cost_matrix[i, j])
+                #         if self._distance_matrix[i, j] < self._distance_matrix[i, self._solution][self.argpartition[i][0]]:
+                #             self._gain[j] -= max(0, pre_closest_demands2solution[i][0] - self._cost_matrix[i, j])
+
+        self._loss[remove_facility] = 0
+        self._gain[insert_facility] = 0
+
+        self.argpartition = list(argpartition)
+        # print(self._gain, self._loss)
+        t2 = time.time()
+        # print('update gainloss time:', t2-t1)
+
+    def init_facility_mask(self, old_facility, new_facility):
+        self._old_facility_mask[old_facility] = True
+        self._new_facility_mask[new_facility] = True
+
+    def get_facility_mask(self):
+        return self._old_facility_mask, self._new_facility_mask
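Reference semantics for the incremental bookkeeping above (a brute-force restatement, not the diff's optimized update): loss[i] is the objective increase from closing open facility i, gain[i] the decrease from opening closed facility i.

```python
import numpy as np

def obj(cost, sol):
    return cost[:, sol].min(axis=-1).sum()

rng = np.random.default_rng(0)
cost = rng.random((5, 5))
sol = np.array([True, False, True, False, False])

gain, loss = np.zeros(5), np.zeros(5)
for i in range(5):
    alt = sol.copy()
    alt[i] = not sol[i]                      # toggle facility i
    if sol[i]:
        loss[i] = obj(cost, alt) - obj(cost, sol)
    else:
        gain[i] = obj(cost, sol) - obj(cost, alt)
print(gain.round(3), loss.round(3))
```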
diff --git a/facility_location/env/obs_extractor.py b/facility_location/env/obs_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..d111ed1b9b980e6192b65497f674356dd9650a9b
--- /dev/null
+++ b/facility_location/env/obs_extractor.py
@@ -0,0 +1,184 @@
+from typing import Dict, Tuple, Text
+
+import numpy as np
+
+from facility_location.env.facility_location_client import FacilityLocationClient
+from facility_location.utils.config import Config
+
+
+class ObsExtractor:
+    def __init__(self, cfg: Config, flc: FacilityLocationClient, node_range: int, edge_range: int):
+        self.cfg = cfg
+        self._flc = flc
+        self._node_range = node_range
+        self._edge_range = edge_range
+
+        self._construct_virtual_node_feature()
+        self._construct_node_features()
+        self._construct_action_mask()
+
+    def _construct_virtual_node_feature(self) -> None:
+        virtual_node_facility = 0
+        virtual_node_distance_min = 0
+        virtual_node_distance_sub_min = 0
+        virtual_node_cost_min = 0
+        virtual_node_cost_sub_min = 0
+        virtual_gain = 0
+        virtual_loss = 0
+
+        virtual_node_x = 0.5
+        virtual_node_y = 0.5
+        virtual_node_demand = 1
+        virtual_node_avg_distance = 0
+        virtual_node_avg_cost = 0
+        self._virtual_dynamic_node_feature = np.array([
+            virtual_node_facility,
+            virtual_node_distance_min,
+            virtual_node_distance_sub_min,
+            virtual_node_cost_min,
+            virtual_node_cost_sub_min,
+            virtual_gain,
+            virtual_loss,
+        ], dtype=np.float32)
+        self._virtual_static_node_feature = np.array([
+            virtual_node_x,
+            virtual_node_y,
+            virtual_node_demand,
+            virtual_node_avg_distance,
+            virtual_node_avg_cost,
+        ], dtype=np.float32)
+        self._virtual_node_feature = np.concatenate([
+            self._virtual_dynamic_node_feature,
+            self._virtual_static_node_feature,
+        ], axis=-1)
+
+    def _construct_node_features(self) -> None:
+        self._node_features = np.zeros((self._node_range, self._virtual_node_feature.size), dtype=np.float32)
+
+    def _construct_action_mask(self) -> None:
+        self._old_facility_mask = np.full(self._node_range, False)
+        self._new_facility_mask = np.full(self._node_range, False)
+
+    def get_node_dim(self) -> int:
+        return self._virtual_node_feature.size
+
+    def reset(self) -> None:
+        self._compute_static_obs()
+        self._reset_node_features()
+        self._reset_action_mask()
+
+    def _compute_static_obs(self) -> None:
+        xy, demands, n, _ = self._flc.get_instance()
+        if n + 2 > self._node_range:
+            print(n, self._node_range)
+            # raise ValueError('The number of nodes exceeds the maximum limit.')
+        self._n = n
+        avg_distance, avg_cost = self._flc.get_avg_distance_and_cost()
+        avg_distance = avg_distance / np.max(avg_distance)
+        avg_cost = avg_cost / np.max(avg_cost)
+        self._static_node_features = np.stack([
+            xy[:, 0],
+            xy[:, 1],
+            demands,
+            avg_distance,
+            avg_cost,
+        ], axis=-1).astype(np.float32)
+        static_adjacency_list = self._flc.get_static_adjacency_list()
+
+        obs_node_mask = np.full(1 + n, True)
+        self._obs_node_mask = self._pad_mask(obs_node_mask, self._node_range, 'nodes')
+
+        obs_static_edge_mask = np.full(n + static_adjacency_list.shape[0], True)
+        self._obs_static_edge_mask = self._pad_mask(obs_static_edge_mask, self._edge_range, 'edges')
+
+        self._static_adjacency_list = self._pad_edge(static_adjacency_list)
+
+    def _reset_node_features(self) -> None:
+        self._node_features[:, :] = 0
+        self._node_features[0] = self._virtual_node_feature
+        self._node_features[1:self._n+1, len(self._virtual_dynamic_node_feature):] = self._static_node_features
+
+    def _reset_action_mask(self) -> None:
+        self._old_facility_mask[:] = False
+        self._new_facility_mask[:] = False
+
+    def get_obs(self, t: int) -> Dict:
+        obs_nodes, obs_static_edges, obs_dynamic_edges, \
+            obs_node_mask, obs_static_edge_mask, obs_dynamic_edges_mask = self._get_obs_graph()
+        obs = {
+            'node_features': obs_nodes,
+            'static_adjacency_list': obs_static_edges,
+            'dynamic_adjacency_list': obs_dynamic_edges,
+            'node_mask': obs_node_mask,
+            'static_edge_mask': obs_static_edge_mask,
+            'dynamic_edge_mask': obs_dynamic_edges_mask,
+        }
+
+        return obs
+
+    def _get_obs_graph(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+        facility = self._flc.get_current_solution().astype(np.float32)
+        distance = self._flc.get_current_distance().astype(np.float32)
+        distance = distance / np.max(distance)
+        cost = self._flc.get_current_cost().astype(np.float32)
+        cost = cost / np.max(cost)
+        gain, loss = self._flc.get_gain_and_loss()
+        gain = gain / np.max(gain)
+        loss = loss / np.max(loss)
+        dynamic_node_features = np.stack([
+            facility,
+            distance[:, 0],
+            distance[:, 1],
+            cost[:, 0],
+            cost[:, 1],
+            gain,
+            loss,
+        ], axis=-1)
+        self._node_features[1:self._n+1, :len(self._virtual_dynamic_node_feature)] = dynamic_node_features
+        obs_nodes = self._node_features
+        obs_static_edges = self._static_adjacency_list
+        obs_dynamic_edges = self._flc.get_dynamic_adjacency_list()
+        # print(obs_dynamic_edges.shape)
+        obs_dynamic_edge_mask = np.full(obs_dynamic_edges.shape[0], True)
+        obs_node_mask = self._obs_node_mask
+        obs_static_edge_mask = self._obs_static_edge_mask
+        obs_dynamic_edges = self._pad_edge_wo_virtual(obs_dynamic_edges)
+        obs_dynamic_edge_mask = self._pad_mask(obs_dynamic_edge_mask, self._edge_range, 'edges')
+
+        return obs_nodes, obs_static_edges, obs_dynamic_edges, obs_node_mask, obs_static_edge_mask, obs_dynamic_edge_mask
+        # return obs_nodes, obs_static_edges, obs_node_mask, obs_edge_mask
+
+    def _get_obs_action_mask(self, t: int) -> Tuple[np.ndarray, np.ndarray]:
+        old_facility_mask, new_facility_mask = self._flc.get_facility_mask()
+        old_tabu_mask, new_tabu_mask = self._flc.get_tabu_mask(t)
+        self._old_facility_mask[1:self._n+1] = np.logical_and(old_facility_mask, old_tabu_mask)
+        self._new_facility_mask[1:self._n+1] = np.logical_and(new_facility_mask, new_tabu_mask)
+        obs_old_facility_mask = self._old_facility_mask
+        obs_new_facility_mask = self._new_facility_mask
+        if not np.any(obs_old_facility_mask) or not np.any(obs_new_facility_mask):
+            raise ValueError('The action mask is empty.')
+        return obs_old_facility_mask, obs_new_facility_mask
+
+    @staticmethod
+    def _pad_mask(mask: np.ndarray, max_num: int, name: Text) -> np.ndarray:
+        pad = (0, max_num - mask.size)
+        if pad[1] < 0:
+            raise ValueError(f'The number of {name} exceeds the maximum limit.')
+        return np.pad(mask, pad, mode='constant', constant_values=False)
+
+    def _pad_edge(self, edge: np.ndarray) -> np.ndarray:
+        virtual_edge = np.stack([np.zeros(self._n), np.arange(1, self._n + 1)], axis=-1).astype(np.int32)
+        edge = np.concatenate([virtual_edge, edge + 1], axis=0)
+        pad = ((0, self._edge_range - edge.shape[0]), (0, 0))
+        if pad[0][1] < 0:
+            raise ValueError('The number of edges exceeds the maximum limit.')
+        return np.pad(edge, pad, mode='constant', constant_values=self._node_range - 1)
+
+    def _pad_edge_wo_virtual(self, edge: np.ndarray) -> np.ndarray:
+        pad = ((0, self._edge_range - edge.shape[0]), (0, 0))
+        if pad[0][1] < 0:
+            print(self._edge_range, edge.shape[0])
+            raise ValueError('The number of edges exceeds the maximum limit.')
+
+        return np.pad(edge + 1, pad, mode='constant', constant_values=self._node_range - 1)
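Behavior of the padding helpers above in isolation (toy sizes): masks are right-padded with False up to the fixed range, and overflow raises.

```python
import numpy as np

def pad_mask(mask, max_num, name):
    pad = (0, max_num - mask.size)
    if pad[1] < 0:
        raise ValueError(f'The number of {name} exceeds the maximum limit.')
    return np.pad(mask, pad, mode='constant', constant_values=False)

print(pad_mask(np.full(3, True), 6, 'nodes'))
# [ True  True  True False False False]
```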
self._tabu_stable_steps_scale = cfg.env_specs['tabu_stable_steps_scale'] + self._popstar = cfg.env_specs['popstar'] + + self._seed(cfg.seed) + + self._done = False + + self._set_node_edge_range() + + self._flc = FacilityLocationClient(cfg, self._np_random) + self._obs_extractor = ObsExtractor(cfg, self._flc, self._node_range, self._edge_range) + + self._declare_spaces() + + def _declare_spaces(self) -> None: + self.observation_space = gym.spaces.Dict({ + 'node_features': gym.spaces.Box(low=0, high=1, shape=(self._node_range, self.get_node_feature_dim())), + 'static_adjacency_list': gym.spaces.Box(low=0, high=self._node_range, shape=(self._edge_range, 2), dtype=np.int64), + 'dynamic_adjacency_list': gym.spaces.Box(low=0, high=self._node_range, shape=(self._edge_range, 2), dtype=np.int64), + 'node_mask': gym.spaces.Box(low=0, high=1, shape=(self._node_range,), dtype=np.bool), + 'static_edge_mask': gym.spaces.Box(low=0, high=1, shape=(self._edge_range,), dtype=np.bool), + 'dynamic_edge_mask': gym.spaces.Box(low=0, high=1, shape=(self._edge_range,), dtype=np.bool), + }) + if not self._popstar: + self.action_space = gym.spaces.Discrete(self._node_range ** 2) + else: + self.action_space = gym.spaces.Discrete(self._node_range ** 2) + + def _set_node_edge_range(self) -> None: + self._node_range = self._max_n + 2 + self._edge_range = int(self._max_n ** 2 * self._max_p_ratio) + + def get_node_feature_dim(self) -> int: + return self._obs_extractor.get_node_dim() + + def _seed(self, seed: int) -> None: + self._np_random = np.random.default_rng(seed) + + def get_reward(self) -> float: + reward = self._obj_value[self._t - 1] - self._obj_value[self._t] + return reward + + def _transform_action(self, action: np.ndarray) -> np.ndarray: + if self._popstar: + action = np.array(np.unravel_index(action, (self._node_range, self._node_range))) + action = action - 1 + return action + + def step(self, action: np.ndarray): + if self._done: + raise RuntimeError('Action taken after episode is done.') + obj_value, solution, info = self._flc.swap(action, self._t) + self._t += 1 + self._done = (self._t == self._max_steps) + self._obj_value[self._t] = obj_value + self._solution[self._t] = solution + reward = self.get_reward() + if obj_value < self._best_obj_value - self.EPSILON: + self._best_obj_value = obj_value + self._best_solution = solution + self._last_best_t = self._t + elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0: + self._flc.reset_tabu_time() + + # if self._done: + # print('done') + # for i in range(self._t): + # print(f'{i}:',np.where(self._solution[i])) + + return self._get_obs(self._t), reward, self._done, False, info + + def reset(self, seed = 0) -> Optional[Dict]: + if self._train_region is None: + points, demands, n, p = self._generate_new_instance() + self._flc.set_instance(points, demands, n, p, False) + else: + points, demands, n, p = self._use_real_instance() + self._flc.set_instance(points, demands, n, p, True) + + return self.prepare(n, p), {} + + def prepare(self, n: int, p: int) -> Dict: + initial_obj_value, initial_solution = self._flc.compute_initial_solution() + self._obs_extractor.reset() + self._done = False + self._t = 0 + self._max_steps = max(int(p * self._max_steps_scale), 5) + self._obj_value = np.zeros(self._max_steps + 1) + self._obj_value[0] = initial_obj_value + self._solution = np.zeros((self._max_steps + 1, n), dtype=bool) + self._solution[0] = initial_solution + self._best_solution = initial_solution + self._best_obj_value = initial_obj_value + 
+    def step(self, action: np.ndarray):
+        if self._done:
+            raise RuntimeError('Action taken after episode is done.')
+        obj_value, solution, info = self._flc.swap(action, self._t)
+        self._t += 1
+        self._done = (self._t == self._max_steps)
+        self._obj_value[self._t] = obj_value
+        self._solution[self._t] = solution
+        reward = self.get_reward()
+        if obj_value < self._best_obj_value - self.EPSILON:
+            self._best_obj_value = obj_value
+            self._best_solution = solution
+            self._last_best_t = self._t
+        elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
+            self._flc.reset_tabu_time()
+
+        return self._get_obs(self._t), reward, self._done, False, info
+
+    def reset(self, seed=0) -> Optional[Dict]:
+        if self._train_region is None:
+            points, demands, n, p = self._generate_new_instance()
+            self._flc.set_instance(points, demands, n, p, False)
+        else:
+            points, demands, n, p = self._use_real_instance()
+            self._flc.set_instance(points, demands, n, p, True)
+
+        return self.prepare(n, p), {}
+
+    def prepare(self, n: int, p: int) -> Dict:
+        initial_obj_value, initial_solution = self._flc.compute_initial_solution()
+        self._obs_extractor.reset()
+        self._done = False
+        self._t = 0
+        self._max_steps = max(int(p * self._max_steps_scale), 5)
+        self._obj_value = np.zeros(self._max_steps + 1)
+        self._obj_value[0] = initial_obj_value
+        self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
+        self._solution[0] = initial_solution
+        self._best_solution = initial_solution
+        self._best_obj_value = initial_obj_value
+        self._last_best_t = 0
+        self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
+        return self._get_obs(self._t)
+
+    def render(self, mode='human', dpi=300) -> Optional[np.ndarray]:
+        gdf, facilities = self._flc.get_gdf_facilities()
+        if len(facilities) > 10:
+            warnings.warn('Too many facilities to render. Only rendering the first 10.')
+            facilities = facilities[:10]
+
+        cm = plt.get_cmap('tab10')
+        fig, axs = plt.subplots(1, 2, figsize=(12, 6), dpi=dpi)
+        for i, f in enumerate(facilities):
+            gdf.loc[gdf['assignment'] == f].plot(ax=axs[0],
+                                                 zorder=2,
+                                                 alpha=0.7,
+                                                 edgecolor="k",
+                                                 color=cm(i))
+            gdf.loc[[f]].plot(ax=axs[0],
+                              marker='*',
+                              markersize=300,
+                              zorder=3,
+                              alpha=0.7,
+                              edgecolor="k",
+                              color=cm(i))
+        axs[0].set_title("Facility Location", fontweight="bold")
+        plot_obj_value = self._obj_value[:self._t + 1]
+        axs[1].plot(plot_obj_value, marker='.', markersize=10, color='k')
+        axs[1].set_title("Objective Value", fontweight="bold")
+        axs[1].set_xticks(np.arange(self._max_steps + 1, step=math.ceil((self._max_steps + 1) / 10)))
+        fig.tight_layout()
+
+        if mode == 'human':
+            plt.show()
+        else:
+            io_buf = io.BytesIO()
+            fig.savefig(io_buf, format='raw', dpi=dpi)
+            io_buf.seek(0)
+            img_arr = np.reshape(np.frombuffer(io_buf.getvalue(), dtype=np.uint8),
+                                 newshape=(int(fig.bbox.bounds[3]), int(fig.bbox.bounds[2]), -1))
+            io_buf.close()
+            return img_arr
+
+    def close(self):
+        plt.close()
+
+    def _generate_new_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        n = self._np_random.integers(self._min_n, self._max_n, endpoint=True)
+        p_ratio = self._np_random.uniform(self._min_p_ratio, self._max_p_ratio)
+        p = int(max(n * p_ratio, 4))
+
+        points = self._np_random.uniform(size=(n, 2))
+        while np.unique(points, axis=0).shape[0] != n:
+            points = self._np_random.uniform(size=(n, 2))
+        demands = self._np_random.random(size=(n,))
+        return points, demands, n, p
+
+    def _use_real_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        data_path = './data/{}/pkl'.format(self.cfg.eval_specs['region'])
+        files = os.listdir(data_path)
+        files = [f for f in files if f.endswith('.pkl')]
+        sample_data_path = os.path.join(data_path, files[self._np_random.integers(len(files))])
+        with open(sample_data_path, 'rb') as f:
+            np_data = pickle.load(f)
+
+        n = self._np_random.integers(self._min_n, self._max_n, endpoint=True)
+        p = max(int(n * self._np_random.uniform(self._min_p_ratio, self._max_p_ratio)), 4)
+        sample_cbgs = self._np_random.choice(list(np_data[1].keys()), n, replace=False)
+        points = []
+        demands = []
+        for cbg in sample_cbgs:
+            points.append(np_data[1][cbg]['pos'])
+            demands.append(np_data[1][cbg]['demand'])
+        points = np.array(points)
+        demands = np.array(demands)
+
+        return points, demands, n, p
+
+    def _get_obs(self, t: int) -> Dict:
+        return self._obs_extractor.get_obs(t)
+
+    def get_initial_solution(self) -> np.ndarray:
+        return self._solution[0]
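# --- Editor's sketch (not part of the diff): a plain rollout over PMPEnv. ---
# Assuming a config YAML (e.g. the 'plot' id used by multi_eval below) and the
# package's data assets are available, the env follows the usual gym loop;
# note that a real agent must respect the old/new facility action masks, so
# random sampling is only a placeholder here.
#
#   from facility_location.utils import Config
#   from facility_location.env.pmp import PMPEnv
#
#   cfg = Config('plot', 0, True, '/tmp/flp')
#   env = PMPEnv(cfg)
#   obs, _ = env.reset()
#   done = False
#   while not done:
#       action = env.action_space.sample()      # placeholder for a policy
#       obs, reward, done, _, info = env.step(action)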
+class EvalPMPEnv(PMPEnv):
+    def __init__(self,
+                 cfg: Config,
+                 positions, demands, n, p, boost=False):
+        self._eval_np = (n, p)
+        self._eval_seed = cfg.eval_specs['seed']
+        self._boost = boost
+        self.points = positions
+        self.demands = demands
+        self._n = n
+        self._p = p
+
+        super().__init__(cfg)
+
+    def _set_node_edge_range(self) -> None:
+        n, p = self._eval_np
+        self._node_range = n + 2
+        self._edge_range = n * p
+
+    def get_eval_num_cases(self) -> int:
+        return self._eval_num_cases
+
+    def get_eval_np(self) -> Tuple[int, int]:
+        return self._eval_np
+
+    def reset_instance_id(self) -> None:
+        self._instance_id = 0
+
+    def step(self, action: np.ndarray):
+        if self._done:
+            raise RuntimeError('Action taken after episode is done.')
+        obj_value, solution, info = self._flc.swap(action, self._t)
+        self._t += 1
+        self._done = (self._t == self._max_steps)
+        self._obj_value[self._t] = obj_value
+        self._solution[self._t] = solution
+        reward = self.get_reward()
+        if obj_value < self._best_obj_value - self.EPSILON:
+            self._best_obj_value = obj_value
+            self._best_solution = solution
+            self._last_best_t = self._t
+        elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
+            self._flc.reset_tabu_time()
+
+        return self._get_obs(self._t), reward, self._done, False, info
+
+    def get_reward(self) -> float:
+        if self._done:
+            reward = -np.min(self._obj_value)
+        else:
+            reward = 0.0
+        return reward
+
+    def get_best_solution(self) -> np.ndarray:
+        return self._best_solution
+
+    def reset(self, seed=0) -> Dict:
+        self._flc.set_instance(self.points, self.demands, self._n, self._p, False)
+        return self.prepare(self._n, self._p, self._boost), {}
+
+    def prepare(self, n: int, p: int, boost: bool) -> Dict:
+        initial_obj_value, initial_solution = self._flc.compute_initial_solution()
+        self._obs_extractor.reset()
+        self._done = False
+        self._t = 0
+        self._max_steps = max(int(p * self._max_steps_scale), 5)
+        if boost:
+            # Turbo Boost: cut the episode to a handful of SWAP steps.
+            self._max_steps = max(int(self._max_steps_scale / 10), 5)
+        self._obj_value = np.zeros(self._max_steps + 1)
+        self._obj_value[0] = initial_obj_value
+        self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
+        self._solution[0] = initial_solution
+        self._best_solution = initial_solution
+        self._best_obj_value = initial_obj_value
+        self._last_best_t = 0
+        self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
+        return self._get_obs(self._t)
+
+    def get_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        points, demands, n, p = self._flc.get_instance()
+        return points, demands, n, p
+
+    def get_distance_and_cost(self) -> Tuple[np.ndarray, np.ndarray]:
+        return self._flc.get_distance_and_cost_matrix()
+
+    def evaluate(self, solution: np.ndarray) -> float:
+        self._flc.set_solution(solution)
+        obj_value = self._flc.compute_obj_value()
+        return obj_value
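# --- Editor's illustration (not part of the diff): the p-median objective ---
# these environments minimise. Every demand point is served by its nearest
# open facility, and the objective is the demand-weighted sum of those
# distances; evaluate() above computes the same quantity for a fixed
# solution mask through the FacilityLocationClient.
import numpy as np

dist = np.array([[0.0, 2.0, 5.0],
                 [2.0, 0.0, 3.0],
                 [5.0, 3.0, 0.0]])          # pairwise distances for 3 nodes
demand = np.array([1.0, 2.0, 4.0])
solution = np.array([True, False, True])    # facilities open at nodes 0 and 2

obj = (dist[:, solution].min(axis=1) * demand).sum()
assert obj == 4.0                            # 1*0.0 + 2*2.0 + 4*0.0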
+class MULTIPMP(PMPEnv):
+    EPSILON = 1e-6
+
+    def __init__(self,
+                 cfg,
+                 data_npy,
+                 boost=False):
+        self.cfg = cfg
+        self.data_npy = data_npy
+        self._boost = boost
+        self._all_points, self._all_demands, self._n, self._all_p = self._load_multi_facility_data(data_npy)
+        self._all_solutions = self._load_multi_facility_solutions(boost)
+        self._final_solutions = list(self._all_solutions)
+        self._num_types = len(self._all_p)
+        self._current_type = 0
+        self._all_max_steps, self._old_mask, self._new_mask = self._get_max_steps()
+        super().__init__(cfg)
+
+    def _set_node_edge_range(self) -> None:
+        self._node_range = self._n + 2
+        self._edge_range = self._n * max(self._all_p)
+
+    def step(self, action: np.ndarray):
+        if self._done:
+            raise RuntimeError('Action taken after episode is done.')
+        obj_value, solution, info = self._flc.swap(action, self._t)
+        self._t += 1
+        self._done = (self._t == self._all_max_steps[-1] and self._current_type == len(self._all_max_steps) - 1)
+        self._obj_value[self._t] = obj_value
+        self._solution[self._t] = solution
+        reward = self.get_reward()
+        if obj_value < self._best_obj_value - self.EPSILON:
+            self._best_obj_value = obj_value
+            self._best_solution = solution
+            self._last_best_t = self._t
+        elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
+            self._flc.reset_tabu_time()
+
+        if self._t == self._all_max_steps[self._current_type] and not self._done:
+            self._t = 0
+            self._multi_obj += obj_value
+            self._final_solutions[self._current_type] = solution
+            self._update_type()
+
+        if self._done:
+            with open('./facility_location/solutions.pkl', 'wb') as f:
+                pickle.dump(self._final_solutions, f)
+
+        return self._get_obs(self._t), reward, self._done, False, info
+
+    def reset(self, seed=0) -> Optional[Dict]:
+        self._current_type = 0
+        points = self._all_points
+        demands = self._all_demands[:, 0]
+        n = self._n
+        p = self._all_p[0]
+        solution = self._all_solutions[0]
+        self._multi_obj = 0
+
+        self._flc.set_instance(points, demands, n, p, True)
+
+        return self.prepare(n, p, solution), {}
+
+    def _update_type(self):
+        if self._current_type >= self._num_types:
+            raise RuntimeError('Action taken after episode is done.')
+        self._current_type += 1
+        # Every remaining type, including the last one, needs its instance set.
+        if self._current_type < self._num_types:
+            points = self._all_points
+            demands = self._all_demands[:, self._current_type]
+            n = self._n
+            p = self._all_p[self._current_type]
+            solution = self._all_solutions[self._current_type]
+            self._flc.set_instance(points, demands, n, p, True)
+            self.prepare(n, p, solution)
+
+    def prepare(self, n: int, p: int, solution: list) -> Dict:
+        initial_solution = solution
+        initial_obj_value = self._flc.compute_obj_value_from_solution(initial_solution)
+        self._obs_extractor.reset()
+        self._done = False
+        self._t = 0
+        self._max_steps = self._all_max_steps[self._current_type]
+        self._flc.init_facility_mask(self._old_mask[self._current_type], self._new_mask[self._current_type])
+        self._obj_value = np.zeros(self._max_steps + 1)
+        self._obj_value[0] = initial_obj_value
+        self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
+        self._solution[0] = initial_solution
+        self._best_solution = initial_solution
+        self._best_obj_value = initial_obj_value
+        self._last_best_t = 0
+        self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
+        return self._get_obs(self._t)
+
+    def _get_max_steps(self) -> Tuple[List[int], List[List[int]], List[List[int]]]:
+        tmp_all_solutions = list(self._all_solutions)
+        count_true = [sum(s) for s in zip(*tmp_all_solutions)]
+        max_steps = []
+        old_idx = []
+        new_idx = []
+        for t in range(self._num_types):
+            old = [i for i in range(len(count_true)) if count_true[i] > 1 and tmp_all_solutions[t][i]]
+            new = [i for i in range(len(count_true)) if count_true[i] == 0]
+            if len(old):
+                old_idx.append(old)
+                new_idx.append(new)
+                max_steps.append(len(old))
+                for i in old:
+                    count_true[i] = count_true[i] - 1
+        return max_steps, old_idx, new_idx
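# --- Editor's illustration (not part of the diff): conflict counting in ---
# _get_max_steps. With two facility types whose single-type solutions both
# open site 1, count_true flags site 1 as double-booked and site 3 as free,
# so type 0 gets exactly one SWAP step to move its copy of site 1.
solutions = [
    [True,  True,  False, False],   # type 0 opens sites 0 and 1
    [False, True,  True,  False],   # type 1 opens sites 1 and 2
]
count_true = [sum(s) for s in zip(*solutions)]
assert count_true == [1, 2, 1, 0]            # site 1 conflicted, site 3 unused

old = [i for i, c in enumerate(count_true) if c > 1 and solutions[0][i]]
new = [i for i, c in enumerate(count_true) if c == 0]
assert old == [1] and new == [3]             # one step: move site 1 -> site 3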
+    def _load_multi_facility_data(self, data_npy) -> Tuple[np.ndarray, np.ndarray, int, np.ndarray]:
+        data = data_npy.split('\n')
+        n = len(data)
+        p = int((len(data[0].split(' ')) - 2) / 2)
+
+        positions = []
+        demands = []
+        actual_facilities = []
+        for row in data:
+            row = row.split(' ')
+            row = [x for x in row if len(x)]
+            positions.append([float(row[0]), float(row[1])])
+
+            demand = []
+            for i in range(2, 2 + p):
+                demand.append(float(row[i]))
+            demands.append(demand)
+
+            actual_facility = []
+            for i in range(2 + p, 2 + 2 * p):
+                actual_facility.append(bool(int(float(row[i]))))
+            actual_facilities.append(actual_facility)
+
+        positions = np.array(positions)
+        positions = np.deg2rad(positions)
+        demands = np.array(demands)
+        actual_facilities = np.array(actual_facilities)
+        ps = actual_facilities.sum(axis=0)
+
+        return positions, demands, n, ps
+
+    def _load_multi_facility_solutions(self, boost) -> list:
+        def load_model(positions, demands, n, p, boost):
+            eval_env = EvalPMPEnv(self.cfg, positions, demands, n, p, boost)
+            eval_env = DummyVecEnv([lambda: eval_env])
+
+            policy_kwargs = get_policy_kwargs(self.cfg)
+            test_model = PPO(MaskedFacilityLocationActorCriticPolicy,
+                             eval_env,
+                             verbose=1,
+                             policy_kwargs=policy_kwargs,
+                             device='auto')
+            train_model = PPO.load(self.cfg.load_model_path)
+            test_model.set_parameters(train_model.get_parameters())
+            return test_model, eval_env
+
+        def get_optimal_solution(model, eval_env):
+            obs = eval_env.reset()
+            done = False
+            while not done:
+                action, _ = model.predict(obs, deterministic=True)
+                obs, _, done, info = eval_env.step(action)
+            return eval_env.get_attr('_best_solution')[0]
+
+        multi_solutions = []
+        for i in range(len(self._all_p)):
+            positions = self._all_points
+            demands = self._all_demands[:, i]
+            n = self._n
+            p = self._all_p[i]
+            model, env = load_model(positions, demands, n, p, boost)
+            multi_solutions.append(get_optimal_solution(model, env))
+
+        return multi_solutions
+
+    def get_reward(self) -> float:
+        if self._done:
+            reward = np.min(self._obj_value)
+        else:
+            reward = 0.0
+        return reward
\ No newline at end of file
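# --- Editor's illustration (not part of the diff): the expected row format. ---
# _load_multi_facility_data above reads one node per line, formatted as
# "lat lon demand_1 .. demand_p facility_1 .. facility_p", so a row with
# 2 + 2*p fields describes p facility types. A made-up row with p = 2:
row = '40.00 -73.50 10 20 0 1'
fields = [x for x in row.split(' ') if x]

p = (len(fields) - 2) // 2
lat, lon = float(fields[0]), float(fields[1])
demands = [float(x) for x in fields[2:2 + p]]
facilities = [bool(int(float(x))) for x in fields[2 + p:2 + 2 * p]]

assert p == 2
assert demands == [10.0, 20.0]
assert facilities == [False, True]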
diff --git a/facility_location/multi_eval.py b/facility_location/multi_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..9234685942bb70fc8d966df5e496388e7da1864e
--- /dev/null
+++ b/facility_location/multi_eval.py
@@ -0,0 +1,96 @@
+import time
+import random
+from typing import Union, Text
+
+import numpy as np
+import torch as th
+
+import sys
+import gymnasium
+# The env code and stable-baselines3 expect the legacy ``gym`` module name;
+# alias gymnasium in its place before they are imported.
+sys.modules["gym"] = gymnasium
+
+from stable_baselines3.common.evaluation import evaluate_policy
+from stable_baselines3 import PPO
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.vec_env import DummyVecEnv, VecEnvWrapper
+
+from facility_location.agent.solver import PMPSolver
+from facility_location.env import EvalPMPEnv, MULTIPMP
+from facility_location.utils import Config
+from facility_location.agent import MaskedFacilityLocationActorCriticPolicy
+from facility_location.utils.policy import get_policy_kwargs
+
+import warnings
+warnings.filterwarnings('ignore')
+
+
+AGENT = Union[PMPSolver, PPO]
+
+
+def get_model(cfg: Config,
+              env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
+              device: str) -> PPO:
+    policy_kwargs = get_policy_kwargs(cfg)
+    model = PPO(MaskedFacilityLocationActorCriticPolicy,
+                env,
+                verbose=1,
+                policy_kwargs=policy_kwargs,
+                device=device)
+    return model
+
+
+def get_agent(cfg: Config,
+              env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
+              model_path: Text) -> AGENT:
+    if cfg.agent in ['rl-mlp', 'rl-gnn', 'rl-agnn']:
+        test_model = get_model(cfg, env, device='auto')
+        trained_model = PPO.load(model_path)
+        test_model.set_parameters(trained_model.get_parameters())
+        agent = test_model
+    else:
+        raise ValueError(f'Agent {cfg.agent} not supported.')
+    return agent
+
+
+def evaluate(agent: AGENT,
+             env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
+             num_cases: int,
+             return_episode_rewards: bool):
+    if isinstance(agent, PPO):
+        return evaluate_ppo(agent, env, num_cases, return_episode_rewards=return_episode_rewards)
+    else:
+        raise ValueError(f'Agent {agent} not supported.')
+
+
+def evaluate_ppo(agent: PPO, env: EvalPMPEnv, num_cases: int, return_episode_rewards: bool):
+    rewards, _ = evaluate_policy(agent, env, n_eval_episodes=num_cases, return_episode_rewards=return_episode_rewards)
+    return rewards
+
+
+def main(data_npy, boost=False):
+    th.manual_seed(0)
+    np.random.seed(0)
+    random.seed(0)
+    model_path = './facility_location/best_model.zip'
+
+    cfg = Config('plot', 0, False, '/data2/suhongyuan/flp', 'rl-gnn', model_path=model_path)
+
+    eval_env = MULTIPMP(cfg, data_npy, boost)
+    eval_env = Monitor(eval_env)
+    eval_env = DummyVecEnv([lambda: eval_env])
+    agent = get_agent(cfg, eval_env, model_path)
+    start_time = time.time()
+    _ = evaluate(agent, eval_env, 1, return_episode_rewards=True)
+    eval_time = time.time() - start_time
+    print(f'\t time: {eval_time}')
+
+
+if __name__ == '__main__':
+    # main() takes the raw instance text, so read it from a file path given on
+    # the command line when the module is run as a script.
+    with open(sys.argv[1]) as f:
+        main(f.read())
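# --- Editor's sketch (not part of the diff): driving multi_eval directly. ---
# main() optimises every facility type and writes the per-type solutions to
# ./facility_location/solutions.pkl for the caller to unpickle. The two-row
# instance below is made up, and a trained ./facility_location/best_model.zip
# must exist for this to run.
#
#   import pickle
#   from facility_location import multi_eval
#
#   data_npy = '40.00 -73.50 10 20 0 1\n40.10 -73.60 30 40 1 0'
#   multi_eval.main(data_npy, boost=True)    # boost trades quality for speed
#   with open('./facility_location/solutions.pkl', 'rb') as f:
#       solutions = pickle.load(f)           # one boolean mask per type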
diff --git a/facility_location/solutions.pkl b/facility_location/solutions.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..f298cba6cc1e50665bcb3367bda51d523b0398a6
Binary files /dev/null and b/facility_location/solutions.pkl differ
diff --git a/facility_location/utils/__init__.py b/facility_location/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..786c82da6ef1c869171178ca4da7cee566efa94d
--- /dev/null
+++ b/facility_location/utils/__init__.py
@@ -0,0 +1,3 @@
+from .config import Config
+
+__all__ = ["Config"]
diff --git a/facility_location/utils/__pycache__/__init__.cpython-310.pyc b/facility_location/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0974f9e34eb9f1155fd25ad77042cff379312db1
Binary files /dev/null and b/facility_location/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/facility_location/utils/__pycache__/__init__.cpython-39.pyc b/facility_location/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bfd8dffbc14cfacd47065ea3cbee35fd051a11b2
Binary files /dev/null and b/facility_location/utils/__pycache__/__init__.cpython-39.pyc differ
diff --git a/facility_location/utils/__pycache__/config.cpython-310.pyc b/facility_location/utils/__pycache__/config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c00f5636229e88527dc43f00dadf123144a79de1
Binary files /dev/null and b/facility_location/utils/__pycache__/config.cpython-310.pyc differ
diff --git a/facility_location/utils/__pycache__/config.cpython-39.pyc b/facility_location/utils/__pycache__/config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0d221ec213bdcfb8883271fafd276e7b35a24822
Binary files /dev/null and b/facility_location/utils/__pycache__/config.cpython-39.pyc differ
diff --git a/facility_location/utils/__pycache__/policy.cpython-310.pyc b/facility_location/utils/__pycache__/policy.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..671d0f69e2ecc1e07f26582029407317eea1fa58
Binary files /dev/null and b/facility_location/utils/__pycache__/policy.cpython-310.pyc differ
diff --git a/facility_location/utils/__pycache__/policy.cpython-39.pyc b/facility_location/utils/__pycache__/policy.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..436853b284fc6bbb70fc7c03ee50fd20e7c57466
Binary files /dev/null and b/facility_location/utils/__pycache__/policy.cpython-39.pyc differ
diff --git a/facility_location/utils/config.py b/facility_location/utils/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e4d0ad242fcfa0cb67217f65fae20c5cf637d87
--- /dev/null
+++ b/facility_location/utils/config.py
@@ -0,0 +1,133 @@
+import os
+from typing import Text, Dict
+
+from stable_baselines3.common.utils import get_latest_run_id
+
+import yaml
+
+
+class Config:
+
+    def __init__(self, cfg_id: Text, global_seed: int, tmp: bool, root_dir: Text,
+                 agent: Text = 'rl-gnn', reset_num_timesteps: bool = True, cfg_dict: Dict = None, model_path: Text = None):
+        self.cfg_id = cfg_id
+        self.seed = global_seed
+        if cfg_dict is not None:
+            cfg = cfg_dict
+        else:
+            file_path = './facility_location/cfg/{}.yaml'.format(self.cfg_id)
+
+            class TupleSafeLoader(yaml.SafeLoader):
+                def construct_python_tuple(self, node):
+                    return tuple(self.construct_sequence(node))
+
+            TupleSafeLoader.add_constructor(
+                u'tag:yaml.org,2002:python/tuple',
+                TupleSafeLoader.construct_python_tuple)
+
+            with open(file_path, 'r') as f:
+                cfg = yaml.load(f, Loader=TupleSafeLoader)
+
+        # create dirs
+        self.root_dir = '/tmp/flp' if tmp else root_dir
+        self.agent = agent
+        self.multi = cfg.get('multi', False)
+
+        self.tb_log_path = os.path.join(self.root_dir, 'runs')
+        self.tb_log_name = f'{cfg_id}-agent-{agent}-seed-{global_seed}'
+        latest_run_id = get_latest_run_id(self.tb_log_path, self.tb_log_name)
+        if not reset_num_timesteps:
+            # Continue training in the same directory
+            latest_run_id -= 1
+        self.cfg_dir = os.path.join(self.root_dir,
+                                    'output', f'{cfg_id}-agent-{agent}-seed-{global_seed}_{latest_run_id + 1}')
+        self.ckpt_save_path = os.path.join(self.cfg_dir, 'ckpt')
+        self.best_model_path = os.path.join(self.cfg_dir, 'best-models')
+        self.latest_model_path = os.path.join(self.cfg_dir, 'latest-models')
+        self.load_model_path = model_path
+
+        # env
+        self.env_specs = cfg.get('env_specs', dict())
+        self.reward_specs = cfg.get('reward_specs', dict())
+        self.obs_specs = cfg.get('obs_specs', dict())
+        self.eval_specs = cfg.get('eval_specs', dict())
+
+        # agent config
+        self.agent_specs = cfg.get('agent_specs', dict())
+        self.mlp_specs = cfg.get('mlp_specs', dict())
+        self.gnn_specs = cfg.get('gnn_specs', dict())
+        self.ts_specs = cfg.get('ts_specs', dict())
+        self.popstar_specs = cfg.get('popstar_specs', dict())
+        self.ga_specs = cfg.get('ga_specs', dict())
+
+        # training config
+        self.gamma = cfg.get('gamma', 0.99)
+        self.tau = cfg.get('tau', 0.95)
+        self.state_encoder_specs = cfg.get('state_encoder_specs', dict())
+        self.policy_specs = cfg.get('policy_specs', dict())
+        self.value_specs = cfg.get('value_specs', dict())
+        self.lr = cfg.get('lr', 4e-4)
+        self.weightdecay = cfg.get('weightdecay', 0.0)
+        self.eps = cfg.get('eps', 1e-5)
+        self.value_pred_coef = cfg.get('value_pred_coef', 0.5)
+        self.entropy_coef = cfg.get('entropy_coef', 0.01)
+        self.clip_epsilon = cfg.get('clip_epsilon', 0.2)
+        self.max_num_iterations = cfg.get('max_num_iterations', 1000)
+        self.num_episodes_per_iteration = cfg.get('num_episodes_per_iteration', 1000)
+        self.max_sequence_length = cfg.get('max_sequence_length', 100)
+        self.num_optim_epoch = cfg.get('num_optim_epoch', 4)
+        self.mini_batch_size = cfg.get('mini_batch_size', 1024)
+        self.save_model_interval = cfg.get('save_model_interval', 10)
+    def log(self, logger, tb_logger):
+        """Log cfg to logger and tensorboard."""
+        logger.info(f'id: {self.cfg_id}')
+        logger.info(f'seed: {self.seed}')
+        logger.info(f'env_specs: {self.env_specs}')
+        logger.info(f'reward_specs: {self.reward_specs}')
+        logger.info(f'obs_specs: {self.obs_specs}')
+        logger.info(f'agent_specs: {self.agent_specs}')
+        logger.info(f'gamma: {self.gamma}')
+        logger.info(f'tau: {self.tau}')
+        logger.info(f'state_encoder_specs: {self.state_encoder_specs}')
+        logger.info(f'policy_specs: {self.policy_specs}')
+        logger.info(f'value_specs: {self.value_specs}')
+        logger.info(f'lr: {self.lr}')
+        logger.info(f'weightdecay: {self.weightdecay}')
+        logger.info(f'eps: {self.eps}')
+        logger.info(f'value_pred_coef: {self.value_pred_coef}')
+        logger.info(f'entropy_coef: {self.entropy_coef}')
+        logger.info(f'clip_epsilon: {self.clip_epsilon}')
+        logger.info(f'max_num_iterations: {self.max_num_iterations}')
+        logger.info(f'num_episodes_per_iteration: {self.num_episodes_per_iteration}')
+        logger.info(f'max_sequence_length: {self.max_sequence_length}')
+        logger.info(f'num_optim_epoch: {self.num_optim_epoch}')
+        logger.info(f'mini_batch_size: {self.mini_batch_size}')
+        logger.info(f'save_model_interval: {self.save_model_interval}')
+
+        if tb_logger is not None:
+            tb_logger.add_hparams(
+                hparam_dict={
+                    'id': self.cfg_id,
+                    'seed': self.seed,
+                    'env_specs': str(self.env_specs),
+                    'reward_specs': str(self.reward_specs),
+                    'obs_specs': str(self.obs_specs),
+                    'agent_specs': str(self.agent_specs),
+                    'gamma': self.gamma,
+                    'tau': self.tau,
+                    'state_encoder_specs': str(self.state_encoder_specs),
+                    'policy_specs': str(self.policy_specs),
+                    'value_specs': str(self.value_specs),
+                    'lr': self.lr,
+                    'weightdecay': self.weightdecay,
+                    'eps': self.eps,
+                    'value_pred_coef': self.value_pred_coef,
+                    'entropy_coef': self.entropy_coef,
+                    'clip_epsilon': self.clip_epsilon,
+                    'max_num_iterations': self.max_num_iterations,
+                    'num_episodes_per_iteration': self.num_episodes_per_iteration,
+                    'max_sequence_length': self.max_sequence_length,
+                    'num_optim_epoch': self.num_optim_epoch,
+                    'mini_batch_size': self.mini_batch_size,
+                    'save_model_interval': self.save_model_interval},
+                metric_dict={'hparam/placeholder': 0.0})
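# --- Editor's sketch (not part of the diff): bypassing the YAML file. ---
# Config also accepts a plain dict through cfg_dict, so the keys read above
# can be supplied inline; the values below are illustrative, not the
# shipped configuration.
from facility_location.utils import Config

cfg_dict = {
    'env_specs': {
        'region': None,                # None -> synthetic training instances
        'min_n': 50, 'max_n': 100,
        'min_p_ratio': 0.05, 'max_p_ratio': 0.1,
        'max_steps_scale': 2.0,
        'tabu_stable_steps_scale': 0.2,
        'popstar': False,
    },
    'eval_specs': {'region': None, 'seed': 0},
    'gnn_specs': {'num_gnn_layers': 2, 'node_dim': 32},
}
cfg = Config('inline', global_seed=0, tmp=True, root_dir='/tmp/flp',
             cfg_dict=cfg_dict)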
diff --git a/facility_location/utils/policy.py b/facility_location/utils/policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c73b0c1cb5c38edcb88abf051a76320bce54a95
--- /dev/null
+++ b/facility_location/utils/policy.py
@@ -0,0 +1,57 @@
+from typing import Dict
+
+from facility_location.agent import FacilityLocationMLPExtractor, FacilityLocationGNNExtractor, FacilityLocationAttentionGNNExtractor
+from facility_location.utils import Config
+
+
+def get_policy_kwargs(cfg: Config) -> Dict:
+    if cfg.agent == 'rl-mlp':
+        hidden_units = cfg.mlp_specs.get('hidden_units', (32, 32))
+        node_dim = hidden_units[-1]
+        policy_feature_dim = FacilityLocationMLPExtractor.get_policy_feature_dim(node_dim)
+        value_feature_dim = FacilityLocationMLPExtractor.get_value_feature_dim(node_dim)
+        policy_kwargs = dict(
+            policy_feature_dim=policy_feature_dim,
+            value_feature_dim=value_feature_dim,
+            policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
+            value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
+            features_extractor_class=FacilityLocationMLPExtractor,
+            features_extractor_kwargs=dict(
+                hidden_units=hidden_units,),
+            popstar=cfg.env_specs.get('popstar', False),)
+
+    elif cfg.agent == 'rl-gnn':
+        num_gnn_layers = cfg.gnn_specs.get('num_gnn_layers', 2)
+        node_dim = cfg.gnn_specs.get('node_dim', 32)
+        policy_feature_dim = FacilityLocationGNNExtractor.get_policy_feature_dim(node_dim)
+        value_feature_dim = FacilityLocationGNNExtractor.get_value_feature_dim(node_dim)
+        policy_kwargs = dict(
+            policy_feature_dim=policy_feature_dim,
+            value_feature_dim=value_feature_dim,
+            policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
+            value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
+            features_extractor_class=FacilityLocationGNNExtractor,
+            features_extractor_kwargs=dict(
+                num_gnn_layers=num_gnn_layers,
+                node_dim=node_dim),
+            popstar=cfg.env_specs.get('popstar', False),)
+
+    elif cfg.agent == 'rl-agnn':
+        num_gnn_layers = cfg.gnn_specs.get('num_gnn_layers', 2)
+        node_dim = cfg.gnn_specs.get('node_dim', 32)
+        policy_feature_dim = FacilityLocationAttentionGNNExtractor.get_policy_feature_dim(node_dim)
+        value_feature_dim = FacilityLocationAttentionGNNExtractor.get_value_feature_dim(node_dim)
+        policy_kwargs = dict(
+            policy_feature_dim=policy_feature_dim,
+            value_feature_dim=value_feature_dim,
+            policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
+            value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
+            features_extractor_class=FacilityLocationAttentionGNNExtractor,
+            features_extractor_kwargs=dict(
+                num_gnn_layers=num_gnn_layers,
+                node_dim=node_dim),
+            popstar=cfg.env_specs.get('popstar', False),)
+
+    else:
+        raise NotImplementedError
+    return policy_kwargs
diff --git a/final_solutions.pkl b/final_solutions.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/model.pth b/model.pth
deleted file mode 100644
index 50dc1e548be4bddcd8c592e58084a7c66cde6f4b..0000000000000000000000000000000000000000
Binary files a/model.pth and /dev/null differ
diff --git a/model.py b/model.py
deleted file mode 100644
index 9e85f16503263823562c052855f51323613d9f18..0000000000000000000000000000000000000000
--- a/model.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class Net(nn.Module):
-    def __init__(self, input_size, hidden_size, num_classes):
-        super(Net, self).__init__()
-        self.fc1 = nn.Linear(input_size, hidden_size)
-        self.relu = nn.ReLU()
-        self.fc2 = nn.Linear(hidden_size, num_classes)
-        self.softmax = nn.Softmax(dim=1)
-
-    def forward(self, x):
-        out = self.fc1(x)
-        out = self.relu(out)
-        out = self.fc2(out)
-        out = self.softmax(out)
-        return out
-
-
-if __name__ == '__main__':
-    net = Net(100, 50, 10)
-    torch.save(net.state_dict(), 'model.pth')
diff --git a/requirements.txt b/requirements.txt
index f824cf9cfb4e0a733f67a1f12e82d36cdd27f12e..c954a422e0ad11781c25a908235e9edeacc9d147 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,22 @@ numpy >= 1.26.1
 plotly
 pandas
 scikit-learn
-scipy
\ No newline at end of file
+scipy
+absl-py>=1.1.0
+pulp>=2.7.0
+spopt>=0.4.1
+geopandas>=0.11.1
+matplotlib>=3.5.2
+shapely>=1.8.4
+gym>=0.21.0
+pygad>=2.18.1
+stable_baselines3>=1.6.2
+networkx>=2.8.4
+libpysal>=4.7.0
+torch>=1.13.1
+PyYAML>=6.0
+tqdm>=4.64.0
+rich>=13.0.1
+numba>=0.55.2
+tensorboard>=2.11.0
\ No newline at end of file
diff --git a/test_model.py b/test_model.py
deleted file mode 100644
index 4d8ff9f62948ee6387f847c86ce056b2adf6b5c6..0000000000000000000000000000000000000000
--- a/test_model.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import torch
-import numpy as np
-
-from model import Net
-
-a = np.arange(100).reshape(1, 100)
-model = Net(100, 50, 10)
-model.load_state_dict(torch.load('model.pth'))
-model.eval()
-output = model(torch.from_numpy(a).float())
-print(output)
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e5237fc36fafa4436ad75d32214f583820298e8
--- /dev/null
+++ b/utils/__init__.py
@@ -0,0 +1,8 @@
+from .load_save import load_yaml
+from .vec_check_nan import DictVecCheckNan
+from .update_val_env_callback import UpdateValEnv, UpdateValEnvAndStopTrainingOnNoModelImprovement
+from .hparam_callback import HParamCallback
+
+__all__ = ['load_yaml', 'DictVecCheckNan',
+           'UpdateValEnv', 'UpdateValEnvAndStopTrainingOnNoModelImprovement',
+           'HParamCallback']
diff --git a/utils/__pycache__/__init__.cpython-310.pyc b/utils/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..34c3087a87fb8bdb7c4af63744d5bf7c0837de4c
Binary files /dev/null and b/utils/__pycache__/__init__.cpython-310.pyc differ
diff --git a/utils/__pycache__/__init__.cpython-39.pyc b/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8a27647a0850795f07ba6001dff66277a1a07f22
Binary files /dev/null and b/utils/__pycache__/__init__.cpython-39.pyc differ
diff --git a/utils/__pycache__/hparam_callback.cpython-310.pyc b/utils/__pycache__/hparam_callback.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5f189063bfcb06a6610b3ee512e4b8725aa26a6d
Binary files /dev/null and b/utils/__pycache__/hparam_callback.cpython-310.pyc differ
diff --git a/utils/__pycache__/hparam_callback.cpython-39.pyc b/utils/__pycache__/hparam_callback.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..317a0c5d7bba5a8b816426cf2815e0c7500b6d8c
Binary files /dev/null and b/utils/__pycache__/hparam_callback.cpython-39.pyc differ
diff --git a/utils/__pycache__/load_save.cpython-310.pyc b/utils/__pycache__/load_save.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..48132a643dd7dc899b8221f577c5c0409264593f
Binary files /dev/null and b/utils/__pycache__/load_save.cpython-310.pyc differ
diff --git a/utils/__pycache__/load_save.cpython-39.pyc b/utils/__pycache__/load_save.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e967a9c51a982f636b141d7897fbaab55dbe161f
Binary files /dev/null and b/utils/__pycache__/load_save.cpython-39.pyc differ
diff --git a/utils/__pycache__/update_val_env_callback.cpython-310.pyc b/utils/__pycache__/update_val_env_callback.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1c664ea97c8bf6c4c99b101897d38d4b99909241
Binary files /dev/null and b/utils/__pycache__/update_val_env_callback.cpython-310.pyc differ
diff --git a/utils/__pycache__/update_val_env_callback.cpython-39.pyc b/utils/__pycache__/update_val_env_callback.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cbcb7b66fc6097c6044c4ee6c73fdf8c3053f7c4
Binary files /dev/null and b/utils/__pycache__/update_val_env_callback.cpython-39.pyc differ
diff --git a/utils/__pycache__/vec_check_nan.cpython-310.pyc b/utils/__pycache__/vec_check_nan.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5c36532ff2df56d42e39db64f14f31a386bb719d
Binary files /dev/null and b/utils/__pycache__/vec_check_nan.cpython-310.pyc differ
diff --git a/utils/__pycache__/vec_check_nan.cpython-39.pyc b/utils/__pycache__/vec_check_nan.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..49c1b38b89730e23903bf31cda64d9d416ff85bc
Binary files /dev/null and b/utils/__pycache__/vec_check_nan.cpython-39.pyc differ
diff --git a/utils/hparam_callback.py b/utils/hparam_callback.py
new file mode 100644
index 0000000000000000000000000000000000000000..32aaaf83d315cc8d9a7006ddd0500332eaa606d1
--- /dev/null
+++ b/utils/hparam_callback.py
@@ -0,0 +1,32 @@
+from stable_baselines3.common.callbacks import BaseCallback
+from stable_baselines3.common.logger import HParam
+
+
+class HParamCallback(BaseCallback):
+    def __init__(self):
+        super().__init__()
+
+    def _on_training_start(self) -> None:
+        hparam_dict = {
+            "algorithm": self.model.__class__.__name__,
+            "learning rate": self.model.learning_rate,
+            "steps_per_iteration": self.model.n_steps * self.model.n_envs,
+            "batch_size": self.model.batch_size,
+            "optim_epochs_per_iteration": self.model.n_epochs,
+            "gamma": self.model.gamma,
+            "gae_lambda": self.model.gae_lambda,
+            "ent_coef": self.model.ent_coef,
+            "vf_coef": self.model.vf_coef,
+        }
+        metric_dict = {
+            "eval/mean_reward": 0,
+            "train/loss": 0,
+        }
+        self.logger.record(
+            "hparams",
+            HParam(hparam_dict, metric_dict),
+            exclude=("stdout", "log", "json", "csv"),
+        )
+
+    def _on_step(self) -> bool:
+        return True
diff --git a/utils/load_save.py b/utils/load_save.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5fe95e0379985c20422c7dafbb6510988b831c2
--- /dev/null
+++ b/utils/load_save.py
@@ -0,0 +1,52 @@
+import yaml
+import glob
+import pickle
+import os
+import socket
+
+
+def get_file_path(file_path):
+    hostname = socket.gethostname()
+    if hostname == 'fib':
+        file_path = os.path.join('/home/mas/zhengyu/workspace/flp', file_path)
+    elif hostname == 'rl2':
+        file_path = os.path.join('/home/zhengyu/workspace/flp', file_path)
+    elif hostname == 'rl3':
+        file_path = os.path.join('/home/zhengyu/workspace/flp', file_path)
+    elif hostname == 'rl4':
+        file_path = os.path.join('/data2/suhongyuan/flp', file_path)
+    elif hostname == 'DL4':
+        file_path = os.path.join('/data2/zhengyu/workspace/flp', file_path)
+    elif hostname == 'S4rawBer2y.local':
+        file_path = os.path.join('/Users/zhengyu/Seafile/code/workspace/flp', file_path)
+    elif hostname == 'DESKTOP-FTK3FVH':
+        file_path = os.path.join('C:\\Users\\123\\Seafile\\code\\workspace\\flp', file_path)
+    else:
+        raise ValueError('Unknown hostname: {}'.format(socket.gethostname()))
+    return file_path
+
+
+class TupleSafeLoader(yaml.SafeLoader):
+    def construct_python_tuple(self, node):
+        return tuple(self.construct_sequence(node))
+
+
+TupleSafeLoader.add_constructor(
+    u'tag:yaml.org,2002:python/tuple',
+    TupleSafeLoader.construct_python_tuple)
+
+
+def load_yaml(file_path):
+    file_path = get_file_path(file_path)
+    files = glob.glob(file_path, recursive=True)
+    assert len(files) == 1
+    with open(files[0], 'r') as f:
+        cfg = yaml.load(f, Loader=TupleSafeLoader)
+    return cfg
+
+
+def load_pickle(file_path):
+    file_path = get_file_path(file_path)
+    files = glob.glob(file_path, recursive=True)
+    assert len(files) == 1
+    with open(files[0], 'rb') as f:
+        data = pickle.load(f)
+    return data
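# --- Editor's illustration (not part of the diff): what TupleSafeLoader ---
# adds. Plain yaml.safe_load rejects the !!python/tuple tag; the loader above
# whitelists just that one constructor, so config values can be tuples:
import yaml

from utils.load_save import TupleSafeLoader

doc = 'hidden_units: !!python/tuple [32, 32]'
cfg = yaml.load(doc, Loader=TupleSafeLoader)
assert cfg['hidden_units'] == (32, 32)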
diff --git a/utils/update_val_env_callback.py b/utils/update_val_env_callback.py
new file mode 100644
index 0000000000000000000000000000000000000000..375ee4d5acbda70e022b7afc09de538ba6e60165
--- /dev/null
+++ b/utils/update_val_env_callback.py
@@ -0,0 +1,34 @@
+from typing import Union
+
+import gym
+from stable_baselines3.common.callbacks import BaseCallback, StopTrainingOnNoModelImprovement
+from stable_baselines3.common.vec_env import VecEnv, DummyVecEnv
+
+
+class UpdateValEnv(BaseCallback):
+    def __init__(self, val_env: Union[gym.Env, VecEnv], verbose: int = 0):
+        super().__init__(verbose=verbose)
+        if not isinstance(val_env, VecEnv):
+            val_env = DummyVecEnv([lambda: val_env])
+
+        self.val_env = val_env
+
+    def _on_step(self) -> bool:
+        assert self.parent is not None, "``UpdateValEnv`` callback must be used with an ``EvalCallback``"
+
+        self.val_env.env_method('reset_instance_id')
+        return True
+
+
+class UpdateValEnvAndStopTrainingOnNoModelImprovement(StopTrainingOnNoModelImprovement):
+    def __init__(self, val_env: Union[gym.Env, VecEnv],
+                 max_no_improvement_evals: int, min_evals: int = 0, verbose: int = 0):
+        super().__init__(max_no_improvement_evals=max_no_improvement_evals, min_evals=min_evals, verbose=verbose)
+        if not isinstance(val_env, VecEnv):
+            val_env = DummyVecEnv([lambda: val_env])
+
+        self.val_env = val_env
+
+    def _on_step(self) -> bool:
+        self.val_env.env_method('reset_instance_id')
+        return super()._on_step()
diff --git a/utils/vec_check_nan.py b/utils/vec_check_nan.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a2fed032019cde2252374cbe745b2473c2ecf35
--- /dev/null
+++ b/utils/vec_check_nan.py
@@ -0,0 +1,56 @@
+import warnings
+from collections import OrderedDict
+from typing import Text
+
+import numpy as np
+from stable_baselines3.common.vec_env import VecCheckNan
+
+
+class DictVecCheckNan(VecCheckNan):
+    def _check_val(self, event: str, **kwargs) -> None:
+        # if warn and warn once and have warned once: then stop checking
+        if not self.raise_exception and self.warn_once and self._user_warned:
+            return
+
+        found = []
+
+        def check_val_np(check_name: Text, check_val: np.ndarray) -> None:
+            has_nan = np.any(np.isnan(check_val))
+            has_inf = self.check_inf and np.any(np.isinf(check_val))
+            if has_inf:
+                found.append((check_name, "inf"))
+            if has_nan:
+                found.append((check_name, "nan"))
+
+        for name, val in kwargs.items():
+            if isinstance(val, np.ndarray):
+                check_val_np(name, val)
+            elif isinstance(val, OrderedDict):
+                for inner_name, inner_val in val.items():
+                    check_val_np(f"{name}-{inner_name}", inner_val)
+            else:
+                raise ValueError(f"Unsupported observation type {type(val)}.")
+
+        if found:
+            self._user_warned = True
+            msg = ""
+            for i, (name, type_val) in enumerate(found):
+                msg += f"found {type_val} in {name}"
+                if i != len(found) - 1:
+                    msg += ", "
+
+            msg += ".\r\nOriginated from the "
+
+            if event == "reset":
+                msg += "environment observation (at reset)"
+            elif event == "step_wait":
+                msg += f"environment, Last given value was: \r\n\taction={self._actions}"
+            elif event == "step_async":
+                msg += f"RL model, Last given value was: \r\n\tobservations={self._observations}"
+            else:
+                raise ValueError("Internal error.")
+
+            if self.raise_exception:
+                raise ValueError(msg)
+            else:
+                warnings.warn(msg, UserWarning)
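# --- Editor's sketch (not part of the diff): using DictVecCheckNan. ---
# Stable-Baselines3's VecCheckNan only inspects flat array observations; the
# subclass above also walks each entry of a dict observation, so it can wrap
# the dict-observation facility location envs. make_env is a placeholder for
# any factory returning such an env.
#
#   from stable_baselines3.common.vec_env import DummyVecEnv
#   from utils import DictVecCheckNan
#
#   venv = DummyVecEnv([make_env])
#   venv = DictVecCheckNan(venv, raise_exception=True, check_inf=True)
#   obs = venv.reset()    # raises ValueError if any entry is NaN/inf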