angelazhu96 committed
Commit 9ff98d7 · 1 Parent(s): dcb7cfe
code for viz
Browse files
- app.py +87 -0
- create_inputs_to_fs_sinr.py +124 -0
- eval.py +0 -0
- get_gt.py +369 -0
- models.py +1434 -0
- paths.json +10 -0
- requirements.txt +10 -0
- setup.py +0 -0
- utils.py +326 -0
- viz_ls_map.py +283 -0
app.py
ADDED
@@ -0,0 +1,87 @@
import gradio as gr
from viz_ls_map import main
from get_gt import generate_ground_truth

def predict_species_distribution(taxa_id, taxa_name, text_type, num_context_points):
    """
    Function to predict species distribution and visualize the map.
    """
    isSnt = False
    taxa_id = int(taxa_id)
    #num_context_points = [0, 1, 2, 5, 10, 20]
    num_context_points = [1]

    # Generate ground truth for the species
    generate_ground_truth(taxa_id, isSnt)
    image_path_gt = f'images/species_presence_hr_{taxa_id}.png'
    output_images = []
    #print(num_context_points)

    for text_type_i in ['none', 'range', 'habitat']:
        # Set up evaluation parameters
        eval_params = {
            'model_path': './experiments/zero_shot_ls_sin_cos_env_cap_1000_text_context_20_sinr_two_layer_nn/model.pt',
            'taxa_id': taxa_id,
            'threshold': -1,
            'op_path': './images/',
            'rand_taxa': False,
            'high_res': True,
            'disable_ocean_mask': False,
            'set_max_cmap_to_1': False,
            'device': 'cpu',
            'show_map': 1,
            'show_context_points': 1,
            'prefix': '',
            'num_context': num_context_points,
            'choose_context_points': 1,
            'additional_save_name': "",
            'taxa_name': taxa_name,
            'test_taxa': taxa_id,
            'text_type': text_type_i,  # 'none', 'habitat', or 'range'
            'context_pt_trial': num_context_points,
        }

        # Run the FS-SINR model with the specified parameters
        main(eval_params)

        # The output image is saved in './images/' with the predicted range map
        #image_path = f'./images/{taxa_name}_predicted_range.png'

        for k in num_context_points:
            # Assume image filenames are stored like this
            image_path = f'./images/testenv_{taxa_name}(selected_points)_{text_type_i}_{k}.png'
            output_images.append(image_path)

    return [image_path_gt] + output_images
    #return True

# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# View Species Distribution Predictions using FS-SINR")

    # Input fields for the Gradio interface
    taxa_id = gr.Number(label="Taxa ID", value=43188)
    taxa_name = gr.Textbox(label="Taxa Name", value="test_pika")
    text_type = gr.Radio(label="Text Type", choices=['none', 'habitat', 'range'], value='none')
    #num_context_points = gr.Slider(label="Number of Context Points", minimum=1, maximum=20, value=5, step=1)
    num_context_points = gr.CheckboxGroup([0, 1, 2, 3, 4, 5, 10, 15, 20], label="Number of Context Points")

    # Button to trigger the prediction
    predict_button = gr.Button("Predict Species Distribution")

    # Output: predicted range map
    ground_truth = gr.Image(label="Ground Truth Map")
    none_maps = gr.Image(label=f"Map for No Text Input and Context Point {1}")
    range_maps = gr.Image(label=f"Map for Range Text input and Context Point {1}")
    hab_maps = gr.Image(label=f"Map for Habitat Text input and Context Point {1}")
    output_images = [ground_truth, none_maps, range_maps, hab_maps]

    # Link the button to the function and inputs
    predict_button.click(fn=predict_species_distribution,
                         inputs=[taxa_id, taxa_name, text_type, num_context_points],
                         outputs=output_images)

# Launch the Gradio interface
demo.launch()
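One deployment note on this file (a sketch, not part of the commit): the Blocks construction and `demo.launch()` run at module import, so any launch options for serving the Space from a remote box would go in that same final call. These are standard `gradio.Blocks.launch()` arguments:

# hypothetical replacement for the last line of app.py when serving remotely
demo.launch(server_name="0.0.0.0", server_port=7860)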
create_inputs_to_fs_sinr.py
ADDED
@@ -0,0 +1,124 @@
import torch
import csv
from gritlm import GritLM
import pandas as pd
import ast
import numpy as np

input_text4 = ['The hyacinth macaw prefers semi-open, somewhat wooded habitats. It usually avoids dense, humid forest, and in regions dominated by such habitats, it is generally restricted to the edge or relatively open sections (e.g. along major rivers). In different areas of their range, these parrots are found in savannah grasslands, in dry thorn forests known as caatinga, and in palm stands or swamps, particularly the moriche palm (Mauritia flexuosa).',
               'The hyacinth macaw occurs today in three main areas in South America: In the Pantanal region of Brazil, and adjacent eastern Bolivia and northeastern Paraguay, in the cerrado regions of the eastern interior of Brazil (Maranhão, Piauí, Bahia, Tocantins, Goiás, Mato Grosso, Mato Grosso do Sul, and Minas Gerais), and in the relatively open areas associated with the Tocantins River, Xingu River, Tapajós River, and the Marajó island in the eastern Amazon Basin of Brazil.',
               'They are diurnal, terrestrial, and live in complex, mixed-gender social groups of 8 to 200 individuals per troop. They prefer savannas and light forests with a climate that is suitable for their omnivorous diet.',
               'Yellow baboons inhabit savannas and light forests in eastern Africa, from Kenya and Tanzania to Zimbabwe and Botswana.']
input_text5 = ['chappell roan', 'europe', 'pawpaw',
               'sierra nevada', 'great lakes', 'Treaty of Waitangi',
               'hello kitty', 'disney', 'madagascar', 'Andes', 'africa',
               'dessert', 'whale', 'moon snail', 'unicorn', 'rainfall',
               'species occurs above 2000m of elevation', 'froyo', 'desert',
               'dragon', 'bear', 'selkie', 'loch ness monster']

def extract_grit_token(model, text: str):
    def gritlm_instruction(instruction):
        return "<|user|>\n" + instruction + "\n<|embed|>\n" if instruction else "<|embed|>\n"
    d_rep = model.encode([text], instruction=gritlm_instruction(""))
    d_rep = torch.from_numpy(d_rep)
    return d_rep

def generate_text_embs(text, output_file):
    grit = GritLM("GritLM/GritLM-7B", torch_dtype="auto", mode="embedding")

    with open(output_file, mode='w') as file:
        writer = csv.writer(file)
        writer.writerow(['Text', 'Embedding'])
        for i in range(0, len(text)):
            text_emb = extract_grit_token(grit, text[i]).to('cpu')
            print(f" {text[i]}: {text_emb} ")
            writer.writerow([text[i], text_emb.tolist()])

#TODO: max's generate text using grit
def generate_text_emb(text):
    grit = GritLM("GritLM/GritLM-7B", torch_dtype="auto", mode="embedding")
    text_emb = extract_grit_token(grit, text)
    return text_emb

def use_pregenerated_textemb_fromgpt(taxon_id):
    embs_loaded = torch.load('experiments/gpt_data.pt', map_location='cpu')

    emb_ids = embs_loaded['taxon_id'].tolist()  # (2785,)
    keys1 = embs_loaded['keys']  # (11140, 2)
    embs = embs_loaded['data']  # torch.Size([11140, 4096])
    print(embs_loaded['taxon_id'].size())

    matching_indices = [i for i, (tid) in enumerate(emb_ids) if tid == taxon_id]
    print(matching_indices)
    taxon_embeddings = embs[matching_indices, :]  # Get embeddings for the matching indices
    matching_keys = [keys1[i] for i in matching_indices]  # Get the corresponding (taxon_id, text_type) keys

    print(f"Found {len(matching_keys)} embeddings for taxon ID {taxon_id}:")
    for i, key in enumerate(matching_keys):
        print(f"Text Type: {key[1]}, Embedding: {taxon_embeddings[i, :]}")

    return taxon_embeddings[i, :]

def use_pregenerated_textemb_fromchris(taxon_id, text_type):
    # zero vector is for no text input
    text_embedding = torch.zeros(1, 4096)
    if text_type is None or text_type == 'none':
        return text_embedding, 0

    embs1 = torch.load('experiments/gpt_data.pt', map_location='cpu')
    emb_ids1 = embs1['taxon_id'].tolist()
    keys1 = embs1['keys']
    embs1 = embs1['data']

    taxa_of_interest = taxon_id
    taxa_index_of_interest = emb_ids1.index(taxa_of_interest)  # gets 5

    #keys_with_taxa_of_interest = [key for key in keys1 if key[0] == taxa_index_of_interest]
    #indices_with_taxa_of_interest = [(key, i) for i, key in enumerate(keys1) if key[0] == taxa_index_of_interest]
    possible_text_embedding_indexes = [i for i, key in enumerate(keys1) if key[0] == taxa_index_of_interest and key[1] == text_type]

    if len(possible_text_embedding_indexes) != 1:
        return text_embedding, 0
    # take a look and choose what you want
    # for key in indices_with_taxa_of_interest:
    #     print(key)

    # ((5, 'range'), 20)
    # ((5, 'habitat'), 21)
    # ((5, 'species_description'), 22)
    # ((5, 'overview_summary'), 23)

    # macaw: range: 20, habitat: 21
    # baboon: range: 7928, habitat: 7929
    # black&white warbler: range: 16, habitat: 17
    # barn swallow: range: 1652, habitat: 1653
    # pika: range: 7116, habitat: 7117
    # loon: range: 11056, habitat: 11057
    # euro robin: range: 2020, habitat: 2021
    # sfs: range: 7148, habitat: 7149
    text_embedding_index = possible_text_embedding_indexes[0]
    text_embedding = embs1[text_embedding_index].unsqueeze(0)
    #print(text_embedding_index)
    return text_embedding, text_embedding_index

def use_pregenerated_textemb_fromcsv(input_text):
    text_data = pd.read_csv('data/text_embs/text_embeddings_fig4.csv')
    result_row = text_data[text_data['Text'] == input_text]
    text_emb = ast.literal_eval(result_row['Embedding'].values[0])
    embedding_tensor = torch.FloatTensor(text_emb)
    return embedding_tensor

def get_eval_context_points(taxa_id, context_data, size):
    all_context_pts = context_data['locs'][context_data['labels'] == np.argwhere(context_data['class_to_taxa'] == taxa_id)[0]][1:]
    context_pts = all_context_pts[0:size]
    dummy_classtoken = np.array([[0, 0]])
    context_pts = np.vstack((dummy_classtoken, context_pts))
    #print(f"context point shape: {np.shape(context_pts)}")
    normalized_pts = torch.from_numpy(context_pts) * torch.tensor([[1/180, 1/90]], device='cpu')

    return normalized_pts

if __name__ == '__main__':
    print('starting to generate text_embs')
    output_file = './data/text_embs/text_embeddings_fig4.csv'
    use_pregenerated_textemb_fromchris()
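One thing worth flagging in this file: the `__main__` block calls `use_pregenerated_textemb_fromchris()` with no arguments, but the function requires a `taxon_id` and a `text_type`, so running the module directly raises a TypeError. A minimal sketch of the intended lookup (18938 is the hyacinth macaw ID mentioned in the comments above and in get_gt.py; `experiments/gpt_data.pt` is assumed to be present locally):

from create_inputs_to_fs_sinr import use_pregenerated_textemb_fromchris

# Returns the pre-generated 4096-d GritLM text embedding for one taxon/text type,
# plus its row index in gpt_data.pt; a zero vector and index 0 come back for 'none'.
emb, idx = use_pregenerated_textemb_fromchris(taxon_id=18938, text_type='habitat')
print(emb.shape, idx)  # torch.Size([1, 4096]) and the matching row index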
eval.py
ADDED
The diff for this file is too large to render.
See raw diff
get_gt.py
ADDED
@@ -0,0 +1,369 @@
# import numpy as np
# import h3
# import json
# import os
#
# snt=False
#
# def get_labels(species, data):
#     species = str(species)
#     lat = []
#     lon = []
#     gt = []
#     for hx in data:
#         cur_lat, cur_lon = h3.h3_to_geo(hx)
#         if species in data[hx]:
#             cur_label = int(len(data[hx][species]) > 0)
#             gt.append(cur_label)
#             lat.append(cur_lat)
#             lon.append(cur_lon)
#     lat = np.array(lat).astype(np.float32)
#     lon = np.array(lon).astype(np.float32)
#     obs_locs = np.vstack((lon, lat)).T
#     gt = np.array(gt).astype(np.float32)
#     return obs_locs, gt
#
# def lonlat_to_pixel(lonlat, grid_width, grid_height):
#     # Convert normalized lon/lat (-1 to 1) to pixel coordinates
#     x_pixel = np.floor((lonlat[:, 0] + 1) / 2 * (grid_width - 1)).astype(int)
#     y_pixel = np.floor((1 - (lonlat[:, 1] + 1) / 2) * (grid_height - 1)).astype(int)
#     return x_pixel, y_pixel
#
# ocean_mask = np.load("data/masks/ocean_mask.npy", allow_pickle=True)
# # 1002, 2004 pixels
# # 0 in ocean (needs to be masked out)
#
# if snt:
#     with open('paths.json', 'r') as f:
#         paths = json.load(f)
#     D = np.load(os.path.join(paths['snt'], 'snt_res_5.npy'), allow_pickle=True)
#     D = D.item()
#     loc_indices_per_species = D['loc_indices_per_species']
#     labels_per_species = D['labels_per_species']
#     taxa = D['taxa']
#     obs_locs = D['obs_locs']
#     obs_locs_idx = D['obs_locs_idx']
# else:
#     with open('paths.json', 'r') as f:
#         paths = json.load(f)
#     with open(os.path.join(paths['iucn'], 'iucn_res_5.json'), 'r') as f:
#         data = json.load(f)
#     obs_locs = np.array(data['locs'], dtype=np.float32)
#     taxa = [int(tt) for tt in data['taxa_presence'].keys()]
#     a = 6
#     # data['taxa_presence'] is a dict where keys are "taxa" and then the values are the indices of "obs_locs" where the species is present
#     # obs locs is in lon, lat with -180 to 180 and -90 to 90

import numpy as np
import h3
import json
import os
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable


def get_labels(species, data):
    species = str(species)
    lat = []
    lon = []
    gt = []
    for hx in data:
        cur_lat, cur_lon = h3.h3_to_geo(hx)
        if species in data[hx]:
            cur_label = int(len(data[hx][species]) > 0)
            gt.append(cur_label)
            lat.append(cur_lat)
            lon.append(cur_lon)
    lat = np.array(lat).astype(np.float32)
    lon = np.array(lon).astype(np.float32)
    obs_locs = np.vstack((lon, lat)).T
    gt = np.array(gt).astype(np.float32)
    return obs_locs, gt

def lonlat_to_pixel(lonlat, grid_width, grid_height):
    # Convert normalized lon/lat (-1 to 1) to pixel coordinates
    x_pixel = np.floor((lonlat[:, 0] + 1) / 2 * (grid_width - 1)).astype(int)
    y_pixel = np.floor((1 - (lonlat[:, 1] + 1) / 2) * (grid_height - 1)).astype(int)
    return x_pixel, y_pixel

# def plot_heatmap(data, save_loc):
#     # Apply mask if provided
#     ocean_mask = np.load("data/masks/ocean_mask.npy", allow_pickle=True)
#     # 1002, 2004 pixels
#     # 0 in ocean (needs to be masked out)
#
#     # Convert ocean_mask to boolean mask
#     mask = ocean_mask.astype(bool)
#     mask = mask[::2, ::2]
#
#     if mask is not None:
#         data = np.where(mask, data, 0)
#
#     # Set NaN values to 0 for plotting
#     data = np.nan_to_num(data, nan=0.0)
#
#     fig, ax = plt.subplots(figsize=(20.04, 10.02), dpi=100)
#     ax.set_xlim(-180, 180)
#     ax.set_ylim(-90, 90)
#     ax.axis('off')
#
#     # Use 'magma' colormap with two discrete colors
#     cmap = plt.get_cmap('magma', 2)
#     cmap.set_bad(color='none')
#     plt.rcParams['font.family'] = 'serif'
#
#     cax_im = ax.imshow(data, extent=(-180, 180, -90, 90), origin='upper', cmap=cmap, vmin=0, vmax=1)
#
#     plt.tight_layout()
#     pdf_save_loc = save_loc + '.pdf'
#     png_save_loc = save_loc + '.png'
#     plt.savefig(pdf_save_loc, bbox_inches='tight', pad_inches=0)
#     plt.savefig(png_save_loc, bbox_inches='tight', pad_inches=0)
#     plt.close(fig)

def plot_heatmap(data, save_loc):
    # Load the ocean mask
    ocean_mask = np.load("data/masks/ocean_mask.npy", allow_pickle=True)
    # 1002, 2004 pixels
    # 0 in ocean (needs to be masked out)

    # Convert ocean_mask to boolean mask
    mask = ocean_mask.astype(bool)
    # If you need to downsample the mask, uncomment the following line
    mask = mask[::2, ::2]

    # Set ocean areas to np.nan
    data = np.where(mask, data, np.nan)

    # Create a masked array where NaNs are masked
    data_masked = np.ma.array(data, mask=np.isnan(data))

    fig, ax = plt.subplots(figsize=(20.04, 10.02), dpi=100)
    ax.set_xlim(-180, 180)
    ax.set_ylim(-90, 90)
    ax.axis('off')

    # Use 'magma' colormap with two discrete colors
    cmap = plt.get_cmap('plasma', 2)
    # Set color for masked (NaN) values
    cmap.set_bad(color='none')  # 'none' makes it transparent; use 'white' for white background

    # Plot the data
    cax_im = ax.imshow(
        data_masked,
        extent=(-180, 180, -90, 90),
        origin='upper',
        cmap=cmap,
        vmin=0,
        vmax=1,
        interpolation='nearest'
    )

    plt.tight_layout()
    pdf_save_loc = save_loc + '.pdf'
    png_save_loc = save_loc + '.png'
    plt.savefig(pdf_save_loc, bbox_inches='tight', pad_inches=0)
    plt.savefig(png_save_loc, bbox_inches='tight', pad_inches=0)
    plt.close(fig)

def plot_heatmap_2(data, save_loc):
    # Load the ocean mask
    ocean_mask = np.load("data/masks/ocean_mask.npy", allow_pickle=True)
    # 1002, 2004 pixels
    # 0 in ocean (needs to be masked out)

    # Convert ocean_mask to boolean mask
    mask = ocean_mask.astype(bool)
    # If you need to downsample the mask, uncomment the following line

    # Set ocean areas to np.nan
    data = np.where(mask, data, np.nan)

    # Create a masked array where NaNs are masked
    data_masked = np.ma.array(data, mask=np.isnan(data))

    fig, ax = plt.subplots(figsize=(20.04, 10.02), dpi=100)
    ax.set_xlim(-180, 180)
    ax.set_ylim(-90, 90)
    ax.axis('off')

    # Use 'magma' colormap with two discrete colors
    cmap = plt.get_cmap('plasma', 2)
    # Set color for masked (NaN) values
    cmap.set_bad(color='none')  # 'none' makes it transparent; use 'white' for white background

    # Plot the data
    cax_im = ax.imshow(
        data_masked,
        extent=(-180, 180, -90, 90),
        origin='upper',
        cmap=cmap,
        vmin=0,
        vmax=1,
        interpolation='nearest'
    )

    plt.tight_layout()
    pdf_save_loc = save_loc + '.pdf'
    png_save_loc = save_loc + '.png'
    plt.savefig(pdf_save_loc, bbox_inches='tight', pad_inches=0)
    plt.savefig(png_save_loc, bbox_inches='tight', pad_inches=0)
    plt.show(block=False)
    plt.close(fig)

def generate_ground_truth(taxa_id, snt=True, grid_height=501, grid_width=1002):
    print(taxa_id)
    if snt:
        with open('paths.json', 'r') as f:
            paths = json.load(f)
        D = np.load(os.path.join(paths['snt'], 'snt_res_5.npy'), allow_pickle=True)
        D = D.item()
        loc_indices_per_species = D['loc_indices_per_species']
        labels_per_species = D['labels_per_species']
        taxa = D['taxa']
        obs_locs = D['obs_locs']
        obs_locs_idx = D['obs_locs_idx']
        # class_index = np.where(taxa==taxa_id)
        # class_index = class_index[0]
        # class_index = class_index[0]
        # species_loc_indices = loc_indices_per_species[class_index]
        # species_locs = obs_locs[species_loc_indices]
        # presence_indices = labels_per_species[class_index]
        # species_locs = species_locs[presence_indices==1]

        # Ensure class_index is correctly obtained as an integer index
        class_indices = np.where(taxa == taxa_id)[0]
        if len(class_indices) == 0:
            raise ValueError(f"taxa_id {taxa_id} not found in taxa")
        class_index = class_indices[0]

        # Convert loc_indices_per_species[class_index] to a NumPy array
        species_loc_indices = np.array(loc_indices_per_species[class_index])

        # Retrieve the species locations using the indices
        species_locs = obs_locs[species_loc_indices]

        # Convert labels_per_species[class_index] to a NumPy array
        presence_indices = np.array(labels_per_species[class_index])

        # Filter species_locs where presence_indices == 1
        species_locs = species_locs[presence_indices == 1]

    else:
        with open('paths.json', 'r') as f:
            paths = json.load(f)
        with open(os.path.join(paths['iucn'], 'iucn_res_5.json'), 'r') as f:
            data = json.load(f)
        obs_locs = np.array(data['locs'], dtype=np.float32)
        taxa = [int(tt) for tt in data['taxa_presence'].keys()]
        indices = data['taxa_presence'][str(taxa_id)]
        species_locs = obs_locs[indices]  # shape (N, 2)

    # Normalize lonlat
    species_locs_normalized = species_locs.copy()
    species_locs_normalized[:, 0] = species_locs_normalized[:, 0] / 180  # lon / 180
    species_locs_normalized[:, 1] = species_locs_normalized[:, 1] / 90   # lat / 90

    # Get pixel coordinates
    x_pixel, y_pixel = lonlat_to_pixel(species_locs_normalized, grid_width, grid_height)

    # Ensure x_pixel and y_pixel are within bounds
    x_pixel = np.clip(x_pixel, 0, grid_width - 1)
    y_pixel = np.clip(y_pixel, 0, grid_height - 1)

    # Create data array
    data_array = np.zeros((grid_height, grid_width))

    # Set pixels where species is present
    data_array[y_pixel, x_pixel] = 1

    # Now call plot_heatmap
    title = f"Species presence for taxa {taxa_id}"
    save_loc = f"./images/species_presence_{taxa_id}"
    plot_heatmap(data_array, save_loc)

    grid_height = 1002
    grid_width = 2004

    if snt:
        with open('paths.json', 'r') as f:
            paths = json.load(f)
        D = np.load(os.path.join(paths['snt'], 'snt_res_5.npy'), allow_pickle=True)
        D = D.item()
        loc_indices_per_species = D['loc_indices_per_species']
        labels_per_species = D['labels_per_species']
        taxa = D['taxa']
        obs_locs = D['obs_locs']
        obs_locs_idx = D['obs_locs_idx']
        # class_index = np.where(taxa==taxa_id)
        # class_index = class_index[0]
        # class_index = class_index[0]
        # species_loc_indices = loc_indices_per_species[class_index]
        # species_locs = obs_locs[species_loc_indices]
        # presence_indices = labels_per_species[class_index]
        # species_locs = species_locs[presence_indices==1]

        # Ensure class_index is correctly obtained as an integer index
        class_indices = np.where(taxa == taxa_id)[0]
        if len(class_indices) == 0:
            raise ValueError(f"taxa_id {taxa_id} not found in taxa")
        class_index = class_indices[0]

        # Convert loc_indices_per_species[class_index] to a NumPy array
        species_loc_indices = np.array(loc_indices_per_species[class_index])

        # Retrieve the species locations using the indices
        species_locs = obs_locs[species_loc_indices]

        # Convert labels_per_species[class_index] to a NumPy array
        presence_indices = np.array(labels_per_species[class_index])

        # Filter species_locs where presence_indices == 1
        species_locs = species_locs[presence_indices == 1]

    else:
        with open('paths.json', 'r') as f:
            paths = json.load(f)
        with open(os.path.join(paths['iucn'], 'iucn_res_5.json'), 'r') as f:
            data = json.load(f)
        obs_locs = np.array(data['locs'], dtype=np.float32)
        taxa = [int(tt) for tt in data['taxa_presence'].keys()]
        indices = data['taxa_presence'][str(taxa_id)]
        species_locs = obs_locs[indices]  # shape (N, 2)

    # Normalize lonlat
    species_locs_normalized = species_locs.copy()
    species_locs_normalized[:, 0] = species_locs_normalized[:, 0] / 180  # lon / 180
    species_locs_normalized[:, 1] = species_locs_normalized[:, 1] / 90   # lat / 90

    # Get pixel coordinates
    x_pixel, y_pixel = lonlat_to_pixel(species_locs_normalized, grid_width, grid_height)

    # Ensure x_pixel and y_pixel are within bounds
    x_pixel = np.clip(x_pixel, 0, grid_width - 1)
    y_pixel = np.clip(y_pixel, 0, grid_height - 1)

    # Create data array
    data_array = np.zeros((grid_height, grid_width))

    # Set pixels where species is present
    data_array[y_pixel, x_pixel] = 1

    # Now call plot_heatmap
    title = f"Species presence for taxa {taxa_id}"
    save_loc = f"./images/species_presence_hr_{taxa_id}"
    plot_heatmap_2(data_array, save_loc)
    return True

if __name__ == '__main__':
    snt = True
    grid_height = 501
    grid_width = 1002
    taxa_id = 11901  # Or any taxa id you want to plot, as string

    #TODO: why snt true? can't generate gt for (hyacinth macaw(18938), yellow baboon(67683), pika(43188), southernflyingsquirrel (46272))
    generate_ground_truth(taxa_id=taxa_id, snt=snt, grid_height=grid_height, grid_width=grid_width)
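For reference, this is how app.py drives the module at runtime (a minimal sketch; it assumes paths.json points at a local copy of the IUCN data, since app.py passes isSnt=False):

from get_gt import generate_ground_truth

# Writes ./images/species_presence_43188.png/.pdf plus the high-resolution
# ./images/species_presence_hr_43188.png/.pdf that app.py shows as "Ground Truth Map".
generate_ground_truth(taxa_id=43188, snt=False)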
models.py
ADDED
@@ -0,0 +1,1434 @@
1 |
+
import torch
|
2 |
+
import torch.utils.data
|
3 |
+
import torch.nn as nn
|
4 |
+
import math
|
5 |
+
import csv
|
6 |
+
import numpy as np
|
7 |
+
import json
|
8 |
+
import os
|
9 |
+
|
10 |
+
|
11 |
+
def get_model(params, inference_only=False):
|
12 |
+
if params['model'] == 'ResidualFCNet':
|
13 |
+
return ResidualFCNet(params['input_dim'] + params['input_time_dim'] + (20 if 'env' in params['input_enc'] and 'contrastive' not in params['input_enc'] else 0) + (1 if params['noise_time'] else 0), params['num_classes'] + (20 if 'env' in params['loss'] else 0), params['num_filts'], params['depth'])
|
14 |
+
elif params['model'] == 'LinNet':
|
15 |
+
return LinNet(params['input_dim'] + params['input_time_dim'] + (20 if 'env' in params['input_enc'] else 0) + (1 if params['noise_time'] else 0), params['num_classes'])
|
16 |
+
elif params['model'] == 'HyperNet':
|
17 |
+
return HyperNet(params, params['input_dim'] + (20 if 'env' in params['input_enc'] else 0), params['num_classes'], params['num_filts'], params['depth'],
|
18 |
+
params['species_dim'], params['species_enc_depth'], params['species_filts'], params['species_enc'], inference_only=inference_only)
|
19 |
+
# chris models
|
20 |
+
elif params['model'] == 'MultiInputModel':
|
21 |
+
return MultiInputModel(num_inputs=params['input_dim'] + params['input_time_dim'] + (20 if 'env' in params['input_enc'] and 'contrastive' not in params['input_enc'] else 0) + (1 if params['noise_time'] else 0),
|
22 |
+
num_filts=params['num_filts'], num_classes=params['num_classes'] + (20 if 'env' in params['loss'] else 0),
|
23 |
+
depth=params['depth'], ema_factor=params['ema_factor'], nhead=params['num_heads'], num_encoder_layers=params['species_enc_depth'],
|
24 |
+
dim_feedforward=params['species_filts'], dropout=params['transformer_dropout'],
|
25 |
+
batch_first=True, token_dim=(params['species_dim'] + (20 if 'env' in params['transformer_input_enc'] else 0)),
|
26 |
+
sinr_inputs=True if 'sinr' in params['transformer_input_enc'] else False,
|
27 |
+
register=params['use_register'], use_pretrained_sinr=params['use_pretrained_sinr'],
|
28 |
+
freeze_sinr=params['freeze_sinr'], pretrained_loc=params['pretrained_loc'],
|
29 |
+
text_inputs=params['use_text_inputs'], class_token_transformation=params['class_token_transformation'])
|
30 |
+
elif params['model'] == 'VariableInputModel':
|
31 |
+
return VariableInputModel(num_inputs=params['input_dim'] + params['input_time_dim'] + (20 if 'env' in params['input_enc'] and 'contrastive' not in params['input_enc'] else 0) + (1 if params['noise_time'] else 0),
|
32 |
+
num_filts=params['num_filts'], num_classes=params['num_classes'] + (20 if 'env' in params['loss'] else 0),
|
33 |
+
depth=params['depth'], ema_factor=params['ema_factor'], nhead=params['num_heads'], num_encoder_layers=params['species_enc_depth'],
|
34 |
+
dim_feedforward=params['species_filts'], dropout=params['transformer_dropout'],
|
35 |
+
batch_first=True, token_dim=(params['species_dim'] + (20 if 'env' in params['transformer_input_enc'] else 0)),
|
36 |
+
sinr_inputs=True if 'sinr' in params['transformer_input_enc'] else False,
|
37 |
+
register=params['use_register'], use_pretrained_sinr=params['use_pretrained_sinr'],
|
38 |
+
freeze_sinr=params['freeze_sinr'], pretrained_loc=params['pretrained_loc'],
|
39 |
+
text_inputs=params['use_text_inputs'], image_inputs=params['use_image_inputs'],
|
40 |
+
env_inputs=params['use_env_inputs'],
|
41 |
+
class_token_transformation=params['class_token_transformation'])
|
42 |
+
|
43 |
+
# class VariableInputModel(nn.Module):
|
44 |
+
# def __init__(self, num_inputs, num_filts, num_classes, depth=4, nonlin='relu', lowrank=0, ema_factor=0.1,
|
45 |
+
# nhead=8, num_encoder_layers=4, dim_feedforward=2048, dropout=0.1, batch_first=True, token_dim=256,
|
46 |
+
# sinr_inputs=False, register=False, use_pretrained_sinr=False, freeze_sinr=False, pretrained_loc='',
|
47 |
+
# text_inputs=False, image_inputs=False, env_inputs=False, class_token_transformation='identity'):
|
48 |
+
|
49 |
+
|
50 |
+
class ResLayer(nn.Module):
|
51 |
+
def __init__(self, linear_size, activation=nn.ReLU, p=0.5):
|
52 |
+
super(ResLayer, self).__init__()
|
53 |
+
self.l_size = linear_size
|
54 |
+
self.nonlin1 = activation()
|
55 |
+
self.nonlin2 = activation()
|
56 |
+
self.dropout1 = nn.Dropout(p=p)
|
57 |
+
self.w1 = nn.Linear(self.l_size, self.l_size)
|
58 |
+
self.w2 = nn.Linear(self.l_size, self.l_size)
|
59 |
+
|
60 |
+
def forward(self, x):
|
61 |
+
y = self.w1(x)
|
62 |
+
y = self.nonlin1(y)
|
63 |
+
y = self.dropout1(y)
|
64 |
+
y = self.w2(y)
|
65 |
+
y = self.nonlin2(y)
|
66 |
+
out = x + y
|
67 |
+
return out
|
68 |
+
|
69 |
+
|
70 |
+
class ResidualFCNet(nn.Module):
|
71 |
+
def __init__(self, num_inputs, num_classes, num_filts, depth=4, nonlin='relu', lowrank=0, dropout_p=0.5):
|
72 |
+
super(ResidualFCNet, self).__init__()
|
73 |
+
self.inc_bias = False
|
74 |
+
if lowrank < num_filts and lowrank != 0:
|
75 |
+
l1 = nn.Linear(num_filts if depth != -1 else num_inputs, lowrank, bias=self.inc_bias)
|
76 |
+
l2 = nn.Linear(lowrank, num_classes, bias=self.inc_bias)
|
77 |
+
self.class_emb = nn.Sequential(l1, l2)
|
78 |
+
else:
|
79 |
+
self.class_emb = nn.Linear(num_filts if depth != -1 else num_inputs, num_classes, bias=self.inc_bias)
|
80 |
+
if nonlin == 'relu':
|
81 |
+
activation = nn.ReLU
|
82 |
+
elif nonlin == 'silu':
|
83 |
+
activation = nn.SiLU
|
84 |
+
else:
|
85 |
+
raise NotImplementedError('Invalid nonlinearity specified.')
|
86 |
+
layers = []
|
87 |
+
if depth != -1:
|
88 |
+
layers.append(nn.Linear(num_inputs, num_filts))
|
89 |
+
layers.append(activation())
|
90 |
+
for i in range(depth):
|
91 |
+
layers.append(ResLayer(num_filts, activation=activation))
|
92 |
+
else:
|
93 |
+
layers.append(nn.Identity())
|
94 |
+
self.feats = torch.nn.Sequential(*layers)
|
95 |
+
|
96 |
+
def forward(self, x, class_of_interest=None, return_feats=False):
|
97 |
+
loc_emb = self.feats(x)
|
98 |
+
if return_feats:
|
99 |
+
return loc_emb
|
100 |
+
if class_of_interest is None:
|
101 |
+
class_pred = self.class_emb(loc_emb)
|
102 |
+
else:
|
103 |
+
class_pred = self.eval_single_class(loc_emb, class_of_interest), self.eval_single_class(loc_emb, -1)
|
104 |
+
return torch.sigmoid(class_pred[0]), torch.sigmoid(class_pred[1])
|
105 |
+
return torch.sigmoid(class_pred)
|
106 |
+
|
107 |
+
def eval_single_class(self, x, class_of_interest):
|
108 |
+
if self.inc_bias:
|
109 |
+
return x @ self.class_emb.weight[class_of_interest, :] + self.class_emb.bias[class_of_interest]
|
110 |
+
else:
|
111 |
+
return x @ self.class_emb.weight[class_of_interest, :]
|
112 |
+
|
113 |
+
|
114 |
+
class SimpleFCNet(ResidualFCNet):
|
115 |
+
def forward(self, x, return_feats=True):
|
116 |
+
assert return_feats
|
117 |
+
loc_emb = self.feats(x)
|
118 |
+
class_pred = self.class_emb(loc_emb)
|
119 |
+
return class_pred
|
120 |
+
|
121 |
+
|
122 |
+
class MockTransformer(nn.Module):
|
123 |
+
def __init__(self, num_classes, num_dims):
|
124 |
+
super(MockTransformer, self).__init__()
|
125 |
+
self.species_emb = nn.Embedding(num_embeddings=num_classes, embedding_dim=num_dims)
|
126 |
+
|
127 |
+
def forward(self, class_ids):
|
128 |
+
return self.species_emb(class_ids)
|
129 |
+
|
130 |
+
|
131 |
+
class CombinedModel(nn.Module):
|
132 |
+
def __init__(self, num_inputs, num_filts, num_classes, depth=4, nonlin='relu', lowrank=0, ema_factor=0.1):
|
133 |
+
super(CombinedModel, self).__init__()
|
134 |
+
self.headless_model = HeadlessSINR(num_inputs, num_filts, depth, nonlin, lowrank)
|
135 |
+
if lowrank < num_filts and lowrank != 0:
|
136 |
+
self.transformer_model = MockTransformer(num_classes, lowrank)
|
137 |
+
else:
|
138 |
+
self.transformer_model = MockTransformer(num_classes, num_filts)
|
139 |
+
self.ema_factor = ema_factor
|
140 |
+
self.ema_embeddings = nn.Embedding(num_embeddings=num_classes, embedding_dim=lowrank if (lowrank < num_filts and lowrank != 0) else num_filts)
|
141 |
+
self.ema_embeddings.weight.data.copy_(self.transformer_model.species_emb.weight.data) # Initialize EMA with the same values as transformer
|
142 |
+
# this will have to change when I start using the actual transformer
|
143 |
+
|
144 |
+
def forward(self, x, class_ids=None, return_feats=False, return_class_embeddings=False, class_of_interest=None):
|
145 |
+
# Process input through the headless model to get feature embeddings
|
146 |
+
feature_embeddings = self.headless_model(x)
|
147 |
+
|
148 |
+
if return_feats:
|
149 |
+
return feature_embeddings
|
150 |
+
else:
|
151 |
+
if class_of_interest == None:
|
152 |
+
# Get class-specific embeddings based on class_ids
|
153 |
+
class_embeddings = self.transformer_model(class_ids)
|
154 |
+
if return_class_embeddings:
|
155 |
+
return class_embeddings
|
156 |
+
else:
|
157 |
+
# Update EMA embeddings for these class IDs
|
158 |
+
if self.training:
|
159 |
+
self.update_ema_embeddings(class_ids, class_embeddings)
|
160 |
+
|
161 |
+
# Matrix multiplication to produce logits
|
162 |
+
logits = feature_embeddings @ class_embeddings.T
|
163 |
+
|
164 |
+
# Apply sigmoid to convert logits to probabilities
|
165 |
+
probabilities = torch.sigmoid(logits)
|
166 |
+
|
167 |
+
return probabilities
|
168 |
+
else:
|
169 |
+
device = self.ema_embeddings.weight.device
|
170 |
+
class_of_interest_tensor =torch.tensor([class_of_interest]).to(device)
|
171 |
+
class_embedding = self.get_ema_embeddings(class_of_interest_tensor)
|
172 |
+
print(f'using EMA estimate for class {class_of_interest}')
|
173 |
+
if return_class_embeddings:
|
174 |
+
return class_embedding
|
175 |
+
else:
|
176 |
+
# Matrix multiplication to produce logits
|
177 |
+
logits = feature_embeddings @ class_embedding.T
|
178 |
+
|
179 |
+
# Apply sigmoid to convert logits to probabilities
|
180 |
+
probabilities = torch.sigmoid(logits)
|
181 |
+
probabilities = probabilities.squeeze()
|
182 |
+
|
183 |
+
return probabilities
|
184 |
+
|
185 |
+
def update_ema_embeddings(self, class_ids, current_embeddings):
|
186 |
+
if self.training:
|
187 |
+
# Get current EMA embeddings for the class IDs
|
188 |
+
ema_current = self.ema_embeddings(class_ids)
|
189 |
+
|
190 |
+
# Calculate new EMA values
|
191 |
+
ema_new = self.ema_factor * current_embeddings + (1 - self.ema_factor) * ema_current
|
192 |
+
|
193 |
+
# Update the EMA embeddings
|
194 |
+
self.ema_embeddings.weight.data[class_ids] = ema_new.detach() # Detach to prevent gradients from flowing here
|
195 |
+
|
196 |
+
def get_ema_embeddings(self, class_ids):
|
197 |
+
# Method to access EMA embeddings
|
198 |
+
return self.ema_embeddings(class_ids)
|
199 |
+
|
200 |
+
class HeadlessSINR(nn.Module):
|
201 |
+
def __init__(self, num_inputs, num_filts, depth=4, nonlin='relu', lowrank=0, dropout_p=0.5):
|
202 |
+
super(HeadlessSINR, self).__init__()
|
203 |
+
self.inc_bias = False
|
204 |
+
self.low_rank_feats = None
|
205 |
+
if lowrank < num_filts and lowrank != 0:
|
206 |
+
l1 = nn.Linear(num_filts if depth != -1 else num_inputs, lowrank, bias=self.inc_bias)
|
207 |
+
self.low_rank_feats = l1
|
208 |
+
# else:
|
209 |
+
# self.class_emb = nn.Linear(num_filts if depth != -1 else num_inputs, num_classes, bias=self.inc_bias)
|
210 |
+
if nonlin == 'relu':
|
211 |
+
activation = nn.ReLU
|
212 |
+
elif nonlin == 'silu':
|
213 |
+
activation = nn.SiLU
|
214 |
+
else:
|
215 |
+
raise NotImplementedError('Invalid nonlinearity specified.')
|
216 |
+
|
217 |
+
# Create the layers list for feature extraction
|
218 |
+
layers = []
|
219 |
+
if depth != -1:
|
220 |
+
layers.append(nn.Linear(num_inputs, num_filts))
|
221 |
+
layers.append(activation())
|
222 |
+
for i in range(depth):
|
223 |
+
layers.append(ResLayer(num_filts, activation=activation, p=dropout_p))
|
224 |
+
else:
|
225 |
+
layers.append(nn.Identity())
|
226 |
+
# Include low-rank features in the sequential model if it is defined
|
227 |
+
if self.low_rank_feats:
|
228 |
+
# Apply initial layers then low-rank features
|
229 |
+
layers.append(self.low_rank_feats)
|
230 |
+
# Set up the features as a sequential model
|
231 |
+
self.feats = nn.Sequential(*layers)
|
232 |
+
|
233 |
+
def forward(self, x):
|
234 |
+
loc_emb = self.feats(x)
|
235 |
+
return loc_emb
|
236 |
+
|
237 |
+
|
238 |
+
class TransformerEncoderModel(nn.Module):
|
239 |
+
def __init__(self, d_model=256, nhead=8, num_encoder_layers=4, dim_feedforward=2048, dropout=0.1, activation='relu',
|
240 |
+
batch_first=True, output_dim=256): # BATCH FIRST MIGHT HAVE TO CHANGE
|
241 |
+
super(TransformerEncoderModel, self).__init__()
|
242 |
+
self.input_layer_norm = nn.LayerNorm(normalized_shape=d_model)
|
243 |
+
# Create an encoder layer
|
244 |
+
encoder_layer = nn.TransformerEncoderLayer(
|
245 |
+
d_model=d_model,
|
246 |
+
nhead=nhead,
|
247 |
+
dim_feedforward=dim_feedforward,
|
248 |
+
dropout=dropout,
|
249 |
+
activation=activation,
|
250 |
+
batch_first=batch_first
|
251 |
+
)
|
252 |
+
|
253 |
+
# Stack the encoder layers into an encoder module
|
254 |
+
self.transformer_encoder = nn.TransformerEncoder(
|
255 |
+
encoder_layer=encoder_layer,
|
256 |
+
num_layers=num_encoder_layers
|
257 |
+
)
|
258 |
+
|
259 |
+
# Example output layer (modify according to your needs)
|
260 |
+
self.output_layer = nn.Linear(d_model, output_dim)
|
261 |
+
|
262 |
+
def forward(self, src, src_mask=None, src_key_padding_mask=None):
|
263 |
+
"""
|
264 |
+
Args:
|
265 |
+
src: the sequence to the encoder (shape: [seq_length, batch_size, d_model])
|
266 |
+
src_mask: the mask for the src sequence (shape: [seq_length, seq_length])
|
267 |
+
src_key_padding_mask: the mask for the padding tokens (shape: [batch_size, seq_length])
|
268 |
+
|
269 |
+
Returns:
|
270 |
+
output of the transformer encoder
|
271 |
+
"""
|
272 |
+
# Pass the input through the transformer encoder
|
273 |
+
encoder_input = self.input_layer_norm(src)
|
274 |
+
encoder_output = self.transformer_encoder(encoder_input, src_key_padding_mask=src_key_padding_mask, mask=src_mask)
|
275 |
+
|
276 |
+
# # Pass the encoder output through the output layer
|
277 |
+
# output = self.output_layer(encoder_output)
|
278 |
+
|
279 |
+
# Assuming the class token is the first in the sequence
|
280 |
+
# batch_first so we have (batch, sequence, dim)
|
281 |
+
if encoder_output.ndim == 2:
|
282 |
+
# in situations where we don't have a batch
|
283 |
+
encoder_output = encoder_output.unsqueeze(0)
|
284 |
+
|
285 |
+
class_token_embedding = encoder_output[:, 0, :]
|
286 |
+
|
287 |
+
output = self.output_layer(class_token_embedding) # Process only the class token embedding
|
288 |
+
return output
|
289 |
+
|
290 |
+
|
291 |
+
class MultiInputModel(nn.Module):
|
292 |
+
def __init__(self, num_inputs, num_filts, num_classes, depth=4, nonlin='relu', lowrank=0, ema_factor=0.1,
|
293 |
+
nhead=8, num_encoder_layers=4, dim_feedforward=2048, dropout=0.1, batch_first=True, token_dim=256,
|
294 |
+
sinr_inputs=False, register=False, use_pretrained_sinr=False, freeze_sinr=False, pretrained_loc='',
|
295 |
+
text_inputs=False, class_token_transformation='identity'):
|
296 |
+
super(MultiInputModel, self).__init__()
|
297 |
+
|
298 |
+
self.headless_model = HeadlessSINR(num_inputs, num_filts, depth, nonlin, lowrank, dropout_p=dropout)
|
299 |
+
self.ema_factor = ema_factor
|
300 |
+
self.class_token_transformation = class_token_transformation
|
301 |
+
|
302 |
+
# Load pretrained state_dict if use_pretrained_sinr is set to True
|
303 |
+
if use_pretrained_sinr:
|
304 |
+
#pretrained_state_dict = torch.load(pretrained_loc, weights_only=False)['state_dict']
|
305 |
+
pretrained_state_dict = torch.load(pretrained_loc, map_location=torch.device('cpu'))['state_dict']
|
306 |
+
filtered_state_dict = {k: v for k, v in pretrained_state_dict.items() if not k.startswith('class_emb')}
|
307 |
+
self.headless_model.load_state_dict(filtered_state_dict, strict=False)
|
308 |
+
#print(f'Using pretrained sinr from {pretrained_loc}')
|
309 |
+
|
310 |
+
# Freeze the SINR model if freeze_sinr is set to True
|
311 |
+
if freeze_sinr:
|
312 |
+
for param in self.headless_model.parameters():
|
313 |
+
param.requires_grad = False
|
314 |
+
print("Freezing SINR model parameters")
|
315 |
+
|
316 |
+
# self.transformer_model = MockTransformer(num_classes, num_filts)
|
317 |
+
self.transformer_model = TransformerEncoderModel(d_model=token_dim,
|
318 |
+
nhead=nhead,
|
319 |
+
num_encoder_layers=num_encoder_layers,
|
320 |
+
dim_feedforward=dim_feedforward,
|
321 |
+
dropout=dropout,
|
322 |
+
batch_first=batch_first,
|
323 |
+
output_dim=num_filts)
|
324 |
+
|
325 |
+
self.ema_embeddings = nn.Embedding(num_embeddings=num_classes, embedding_dim=num_filts)
|
326 |
+
# this is just a workaround for now to load eval embeddings - probably not needed long term
|
327 |
+
self.eval_embeddings = nn.Embedding(num_embeddings=num_classes, embedding_dim=num_filts)
|
328 |
+
self.ema_embeddings.weight.requires_grad = False
|
329 |
+
self.eval_embeddings.weight.requires_grad = False
|
330 |
+
self.num_filts=num_filts
|
331 |
+
self.token_dim = token_dim
|
332 |
+
# nn.init.xavier_uniform_(self.ema_embeddings.weight) # not needed I think
|
333 |
+
self.sinr_inputs = sinr_inputs
|
334 |
+
if self.sinr_inputs:
|
335 |
+
if self.num_filts != self.token_dim and self.class_token_transformation == 'identity':
|
336 |
+
raise ValueError("If using sinr inputs to transformer with identity class token transformation"
|
337 |
+
"then token_dim of transformer must be equal to num_filts of sinr model")
|
338 |
+
|
339 |
+
# Add a class token
|
340 |
+
self.class_token = nn.Parameter(torch.empty(1, self.token_dim))
|
341 |
+
nn.init.xavier_uniform_(self.class_token)
|
342 |
+
|
343 |
+
if register:
|
344 |
+
# Add a register token initialized with Xavier uniform initialization
|
345 |
+
self.register = nn.Parameter(torch.empty(1, self.token_dim))
|
346 |
+
# self.register = (self.register / 2)
|
347 |
+
nn.init.xavier_uniform_(self.register)
|
348 |
+
else:
|
349 |
+
self.register = None
|
350 |
+
|
351 |
+
self.text_inputs = text_inputs
|
352 |
+
if self.text_inputs:
|
353 |
+
#print("JUST USING A HEADLESS SINR FOR THE TEXT MODEL RIGHT NOW")
|
354 |
+
self.text_model=HeadlessSINR(num_inputs=4096, num_filts=512, depth=2, nonlin=nonlin, lowrank=token_dim, dropout_p=dropout)
|
355 |
+
else:
|
356 |
+
self.text_model=None
|
357 |
+
|
358 |
+
# Type-specific embeddings for class, register, location, and text tokens
|
359 |
+
self.class_type_embedding = nn.Parameter(torch.empty(1, self.token_dim))
|
360 |
+
nn.init.xavier_uniform_(self.class_type_embedding)
|
361 |
+
if register:
|
362 |
+
self.register_type_embedding = nn.Parameter(torch.empty(1, self.token_dim))
|
363 |
+
nn.init.xavier_uniform_(self.register_type_embedding)
|
364 |
+
self.location_type_embedding = nn.Parameter(torch.empty(1, self.token_dim))
|
365 |
+
nn.init.xavier_uniform_(self.location_type_embedding)
|
366 |
+
if text_inputs:
|
367 |
+
self.text_type_embedding = nn.Parameter(torch.empty(1, self.token_dim))
|
368 |
+
nn.init.xavier_uniform_(self.text_type_embedding)
|
369 |
+
|
370 |
+
# Instantiate the class token transformation module
|
371 |
+
if class_token_transformation == 'identity':
|
372 |
+
self.class_token_transform = Identity(token_dim, num_filts)
|
373 |
+
elif class_token_transformation == 'linear':
|
374 |
+
self.class_token_transform = LinearTransformation(token_dim, num_filts)
|
375 |
+
elif class_token_transformation == 'single_layer_nn':
|
376 |
+
self.class_token_transform = SingleLayerNN(token_dim, num_filts, dropout_p=dropout)
|
377 |
+
elif class_token_transformation == 'two_layer_nn':
|
378 |
+
self.class_token_transform = TwoLayerNN(token_dim, num_filts, dropout_p=dropout)
|
379 |
+
elif class_token_transformation == 'sinr':
|
380 |
+
self.class_token_transform = HeadlessSINR(token_dim, num_filts, depth, nonlin, lowrank, dropout_p=dropout)
|
381 |
+
else:
|
382 |
+
raise ValueError(f"Unknown class_token_transformation: {class_token_transformation}")
|
383 |
+
|
384 |
+
|
385 |
+
def forward(self, x, context_sequence, context_mask, class_ids=None, return_feats=False, return_class_embeddings=False, class_of_interest=None, use_eval_embeddings=False, text_emb=None):
|
386 |
+
# Process input through the headless model to get feature embeddings
|
387 |
+
feature_embeddings = self.headless_model(x)
|
388 |
+
|
389 |
+
if return_feats:
|
390 |
+
return feature_embeddings
|
391 |
+
|
392 |
+
if context_sequence.dim() == 2:
|
393 |
+
context_sequence = context_sequence.unsqueeze(0) # Add batch dimension if missing
|
394 |
+
|
395 |
+
context_sequence = context_sequence[:, 1:, :]
|
396 |
+
|
397 |
+
if self.sinr_inputs:
|
398 |
+
# Pass through the headless model
|
399 |
+
context_sequence = self.headless_model(context_sequence)
|
400 |
+
|
401 |
+
# Add type-specific embedding to each location token
|
402 |
+
# print("SEE IF THIS WORKS")
|
403 |
+
context_sequence += self.location_type_embedding
|
404 |
+
|
405 |
+
batch_size = context_sequence.size(0)
|
406 |
+
|
407 |
+
# Expand the class token to match the batch size and add its type-specific embedding
|
408 |
+
class_token_expanded = self.class_token.expand(batch_size, -1, -1) + self.class_type_embedding
|
409 |
+
|
410 |
+
if self.text_inputs and (text_emb is not None):
|
411 |
+
text_mask = (text_emb.sum(dim=1) == 0)
|
412 |
+
text_emb = self.text_model(text_emb)
|
413 |
+
text_emb += self.text_type_embedding
|
414 |
+
text_emb[text_mask] = 0
|
415 |
+
# Reshape text_emb to have the shape (batch_size, 1, embedding_dim)
|
416 |
+
text_emb = text_emb.unsqueeze(1)
|
417 |
+
|
418 |
+
|
419 |
+
if self.register is None:
|
420 |
+
# context sequence = learnable class_token + rest of sequence
|
421 |
+
if self.text_inputs:
|
422 |
+
# Add the class token and text embeddings to the context sequence
|
423 |
+
context_sequence = torch.cat((class_token_expanded, text_emb, context_sequence), dim=1)
|
424 |
+
# Pad the context mask to account for the added text embeddings
|
425 |
+
context_mask = nn.functional.pad(context_mask, pad=(1, 0), value=False)
|
426 |
+
# Update the new part of the mask with the text_mask
|
427 |
+
context_mask[:, 1] = text_mask # Apply mask directly
|
428 |
+
else:
|
429 |
+
context_sequence = torch.cat((class_token_expanded, context_sequence), dim=1)
|
430 |
+
else:
|
431 |
+
# Expand the register token to match the batch size and add its type-specific embedding
|
432 |
+
register_expanded = self.register.expand(batch_size, -1, -1) + self.register_type_embedding
|
433 |
+
if self.text_inputs:
|
434 |
+
# Add all components: class token, register, text embeddings, and context
|
435 |
+
context_sequence = torch.cat((class_token_expanded, register_expanded, text_emb, context_sequence),
|
436 |
+
dim=1)
|
437 |
+
# Double pad the context mask: first for register, then for text embeddings
|
438 |
+
context_mask = nn.functional.pad(context_mask, pad=(1, 0), value=False)
|
439 |
+
context_mask = nn.functional.pad(context_mask, pad=(1, 0), value=False)
|
440 |
+
# Update the new part of the mask for text embeddings
|
441 |
+
context_mask[:, register_expanded.size(1) + 1] = text_mask # Apply mask directly
|
442 |
+
else:
|
443 |
+
context_sequence = torch.cat((class_token_expanded, register_expanded, context_sequence), dim=1)
|
444 |
+
# Update the context mask to account for the register token
|
445 |
+
context_mask = nn.functional.pad(context_mask, pad=(1, 0), value=False)
|
446 |
+
|
447 |
+
if use_eval_embeddings == False:
|
448 |
+
if class_of_interest == None:
|
449 |
+
# Get class-specific embeddings based on class_ids
|
450 |
+
class_token_output = self.transformer_model(src=context_sequence, src_key_padding_mask=context_mask)
|
451 |
+
# pass these through the class token transformation
|
452 |
+
class_embeddings = self.class_token_transform(class_token_output) # Shape: (batch_size, num_filts)
|
453 |
+
|
454 |
+
if return_class_embeddings:
|
455 |
+
return class_embeddings
|
456 |
+
else:
|
457 |
+
# Update EMA embeddings for these class IDs
|
458 |
+
with torch.no_grad():
|
459 |
+
if self.training:
|
460 |
+
self.update_ema_embeddings(class_ids, class_embeddings)
|
461 |
+
|
462 |
+
# Matrix multiplication to produce logits
|
463 |
+
logits = feature_embeddings @ class_embeddings.T
|
464 |
+
|
465 |
+
# Apply sigmoid to convert logits to probabilities
|
466 |
+
probabilities = torch.sigmoid(logits)
|
467 |
+
|
468 |
+
return probabilities
|
469 |
+
else:
|
470 |
+
device = self.ema_embeddings.weight.device
|
471 |
+
class_of_interest_tensor = torch.tensor([class_of_interest]).to(device)
|
472 |
+
class_embedding = self.get_ema_embeddings(class_of_interest_tensor)
|
473 |
+
print(f'using EMA estimate for class {class_of_interest}')
|
474 |
+
if return_class_embeddings:
|
475 |
+
return class_embedding
|
476 |
+
else:
|
477 |
+
# Matrix multiplication to produce logits
|
478 |
+
logits = feature_embeddings @ class_embedding.T
|
479 |
+
|
480 |
+
# Apply sigmoid to convert logits to probabilities
|
481 |
+
probabilities = torch.sigmoid(logits)
|
482 |
+
probabilities = probabilities.squeeze()
|
483 |
+
return probabilities
|
484 |
+
else:
|
485 |
+
self.eval()
|
486 |
+
if not hasattr(self, 'eval_embeddings'):
|
487 |
+
self.eval_embeddings = self.ema_embeddings
|
488 |
+
if class_of_interest == None:
|
489 |
+
# Get class-specific embeddings based on class_ids
|
490 |
+
class_token_output = self.transformer_model(src=context_sequence, src_key_padding_mask=context_mask)
|
491 |
+
class_embeddings = self.class_token_transform(class_token_output)
|
492 |
+
# Update EMA embeddings for these class IDs
|
493 |
+
|
494 |
+
self.generate_eval_embeddings(class_ids, class_embeddings)
|
495 |
+
|
496 |
+
# Matrix multiplication to produce logits
|
497 |
+
logits = feature_embeddings @ class_embeddings.T
|
498 |
+
|
499 |
+
# Apply sigmoid to convert logits to probabilities
|
500 |
+
probabilities = torch.sigmoid(logits)
|
501 |
+
|
502 |
+
return probabilities
|
503 |
+
else:
|
504 |
+
device = self.ema_embeddings.weight.device
|
505 |
+
class_of_interest_tensor = torch.tensor([class_of_interest]).to(device)
|
506 |
+
class_embedding = self.get_eval_embeddings(class_of_interest_tensor)
|
507 |
+
print(f'using eval embedding for class {class_of_interest}')
|
508 |
+
if return_class_embeddings:
|
509 |
+
return class_embedding
|
510 |
+
else:
|
511 |
+
# Matrix multiplication to produce logits
|
512 |
+
logits = feature_embeddings @ class_embedding.T
|
513 |
+
|
514 |
+
# Apply sigmoid to convert logits to probabilities
|
515 |
+
probabilities = torch.sigmoid(logits)
|
516 |
+
probabilities = probabilities.squeeze()
|
517 |
+
return probabilities
|
518 |
+
|
519 |
+
def init_eval_embeddings(self, num_classes):
|
520 |
+
self.eval_embeddings = nn.Embedding(num_embeddings=num_classes, embedding_dim=self.num_filts)
|
521 |
+
nn.init.xavier_uniform_(self.eval_embeddings.weight)
|
522 |
+
|
523 |
+
def get_ema_embeddings(self, class_ids):
|
524 |
+
# Method to access EMA embeddings
|
525 |
+
return self.ema_embeddings(class_ids)
|
526 |
+
|
527 |
+
def get_eval_embeddings(self, class_ids):
|
528 |
+
# Method to access eval embeddings
|
529 |
+
return self.eval_embeddings(class_ids)
|
530 |
+
|
531 |
+
def update_ema_embeddings(self, class_ids, current_embeddings):
|
532 |
+
if self.training:
|
533 |
+
# Get unique class IDs and their counts
|
534 |
+
unique_class_ids, inverse_indices, counts = class_ids.unique(return_counts=True, return_inverse=True)
|
535 |
+
|
536 |
+
# Get current EMA embeddings for unique class IDs
|
537 |
+
ema_current = self.ema_embeddings(unique_class_ids)
|
538 |
+
|
539 |
+
# Initialize a placeholder for new EMA values
|
540 |
+
ema_new = torch.zeros_like(ema_current)
|
541 |
+
|
542 |
+
# Compute the average of current embeddings for each unique class ID
|
543 |
+
current_sum = torch.zeros_like(ema_current)
|
544 |
+
current_sum.index_add_(0, inverse_indices, current_embeddings)
|
545 |
+
current_avg = current_sum / counts.unsqueeze(1)
|
546 |
+
|
547 |
+
# Apply EMA update formula
|
548 |
+
ema_new = self.ema_factor * current_avg + (1 - self.ema_factor) * ema_current
|
549 |
+
|
550 |
+
# Update the EMA embeddings for unique class IDs
|
551 |
+
self.ema_embeddings.weight.data[unique_class_ids] = ema_new.detach() # Detach to prevent gradients
|
552 |
+
|
553 |
+
def generate_eval_embeddings(self, class_id, current_embedding):
|
554 |
+
self.eval_embeddings.weight.data[class_id, :] = current_embedding.detach() # Detach to prevent gradients
|
555 |
+
|
556 |
+
# self.eval_embeddings.weight.data[class_id] = self.ema_embeddings.weight.data[class_id] # Detach to prevent gradients
|
557 |
+
|
558 |
+
|
559 |
+
def embedding_forward(self, x, class_ids=None, return_feats=False, return_class_embeddings=False, class_of_interest=None, eval=False):
|
560 |
+
# forward method that uses ema or eval embeddings rather than context sequence
|
561 |
+
|
562 |
+
# Process input through the headless model to get feature embeddings
|
563 |
+
feature_embeddings = self.headless_model(x)
|
564 |
+
|
565 |
+
if return_feats:
|
566 |
+
return feature_embeddings
|
567 |
+
else:
|
568 |
+
if class_of_interest == None:
|
569 |
+
# Get class-specific embeddings based on class_ids
|
570 |
+
if eval == False:
|
571 |
+
class_embeddings = self.get_ema_embeddings(class_ids=class_ids)
|
572 |
+
else:
|
573 |
+
class_embeddings = self.get_eval_embeddings(class_ids=class_ids)
|
574 |
+
if return_class_embeddings:
|
575 |
+
return class_embeddings
|
576 |
+
else:
|
577 |
+
# Matrix multiplication to produce logits
|
578 |
+
logits = feature_embeddings @ class_embeddings.T
|
579 |
+
|
580 |
+
# Apply sigmoid to convert logits to probabilities
|
581 |
+
probabilities = torch.sigmoid(logits)
|
582 |
+
|
583 |
+
return probabilities
|
584 |
+
else:
|
585 |
+
if eval == False:
|
586 |
+
device = self.ema_embeddings.weight.device
|
587 |
+
class_of_interest_tensor = torch.tensor([class_of_interest]).to(device)
|
588 |
+
class_embedding = self.get_ema_embeddings(class_of_interest_tensor)
|
589 |
+
print(f'using EMA estimate for class {class_of_interest}')
|
590 |
+
if return_class_embeddings:
|
591 |
+
return class_embedding
|
592 |
+
else:
|
593 |
+
# Matrix multiplication to produce logits
|
594 |
+
logits = feature_embeddings @ class_embedding.T
|
595 |
+
|
596 |
+
# Apply sigmoid to convert logits to probabilities
|
597 |
+
probabilities = torch.sigmoid(logits)
|
598 |
+
probabilities = probabilities.squeeze()
|
599 |
+
|
600 |
+
return probabilities
|
601 |
+
|
602 |
+
else:
|
603 |
+
device = self.eval_embeddings.weight.device
|
604 |
+
class_of_interest_tensor = torch.tensor([class_of_interest]).to(device)
|
605 |
+
class_embedding = self.get_eval_embeddings(class_of_interest_tensor)
|
606 |
+
#print(f'using eval estimate for class {class_of_interest}')
|
607 |
+
if return_class_embeddings:
|
608 |
+
return class_embedding
|
609 |
+
else:
|
610 |
+
# Matrix multiplication to produce logits
|
611 |
+
logits = feature_embeddings @ class_embedding.T
|
612 |
+
|
613 |
+
# Apply sigmoid to convert logits to probabilities
|
614 |
+
probabilities = torch.sigmoid(logits)
|
615 |
+
probabilities = probabilities.squeeze()
|
616 |
+
|
617 |
+
return probabilities
|
618 |
+
|
619 |
+
class VariableInputModel(nn.Module):
|
620 |
+
def __init__(self, num_inputs, num_filts, num_classes, depth=4, nonlin='relu', lowrank=0, ema_factor=0.1,
|
621 |
+
nhead=8, num_encoder_layers=4, dim_feedforward=2048, dropout=0.1, batch_first=True, token_dim=256,
|
622 |
+
sinr_inputs=False, register=False, use_pretrained_sinr=False, freeze_sinr=False, pretrained_loc='',
|
623 |
+
text_inputs=False, image_inputs=False, env_inputs=False, class_token_transformation='identity'):
|
624 |
+
|
625 |
+
super(VariableInputModel, self).__init__()
|
626 |
+
self.headless_model = HeadlessSINR(num_inputs, num_filts, depth, nonlin, lowrank, dropout_p=dropout)
|
627 |
+
self.ema_factor = ema_factor
|
628 |
+
self.class_token_transformation = class_token_transformation
|
629 |
+
|
630 |
+
# Load pretrained state_dict if use_pretrained_sinr is set to True
|
631 |
+
if use_pretrained_sinr:
|
632 |
+
pretrained_state_dict = torch.load(pretrained_loc, weights_only=False)['state_dict']
|
633 |
+
filtered_state_dict = {k: v for k, v in pretrained_state_dict.items() if not k.startswith('class_emb')}
|
634 |
+
self.headless_model.load_state_dict(filtered_state_dict, strict=False)
|
635 |
+
#print(f'Using pretrained sinr from {pretrained_loc}')
|
636 |
+
|
637 |
+
# Freeze the SINR model if freeze_sinr is set to True
|
638 |
+
if freeze_sinr:
|
639 |
+
for param in self.headless_model.parameters():
|
640 |
+
param.requires_grad = False
|
641 |
+
print("Freezing SINR model parameters")
|
642 |
+
|
643 |
+
# self.transformer_model = MockTransformer(num_classes, num_filts)
|
644 |
+
self.transformer_model = TransformerEncoderModel(d_model=token_dim,
|
645 |
+
nhead=nhead,
|
646 |
+
num_encoder_layers=num_encoder_layers,
|
647 |
+
dim_feedforward=dim_feedforward,
|
648 |
+
dropout=dropout,
|
649 |
+
batch_first=batch_first,
|
650 |
+
output_dim=num_filts)
|
651 |
+
|
652 |
+
self.ema_embeddings = nn.Embedding(num_embeddings=num_classes, embedding_dim=num_filts)
|
653 |
+
# this is just a workaround for now to load eval embeddings - probably not needed long term
|
654 |
+
self.eval_embeddings = nn.Embedding(num_embeddings=num_classes, embedding_dim=num_filts)
|
655 |
+
self.ema_embeddings.weight.requires_grad = False
|
656 |
+
self.eval_embeddings.weight.requires_grad = False
|
657 |
+
self.num_filts=num_filts
|
658 |
+
self.token_dim = token_dim
|
659 |
+
# nn.init.xavier_uniform_(self.ema_embeddings.weight) # not needed I think
|
660 |
+
self.sinr_inputs = sinr_inputs
|
661 |
+
if self.sinr_inputs:
|
662 |
+
if self.num_filts != self.token_dim and self.class_token_transformation == 'identity':
|
663 |
+
raise ValueError("If using sinr inputs to transformer with identity class token transformation"
|
664 |
+
"then token_dim of transformer must be equal to num_filts of sinr model")
|
665 |
+
|
666 |
+
# Add a class token
|
667 |
+
self.class_token = nn.Parameter(torch.empty(1, self.token_dim))
|
668 |
+
nn.init.xavier_uniform_(self.class_token)
|
669 |
+
|
670 |
+
if register:
|
671 |
+
# Add a register token initialized with Xavier uniform initialization
|
672 |
+
self.register = nn.Parameter(torch.empty(1, self.token_dim))
|
673 |
+
# self.register = (self.register / 2)
|
674 |
+
nn.init.xavier_uniform_(self.register)
|
675 |
+
else:
|
676 |
+
self.register = None
|
677 |
+
|
678 |
+
self.text_inputs = text_inputs
|
679 |
+
if self.text_inputs:
|
680 |
+
print("JUST USING A HEADLESS SINR FOR THE TEXT MODEL RIGHT NOW")
|
681 |
+
self.text_model=HeadlessSINR(num_inputs=4096, num_filts=512, depth=2, nonlin=nonlin, lowrank=token_dim, dropout_p=dropout)
|
682 |
+
else:
|
683 |
+
self.text_model=None
|
684 |
+
self.image_inputs = image_inputs
|
685 |
+
if self.image_inputs:
|
686 |
+
print("JUST USING A HEADLESS SINR FOR THE IMAGE MODEL RIGHT NOW")
|
687 |
+
self.image_model=HeadlessSINR(num_inputs=1024, num_filts=512, depth=2, nonlin=nonlin, lowrank=token_dim, dropout_p=dropout)
|
688 |
+
else:
|
689 |
+
self.image_model=None
|
690 |
+
self.env_inputs = env_inputs
|
691 |
+
if self.env_inputs:
|
692 |
+
print("JUST USING A HEADLESS SINR FOR THE ENV MODEL RIGHT NOW")
|
693 |
+
self.env_model=HeadlessSINR(num_inputs=20, num_filts=512, depth=2, nonlin=nonlin, lowrank=token_dim, dropout_p=dropout)
|
694 |
+
else:
|
695 |
+
self.env_model=None
|
696 |
+
|
697 |
+
# Type-specific embeddings for class, register, location, text, image and env tokens
|
698 |
+
self.class_type_embedding = nn.Parameter(torch.empty(1, self.token_dim))
|
699 |
+
nn.init.xavier_uniform_(self.class_type_embedding)
|
700 |
+
if register:
|
701 |
+
self.register_type_embedding = nn.Parameter(torch.empty(1, self.token_dim))
|
702 |
+
nn.init.xavier_uniform_(self.register_type_embedding)
|
703 |
+
self.location_type_embedding = nn.Parameter(torch.empty(1, self.token_dim))
|
704 |
+
nn.init.xavier_uniform_(self.location_type_embedding)
|
705 |
+
if text_inputs:
|
706 |
+
self.text_type_embedding = nn.Parameter(torch.empty(1, self.token_dim))
|
707 |
+
nn.init.xavier_uniform_(self.text_type_embedding)
|
708 |
+
if image_inputs:
|
709 |
+
self.image_type_embedding = nn.Parameter(torch.empty(1, self.token_dim))
|
710 |
+
nn.init.xavier_uniform_(self.image_type_embedding)
|
711 |
+
if env_inputs:
|
712 |
+
self.env_type_embedding = nn.Parameter(torch.empty(1, self.token_dim))
|
713 |
+
nn.init.xavier_uniform_(self.env_type_embedding)
|
714 |
+
|
715 |
+
# Instantiate the class token transformation module
|
716 |
+
if class_token_transformation == 'identity':
|
717 |
+
self.class_token_transform = Identity(token_dim, num_filts)
|
718 |
+
elif class_token_transformation == 'linear':
|
719 |
+
self.class_token_transform = LinearTransformation(token_dim, num_filts)
|
720 |
+
elif class_token_transformation == 'single_layer_nn':
|
721 |
+
self.class_token_transform = SingleLayerNN(token_dim, num_filts, dropout_p=dropout)
|
722 |
+
elif class_token_transformation == 'two_layer_nn':
|
723 |
+
self.class_token_transform = TwoLayerNN(token_dim, num_filts, dropout_p=dropout)
|
724 |
+
elif class_token_transformation == 'sinr':
|
725 |
+
self.class_token_transform = HeadlessSINR(token_dim, num_filts, 2, nonlin, lowrank, dropout_p=dropout)
|
726 |
+
else:
|
727 |
+
raise ValueError(f"Unknown class_token_transformation: {class_token_transformation}")
|
728 |
+
|
729 |
+
def forward(self, x, context_sequence, context_mask, class_ids=None, return_feats=False,
|
730 |
+
return_class_embeddings=False, class_of_interest=None, use_eval_embeddings=False, text_emb=None,
|
731 |
+
image_emb=None, env_emb=None):
|
732 |
+
# Process input through the headless model to get feature embeddings
|
733 |
+
feature_embeddings = self.headless_model(x)
|
734 |
+
|
735 |
+
if return_feats:
|
736 |
+
return feature_embeddings
|
737 |
+
|
738 |
+
if context_sequence.dim() == 2:
|
739 |
+
context_sequence = context_sequence.unsqueeze(0) # Add batch dimension if missing
|
740 |
+
|
741 |
+
context_sequence = context_sequence[:, 1:, :]
|
742 |
+
|
743 |
+
context_mask = context_mask[:, 1:]
|
744 |
+
|
745 |
+
if self.sinr_inputs:
|
746 |
+
context_sequence = self.headless_model(context_sequence)
|
747 |
+
|
748 |
+
# Add type-specific embedding to each location token
|
749 |
+
context_sequence += self.location_type_embedding
|
750 |
+
|
751 |
+
batch_size = context_sequence.size(0)
|
752 |
+
|
753 |
+
# Initialize lists for tokens and masks
|
754 |
+
tokens = []
|
755 |
+
masks = []
|
756 |
+
|
757 |
+
# Process class token
|
758 |
+
class_token_expanded = self.class_token.expand(batch_size, -1, -1) + self.class_type_embedding
|
759 |
+
tokens.append(class_token_expanded)
|
760 |
+
# The class token is always present, so mask is False (i.e., not masked out)
|
761 |
+
class_mask = torch.zeros(batch_size, 1, dtype=torch.bool, device=context_sequence.device)
|
762 |
+
masks.append(class_mask)
|
763 |
+
|
764 |
+
# Process register token if present
|
765 |
+
if self.register is not None:
|
766 |
+
register_expanded = self.register.expand(batch_size, -1, -1) + self.register_type_embedding
|
767 |
+
tokens.append(register_expanded)
|
768 |
+
register_mask = torch.zeros(batch_size, 1, dtype=torch.bool, device=context_sequence.device)
|
769 |
+
masks.append(register_mask)
|
770 |
+
|
771 |
+
# Process text embeddings
|
772 |
+
if self.text_inputs and (text_emb is not None):
|
773 |
+
text_mask = (text_emb.sum(dim=1) == 0)
|
774 |
+
text_emb = self.text_model(text_emb)
|
775 |
+
text_emb += self.text_type_embedding
|
776 |
+
# Set embeddings to zero where mask is True
|
777 |
+
text_emb[text_mask] = 0
|
778 |
+
text_emb = text_emb.unsqueeze(1)
|
779 |
+
tokens.append(text_emb)
|
780 |
+
# Expand text_mask to match sequence dimensions
|
781 |
+
text_mask = text_mask.unsqueeze(1)
|
782 |
+
masks.append(text_mask)
|
783 |
+
|
784 |
+
# Process image embeddings
|
785 |
+
if self.image_inputs and (image_emb is not None):
|
786 |
+
image_mask = (image_emb.sum(dim=1) == 0)
|
787 |
+
image_emb = self.image_model(image_emb)
|
788 |
+
image_emb += self.image_type_embedding
|
789 |
+
image_emb[image_mask] = 0
|
790 |
+
image_emb = image_emb.unsqueeze(1)
|
791 |
+
tokens.append(image_emb)
|
792 |
+
image_mask = image_mask.unsqueeze(1)
|
793 |
+
masks.append(image_mask)
|
794 |
+
|
795 |
+
# Process env embeddings if needed (can be added similarly)
|
796 |
+
if self.env_inputs and (env_emb is not None):
|
797 |
+
env_mask = context_mask
|
798 |
+
env_emb = self.env_model(env_emb)
|
799 |
+
env_emb += self.env_type_embedding
|
800 |
+
env_emb[env_mask] = 0
|
801 |
+
env_emb = env_emb.unsqueeze(1)
|
802 |
+
tokens.append(env_emb)
|
803 |
+
env_mask = env_mask.unsqueeze(1)
|
804 |
+
masks.append(env_mask)
|
805 |
+
|
806 |
+
# Process location tokens
|
807 |
+
tokens.append(context_sequence)
|
808 |
+
masks.append(context_mask)
|
809 |
+
|
810 |
+
# Concatenate all tokens and masks
|
811 |
+
context_sequence = torch.cat(tokens, dim=1)
|
812 |
+
context_mask = torch.cat(masks, dim=1)
|
813 |
+
|
814 |
+
if use_eval_embeddings == False:
|
815 |
+
if class_of_interest == None:
|
816 |
+
# Get class-specific embeddings based on class_ids
|
817 |
+
class_token_output = self.transformer_model(src=context_sequence, src_key_padding_mask=context_mask)
|
818 |
+
# pass these through the class token transformation
|
819 |
+
class_embeddings = self.class_token_transform(class_token_output) # Shape: (batch_size, num_filts)
|
820 |
+
|
821 |
+
if return_class_embeddings:
|
822 |
+
return class_embeddings
|
823 |
+
else:
|
824 |
+
# Update EMA embeddings for these class IDs
|
825 |
+
with torch.no_grad():
|
826 |
+
if self.training:
|
827 |
+
self.update_ema_embeddings(class_ids, class_embeddings)
|
828 |
+
|
829 |
+
# Matrix multiplication to produce logits
|
830 |
+
logits = feature_embeddings @ class_embeddings.T
|
831 |
+
|
832 |
+
# Apply sigmoid to convert logits to probabilities
|
833 |
+
probabilities = torch.sigmoid(logits)
|
834 |
+
|
835 |
+
return probabilities
|
836 |
+
else:
|
837 |
+
device = self.ema_embeddings.weight.device
|
838 |
+
class_of_interest_tensor = torch.tensor([class_of_interest]).to(device)
|
839 |
+
class_embedding = self.get_ema_embeddings(class_of_interest_tensor)
|
840 |
+
print(f'using EMA estimate for class {class_of_interest}')
|
841 |
+
if return_class_embeddings:
|
842 |
+
return class_embedding
|
843 |
+
else:
|
844 |
+
# Matrix multiplication to produce logits
|
845 |
+
logits = feature_embeddings @ class_embedding.T
|
846 |
+
|
847 |
+
# Apply sigmoid to convert logits to probabilities
|
848 |
+
probabilities = torch.sigmoid(logits)
|
849 |
+
probabilities = probabilities.squeeze()
|
850 |
+
return probabilities
|
851 |
+
else:
|
852 |
+
self.eval()
|
853 |
+
if not hasattr(self, 'eval_embeddings'):
|
854 |
+
print('No Eval Embeddings for this species?!')
|
855 |
+
self.eval_embeddings = self.ema_embeddings
|
856 |
+
if class_of_interest == None:
|
857 |
+
# Get class-specific embeddings based on class_ids
|
858 |
+
class_token_output = self.transformer_model(src=context_sequence, src_key_padding_mask=context_mask)
|
859 |
+
class_embeddings = self.class_token_transform(class_token_output)
|
860 |
+
# Update EMA embeddings for these class IDs
|
861 |
+
|
862 |
+
self.generate_eval_embeddings(class_ids, class_embeddings)
|
863 |
+
|
864 |
+
# Matrix multiplication to produce logits
|
865 |
+
logits = feature_embeddings @ class_embeddings.T
|
866 |
+
|
867 |
+
# Apply sigmoid to convert logits to probabilities
|
868 |
+
probabilities = torch.sigmoid(logits)
|
869 |
+
|
870 |
+
return probabilities
|
871 |
+
else:
|
872 |
+
device = self.ema_embeddings.weight.device
|
873 |
+
class_of_interest_tensor = torch.tensor([class_of_interest]).to(device)
|
874 |
+
class_embedding = self.get_eval_embeddings(class_of_interest_tensor)
|
875 |
+
print(f'using eval embedding for class {class_of_interest}')
|
876 |
+
if return_class_embeddings:
|
877 |
+
return class_embedding
|
878 |
+
else:
|
879 |
+
# Matrix multiplication to produce logits
|
880 |
+
logits = feature_embeddings @ class_embedding.T
|
881 |
+
|
882 |
+
# Apply sigmoid to convert logits to probabilities
|
883 |
+
probabilities = torch.sigmoid(logits)
|
884 |
+
probabilities = probabilities.squeeze()
|
885 |
+
return probabilities
|
886 |
+
|
887 |
+
def get_loc_emb(self, x):
|
888 |
+
feature_embeddings = self.headless_model(x)
|
889 |
+
return feature_embeddings
|
890 |
+
|
891 |
+
def init_eval_embeddings(self, num_classes):
|
892 |
+
self.eval_embeddings = nn.Embedding(num_embeddings=num_classes, embedding_dim=self.num_filts)
|
893 |
+
nn.init.xavier_uniform_(self.eval_embeddings.weight)
|
894 |
+
|
895 |
+
def get_ema_embeddings(self, class_ids):
|
896 |
+
# Method to access EMA embeddings
|
897 |
+
return self.ema_embeddings(class_ids)
|
898 |
+
|
899 |
+
def get_eval_embeddings(self, class_ids):
|
900 |
+
# Method to access eval embeddings
|
901 |
+
return self.eval_embeddings(class_ids)
|
902 |
+
|
903 |
+
def update_ema_embeddings(self, class_ids, current_embeddings):
|
904 |
+
if self.training:
|
905 |
+
# Get unique class IDs and their counts
|
906 |
+
unique_class_ids, inverse_indices, counts = class_ids.unique(return_counts=True, return_inverse=True)
|
907 |
+
|
908 |
+
# Get current EMA embeddings for unique class IDs
|
909 |
+
ema_current = self.ema_embeddings(unique_class_ids)
|
910 |
+
|
911 |
+
# Initialize a placeholder for new EMA values
|
912 |
+
ema_new = torch.zeros_like(ema_current)
|
913 |
+
|
914 |
+
# Compute the average of current embeddings for each unique class ID
|
915 |
+
current_sum = torch.zeros_like(ema_current)
|
916 |
+
current_sum.index_add_(0, inverse_indices, current_embeddings)
|
917 |
+
current_avg = current_sum / counts.unsqueeze(1)
|
918 |
+
|
919 |
+
# Apply EMA update formula
|
920 |
+
ema_new = self.ema_factor * current_avg + (1 - self.ema_factor) * ema_current
|
921 |
+
|
922 |
+
# Update the EMA embeddings for unique class IDs
|
923 |
+
self.ema_embeddings.weight.data[unique_class_ids] = ema_new.detach() # Detach to prevent gradients
|
924 |
+
|
925 |
+
def generate_eval_embeddings(self, class_id, current_embedding):
|
926 |
+
self.eval_embeddings.weight.data[class_id, :] = current_embedding.detach() # Detach to prevent gradients
|
927 |
+
|
928 |
+
# self.eval_embeddings.weight.data[class_id] = self.ema_embeddings.weight.data[class_id] # Detach to prevent gradients
|
929 |
+
|
930 |
+
def embedding_forward(self, x, class_ids=None, return_feats=False, return_class_embeddings=False, class_of_interest=None, eval=False):
|
931 |
+
# forward method that uses ema or eval embeddings rather than context sequence
|
932 |
+
|
933 |
+
# Process input through the headless model to get feature embeddings
|
934 |
+
feature_embeddings = self.headless_model(x)
|
935 |
+
|
936 |
+
if return_feats:
|
937 |
+
return feature_embeddings
|
938 |
+
else:
|
939 |
+
if class_of_interest == None:
|
940 |
+
# Get class-specific embeddings based on class_ids
|
941 |
+
if eval == False:
|
942 |
+
class_embeddings = self.get_ema_embeddings(class_ids=class_ids)
|
943 |
+
else:
|
944 |
+
class_embeddings = self.get_eval_embeddings(class_ids=class_ids)
|
945 |
+
if return_class_embeddings:
|
946 |
+
return class_embeddings
|
947 |
+
else:
|
948 |
+
# Matrix multiplication to produce logits
|
949 |
+
logits = feature_embeddings @ class_embeddings.T
|
950 |
+
|
951 |
+
# Apply sigmoid to convert logits to probabilities
|
952 |
+
probabilities = torch.sigmoid(logits)
|
953 |
+
|
954 |
+
return probabilities
|
955 |
+
else:
|
956 |
+
if eval == False:
|
957 |
+
device = self.ema_embeddings.weight.device
|
958 |
+
class_of_interest_tensor = torch.tensor([class_of_interest]).to(device)
|
959 |
+
class_embedding = self.get_ema_embeddings(class_of_interest_tensor)
|
960 |
+
print(f'using EMA estimate for class {class_of_interest}')
|
961 |
+
if return_class_embeddings:
|
962 |
+
return class_embedding
|
963 |
+
else:
|
964 |
+
# Matrix multiplication to produce logits
|
965 |
+
logits = feature_embeddings @ class_embedding.T
|
966 |
+
|
967 |
+
# Apply sigmoid to convert logits to probabilities
|
968 |
+
probabilities = torch.sigmoid(logits)
|
969 |
+
probabilities = probabilities.squeeze()
|
970 |
+
|
971 |
+
return probabilities
|
972 |
+
|
973 |
+
else:
|
974 |
+
device = self.eval_embeddings.weight.device
|
975 |
+
class_of_interest_tensor = torch.tensor([class_of_interest]).to(device)
|
976 |
+
class_embedding = self.get_eval_embeddings(class_of_interest_tensor)
|
977 |
+
#print(f'using eval estimate for class {class_of_interest}')
|
978 |
+
if return_class_embeddings:
|
979 |
+
return class_embedding
|
980 |
+
else:
|
981 |
+
# Matrix multiplication to produce logits
|
982 |
+
logits = feature_embeddings @ class_embedding.T
|
983 |
+
|
984 |
+
# Apply sigmoid to convert logits to probabilities
|
985 |
+
probabilities = torch.sigmoid(logits)
|
986 |
+
probabilities = probabilities.squeeze()
|
987 |
+
|
988 |
+
return probabilities
|
989 |
+
|
990 |
+
|
991 |
+
class LinNet(nn.Module):
|
992 |
+
def __init__(self, num_inputs, num_classes):
|
993 |
+
super(LinNet, self).__init__()
|
994 |
+
self.num_layers = 0
|
995 |
+
self.inc_bias = False
|
996 |
+
self.class_emb = nn.Linear(num_inputs, num_classes, bias=self.inc_bias)
|
997 |
+
self.feats = nn.Identity() # does not do anything
|
998 |
+
|
999 |
+
def forward(self, x, class_of_interest=None, return_feats=False):
|
1000 |
+
loc_emb = self.feats(x)
|
1001 |
+
if return_feats:
|
1002 |
+
return loc_emb
|
1003 |
+
if class_of_interest is None:
|
1004 |
+
class_pred = self.class_emb(loc_emb)
|
1005 |
+
else:
|
1006 |
+
class_pred = self.eval_single_class(loc_emb, class_of_interest)
|
1007 |
+
|
1008 |
+
return torch.sigmoid(class_pred)
|
1009 |
+
|
1010 |
+
def eval_single_class(self, x, class_of_interest):
|
1011 |
+
if self.inc_bias:
|
1012 |
+
return x @ self.class_emb.weight[class_of_interest, :] + self.class_emb.bias[class_of_interest]
|
1013 |
+
else:
|
1014 |
+
return x @ self.class_emb.weight[class_of_interest, :]
|
1015 |
+
|
1016 |
+
|
1017 |
+
class ParallelMulti(torch.nn.Module):
|
1018 |
+
def __init__(self, x: list[torch.nn.Module]):
|
1019 |
+
super(ParallelMulti, self).__init__()
|
1020 |
+
self.layers = nn.ModuleList(x)
|
1021 |
+
|
1022 |
+
def forward(self, xs, **kwargs):
|
1023 |
+
out = torch.cat([self.layers[i](x, **kwargs) for i,x in enumerate(xs)], dim=1)
|
1024 |
+
return out
|
1025 |
+
|
1026 |
+
|
1027 |
+
class SequentialMulti(torch.nn.Sequential):
|
1028 |
+
def forward(self, *inputs, **kwargs):
|
1029 |
+
for module in self._modules.values():
|
1030 |
+
if type(inputs) == tuple:
|
1031 |
+
inputs = module(*inputs, **kwargs)
|
1032 |
+
else:
|
1033 |
+
inputs = module(inputs)
|
1034 |
+
return inputs
|
1035 |
+
|
1036 |
+
|
1037 |
+
# Chris's transformation classes
|
1038 |
+
class Identity(nn.Module):
|
1039 |
+
def __init__(self, in_dim, out_dim):
|
1040 |
+
super(Identity, self).__init__()
|
1041 |
+
# No parameters needed for identity transformation
|
1042 |
+
|
1043 |
+
def forward(self, x):
|
1044 |
+
return x
|
1045 |
+
|
1046 |
+
class LinearTransformation(nn.Module):
|
1047 |
+
def __init__(self, in_dim, out_dim, bias=True):
|
1048 |
+
super(LinearTransformation, self).__init__()
|
1049 |
+
self.linear = nn.Linear(in_dim, out_dim, bias=bias)
|
1050 |
+
|
1051 |
+
def forward(self, x):
|
1052 |
+
return self.linear(x)
|
1053 |
+
|
1054 |
+
class SingleLayerNN(nn.Module):
|
1055 |
+
def __init__(self, in_dim, out_dim, dropout_p=0.1, bias=True):
|
1056 |
+
super(SingleLayerNN, self).__init__()
|
1057 |
+
hidden_dim = (in_dim + out_dim) // 2 # Choose an appropriate hidden dimension
|
1058 |
+
self.net = nn.Sequential(
|
1059 |
+
nn.Linear(in_dim, hidden_dim, bias=bias),
|
1060 |
+
nn.ReLU(),
|
1061 |
+
nn.Dropout(p=dropout_p),
|
1062 |
+
nn.Linear(hidden_dim, out_dim, bias=bias)
|
1063 |
+
)
|
1064 |
+
|
1065 |
+
def forward(self, x):
|
1066 |
+
return self.net(x)
|
1067 |
+
|
1068 |
+
class TwoLayerNN(nn.Module):
|
1069 |
+
def __init__(self, in_dim, out_dim, dropout_p=0.1, bias=True):
|
1070 |
+
super(TwoLayerNN, self).__init__()
|
1071 |
+
hidden_dim = (in_dim + out_dim) // 2 # Choose an appropriate hidden dimension
|
1072 |
+
self.net = nn.Sequential(
|
1073 |
+
nn.Linear(in_dim, hidden_dim, bias=bias),
|
1074 |
+
nn.ReLU(),
|
1075 |
+
nn.Dropout(p=dropout_p),
|
1076 |
+
nn.Linear(hidden_dim, hidden_dim, bias=bias),
|
1077 |
+
nn.ReLU(),
|
1078 |
+
nn.Dropout(p=dropout_p),
|
1079 |
+
nn.Linear(hidden_dim, out_dim, bias=bias)
|
1080 |
+
)
|
1081 |
+
|
1082 |
+
def forward(self, x):
|
1083 |
+
return self.net(x)
|
1084 |
+
|
1085 |
+
class HyperNet(nn.Module):
|
1086 |
+
'''
|
1087 |
+
:param asdf
|
1088 |
+
'''
|
1089 |
+
def __init__(self, params, num_inputs, num_classes, num_filts, pos_enc_depth, species_dim, species_enc_depth, species_filts, species_enc='embed', inference_only=False):
|
1090 |
+
super(HyperNet, self).__init__()
|
1091 |
+
if species_enc == 'embed':
|
1092 |
+
self.species_emb = nn.Embedding(num_classes, species_dim)
|
1093 |
+
self.species_emb.weight.data *= 0.01
|
1094 |
+
elif species_enc == 'taxa':
|
1095 |
+
self.species_emb = TaxaEncoder(params, './data/inat_taxa_info.csv', species_dim)
|
1096 |
+
elif species_enc == 'text':
|
1097 |
+
self.species_emb = TextEncoder(params, params['text_emb_path'], species_dim, './data/inat_taxa_info.csv')
|
1098 |
+
elif species_enc == 'wiki':
|
1099 |
+
self.species_emb = WikiEncoder(params, params['text_emb_path'], species_dim, inference_only=inference_only)
|
1100 |
+
if species_enc_depth == -1:
|
1101 |
+
self.species_enc = nn.Identity()
|
1102 |
+
elif species_enc_depth == 0:
|
1103 |
+
self.species_enc = nn.Linear(species_dim, num_filts+1)
|
1104 |
+
else:
|
1105 |
+
self.species_enc = SimpleFCNet(species_dim, num_filts+1, species_filts, depth=species_enc_depth)
|
1106 |
+
if 'geoprior' in params['loss']:
|
1107 |
+
self.species_params = nn.Parameter(torch.randn(num_classes, species_dim))
|
1108 |
+
self.species_params.data *= 0.0386
|
1109 |
+
self.pos_enc = SimpleFCNet(num_inputs, num_filts, num_filts, depth=pos_enc_depth)
|
1110 |
+
|
1111 |
+
def forward(self, x, y):
|
1112 |
+
ys, indmap = torch.unique(y, return_inverse=True)
|
1113 |
+
species = self.species_enc(self.species_emb(ys))
|
1114 |
+
species_w, species_b = species[...,:-1], species[...,-1:]
|
1115 |
+
pos = self.pos_enc(x)
|
1116 |
+
out = torch.bmm(species_w[indmap],pos[...,None])
|
1117 |
+
out = (out + 0*species_b[indmap]).squeeze(-1) #TODO
|
1118 |
+
if hasattr(self, 'species_params'):
|
1119 |
+
out2 = torch.bmm(self.species_params[ys][indmap],pos[...,None])
|
1120 |
+
out2 = out2.squeeze(-1)
|
1121 |
+
out3 = (species_w, self.species_params[ys], ys)
|
1122 |
+
return out, out2, out3
|
1123 |
+
else:
|
1124 |
+
return out
|
1125 |
+
|
1126 |
+
def zero_shot(self, x, species_emb):
|
1127 |
+
species = self.species_enc(self.species_emb.zero_shot(species_emb))
|
1128 |
+
species_w, _ = species[...,:-1], species[...,-1:]
|
1129 |
+
pos = self.pos_enc(x)
|
1130 |
+
out = pos @ species_w.T
|
1131 |
+
return out
|
1132 |
+
|
1133 |
+
|
1134 |
+
class TaxaEncoder(nn.Module):
|
1135 |
+
def __init__(self, params, fpath, embedding_dim):
|
1136 |
+
super(TaxaEncoder, self).__init__()
|
1137 |
+
import datasets
|
1138 |
+
with open('paths.json', 'r') as f:
|
1139 |
+
paths = json.load(f)
|
1140 |
+
data_dir = paths['train']
|
1141 |
+
obs_file = os.path.join(data_dir, params['obs_file'])
|
1142 |
+
taxa_file_snt = os.path.join(data_dir, 'taxa_subsets.json')
|
1143 |
+
|
1144 |
+
taxa_of_interest = datasets.get_taxa_of_interest(params['species_set'], params['num_aux_species'],
|
1145 |
+
params['aux_species_seed'], params['taxa_file'], taxa_file_snt)
|
1146 |
+
|
1147 |
+
locs, labels, _, dates, _, _ = datasets.load_inat_data(obs_file, taxa_of_interest)
|
1148 |
+
unique_taxa, class_ids = np.unique(labels, return_inverse=True)
|
1149 |
+
class_to_taxa = unique_taxa.tolist()
|
1150 |
+
|
1151 |
+
self.fpath = fpath
|
1152 |
+
ids = []
|
1153 |
+
rows = []
|
1154 |
+
with open(fpath, newline='') as csvfile:
|
1155 |
+
spamreader = csv.reader(csvfile, delimiter=',')
|
1156 |
+
for row in spamreader:
|
1157 |
+
if row[0] == 'taxon_id':
|
1158 |
+
continue
|
1159 |
+
ids.append(int(row[0]))
|
1160 |
+
rows.append(row[3:])
|
1161 |
+
print()
|
1162 |
+
rows = np.array(rows)
|
1163 |
+
rows = [np.unique(rows[:,i], return_inverse=True)[1] for i in range(rows.shape[1])]
|
1164 |
+
rows = torch.from_numpy(np.vstack(rows).T)
|
1165 |
+
rows = rows
|
1166 |
+
self.taxa2row = {taxaid:i for i, taxaid in enumerate(ids)}
|
1167 |
+
embs = [nn.Embedding(rows[:,i].max()+2, embedding_dim, 0) for i in range(rows.shape[1])]
|
1168 |
+
embs[-1] = nn.Embedding(len(class_to_taxa), embedding_dim)
|
1169 |
+
rows2 = torch.zeros((len(class_to_taxa), 7), dtype=rows.dtype)
|
1170 |
+
startind = rows[:,-1].max()
|
1171 |
+
for i in range(len(class_to_taxa)):
|
1172 |
+
if class_to_taxa[i] in ids:
|
1173 |
+
rows2[i] = rows[ids.index(class_to_taxa[i])]+1
|
1174 |
+
rows2[i,-1] -= 1
|
1175 |
+
else:
|
1176 |
+
rows2[i,-1] = startind
|
1177 |
+
startind += 1
|
1178 |
+
self.register_buffer('rows', rows2)
|
1179 |
+
for e in embs:
|
1180 |
+
e.weight.data *= 0.01
|
1181 |
+
self.embs = nn.ModuleList(embs)
|
1182 |
+
|
1183 |
+
def forward(self, x):
|
1184 |
+
inds = self.rows[x]
|
1185 |
+
out = sum([self.embs[i](inds[...,i]) for i in range(inds.shape[-1])])
|
1186 |
+
return out
|
1187 |
+
|
1188 |
+
|
1189 |
+
class TextEncoder(nn.Module):
|
1190 |
+
def __init__(self, params, path, embedding_dim, fpath='inat_taxa_info.csv'):
|
1191 |
+
super(TextEncoder, self).__init__()
|
1192 |
+
import datasets
|
1193 |
+
with open('paths.json', 'r') as f:
|
1194 |
+
paths = json.load(f)
|
1195 |
+
data_dir = paths['train']
|
1196 |
+
obs_file = os.path.join(data_dir, params['obs_file'])
|
1197 |
+
taxa_file_snt = os.path.join(data_dir, 'taxa_subsets.json')
|
1198 |
+
|
1199 |
+
taxa_of_interest = datasets.get_taxa_of_interest(params['species_set'], params['num_aux_species'],
|
1200 |
+
params['aux_species_seed'], params['taxa_file'], taxa_file_snt)
|
1201 |
+
|
1202 |
+
locs, labels, _, dates, _, _ = datasets.load_inat_data(obs_file, taxa_of_interest)
|
1203 |
+
unique_taxa, class_ids = np.unique(labels, return_inverse=True)
|
1204 |
+
class_to_taxa = unique_taxa.tolist()
|
1205 |
+
|
1206 |
+
self.fpath = fpath
|
1207 |
+
ids = []
|
1208 |
+
with open(fpath, newline='') as csvfile:
|
1209 |
+
spamreader = csv.reader(csvfile, delimiter=',')
|
1210 |
+
for row in spamreader:
|
1211 |
+
if row[0] == 'taxon_id':
|
1212 |
+
continue
|
1213 |
+
ids.append(int(row[0]))
|
1214 |
+
embs = torch.load(path)
|
1215 |
+
if len(embs) != len(ids):
|
1216 |
+
print("Warning: Number of embeddings doesn't match number of species")
|
1217 |
+
ids = ids[:embs.shape[0]]
|
1218 |
+
if isinstance(embs, list):
|
1219 |
+
embs = torch.stack(embs)
|
1220 |
+
self.taxa2row = {taxaid:i for i, taxaid in enumerate(ids)}
|
1221 |
+
indmap = -1+torch.zeros(len(class_to_taxa), dtype=torch.int)
|
1222 |
+
embmap = -1+torch.zeros(len(class_to_taxa), dtype=torch.int)
|
1223 |
+
self.missing_emb = nn.Embedding(len(class_to_taxa)-embs.shape[0], embedding_dim)
|
1224 |
+
|
1225 |
+
startind = 0
|
1226 |
+
for i in range(len(class_to_taxa)):
|
1227 |
+
if class_to_taxa[i] in ids:
|
1228 |
+
indmap[i] = ids.index(class_to_taxa[i])
|
1229 |
+
else:
|
1230 |
+
embmap[i] = startind
|
1231 |
+
startind += 1
|
1232 |
+
self.scales = nn.Parameter(torch.zeros(len(class_to_taxa), 1))
|
1233 |
+
self.register_buffer('indmap', indmap, persistent=False)
|
1234 |
+
self.register_buffer('embmap', embmap, persistent=False)
|
1235 |
+
self.register_buffer('embs', embs, persistent=False)
|
1236 |
+
if params['text_hidden_dim'] == 0:
|
1237 |
+
self.linear1 = nn.Linear(embs.shape[1], embedding_dim)
|
1238 |
+
else:
|
1239 |
+
self.linear1 = nn.Linear(embs.shape[1], params['text_hidden_dim'])
|
1240 |
+
self.linear2 = nn.Linear(params['text_hidden_dim'], embedding_dim)
|
1241 |
+
self.act = nn.SiLU()
|
1242 |
+
if params['text_learn_dim'] > 0:
|
1243 |
+
self.learned_emb = nn.Embedding(len(class_to_taxa), params['text_learn_dim'])
|
1244 |
+
self.learned_emb.weight.data *= 0.01
|
1245 |
+
self.linear_learned = nn.Linear(params['text_learn_dim'], embedding_dim)
|
1246 |
+
|
1247 |
+
def forward(self, x):
|
1248 |
+
inds = self.indmap[x]
|
1249 |
+
out = self.embs[self.indmap[x].cpu()]
|
1250 |
+
out = self.linear1(out)
|
1251 |
+
if hasattr(self, 'linear2'):
|
1252 |
+
out = self.linear2(self.act(out))
|
1253 |
+
out = self.scales[x] * (out / (out.std(dim=1)[:, None]))
|
1254 |
+
out[inds == -1] = self.missing_emb(self.embmap[x[inds == -1]])
|
1255 |
+
if hasattr(self, 'learned_emb'):
|
1256 |
+
out2 = self.learned_emb(x)
|
1257 |
+
out2 = self.linear_learned(out2)
|
1258 |
+
out = out+out2
|
1259 |
+
return out
|
1260 |
+
|
1261 |
+
|
1262 |
+
class WikiEncoder(nn.Module):
|
1263 |
+
def __init__(self, params, path, embedding_dim, inference_only=False):
|
1264 |
+
super(WikiEncoder, self).__init__()
|
1265 |
+
self.path = path
|
1266 |
+
if not inference_only:
|
1267 |
+
import datasets
|
1268 |
+
with open('paths.json', 'r') as f:
|
1269 |
+
paths = json.load(f)
|
1270 |
+
data_dir = paths['train']
|
1271 |
+
obs_file = os.path.join(data_dir, params['obs_file'])
|
1272 |
+
taxa_file_snt = os.path.join(data_dir, 'taxa_subsets.json')
|
1273 |
+
|
1274 |
+
taxa_of_interest = datasets.get_taxa_of_interest(params['species_set'], params['num_aux_species'],
|
1275 |
+
params['aux_species_seed'], params['taxa_file'], taxa_file_snt)
|
1276 |
+
|
1277 |
+
locs, labels, _, dates, _, _ = datasets.load_inat_data(obs_file, taxa_of_interest)
|
1278 |
+
if params['zero_shot']:
|
1279 |
+
with open('paths.json', 'r') as f:
|
1280 |
+
paths = json.load(f)
|
1281 |
+
with open(os.path.join(paths['iucn'], 'iucn_res_5.json'), 'r') as f:
|
1282 |
+
data = json.load(f)
|
1283 |
+
D = np.load(os.path.join(paths['snt'], 'snt_res_5.npy'), allow_pickle=True)
|
1284 |
+
D = D.item()
|
1285 |
+
taxa_snt = D['taxa'].tolist()
|
1286 |
+
taxa = [int(tt) for tt in data['taxa_presence'].keys()]
|
1287 |
+
taxa = list(set(taxa + taxa_snt))
|
1288 |
+
mask = labels != taxa[0]
|
1289 |
+
for i in range(1, len(taxa)):
|
1290 |
+
mask &= (labels != taxa[i])
|
1291 |
+
locs = locs[mask]
|
1292 |
+
dates = dates[mask]
|
1293 |
+
labels = labels[mask]
|
1294 |
+
unique_taxa, class_ids = np.unique(labels, return_inverse=True)
|
1295 |
+
class_to_taxa = unique_taxa.tolist()
|
1296 |
+
|
1297 |
+
embs = torch.load(path)
|
1298 |
+
ids = embs['taxon_id'].tolist()
|
1299 |
+
if 'keys' in embs:
|
1300 |
+
taxa_counts = torch.zeros(len(ids), dtype=torch.int32)
|
1301 |
+
for i,k in embs['keys']:
|
1302 |
+
taxa_counts[i] += 1
|
1303 |
+
else:
|
1304 |
+
taxa_counts = torch.ones(len(ids), dtype=torch.int32)
|
1305 |
+
count_sum = torch.cumsum(taxa_counts, dim=0) - taxa_counts
|
1306 |
+
embs = embs['data']
|
1307 |
+
|
1308 |
+
self.taxa2row = {taxaid:i for i, taxaid in enumerate(ids)}
|
1309 |
+
indmap = -1+torch.zeros(len(class_to_taxa), dtype=torch.int)
|
1310 |
+
countmap = torch.zeros(len(class_to_taxa), dtype=torch.int)
|
1311 |
+
self.species_emb = nn.Embedding(len(class_to_taxa), embedding_dim)
|
1312 |
+
self.species_emb.weight.data *= 0.01
|
1313 |
+
|
1314 |
+
for i in range(len(class_to_taxa)):
|
1315 |
+
if class_to_taxa[i] in ids:
|
1316 |
+
i2 = ids.index(class_to_taxa[i])
|
1317 |
+
indmap[i] = count_sum[i2]
|
1318 |
+
countmap[i] = taxa_counts[i2]
|
1319 |
+
|
1320 |
+
self.register_buffer('indmap', indmap, persistent=False)
|
1321 |
+
self.register_buffer('countmap', countmap, persistent=False)
|
1322 |
+
self.register_buffer('embs', embs, persistent=False)
|
1323 |
+
assert embs.shape[1] == 4096
|
1324 |
+
self.scale = nn.Parameter(torch.zeros(1))
|
1325 |
+
if params['species_dropout'] > 0:
|
1326 |
+
self.dropout = nn.Dropout(p=params['species_dropout'])
|
1327 |
+
if params['text_hidden_dim'] == 0:
|
1328 |
+
self.linear1 = nn.Linear(4096, embedding_dim)
|
1329 |
+
else:
|
1330 |
+
self.linear1 = nn.Linear(4096, params['text_hidden_dim'])
|
1331 |
+
if params['text_batchnorm']:
|
1332 |
+
self.bn1 = nn.BatchNorm1d(params['text_hidden_dim'])
|
1333 |
+
for l in range(params['text_num_layers']-1):
|
1334 |
+
setattr(self, f'linear{l+2}', nn.Linear(params['text_hidden_dim'], params['text_hidden_dim']))
|
1335 |
+
if params['text_batchnorm']:
|
1336 |
+
setattr(self, f'bn{l+2}', nn.BatchNorm1d(params['text_hidden_dim']))
|
1337 |
+
setattr(self, f'linear{params["text_num_layers"]+1}', nn.Linear(params['text_hidden_dim'], embedding_dim))
|
1338 |
+
self.act = nn.SiLU()
|
1339 |
+
if params['text_learn_dim'] > 0:
|
1340 |
+
self.learned_emb = nn.Embedding(len(class_to_taxa), params['text_learn_dim'])
|
1341 |
+
self.learned_emb.weight.data *= 0.01
|
1342 |
+
self.linear_learned = nn.Linear(params['text_learn_dim'], embedding_dim)
|
1343 |
+
|
1344 |
+
def forward(self, x):
|
1345 |
+
inds = self.indmap[x] + (torch.rand(x.shape,device=x.device)*self.countmap[x]).floor().int()
|
1346 |
+
out = self.embs[inds]
|
1347 |
+
if hasattr(self, 'dropout'):
|
1348 |
+
out = self.dropout(out)
|
1349 |
+
out = self.linear1(out)
|
1350 |
+
if hasattr(self, 'linear2'):
|
1351 |
+
out = self.act(out)
|
1352 |
+
if hasattr(self, 'bn1'):
|
1353 |
+
out = self.bn1(out)
|
1354 |
+
i = 2
|
1355 |
+
while hasattr(self, f'linear{i}'):
|
1356 |
+
if hasattr(self, f'linear{i}'):
|
1357 |
+
out = self.act(getattr(self, f'linear{i}')(out))
|
1358 |
+
if hasattr(self, f'bn{i}'):
|
1359 |
+
out = getattr(self, f'bn{i}')(out)
|
1360 |
+
i += 1
|
1361 |
+
#out = self.scale * (out / (out.std(dim=1)[:, None]))
|
1362 |
+
out2 = self.species_emb(x)
|
1363 |
+
chosen = torch.rand((out.shape[0],), device=x.device)
|
1364 |
+
chosen = 1+0*chosen #TODO fix this
|
1365 |
+
chosen[inds == -1] = 0
|
1366 |
+
out = chosen[:,None] * out + (1-chosen[:,None])*out2
|
1367 |
+
if hasattr(self, 'learned_emb'):
|
1368 |
+
out2 = self.learned_emb(x)
|
1369 |
+
out2 = self.linear_learned(out2)
|
1370 |
+
out = out+out2
|
1371 |
+
return out
|
1372 |
+
|
1373 |
+
|
1374 |
+
def zero_shot(self, species_emb):
|
1375 |
+
out = species_emb
|
1376 |
+
out = self.linear1(out)
|
1377 |
+
if hasattr(self, 'linear2'):
|
1378 |
+
out = self.act(out)
|
1379 |
+
if hasattr(self, 'bn1'):
|
1380 |
+
out = self.bn1(out)
|
1381 |
+
i = 2
|
1382 |
+
while hasattr(self, f'linear{i}'):
|
1383 |
+
if hasattr(self, f'linear{i}'):
|
1384 |
+
out = self.act(getattr(self, f'linear{i}')(out))
|
1385 |
+
if hasattr(self, f'bn{i}'):
|
1386 |
+
out = getattr(self, f'bn{i}')(out)
|
1387 |
+
i += 1
|
1388 |
+
return out
|
1389 |
+
|
1390 |
+
def zero_shot_old(self, species_emb):
|
1391 |
+
out = species_emb
|
1392 |
+
out = self.linear1(out)
|
1393 |
+
if hasattr(self, 'linear2'):
|
1394 |
+
out = self.linear2(self.act(out))
|
1395 |
+
out = self.scale * (out / (out.std(dim=-1, keepdim=True)))
|
1396 |
+
return out
|
1397 |
+
|
1398 |
+
# MINE - would only be used for my models - not currently being used at all
|
1399 |
+
# CURRENTLY JUST USING A HEADLESS_SINR FOR THE TEXT ENCODER
|
1400 |
+
class MultiInputTextEncoder(nn.Module):
|
1401 |
+
def __init__(self, token_dim, dropout, input_dim=4096, depth=2, hidden_dim=512, nonlin='relu', batch_norm=True, layer_norm=False):
|
1402 |
+
super(MultiInputTextEncoder, self).__init__()
|
1403 |
+
|
1404 |
+
print("THINK ABOUT IF SOME OF THESE HYPERPARAMETERS SHOULD BE DISTINCT FROM THE TRANSFORMER VERSION")
|
1405 |
+
print("DEPTH / NUM_ENCODER_LAYERS, DROPOUT, DIM_FEEDFORWARD, ETC")
|
1406 |
+
print("AT PRESENT WE JUST HAVE A SORT OF BASIC VERSION IMPLEMENTED THAT ATTEMPTS TO BE LIKE MAX'S VERSION")
|
1407 |
+
print("ALSO, OPTION TO HAVE IT PRETRAINED? ADD RESIDUAL LAYERS?")
|
1408 |
+
self.token_dim=token_dim
|
1409 |
+
self.dropout=dropout
|
1410 |
+
self.input_dim=input_dim
|
1411 |
+
self.depth=depth
|
1412 |
+
self.hidden_dim=hidden_dim
|
1413 |
+
self.batch_norm = batch_norm
|
1414 |
+
self.layer_norm = layer_norm
|
1415 |
+
|
1416 |
+
if nonlin == 'relu':
|
1417 |
+
activation = nn.ReLU
|
1418 |
+
elif nonlin == 'silu':
|
1419 |
+
activation = nn.SiLU
|
1420 |
+
else:
|
1421 |
+
raise NotImplementedError('Invalid nonlinearity specified.')
|
1422 |
+
|
1423 |
+
self.dropout_layer = nn.Dropout(p=self.dropout)
|
1424 |
+
if self.depth <= 1:
|
1425 |
+
self.linear1 = nn.Linear(self.input_dim, self.token_dim)
|
1426 |
+
|
1427 |
+
else:
|
1428 |
+
self.linear1 = nn.Linear(self.input_dim, self.hidden_dim)
|
1429 |
+
|
1430 |
+
if self.batch_norm:
|
1431 |
+
self.bn1 = nn.BatchNorm1d(self.hidden_dim)
|
1432 |
+
|
1433 |
+
# if self.layer_norm:
|
1434 |
+
# self.ln1 = nn.LayerNorm(self.hidden_dim)
|
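The `update_ema_embeddings` methods above keep a per-class exponential moving average of the transformer's class-token outputs, which is what `get_ema_embeddings` later serves at inference time. A minimal, self-contained sketch of that update rule, using toy sizes and made-up values (only `ema_factor=0.1` is taken from the constructors above):

import torch

ema_factor = 0.1                  # default ema_factor used above
ema_current = torch.zeros(2, 4)   # stored EMA embeddings for 2 classes (dim 4)
current_avg = torch.ones(2, 4)    # batch-averaged class embeddings for those classes

# same formula as in update_ema_embeddings
ema_new = ema_factor * current_avg + (1 - ema_factor) * ema_current
print(ema_new)  # every entry is 0.1 after one update from a zero initialisation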
paths.json
ADDED
@@ -0,0 +1,10 @@
{
    "data": "data/",
    "masks": "data/masks/",
    "env": "data/env/",
    "train": "data/train/",
    "geo_prior": "data/eval/geo_prior/",
    "snt": "data/eval/snt/",
    "iucn": "data/eval/iucn/",
    "geo_feature": "data/eval/geo_feature/"
}
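These keys are how the rest of the code resolves data locations (for example, `TaxaEncoder`, `TextEncoder` and `WikiEncoder` in models.py read `paths['train']`). A minimal sketch of that lookup; the observation file name here is only a placeholder:

import json
import os

with open('paths.json', 'r') as f:
    paths = json.load(f)

data_dir = paths['train']                               # -> "data/train/"
obs_file = os.path.join(data_dir, 'observations.csv')   # placeholder file name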
requirements.txt
ADDED
@@ -0,0 +1,10 @@
gradio==3.36.1
h3==3.7.6
matplotlib==3.7.1
numpy==1.25.0
pandas==2.0.3
scikit_learn==1.3.0
scikit-image==0.19.3
tifffile==2023.7.4
torch==1.12.1
imagecodecs==2023.9.18
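These pins are installed automatically when the Space builds; the same environment can be reproduced locally with `pip install -r requirements.txt`.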
setup.py
ADDED
The diff for this file is too large to render.
See raw diff
utils.py
ADDED
@@ -0,0 +1,326 @@
1 |
+
import matplotlib.pyplot as plt
|
2 |
+
import torch
|
3 |
+
import numpy as np
|
4 |
+
import math
|
5 |
+
import datetime
|
6 |
+
#from h3.unstable import vect
|
7 |
+
import h3
|
8 |
+
|
9 |
+
class CoordEncoder:
|
10 |
+
|
11 |
+
def __init__(self, input_enc, raster=None, input_dim=0):
|
12 |
+
self.input_enc = input_enc
|
13 |
+
self.raster = raster
|
14 |
+
self.input_dim = input_dim
|
15 |
+
|
16 |
+
def encode(self, locs, normalize=True):
|
17 |
+
# assumes lon, lat in range [-180, 180] and [-90, 90]
|
18 |
+
if normalize:
|
19 |
+
locs = normalize_coords(locs)
|
20 |
+
if self.input_enc == 'none':
|
21 |
+
loc_feats = locs * torch.tensor([[180.0,90.0]], device=locs.device)
|
22 |
+
elif self.input_enc == 'sin_cos': # sinusoidal encoding
|
23 |
+
loc_feats = encode_loc(locs, input_dim=self.input_dim)
|
24 |
+
elif self.input_enc == 'env': # bioclim variables
|
25 |
+
loc_feats = bilinear_interpolate(locs, self.raster)
|
26 |
+
elif self.input_enc == 'sin_cos_env': # sinusoidal encoding & bioclim variables
|
27 |
+
loc_feats = encode_loc(locs, input_dim=self.input_dim)
|
28 |
+
context_feats = bilinear_interpolate(locs, self.raster.to(locs.device))
|
29 |
+
loc_feats = torch.cat((loc_feats, context_feats), 1)
|
30 |
+
elif self.input_enc == 'satclip': #SatClip Embedding
|
31 |
+
if not hasattr(self, 'model'):
|
32 |
+
import sys
|
33 |
+
sys.path.append('./satclip/satclip')
|
34 |
+
from satclip.satclip.load import get_satclip
|
35 |
+
self.model = get_satclip('satclip/satclip-vit16-l10.ckpt', device="cpu")
|
36 |
+
self.model.eval()
|
37 |
+
self.model = self.model.to(locs.device)
|
38 |
+
locs = locs*torch.tensor([[180.0, 90.0]], device=locs.device)
|
39 |
+
max_batch = 1000000
|
40 |
+
loc_feats = torch.empty(locs.shape[0], 256, device=locs.device)
|
41 |
+
with torch.no_grad():
|
42 |
+
for i in range(0, locs.shape[0], max_batch):
|
43 |
+
loc_feats[i:i+max_batch] = self.model(locs[i:i+max_batch].double()).float()
|
44 |
+
else:
|
45 |
+
raise NotImplementedError('Unknown input encoding.')
|
46 |
+
return loc_feats
|
47 |
+
|
48 |
+
def encode_fast(self, loc: list[float], normalize=True):
|
49 |
+
assert not normalize
|
50 |
+
if self.input_enc == 'sin_cos':
|
51 |
+
loc_feats = encode_loc_fast(loc, input_dim=self.input_dim)
|
52 |
+
else:
|
53 |
+
raise NotImplementedError('Unknown input encoding.')
|
54 |
+
return loc_feats
|
55 |
+
|
56 |
+
|
57 |
+
class TimeEncoder:
|
58 |
+
|
59 |
+
def __init__(self, input_enc='conical'):
|
60 |
+
self.input_enc = input_enc
|
61 |
+
|
62 |
+
def encode(self, intervals):
|
63 |
+
# assumes time, width in range [0, 1]
|
64 |
+
t_center = intervals[:, :1]
|
65 |
+
t_width = intervals[:, 1:]
|
66 |
+
if self.input_enc == 'conical':
|
67 |
+
t_feats = torch.cat([(1 - t_width) * torch.sin(2 * torch.pi * t_center),
|
68 |
+
(1 - t_width) * torch.cos(2 * torch.pi * t_center), 2 * t_width - 1], dim=1)
|
69 |
+
elif self.input_enc == 'cylindrical':
|
70 |
+
t_feats = torch.cat([torch.sin(2 * torch.pi * t_center), torch.cos(2 * torch.pi * t_center), 2 * t_width - 1], dim=1)
|
71 |
+
return t_feats
|
72 |
+
|
73 |
+
def encode_fast(self, intervals):
|
74 |
+
# assumes time, width in range [0, 1]
|
75 |
+
t_center, t_width = intervals
|
76 |
+
if self.input_enc == 'conical':
|
77 |
+
t_feats = torch.tensor([(1 - t_width) * math.sin(2 * math.pi * t_center),
|
78 |
+
(1 - t_width) * math.cos(2 * math.pi * t_center), 2 * t_width - 1])
|
79 |
+
elif self.input_enc == 'cylindrical':
|
80 |
+
t_feats = torch.tensor([math.sin(2 * math.pi * t_center),
|
81 |
+
math.cos(2 * math.pi * t_center), 2 * t_width - 1])
|
82 |
+
return t_feats
|
83 |
+
|
84 |
+
|
85 |
+
def normalize_coords(locs):
|
86 |
+
# locs is in lon {-180, 180}, lat {90, -90}
|
87 |
+
# output is in the range [-1, 1]
|
88 |
+
|
89 |
+
locs[:,0] /= 180.0
|
90 |
+
locs[:,1] /= 90.0
|
91 |
+
|
92 |
+
return locs
|
93 |
+
|
94 |
+
def encode_loc(loc_ip, concat_dim=1, input_dim=0):
|
95 |
+
# assumes inputs location are in range -1 to 1
|
96 |
+
# location is lon, lat
|
97 |
+
encs = []
|
98 |
+
for i in range(input_dim//4):
|
99 |
+
encs.append(torch.sin(math.pi*(2**i)*loc_ip))
|
100 |
+
encs.append(torch.cos(math.pi*(2**i)*loc_ip))
|
101 |
+
feats = torch.cat(encs, concat_dim)
|
102 |
+
return feats
|
103 |
+
|
104 |
+
|
105 |
+
def encode_loc_fast(loc_ip: list[float], input_dim=0):
|
106 |
+
# assumes inputs location are in range -1 to 1
|
107 |
+
# location is lon, lat
|
108 |
+
input_dim //= 2 # needed to make it compatible with encode_loc
|
109 |
+
feats = [(math.sin if i%(2*len(loc_ip))<len(loc_ip) else math.cos)(math.pi*(2**(i//(2*len(loc_ip))))*loc_ip[i%len(loc_ip)]) for i in range(input_dim)]
|
110 |
+
return feats
|
111 |
+
|
112 |
+
|
113 |
+
def bilinear_interpolate(loc_ip, data, remove_nans_raster=True):
|
114 |
+
# loc is N x 2 vector, where each row is [lon,lat] entry
|
115 |
+
# each entry spans range [-1,1]
|
116 |
+
# data is H x W x C, height x width x channel data matrix
|
117 |
+
# op will be N x C matrix of interpolated features
|
118 |
+
|
119 |
+
assert data is not None
|
120 |
+
|
121 |
+
# map to [0,1], then scale to data size
|
122 |
+
loc = (loc_ip.clone() + 1) / 2.0
|
123 |
+
loc[:,1] = 1 - loc[:,1] # this is because latitude goes from +90 on top to bottom while
|
124 |
+
# longitude goes from -90 to 90 left to right
|
125 |
+
|
126 |
+
assert not torch.any(torch.isnan(loc))
|
127 |
+
|
128 |
+
if remove_nans_raster:
|
129 |
+
data[torch.isnan(data)] = 0.0 # replace with mean value (0 is mean post-normalization)
|
130 |
+
|
131 |
+
# cast locations into pixel space
|
132 |
+
loc[:, 0] *= (data.shape[1]-1)
|
133 |
+
loc[:, 1] *= (data.shape[0]-1)
|
134 |
+
|
135 |
+
loc_int = torch.floor(loc).long() # integer pixel coordinates
|
136 |
+
xx = loc_int[:, 0]
|
137 |
+
yy = loc_int[:, 1]
|
138 |
+
xx_plus = xx + 1
|
139 |
+
xx_plus[xx_plus > (data.shape[1]-1)] = data.shape[1]-1
|
140 |
+
yy_plus = yy + 1
|
141 |
+
yy_plus[yy_plus > (data.shape[0]-1)] = data.shape[0]-1
|
142 |
+
|
143 |
+
loc_delta = loc - torch.floor(loc) # delta values
|
144 |
+
dx = loc_delta[:, 0].unsqueeze(1)
|
145 |
+
dy = loc_delta[:, 1].unsqueeze(1)
|
146 |
+
|
147 |
+
interp_val = data[yy, xx, :]*(1-dx)*(1-dy) + data[yy, xx_plus, :]*dx*(1-dy) + \
|
148 |
+
data[yy_plus, xx, :]*(1-dx)*dy + data[yy_plus, xx_plus, :]*dx*dy
|
149 |
+
|
150 |
+
return interp_val
|
151 |
+
|
152 |
+
def rand_samples(batch_size, device, rand_type='uniform'):
|
153 |
+
# randomly sample background locations
|
154 |
+
|
155 |
+
if rand_type == 'spherical':
|
156 |
+
rand_loc = torch.rand(batch_size, 2).to(device)
|
157 |
+
theta1 = 2.0*math.pi*rand_loc[:, 0]
|
158 |
+
theta2 = torch.acos(2.0*rand_loc[:, 1] - 1.0)
|
159 |
+
lat = 1.0 - 2.0*theta2/math.pi
|
160 |
+
lon = (theta1/math.pi) - 1.0
|
161 |
+
rand_loc = torch.cat((lon.unsqueeze(1), lat.unsqueeze(1)), 1)
|
162 |
+
|
163 |
+
elif rand_type == 'uniform':
|
164 |
+
rand_loc = torch.rand(batch_size, 2).to(device)*2.0 - 1.0
|
165 |
+
|
166 |
+
return rand_loc
|
167 |
+
|
168 |
+
def get_time_stamp():
|
169 |
+
cur_time = str(datetime.datetime.now())
|
170 |
+
date, time = cur_time.split(' ')
|
171 |
+
h, m, s = time.split(':')
|
172 |
+
s = s.split('.')[0]
|
173 |
+
time_stamp = '{}-{}-{}-{}'.format(date, h, m, s)
|
174 |
+
return time_stamp
|
175 |
+
|
176 |
+
def coord_grid(grid_size, split_ids=None, split_of_interest=None):
|
177 |
+
# generate a grid of locations spaced evenly in coordinate space
|
178 |
+
|
179 |
+
feats = np.zeros((grid_size[0], grid_size[1], 2), dtype=np.float32)
|
180 |
+
mg = np.meshgrid(np.linspace(-180, 180, feats.shape[1]), np.linspace(90, -90, feats.shape[0]))
|
181 |
+
feats[:, :, 0] = mg[0]
|
182 |
+
feats[:, :, 1] = mg[1]
|
183 |
+
if split_ids is None or split_of_interest is None:
|
184 |
+
# return feats for all locations
|
185 |
+
# this will be an N x 2 array
|
186 |
+
return feats.reshape(feats.shape[0]*feats.shape[1], 2)
|
187 |
+
else:
|
188 |
+
# only select a subset of locations
|
189 |
+
ind_y, ind_x = np.where(split_ids==split_of_interest)
|
190 |
+
|
191 |
+
# these will be N_subset x 2 in size
|
192 |
+
return feats[ind_y, ind_x, :]
|
193 |
+
|
def create_spatial_split(raster, mask, train_amt=1.0, cell_size=25):
    # generates a checkerboard style train test split
    # 0 is invalid, 1 is train, and 2 is test
    # cell_size is in units of pixels
    split_ids = np.ones((raster.shape[0], raster.shape[1]))
    start = cell_size
    for ii in np.arange(0, split_ids.shape[0], cell_size):
        if start == 0:
            start = cell_size
        else:
            start = 0
        for jj in np.arange(start, split_ids.shape[1], cell_size*2):
            split_ids[ii:ii+cell_size, jj:jj+cell_size] = 2
    split_ids = split_ids*mask
    if train_amt < 1.0:
        # take a subset of the training data
        tr_y, tr_x = np.where(split_ids == 1)
        inds = np.random.choice(len(tr_y), int(len(tr_y)*(1.0-train_amt)), replace=False)
        split_ids[tr_y[inds], tr_x[inds]] = 0
    return split_ids

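# Minimal usage sketch for create_spatial_split: checkerboard train/test split over
# valid pixels (the raster and mask are random stand-ins; train_amt keeps 80% of train cells).
def _demo_create_spatial_split():
    raster = np.random.rand(180, 360, 4)
    mask = np.ones((180, 360))
    split_ids = create_spatial_split(raster, mask, train_amt=0.8, cell_size=25)
    return split_ids  # 0 = invalid, 1 = train, 2 = test
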
def average_precision_score_faster(y_true, y_scores):
    # drop-in replacement for sklearn's average_precision_score
    # comparable up to floating point differences
    num_positives = y_true.sum()
    inds = np.argsort(y_scores)[::-1]
    y_true_s = y_true[inds]

    false_pos_c = np.cumsum(1.0 - y_true_s)
    true_pos_c = np.cumsum(y_true_s)
    recall = true_pos_c / num_positives
    false_neg = np.maximum(true_pos_c + false_pos_c, np.finfo(np.float32).eps)
    precision = true_pos_c / false_neg

    recall_e = np.hstack((0, recall, 1))
    recall_e = (recall_e[1:] - recall_e[:-1])[:-1]
    map_score = (recall_e*precision).sum()
    return map_score

# TODO I might be able to just cast these to a float to make them 1 or 0
# TODO y_true are the same as the ones
def average_precision_score_fasterer(y_true, y_scores):
    # drop-in replacement for sklearn's average_precision_score
    # comparable up to floating point differences
    num_positives = y_true.sum()
    inds = torch.argsort(y_scores, descending=True)
    y_true_s = y_true[inds]

    false_pos_c = torch.cumsum(1.0 - y_true_s, dim=0)
    true_pos_c = torch.cumsum(y_true_s, dim=0)
    recall = true_pos_c / num_positives
    false_neg = (true_pos_c + false_pos_c).clip(min=np.finfo(np.float32).eps)
    precision = true_pos_c / false_neg

    recall_e = torch.cat([torch.zeros(1, device=recall.device), recall, torch.ones(1, device=recall.device)])
    recall_e = (recall_e[1:] - recall_e[:-1])[:-1]
    map_score = (recall_e*precision).sum()
    return map_score


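# Minimal sketch exercising both average precision helpers on random labels/scores;
# both should agree with sklearn.metrics.average_precision_score up to float error.
def _demo_average_precision():
    y_true = (np.random.rand(1000) > 0.9).astype(np.float32)
    y_scores = np.random.rand(1000).astype(np.float32)
    ap_np = average_precision_score_faster(y_true, y_scores)
    ap_torch = average_precision_score_fasterer(torch.from_numpy(y_true), torch.from_numpy(y_scores))
    return ap_np, float(ap_torch)

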
class DataPDFH3:
    def __init__(self, data='data_pdf_h3.pt', device='cpu'):
        super(DataPDFH3, self).__init__()
        self.data = torch.cumsum(torch.load(data, map_location=device), dim=0)
        self.data = torch.cat([torch.zeros_like(self.data[:1]), self.data], dim=0)
        inds = torch.load('inds_h3.pt')
        inds = ((inds >> 30) & 4194303)
        self.ind_map = -1 + torch.zeros(2 ** 22, dtype=torch.int32)
        self.ind_map[inds] = torch.arange(inds.shape[0], dtype=torch.int32)
        self.cum_counts = self.data.sum(dim=-1)

    def _sample(self, pos, time, noise_level):
        pos = pos.cpu()
        time = time.cpu()
        noise_level = noise_level.cpu()
        t_low = (365*(time - 0.5*(noise_level))).int()
        t_high = (365*(time + 0.5*(noise_level))).int()
        t_high[t_low < 0] += 365
        t_low[t_low < 0] += 365

        pos_ind = torch.from_numpy((h3.latlng_to_cell(90*pos[:, 1], 180*pos[:, 0], 5).astype(np.int64) >> 30) & 4194303)
        pos_ind = self.ind_map[pos_ind]
        counts = self.data[t_high.clamp(max=364)+1, pos_ind] - self.data[t_low, pos_ind]
        counts[t_high > 364] += self.data[(t_high[t_high > 364] - 365).clamp(max=364) + 1, pos_ind[t_high > 364]]
        counts[t_high > 729] += self.data[(t_high[t_high > 729] - 730).clamp(max=364) + 1, pos_ind[t_high > 729]]
        totals = self.cum_counts[t_high.clamp(max=364)+1] - self.cum_counts[t_low]
        totals[t_high > 364] += self.cum_counts[(t_high[t_high > 364] - 365).clamp(max=364) + 1]
        totals[t_high > 729] += self.cum_counts[(t_high[t_high > 729] - 730).clamp(max=364) + 1]
        counts[pos_ind < 0] = 0
        return counts, totals

    def sample(self, pos, time, noise_level):
        counts, totals = self._sample(pos, time, noise_level)
        return counts/totals

    def sample_log(self, pos, time, noise_level, eps=1e-2):
        counts, totals = self._sample(pos, time, noise_level)
        return torch.log(counts) - torch.log(totals + eps)


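# Minimal usage sketch for DataPDFH3: the fraction of all observations that fall in the
# H3 cell of each query location within a time window. Assumes data_pdf_h3.pt and
# inds_h3.pt are present in the working directory, as expected by __init__ above.
def _demo_data_pdf_h3():
    pdf = DataPDFH3(data='data_pdf_h3.pt', device='cpu')
    pos = torch.tensor([[-0.5, 0.25]])   # [lon, lat] scaled to [-1, 1]
    time = torch.tensor([0.5])           # time of year in [0, 1]
    noise_level = torch.tensor([0.1])    # window width as a fraction of the year
    return pdf.sample(pos, time, noise_level)

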
class LowRankModel:
    def __init__(self, data='nmf_256.pt', device='cpu'):
        super(LowRankModel, self).__init__()
        dim = -1
        x1, x2 = torch.load(data, map_location=device)
        m = torch.load('class_counts_locs_h3.pt').float()
        chosen_inds = m.sum(dim=0).to_dense().sort(descending=True).indices[:]
        if dim == 0:
            n = m.to_dense()[:, chosen_inds].sum(dim=dim, keepdim=True)
            self.data = n*torch.softmax(x1 @ x2, dim=dim)
            self.data = self.data/torch.sum(self.data, dim=1, keepdim=True)
        elif dim == 1:
            self.data = torch.softmax(x1 @ x2, dim=dim)
        elif dim == -1:
            self.data = torch.from_numpy(x1 @ x2)
            self.data = self.data/torch.sum(self.data, dim=1, keepdim=True)
        m = m.to_dense()[:, chosen_inds]
        #self.data = m.to_dense().float()/torch.sum(m.to_dense(), dim=1, keepdim=True)
        self.pc = m.sum(dim=1, keepdim=True) / m.sum()
        inds = torch.load('inds_h3.pt')[chosen_inds]
        inds = ((inds >> 30) & 4194303)
        self.ind_map = -1 + torch.zeros(2 ** 22, dtype=torch.int32)
        self.ind_map[inds] = torch.arange(inds.shape[0], dtype=torch.int32)

    def sample(self, pos):  # , time, noise_level):
        pos = pos.cpu()
        pos_ind = torch.from_numpy((h3.latlng_to_cell(pos[:, 1], pos[:, 0], 5).astype(np.int64) >> 30) & 4194303)
        pos_ind = self.ind_map[pos_ind]
        out = self.data[:, pos_ind]
        out *= self.pc
        out = out/torch.sum(out, dim=0, keepdim=True)
        out[:, pos_ind < 0] = 1.0/out.shape[0]
        return out
viz_ls_map.py
ADDED
@@ -0,0 +1,283 @@
"""
Demo that takes an iNaturalist taxa ID as input, generates a prediction
for each location on the globe, and saves the output as an image.
"""

import torch
import numpy as np
import matplotlib.pyplot as plt
import os
import json
import argparse

import utils
import datasets
import eval
import create_inputs_to_fs_sinr

text_model = './experiments/gpt_data.pt'

def extract_grit_token(model, text: str):
    def gritlm_instruction(instruction):
        return "<|user|>\n" + instruction + "\n<|embed|>\n" if instruction else "<|embed|>\n"
    d_rep = model.encode([text], instruction=gritlm_instruction(""))
    d_rep = torch.from_numpy(d_rep)
    return d_rep

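# Minimal sketch of extract_grit_token in use, mirroring the commented-out GritLM setup
# inside main() below; the import path is an assumption and loading GritLM-7B is heavyweight.
def _demo_extract_grit_token():
    from gritlm import GritLM  # assumed import path for the GritLM package
    grit = GritLM("GritLM/GritLM-7B", torch_dtype="auto", mode="embedding")
    return extract_grit_token(grit, "rocky alpine talus slopes above the treeline")  # 1 x D text embedding
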
def choose_context_points_from_map(eval_params):
    context_points = []

    if False:
        def onclick(event):
            if event.xdata is not None and event.ydata is not None:
                # Convert image coordinates to normalized geographical coordinates
                lon = event.xdata / mask.shape[1] * 2 - 1
                lat = 1 - event.ydata / mask.shape[0] * 2
                context_points.append((lon, lat))
                print(f"Added context point: ({lon}, {lat})")

        # Load ocean mask
        with open('paths.json', 'r') as f:
            paths = json.load(f)
        if eval_params['high_res']:
            mask = np.load(os.path.join(paths['masks'], 'ocean_mask_hr.npy'))
        else:
            mask = np.load(os.path.join(paths['masks'], 'ocean_mask.npy'))

        mask_inds = np.where(mask.reshape(-1) == 1)[0]

        # # Generate input features
        # locs = utils.coord_grid(mask.shape)
        # if not eval_params['disable_ocean_mask']:
        #     locs = locs[mask_inds, :]
        # locs = torch.from_numpy(locs)

        # Reshape and create masked array for visualization
        op_im = np.ones((mask.shape[0] * mask.shape[1])) * np.nan  # set to NaN
        op_im[mask_inds] = 0  # placeholder for the mask visualization
        op_im = op_im.reshape((mask.shape[0], mask.shape[1]))
        op_im = np.ma.masked_invalid(op_im)

        # Set color for masked values
        cmap = plt.cm.plasma
        cmap.set_bad(color='none')
        plt.ioff()

        # Display the map and capture context points
        fig, ax = plt.subplots(figsize=(6, 3), dpi=334)
        ax.imshow(op_im, cmap=cmap, interpolation='nearest')
        ax.axis('off')

        # Connect the onclick event to the handler
        cid = fig.canvas.mpl_connect('button_press_event', onclick)

        plt.show(block=True)  # block execution until the window is closed

        print(f"Context points collected: {context_points}")

    else:
        # USA
        # TODO: 37.541170, -92.003293 -> 1. flip the order, then 2. normalize by dividing by 180 (lon) and 90 (lat)
        context_points = [(-0.5884012559178662, 0.46394662490802496), (-0.5451199953511522, 0.4504212309809269),
                          (-0.5437674559584422, 0.5342786733289353), (-0.589753795310576, 0.5342786733289353)]
        print(f"Context points collected: {context_points}")
    return context_points

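# Minimal helper sketch for the normalization described in the TODO above: a raw
# (lat, lon) pair such as (37.541170, -92.003293) maps to a context point of
# (lon / 180, lat / 90) in [-1, 1]. The helper name is illustrative.
def latlon_to_context_point(lat, lon):
    return (lon / 180.0, lat / 90.0)
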
def main(eval_params):
    # load params
    with open('paths.json', 'r') as f:
        paths = json.load(f)

    ckp_name = os.path.split(eval_params['model_path'])[-1]
    experiment_name = os.path.split(os.path.split(eval_params['model_path'])[-2])[-1]

    eval_overrides = {'ckp_name': ckp_name,
                      'experiment_name': experiment_name,
                      'device': eval_params['device']}

    train_overrides = {'dataset': 'eval_transformer'}
    #grit = GritLM("GritLM/GritLM-7B", torch_dtype="auto", mode="embedding")
    #grit_gpt = torch.load(text_model, map_location='cpu')
    #context_model = torch.load("experiments/zero_shot_ls_sin_cos_cap_1000_text_context_20_sinr_two_layer_nn/model.pt", map_location=torch.device('cpu'))
    context_data = np.load('data/positive_eval_data.npz')
    text_type_value = 0

    for pt in eval_params['context_pt_trial']:
        number_of_context_points = pt
        if eval_params['choose_context_points'] == 1:
            #context_points = choose_context_points_from_map(eval_params)
            text_emb, text_type_value = create_inputs_to_fs_sinr.use_pregenerated_textemb_fromchris(taxon_id=eval_params['test_taxa'],
                                                                                                    text_type=eval_params['text_type'])
            context_points = create_inputs_to_fs_sinr.get_eval_context_points(taxa_id=eval_params['test_taxa'],
                                                                              context_data=context_data,
                                                                              size=number_of_context_points)
            model, context_locs_of_interest, train_params, class_of_interest = eval.generate_eval_embedding_from_given_points(
                context_points=context_points,
                overrides=eval_overrides,
                taxa_of_interest=eval_params['taxa_id'],
                train_overrides=train_overrides,
                text_emb=text_emb)
            # TODO: why is taxa_id updated to 'selected_points'?
            eval_params['taxa_id'] = 'selected_points'
        else:
            model, context_locs_of_interest, train_params, class_of_interest = eval.generate_eval_embeddings(
                overrides=eval_overrides,
                taxa_of_interest=eval_params['taxa_id'],
                num_context=eval_params['num_context'],
                train_overrides=train_overrides)

        if train_params['params']['input_enc'] in ['env', 'sin_cos_env']:
            raster = datasets.load_env()
        else:
            raster = None
        enc = utils.CoordEncoder(train_params['params']['input_enc'], raster=raster, input_dim=train_params['params']['input_dim'])
        enc_time = utils.CoordEncoder('sin_cos', raster=None, input_dim=2 * train_params['params']['input_time_dim'])

        # load ocean mask
        if eval_params['high_res']:
            mask = np.load(os.path.join(paths['masks'], 'ocean_mask_hr.npy'))
        else:
            mask = np.load(os.path.join(paths['masks'], 'ocean_mask.npy'))
        #mask = 0*mask+1
        mask_inds = np.where(mask.reshape(-1) == 1)[0]

        # generate input features
        locs = utils.coord_grid(mask.shape)
        if not eval_params['disable_ocean_mask']:
            locs = locs[mask_inds, :]
        locs = torch.from_numpy(locs)
        locs_enc = enc.encode(locs).to(eval_params['device'])
        if train_params['params']['input_time_dim'] > 0:
            extra_input = torch.cat([enc_time.encode(torch.tensor([[0.0]]), normalize=False), torch.tensor([[1.0]])],
                                    dim=1).to(eval_params['device'])
            locs_enc = torch.cat((locs_enc, extra_input.repeat(locs_enc.shape[0], 1)), dim=1)

        with torch.no_grad():
            # if eval is set to False we see what the EMA embeddings look like
            # (currently, with ema = 1.0, this is just the last training example seen)
            preds = model.embedding_forward(x=locs_enc, class_ids=None, return_feats=False, class_of_interest=class_of_interest, eval=True).cpu().numpy()

        # threshold predictions
        if eval_params['threshold'] > 0:
            print(f'Applying threshold of {eval_params["threshold"]} to the predictions.')
            preds[preds < eval_params['threshold']] = 0.0
            preds[preds >= eval_params['threshold']] = 1.0

        # mask data
        if not eval_params['disable_ocean_mask']:
            op_im = np.ones((mask.shape[0] * mask.shape[1])) * np.nan  # set to NaN
            op_im[mask_inds] = preds
        else:
            op_im = preds

        # reshape and create masked array for visualization
        op_im = op_im.reshape((mask.shape[0], mask.shape[1]))
        op_im = np.ma.masked_invalid(op_im)

        # set color for masked values
        cmap = plt.cm.plasma
        cmap.set_bad(color='none')
        if eval_params['set_max_cmap_to_1']:
            vmax = 1.0
        else:
            vmax = np.max(op_im)

        # # Display the image
        # if eval_params['show_map'] == 1:
        #     fig, ax = plt.subplots()
        #     cax = ax.imshow(op_im, vmin=0, vmax=vmax, cmap=cmap)
        #     fig.colorbar(cax)
        #     plt.show(block=True)  # set block=True to block code execution until the window is closed

        if eval_params['show_map'] == 1:
            # Display the image
            fig, ax = plt.subplots(figsize=(6, 3), dpi=334)
            plt.imshow(op_im, vmin=0, vmax=vmax, cmap=cmap, interpolation='nearest')
            plt.axis('off')

            if eval_params['show_context_points'] == 1:
                # Convert the tensor to a numpy array if it is not one already
                context_locs = context_locs_of_interest.numpy() if isinstance(context_locs_of_interest, torch.Tensor) else context_locs_of_interest
                # Convert context locations directly to image coordinates,
                # dropping the dummy context point (at 0, 0)
                image_x = (context_locs[1:, 0] + 1) / 2 * op_im.shape[1]        # scale longitude from [-1, 1] to [0, image width]
                image_y = (1 - (context_locs[1:, 1] + 1) / 2) * op_im.shape[0]  # scale latitude from [-1, 1] to [0, image height]

                from matplotlib.offsetbox import OffsetImage, AnnotationBbox

                # Plot the context locations
                def getImage(path):
                    return OffsetImage(plt.imread(path), zoom=.04)

                for x0, y0 in zip(image_x, image_y):
                    ab = AnnotationBbox(getImage('black_circle.png'), (x0, y0), frameon=False)
                    ax.add_artist(ab)
                #plt.scatter(image_x, image_y, c='green', s=30, marker=r'$\checkmark$')  # adjust color and size of the points

            #plt.show(block=True)  # block execution until the window is closed

        exp_name = eval_params['model_path'].split(os.path.sep)[-2]

        # save image
        #save_loc = os.path.join(eval_params['op_path'], exp_name + '_' + str(eval_params['taxa_id']) + '_' + eval_params['additional_save_name'] + '_map.png')
        #save_loc = 'images/testenv_' + eval_params['taxa_name'] + '(' + eval_params['taxa_id'] + ')_' + eval_params['text_type'] + '(' + str(text_type_value) + ')_' + str(number_of_context_points) + '.png'
        save_loc = 'images/testenv_' + eval_params['taxa_name'] + '(' + eval_params['taxa_id'] + ')_' + eval_params['text_type'] + '_' + str(number_of_context_points) + '.png'
        print(f'Saving image to {save_loc}')
        plt.savefig(save_loc, bbox_inches='tight', pad_inches=0, dpi=334)
        # plt.imsave(fname=save_loc, arr=op_im, vmin=0, vmax=vmax, cmap=cmap)
        plt.show(block=False)

    return True


if __name__ == '__main__':

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    info_str = '\nDemo that takes an iNaturalist taxa ID as input and ' + \
               'generates a predicted range for each location on the globe ' + \
               'and saves the output as an image.\n\n' + \
               'Warning: these estimated ranges should be validated before use.'

    parser = argparse.ArgumentParser(usage=info_str)
    # parser.add_argument('--model_path', type=str, default='./pretrained_models/model_an_full_input_enc_sin_cos_hard_cap_num_per_class_1000.pt')
    # parser.add_argument('--model_path', type=str, default='./experiments/transformer_ema_1.0/model_10.pt')
    # parser.add_argument('--model_path', type=str, default='./experiments/03_08_coord_multihead.pt/model.pt')
    # parser.add_argument('--model_path', type=str, default='./experiments/coord_context_20_without_registry/model_best.pt')
    # parser.add_argument('--model_path', type=str, default='./experiments/coord_sinr_inputs_context_20_without_registry/model_best.pt')
    parser.add_argument('--model_path', type=str, default='./experiments/zero_shot_ls_sin_cos_env_cap_1000_text_context_20_sinr_two_layer_nn/model.pt')
    #parser.add_argument('--model_path', type=str, default='./experiments/zero_shot_ls_sin_cos_cap_1000_text_context_20_sinr_two_layer_nn/model.pt')
    # parser.add_argument('--taxa_id', type=int, default=144575, help='iNaturalist taxon ID.')
    # parser.add_argument('--taxa_id', type=int, default=9083, help='iNaturalist taxon ID.')
    parser.add_argument('--taxa_id', type=int, default=3352, help='iNaturalist taxon ID.')
    parser.add_argument('--threshold', type=float, default=-1, help='Threshold the range map [0, 1].')
    parser.add_argument('--op_path', type=str, default='./images/', help='Location where the output image will be saved.')
    parser.add_argument('--rand_taxa', action='store_true', help='Select a random taxa.')
    parser.add_argument('--high_res', action='store_true', help='Generate higher resolution output.')
    parser.add_argument('--disable_ocean_mask', action='store_true', help='Do not use an ocean mask.')
    parser.add_argument('--set_max_cmap_to_1', action='store_true', help='Consistent maximum intensity output.')
    parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda')
    #parser.add_argument('--device', type=str, default='cuda:3', help='cpu or cuda')
    parser.add_argument('--show_map', type=int, default=1, help='shows the map if 1')
    parser.add_argument('--show_context_points', type=int, default=1, help='also plots context points if 1')
    parser.add_argument('--prefix', type=str, default='')
    parser.add_argument('--num_context', type=int, default=5)
    parser.add_argument('--choose_context_points', type=int, default=1)
    parser.add_argument('--additional_save_name', type=str, default="")
    # taxa: black & white warbler (10286), hyacinth macaw (18938), yellow baboon (67683),
    #       barn swallow (11901), pika (43188), loon (4626), European robin (13094),
    #       southern flying squirrel (46272)
    parser.add_argument('--taxa_name', type=str, default='sfs', help='Name of the taxon.')
    parser.add_argument('--test_taxa', type=int, default=46272, help='Taxon ID to test.')
    parser.add_argument('--text_type', type=str, default='range', help='Type of text for input.')
    parser.add_argument('--context_pt_trial', type=int, nargs='+', default=[0, 1, 2, 5, 10, 20], help='List of context points for trial.')
    eval_params = vars(parser.parse_args())

    if not os.path.isdir(eval_params['op_path']):
        os.makedirs(eval_params['op_path'])

    eval_params['high_res'] = True

    main(eval_params)