Spaces:

seanpedrickcase
/

address_matcher

Runtime error

App Files Files Community

seanpedrickcase committed on Aug 28

Commit

99d13a7

•

1 Parent(s): 043d5fc

Rearranged interface to focus on API. Optimised Dockerfile.

Browse files

Files changed (6) hide show

Dockerfile +9 -5
README.md +1 -1
app.py +13 -11
requirements.txt +5 -5
requirements_aws.txt +11 -0
tools/constants.py +1 -1

Dockerfile CHANGED Viewed

@@ -1,16 +1,18 @@
 FROM public.ecr.aws/docker/library/python:3.11.9-slim-bookworm
 # Install Lambda web adapter in case you want to run with an AWS Lambda function URL
-COPY --from=public.ecr.aws/awsguru/aws-lambda-adapter:0.8.3 /lambda-adapter /opt/extensions/lambda-adapter
 WORKDIR /src
 COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-RUN pip install --no-cache-dir gradio==4.32.2
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
@@ -19,7 +21,8 @@ RUN useradd -m -u 1000 user
 RUN chown -R user:user /home/user
 # Make output folder
-RUN mkdir -p /home/user/app/output && chown -R user:user /home/user/app/output
 # Switch to the "user" user
 USER user
@@ -29,6 +32,7 @@ ENV HOME=/home/user \
 	PATH=/home/user/.local/bin:$PATH \
     PYTHONPATH=$HOME/app \
 	PYTHONUNBUFFERED=1 \
 	GRADIO_ALLOW_FLAGGING=never \
 	GRADIO_NUM_PORTS=1 \
 	GRADIO_SERVER_NAME=0.0.0.0 \

 FROM public.ecr.aws/docker/library/python:3.11.9-slim-bookworm
 # Install Lambda web adapter in case you want to run with an AWS Lambda function URL
+#COPY --from=public.ecr.aws/awsguru/aws-lambda-adapter:0.8.3 /lambda-adapter /opt/extensions/lambda-adapter
+# Update apt
+RUN apt-get update && rm -rf /var/lib/apt/lists/*
 WORKDIR /src
 COPY requirements.txt .
+RUN pip install --no-cache-dir torch==2.4.0+cpu --index-url https://download.pytorch.org/whl/cpu && \
+	pip install --no-cache-dir -r requirements_aws.txt && \
+	pip install --no-cache-dir gradio==4.42.0
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
 RUN chown -R user:user /home/user
 # Make output folder
+RUN mkdir -p /home/user/app/output && chown -R user:user /home/user/app/output && \
+	mkdir -p /home/user/app/output/api && chown -R user:user /home/user/app/output/api
 # Switch to the "user" user
 USER user
 	PATH=/home/user/.local/bin:$PATH \
     PYTHONPATH=$HOME/app \
 	PYTHONUNBUFFERED=1 \
+	PYTHONDONTWRITEBYTECODE=1 \
 	GRADIO_ALLOW_FLAGGING=never \
 	GRADIO_NUM_PORTS=1 \
 	GRADIO_SERVER_NAME=0.0.0.0 \

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🌍
 colorFrom: purple
 colorTo: gray
 sdk: gradio
-sdk_version: 4.33.0
 app_file: app.py
 pinned: false
 license: apache-2.0

 colorFrom: purple
 colorTo: gray
 sdk: gradio
+sdk_version: 4.42.0
 app_file: app.py
 pinned: false
 license: apache-2.0

app.py CHANGED Viewed

@@ -47,14 +47,15 @@ with block:
     """)
     with gr.Tab("Match addresses"):
-        with gr.Accordion("I have multiple addresses", open = True):
             in_file = gr.File(label="Input addresses from file", file_count= "multiple")
-            in_colnames = gr.Dropdown(choices=[], multiselect=True, label="Select columns that make up the address. Make sure postcode is at the end")
-            in_existing = gr.Dropdown(choices=[], multiselect=False, label="Select columns that indicate existing matches.")
-        with gr.Accordion("I only have a single address", open = False):
-            in_text = gr.Textbox(label="Input a single address as text")
         gr.Markdown(
         """
@@ -62,18 +63,19 @@ with block:
         Upload a reference file to match against, or alternatively call the Addressbase API (requires API key). Fuzzy matching will work on any address format, but the neural network will only work with the LLPG LPI format, e.g. with columns SaoText, SaoStartNumber etc.. This joins on the UPRN column. If any of these are different for you,
         open 'Custom reference file format or join columns' below.
         """)
-        in_ref = gr.File(label="Input reference addresses from file", file_count= "multiple")
-        with gr.Accordion("Use Addressbase API instead of reference file", open = False):
             in_api = gr.Dropdown(label="Choose API type", multiselect=False, value=None, choices=["Postcode"])#["Postcode", "UPRN"]) #choices=["Address", "Postcode", "UPRN"])
             in_api_key = gr.Textbox(label="Addressbase API key", type='password')
         with gr.Accordion("Custom reference file format or join columns (i.e. not LLPG LPI format)", open = False):
-            in_refcol = gr.Dropdown(choices=[], multiselect=True, label="Select columns that make up the reference address. Make sure postcode is at the end")
-            in_joincol = gr.Dropdown(choices=[], multiselect=True, label="Select columns you want to join on to the search dataset")
-        match_btn = gr.Button("Match addresses")
         with gr.Row():
             output_summary = gr.Textbox(label="Output summary")

     """)
     with gr.Tab("Match addresses"):
+        with gr.Accordion("Quick check - single address", open = True):
+            in_text = gr.Textbox(label="Input a single address as text")
+        with gr.Accordion("I have multiple addresses", open = False):
             in_file = gr.File(label="Input addresses from file", file_count= "multiple")
+            in_colnames = gr.Dropdown(value=[], choices=[], multiselect=True, label="Select columns that make up the address. Make sure postcode is at the end")
+            in_existing = gr.Dropdown(value=[], choices=[], multiselect=False, label="Select columns that indicate existing matches.")
         gr.Markdown(
         """
         Upload a reference file to match against, or alternatively call the Addressbase API (requires API key). Fuzzy matching will work on any address format, but the neural network will only work with the LLPG LPI format, e.g. with columns SaoText, SaoStartNumber etc.. This joins on the UPRN column. If any of these are different for you,
         open 'Custom reference file format or join columns' below.
         """)
+        with gr.Accordion("Use Addressbase API (instead of reference file)", open = True):
             in_api = gr.Dropdown(label="Choose API type", multiselect=False, value=None, choices=["Postcode"])#["Postcode", "UPRN"]) #choices=["Address", "Postcode", "UPRN"])
             in_api_key = gr.Textbox(label="Addressbase API key", type='password')
+        with gr.Accordion("Match against reference file of addresses", open = False):
+            in_ref = gr.File(label="Input reference addresses from file", file_count= "multiple")
         with gr.Accordion("Custom reference file format or join columns (i.e. not LLPG LPI format)", open = False):
+            in_refcol = gr.Dropdown(value=[], choices=[], multiselect=True, label="Select columns that make up the reference address. Make sure postcode is at the end")
+            in_joincol = gr.Dropdown(value=[], choices=[], multiselect=True, label="Select columns you want to join on to the search dataset")
+        match_btn = gr.Button("Match addresses", variant="primary")
         with gr.Row():
             output_summary = gr.Textbox(label="Output summary")

requirements.txt CHANGED Viewed

@@ -1,12 +1,12 @@
-numpy==1.26.2
 pandas==2.2.2
 rapidfuzz==3.8.1
-torch==2.2.1
 recordlinkage==0.16
 pyap==0.3.1
 pytest==7.4.3
 pyarrow==14.0.1
 openpyxl==3.1.2
-gradio==4.33.0
-boto3==1.34.103
-polars==0.20.19

+torch==2.4.0
 pandas==2.2.2
 rapidfuzz==3.8.1
 recordlinkage==0.16
 pyap==0.3.1
 pytest==7.4.3
 pyarrow==14.0.1
 openpyxl==3.1.2
+gradio==4.42.0
+boto3==1.34.158
+polars==0.20.19
+numpy==1.26.4

requirements_aws.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+pandas==2.2.2
+rapidfuzz==3.8.1
+recordlinkage==0.16
+pyap==0.3.1
+pytest==7.4.3
+pyarrow==14.0.1
+openpyxl==3.1.2
+gradio==4.42.0
+boto3==1.34.158
+polars==0.20.19
+numpy==1.26.4

tools/constants.py CHANGED Viewed

@@ -197,7 +197,7 @@ if os.path.exists(model_path):
             out_model_path = os.path.join(out_model_dir, out_model_file_name)
             print("Model location: ", out_model_path)
-            exported_model.load_state_dict(torch.load(out_model_path, map_location=torch.device('cpu')))
             exported_model.eval()
             device='cpu'

             out_model_path = os.path.join(out_model_dir, out_model_file_name)
             print("Model location: ", out_model_path)
+            exported_model.load_state_dict(torch.load(out_model_path, map_location=torch.device('cpu'), weights_only=False))
             exported_model.eval()
             device='cpu'