seanpedrickcase committed on
Commit
99d13a7
•
1 Parent(s): 043d5fc

Rearranged interface to focus on API. Optimised Dockerfile.

Browse files
Files changed (6) hide show
  1. Dockerfile +9 -5
  2. README.md +1 -1
  3. app.py +13 -11
  4. requirements.txt +5 -5
  5. requirements_aws.txt +11 -0
  6. tools/constants.py +1 -1
Dockerfile CHANGED
@@ -1,16 +1,18 @@
1
  FROM public.ecr.aws/docker/library/python:3.11.9-slim-bookworm
2
 
3
  # Install Lambda web adapter in case you want to run with an AWS Lambda function URL
4
- COPY --from=public.ecr.aws/awsguru/aws-lambda-adapter:0.8.3 /lambda-adapter /opt/extensions/lambda-adapter
5
 
 
 
6
 
7
  WORKDIR /src
8
 
9
  COPY requirements.txt .
10
 
11
- RUN pip install --no-cache-dir -r requirements.txt
12
-
13
- RUN pip install --no-cache-dir gradio==4.32.2
14
 
15
  # Set up a new user named "user" with user ID 1000
16
  RUN useradd -m -u 1000 user
@@ -19,7 +21,8 @@ RUN useradd -m -u 1000 user
19
  RUN chown -R user:user /home/user
20
 
21
  # Make output folder
22
- RUN mkdir -p /home/user/app/output && chown -R user:user /home/user/app/output
 
23
 
24
  # Switch to the "user" user
25
  USER user
@@ -29,6 +32,7 @@ ENV HOME=/home/user \
29
  PATH=/home/user/.local/bin:$PATH \
30
  PYTHONPATH=$HOME/app \
31
  PYTHONUNBUFFERED=1 \
 
32
  GRADIO_ALLOW_FLAGGING=never \
33
  GRADIO_NUM_PORTS=1 \
34
  GRADIO_SERVER_NAME=0.0.0.0 \
 
1
  FROM public.ecr.aws/docker/library/python:3.11.9-slim-bookworm
2
 
3
  # Install Lambda web adapter in case you want to run with an AWS Lambda function URL
4
+ #COPY --from=public.ecr.aws/awsguru/aws-lambda-adapter:0.8.3 /lambda-adapter /opt/extensions/lambda-adapter
5
 
6
+ # Update apt
7
+ RUN apt-get update && rm -rf /var/lib/apt/lists/*
8
 
9
  WORKDIR /src
10
 
11
  COPY requirements.txt .
12
 
13
+ RUN pip install --no-cache-dir torch==2.4.0+cpu --index-url https://download.pytorch.org/whl/cpu && \
14
+ pip install --no-cache-dir -r requirements_aws.txt && \
15
+ pip install --no-cache-dir gradio==4.42.0
16
 
17
  # Set up a new user named "user" with user ID 1000
18
  RUN useradd -m -u 1000 user
 
21
  RUN chown -R user:user /home/user
22
 
23
  # Make output folder
24
+ RUN mkdir -p /home/user/app/output && chown -R user:user /home/user/app/output && \
25
+ mkdir -p /home/user/app/output/api && chown -R user:user /home/user/app/output/api
26
 
27
  # Switch to the "user" user
28
  USER user
 
32
  PATH=/home/user/.local/bin:$PATH \
33
  PYTHONPATH=$HOME/app \
34
  PYTHONUNBUFFERED=1 \
35
+ PYTHONDONTWRITEBYTECODE=1 \
36
  GRADIO_ALLOW_FLAGGING=never \
37
  GRADIO_NUM_PORTS=1 \
38
  GRADIO_SERVER_NAME=0.0.0.0 \
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🌍
4
  colorFrom: purple
5
  colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.33.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
4
  colorFrom: purple
5
  colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 4.42.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
app.py CHANGED
@@ -47,14 +47,15 @@ with block:
47
  """)
48
 
49
  with gr.Tab("Match addresses"):
 
 
 
50
 
51
- with gr.Accordion("I have multiple addresses", open = True):
52
  in_file = gr.File(label="Input addresses from file", file_count= "multiple")
53
- in_colnames = gr.Dropdown(choices=[], multiselect=True, label="Select columns that make up the address. Make sure postcode is at the end")
54
- in_existing = gr.Dropdown(choices=[], multiselect=False, label="Select columns that indicate existing matches.")
55
 
56
- with gr.Accordion("I only have a single address", open = False):
57
- in_text = gr.Textbox(label="Input a single address as text")
58
 
59
  gr.Markdown(
60
  """
@@ -62,18 +63,19 @@ with block:
62
  Upload a reference file to match against, or alternatively call the Addressbase API (requires API key). Fuzzy matching will work on any address format, but the neural network will only work with the LLPG LPI format, e.g. with columns SaoText, SaoStartNumber etc.. This joins on the UPRN column. If any of these are different for you,
63
  open 'Custom reference file format or join columns' below.
64
  """)
65
-
66
- in_ref = gr.File(label="Input reference addresses from file", file_count= "multiple")
67
 
68
- with gr.Accordion("Use Addressbase API instead of reference file", open = False):
69
  in_api = gr.Dropdown(label="Choose API type", multiselect=False, value=None, choices=["Postcode"])#["Postcode", "UPRN"]) #choices=["Address", "Postcode", "UPRN"])
70
  in_api_key = gr.Textbox(label="Addressbase API key", type='password')
 
 
 
71
 
72
  with gr.Accordion("Custom reference file format or join columns (i.e. not LLPG LPI format)", open = False):
73
- in_refcol = gr.Dropdown(choices=[], multiselect=True, label="Select columns that make up the reference address. Make sure postcode is at the end")
74
- in_joincol = gr.Dropdown(choices=[], multiselect=True, label="Select columns you want to join on to the search dataset")
75
 
76
- match_btn = gr.Button("Match addresses")
77
 
78
  with gr.Row():
79
  output_summary = gr.Textbox(label="Output summary")
 
47
  """)
48
 
49
  with gr.Tab("Match addresses"):
50
+
51
+ with gr.Accordion("Quick check - single address", open = True):
52
+ in_text = gr.Textbox(label="Input a single address as text")
53
 
54
+ with gr.Accordion("I have multiple addresses", open = False):
55
  in_file = gr.File(label="Input addresses from file", file_count= "multiple")
56
+ in_colnames = gr.Dropdown(value=[], choices=[], multiselect=True, label="Select columns that make up the address. Make sure postcode is at the end")
57
+ in_existing = gr.Dropdown(value=[], choices=[], multiselect=False, label="Select columns that indicate existing matches.")
58
 
 
 
59
 
60
  gr.Markdown(
61
  """
 
63
  Upload a reference file to match against, or alternatively call the Addressbase API (requires API key). Fuzzy matching will work on any address format, but the neural network will only work with the LLPG LPI format, e.g. with columns SaoText, SaoStartNumber etc.. This joins on the UPRN column. If any of these are different for you,
64
  open 'Custom reference file format or join columns' below.
65
  """)
 
 
66
 
67
+ with gr.Accordion("Use Addressbase API (instead of reference file)", open = True):
68
  in_api = gr.Dropdown(label="Choose API type", multiselect=False, value=None, choices=["Postcode"])#["Postcode", "UPRN"]) #choices=["Address", "Postcode", "UPRN"])
69
  in_api_key = gr.Textbox(label="Addressbase API key", type='password')
70
+
71
+ with gr.Accordion("Match against reference file of addresses", open = False):
72
+ in_ref = gr.File(label="Input reference addresses from file", file_count= "multiple")
73
 
74
  with gr.Accordion("Custom reference file format or join columns (i.e. not LLPG LPI format)", open = False):
75
+ in_refcol = gr.Dropdown(value=[], choices=[], multiselect=True, label="Select columns that make up the reference address. Make sure postcode is at the end")
76
+ in_joincol = gr.Dropdown(value=[], choices=[], multiselect=True, label="Select columns you want to join on to the search dataset")
77
 
78
+ match_btn = gr.Button("Match addresses", variant="primary")
79
 
80
  with gr.Row():
81
  output_summary = gr.Textbox(label="Output summary")
requirements.txt CHANGED
@@ -1,12 +1,12 @@
1
- numpy==1.26.2
2
  pandas==2.2.2
3
  rapidfuzz==3.8.1
4
- torch==2.2.1
5
  recordlinkage==0.16
6
  pyap==0.3.1
7
  pytest==7.4.3
8
  pyarrow==14.0.1
9
  openpyxl==3.1.2
10
- gradio==4.33.0
11
- boto3==1.34.103
12
- polars==0.20.19
 
 
1
+ torch==2.4.0
2
  pandas==2.2.2
3
  rapidfuzz==3.8.1
 
4
  recordlinkage==0.16
5
  pyap==0.3.1
6
  pytest==7.4.3
7
  pyarrow==14.0.1
8
  openpyxl==3.1.2
9
+ gradio==4.42.0
10
+ boto3==1.34.158
11
+ polars==0.20.19
12
+ numpy==1.26.4
requirements_aws.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas==2.2.2
2
+ rapidfuzz==3.8.1
3
+ recordlinkage==0.16
4
+ pyap==0.3.1
5
+ pytest==7.4.3
6
+ pyarrow==14.0.1
7
+ openpyxl==3.1.2
8
+ gradio==4.42.0
9
+ boto3==1.34.158
10
+ polars==0.20.19
11
+ numpy==1.26.4
tools/constants.py CHANGED
@@ -197,7 +197,7 @@ if os.path.exists(model_path):
197
 
198
  out_model_path = os.path.join(out_model_dir, out_model_file_name)
199
  print("Model location: ", out_model_path)
200
- exported_model.load_state_dict(torch.load(out_model_path, map_location=torch.device('cpu')))
201
  exported_model.eval()
202
 
203
  device='cpu'
 
197
 
198
  out_model_path = os.path.join(out_model_dir, out_model_file_name)
199
  print("Model location: ", out_model_path)
200
+ exported_model.load_state_dict(torch.load(out_model_path, map_location=torch.device('cpu'), weights_only=False))
201
  exported_model.eval()
202
 
203
  device='cpu'