Gabriel committed on
Commit
0b149d1
·
1 Parent(s): 14d4a0b

added git lfs

Browse files
Files changed (3) hide show
  1. Dockerfile +5 -2
  2. app.py +6 -68
  3. helper/utils.py +69 -0
Dockerfile CHANGED
@@ -5,12 +5,15 @@ ENV PYTHONUNBUFFERED=1
5
 
6
  RUN apt-get update && apt-get install --no-install-recommends -y \
7
  build-essential \
8
- # python3.9 \ # Commented out as it might be pre-installed
9
  python3-pip \
10
  git \
11
  ffmpeg \
12
  libsm6 \
13
  libxext6 \
 
 
 
 
14
  && apt-get clean && rm -rf /var/lib/apt/lists/*
15
 
16
  WORKDIR /code
@@ -48,4 +51,4 @@ WORKDIR $HOME/app
48
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
49
  COPY --chown=user . $HOME/app
50
 
51
- CMD ["python3", "app.py"]
 
5
 
6
  RUN apt-get update && apt-get install --no-install-recommends -y \
7
  build-essential \
 
8
  python3-pip \
9
  git \
10
  ffmpeg \
11
  libsm6 \
12
  libxext6 \
13
+ curl \
14
+ && curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash \
15
+ && apt-get install --no-install-recommends -y git-lfs \
16
+ && git lfs install \
17
  && apt-get clean && rm -rf /var/lib/apt/lists/*
18
 
19
  WORKDIR /code
 
51
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
52
  COPY --chown=user . $HOME/app
53
 
54
+ CMD ["python3", "app.py"]
app.py CHANGED
@@ -1,13 +1,6 @@
1
- import hashlib
2
  import os
3
- import shutil
4
- import sqlite3
5
- from datetime import datetime
6
 
7
  import gradio as gr
8
- import huggingface_hub
9
- import pandas as pd
10
- import pytz
11
  from apscheduler.schedulers.background import BackgroundScheduler
12
 
13
  from helper.gradio_config import css, theme
@@ -15,71 +8,16 @@ from helper.text.text_about import TextAbout
15
  from helper.text.text_app import TextApp
16
  from helper.text.text_howto import TextHowTo
17
  from helper.text.text_roadmap import TextRoadmap
 
18
  from tabs.htr_tool import htr_tool_tab
19
  from tabs.stepwise_htr_tool import stepwise_htr_tool_tab
20
 
21
- DB_FILE = "./traffic_data.db"
22
 
23
- TOKEN = os.environ.get("HUB_TOKEN")
24
- repo = huggingface_hub.Repository(
25
- local_dir="data", repo_type="dataset", clone_from="Riksarkivet/traffic_demo_data", use_auth_token=TOKEN
26
- )
27
- repo.git_pull()
28
-
29
- # Set db to latest
30
- shutil.copyfile("./data/traffic_data.db", DB_FILE)
31
-
32
-
33
- def hash_ip(ip_address):
34
- return hashlib.sha256(ip_address.encode()).hexdigest()
35
-
36
-
37
- # Create table if it doesn't already exist
38
- db = sqlite3.connect(DB_FILE)
39
- try:
40
- db.execute("SELECT * FROM ip_data").fetchall()
41
- db.close()
42
- except sqlite3.OperationalError:
43
- db.execute(
44
- """
45
- CREATE TABLE ip_data (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
46
- current_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
47
- hashed_ip TEXT)
48
- """
49
- )
50
- db.commit()
51
- db.close()
52
-
53
-
54
- def current_time_sw():
55
- swedish_tz = pytz.timezone("Europe/Stockholm")
56
- return datetime.now(swedish_tz).strftime("%Y-%m-%d %H:%M:%S")
57
-
58
-
59
- def add_ip_data(request: gr.Request):
60
- host = request.client.host
61
- hashed_ip = hash_ip(host)
62
-
63
- db = sqlite3.connect(DB_FILE)
64
- cursor = db.cursor()
65
- cursor.execute("INSERT INTO ip_data(current_time, hashed_ip) VALUES(?,?)", [current_time_sw(), hashed_ip])
66
- db.commit()
67
- db.close()
68
-
69
-
70
- def backup_db():
71
- shutil.copyfile(DB_FILE, "./data/traffic_data.db")
72
- db = sqlite3.connect(DB_FILE)
73
- ip_data = db.execute("SELECT * FROM ip_data").fetchall()
74
- pd.DataFrame(ip_data, columns=["id", "current_time", "hashed_ip"]).to_csv("./data/ip_data.csv", index=False)
75
-
76
- print("updating traffic_data")
77
- repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.now()}")
78
-
79
-
80
- scheduler = BackgroundScheduler()
81
- scheduler.add_job(func=backup_db, trigger="interval", seconds=60)
82
- scheduler.start()
83
 
84
 
85
  with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
 
 
1
  import os
 
 
 
2
 
3
  import gradio as gr
 
 
 
4
  from apscheduler.schedulers.background import BackgroundScheduler
5
 
6
  from helper.gradio_config import css, theme
 
8
  from helper.text.text_app import TextApp
9
  from helper.text.text_howto import TextHowTo
10
  from helper.text.text_roadmap import TextRoadmap
11
+ from helper.utils import add_ip_data, backup_db
12
  from tabs.htr_tool import htr_tool_tab
13
  from tabs.stepwise_htr_tool import stepwise_htr_tool_tab
14
 
15
+ SECRET_KEY = os.environ.get("AM_I_IN_A_DOCKER_CONTAINER", False)
16
 
17
+ if SECRET_KEY:
18
+ scheduler = BackgroundScheduler()
19
+ scheduler.add_job(func=backup_db, trigger="interval", seconds=60)
20
+ scheduler.start()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
 
23
  with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
helper/utils.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hashlib
2
+ import os
3
+ import shutil
4
+ import sqlite3
5
+ from datetime import datetime
6
+
7
+ import gradio as gr
8
+ import huggingface_hub
9
+ import pandas as pd
10
+ import pytz
11
+
12
+
13
+ def hash_ip(ip_address):
14
+ return hashlib.sha256(ip_address.encode()).hexdigest()
15
+
16
+
17
+ def current_time_sw():
18
+ swedish_tz = pytz.timezone("Europe/Stockholm")
19
+ return datetime.now(swedish_tz).strftime("%Y-%m-%d %H:%M:%S")
20
+
21
+
22
+ def add_ip_data(request: gr.Request):
23
+ host = request.client.host
24
+ hashed_ip = hash_ip(host)
25
+
26
+ db = sqlite3.connect(DB_FILE)
27
+ cursor = db.cursor()
28
+ cursor.execute("INSERT INTO ip_data(current_time, hashed_ip) VALUES(?,?)", [current_time_sw(), hashed_ip])
29
+ db.commit()
30
+ db.close()
31
+
32
+
33
+ def backup_db():
34
+ shutil.copyfile(DB_FILE, "./data/traffic_data.db")
35
+ db = sqlite3.connect(DB_FILE)
36
+ ip_data = db.execute("SELECT * FROM ip_data").fetchall()
37
+ pd.DataFrame(ip_data, columns=["id", "current_time", "hashed_ip"]).to_csv("./data/ip_data.csv", index=False)
38
+
39
+ print("updating traffic_data")
40
+ repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.now()}")
41
+
42
+
43
+ DB_FILE = "./traffic_data.db"
44
+
45
+ TOKEN = os.environ.get("HUB_TOKEN")
46
+ repo = huggingface_hub.Repository(
47
+ local_dir="data", repo_type="dataset", clone_from="Riksarkivet/traffic_demo_data", use_auth_token=TOKEN
48
+ )
49
+ repo.git_pull()
50
+
51
+ # Set db to latest
52
+ shutil.copyfile("./data/traffic_data.db", DB_FILE)
53
+
54
+
55
+ # Create table if it doesn't already exist
56
+ db = sqlite3.connect(DB_FILE)
57
+ try:
58
+ db.execute("SELECT * FROM ip_data").fetchall()
59
+ db.close()
60
+ except sqlite3.OperationalError:
61
+ db.execute(
62
+ """
63
+ CREATE TABLE ip_data (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
64
+ current_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
65
+ hashed_ip TEXT)
66
+ """
67
+ )
68
+ db.commit()
69
+ db.close()