terapyon committed on
Commit
89e36c5
·
1 Parent(s): af9a512

update for HF spaces

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +12 -0
  3. src/app.py +12 -2
  4. src/config.py +6 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  # podcast-search
2
 
3
  Podcast terapyon channelを検索する仕組み
 
1
+ title: Podcast Search
2
+ emoji: 🚀
3
+ colorFrom: green
4
+ colorTo: gray
5
+ sdk: streamlit
6
+ sdk_version: 1.41.1
7
+ app_file: src/app.py
8
+ pinned: false
9
+ license: mit
10
+ short_description: terapyon channel の検索
11
+
12
+
13
  # podcast-search
14
 
15
  Podcast terapyon channelを検索する仕組み
src/app.py CHANGED
@@ -1,13 +1,23 @@
1
  from datetime import timedelta
 
2
  import streamlit as st
3
  import duckdb
4
  from embedding import get_embeddings
5
- from config import DUCKDB_FILE
6
 
7
 
8
  @st.cache_resource
9
  def get_conn():
10
- return duckdb.connect(DUCKDB_FILE)
 
 
 
 
 
 
 
 
 
11
 
12
 
13
  title_query = """SELECT id, title FROM podcasts
 
1
  from datetime import timedelta
2
+ import os
3
  import streamlit as st
4
  import duckdb
5
  from embedding import get_embeddings
6
+ from config import HF_HOST, DUCKDB_FILE, HF_REPO_TYPE, HF_REPO_ID, HF_FILENAME
7
 
8
 
9
  @st.cache_resource
10
  def get_conn():
11
+ if HF_HOST:
12
+ os.environ["HUGGINGFACE_TOKEN"] = os.getenv("HF_TOKEN", "")
13
+ from huggingface_hub import hf_hub_download
14
+ local_file = hf_hub_download(
15
+ repo_type=HF_REPO_TYPE,
16
+ repo_id=HF_REPO_ID,
17
+ filename=HF_FILENAME)
18
+ return duckdb.connect(local_file)
19
+ else:
20
+ return duckdb.connect(DUCKDB_FILE)
21
 
22
 
23
  title_query = """SELECT id, title FROM podcasts
src/config.py CHANGED
@@ -4,8 +4,14 @@ from pathlib import Path
4
  # import logging
5
 
6
 
 
 
 
 
 
7
  HERE = Path(__file__).resolve().parent
8
  DUCKDB_FILE = HERE.parent / "db" / "terapyon-podcast.duckdb"
 
9
  STORE_DIR = HERE.parent / "store"
10
  DATA_DIR = HERE.parent / "data"
11
  PODCAST_TITLE_LIST = str(STORE_DIR / 'title-list-202301-202501.parquet')
 
4
  # import logging
5
 
6
 
7
+ HF_HOST = True
8
+ HF_REPO_TYPE = "dataset"
9
+ HF_REPO_ID = "terapyon/terapyon-podcast"
10
+ HF_FILENAME = "terapyon-podcast-20250104.duckdb"
11
+
12
  HERE = Path(__file__).resolve().parent
13
  DUCKDB_FILE = HERE.parent / "db" / "terapyon-podcast.duckdb"
14
+
15
  STORE_DIR = HERE.parent / "store"
16
  DATA_DIR = HERE.parent / "data"
17
  PODCAST_TITLE_LIST = str(STORE_DIR / 'title-list-202301-202501.parquet')