terapyon committed on
Commit
6fd8495
·
1 Parent(s): 7f1680d

made streamlit ui for base search ui

Browse files
Files changed (3) hide show
  1. pyproject.toml +1 -0
  2. requirements.txt +2 -1
  3. src/app.py +51 -0
pyproject.toml CHANGED
@@ -14,6 +14,7 @@ dependencies = [
14
  "pyarrow>=18.1.0",
15
  "sentence-transformers>=3.3.1",
16
  "sentencepiece>=0.2.0",
 
17
  "torch>=2.5.1",
18
  "tqdm>=4.67.1",
19
  "unidic-lite>=1.0.8",
 
14
  "pyarrow>=18.1.0",
15
  "sentence-transformers>=3.3.1",
16
  "sentencepiece>=0.2.0",
17
+ "streamlit>=1.41.1",
18
  "torch>=2.5.1",
19
  "tqdm>=4.67.1",
20
  "unidic-lite>=1.0.8",
requirements.txt CHANGED
@@ -9,4 +9,5 @@ pandas
9
  numpy
10
  polars
11
  pyarrow
12
- duckdb
 
 
9
  numpy
10
  polars
11
  pyarrow
12
+ duckdb
13
+ streamlit
src/app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import duckdb
3
+ from embedding import get_embeddings
4
+ from config import DUCKDB_FILE
5
+
6
+
7
+ @st.cache_resource
8
+ def get_conn():
9
+ return duckdb.connect(DUCKDB_FILE)
10
+
11
+
12
+ query = """WITH ordered_embeddings AS (
13
+ SELECT embeddings.id, embeddings.part FROM embeddings
14
+ ORDER BY array_distance(embedding, ?::FLOAT[1024])
15
+ LIMIT 10
16
+ )
17
+ SELECT
18
+ p.title,
19
+ p.date,
20
+ e.start,
21
+ e.text
22
+ FROM
23
+ ordered_embeddings oe
24
+ JOIN
25
+ episodes e
26
+ ON
27
+ oe.id = e.id AND oe.part = e.part
28
+ JOIN
29
+ podcasts p
30
+ ON
31
+ oe.id = p.id;
32
+ """
33
+
34
+ st.title("terapyon cannel search")
35
+
36
+ word = st.text_input("Search word")
37
+ if word:
38
+ st.write(f"Search word: {word}")
39
+ embeddings = get_embeddings([word], query=True)
40
+ word_embedding = embeddings[0, :]
41
+
42
+ conn = get_conn()
43
+ result = conn.execute(query, (word_embedding,)).df()
44
+ selected = st.dataframe(result,
45
+ on_select="rerun",
46
+ selection_mode="single-row")
47
+ if selected:
48
+ rows = selected["selection"].get("rows")
49
+ if rows:
50
+ row = rows[0]
51
+ st.text(result.iloc[row, 3])