Spaces:
Running
Running
Update vocab.py
Browse files
vocab.py
CHANGED
@@ -1,67 +1,28 @@
|
|
1 |
-
import sqlite3
|
2 |
import os
|
3 |
import json
|
4 |
-
import random
|
5 |
-
from ai_sentence import generate_sentence
|
6 |
-
from tqdm import tqdm
|
7 |
|
8 |
DATA_DIR = "./data"
|
9 |
-
DB_PATH = os.path.join(DATA_DIR, "sentences.db")
|
10 |
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
-
# 抽單字 & 查例句或 GPT 生成例句
|
13 |
-
def get_words_with_sentences(source, n):
|
14 |
-
try:
|
15 |
-
# 取得單字庫所有單字資料
|
16 |
-
with open(os.path.join(DATA_DIR, f"{source}.json"), 'r', encoding='utf-8') as f:
|
17 |
-
words = json.load(f)
|
18 |
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
-
result_display = ""
|
22 |
-
for word_data in tqdm(selected_words, desc="處理單字"):
|
23 |
-
word = word_data['word']
|
24 |
-
phonetic = word_data['phonetic']
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
|
33 |
-
|
34 |
-
# 優先取 Tatoeba
|
35 |
-
sentence = ""
|
36 |
-
for rec in sentence_records:
|
37 |
-
if rec[1] == "tatoeba":
|
38 |
-
sentence = rec[0]
|
39 |
-
break
|
40 |
-
if not sentence:
|
41 |
-
sentence = sentence_records[0][0]
|
42 |
-
else:
|
43 |
-
# 如果句庫沒有,生成 GPT 句子
|
44 |
-
sentence = generate_sentence(word, "EleutherAI/pythia-410m")
|
45 |
-
# 存回資料庫
|
46 |
-
conn = sqlite3.connect(DB_PATH)
|
47 |
-
c = conn.cursor()
|
48 |
-
c.execute('''
|
49 |
-
INSERT INTO sentences (word, phonetic, sentence, source, model)
|
50 |
-
VALUES (?, ?, ?, ?, ?)
|
51 |
-
ON CONFLICT(word, source, model) DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
|
52 |
-
''', (word, phonetic, sentence, "ai", "EleutherAI/pythia-410m"))
|
53 |
-
conn.commit()
|
54 |
-
conn.close()
|
55 |
-
|
56 |
-
result_display += f"""
|
57 |
-
<div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
|
58 |
-
<strong>單字:</strong> {word} <br>
|
59 |
-
<strong>音標:</strong> {phonetic or '無'} <br>
|
60 |
-
<strong>句子:</strong> {sentence}
|
61 |
-
</div>
|
62 |
-
"""
|
63 |
-
|
64 |
-
return result_display, "✅ 抽單字 & 生成完成"
|
65 |
-
|
66 |
-
except Exception as e:
|
67 |
-
return f"<p style='color:red;'>❌ 發生錯誤:{str(e)}</p>", f"❌ 錯誤:{str(e)}"
|
|
|
|
|
1 |
import os
|
2 |
import json
|
|
|
|
|
|
|
3 |
|
4 |
DATA_DIR = "./data"
|
|
|
5 |
|
6 |
+
# 取得單字庫名稱清單
|
7 |
+
def get_sources():
    """Return the names of the vocabulary sources available in DATA_DIR.

    Every entry in DATA_DIR whose name ends with ".json" counts as a source;
    the source name is that file name with the trailing ".json" removed.

    Returns:
        A list of source names, in the order os.listdir() reports the files.

    Raises:
        OSError: If DATA_DIR cannot be listed (for example, it is missing).
    """
    suffix = ".json"
    # Strip only the trailing extension. Splitting on the first ".json"
    # truncated names containing that text earlier in the name, e.g.
    # "a.json.backup.json" became "a" instead of "a.json.backup".
    return [
        name[: -len(suffix)]
        for name in os.listdir(DATA_DIR)
        if name.endswith(suffix)
    ]
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
+
# 取得單字庫的所有單字(回傳整個物件列表)
|
14 |
+
def get_words_from_source(source):
    """Load and return the parsed contents of one source's JSON file.

    Args:
        source: Source name; the file read is "<source>.json" in DATA_DIR.

    Returns:
        Whatever json.load() produces for that file. The original comment
        describes it as the whole list of word objects.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    json_file = os.path.join(DATA_DIR, f"{source}.json")
    with open(json_file, 'r', encoding='utf-8') as handle:
        return json.load(handle)
|
19 |
|
|
|
|
|
|
|
|
|
20 |
|
21 |
+
# 查詢單字細節(音標等)
|
22 |
+
def get_word_info(source, word):
    """Look up the entry for a single word in a source (phonetic, etc.).

    Args:
        source: Source name, passed through to get_words_from_source().
        word: Word to match exactly against each entry's 'word' value.

    Returns:
        The first entry whose 'word' equals `word`, e.g.
        {'id': 1, 'word': 'apple', 'phonetic': '...'}, or None when no
        entry matches.
    """
    # Lazy scan: stops at the first match, exactly like an early return.
    matches = (
        item for item in get_words_from_source(source) if item['word'] == word
    )
    return next(matches, None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|