dayuian committed on
Commit
3ed2d28
·
verified ·
1 Parent(s): 4982f66

Update vocab.py

Browse files
Files changed (1) hide show
  1. vocab.py +18 -57
vocab.py CHANGED
@@ -1,67 +1,28 @@
1
- import sqlite3
2
  import os
3
  import json
4
- import random
5
- from ai_sentence import generate_sentence
6
- from tqdm import tqdm
7
 
8
  DATA_DIR = "./data"
9
- DB_PATH = os.path.join(DATA_DIR, "sentences.db")
10
 
 
 
 
 
 
11
 
12
# Draw random words, then look up an example sentence for each one or
# generate it with the language model.
def get_words_with_sentences(source, n):
    """Pick ``n`` random words from a source and pair each with a sentence.

    The word list is read from ``<DATA_DIR>/<source>.json``. For every
    sampled word the ``sentences`` table in ``DB_PATH`` is queried; a row
    whose ``source`` is ``"tatoeba"`` is preferred, otherwise the first row
    is used. When no row exists, a sentence is generated with
    ``generate_sentence`` and stored back with ``source="ai"``.

    Args:
        source: Name of the vocabulary source (JSON file name without
            the ``.json`` extension).
        n: Number of words to sample; passed straight to ``random.sample``.

    Returns:
        tuple[str, str]: ``(html, status)``. ``html`` holds one ``<div>``
        card per word, ``status`` is a short status message. If anything
        fails, both elements carry the error text instead of raising.
    """
    import html  # function-scope imports: only this function needs them
    from contextlib import closing

    model_name = "EleutherAI/pythia-410m"

    try:
        # Load every entry of the selected vocabulary source.
        with open(os.path.join(DATA_DIR, f"{source}.json"), 'r', encoding='utf-8') as f:
            words = json.load(f)

        selected_words = random.sample(words, n)

        cards = []
        # One connection for the whole batch. closing() guarantees it is
        # released even when a query or the sentence generation raises.
        with closing(sqlite3.connect(DB_PATH)) as conn:
            for word_data in tqdm(selected_words, desc="處理單字"):
                word = word_data['word']
                phonetic = word_data['phonetic']

                # Look the word up in the sentence store.
                sentence_records = conn.execute(
                    'SELECT sentence, source, model FROM sentences WHERE word=?',
                    (word,),
                ).fetchall()

                if sentence_records:
                    # Prefer a Tatoeba sentence; fall back to the first row
                    # when there is none (or when it is empty).
                    sentence = next(
                        (rec[0] for rec in sentence_records if rec[1] == "tatoeba"),
                        "",
                    ) or sentence_records[0][0]
                else:
                    # Nothing stored yet: generate a sentence and persist it.
                    # The upsert relies on a uniqueness constraint over
                    # (word, source, model) in the sentences table.
                    sentence = generate_sentence(word, model_name)
                    conn.execute('''
                        INSERT INTO sentences (word, phonetic, sentence, source, model)
                        VALUES (?, ?, ?, ?, ?)
                        ON CONFLICT(word, source, model) DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
                    ''', (word, phonetic, sentence, "ai", model_name))
                    # Commit per word so earlier results survive a later failure.
                    conn.commit()

                # Escape every interpolated value: the sentence may come from
                # a model or the database and must not be parsed as markup.
                cards.append(f"""
<div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
<strong>單字:</strong> {html.escape(str(word))} <br>
<strong>音標:</strong> {html.escape(str(phonetic or '無'))} <br>
<strong>句子:</strong> {html.escape(str(sentence))}
</div>
""")

        return "".join(cards), "✅ 抽單字 & 生成完成"

    except Exception as e:
        # UI boundary: report the failure to the caller instead of raising.
        return f"<p style='color:red;'>❌ 發生錯誤:{html.escape(str(e))}</p>", f"❌ 錯誤:{str(e)}"
 
 
1
  import os
2
  import json
 
 
 
3
 
4
  DATA_DIR = "./data"
 
5
 
6
# List the names of the available vocabulary sources.
def get_sources():
    """Return the names of the vocabulary sources found in ``DATA_DIR``.

    Every file in ``DATA_DIR`` whose name ends with ``.json`` counts as one
    source; its name is the file name with that trailing extension removed.

    Returns:
        list[str]: Source names in sorted order.

    Raises:
        OSError: If ``DATA_DIR`` cannot be listed (for example, it does
            not exist).
    """
    suffix = ".json"
    # Remove only the trailing extension: ``split(".json")[0]`` would cut a
    # name such as "x.json.bak.json" down to "x".
    # ``os.listdir`` yields entries in arbitrary order, so sort the result
    # to keep it stable between calls and platforms.
    return sorted(
        name[: -len(suffix)]
        for name in os.listdir(DATA_DIR)
        if name.endswith(suffix)
    )
11
 
 
 
 
 
 
 
12
 
13
# Load all words of one vocabulary source (returns the whole list of entries).
def get_words_from_source(source):
    """Read ``<DATA_DIR>/<source>.json`` and return its parsed JSON content.

    Args:
        source: Name of the vocabulary source, i.e. the JSON file name
            without the ``.json`` extension.

    Returns:
        The decoded JSON document. ``get_word_info`` treats it as a list of
        entry dicts that each have a ``'word'`` key.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    json_file = os.path.join(DATA_DIR, f"{source}.json")
    with open(json_file, 'r', encoding='utf-8') as handle:
        return json.load(handle)
19
 
 
 
 
 
20
 
21
# Look up the details of a single word (phonetic transcription, etc.).
def get_word_info(source, word):
    """Return the entry for ``word`` in the given vocabulary source.

    The source is loaded through ``get_words_from_source`` and scanned in
    order; the first entry whose ``'word'`` value equals ``word`` wins.

    Args:
        source: Name of the vocabulary source to search.
        word: The word to look up (compared with ``==``).

    Returns:
        The matching entry, e.g.
        ``{'id': 1, 'word': 'apple', 'phonetic': '...'}``, or ``None`` when
        the word is not present in the source.
    """
    matches = (
        candidate
        for candidate in get_words_from_source(source)
        if candidate['word'] == word
    )
    # The default of None covers the "word not found" case.
    return next(matches, None)