dayuian committed on
Commit
cf14f69
·
verified ·
1 Parent(s): ebb3019

Update sentences.py

Browse files
Files changed (1) hide show
  1. sentences.py +43 -58
sentences.py CHANGED
@@ -1,15 +1,13 @@
1
  import sqlite3
2
  import os
3
- import random
4
  from ai_sentence import generate_sentence
5
- from vocab import get_words_from_source, get_word_info
6
  from tqdm import tqdm
7
 
8
  DATA_DIR = "./data"
9
  DB_PATH = os.path.join(DATA_DIR, "sentences.db")
10
 
11
 
12
- # 初始化資料庫(建表)
13
  def init_db():
14
  conn = sqlite3.connect(DB_PATH)
15
  c = conn.cursor()
@@ -28,7 +26,6 @@ def init_db():
28
  conn.close()
29
 
30
 
31
- # 查詢句庫中的某個單字的所有例句
32
  def get_sentences_by_word(word):
33
  conn = sqlite3.connect(DB_PATH)
34
  c = conn.cursor()
@@ -38,7 +35,6 @@ def get_sentences_by_word(word):
38
  return results
39
 
40
 
41
- # 儲存句子到 SQLite
42
  def save_sentence(word, phonetic, sentence, source, model):
43
  conn = sqlite3.connect(DB_PATH)
44
  c = conn.cursor()
@@ -51,56 +47,45 @@ def save_sentence(word, phonetic, sentence, source, model):
51
  conn.close()
52
 
53
 
54
- # 隨機抽單字 + 查句庫 or GPT 生成例句
55
def get_words_with_sentences(source, n):
    """Randomly pick *n* words from *source* and attach an example sentence.

    For each picked word the sentence store is queried with
    ``get_sentences_by_word``. If records exist, a record whose source field
    is ``"tatoeba"`` is preferred, otherwise the first record is used. If no
    record exists, a sentence is generated with ``generate_sentence`` and
    written back with ``save_sentence``.

    Args:
        source: Vocabulary source identifier passed to
            ``get_words_from_source`` / ``get_word_info``.
        n: Number of words to sample.

    Returns:
        A ``(html, status)`` tuple. On any exception both elements carry the
        error message instead (the function never raises).
    """
    try:
        words = get_words_from_source(source)
        # random.sample raises ValueError when n exceeds len(words); that is
        # reported to the caller through the except branch below.
        selected_words = random.sample(words, n)

        cards = []
        for word_data in tqdm(selected_words, desc="處理單字"):
            word = word_data['word']
            phonetic = word_data['phonetic']

            # Look the word up in the sentence store.
            sentence_records = get_sentences_by_word(word)

            if sentence_records:
                # Prefer a Tatoeba record with a non-empty sentence; fall back
                # to the first record. Record layout used here: index 2 is the
                # sentence, 3 the source, 4 the model.
                chosen = next(
                    (rec for rec in sentence_records
                     if rec[3] == "tatoeba" and rec[2]),
                    sentence_records[0],
                )
                # Take source/model from the SAME record as the sentence so
                # the displayed attribution matches the displayed text.
                sentence = chosen[2]
                source_used = chosen[3]
                model_used = chosen[4]
            else:
                # Nothing stored: generate a sentence with the model.
                sentence = generate_sentence(word, "EleutherAI/pythia-410m")
                source_used = "ai"
                model_used = "EleutherAI/pythia-410m"

                # Fill in the phonetic if the sampled word data lacks it.
                if not phonetic:
                    word_info = get_word_info(source, word)
                    phonetic = word_info['phonetic'] if word_info else ''

                # Write the generated sentence back to the store.
                save_sentence(word, phonetic, sentence, source_used, model_used)

            cards.append(f"""
            <div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
                <strong>單字:</strong> {word} <br>
                <strong>音標:</strong> {phonetic or '無'} <br>
                <strong>句子:</strong> {sentence} <br>
                <strong>來源:</strong> {source_used} {f"({model_used})" if model_used else ""}
            </div>
            """)

        return "".join(cards), f"✅ 成功抽取 {n} 個單字 & 句子"

    except Exception as e:
        # Top-level boundary: surface the error to the caller as text.
        return f"<p style='color:red;'>❌ 發生錯誤:{str(e)}</p>", f"❌ 錯誤:{str(e)}"
 
1
  import sqlite3
2
  import os
3
+ from vocab import get_word_info
4
  from ai_sentence import generate_sentence
 
5
  from tqdm import tqdm
6
 
7
  DATA_DIR = "./data"
8
  DB_PATH = os.path.join(DATA_DIR, "sentences.db")
9
 
10
 
 
11
  def init_db():
12
  conn = sqlite3.connect(DB_PATH)
13
  c = conn.cursor()
 
26
  conn.close()
27
 
28
 
 
29
  def get_sentences_by_word(word):
30
  conn = sqlite3.connect(DB_PATH)
31
  c = conn.cursor()
 
35
  return results
36
 
37
 
 
38
  def save_sentence(word, phonetic, sentence, source, model):
39
  conn = sqlite3.connect(DB_PATH)
40
  c = conn.cursor()
 
47
  conn.close()
48
 
49
 
50
def generate_sentences(words, source, use_ai, model_name):
    """Build an HTML card and a status line for every word in *words*.

    For each word the phonetic is looked up with ``get_word_info`` and the
    sentence store is queried with ``get_sentences_by_word``. A sentence is
    generated with ``generate_sentence`` (and persisted with
    ``save_sentence``) when *use_ai* is truthy or the store has no record for
    the word; otherwise the first stored record is used.

    Args:
        words: Iterable of words to process.
        source: Vocabulary source identifier passed to ``get_word_info``.
        use_ai: When truthy, always generate instead of reading the store.
        model_name: Model identifier passed to ``generate_sentence``.

    Returns:
        A ``(result_display, status_log)`` tuple: the concatenated HTML cards
        and a newline-joined log with one line per word. Failed generations
        are reported in both with source ``"error"``; the function does not
        raise for them.
    """
    from html import escape  # local import: the block needs no file-level change

    cards = []
    status_log = []

    for word in tqdm(words, desc="處理單字"):
        # 1. Look up the phonetic; normalise a missing/None value to "" so
        #    the card never shows the literal text "None".
        word_info = get_word_info(source, word)
        phonetic = (word_info['phonetic'] if word_info else "") or ""

        # 2. Query the sentence store.
        sentence_records = get_sentences_by_word(word)

        # 3. Decide whether to generate with the model.
        if use_ai or not sentence_records:
            try:
                sentence = generate_sentence(word, model_name)
                save_sentence(word, phonetic, sentence, 'ai', model_name)
                source_used = 'ai'
                model_used = model_name
            except Exception as e:
                # Best effort per word: record the failure and keep going.
                sentence = f"[AI生成失敗:{e}]"
                source_used = "error"
                model_used = None
        else:
            # Use the first stored record (index 2 is the sentence, 3 the
            # source, 4 the model).
            first_record = sentence_records[0]
            sentence = first_record[2]
            source_used = first_record[3]
            model_used = first_record[4]

        # 4. Assemble the card. Every interpolated value is HTML-escaped
        #    because generated text and exception messages may contain
        #    markup characters.
        model_suffix = f"({escape(str(model_used))})" if model_used else ""
        cards.append(f"""
        <div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
            <strong>單字:</strong> {escape(str(word))} <br>
            <strong>音標:</strong> {escape(str(phonetic))} <br>
            <strong>句子:</strong> {escape(str(sentence))} <br>
            <strong>來源:</strong> {escape(str(source_used))} {model_suffix}
        </div>
        """)

        # A failed generation must not be logged with a success mark.
        status_icon = "❌" if source_used == "error" else "✅"
        status_log.append(f"{status_icon} {word}:{source_used}")

    return "".join(cards), "\n".join(status_log)