Spaces:

dayuian
/

VocabLine

Running

App Files Files Community

dayuian committed 11 days ago

Commit

cf14f69

verified ·

1 Parent(s): ebb3019

Update sentences.py

Browse files

Files changed (1) hide show

sentences.py +43 -58

sentences.py CHANGED Viewed

@@ -1,15 +1,13 @@
 import sqlite3
 import os
-import random
 from ai_sentence import generate_sentence
-from vocab import get_words_from_source, get_word_info
 from tqdm import tqdm
 DATA_DIR = "./data"
 DB_PATH = os.path.join(DATA_DIR, "sentences.db")
-# 初始化資料庫（建表）
 def init_db():
     conn = sqlite3.connect(DB_PATH)
     c = conn.cursor()
@@ -28,7 +26,6 @@ def init_db():
     conn.close()
-# 查詢句庫中的某個單字的所有例句
 def get_sentences_by_word(word):
     conn = sqlite3.connect(DB_PATH)
     c = conn.cursor()
@@ -38,7 +35,6 @@ def get_sentences_by_word(word):
     return results
-# 儲存句子到 SQLite
 def save_sentence(word, phonetic, sentence, source, model):
     conn = sqlite3.connect(DB_PATH)
     c = conn.cursor()
@@ -51,56 +47,45 @@ def save_sentence(word, phonetic, sentence, source, model):
     conn.close()
-# 隨機抽單字 + 查句庫 or GPT 生成例句
-def get_words_with_sentences(source, n):
-    try:
-        words = get_words_from_source(source)
-        selected_words = random.sample(words, n)
-        result_display = ""
-        for word_data in tqdm(selected_words, desc="處理單字"):
-            word = word_data['word']
-            phonetic = word_data['phonetic']
-            # 查詢句庫
-            sentence_records = get_sentences_by_word(word)
-            if sentence_records:
-                # 優先取 Tatoeba
-                sentence = ""
-                for rec in sentence_records:
-                    if rec[3] == "tatoeba":  # source 字段
-                        sentence = rec[2]  # sentence 字段
-                        break
-                if not sentence:
-                    sentence = sentence_records[0][2]
-                source_used = sentence_records[0][3]
-                model_used = sentence_records[0][4]
-            else:
-                # GPT 生成句子
-                sentence = generate_sentence(word, "EleutherAI/pythia-410m")
-                source_used = "ai"
-                model_used = "EleutherAI/pythia-410m"
-                # 查詢音標，避免 GPT 生成時音標缺失
-                if not phonetic:
-                    word_info = get_word_info(source, word)
-                    phonetic = word_info['phonetic'] if word_info else ''
-                # 存回句庫
-                save_sentence(word, phonetic, sentence, source_used, model_used)
-            result_display += f"""
-            <div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
-                <strong>單字：</strong> {word} <br>
-                <strong>音標：</strong> {phonetic or '無'} <br>
-                <strong>句子：</strong> {sentence} <br>
-                <strong>來源：</strong> {source_used} {f"({model_used})" if model_used else ""}
-            </div>
-            """
-        return result_display, f"✅ 成功抽取 {n} 個單字 & 句子"
-    except Exception as e:
-        return f"<p style='color:red;'>❌ 發生錯誤：{str(e)}</p>", f"❌ 錯誤：{str(e)}"

 import sqlite3
 import os
+from vocab import get_word_info
 from ai_sentence import generate_sentence
 from tqdm import tqdm
 DATA_DIR = "./data"
 DB_PATH = os.path.join(DATA_DIR, "sentences.db")
 def init_db():
     conn = sqlite3.connect(DB_PATH)
     c = conn.cursor()
     conn.close()
 def get_sentences_by_word(word):
     conn = sqlite3.connect(DB_PATH)
     c = conn.cursor()
     return results
 def save_sentence(word, phonetic, sentence, source, model):
     conn = sqlite3.connect(DB_PATH)
     c = conn.cursor()
     conn.close()
+def generate_sentences(words, source, use_ai, model_name):
+    result_display = ""
+    status_log = []
+    for word in tqdm(words, desc="處理單字"):
+        # 1. 查單字音標
+        word_info = get_word_info(source, word)
+        phonetic = word_info['phonetic'] if word_info else "無"
+        # 2. 查句庫
+        sentence_records = get_sentences_by_word(word)
+        # 3. 判斷是否用AI
+        if use_ai or not sentence_records:
+            try:
+                sentence = generate_sentence(word, model_name)
+                save_sentence(word, phonetic, sentence, 'ai', model_name)
+                source_used = 'ai'
+                model_used = model_name
+            except Exception as e:
+                sentence = f"[AI生成失敗：{e}]"
+                source_used = "error"
+                model_used = None
+        else:
+            # 取第一筆句庫資料
+            sentence = sentence_records[0][2]
+            source_used = sentence_records[0][3]
+            model_used = sentence_records[0][4]
+        # 4. 組裝顯示內容
+        result_display += f"""
+        <div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
+            <strong>單字：</strong> {word} <br>
+            <strong>音標：</strong> {phonetic} <br>
+            <strong>句子：</strong> {sentence} <br>
+            <strong>來源：</strong> {source_used} {f"({model_used})" if model_used else ""}
+        </div>
+        """
+        status_log.append(f"✅ {word}：{source_used}")
+    return result_display, "\n".join(status_log)