ginipick committed on
Commit
d3d164d
·
verified ·
1 Parent(s): f013686

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -25
app.py CHANGED
@@ -32,27 +32,27 @@ def load_parquet(filename: str) -> str:
32
  return f"파일을 읽는 중 오류가 발생했습니다: {str(e)}"
33
 
34
  def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None) -> str:
35
- # 시스템 프롬프트에 중복 방지 지시 추가
36
  system_prefix = """반드시 한글로 답변할 것. 너는 업로드된 데이터를 기반으로 질문에 답변하는 역할을 한다.
37
-
38
- 중요 규칙:
39
- 1. 이전 대화에서 이미 답변한 내용을 반복하지 말 것
40
- 2. 질문과 직접 관련된 내용만 답변할 것
41
- 3. 불필요한 예시나 부연 설명은 최소화할 것
42
- 4. 답변은 명확하고 간결하게 할 것
43
- 5. 동일한 내용을 다른 표현으로 반복하지 말 것
44
- """
45
-
46
  if parquet_data:
47
  try:
48
  df = pd.read_json(io.StringIO(parquet_data))
49
  data_summary = df.describe(include='all').to_string()
50
- system_prefix += f"\n\n업로드된 데이터 요약:\n{data_summary}"
51
  except Exception as e:
52
  print(f"데이터 로드 오류: {str(e)}")
53
 
54
- # 이전 대화 컨텍스트 최적화
55
- recent_history = history[-3:] if history else [] # 최근 3개 대화만 유지
56
 
57
  prompt = system_prefix + "\n\n"
58
  for chat in recent_history:
@@ -68,23 +68,45 @@ def respond(message: str, history: List[Dict[str, str]], system_message: str = "
68
  prompt=prompt,
69
  max_new_tokens=max_tokens,
70
  stream=True,
71
- temperature=temperature,
72
  top_p=top_p,
73
- repetition_penalty=1.2, # 반복 페널티 추가
74
- no_repeat_ngram_size=3, # n-gram 반복 방지
75
  )
76
 
77
  for msg in stream:
78
  if msg:
79
  response += msg
80
- # 중복 문장 제거
81
- response = remove_duplicates(response)
82
- yield response
83
  except Exception as e:
84
  error_message = f"추론 오류: {str(e)}"
85
  print(error_message)
86
  yield error_message
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  def remove_duplicates(text: str) -> str:
89
  """중복 문장 제거 함수"""
90
  sentences = text.split('.')
@@ -332,28 +354,37 @@ with gr.Blocks(css=css) as demo:
332
  def handle_message_data_upload(message: str, history: List[Dict[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float, parquet_data: str):
333
  history = history or []
334
 
335
- # 중복 질문 체크
336
- if history and any(chat['role'] == 'user' and chat['content'].strip() == message.strip() for chat in history[-3:]):
 
337
  yield history + [{"role": "assistant", "content": "동일한 질문이 최근에 있었습니다. 다른 질문을 해주세요."}], ""
338
  return
339
 
340
  try:
341
  history.append({"role": "user", "content": message})
342
- response_gen = respond(message, history, system_message, max_tokens, temperature, top_p, parquet_data)
 
 
 
 
 
 
 
 
343
 
344
  partial_response = ""
345
  for partial in response_gen:
346
  partial_response = partial
347
- # 중복 제거된 응답으로 업데이트
348
- display_history = history + [{"role": "assistant", "content": remove_duplicates(partial_response)}]
349
  yield display_history, ""
350
 
351
- history.append({"role": "assistant", "content": remove_duplicates(partial_response)})
352
  except Exception as e:
353
  response = f"오류 발생: {str(e)}"
354
  history.append({"role": "assistant", "content": response})
355
  yield history, ""
356
 
 
357
 
358
 
359
  send_data_upload.click(
 
32
  return f"파일을 읽는 중 오류가 발생했습니다: {str(e)}"
33
 
34
  def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None) -> str:
35
+ # 시스템 프롬프트 강화
36
  system_prefix = """반드시 한글로 답변할 것. 너는 업로드된 데이터를 기반으로 질문에 답변하는 역할을 한다.
37
+
38
+ 주요 지침:
39
+ 1. 질문과 직접 관련된 내용만 간단명료하게 답변할 것
40
+ 2. 이전 답변과 중복되는 내용은 제외할 것
41
+ 3. 불필요한 예시나 부연 설명은 하지 말 것
42
+ 4. 동일한 내용을 다른 표현으로 반복하지 말 것
43
+ 5. 핵심 정보만 전달할 것
44
+ """
45
+
46
  if parquet_data:
47
  try:
48
  df = pd.read_json(io.StringIO(parquet_data))
49
  data_summary = df.describe(include='all').to_string()
50
+ system_prefix += f"\n\n데이터 요약:\n{data_summary}"
51
  except Exception as e:
52
  print(f"데이터 로드 오류: {str(e)}")
53
 
54
+ # 최근 대화 컨텍스트만 유지
55
+ recent_history = history[-3:] if history else []
56
 
57
  prompt = system_prefix + "\n\n"
58
  for chat in recent_history:
 
68
  prompt=prompt,
69
  max_new_tokens=max_tokens,
70
  stream=True,
71
+ temperature=temperature, # ๋‚ฎ์€ temperature๋กœ ์ผ๊ด€์„ฑ ์œ ์ง€
72
  top_p=top_p,
73
+ repetition_penalty=1.2, # 반복 페널티만 적용
 
74
  )
75
 
76
  for msg in stream:
77
  if msg:
78
  response += msg
79
+ # ์‘๋‹ต ์ •์ œ
80
+ cleaned_response = clean_response(response)
81
+ yield cleaned_response
82
  except Exception as e:
83
  error_message = f"추론 오류: {str(e)}"
84
  print(error_message)
85
  yield error_message
86
 
87
+ def clean_response(text: str) -> str:
88
+ """응답 텍스트 정제 함수"""
89
+ # 문장 단위로 분리
90
+ sentences = [s.strip() for s in text.split('.') if s.strip()]
91
+
92
+ # 중복 제거
93
+ unique_sentences = []
94
+ seen = set()
95
+
96
+ for sentence in sentences:
97
+ # 문장 정규화 (공백 제거, 소문자 변환)
98
+ normalized = ' '.join(sentence.lower().split())
99
+ if normalized not in seen:
100
+ seen.add(normalized)
101
+ unique_sentences.append(sentence)
102
+
103
+ # 정제된 문장 결합
104
+ cleaned_text = '. '.join(unique_sentences)
105
+ if cleaned_text and not cleaned_text.endswith('.'):
106
+ cleaned_text += '.'
107
+
108
+ return cleaned_text
109
+
110
  def remove_duplicates(text: str) -> str:
111
  """중복 문장 제거 함수"""
112
  sentences = text.split('.')
 
354
  def handle_message_data_upload(message: str, history: List[Dict[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float, parquet_data: str):
355
  history = history or []
356
 
357
+ # 중복 질문 검사
358
+ recent_questions = [chat['content'].strip().lower() for chat in history[-3:] if chat['role'] == 'user']
359
+ if message.strip().lower() in recent_questions:
360
  yield history + [{"role": "assistant", "content": "동일한 질문이 최근에 있었습니다. 다른 질문을 해주세요."}], ""
361
  return
362
 
363
  try:
364
  history.append({"role": "user", "content": message})
365
+ response_gen = respond(
366
+ message,
367
+ history,
368
+ system_message,
369
+ max_tokens,
370
+ temperature=0.3, # 낮은 temperature 사용
371
+ top_p=top_p,
372
+ parquet_data=parquet_data
373
+ )
374
 
375
  partial_response = ""
376
  for partial in response_gen:
377
  partial_response = partial
378
+ display_history = history + [{"role": "assistant", "content": partial_response}]
 
379
  yield display_history, ""
380
 
381
+ history.append({"role": "assistant", "content": partial_response})
382
  except Exception as e:
383
  response = f"오류 발생: {str(e)}"
384
  history.append({"role": "assistant", "content": response})
385
  yield history, ""
386
 
387
+
388
 
389
 
390
  send_data_upload.click(