ginipick commited on
Commit
f013686
·
verified ·
1 Parent(s): a958a41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -43
app.py CHANGED
@@ -31,32 +31,31 @@ def load_parquet(filename: str) -> str:
31
  except Exception as e:
32
  return f"νŒŒμΌμ„ μ½λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
33
 
34
- def respond(
35
- message: str,
36
- history: List[Dict[str, str]],
37
- system_message: str = "",
38
- max_tokens: int = 4000,
39
- temperature: float = 0.5,
40
- top_p: float = 0.9,
41
- parquet_data: str = None
42
- ) -> str:
43
- # μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ μ„€μ •
 
 
44
  if parquet_data:
45
- system_prefix = """λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ λ‹΅λ³€ν•  것. λ„ˆλŠ” μ—…λ‘œλ“œλœ 데이터λ₯Ό 기반으둜 μ§ˆλ¬Έμ— λ‹΅λ³€ν•˜λŠ” 역할을 ν•œλ‹€. 데이터λ₯Ό λΆ„μ„ν•˜μ—¬ μ‚¬μš©μžμ—κ²Œ 도움이 λ˜λŠ” 정보λ₯Ό μ œκ³΅ν•˜λΌ. 데이터λ₯Ό ν™œμš©ν•˜μ—¬ μƒμ„Έν•˜κ³  μ •ν™•ν•œ 닡변을 μ œκ³΅ν•˜λ˜, λ―Όκ°ν•œ μ •λ³΄λ‚˜ 개인 정보λ₯Ό λ…ΈμΆœν•˜μ§€ 마라."""
46
  try:
47
  df = pd.read_json(io.StringIO(parquet_data))
48
- # λ°μ΄ν„°μ˜ μš”μ•½ 정보 생성
49
  data_summary = df.describe(include='all').to_string()
50
- system_prefix += f"\n\nμ—…λ‘œλ“œλœ λ°μ΄ν„°μ˜ μš”μ•½ 정보:\n{data_summary}"
51
  except Exception as e:
52
- print(f"데이터 λ‘œλ“œ 쀑 였λ₯˜ λ°œμƒ: {str(e)}\n{traceback.format_exc()}")
53
- system_prefix += "\n\n데이터λ₯Ό λ‘œλ“œν•˜λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€."
54
- else:
55
- system_prefix = system_message or "λ„ˆλŠ” AI μ‘°μ–Έμž 역할이닀."
56
-
57
- # λ©”μ‹œμ§€ 생성
58
  prompt = system_prefix + "\n\n"
59
- for chat in history:
60
  if chat['role'] == 'user':
61
  prompt += f"μ‚¬μš©μž: {chat['content']}\n"
62
  else:
@@ -64,7 +63,6 @@ def respond(
64
  prompt += f"μ‚¬μš©μž: {message}\nAI:"
65
 
66
  try:
67
- # λͺ¨λΈμ— λ©”μ‹œμ§€ 전솑 및 응닡 λ°›κΈ°
68
  response = ""
69
  stream = hf_client.text_generation(
70
  prompt=prompt,
@@ -72,16 +70,35 @@ def respond(
72
  stream=True,
73
  temperature=temperature,
74
  top_p=top_p,
 
 
75
  )
 
76
  for msg in stream:
77
  if msg:
78
  response += msg
 
 
79
  yield response
80
  except Exception as e:
81
- error_message = f"μΆ”λ‘  쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}\n{traceback.format_exc()}"
82
  print(error_message)
83
  yield error_message
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  def upload_csv(file_path: str) -> Tuple[str, str]:
86
  try:
87
  # CSV 파일 읽기
@@ -312,38 +329,33 @@ with gr.Blocks(css=css) as demo:
312
 
313
  parquet_data_state = gr.State()
314
 
315
- def handle_message_data_upload(
316
- message: str,
317
- history: List[Dict[str, str]],
318
- system_message: str,
319
- max_tokens: int,
320
- temperature: float,
321
- top_p: float,
322
- parquet_data: str
323
- ):
324
  history = history or []
 
 
 
 
 
 
325
  try:
326
- # μ‚¬μš©μžμ˜ λ©”μ‹œμ§€λ₯Ό νžˆμŠ€ν† λ¦¬μ— μΆ”κ°€
327
  history.append({"role": "user", "content": message})
328
- # 응닡 생성
329
- response_gen = respond(
330
- message, history, system_message, max_tokens, temperature, top_p, parquet_data
331
- )
332
  partial_response = ""
333
  for partial in response_gen:
334
  partial_response = partial
335
- # λŒ€ν™” λ‚΄μ—­ μ—…λ°μ΄νŠΈ
336
- display_history = history + [
337
- {"role": "assistant", "content": partial_response}
338
- ]
339
  yield display_history, ""
340
- # μ–΄μ‹œμŠ€ν„΄νŠΈμ˜ 응닡을 νžˆμŠ€ν† λ¦¬μ— μΆ”κ°€
341
- history.append({"role": "assistant", "content": partial_response})
342
  except Exception as e:
343
- response = f"μΆ”λ‘  쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
344
  history.append({"role": "assistant", "content": response})
345
  yield history, ""
346
 
 
 
347
  send_data_upload.click(
348
  handle_message_data_upload,
349
  inputs=[
 
31
  except Exception as e:
32
  return f"νŒŒμΌμ„ μ½λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
33
 
34
+ def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None) -> str:
35
+ # μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈμ— 쀑볡 방지 μ§€μ‹œ μΆ”κ°€
36
+ system_prefix = """λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ λ‹΅λ³€ν•  것. λ„ˆλŠ” μ—…λ‘œλ“œλœ 데이터λ₯Ό 기반으둜 μ§ˆλ¬Έμ— λ‹΅λ³€ν•˜λŠ” 역할을 ν•œλ‹€.
37
+
38
+ μ€‘μš” κ·œμΉ™:
39
+ 1. 이전 λŒ€ν™”μ—μ„œ 이미 λ‹΅λ³€ν•œ λ‚΄μš©μ„ λ°˜λ³΅ν•˜μ§€ 말 것
40
+ 2. 질문과 직접 κ΄€λ ¨λœ λ‚΄μš©λ§Œ λ‹΅λ³€ν•  것
41
+ 3. λΆˆν•„μš”ν•œ μ˜ˆμ‹œλ‚˜ λΆ€μ—° μ„€λͺ…은 μ΅œμ†Œν™”ν•  것
42
+ 4. 닡변은 λͺ…ν™•ν•˜κ³  κ°„κ²°ν•˜κ²Œ ν•  것
43
+ 5. λ™μΌν•œ λ‚΄μš©μ„ λ‹€λ₯Έ ν‘œν˜„μœΌλ‘œ λ°˜λ³΅ν•˜μ§€ 말 것
44
+ """
45
+
46
  if parquet_data:
 
47
  try:
48
  df = pd.read_json(io.StringIO(parquet_data))
 
49
  data_summary = df.describe(include='all').to_string()
50
+ system_prefix += f"\n\nμ—…λ‘œλ“œλœ 데이터 μš”μ•½:\n{data_summary}"
51
  except Exception as e:
52
+ print(f"데이터 λ‘œλ“œ 였λ₯˜: {str(e)}")
53
+
54
+ # 이전 λŒ€ν™” μ»¨ν…μŠ€νŠΈ μ΅œμ ν™”
55
+ recent_history = history[-3:] if history else [] # 졜근 3개 λŒ€ν™”λ§Œ μœ μ§€
56
+
 
57
  prompt = system_prefix + "\n\n"
58
+ for chat in recent_history:
59
  if chat['role'] == 'user':
60
  prompt += f"μ‚¬μš©μž: {chat['content']}\n"
61
  else:
 
63
  prompt += f"μ‚¬μš©μž: {message}\nAI:"
64
 
65
  try:
 
66
  response = ""
67
  stream = hf_client.text_generation(
68
  prompt=prompt,
 
70
  stream=True,
71
  temperature=temperature,
72
  top_p=top_p,
73
+ repetition_penalty=1.2, # 반볡 νŽ˜λ„ν‹° μΆ”κ°€
74
+ no_repeat_ngram_size=3, # n-gram 반볡 방지
75
  )
76
+
77
  for msg in stream:
78
  if msg:
79
  response += msg
80
+ # 쀑볡 λ¬Έμž₯ 제거
81
+ response = remove_duplicates(response)
82
  yield response
83
  except Exception as e:
84
+ error_message = f"μΆ”λ‘  였λ₯˜: {str(e)}"
85
  print(error_message)
86
  yield error_message
87
 
88
+ def remove_duplicates(text: str) -> str:
89
+ """쀑볡 λ¬Έμž₯ 제거 ν•¨μˆ˜"""
90
+ sentences = text.split('.')
91
+ unique_sentences = []
92
+ seen = set()
93
+
94
+ for sentence in sentences:
95
+ sentence = sentence.strip()
96
+ if sentence and sentence not in seen:
97
+ seen.add(sentence)
98
+ unique_sentences.append(sentence)
99
+
100
+ return '. '.join(unique_sentences)
101
+
102
  def upload_csv(file_path: str) -> Tuple[str, str]:
103
  try:
104
  # CSV 파일 읽기
 
329
 
330
  parquet_data_state = gr.State()
331
 
332
+ def handle_message_data_upload(message: str, history: List[Dict[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float, parquet_data: str):
 
 
 
 
 
 
 
 
333
  history = history or []
334
+
335
+ # 쀑볡 질문 체크
336
+ if history and any(chat['role'] == 'user' and chat['content'].strip() == message.strip() for chat in history[-3:]):
337
+ yield history + [{"role": "assistant", "content": "λ™μΌν•œ 질문이 μ΅œκ·Όμ— μžˆμ—ˆμŠ΅λ‹ˆλ‹€. λ‹€λ₯Έ μ§ˆλ¬Έμ„ ν•΄μ£Όμ„Έμš”."}], ""
338
+ return
339
+
340
  try:
 
341
  history.append({"role": "user", "content": message})
342
+ response_gen = respond(message, history, system_message, max_tokens, temperature, top_p, parquet_data)
343
+
 
 
344
  partial_response = ""
345
  for partial in response_gen:
346
  partial_response = partial
347
+ # 쀑볡 제거된 μ‘λ‹΅μœΌλ‘œ μ—…λ°μ΄νŠΈ
348
+ display_history = history + [{"role": "assistant", "content": remove_duplicates(partial_response)}]
 
 
349
  yield display_history, ""
350
+
351
+ history.append({"role": "assistant", "content": remove_duplicates(partial_response)})
352
  except Exception as e:
353
+ response = f"였λ₯˜ λ°œμƒ: {str(e)}"
354
  history.append({"role": "assistant", "content": response})
355
  yield history, ""
356
 
357
+
358
+
359
  send_data_upload.click(
360
  handle_message_data_upload,
361
  inputs=[