cutechicken committed on
Commit
3c893d2
·
verified ·
1 Parent(s): 861ff06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +288 -127
app.py CHANGED
@@ -2,6 +2,7 @@ import torch
2
  import gradio as gr
3
  import spaces
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
5
  import os
6
  from threading import Thread
7
  import random
@@ -12,9 +13,20 @@ import pandas as pd
12
  from typing import List, Tuple
13
  import json
14
  from datetime import datetime
15
-
16
- # GPU ๋ฉ”๋ชจ๋ฆฌ ๊ด€๋ฆฌ
17
- torch.cuda.empty_cache()
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
20
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
@@ -22,12 +34,7 @@ MODEL_ID = "CohereForAI/c4ai-command-r7b-12-2024"
22
  MODELS = os.environ.get("MODELS")
23
  MODEL_NAME = MODEL_ID.split("/")[-1]
24
 
25
- # ๋ชจ๋ธ๊ณผ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
26
- model = AutoModelForCausalLM.from_pretrained(
27
- MODEL_ID,
28
- torch_dtype=torch.bfloat16,
29
- device_map="auto",
30
- )
31
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
32
 
33
  # ์œ„ํ‚คํ”ผ๋””์•„ ๋ฐ์ดํ„ฐ์…‹ ๋กœ๋“œ
@@ -147,51 +154,45 @@ def analyze_file_content(content, file_type):
147
  words = len(content.split())
148
  return f"๐Ÿ“ ๋ฌธ์„œ ๊ตฌ์กฐ: {total_lines}์ค„, {paragraphs}๋‹จ๋ฝ, ์•ฝ {words}๋‹จ์–ด"
149
 
150
- def read_uploaded_file(file):
151
- if file is None:
152
- return "", ""
153
  try:
154
- file_ext = os.path.splitext(file.name)[1].lower()
155
-
156
- if file_ext == '.parquet':
157
- df = pd.read_parquet(file.name, engine='pyarrow')
158
- content = df.head(10).to_markdown(index=False)
159
- return content, "parquet"
160
- elif file_ext == '.csv':
161
- encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
162
- for encoding in encodings:
163
- try:
164
- df = pd.read_csv(file.name, encoding=encoding)
165
- content = f"๐Ÿ“Š ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ:\n{df.head(10).to_markdown(index=False)}\n\n"
166
- content += f"\n๐Ÿ“ˆ ๋ฐ์ดํ„ฐ ์ •๋ณด:\n"
167
- content += f"- ์ „์ฒด ํ–‰ ์ˆ˜: {len(df)}\n"
168
- content += f"- ์ „์ฒด ์—ด ์ˆ˜: {len(df.columns)}\n"
169
- content += f"- ์ปฌ๋Ÿผ ๋ชฉ๋ก: {', '.join(df.columns)}\n"
170
- content += f"\n๐Ÿ“‹ ์ปฌ๋Ÿผ ๋ฐ์ดํ„ฐ ํƒ€์ž…:\n"
171
- for col, dtype in df.dtypes.items():
172
- content += f"- {col}: {dtype}\n"
173
- null_counts = df.isnull().sum()
174
- if null_counts.any():
175
- content += f"\nโš ๏ธ ๊ฒฐ์ธก์น˜:\n"
176
- for col, null_count in null_counts[null_counts > 0].items():
177
- content += f"- {col}: {null_count}๊ฐœ ๋ˆ„๋ฝ\n"
178
- return content, "csv"
179
- except UnicodeDecodeError:
180
- continue
181
- raise UnicodeDecodeError(f"โŒ ์ง€์›๋˜๋Š” ์ธ์ฝ”๋”ฉ์œผ๋กœ ํŒŒ์ผ์„ ์ฝ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค ({', '.join(encodings)})")
182
  else:
183
- encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
184
- for encoding in encodings:
185
- try:
186
- with open(file.name, 'r', encoding=encoding) as f:
187
- content = f.read()
188
- return content, "text"
189
- except UnicodeDecodeError:
190
- continue
191
- raise UnicodeDecodeError(f"โŒ ์ง€์›๋˜๋Š” ์ธ์ฝ”๋”ฉ์œผ๋กœ ํŒŒ์ผ์„ ์ฝ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค ({', '.join(encodings)})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  except Exception as e:
193
- return f"โŒ ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}", "error"
194
-
195
 
196
  def read_uploaded_file(file):
197
  if file is None:
@@ -199,76 +200,173 @@ def read_uploaded_file(file):
199
  try:
200
  file_ext = os.path.splitext(file.name)[1].lower()
201
 
 
 
 
202
  if file_ext == '.parquet':
203
- df = pd.read_parquet(file.name)
204
- content = f"๐Ÿ“Š ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ:\n{df.head(10).to_markdown(index=False)}\n\n"
205
- content += f"\n๐Ÿ“ˆ ๋ฐ์ดํ„ฐ ์ •๋ณด:\n"
206
- content += f"- ์ „์ฒด ํ–‰ ์ˆ˜: {len(df)}\n"
207
- content += f"- ์ „์ฒด ์—ด ์ˆ˜: {len(df.columns)}\n"
208
- content += f"- ์ปฌ๋Ÿผ ๋ชฉ๋ก: {', '.join(df.columns)}\n"
209
- return content, "parquet"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
 
 
211
  elif file_ext == '.csv':
212
  encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
213
  for encoding in encodings:
214
  try:
215
  df = pd.read_csv(file.name, encoding=encoding)
216
- content = f"๐Ÿ“Š ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ:\n{df.head(10).to_markdown(index=False)}\n\n"
217
- content += f"\n๐Ÿ“ˆ ๋ฐ์ดํ„ฐ ์ •๋ณด:\n"
218
- content += f"- ์ „์ฒด ํ–‰ ์ˆ˜: {len(df)}\n"
219
- content += f"- ์ „์ฒด ์—ด ์ˆ˜: {len(df.columns)}\n"
220
- content += f"- ์ปฌ๋Ÿผ ๋ชฉ๋ก: {', '.join(df.columns)}\n"
221
- content += f"\n๐Ÿ“‹ ์ปฌ๋Ÿผ ๋ฐ์ดํ„ฐ ํƒ€์ž…:\n"
222
- for col, dtype in df.dtypes.items():
223
- content += f"- {col}: {dtype}\n"
 
 
 
 
 
 
224
  null_counts = df.isnull().sum()
225
- if null_counts.any():
226
- content += f"\nโš ๏ธ ๊ฒฐ์ธก์น˜:\n"
227
- for col, null_count in null_counts[null_counts > 0].items():
228
- content += f"- {col}: {null_count}๊ฐœ ๋ˆ„๋ฝ\n"
229
  return content, "csv"
230
  except UnicodeDecodeError:
231
  continue
232
  raise UnicodeDecodeError(f"์ง€์›๋˜๋Š” ์ธ์ฝ”๋”ฉ์œผ๋กœ ํŒŒ์ผ์„ ์ฝ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค ({', '.join(encodings)})")
233
-
234
- else: # ํ…์ŠคํŠธ ํŒŒ์ผ
 
235
  encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
236
  for encoding in encodings:
237
  try:
238
  with open(file.name, 'r', encoding=encoding) as f:
239
  content = f.read()
240
 
241
- # ํŒŒ์ผ ๋‚ด์šฉ ๋ถ„์„
242
- lines = content.split('\n')
243
- total_lines = len(lines)
244
- non_empty_lines = len([line for line in lines if line.strip()])
245
-
246
- # ์ฝ”๋“œ ํŒŒ์ผ ์—ฌ๋ถ€ ํ™•์ธ
247
- is_code = any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function'])
 
 
 
 
 
 
 
248
 
249
- if is_code:
250
- # ์ฝ”๋“œ ํŒŒ์ผ ๋ถ„์„
251
- functions = len([line for line in lines if 'def ' in line])
252
- classes = len([line for line in lines if 'class ' in line])
253
- imports = len([line for line in lines if 'import ' in line or 'from ' in line])
254
-
255
- analysis = f"\n๐Ÿ“ ์ฝ”๋“œ ๋ถ„์„:\n"
256
- analysis += f"- ์ „์ฒด ๋ผ์ธ ์ˆ˜: {total_lines}\n"
257
- analysis += f"- ํ•จ์ˆ˜ ์ˆ˜: {functions}\n"
258
- analysis += f"- ํด๋ž˜์Šค ์ˆ˜: {classes}\n"
259
- analysis += f"- import ๋ฌธ ์ˆ˜: {imports}\n"
260
- else:
261
- # ์ผ๋ฐ˜ ํ…์ŠคํŠธ ํŒŒ์ผ ๋ถ„์„
262
- words = len(content.split())
263
- chars = len(content)
264
-
265
- analysis = f"\n๐Ÿ“ ํ…์ŠคํŠธ ๋ถ„์„:\n"
266
- analysis += f"- ์ „์ฒด ๋ผ์ธ ์ˆ˜: {total_lines}\n"
267
- analysis += f"- ์‹ค์ œ ๋‚ด์šฉ์ด ์žˆ๋Š” ๋ผ์ธ ์ˆ˜: {non_empty_lines}\n"
268
- analysis += f"- ๋‹จ์–ด ์ˆ˜: {words}\n"
269
- analysis += f"- ๋ฌธ์ž ์ˆ˜: {chars}\n"
270
 
271
- return content + analysis, "text"
 
 
 
 
 
 
272
  except UnicodeDecodeError:
273
  continue
274
  raise UnicodeDecodeError(f"์ง€์›๋˜๋Š” ์ธ์ฝ”๋”ฉ์œผ๋กœ ํŒŒ์ผ์„ ์ฝ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค ({', '.join(encodings)})")
@@ -276,6 +374,9 @@ def read_uploaded_file(file):
276
  except Exception as e:
277
  return f"ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}", "error"
278
 
 
 
 
279
  # ํŒŒ์ผ ์—…๋กœ๋“œ ์ด๋ฒคํŠธ ํ•ธ๋“ค๋ง ์ˆ˜์ •
280
  def init_msg():
281
  return "ํŒŒ์ผ์„ ๋ถ„์„ํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค..."
@@ -291,18 +392,15 @@ CSS = """
291
  --text-color: #333333;
292
  --shadow-color: rgba(0, 0, 0, 0.1);
293
  }
294
-
295
  body {
296
  background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
297
  min-height: 100vh;
298
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
299
  }
300
-
301
  .container {
302
  transform-style: preserve-3d;
303
  perspective: 1000px;
304
  }
305
-
306
  .chatbot {
307
  background: var(--card-background);
308
  border-radius: 20px;
@@ -313,11 +411,9 @@ body {
313
  transition: transform 0.3s ease;
314
  backdrop-filter: blur(10px);
315
  }
316
-
317
  .chatbot:hover {
318
  transform: translateZ(10px);
319
  }
320
-
321
  /* ๋ฉ”์‹œ์ง€ ์ž…๋ ฅ ์˜์—ญ */
322
  .input-area {
323
  background: var(--card-background);
@@ -333,11 +429,9 @@ body {
333
  align-items: center;
334
  gap: 10px;
335
  }
336
-
337
  .input-area:hover {
338
  transform: translateZ(5px);
339
  }
340
-
341
  /* ๋ฒ„ํŠผ ์Šคํƒ€์ผ */
342
  .custom-button {
343
  background: linear-gradient(145deg, var(--primary-color), var(--secondary-color));
@@ -353,14 +447,12 @@ body {
353
  0 4px 6px var(--shadow-color),
354
  0 1px 3px var(--shadow-color);
355
  }
356
-
357
  .custom-button:hover {
358
  transform: translateZ(5px) translateY(-2px);
359
  box-shadow:
360
  0 7px 14px var(--shadow-color),
361
  0 3px 6px var(--shadow-color);
362
  }
363
-
364
  /* ํŒŒ์ผ ์—…๋กœ๋“œ ๋ฒ„ํŠผ */
365
  .file-upload-icon {
366
  background: linear-gradient(145deg, #64b5f6, #42a5f5);
@@ -376,12 +468,10 @@ body {
376
  transition: all 0.3s ease;
377
  box-shadow: 0 2px 5px rgba(0,0,0,0.1);
378
  }
379
-
380
  .file-upload-icon:hover {
381
  transform: translateY(-2px);
382
  box-shadow: 0 4px 8px rgba(0,0,0,0.2);
383
  }
384
-
385
  /* ํŒŒ์ผ ์—…๋กœ๋“œ ๋ฒ„ํŠผ ๋‚ด๋ถ€ ์š”์†Œ ์Šคํƒ€์ผ๋ง */
386
  .file-upload-icon > .wrap {
387
  display: flex !important;
@@ -390,17 +480,14 @@ body {
390
  width: 100%;
391
  height: 100%;
392
  }
393
-
394
  .file-upload-icon > .wrap > p {
395
  display: none !important;
396
  }
397
-
398
  .file-upload-icon > .wrap::before {
399
  content: "๐Ÿ“";
400
  font-size: 2em;
401
  display: block;
402
  }
403
-
404
  /* ๋ฉ”์‹œ์ง€ ์Šคํƒ€์ผ */
405
  .message {
406
  background: var(--card-background);
@@ -413,16 +500,13 @@ body {
413
  transform: translateZ(0);
414
  transition: all 0.3s ease;
415
  }
416
-
417
  .message:hover {
418
  transform: translateZ(5px);
419
  }
420
-
421
  .chat-container {
422
  height: 600px !important;
423
  margin-bottom: 10px;
424
  }
425
-
426
  .input-container {
427
  height: 70px !important;
428
  display: flex;
@@ -430,7 +514,6 @@ body {
430
  gap: 10px;
431
  margin-top: 5px;
432
  }
433
-
434
  .input-textbox {
435
  height: 70px !important;
436
  border-radius: 8px !important;
@@ -439,17 +522,14 @@ body {
439
  display: flex !important;
440
  align-items: flex-start !important; /* ํ…์ŠคํŠธ ์ž…๋ ฅ ์œ„์น˜๋ฅผ ์œ„๋กœ ์กฐ์ • */
441
  }
442
-
443
  .input-textbox textarea {
444
  padding-top: 5px !important; /* ํ…์ŠคํŠธ ์ƒ๋‹จ ์—ฌ๋ฐฑ ์กฐ์ • */
445
  }
446
-
447
  .send-button {
448
  height: 70px !important;
449
  min-width: 70px !important;
450
  font-size: 1.1em !important;
451
  }
452
-
453
  /* ์„ค์ • ํŒจ๋„ ๊ธฐ๋ณธ ์Šคํƒ€์ผ */
454
  .settings-panel {
455
  padding: 20px;
@@ -457,12 +537,37 @@ body {
457
  }
458
  """
459
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
  @spaces.GPU
461
  def stream_chat(message: str, history: list, uploaded_file, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
 
 
462
  try:
 
 
 
463
  print(f'message is - {message}')
464
  print(f'history is - {history}')
465
-
466
  # ํŒŒ์ผ ์—…๋กœ๋“œ ์ฒ˜๋ฆฌ
467
  file_context = ""
468
  if uploaded_file and message == "ํŒŒ์ผ์„ ๋ถ„์„ํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค...":
@@ -471,10 +576,23 @@ def stream_chat(message: str, history: list, uploaded_file, temperature: float,
471
  if content:
472
  file_analysis = analyze_file_content(content, file_type)
473
  file_context = f"\n\n๐Ÿ“„ ํŒŒ์ผ ๋ถ„์„ ๊ฒฐ๊ณผ:\n{file_analysis}\n\nํŒŒ์ผ ๋‚ด์šฉ:\n```\n{content}\n```"
 
474
  message = "์—…๋กœ๋“œ๋œ ํŒŒ์ผ์„ ๋ถ„์„ํ•ด์ฃผ์„ธ์š”."
475
  except Exception as e:
476
  print(f"ํŒŒ์ผ ๋ถ„์„ ์˜ค๋ฅ˜: {str(e)}")
477
  file_context = f"\n\nโŒ ํŒŒ์ผ ๋ถ„์„ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
478
 
479
  # ๊ด€๋ จ ์ปจํ…์ŠคํŠธ ์ฐพ๊ธฐ
480
  try:
@@ -498,9 +616,18 @@ def stream_chat(message: str, history: list, uploaded_file, temperature: float,
498
  final_message = file_context + wiki_context + "\nํ˜„์žฌ ์งˆ๋ฌธ: " + message
499
  conversation.append({"role": "user", "content": final_message})
500
 
501
- # ํ† ํฌ๋‚˜์ด์ € ์„ค์ •
502
  input_ids = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
503
- inputs = tokenizer(input_ids, return_tensors="pt").to(0)
 
 
 
 
 
 
 
 
 
504
 
505
  streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
506
 
@@ -510,12 +637,15 @@ def stream_chat(message: str, history: list, uploaded_file, temperature: float,
510
  top_k=top_k,
511
  top_p=top_p,
512
  repetition_penalty=penalty,
513
- max_new_tokens=max_new_tokens,
514
  do_sample=True,
515
  temperature=temperature,
516
  eos_token_id=[255001],
517
  )
518
 
 
 
 
519
  thread = Thread(target=model.generate, kwargs=generate_kwargs)
520
  thread.start()
521
 
@@ -524,15 +654,26 @@ def stream_chat(message: str, history: list, uploaded_file, temperature: float,
524
  buffer += new_text
525
  yield "", history + [[message, buffer]]
526
 
 
 
 
527
  except Exception as e:
528
  error_message = f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
529
  print(f"Stream chat ์˜ค๋ฅ˜: {error_message}")
 
 
530
  yield "", history + [[message, error_message]]
531
 
532
 
533
 
534
  def create_demo():
535
  with gr.Blocks(css=CSS) as demo:
 
 
 
 
 
 
536
  chatbot = gr.Chatbot(
537
  value=[],
538
  height=600,
@@ -551,7 +692,7 @@ def create_demo():
551
  show_label=False
552
  )
553
 
554
- with gr.Column(scale=4):
555
  msg = gr.Textbox(
556
  show_label=False,
557
  placeholder="๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”... ๐Ÿ’ญ",
@@ -566,6 +707,13 @@ def create_demo():
566
  elem_classes="send-button custom-button",
567
  scale=1
568
  )
 
 
 
 
 
 
 
569
 
570
  with gr.Accordion("๐ŸŽฎ ๊ณ ๊ธ‰ ์„ค์ •", open=False):
571
  with gr.Row():
@@ -602,6 +750,12 @@ def create_demo():
602
  inputs=msg
603
  )
604
 
 
 
 
 
 
 
605
  # ์ด๋ฒคํŠธ ๋ฐ”์ธ๋”ฉ
606
  msg.submit(
607
  stream_chat,
@@ -626,6 +780,13 @@ def create_demo():
626
  queue=True
627
  )
628
 
 
 
 
 
 
 
 
629
  return demo
630
 
631
  if __name__ == "__main__":
 
2
  import gradio as gr
3
  import spaces
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
5
+
6
  import os
7
  from threading import Thread
8
  import random
 
13
  from typing import List, Tuple
14
  import json
15
  from datetime import datetime
16
+ import pyarrow.parquet as pq
17
+ import pypdf
18
+ import io
19
+ import pyarrow.parquet as pq
20
+ from pdfminer.high_level import extract_text
21
+ from pdfminer.layout import LAParams
22
+ from tabulate import tabulate # tabulate ์ถ”๊ฐ€
23
+ import platform
24
+ import subprocess
25
+ import pytesseract
26
+ from pdf2image import convert_from_path
27
+
28
+ # ์ „์—ญ ๋ณ€์ˆ˜ ์ถ”๊ฐ€
29
+ current_file_context = None
30
 
31
  # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
32
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
34
  MODELS = os.environ.get("MODELS")
35
  MODEL_NAME = MODEL_ID.split("/")[-1]
36
 
37
+ model = None # ์ „์—ญ ๋ณ€์ˆ˜๋กœ ์„ ์–ธ
 
 
 
 
 
38
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
39
 
40
  # ์œ„ํ‚คํ”ผ๋””์•„ ๋ฐ์ดํ„ฐ์…‹ ๋กœ๋“œ
 
154
  words = len(content.split())
155
  return f"๐Ÿ“ ๋ฌธ์„œ ๊ตฌ์กฐ: {total_lines}์ค„, {paragraphs}๋‹จ๋ฝ, ์•ฝ {words}๋‹จ์–ด"
156
 
157
+
158
def extract_pdf_text_with_ocr(file_path, poppler_path=None):
    """Extract text from a PDF by rendering its pages and running OCR on them.

    Args:
        file_path: Path of the PDF file to read.
        poppler_path: Optional directory holding the Poppler binaries. When
            omitted, the previous behaviour is kept: a fixed install
            directory on Windows, and ``None`` on every other platform.

    Returns:
        The OCR text of every page, each page preceded by a
        ``--- 페이지 N ---`` marker line. A page whose OCR raises is logged
        and left out. If anything outside the per-page OCR fails, the
        exception is not raised; an error message string is returned instead.
    """
    # The OCR options do not depend on the page, so build them once.
    ocr_config = r'--oem 3 --psm 6 -l kor+eng'

    try:
        # Fall back to the historical hard-coded location only when the
        # caller did not supply a path.
        if poppler_path is None and platform.system() == 'Windows':
            poppler_path = r"C:\Program Files\poppler-0.68.0\bin"

        # Convert the PDF into one image per page.
        images = convert_from_path(
            file_path,
            poppler_path=poppler_path,
            fmt='jpeg',
            grayscale=False,
            size=(1700, None),  # render size chosen to improve resolution
        )

        # Collect per-page text and join once instead of repeated "+=".
        page_chunks = []
        for page_number, image in enumerate(images, start=1):
            try:
                page_text = pytesseract.image_to_string(
                    image,
                    config=ocr_config,
                )
            except Exception as e:
                # Best effort: one unreadable page must not discard the rest.
                print(f"페이지 {page_number} OCR 오류: {str(e)}")
                continue
            page_chunks.append(f"\n--- 페이지 {page_number} ---\n{page_text}\n")

        return "".join(page_chunks)
    except Exception as e:
        # Callers expect a string in every case, so report instead of raising.
        return f"PDF 텍스트 추출 오류: {str(e)}"
 
196
 
197
  def read_uploaded_file(file):
198
  if file is None:
 
200
  try:
201
  file_ext = os.path.splitext(file.name)[1].lower()
202
 
203
+
204
+
205
+ # Parquet ํŒŒ์ผ ์ฒ˜๋ฆฌ
206
  if file_ext == '.parquet':
207
+ try:
208
+ table = pq.read_table(file.name)
209
+ df = table.to_pandas()
210
+
211
+ content = f"๐Ÿ“Š Parquet ํŒŒ์ผ ๋ถ„์„:\n\n"
212
+ content += f"1. ๊ธฐ๋ณธ ์ •๋ณด:\n"
213
+ content += f"- ์ „์ฒด ํ–‰ ์ˆ˜: {len(df):,}๊ฐœ\n"
214
+ content += f"- ์ „์ฒด ์—ด ์ˆ˜: {len(df.columns)}๊ฐœ\n"
215
+ content += f"- ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰: {df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB\n\n"
216
+
217
+ content += f"2. ์ปฌ๋Ÿผ ์ •๋ณด:\n"
218
+ for col in df.columns:
219
+ content += f"- {col} ({df[col].dtype})\n"
220
+
221
+ content += f"\n3. ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ:\n"
222
+ # tabulate ์‚ฌ์šฉํ•˜์—ฌ ํ…Œ์ด๋ธ” ํ˜•์‹์œผ๋กœ ์ถœ๋ ฅ
223
+ content += tabulate(df.head(5), headers='keys', tablefmt='pipe', showindex=False)
224
+
225
+ content += f"\n\n4. ๊ฒฐ์ธก์น˜ ์ •๋ณด:\n"
226
+ null_counts = df.isnull().sum()
227
+ for col, count in null_counts[null_counts > 0].items():
228
+ content += f"- {col}: {count:,}๊ฐœ ({count/len(df)*100:.1f}%)\n"
229
+
230
+ # ์ˆ˜์น˜ํ˜• ์ปฌ๋Ÿผ์— ๋Œ€ํ•œ ๊ธฐ๋ณธ ํ†ต๊ณ„
231
+ numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns
232
+ if len(numeric_cols) > 0:
233
+ content += f"\n5. ์ˆ˜์น˜ํ˜• ์ปฌ๋Ÿผ ํ†ต๊ณ„:\n"
234
+ stats_df = df[numeric_cols].describe()
235
+ content += tabulate(stats_df, headers='keys', tablefmt='pipe')
236
+
237
+ return content, "parquet"
238
+ except Exception as e:
239
+ return f"Parquet ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}", "error"
240
+
241
+ # PDF ํŒŒ์ผ ์ฒ˜๋ฆฌ
242
+ if file_ext == '.pdf':
243
+ try:
244
+ pdf_reader = pypdf.PdfReader(file.name)
245
+ total_pages = len(pdf_reader.pages)
246
+
247
+ content = f"๐Ÿ“‘ PDF ๋ฌธ์„œ ๋ถ„์„:\n\n"
248
+ content += f"1. ๊ธฐ๋ณธ ์ •๋ณด:\n"
249
+ content += f"- ์ด ํŽ˜์ด์ง€ ์ˆ˜: {total_pages}ํŽ˜์ด์ง€\n"
250
+
251
+ # ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์ถ”์ถœ
252
+ if pdf_reader.metadata:
253
+ content += "\n2. ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ:\n"
254
+ for key, value in pdf_reader.metadata.items():
255
+ if value and str(key).startswith('/'):
256
+ content += f"- {key[1:]}: {value}\n"
257
+
258
+ # ๋จผ์ € pdfminer๋กœ ํ…์ŠคํŠธ ์ถ”์ถœ ์‹œ๋„
259
+ try:
260
+ text = extract_text(
261
+ file.name,
262
+ laparams=LAParams(
263
+ line_margin=0.5,
264
+ word_margin=0.1,
265
+ char_margin=2.0,
266
+ all_texts=True
267
+ )
268
+ )
269
+ except:
270
+ text = ""
271
+
272
+ # pdfminer๋กœ ์ถ”์ถœ ์‹คํŒจ์‹œ OCR ์‹œ๋„
273
+ if not text.strip():
274
+ text = extract_pdf_text_with_ocr(file.name)
275
+
276
+ # ํ…์ŠคํŠธ ๋ถ„์„
277
+ if text:
278
+ words = text.split()
279
+ lines = text.split('\n')
280
+ content += f"\n3. ํ…์ŠคํŠธ ๋ถ„์„:\n"
281
+ content += f"- ์ด ๋‹จ์–ด ์ˆ˜: {len(words):,}๊ฐœ\n"
282
+ content += f"- ๊ณ ์œ  ๋‹จ์–ด ์ˆ˜: {len(set(words)):,}๊ฐœ\n"
283
+ content += f"- ์ด ๋ผ์ธ ์ˆ˜: {len(lines):,}๊ฐœ\n"
284
+
285
+ # ๋ณธ๋ฌธ ๋‚ด์šฉ
286
+ content += f"\n4. ๋ณธ๋ฌธ ๋‚ด์šฉ:\n"
287
+ preview_length = min(2000, len(text)) # ๋ฏธ๋ฆฌ๋ณด๊ธฐ ๊ธธ์ด ์ฆ๊ฐ€
288
+ content += f"--- ์ฒ˜์Œ {preview_length}์ž ---\n"
289
+ content += text[:preview_length]
290
+ if len(text) > preview_length:
291
+ content += f"\n... (์ด {len(text):,}์ž ์ค‘ ์ผ๋ถ€ ํ‘œ์‹œ)\n"
292
+ else:
293
+ content += "\nโš ๏ธ ํ…์ŠคํŠธ ์ถ”์ถœ ์‹คํŒจ"
294
+
295
+ return content, "pdf"
296
+ except Exception as e:
297
+ return f"PDF ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}", "error"
298
+
299
 
300
+
301
+ # CSV ํŒŒ์ผ ์ฒ˜๋ฆฌ
302
  elif file_ext == '.csv':
303
  encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
304
  for encoding in encodings:
305
  try:
306
  df = pd.read_csv(file.name, encoding=encoding)
307
+ content = f"๐Ÿ“Š CSV ํŒŒ์ผ ๋ถ„์„:\n\n"
308
+ content += f"1. ๊ธฐ๋ณธ ์ •๋ณด:\n"
309
+ content += f"- ์ „์ฒด ํ–‰ ์ˆ˜: {len(df):,}๊ฐœ\n"
310
+ content += f"- ์ „์ฒด ์—ด ์ˆ˜: {len(df.columns)}๊ฐœ\n"
311
+ content += f"- ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰: {df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB\n\n"
312
+
313
+ content += f"2. ์ปฌ๋Ÿผ ์ •๋ณด:\n"
314
+ for col in df.columns:
315
+ content += f"- {col} ({df[col].dtype})\n"
316
+
317
+ content += f"\n3. ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ:\n"
318
+ content += df.head(5).to_markdown(index=False)
319
+
320
+ content += f"\n\n4. ๊ฒฐ์ธก์น˜ ์ •๋ณด:\n"
321
  null_counts = df.isnull().sum()
322
+ for col, count in null_counts[null_counts > 0].items():
323
+ content += f"- {col}: {count:,}๊ฐœ ({count/len(df)*100:.1f}%)\n"
324
+
 
325
  return content, "csv"
326
  except UnicodeDecodeError:
327
  continue
328
  raise UnicodeDecodeError(f"์ง€์›๋˜๋Š” ์ธ์ฝ”๋”ฉ์œผ๋กœ ํŒŒ์ผ์„ ์ฝ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค ({', '.join(encodings)})")
329
+
330
+ # ํ…์ŠคํŠธ ํŒŒ์ผ ์ฒ˜๋ฆฌ
331
+ else:
332
  encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
333
  for encoding in encodings:
334
  try:
335
  with open(file.name, 'r', encoding=encoding) as f:
336
  content = f.read()
337
 
338
+ # ํŒŒ์ผ ๋‚ด์šฉ ๋ถ„์„
339
+ lines = content.split('\n')
340
+ total_lines = len(lines)
341
+ non_empty_lines = len([line for line in lines if line.strip()])
342
+
343
+ # ์ฝ”๋“œ ํŒŒ์ผ ์—ฌ๋ถ€ ํ™•์ธ
344
+ is_code = any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function'])
345
+
346
+ analysis = f"\n๐Ÿ“ ํŒŒ์ผ ๋ถ„์„:\n"
347
+ if is_code:
348
+ # ์ฝ”๋“œ ํŒŒ์ผ ๋ถ„์„
349
+ functions = len([line for line in lines if 'def ' in line])
350
+ classes = len([line for line in lines if 'class ' in line])
351
+ imports = len([line for line in lines if 'import ' in line or 'from ' in line])
352
 
353
+ analysis += f"- ํŒŒ์ผ ์œ ํ˜•: ์ฝ”๋“œ\n"
354
+ analysis += f"- ์ „์ฒด ๋ผ์ธ ์ˆ˜: {total_lines:,}์ค„\n"
355
+ analysis += f"- ํ•จ์ˆ˜ ์ˆ˜: {functions}๊ฐœ\n"
356
+ analysis += f"- ํด๋ž˜์Šค ์ˆ˜: {classes}๊ฐœ\n"
357
+ analysis += f"- import ๋ฌธ ์ˆ˜: {imports}๊ฐœ\n"
358
+ else:
359
+ # ์ผ๋ฐ˜ ํ…์ŠคํŠธ ํŒŒ์ผ ๋ถ„์„
360
+ words = len(content.split())
361
+ chars = len(content)
 
 
 
 
 
 
 
 
 
 
 
 
362
 
363
+ analysis += f"- ํŒŒ์ผ ์œ ํ˜•: ํ…์ŠคํŠธ\n"
364
+ analysis += f"- ์ „์ฒด ๋ผ์ธ ์ˆ˜: {total_lines:,}์ค„\n"
365
+ analysis += f"- ์‹ค์ œ ๋‚ด์šฉ์ด ์žˆ๋Š” ๋ผ์ธ ์ˆ˜: {non_empty_lines:,}์ค„\n"
366
+ analysis += f"- ๋‹จ์–ด ์ˆ˜: {words:,}๊ฐœ\n"
367
+ analysis += f"- ๋ฌธ์ž ์ˆ˜: {chars:,}๊ฐœ\n"
368
+
369
+ return content + analysis, "text"
370
  except UnicodeDecodeError:
371
  continue
372
  raise UnicodeDecodeError(f"์ง€์›๋˜๋Š” ์ธ์ฝ”๋”ฉ์œผ๋กœ ํŒŒ์ผ์„ ์ฝ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค ({', '.join(encodings)})")
 
374
  except Exception as e:
375
  return f"ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}", "error"
376
 
377
+
378
+
379
+
380
  # ํŒŒ์ผ ์—…๋กœ๋“œ ์ด๋ฒคํŠธ ํ•ธ๋“ค๋ง ์ˆ˜์ •
381
  def init_msg():
382
  return "ํŒŒ์ผ์„ ๋ถ„์„ํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค..."
 
392
  --text-color: #333333;
393
  --shadow-color: rgba(0, 0, 0, 0.1);
394
  }
 
395
  body {
396
  background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
397
  min-height: 100vh;
398
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
399
  }
 
400
  .container {
401
  transform-style: preserve-3d;
402
  perspective: 1000px;
403
  }
 
404
  .chatbot {
405
  background: var(--card-background);
406
  border-radius: 20px;
 
411
  transition: transform 0.3s ease;
412
  backdrop-filter: blur(10px);
413
  }
 
414
  .chatbot:hover {
415
  transform: translateZ(10px);
416
  }
 
417
  /* ๋ฉ”์‹œ์ง€ ์ž…๋ ฅ ์˜์—ญ */
418
  .input-area {
419
  background: var(--card-background);
 
429
  align-items: center;
430
  gap: 10px;
431
  }
 
432
  .input-area:hover {
433
  transform: translateZ(5px);
434
  }
 
435
  /* ๋ฒ„ํŠผ ์Šคํƒ€์ผ */
436
  .custom-button {
437
  background: linear-gradient(145deg, var(--primary-color), var(--secondary-color));
 
447
  0 4px 6px var(--shadow-color),
448
  0 1px 3px var(--shadow-color);
449
  }
 
450
  .custom-button:hover {
451
  transform: translateZ(5px) translateY(-2px);
452
  box-shadow:
453
  0 7px 14px var(--shadow-color),
454
  0 3px 6px var(--shadow-color);
455
  }
 
456
  /* ํŒŒ์ผ ์—…๋กœ๋“œ ๋ฒ„ํŠผ */
457
  .file-upload-icon {
458
  background: linear-gradient(145deg, #64b5f6, #42a5f5);
 
468
  transition: all 0.3s ease;
469
  box-shadow: 0 2px 5px rgba(0,0,0,0.1);
470
  }
 
471
  .file-upload-icon:hover {
472
  transform: translateY(-2px);
473
  box-shadow: 0 4px 8px rgba(0,0,0,0.2);
474
  }
 
475
  /* ํŒŒ์ผ ์—…๋กœ๋“œ ๋ฒ„ํŠผ ๋‚ด๋ถ€ ์š”์†Œ ์Šคํƒ€์ผ๋ง */
476
  .file-upload-icon > .wrap {
477
  display: flex !important;
 
480
  width: 100%;
481
  height: 100%;
482
  }
 
483
  .file-upload-icon > .wrap > p {
484
  display: none !important;
485
  }
 
486
  .file-upload-icon > .wrap::before {
487
  content: "๐Ÿ“";
488
  font-size: 2em;
489
  display: block;
490
  }
 
491
  /* ๋ฉ”์‹œ์ง€ ์Šคํƒ€์ผ */
492
  .message {
493
  background: var(--card-background);
 
500
  transform: translateZ(0);
501
  transition: all 0.3s ease;
502
  }
 
503
  .message:hover {
504
  transform: translateZ(5px);
505
  }
 
506
  .chat-container {
507
  height: 600px !important;
508
  margin-bottom: 10px;
509
  }
 
510
  .input-container {
511
  height: 70px !important;
512
  display: flex;
 
514
  gap: 10px;
515
  margin-top: 5px;
516
  }
 
517
  .input-textbox {
518
  height: 70px !important;
519
  border-radius: 8px !important;
 
522
  display: flex !important;
523
  align-items: flex-start !important; /* ํ…์ŠคํŠธ ์ž…๋ ฅ ์œ„์น˜๋ฅผ ์œ„๋กœ ์กฐ์ • */
524
  }
 
525
  .input-textbox textarea {
526
  padding-top: 5px !important; /* ํ…์ŠคํŠธ ์ƒ๋‹จ ์—ฌ๋ฐฑ ์กฐ์ • */
527
  }
 
528
  .send-button {
529
  height: 70px !important;
530
  min-width: 70px !important;
531
  font-size: 1.1em !important;
532
  }
 
533
  /* ์„ค์ • ํŒจ๋„ ๊ธฐ๋ณธ ์Šคํƒ€์ผ */
534
  .settings-panel {
535
  padding: 20px;
 
537
  }
538
  """
539
 
540
+ # GPU memory management function (revised)
541
def clear_cuda_memory():
    """Ask PyTorch to release its cached CUDA memory, if a GPU is usable.

    Returns immediately when CUDA is not available, so the function can be
    called from cleanup and error-handling paths on CPU-only hosts.
    """
    # The presence of the torch.cuda.empty_cache attribute says nothing about
    # whether a GPU exists; check availability before selecting a device.
    if not torch.cuda.is_available():
        return
    with torch.cuda.device('cuda'):
        torch.cuda.empty_cache()
545
+
546
+ # Model loading function (revised)
547
@spaces.GPU
def load_model():
    """Load the causal language model named by ``MODEL_ID``.

    Returns:
        The object returned by ``AutoModelForCausalLM.from_pretrained``,
        requested with ``torch_dtype=torch.bfloat16`` and
        ``device_map="auto"``.

    Raises:
        Exception: Any error raised while loading is logged and re-raised
            unchanged.
    """
    try:
        # Return directly rather than binding a local called "model", which
        # would shadow the module-level global of the same name.
        return AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
    except Exception as e:
        print(f"모델 로드 오류: {str(e)}")
        raise
559
+
560
  @spaces.GPU
561
  def stream_chat(message: str, history: list, uploaded_file, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
562
+ global model, current_file_context
563
+
564
  try:
565
+ if model is None:
566
+ model = load_model()
567
+
568
  print(f'message is - {message}')
569
  print(f'history is - {history}')
570
+
571
  # ํŒŒ์ผ ์—…๋กœ๋“œ ์ฒ˜๋ฆฌ
572
  file_context = ""
573
  if uploaded_file and message == "ํŒŒ์ผ์„ ๋ถ„์„ํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค...":
 
576
  if content:
577
  file_analysis = analyze_file_content(content, file_type)
578
  file_context = f"\n\n๐Ÿ“„ ํŒŒ์ผ ๋ถ„์„ ๊ฒฐ๊ณผ:\n{file_analysis}\n\nํŒŒ์ผ ๋‚ด์šฉ:\n```\n{content}\n```"
579
+ current_file_context = file_context # ํŒŒ์ผ ์ปจํ…์ŠคํŠธ ์ €์žฅ
580
  message = "์—…๋กœ๋“œ๋œ ํŒŒ์ผ์„ ๋ถ„์„ํ•ด์ฃผ์„ธ์š”."
581
  except Exception as e:
582
  print(f"ํŒŒ์ผ ๋ถ„์„ ์˜ค๋ฅ˜: {str(e)}")
583
  file_context = f"\n\nโŒ ํŒŒ์ผ ๋ถ„์„ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
584
+ elif current_file_context: # ์ €์žฅ๋œ ํŒŒ์ผ ์ปจํ…์ŠคํŠธ๊ฐ€ ์žˆ์œผ๋ฉด ์‚ฌ์šฉ
585
+ file_context = current_file_context
586
+
587
+
588
+ # ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ๋ชจ๋‹ˆํ„ฐ๋ง
589
+ if torch.cuda.is_available():
590
+ print(f"CUDA ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
591
+
592
+ # ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ๊ฐ€ ๋„ˆ๋ฌด ๊ธธ๋ฉด ์ž˜๋ผ๋‚ด๊ธฐ
593
+ max_history_length = 10 # ์ตœ๋Œ€ ํžˆ์Šคํ† ๋ฆฌ ๊ธธ์ด ์„ค์ •
594
+ if len(history) > max_history_length:
595
+ history = history[-max_history_length:]
596
 
597
  # ๊ด€๋ จ ์ปจํ…์ŠคํŠธ ์ฐพ๊ธฐ
598
  try:
 
616
  final_message = file_context + wiki_context + "\nํ˜„์žฌ ์งˆ๋ฌธ: " + message
617
  conversation.append({"role": "user", "content": final_message})
618
 
619
+ # ํ† ํฐ ์ˆ˜ ์ œํ•œ
620
  input_ids = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
621
+ max_length = 4096 # ๋˜๋Š” ๋ชจ๋ธ์˜ ์ตœ๋Œ€ ์ปจํ…์ŠคํŠธ ๊ธธ์ด
622
+ if len(input_ids.split()) > max_length:
623
+ # ์ปจํ…์ŠคํŠธ๊ฐ€ ๋„ˆ๋ฌด ๊ธธ๋ฉด ์ž˜๋ผ๋‚ด๊ธฐ
624
+ input_ids = " ".join(input_ids.split()[-max_length:])
625
+
626
+ inputs = tokenizer(input_ids, return_tensors="pt").to("cuda")
627
+
628
+ # ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ์ฒดํฌ
629
+ if torch.cuda.is_available():
630
+ print(f"์ž…๋ ฅ ํ…์„œ ์ƒ์„ฑ ํ›„ CUDA ๋ฉ”๋ชจ๋ฆฌ: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
631
 
632
  streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
633
 
 
637
  top_k=top_k,
638
  top_p=top_p,
639
  repetition_penalty=penalty,
640
+ max_new_tokens=min(max_new_tokens, 2048), # ์ตœ๋Œ€ ํ† ํฐ ์ˆ˜ ์ œํ•œ
641
  do_sample=True,
642
  temperature=temperature,
643
  eos_token_id=[255001],
644
  )
645
 
646
+ # ์ƒ์„ฑ ์‹œ์ž‘ ์ „ ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ
647
+ clear_cuda_memory()
648
+
649
  thread = Thread(target=model.generate, kwargs=generate_kwargs)
650
  thread.start()
651
 
 
654
  buffer += new_text
655
  yield "", history + [[message, buffer]]
656
 
657
+ # ์ƒ์„ฑ ์™„๋ฃŒ ํ›„ ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ
658
+ clear_cuda_memory()
659
+
660
  except Exception as e:
661
  error_message = f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
662
  print(f"Stream chat ์˜ค๋ฅ˜: {error_message}")
663
+ # ์˜ค๋ฅ˜ ๋ฐœ์ƒ ์‹œ์—๋„ ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ
664
+ clear_cuda_memory()
665
  yield "", history + [[message, error_message]]
666
 
667
 
668
 
669
  def create_demo():
670
  with gr.Blocks(css=CSS) as demo:
671
+ with gr.Column(elem_classes="markdown-style"):
672
+ gr.Markdown("""
673
+ # ๐Ÿค– OnDevice AI RAG
674
+ #### ๐Ÿ“Š RAG: ๋ฐ์ดํ„ฐ ๋ถ„์„/ํ•™์Šต ํŒŒ์ผ ์—…๋กœ๋“œ(TXT, CSV, PDF, Parquet ํŒŒ์ผ)
675
+ """)
676
+
677
  chatbot = gr.Chatbot(
678
  value=[],
679
  height=600,
 
692
  show_label=False
693
  )
694
 
695
+ with gr.Column(scale=3):
696
  msg = gr.Textbox(
697
  show_label=False,
698
  placeholder="๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”... ๐Ÿ’ญ",
 
707
  elem_classes="send-button custom-button",
708
  scale=1
709
  )
710
+
711
+ with gr.Column(scale=1, min_width=70):
712
+ clear = gr.Button(
713
+ "Clear",
714
+ elem_classes="clear-button custom-button",
715
+ scale=1
716
+ )
717
 
718
  with gr.Accordion("๐ŸŽฎ ๊ณ ๊ธ‰ ์„ค์ •", open=False):
719
  with gr.Row():
 
750
  inputs=msg
751
  )
752
 
753
+ # Clear ํ•จ์ˆ˜ ์ •์˜
754
+ def clear_conversation():
755
+ global current_file_context
756
+ current_file_context = None
757
+ return [], None, "์ƒˆ๋กœ์šด ๋Œ€ํ™”๋ฅผ ์‹œ์ž‘ํ•˜์„ธ์š”..."
758
+
759
  # ์ด๋ฒคํŠธ ๋ฐ”์ธ๋”ฉ
760
  msg.submit(
761
  stream_chat,
 
780
  queue=True
781
  )
782
 
783
+ # Clear ๋ฒ„ํŠผ ์ด๋ฒคํŠธ ๋ฐ”์ธ๋”ฉ
784
+ clear.click(
785
+ fn=clear_conversation,
786
+ outputs=[chatbot, file_upload, msg],
787
+ queue=False
788
+ )
789
+
790
  return demo
791
 
792
  if __name__ == "__main__":