This view is limited to 50 files because it contains too many changes.  See the raw diff here.
Files changed (50) hide show
  1. .github/ISSUE_TEMPLATE/bug_report.yml +0 -75
  2. .github/ISSUE_TEMPLATE/feature_request.yml +0 -28
  3. .github/workflows/build-with-chatglm.yml +0 -44
  4. .github/workflows/build-with-jittorllms.yml +0 -44
  5. .github/workflows/build-with-latex.yml +0 -44
  6. .github/workflows/build-without-local-llms.yml +0 -44
  7. .gitignore +1 -14
  8. .pre-commit-config.yaml +0 -32
  9. Dockerfile +3 -24
  10. README.md +126 -365
  11. app.py +105 -409
  12. appx.py +7 -0
  13. check_proxy.py +6 -160
  14. colorful.py +0 -61
  15. config.py +15 -356
  16. core_functional.py +0 -173
  17. crazy_functional.py +0 -723
  18. crazy_functions/CodeInterpreter.py +0 -232
  19. crazy_functions/Langchain知识库.py +0 -106
  20. crazy_functions/Latex全文润色.py +0 -245
  21. crazy_functions/Latex全文翻译.py +0 -176
  22. crazy_functions/Latex输出PDF.py +0 -484
  23. crazy_functions/Latex输出PDF结果.py +0 -306
  24. crazy_functions/__init__.py +0 -0
  25. crazy_functions/agent_fns/auto_agent.py +0 -23
  26. crazy_functions/agent_fns/echo_agent.py +0 -19
  27. crazy_functions/agent_fns/general.py +0 -138
  28. crazy_functions/agent_fns/persistent.py +0 -16
  29. crazy_functions/agent_fns/pipe.py +0 -194
  30. crazy_functions/agent_fns/watchdog.py +0 -28
  31. crazy_functions/chatglm微调工具.py +0 -141
  32. crazy_functions/crazy_functions_test.py +0 -231
  33. crazy_functions/crazy_utils.py +0 -608
  34. crazy_functions/diagram_fns/file_tree.py +0 -122
  35. crazy_functions/game_fns/game_ascii_art.py +0 -42
  36. crazy_functions/game_fns/game_interactive_story.py +0 -212
  37. crazy_functions/game_fns/game_utils.py +0 -35
  38. crazy_functions/gen_fns/gen_fns_shared.py +0 -70
  39. crazy_functions/ipc_fns/mp.py +0 -37
  40. crazy_functions/json_fns/pydantic_io.py +0 -111
  41. crazy_functions/latex_fns/latex_actions.py +0 -467
  42. crazy_functions/latex_fns/latex_toolbox.py +0 -694
  43. crazy_functions/latex_utils.py +0 -788
  44. crazy_functions/live_audio/aliyunASR.py +0 -261
  45. crazy_functions/live_audio/audio_io.py +0 -51
  46. crazy_functions/multi_stage/multi_stage_utils.py +0 -93
  47. crazy_functions/pdf_fns/breakdown_txt.py +0 -125
  48. crazy_functions/pdf_fns/parse_pdf.py +0 -171
  49. crazy_functions/pdf_fns/parse_word.py +0 -85
  50. crazy_functions/pdf_fns/report_gen_html.py +0 -58
.github/ISSUE_TEMPLATE/bug_report.yml DELETED
@@ -1,75 +0,0 @@
1
- name: Report Bug | 报告BUG
2
- description: "Report bug"
3
- title: "[Bug]: "
4
- labels: []
5
- body:
6
- - type: dropdown
7
- id: download
8
- attributes:
9
- label: Installation Method | 安装方法与平台
10
- options:
11
- - Please choose | 请选择
12
- - Pip Install (I ignored requirements.txt)
13
- - Pip Install (I used latest requirements.txt)
14
- - Anaconda (I ignored requirements.txt)
15
- - Anaconda (I used latest requirements.txt)
16
- - Docker(Windows/Mac)
17
- - Docker(Linux)
18
- - Docker-Compose(Windows/Mac)
19
- - Docker-Compose(Linux)
20
- - Huggingface
21
- - Others (Please Describe)
22
- validations:
23
- required: true
24
-
25
- - type: dropdown
26
- id: version
27
- attributes:
28
- label: Version | 版本
29
- options:
30
- - Please choose | 请选择
31
- - Latest | 最新版
32
- - Others | 非最新版
33
- validations:
34
- required: true
35
-
36
- - type: dropdown
37
- id: os
38
- attributes:
39
- label: OS | 操作系统
40
- options:
41
- - Please choose | 请选择
42
- - Windows
43
- - Mac
44
- - Linux
45
- - Docker
46
- validations:
47
- required: true
48
-
49
- - type: textarea
50
- id: describe
51
- attributes:
52
- label: Describe the bug | 简述
53
- description: Describe the bug | 简述
54
- validations:
55
- required: true
56
-
57
- - type: textarea
58
- id: screenshot
59
- attributes:
60
- label: Screen Shot | 有帮助的截图
61
- description: Screen Shot | 有帮助的截图
62
- validations:
63
- required: true
64
-
65
- - type: textarea
66
- id: traceback
67
- attributes:
68
- label: Terminal Traceback & Material to Help Reproduce Bugs | 终端traceback(如有) + 帮助我们复现的测试材料样本(如有)
69
- description: Terminal Traceback & Material to Help Reproduce Bugs | 终端traceback(如有) + 帮助我们复现的测试材料样本(如有)
70
-
71
-
72
-
73
-
74
-
75
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/ISSUE_TEMPLATE/feature_request.yml DELETED
@@ -1,28 +0,0 @@
1
- name: Feature Request | 功能请求
2
- description: "Feature Request"
3
- title: "[Feature]: "
4
- labels: []
5
- body:
6
- - type: dropdown
7
- id: download
8
- attributes:
9
- label: Class | 类型
10
- options:
11
- - Please choose | 请选择
12
- - 其他
13
- - 函数插件
14
- - 大语言模型
15
- - 程序主体
16
- validations:
17
- required: false
18
-
19
- - type: textarea
20
- id: traceback
21
- attributes:
22
- label: Feature Request | 功能请求
23
- description: Feature Request | 功能请求
24
-
25
-
26
-
27
-
28
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/build-with-chatglm.yml DELETED
@@ -1,44 +0,0 @@
1
- # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
2
- name: Create and publish a Docker image for ChatGLM support
3
-
4
- on:
5
- push:
6
- branches:
7
- - 'master'
8
-
9
- env:
10
- REGISTRY: ghcr.io
11
- IMAGE_NAME: ${{ github.repository }}_chatglm_moss
12
-
13
- jobs:
14
- build-and-push-image:
15
- runs-on: ubuntu-latest
16
- permissions:
17
- contents: read
18
- packages: write
19
-
20
- steps:
21
- - name: Checkout repository
22
- uses: actions/checkout@v3
23
-
24
- - name: Log in to the Container registry
25
- uses: docker/login-action@v2
26
- with:
27
- registry: ${{ env.REGISTRY }}
28
- username: ${{ github.actor }}
29
- password: ${{ secrets.GITHUB_TOKEN }}
30
-
31
- - name: Extract metadata (tags, labels) for Docker
32
- id: meta
33
- uses: docker/metadata-action@v4
34
- with:
35
- images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
36
-
37
- - name: Build and push Docker image
38
- uses: docker/build-push-action@v4
39
- with:
40
- context: .
41
- push: true
42
- file: docs/GithubAction+ChatGLM+Moss
43
- tags: ${{ steps.meta.outputs.tags }}
44
- labels: ${{ steps.meta.outputs.labels }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/build-with-jittorllms.yml DELETED
@@ -1,44 +0,0 @@
1
- # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
2
- name: Create and publish a Docker image for ChatGLM support
3
-
4
- on:
5
- push:
6
- branches:
7
- - 'master'
8
-
9
- env:
10
- REGISTRY: ghcr.io
11
- IMAGE_NAME: ${{ github.repository }}_jittorllms
12
-
13
- jobs:
14
- build-and-push-image:
15
- runs-on: ubuntu-latest
16
- permissions:
17
- contents: read
18
- packages: write
19
-
20
- steps:
21
- - name: Checkout repository
22
- uses: actions/checkout@v3
23
-
24
- - name: Log in to the Container registry
25
- uses: docker/login-action@v2
26
- with:
27
- registry: ${{ env.REGISTRY }}
28
- username: ${{ github.actor }}
29
- password: ${{ secrets.GITHUB_TOKEN }}
30
-
31
- - name: Extract metadata (tags, labels) for Docker
32
- id: meta
33
- uses: docker/metadata-action@v4
34
- with:
35
- images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
36
-
37
- - name: Build and push Docker image
38
- uses: docker/build-push-action@v4
39
- with:
40
- context: .
41
- push: true
42
- file: docs/GithubAction+JittorLLMs
43
- tags: ${{ steps.meta.outputs.tags }}
44
- labels: ${{ steps.meta.outputs.labels }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/build-with-latex.yml DELETED
@@ -1,44 +0,0 @@
1
- # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
2
- name: Create and publish a Docker image for Latex support
3
-
4
- on:
5
- push:
6
- branches:
7
- - 'master'
8
-
9
- env:
10
- REGISTRY: ghcr.io
11
- IMAGE_NAME: ${{ github.repository }}_with_latex
12
-
13
- jobs:
14
- build-and-push-image:
15
- runs-on: ubuntu-latest
16
- permissions:
17
- contents: read
18
- packages: write
19
-
20
- steps:
21
- - name: Checkout repository
22
- uses: actions/checkout@v3
23
-
24
- - name: Log in to the Container registry
25
- uses: docker/login-action@v2
26
- with:
27
- registry: ${{ env.REGISTRY }}
28
- username: ${{ github.actor }}
29
- password: ${{ secrets.GITHUB_TOKEN }}
30
-
31
- - name: Extract metadata (tags, labels) for Docker
32
- id: meta
33
- uses: docker/metadata-action@v4
34
- with:
35
- images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
36
-
37
- - name: Build and push Docker image
38
- uses: docker/build-push-action@v4
39
- with:
40
- context: .
41
- push: true
42
- file: docs/GithubAction+NoLocal+Latex
43
- tags: ${{ steps.meta.outputs.tags }}
44
- labels: ${{ steps.meta.outputs.labels }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/build-without-local-llms.yml DELETED
@@ -1,44 +0,0 @@
1
- # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
2
- name: Create and publish a Docker image
3
-
4
- on:
5
- push:
6
- branches:
7
- - 'master'
8
-
9
- env:
10
- REGISTRY: ghcr.io
11
- IMAGE_NAME: ${{ github.repository }}_nolocal
12
-
13
- jobs:
14
- build-and-push-image:
15
- runs-on: ubuntu-latest
16
- permissions:
17
- contents: read
18
- packages: write
19
-
20
- steps:
21
- - name: Checkout repository
22
- uses: actions/checkout@v3
23
-
24
- - name: Log in to the Container registry
25
- uses: docker/login-action@v2
26
- with:
27
- registry: ${{ env.REGISTRY }}
28
- username: ${{ github.actor }}
29
- password: ${{ secrets.GITHUB_TOKEN }}
30
-
31
- - name: Extract metadata (tags, labels) for Docker
32
- id: meta
33
- uses: docker/metadata-action@v4
34
- with:
35
- images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
36
-
37
- - name: Build and push Docker image
38
- uses: docker/build-push-action@v4
39
- with:
40
- context: .
41
- push: true
42
- file: docs/GithubAction+NoLocal
43
- tags: ${{ steps.meta.outputs.tags }}
44
- labels: ${{ steps.meta.outputs.labels }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore CHANGED
@@ -131,22 +131,9 @@ dmypy.json
131
  # Pyre type checker
132
  .pyre/
133
 
134
- .vscode
135
- .idea
136
-
137
  history
138
  ssr_conf
139
  config_private.py
140
  gpt_log
141
  private.md
142
- private_upload
143
- other_llms
144
- cradle*
145
- debug*
146
- private*
147
- crazy_functions/test_project/pdf_and_word
148
- crazy_functions/test_samples
149
- request_llm/jittorllms
150
- multi-language
151
- request_llm/moss
152
- media
 
131
  # Pyre type checker
132
  .pyre/
133
 
 
 
 
134
  history
135
  ssr_conf
136
  config_private.py
137
  gpt_log
138
  private.md
139
+ private_upload
 
 
 
 
 
 
 
 
 
 
.pre-commit-config.yaml DELETED
@@ -1,32 +0,0 @@
1
- default_language_version:
2
- python: python3
3
- exclude: 'dotnet'
4
- ci:
5
- autofix_prs: true
6
- autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
7
- autoupdate_schedule: 'quarterly'
8
-
9
- repos:
10
- - repo: https://github.com/pre-commit/pre-commit-hooks
11
- rev: v4.4.0
12
- hooks:
13
- - id: check-ast
14
- # - id: check-yaml
15
- - id: check-toml
16
- - id: check-json
17
- - id: check-byte-order-marker
18
- exclude: .gitignore
19
- - id: check-merge-conflict
20
- - id: detect-private-key
21
- - id: trailing-whitespace
22
- - id: end-of-file-fixer
23
- - id: no-commit-to-branch
24
- - repo: https://github.com/psf/black
25
- rev: 23.3.0
26
- hooks:
27
- - id: black
28
- # - repo: https://github.com/charliermarsh/ruff-pre-commit
29
- # rev: v0.0.261
30
- # hooks:
31
- # - id: ruff
32
- # args: ["--fix"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile CHANGED
@@ -1,34 +1,13 @@
1
- # 此Dockerfile适用于“无本地模型”的迷你运行环境构建
2
- # 如果需要使用chatglm等本地模型或者latex运行依赖,请参考 docker-compose.yml
3
- # - 如何构建: 先修改 `config.py`, 然后 `docker build -t gpt-academic . `
4
- # - 如何运行(Linux下): `docker run --rm -it --net=host gpt-academic `
5
- # - 如何运行(其他操作系统,选择任意一个固定端口50923): `docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic `
6
  FROM python:3.11
7
 
8
-
9
- # 非必要步骤,更换pip源 (以下三行,可以删除)
10
  RUN echo '[global]' > /etc/pip.conf && \
11
  echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
12
  echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf
13
 
 
14
 
15
- # 进入工作路径(必要)
16
  WORKDIR /gpt
17
 
18
 
19
- # 安装大部分依赖,利用Docker缓存加速以后的构建 (以下三行,可以删除)
20
- COPY requirements.txt ./
21
- RUN pip3 install -r requirements.txt
22
-
23
-
24
- # 装载项目文件,安装剩余依赖(必要)
25
- COPY . .
26
- RUN pip3 install -r requirements.txt
27
-
28
-
29
- # 非必要步骤,用于预热模块(可以删除)
30
- RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
31
-
32
-
33
- # 启动(必要)
34
- CMD ["python3", "-u", "main.py"]
 
 
 
 
 
 
1
  FROM python:3.11
2
 
 
 
3
  RUN echo '[global]' > /etc/pip.conf && \
4
  echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
5
  echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf
6
 
7
+ RUN pip3 install gradio requests[socks] mdtex2html
8
 
9
+ COPY . /gpt
10
  WORKDIR /gpt
11
 
12
 
13
+ CMD ["python3", "main.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,456 +1,217 @@
 
1
  ---
2
- title: GPT-Academic
3
  emoji: 😻
4
  colorFrom: blue
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 3.32.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
  # ChatGPT 学术优化
13
- > **Note**
14
- >
15
- > 2023.11.12: 某些依赖包尚不兼容python 3.12,推荐python 3.11。
16
- >
17
- > 2023.12.26: 安装依赖时,请选择`requirements.txt`中**指定的版本**。 安装命令:`pip install -r requirements.txt`。本项目完全开源免费,您可通过订阅[在线服务](https://github.com/binary-husky/gpt_academic/wiki/online)的方式鼓励本项目的发展。
18
-
19
- <br>
20
-
21
- <div align=center>
22
- <h1 aligh="center">
23
- <img src="docs/logo.png" width="40"> GPT 学术优化 (GPT Academic)
24
- </h1>
25
-
26
- [![Github][Github-image]][Github-url]
27
- [![License][License-image]][License-url]
28
- [![Releases][Releases-image]][Releases-url]
29
- [![Installation][Installation-image]][Installation-url]
30
- [![Wiki][Wiki-image]][Wiki-url]
31
- [![PR][PRs-image]][PRs-url]
32
-
33
- [Github-image]: https://img.shields.io/badge/github-12100E.svg?style=flat-square
34
- [License-image]: https://img.shields.io/github/license/binary-husky/gpt_academic?label=License&style=flat-square&color=orange
35
- [Releases-image]: https://img.shields.io/github/release/binary-husky/gpt_academic?label=Release&style=flat-square&color=blue
36
- [Installation-image]: https://img.shields.io/badge/dynamic/json?color=blue&url=https://raw.githubusercontent.com/binary-husky/gpt_academic/master/version&query=$.version&label=Installation&style=flat-square
37
- [Wiki-image]: https://img.shields.io/badge/wiki-项目文档-black?style=flat-square
38
- [PRs-image]: https://img.shields.io/badge/PRs-welcome-pink?style=flat-square
39
-
40
- [Github-url]: https://github.com/binary-husky/gpt_academic
41
- [License-url]: https://github.com/binary-husky/gpt_academic/blob/master/LICENSE
42
- [Releases-url]: https://github.com/binary-husky/gpt_academic/releases
43
- [Installation-url]: https://github.com/binary-husky/gpt_academic#installation
44
- [Wiki-url]: https://github.com/binary-husky/gpt_academic/wiki
45
- [PRs-url]: https://github.com/binary-husky/gpt_academic/pulls
46
-
47
-
48
- </div>
49
- <br>
50
-
51
- **如果喜欢这个项目,请给它一个Star;如果您发明了好用的快捷键或插件,欢迎发pull requests!**
52
-
53
- If you like this project, please give it a Star.
54
- Read this in [English](docs/README.English.md) | [日本語](docs/README.Japanese.md) | [한국어](docs/README.Korean.md) | [Русский](docs/README.Russian.md) | [Français](docs/README.French.md). All translations have been provided by the project itself. To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental).
55
- <br>
56
 
57
- > [!NOTE]
58
- > 1.本项目中每个文件的功能都在[自译解报告](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)`self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题请查阅wiki。
59
- > [![常规安装方法](https://img.shields.io/static/v1?label=&message=常规安装方法&color=gray)](#installation) [![一键安装脚本](https://img.shields.io/static/v1?label=&message=一键安装脚本&color=gray)](https://github.com/binary-husky/gpt_academic/releases) [![配置说明](https://img.shields.io/static/v1?label=&message=配置说明&color=gray)](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) [![wiki](https://img.shields.io/static/v1?label=&message=wiki&color=gray)]([https://github.com/binary-husky/gpt_academic/wiki/项目配置说明](https://github.com/binary-husky/gpt_academic/wiki))
60
- >
61
- > 2.本项目兼容并鼓励尝试国内中文大语言基座模型如通义千问,智谱GLM等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交即可生效。
62
-
63
- <br><br>
64
 
 
65
  <div align="center">
66
 
67
- 功能(⭐= 近期新增功能) | 描述
68
  --- | ---
69
- ⭐[接入新模型](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B) | 百度[千帆](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu)与文心一言, 通义千问[Qwen](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary),上海AI-Lab[书生](https://github.com/InternLM/InternLM),讯飞[星火](https://xinghuo.xfyun.cn/),[LLaMa2](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf),[智谱GLM4](https://open.bigmodel.cn/),DALLE3, [DeepseekCoder](https://coder.deepseek.com/)
70
- ⭐支持mermaid图像渲染 | 支持让GPT生成[流程图](https://www.bilibili.com/video/BV18c41147H9/)、状态转移图、甘特图、饼状图、GitGraph等等(3.7版本)
71
- ⭐Arxiv论文精细翻译 ([Docker](https://github.com/binary-husky/gpt_academic/pkgs/container/gpt_academic_with_latex)) | [插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),目前最好的论文翻译工具
72
- ⭐[实时语音对话输入](https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md) | [插件] 异步[监听音频](https://www.bilibili.com/video/BV1AV4y187Uy/),自动断句,自动寻找回答时机
73
- ⭐AutoGen多智能体插件 | [插件] 借助微软AutoGen,探索多Agent的智能涌现可能!
74
- ⭐虚空终端插件 | [插件] 能够使用自然语言直接调度本项目其他插件
75
- 润色、翻译、代码解释 | 一键润色、翻译、查找论文语法错误、解释代码
76
- [自定义快捷键](https://www.bilibili.com/video/BV14s4y1E7jN) | 支持自定义快捷键
77
- 模块化设计 | 支持自定义强大的[插件](https://github.com/binary-husky/gpt_academic/tree/master/crazy_functions),插件支持[热更新](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)
78
- [程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [插件] 一键剖析Python/C/C++/Java/Lua/...项目树 或 [自我剖析](https://www.bilibili.com/video/BV1cj411A7VW)
79
- 读论文、[翻译](https://www.bilibili.com/video/BV1KT411x7Wn)论文 | [插件] 一键解读latex/pdf论文全文并生成摘要
80
- Latex全文[翻译](https://www.bilibili.com/video/BV1nk4y1Y7Js/)、[润色](https://www.bilibili.com/video/BV1FT411H7c5/) | [插件] 一键翻译或润色latex论文
81
- 批量注释生成 | [插件] 一键批量生成函数注释
82
- Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [插件] 看到上面5种语言的[README](https://github.com/binary-husky/gpt_academic/blob/master/docs/README_EN.md)了吗?就是出自他的手笔
83
- [PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [插件] PDF论文提取题目&摘要+翻译全文(多线程)
84
- [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [插件] 输入arxiv文章url即可一键翻译摘要+下载PDF
85
- Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼写纠错+输出对照PDF
86
- [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [插件] 给定任意谷歌学术搜索页面URL,让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/)
87
- 互联网信息聚合+GPT | [插件] 一键[让GPT从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck)回答问题,让信息永不过时
88
- 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮
89
- 启动暗色[主题](https://github.com/binary-husky/gpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题
90
- [多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM2](https://github.com/THUDM/ChatGLM2-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)伺候的感觉一定会很不错吧?
91
- 更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama)和[盘古α](https://openi.org.cn/pangu/)
92
- ⭐[void-terminal](https://github.com/binary-husky/void-terminal) pip包 | 脱离GUI,在Python中直接调用本项目的所有函数插件(开发中)
93
- 更多新功能展示 (图像生成等) …… | 见本文档结尾处 ……
94
- </div>
95
 
 
96
 
97
- - 新界面(修改`config.py`中的LAYOUT选项即可实现“左右布局”和“上下布局”的切换)
98
  <div align="center">
99
- <img src="https://user-images.githubusercontent.com/96192199/279702205-d81137c3-affd-4cd1-bb5e-b15610389762.gif" width="700" >
100
  </div>
101
 
102
 
103
- - 所有按钮都通过读取functional.py动态生成,可随意加自定义功能,解放剪贴板
104
- <div align="center">
105
- <img src="https://user-images.githubusercontent.com/96192199/231975334-b4788e91-4887-412f-8b43-2b9c5f41d248.gif" width="700" >
106
- </div>
107
 
108
- - 润色/纠错
109
  <div align="center">
110
- <img src="https://user-images.githubusercontent.com/96192199/231980294-f374bdcb-3309-4560-b424-38ef39f04ebd.gif" width="700" >
111
  </div>
112
 
113
- - 如果输出包含公式,会以tex形式和渲染形式同时显示,方便复制和阅读
114
  <div align="center">
115
- <img src="https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png" width="700" >
116
  </div>
117
 
118
- - 懒得看项目代码?直接把整个工程炫ChatGPT嘴里
 
119
  <div align="center">
120
- <img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="700" >
121
  </div>
122
 
123
- - 多种大语言模型混合调用(ChatGLM + OpenAI-GPT3.5 + GPT4)
124
  <div align="center">
125
- <img src="https://user-images.githubusercontent.com/96192199/232537274-deca0563-7aa6-4b5d-94a2-b7c453c47794.png" width="700" >
126
  </div>
127
 
128
- <br><br>
129
-
130
- # Installation
131
-
132
- ```mermaid
133
- flowchart TD
134
- A{"安装方法"} --> W1("I. 🔑直接运行 (Windows, Linux or MacOS)")
135
- W1 --> W11["1. Python pip包管理依赖"]
136
- W1 --> W12["2. Anaconda包管理依赖(推荐⭐)"]
137
-
138
- A --> W2["II. 🐳使用Docker (Windows, Linux or MacOS)"]
139
 
140
- W2 --> k1["1. 部署项目全部能力的大镜像(推荐⭐)"]
141
- W2 --> k2["2. 仅在线模型(GPT, GLM4等)镜像"]
142
- W2 --> k3["3. 在线模型 + Latex的大镜像"]
 
143
 
144
- A --> W4["IV. 🚀其他部署方法"]
145
- W4 --> C1["1. Windows/MacOS 一键安装运行脚本(推荐⭐)"]
146
- W4 --> C2["2. Huggingface, Sealos远程部署"]
147
- W4 --> C4["3. ... 其他 ..."]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  ```
149
 
150
- ### 安装方法I:直接运行 (Windows, Linux or MacOS)
151
-
152
- 1. 下载项目
153
-
154
- ```sh
155
- git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
156
- cd gpt_academic
157
- ```
158
 
159
- 2. 配置API_KEY等变量
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
- 在`config.py`中,配置API KEY等变量。[特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1)、[Wiki-项目配置说明](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。
162
-
163
- 「 程序会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。如您能理解以上读取逻辑,我们强烈建议您在`config.py`同路径下创建一个名为`config_private.py`的新配置文件,并使用`config_private.py`配置项目,从而确保自动更新时不会丢失配置 」。
164
-
165
- 「 支持通过`环境变量`配置项目,环境变量的书写格式参考`docker-compose.yml`文件或者我们的[Wiki页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。配置读取优先级: `环境变量` > `config_private.py` > `config.py` 」。
166
-
167
-
168
- 3. 安装依赖
169
- ```sh
170
- # (选择I: 如熟悉python, python推荐版本 3.9 ~ 3.11)备注:使用官方pip源或者阿里pip源, 临时换源方法:python -m pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/
171
- python -m pip install -r requirements.txt
172
-
173
- # (选择II: 使用Anaconda)步骤也是类似的 (https://www.bilibili.com/video/BV1rc411W7Dr):
174
- conda create -n gptac_venv python=3.11 # 创建anaconda环境
175
- conda activate gptac_venv # 激活anaconda环境
176
- python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步骤
177
- ```
178
-
179
-
180
- <details><summary>如果需要支持清华ChatGLM2/复旦MOSS/RWKV作为后端,请点击展开此处</summary>
181
- <p>
182
-
183
- 【可选步骤】如果需要支持清华ChatGLM3/复旦MOSS作为后端,需要额外安装更多依赖(前提条件:熟悉Python + 用过Pytorch + 电脑配置够强):
184
-
185
- ```sh
186
- # 【可选步骤I】支持清华ChatGLM3。清华ChatGLM备注:如果遇到"Call ChatGLM fail 不能正常加载ChatGLM的参数" 错误,参考如下: 1:以上默认安装的为torch+cpu版,使用cuda需要卸载torch重新安装torch+cuda; 2:如因本机配置不够无法加载模型,可以修改request_llm/bridge_chatglm.py中的模型精度, 将 AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 都修改为 AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
187
- python -m pip install -r request_llms/requirements_chatglm.txt
188
-
189
- # 【可选步骤II】支持复旦MOSS
190
- python -m pip install -r request_llms/requirements_moss.txt
191
- git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss # 注意执行此行代码时,必须处于项目根路径
192
-
193
- # 【可选步骤III】支持RWKV Runner
194
- 参考wiki:https://github.com/binary-husky/gpt_academic/wiki/%E9%80%82%E9%85%8DRWKV-Runner
195
-
196
- # 【可选步骤IV】确保config.py配置文件的AVAIL_LLM_MODELS包含了期望的模型,目前支持的全部模型如下(jittorllms系列目前仅支持docker方案):
197
- AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
198
-
199
- # 【可选步骤V】支持本地模型INT8,INT4量化(这里所指的模型本身不是量化版本,目前deepseek-coder支持,后面测试后会加入更多模型量化选择)
200
- pip install bitsandbyte
201
- # windows用户安装bitsandbytes需要使用下面bitsandbytes-windows-webui
202
- python -m pip install bitsandbytes --prefer-binary --extra-index-url=https://jllllll.github.io/bitsandbytes-windows-webui
203
- pip install -U git+https://github.com/huggingface/transformers.git
204
- pip install -U git+https://github.com/huggingface/accelerate.git
205
- pip install peft
206
  ```
207
 
208
- </p>
209
- </details>
210
-
211
-
212
-
213
- 4. 运行
214
- ```sh
215
- python main.py
216
- ```
217
-
218
- ### 安装方法II:使用Docker
219
-
220
- 0. 部署项目的全部能力(这个是包含cuda和latex的大型镜像。但如果您网速慢、硬盘小,则不推荐该方法部署完整项目)
221
- [![fullcapacity](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml)
222
-
223
- ``` sh
224
- # 修改docker-compose.yml,保留方案0并删除其他方案。然后运行:
225
- docker-compose up
226
- ```
227
-
228
- 1. 仅ChatGPT + GLM4 + 文心一言+spark等在线模型(推荐大多数人选择)
229
- [![basic](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml)
230
- [![basiclatex](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml)
231
- [![basicaudio](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml)
232
-
233
- ``` sh
234
- # 修改docker-compose.yml,保留方案1并删除其他方案。然后运行:
235
- docker-compose up
236
- ```
237
 
238
- P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以直接使用方案4或者方案0获取Latex功能。
239
-
240
- 2. ChatGPT + GLM3 + MOSS + LLAMA2 + 通义千问(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时)
241
- [![chatglm](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml)
242
-
243
- ``` sh
244
- # 修改docker-compose.yml,保留方案2并删除其他方案。然后运行:
245
- docker-compose up
246
- ```
247
-
248
-
249
- ### 安装方法III:其他部署方法
250
- 1. **Windows一键运行脚本**。
251
- 完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。脚本贡献来源:[oobabooga](https://github.com/oobabooga/one-click-installers)。
252
-
253
- 2. 使用第三方API、Azure等、文心一言、星火等,见[Wiki页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)
254
-
255
- 3. 云服务器远程部署避坑指南。
256
- 请访问[云服务器远程部署wiki](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97)
257
-
258
- 4. 在其他平台部署&二级网址部署
259
- - 使用Sealos[一键部署](https://github.com/binary-husky/gpt_academic/issues/993)。
260
- - 使用WSL2(Windows Subsystem for Linux 子系统)。请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
261
- - 如何在二级网址(如`http://localhost/subpath`)下运行。请访问[FastAPI运行说明](docs/WithFastapi.md)
262
-
263
- <br><br>
264
-
265
- # Advanced Usage
266
- ### I:自定义新的便捷按钮(学术快捷键)
267
-
268
- 任意文本编辑器打开`core_functional.py`,添加如下条目,然后重启程序。(如果按钮已存在,那么可以直接修改(前缀、后缀都已支持热修改),无需重启程序即可生效。)
269
  例如
270
-
271
- ```python
272
  "超级英译中": {
273
- # 前缀,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等
274
- "Prefix": "请翻译把下面一段内容成中文,然后用一个markdown表格逐一解释文中出现的专有名词:\n\n",
275
 
 
 
 
276
  # 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来。
277
  "Suffix": "",
 
278
  },
279
  ```
280
-
281
  <div align="center">
282
  <img src="https://user-images.githubusercontent.com/96192199/226899272-477c2134-ed71-4326-810c-29891fe4a508.png" width="500" >
283
  </div>
284
 
285
- ### II:自定义函数插件
286
- 编写强大的函数插件来执行任何你想得到的和想不到的任务。
287
- 本项目的插件编写、调试难度很低,只要您具备一定的python基础知识,就可以仿照我们提供的模板实现自己的插件功能。
288
- 详情请参考[函数插件指南](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。
289
 
290
- <br><br>
291
 
292
- # Updates
293
- ### I:动态
294
 
295
- 1. 对话保存功能。在函数插件区调用 `保存当前的对话` 即可将当前对话保存为可读+可复原的html文件,
296
- 另外在函数插件区(下拉菜单)调用 `载入对话历史存档` ,即可还原之前的会话。
297
- Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史html存档缓存。
298
- <div align="center">
299
- <img src="https://user-images.githubusercontent.com/96192199/235222390-24a9acc0-680f-49f5-bc81-2f3161f1e049.png" width="500" >
300
- </div>
301
 
302
- 2. ⭐Latex/Arxiv论文翻译功能⭐
303
  <div align="center">
304
- <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/002a1a75-ace0-4e6a-94e2-ec1406a746f1" height="250" > ===>
305
- <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/9fdcc391-f823-464f-9322-f8719677043b" height="250" >
306
  </div>
307
 
308
- 3. 虚空终端(从自然语言输入中,理解用户意图+自动调用其他插件)
 
 
 
309
 
310
- - 步骤一:输入 “ 请调用插件翻译PDF论文,地址为https://openreview.net/pdf?id=rJl0r3R9KX ”
311
- - 步骤二:点击“虚空终端”
312
 
 
313
  <div align="center">
314
- <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/66f1b044-e9ff-4eed-9126-5d4f3668f1ed" width="500" >
 
315
  </div>
316
 
317
- 4. 模块化功能设计,简单的接口却能支持强大的功能
 
318
  <div align="center">
319
- <img src="https://user-images.githubusercontent.com/96192199/229288270-093643c1-0018-487a-81e6-1d7809b6e90f.png" height="400" >
320
- <img src="https://user-images.githubusercontent.com/96192199/227504931-19955f78-45cd-4d1c-adac-e71e50957915.png" height="400" >
321
  </div>
322
 
323
- 5. 译解其他开源项目
324
  <div align="center">
325
- <img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" height="250" >
326
- <img src="https://user-images.githubusercontent.com/96192199/226969067-968a27c1-1b9c-486b-8b81-ab2de8d3f88a.png" height="250" >
327
  </div>
328
 
329
- 6. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`)
330
  <div align="center">
331
- <img src="https://user-images.githubusercontent.com/96192199/236432361-67739153-73e8-43fe-8111-b61296edabd9.png" width="500" >
332
  </div>
333
 
334
- 7. OpenAI图像生成
335
  <div align="center">
336
- <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/bc7ab234-ad90-48a0-8d62-f703d9e74665" width="500" >
337
  </div>
338
 
339
- 8. 基于mermaid的流图、脑图绘制
340
  <div align="center">
341
- <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/c518b82f-bd53-46e2-baf5-ad1b081c1da4" width="500" >
342
  </div>
343
 
344
- 9. Latex全文校对纠错
345
  <div align="center">
346
- <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/651ccd98-02c9-4464-91e1-77a6b7d1b033" height="200" > ===>
347
- <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/476f66d9-7716-4537-b5c1-735372c25adb" height="200">
 
348
  </div>
349
 
350
- 10. 语言、主题切换
351
  <div align="center">
352
- <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/b6799499-b6fb-4f0c-9c8e-1b441872f4e8" width="500" >
 
353
  </div>
354
 
355
 
356
-
357
- ### II:版本:
358
- - version 3.80(TODO): 优化AutoGen插件主题并设计一系列衍生插件
359
- - version 3.70: 引入Mermaid绘图,实现GPT画脑图等功能
360
- - version 3.60: 引入AutoGen作为新一代插件的基石
361
- - version 3.57: 支持GLM3,星火v3,文心一言v4,修复本地模型的并发BUG
362
- - version 3.56: 支持动态追加基础功能按钮,新汇报PDF汇总页面
363
- - version 3.55: 重构前端界面,引入悬浮窗口与菜单栏
364
- - version 3.54: 新增动态代码解释器(Code Interpreter)(待完善)
365
- - version 3.53: 支持动态选择不同界面主题,提高稳定性&解决多用户冲突问题
366
- - version 3.50: 使用自然语言调用本项目的所有函数插件(虚空终端),支持插件分类,改进UI,设计新主题
367
- - version 3.49: 支持百度千帆平台和文心一言
368
- - version 3.48: 支持阿里达摩院通义千问,上海AI-Lab书生,讯飞星火
369
- - version 3.46: 支持完全脱手操作的实时语音对话
370
- - version 3.45: 支持自定义ChatGLM2微调模型
371
- - version 3.44: 正式支持Azure,优化界面易用性
372
- - version 3.4: +arxiv论文翻译、latex论文批改功能
373
- - version 3.3: +互联网信息综合功能
374
- - version 3.2: 函数插件支持更多参数接口 (保存对话功能, 解读任意语言代码+同时询问任意的LLM组合)
375
- - version 3.1: 支持同时问询多个gpt模型!支持api2d,支持多个apikey负载均衡
376
- - version 3.0: 对chatglm和其他小型llm的支持
377
- - version 2.6: 重构了插件结构,提高了交互性,加入更多插件
378
- - version 2.5: 自更新,解决总结大工程源代码时文本过长、token溢出的问题
379
- - version 2.4: 新增PDF全文翻译功能; 新增输入区切换位置的功能
380
- - version 2.3: 增强多线程交互性
381
- - version 2.2: 函数插件支持热重载
382
- - version 2.1: 可折叠式布局
383
- - version 2.0: 引入模块化函数插件
384
- - version 1.0: 基础功能
385
-
386
- GPT Academic开发者QQ群:`610599535`
387
-
388
- - 已知问题
389
- - 某些浏览器翻译插件干扰此软件前端的运行
390
- - 官方Gradio目前有很多兼容性问题,请**务必使用`requirements.txt`安装Gradio**
391
-
392
- ```mermaid
393
- timeline LR
394
- title GPT-Academic项目发展历程
395
- section 2.x
396
- 1.0~2.2: 基础功能: 引入模块化函数插件: 可折叠式布局: 函数插件支持热重载
397
- 2.3~2.5: 增强多线程交互性: 新增PDF全文翻译功能: 新增输入区切换位置的功能: 自更新
398
- 2.6: 重构了插件结构: 提高了交互性: 加入更多插件
399
- section 3.x
400
- 3.0~3.1: 对chatglm支持: 对其他小型llm支持: 支持同时问询多个gpt模型: 支持多个apikey负载均衡
401
- 3.2~3.3: 函数插件支持更多参数接口: 保存对话功能: 解读任意语言代码: 同时询问任意的LLM组合: 互联网信息综合功能
402
- 3.4: 加入arxiv论文翻译: 加入latex论文批改功能
403
- 3.44: 正式支持Azure: 优化界面易用性
404
- 3.46: 自定义ChatGLM2微调模型: 实时语音对话
405
- 3.49: 支持阿里达摩院通义千问: 上海AI-Lab书生: 讯飞星火: 支持百度千帆平台 & 文心一言
406
- 3.50: 虚空终端: 支持插件分类: 改进UI: 设计新主题
407
- 3.53: 动态选择不同界面主题: 提高稳定性: 解决多用户冲突问题
408
- 3.55: 动态代码解释器: 重构前端界面: 引入悬浮窗口与菜单栏
409
- 3.56: 动态追加基础功能按钮: 新汇报PDF汇总页面
410
- 3.57: GLM3, 星火v3: 支持文心一言v4: 修复本地模型的并发BUG
411
- 3.60: 引入AutoGen
412
- 3.70: 引入Mermaid绘图: 实现GPT画脑图等功能
413
- 3.80(TODO): 优化AutoGen插件主题: 设计衍生插件
414
-
415
- ```
416
-
417
-
418
- ### III:主题
419
- 可以通过修改`THEME`选项(config.py)变更主题
420
- 1. `Chuanhu-Small-and-Beautiful` [网址](https://github.com/GaiZhenbiao/ChuanhuChatGPT/)
421
-
422
-
423
- ### IV:本项目的开发分支
424
-
425
- 1. `master` 分支: 主分支,稳定版
426
- 2. `frontier` 分支: 开发分支,测试版
427
- 3. 如何[接入其他大模型](request_llms/README.md)
428
- 4. 访问GPT-Academic的[在线服务并支持我们](https://github.com/binary-husky/gpt_academic/wiki/online)
429
-
430
- ### V:参考与学习
431
-
432
  ```
433
- 代码中参考了很多其他优秀项目中的设计,顺序不分先后:
434
-
435
- # 清华ChatGLM2-6B:
436
- https://github.com/THUDM/ChatGLM2-6B
437
-
438
- # 清华JittorLLMs:
439
- https://github.com/Jittor/JittorLLMs
440
-
441
- # ChatPaper:
442
- https://github.com/kaixindelele/ChatPaper
443
-
444
- # Edge-GPT:
445
- https://github.com/acheong08/EdgeGPT
446
-
447
- # ChuanhuChatGPT:
448
- https://github.com/GaiZhenbiao/ChuanhuChatGPT
449
-
450
- # Oobabooga one-click installer:
451
- https://github.com/oobabooga/one-click-installers
452
-
453
- # More:
454
  https://github.com/gradio-app/gradio
455
- https://github.com/fghrsh/live2d_demo
 
456
  ```
 
1
+
2
  ---
3
+ title: ChatImprovement
4
  emoji: 😻
5
  colorFrom: blue
6
  colorTo: blue
7
  sdk: gradio
8
+ sdk_version: 3.23.0
9
  app_file: app.py
10
  pinned: false
11
  ---
12
 
13
  # ChatGPT 学术优化
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ **如果喜欢这个项目,请给它一个Star;如果你发明了更好用的学术快捷键,欢迎发issue或者pull requests**
 
 
 
 
 
 
16
 
17
+ If you like this project, please give it a Star. If you've come up with more useful academic shortcuts, feel free to open an issue or pull request.
18
  <div align="center">
19
 
20
+ 功能 | 描述
21
  --- | ---
22
+ 一键润色 | 支持一键润色、一键查找论文语法错误
23
+ 一键中英互译 | 一键中英互译
24
+ 一键代码解释 | 可以正确显示代码、解释代码
25
+ 自定义快捷键 | 支持自定义快捷键
26
+ 配置代理服务器 | 支持配置代理服务器
27
+ 模块化设计 | 支持自定义高阶的实验性功能
28
+ 自我程序剖析 | [实验性功能] 一键读懂本项目的源代码
29
+ 程序剖析 | [实验性功能] 一键可以剖析其他Python/C++项目
30
+ 读论文 | [实验性功能] 一键解读latex论文全文并生成摘要
31
+ 批量注释生成 | [实验性功能] 一键批量生成函数注释
32
+ chat分析报告生成 | [实验性功能] 运行后自动生成总结汇报
33
+ 公式显示 | 可以同时显示公式的tex形式和渲染形式
34
+ 图片显示 | 可以在markdown中显示图片
35
+ 支持GPT输出的markdown表格 | 可以正确渲染GPT输出的markdown表格
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ </div>
38
 
39
+ - 新界面
40
  <div align="center">
41
+ <img src="https://user-images.githubusercontent.com/96192199/227528413-36ab42da-d589-4ef1-ba75-28aa02442d05.png" width="700" >
42
  </div>
43
 
44
 
 
 
 
 
45
 
46
+ - 所有按钮都通过读取functional.py动态生成,可随意加自定义功能,解放粘贴板
47
  <div align="center">
48
+ <img src="img/公式.gif" width="700" >
49
  </div>
50
 
51
+ - 代码的显示自然也不在话下 https://www.bilibili.com/video/BV1F24y147PD/
52
  <div align="center">
53
+ <img src="img/润色.gif" width="700" >
54
  </div>
55
 
56
+
57
+ - 支持GPT输出的markdown表格
58
  <div align="center">
59
+ <img src="img/demo2.jpg" width="500" >
60
  </div>
61
 
62
+ - 如果输出包含公式,会同时以tex形式和渲染形式显示,方便复制和阅读
63
  <div align="center">
64
+ <img src="img/demo.jpg" width="500" >
65
  </div>
66
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
+ - 懒得看项目代码?整个工程直接给chatgpt炫嘴里
69
+ <div align="center">
70
+ <img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="700" >
71
+ </div>
72
 
73
+ ## 直接运行 (Windows or Linux or MacOS)
74
+
75
+ ``` sh
76
+ # 下载项目
77
+ git clone https://github.com/binary-husky/chatgpt_academic.git
78
+ cd chatgpt_academic
79
+ # 在config.py中,配置 海外Proxy 和 OpenAI API KEY
80
+ - 1.如果你在国内,需要设置海外代理才能够使用 OpenAI API,你可以通过 config.py 文件来进行设置。
81
+ - 2.配置 OpenAI API KEY。你需要在 OpenAI 官网上注册并获取 API KEY。一旦你拿到了 API KEY,在 config.py 文件里配置好即可。
82
+ # 安装依赖
83
+ python -m pip install -r requirements.txt
84
+ # 运行
85
+ python main.py
86
+
87
+ # 测试实验性功能
88
+ ## 测试C++项目头文件分析
89
+ input区域 输入 ./crazy_functions/test_project/cpp/libJPG , 然后点击 "[实验] 解析整个C++项目(input输入项目根路径)"
90
+ ## 测试给Latex项目写摘要
91
+ input区域 输入 ./crazy_functions/test_project/latex/attention , 然后点击 "[实验] 读tex论文写摘要(input输入项目根路径)"
92
+ ## 测试Python项目分析
93
+ input区域 输入 ./crazy_functions/test_project/python/dqn , 然后点击 "[实验] 解析整个py项目(input输入项目根路径)"
94
+ ## 测试自我代码解读
95
+ 点击 "[实验] 请解析并解构此项目本身"
96
+ ## 测试实验功能模板函数(要求gpt回答几个数的平方是什么),您可以根据此函数为模板,实现更复杂的功能
97
+ 点击 "[实验] 实验功能函数模板"
98
  ```
99
 
 
 
 
 
 
 
 
 
100
 
101
+ ## 使用docker (Linux)
102
+
103
+ ``` sh
104
+ # 下载项目
105
+ git clone https://github.com/binary-husky/chatgpt_academic.git
106
+ cd chatgpt_academic
107
+ # 配置 海外Proxy 和 OpenAI API KEY
108
+ config.py
109
+ # 安装
110
+ docker build -t gpt-academic .
111
+ # 运行
112
+ docker run --rm -it --net=host gpt-academic
113
+
114
+ # 测试实验性功能
115
+ ## 测试自我代码解读
116
+ 点击 "[实验] 请解析并解构此项目本身"
117
+ ## 测试实验功能模板函数(要求gpt回答几个数的平方是什么),您可以根据此函数为模板,实现更复杂的功能
118
+ 点击 "[实验] 实验功能函数模板"
119
+ ##(请注意在docker中运行时,需要额外注意程序的文件访问权限问题)
120
+ ## 测试C++项目头文件分析
121
+ input区域 输入 ./crazy_functions/test_project/cpp/libJPG , 然后点击 "[实验] 解析整个C++项目(input输入项目根路径)"
122
+ ## 测试给Latex项目写摘要
123
+ input区域 输入 ./crazy_functions/test_project/latex/attention , 然后点击 "[实验] 读tex论文写摘要(input输入项目根路径)"
124
+ ## 测试Python项目分析
125
+ input区域 输入 ./crazy_functions/test_project/python/dqn , 然后点击 "[实验] 解析整个py项目(input输入项目根路径)"
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  ```
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
+ ## 自定义新的便捷按钮(学术快捷键自定义)
131
+ 打开functional.py,添加条目如下,然后重启程序即可。(如果按钮已经添加成功并可见,那么前缀、后缀都支持热修改,无需重启程序即可生效。)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  例如
133
+ ```
 
134
  "超级英译中": {
 
 
135
 
136
+ # 前缀,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等
137
+ "Prefix": "请翻译把下面一段内容成中文,然后用一个markdown表格逐一解释文中出现的专有名词:\n\n",
138
+
139
  # 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来。
140
  "Suffix": "",
141
+
142
  },
143
  ```
 
144
  <div align="center">
145
  <img src="https://user-images.githubusercontent.com/96192199/226899272-477c2134-ed71-4326-810c-29891fe4a508.png" width="500" >
146
  </div>
147
 
 
 
 
 
148
 
149
+ 如果你发明了更好用的学术快捷键,欢迎发issue或者pull requests!
150
 
151
+ ## 配置代理
 
152
 
153
+ 在```config.py```中修改端口与代理软件对应
 
 
 
 
 
154
 
 
155
  <div align="center">
156
+ <img src="https://user-images.githubusercontent.com/96192199/226571294-37a47cd9-4d40-4c16-97a2-d360845406f7.png" width="500" >
157
+ <img src="https://user-images.githubusercontent.com/96192199/226838985-e5c95956-69c2-4c23-a4dd-cd7944eeb451.png" width="500" >
158
  </div>
159
 
160
+ 配置完成后,你可以用以下命令测试代理是否工作,如果一切正常,下面的代码将输出你的代理服务器所在地:
161
+ ```
162
+ python check_proxy.py
163
+ ```
164
 
165
+ ## 兼容性测试
 
166
 
167
+ ### 图片显示:
168
  <div align="center">
169
+ <img src="https://user-images.githubusercontent.com/96192199/226906087-b5f1c127-2060-4db9-af05-487643b21ed9.png" height="200" >
170
+ <img src="https://user-images.githubusercontent.com/96192199/226906703-7226495d-6a1f-4a53-9728-ce6778cbdd19.png" height="200" >
171
  </div>
172
 
173
+ ### 如果一个程序能够读懂并剖析自己:
174
+
175
  <div align="center">
176
+ <img src="https://user-images.githubusercontent.com/96192199/226936850-c77d7183-0749-4c1c-9875-fd4891842d0c.png" width="800" >
 
177
  </div>
178
 
 
179
  <div align="center">
180
+ <img src="https://user-images.githubusercontent.com/96192199/226936618-9b487e4b-ab5b-4b6e-84c6-16942102e917.png" width="800" >
 
181
  </div>
182
 
183
+ ### 其他任意Python/Cpp项目剖析:
184
  <div align="center">
185
+ <img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="800" >
186
  </div>
187
 
 
188
  <div align="center">
189
+ <img src="https://user-images.githubusercontent.com/96192199/226969067-968a27c1-1b9c-486b-8b81-ab2de8d3f88a.png" width="800" >
190
  </div>
191
 
192
+ ### Latex论文一键阅读理解与摘要生成
193
  <div align="center">
194
+ <img src="https://user-images.githubusercontent.com/96192199/227504406-86ab97cd-f208-41c3-8e4a-7000e51cf980.png" width="800" >
195
  </div>
196
 
197
+ ### 自动报告生成
198
  <div align="center">
199
+ <img src="https://user-images.githubusercontent.com/96192199/227503770-fe29ce2c-53fd-47b0-b0ff-93805f0c2ff4.png" height="300" >
200
+ <img src="https://user-images.githubusercontent.com/96192199/227504617-7a497bb3-0a2a-4b50-9a8a-95ae60ea7afd.png" height="300" >
201
+ <img src="https://user-images.githubusercontent.com/96192199/227504005-efeaefe0-b687-49d0-bf95-2d7b7e66c348.png" height="300" >
202
  </div>
203
 
204
+ ### 模块化功能设计
205
  <div align="center">
206
+ <img src="https://user-images.githubusercontent.com/96192199/227504981-4c6c39c0-ae79-47e6-bffe-0e6442d9da65.png" height="400" >
207
+ <img src="https://user-images.githubusercontent.com/96192199/227504931-19955f78-45cd-4d1c-adac-e71e50957915.png" height="400" >
208
  </div>
209
 
210
 
211
+ ## 参考项目
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  ```
213
+ https://github.com/Python-Markdown/markdown
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  https://github.com/gradio-app/gradio
215
+ https://github.com/polarwinkel/mdtex2html
216
+ https://github.com/GaiZhenbiao/ChuanhuChatGPT
217
  ```
app.py CHANGED
@@ -1,412 +1,108 @@
1
  import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
2
-
3
- help_menu_description = \
4
- """Github源代码开源和更新[地址🚀](https://github.com/binary-husky/gpt_academic),
5
- 感谢热情的[开发者们❤️](https://github.com/binary-husky/gpt_academic/graphs/contributors).
6
- </br></br>常见问题请查阅[项目Wiki](https://github.com/binary-husky/gpt_academic/wiki),
7
- 如遇到Bug请前往[Bug反馈](https://github.com/binary-husky/gpt_academic/issues).
8
- </br></br>普通对话使用说明: 1. 输入问题; 2. 点击提交
9
- </br></br>基础功能区使用说明: 1. 输入文本; 2. 点击任意基础功能区按钮
10
- </br></br>函数插件区使用说明: 1. 输入路径/问题, 或者上传文件; 2. 点击任意函数插件区按钮
11
- </br></br>虚空终端使用说明: 点击虚空终端, 然后根据提示输入指令, 再次点击虚空终端
12
- </br></br>如何保存对话: 点击保存当前的对话按钮
13
- </br></br>如何语音对话: 请阅读Wiki
14
- </br></br>如何临时更换API_KEY: 在输入区输入临时API_KEY后提交(网页刷新后失效)"""
15
-
16
- def main():
17
- import subprocess, sys
18
- subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'https://public.agent-matrix.com/publish/gradio-3.32.8-py3-none-any.whl'])
19
- import gradio as gr
20
- if gr.__version__ not in ['3.32.8']:
21
- raise ModuleNotFoundError("使用项目内置Gradio获取最优体验! 请运行 `pip install -r requirements.txt` 指令安装内置Gradio及其他依赖, 详情信息见requirements.txt.")
22
- from request_llms.bridge_all import predict
23
- from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith
24
- # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址
25
- proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION = get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION')
26
- CHATBOT_HEIGHT, LAYOUT, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = get_conf('CHATBOT_HEIGHT', 'LAYOUT', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT')
27
- ENABLE_AUDIO, AUTO_CLEAR_TXT, PATH_LOGGING, AVAIL_THEMES, THEME, ADD_WAIFU = get_conf('ENABLE_AUDIO', 'AUTO_CLEAR_TXT', 'PATH_LOGGING', 'AVAIL_THEMES', 'THEME', 'ADD_WAIFU')
28
- DARK_MODE, NUM_CUSTOM_BASIC_BTN, SSL_KEYFILE, SSL_CERTFILE = get_conf('DARK_MODE', 'NUM_CUSTOM_BASIC_BTN', 'SSL_KEYFILE', 'SSL_CERTFILE')
29
- INIT_SYS_PROMPT = get_conf('INIT_SYS_PROMPT')
30
-
31
- # 如果WEB_PORT是-1, 则随机选取WEB端口
32
- PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
33
- from check_proxy import get_current_version
34
- from themes.theme import adjust_theme, advanced_css, theme_declaration, js_code_clear, js_code_reset, js_code_show_or_hide, js_code_show_or_hide_group2
35
- from themes.theme import js_code_for_css_changing, js_code_for_toggle_darkmode, js_code_for_persistent_cookie_init
36
- from themes.theme import load_dynamic_theme, to_cookie_str, from_cookie_str, init_cookie
37
- title_html = f"<h1 align=\"center\">GPT 学术优化 {get_current_version()}</h1>{theme_declaration}"
38
-
39
- # 问询记录, python 版本建议3.9+(越新越好)
40
- import logging, uuid
41
- os.makedirs(PATH_LOGGING, exist_ok=True)
42
- try:logging.basicConfig(filename=f"{PATH_LOGGING}/chat_secrets.log", level=logging.INFO, encoding="utf-8", format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
43
- except:logging.basicConfig(filename=f"{PATH_LOGGING}/chat_secrets.log", level=logging.INFO, format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
44
- # Disable logging output from the 'httpx' logger
45
- logging.getLogger("httpx").setLevel(logging.WARNING)
46
- print(f"所有问询记录将自动保存在本地目录./{PATH_LOGGING}/chat_secrets.log, 请注意自我隐私保护哦!")
47
-
48
- # 一些普通功能模块
49
- from core_functional import get_core_functions
50
- functional = get_core_functions()
51
-
52
- # 高级函数插件
53
- from crazy_functional import get_crazy_functions
54
- DEFAULT_FN_GROUPS = get_conf('DEFAULT_FN_GROUPS')
55
- plugins = get_crazy_functions()
56
- all_plugin_groups = list(set([g for _, plugin in plugins.items() for g in plugin['Group'].split('|')]))
57
- match_group = lambda tags, groups: any([g in groups for g in tags.split('|')])
58
-
59
- # 处理markdown文本格式的转变
60
- gr.Chatbot.postprocess = format_io
61
-
62
- # 做一些外观色彩上的调整
63
- set_theme = adjust_theme()
64
-
65
- # 代理与自动更新
66
- from check_proxy import check_proxy, auto_update, warm_up_modules
67
- proxy_info = check_proxy(proxies)
68
-
69
- gr_L1 = lambda: gr.Row().style()
70
- gr_L2 = lambda scale, elem_id: gr.Column(scale=scale, elem_id=elem_id, min_width=400)
71
- if LAYOUT == "TOP-DOWN":
72
- gr_L1 = lambda: DummyWith()
73
- gr_L2 = lambda scale, elem_id: gr.Row()
74
- CHATBOT_HEIGHT /= 2
75
-
76
- cancel_handles = []
77
- customize_btns = {}
78
- predefined_btns = {}
79
- with gr.Blocks(title="GPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
80
- gr.HTML(title_html)
81
- gr.HTML('''<center><a href="https://huggingface.co/spaces/qingxu98/gpt-academic?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>请您打开此页面后务必点击上方的“复制空间”(Duplicate Space)按钮!<font color="#FF00FF">使用时,先在输入框填入API-KEY然后回车。</font><br/>切忌在“复制空间”(Duplicate Space)之前填入API_KEY或进行提问,否则您的API_KEY将极可能被空间所有者攫取!<br/>支持任意数量的OpenAI的密钥和API2D的密钥共存,例如输入"OpenAI密钥1,API2D密钥2",然后提交,即可同时使用两种模型接口。</center>''')
82
- secret_css, dark_mode, py_pickle_cookie = gr.Textbox(visible=False), gr.Textbox(DARK_MODE, visible=False), gr.Textbox(visible=False)
83
- cookies = gr.State(load_chat_cookies())
84
- with gr_L1():
85
- with gr_L2(scale=2, elem_id="gpt-chat"):
86
- chatbot = gr.Chatbot(label=f"当前模型:{LLM_MODEL}", elem_id="gpt-chatbot")
87
- if LAYOUT == "TOP-DOWN": chatbot.style(height=CHATBOT_HEIGHT)
88
- history = gr.State([])
89
- with gr_L2(scale=1, elem_id="gpt-panel"):
90
- with gr.Accordion("输入区", open=True, elem_id="input-panel") as area_input_primary:
91
- with gr.Row():
92
- txt = gr.Textbox(show_label=False, lines=2, placeholder="输入问题或API密钥,输入多个密钥时,用英文逗号间隔。支持多个OpenAI密钥共存。").style(container=False)
93
- with gr.Row():
94
- submitBtn = gr.Button("提交", elem_id="elem_submit", variant="primary")
95
- with gr.Row():
96
- resetBtn = gr.Button("重置", elem_id="elem_reset", variant="secondary"); resetBtn.style(size="sm")
97
- stopBtn = gr.Button("停止", elem_id="elem_stop", variant="secondary"); stopBtn.style(size="sm")
98
- clearBtn = gr.Button("清除", elem_id="elem_clear", variant="secondary", visible=False); clearBtn.style(size="sm")
99
- if ENABLE_AUDIO:
100
- with gr.Row():
101
- audio_mic = gr.Audio(source="microphone", type="numpy", elem_id="elem_audio", streaming=True, show_label=False).style(container=False)
102
- with gr.Row():
103
- status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}", elem_id="state-panel")
104
-
105
- with gr.Accordion("基础功能区", open=True, elem_id="basic-panel") as area_basic_fn:
106
- with gr.Row():
107
- for k in range(NUM_CUSTOM_BASIC_BTN):
108
- customize_btn = gr.Button("自定义按钮" + str(k+1), visible=False, variant="secondary", info_str=f'基础功能区: 自定义按钮')
109
- customize_btn.style(size="sm")
110
- customize_btns.update({"自定义按钮" + str(k+1): customize_btn})
111
- for k in functional:
112
- if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
113
- variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
114
- functional[k]["Button"] = gr.Button(k, variant=variant, info_str=f'基础功能区: {k}')
115
- functional[k]["Button"].style(size="sm")
116
- predefined_btns.update({k: functional[k]["Button"]})
117
- with gr.Accordion("函数插件区", open=True, elem_id="plugin-panel") as area_crazy_fn:
118
- with gr.Row():
119
- gr.Markdown("插件可读取“输入区”文本/路径作为参数(上传文件自动修正路径)")
120
- with gr.Row(elem_id="input-plugin-group"):
121
- plugin_group_sel = gr.Dropdown(choices=all_plugin_groups, label='', show_label=False, value=DEFAULT_FN_GROUPS,
122
- multiselect=True, interactive=True, elem_classes='normal_mut_select').style(container=False)
123
- with gr.Row():
124
- for k, plugin in plugins.items():
125
- if not plugin.get("AsButton", True): continue
126
- visible = True if match_group(plugin['Group'], DEFAULT_FN_GROUPS) else False
127
- variant = plugins[k]["Color"] if "Color" in plugin else "secondary"
128
- info = plugins[k].get("Info", k)
129
- plugin['Button'] = plugins[k]['Button'] = gr.Button(k, variant=variant,
130
- visible=visible, info_str=f'函数插件区: {info}').style(size="sm")
131
- with gr.Row():
132
- with gr.Accordion("更多函数插件", open=True):
133
- dropdown_fn_list = []
134
- for k, plugin in plugins.items():
135
- if not match_group(plugin['Group'], DEFAULT_FN_GROUPS): continue
136
- if not plugin.get("AsButton", True): dropdown_fn_list.append(k) # 排除已经是按钮的插件
137
- elif plugin.get('AdvancedArgs', False): dropdown_fn_list.append(k) # 对于需要高级参数的插件,亦在下拉菜单中显示
138
- with gr.Row():
139
- dropdown = gr.Dropdown(dropdown_fn_list, value=r"打开插件列表", label="", show_label=False).style(container=False)
140
- with gr.Row():
141
- plugin_advanced_arg = gr.Textbox(show_label=True, label="高级参数输入区", visible=False,
142
- placeholder="这里是特殊函数插件的高级参数输入区").style(container=False)
143
- with gr.Row():
144
- switchy_bt = gr.Button(r"请先从插件列表中选择", variant="secondary").style(size="sm")
145
- with gr.Row():
146
- with gr.Accordion("点击展开“文件下载区”。", open=False) as area_file_up:
147
- file_upload = gr.Files(label="任何文件, 推荐上传压缩文件(zip, tar)", file_count="multiple", elem_id="elem_upload")
148
-
149
- with gr.Floating(init_x="0%", init_y="0%", visible=True, width=None, drag="forbidden", elem_id="tooltip"):
150
  with gr.Row():
151
- with gr.Tab("上传文件", elem_id="interact-panel"):
152
- gr.Markdown("请上传本地文件/压缩包供“函数插件区”功能调用。请注意: 上传文件后会自动把输入区修改为相应路径。")
153
- file_upload_2 = gr.Files(label="任何文件, 推荐上传压缩文件(zip, tar)", file_count="multiple", elem_id="elem_upload_float")
154
-
155
- with gr.Tab("更换模型", elem_id="interact-panel"):
156
- md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False)
157
- top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
158
- temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
159
- max_length_sl = gr.Slider(minimum=256, maximum=1024*32, value=4096, step=128, interactive=True, label="Local LLM MaxLength",)
160
- system_prompt = gr.Textbox(show_label=True, lines=2, placeholder=f"System Prompt", label="System prompt", value=INIT_SYS_PROMPT)
161
-
162
- with gr.Tab("界面外观", elem_id="interact-panel"):
163
- theme_dropdown = gr.Dropdown(AVAIL_THEMES, value=THEME, label="更换UI主题").style(container=False)
164
- checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "浮动输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区", elem_id='cbs').style(container=False)
165
- opt = ["自定义菜单"]
166
- value=[]
167
- if ADD_WAIFU: opt += ["添加Live2D形象"]; value += ["添加Live2D形象"]
168
- checkboxes_2 = gr.CheckboxGroup(opt, value=value, label="显示/隐藏自定义菜单", elem_id='cbsc').style(container=False)
169
- dark_mode_btn = gr.Button("切换界面明暗 ☀", variant="secondary").style(size="sm")
170
- dark_mode_btn.click(None, None, None, _js=js_code_for_toggle_darkmode)
171
- with gr.Tab("帮助", elem_id="interact-panel"):
172
- gr.Markdown(help_menu_description)
173
-
174
- with gr.Floating(init_x="20%", init_y="50%", visible=False, width="40%", drag="top") as area_input_secondary:
175
- with gr.Accordion("浮动输入区", open=True, elem_id="input-panel2"):
176
- with gr.Row() as row:
177
- row.style(equal_height=True)
178
- with gr.Column(scale=10):
179
- txt2 = gr.Textbox(show_label=False, placeholder="Input question here.",
180
- elem_id='user_input_float', lines=8, label="输入区2").style(container=False)
181
- with gr.Column(scale=1, min_width=40):
182
- submitBtn2 = gr.Button("提交", variant="primary"); submitBtn2.style(size="sm")
183
- resetBtn2 = gr.Button("重置", variant="secondary"); resetBtn2.style(size="sm")
184
- stopBtn2 = gr.Button("停止", variant="secondary"); stopBtn2.style(size="sm")
185
- clearBtn2 = gr.Button("清除", elem_id="elem_clear2", variant="secondary", visible=False); clearBtn2.style(size="sm")
186
-
187
-
188
- with gr.Floating(init_x="20%", init_y="50%", visible=False, width="40%", drag="top") as area_customize:
189
- with gr.Accordion("自定义菜单", open=True, elem_id="edit-panel"):
190
- with gr.Row() as row:
191
- with gr.Column(scale=10):
192
- AVAIL_BTN = [btn for btn in customize_btns.keys()] + [k for k in functional]
193
- basic_btn_dropdown = gr.Dropdown(AVAIL_BTN, value="自定义按钮1", label="选择一个需要自定义基础功能区按钮").style(container=False)
194
- basic_fn_title = gr.Textbox(show_label=False, placeholder="输入新按钮名称", lines=1).style(container=False)
195
- basic_fn_prefix = gr.Textbox(show_label=False, placeholder="输入新提示前缀", lines=4).style(container=False)
196
- basic_fn_suffix = gr.Textbox(show_label=False, placeholder="输入新提示后缀", lines=4).style(container=False)
197
- with gr.Column(scale=1, min_width=70):
198
- basic_fn_confirm = gr.Button("确认并保存", variant="primary"); basic_fn_confirm.style(size="sm")
199
- basic_fn_clean = gr.Button("恢复默认", variant="primary"); basic_fn_clean.style(size="sm")
200
- def assign_btn(persistent_cookie_, cookies_, basic_btn_dropdown_, basic_fn_title, basic_fn_prefix, basic_fn_suffix, clean_up=False):
201
- ret = {}
202
- # 读取之前的自定义按钮
203
- customize_fn_overwrite_ = cookies_['customize_fn_overwrite']
204
- # 更新新的自定义按钮
205
- customize_fn_overwrite_.update({
206
- basic_btn_dropdown_:
207
- {
208
- "Title":basic_fn_title,
209
- "Prefix":basic_fn_prefix,
210
- "Suffix":basic_fn_suffix,
211
- }
212
- }
213
- )
214
- if clean_up:
215
- customize_fn_overwrite_ = {}
216
- cookies_.update(customize_fn_overwrite_) # 更新cookie
217
- visible = (not clean_up) and (basic_fn_title != "")
218
- if basic_btn_dropdown_ in customize_btns:
219
- # 是自定义按钮,不是预定义按钮
220
- ret.update({customize_btns[basic_btn_dropdown_]: gr.update(visible=visible, value=basic_fn_title)})
221
- else:
222
- # 是预定义按钮
223
- ret.update({predefined_btns[basic_btn_dropdown_]: gr.update(visible=visible, value=basic_fn_title)})
224
- ret.update({cookies: cookies_})
225
- try: persistent_cookie_ = from_cookie_str(persistent_cookie_) # persistent cookie to dict
226
- except: persistent_cookie_ = {}
227
- persistent_cookie_["custom_bnt"] = customize_fn_overwrite_ # dict update new value
228
- persistent_cookie_ = to_cookie_str(persistent_cookie_) # persistent cookie to dict
229
- ret.update({py_pickle_cookie: persistent_cookie_}) # write persistent cookie
230
- return ret
231
-
232
- # update btn
233
- h = basic_fn_confirm.click(assign_btn, [py_pickle_cookie, cookies, basic_btn_dropdown, basic_fn_title, basic_fn_prefix, basic_fn_suffix],
234
- [py_pickle_cookie, cookies, *customize_btns.values(), *predefined_btns.values()])
235
- h.then(None, [py_pickle_cookie], None, _js="""(py_pickle_cookie)=>{setCookie("py_pickle_cookie", py_pickle_cookie, 365);}""")
236
- # clean up btn
237
- h2 = basic_fn_clean.click(assign_btn, [py_pickle_cookie, cookies, basic_btn_dropdown, basic_fn_title, basic_fn_prefix, basic_fn_suffix, gr.State(True)],
238
- [py_pickle_cookie, cookies, *customize_btns.values(), *predefined_btns.values()])
239
- h2.then(None, [py_pickle_cookie], None, _js="""(py_pickle_cookie)=>{setCookie("py_pickle_cookie", py_pickle_cookie, 365);}""")
240
-
241
- def persistent_cookie_reload(persistent_cookie_, cookies_):
242
- ret = {}
243
- for k in customize_btns:
244
- ret.update({customize_btns[k]: gr.update(visible=False, value="")})
245
-
246
- try: persistent_cookie_ = from_cookie_str(persistent_cookie_) # persistent cookie to dict
247
- except: return ret
248
-
249
- customize_fn_overwrite_ = persistent_cookie_.get("custom_bnt", {})
250
- cookies_['customize_fn_overwrite'] = customize_fn_overwrite_
251
- ret.update({cookies: cookies_})
252
-
253
- for k,v in persistent_cookie_["custom_bnt"].items():
254
- if v['Title'] == "": continue
255
- if k in customize_btns: ret.update({customize_btns[k]: gr.update(visible=True, value=v['Title'])})
256
- else: ret.update({predefined_btns[k]: gr.update(visible=True, value=v['Title'])})
257
- return ret
258
-
259
- # 功能区显示开关与功能区的互动
260
- def fn_area_visibility(a):
261
- ret = {}
262
- ret.update({area_input_primary: gr.update(visible=("浮动输入区" not in a))})
263
- ret.update({area_input_secondary: gr.update(visible=("浮动输入区" in a))})
264
- ret.update({plugin_advanced_arg: gr.update(visible=("插件参数区" in a))})
265
- if "浮动输入区" in a: ret.update({txt: gr.update(value="")})
266
- return ret
267
- checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, txt, txt2, plugin_advanced_arg] )
268
- checkboxes.select(None, [checkboxes], None, _js=js_code_show_or_hide)
269
-
270
- # 功能区显示开关与功能区的互动
271
- def fn_area_visibility_2(a):
272
- ret = {}
273
- ret.update({area_customize: gr.update(visible=("自定义菜单" in a))})
274
- return ret
275
- checkboxes_2.select(fn_area_visibility_2, [checkboxes_2], [area_customize] )
276
- checkboxes_2.select(None, [checkboxes_2], None, _js=js_code_show_or_hide_group2)
277
-
278
- # 整理反复出现的控件句柄组合
279
- input_combo = [cookies, max_length_sl, md_dropdown, txt, txt2, top_p, temperature, chatbot, history, system_prompt, plugin_advanced_arg]
280
- output_combo = [cookies, chatbot, history, status]
281
- predict_args = dict(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True)], outputs=output_combo)
282
- # 提交按钮、重置按钮
283
- cancel_handles.append(txt.submit(**predict_args))
284
- cancel_handles.append(txt2.submit(**predict_args))
285
- cancel_handles.append(submitBtn.click(**predict_args))
286
- cancel_handles.append(submitBtn2.click(**predict_args))
287
- resetBtn.click(None, None, [chatbot, history, status], _js=js_code_reset) # 先在前端快速清除chatbot&status
288
- resetBtn2.click(None, None, [chatbot, history, status], _js=js_code_reset) # 先在前端快速清除chatbot&status
289
- resetBtn.click(lambda: ([], [], "已重置"), None, [chatbot, history, status]) # 再在后端清除history
290
- resetBtn2.click(lambda: ([], [], "已重置"), None, [chatbot, history, status]) # 再在后端清除history
291
- clearBtn.click(None, None, [txt, txt2], _js=js_code_clear)
292
- clearBtn2.click(None, None, [txt, txt2], _js=js_code_clear)
293
- if AUTO_CLEAR_TXT:
294
- submitBtn.click(None, None, [txt, txt2], _js=js_code_clear)
295
- submitBtn2.click(None, None, [txt, txt2], _js=js_code_clear)
296
- txt.submit(None, None, [txt, txt2], _js=js_code_clear)
297
- txt2.submit(None, None, [txt, txt2], _js=js_code_clear)
298
- # 基础功能区的回调函数注册
299
- for k in functional:
300
- if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
301
- click_handle = functional[k]["Button"].click(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True), gr.State(k)], outputs=output_combo)
302
- cancel_handles.append(click_handle)
303
- for btn in customize_btns.values():
304
- click_handle = btn.click(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True), gr.State(btn.value)], outputs=output_combo)
305
- cancel_handles.append(click_handle)
306
- # 文件上传区,接收文件后与chatbot的互动
307
- file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies]).then(None, None, None, _js=r"()=>{toast_push('上传完毕 ...'); cancel_loading_status();}")
308
- file_upload_2.upload(on_file_uploaded, [file_upload_2, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies]).then(None, None, None, _js=r"()=>{toast_push('上传完毕 ...'); cancel_loading_status();}")
309
- # 函数插件-固定按钮区
310
- for k in plugins:
311
- if not plugins[k].get("AsButton", True): continue
312
- click_handle = plugins[k]["Button"].click(ArgsGeneralWrapper(plugins[k]["Function"]), [*input_combo], output_combo)
313
- click_handle.then(on_report_generated, [cookies, file_upload, chatbot], [cookies, file_upload, chatbot])
314
- cancel_handles.append(click_handle)
315
- # 函数插件-下拉菜单与随变按钮的互动
316
- def on_dropdown_changed(k):
317
- variant = plugins[k]["Color"] if "Color" in plugins[k] else "secondary"
318
- info = plugins[k].get("Info", k)
319
- ret = {switchy_bt: gr.update(value=k, variant=variant, info_str=f'函数插件区: {info}')}
320
- if plugins[k].get("AdvancedArgs", False): # 是否唤起高级插件参数区
321
- ret.update({plugin_advanced_arg: gr.update(visible=True, label=f"插件[{k}]的高级参数说明:" + plugins[k].get("ArgsReminder", [f"没有提供高级参数功能说明"]))})
322
- else:
323
- ret.update({plugin_advanced_arg: gr.update(visible=False, label=f"插件[{k}]不需要高级参数。")})
324
- return ret
325
- dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt, plugin_advanced_arg] )
326
-
327
- def on_md_dropdown_changed(k):
328
- return {chatbot: gr.update(label="当前模型:"+k)}
329
- md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [chatbot] )
330
-
331
- def on_theme_dropdown_changed(theme, secret_css):
332
- adjust_theme, css_part1, _, adjust_dynamic_theme = load_dynamic_theme(theme)
333
- if adjust_dynamic_theme:
334
- css_part2 = adjust_dynamic_theme._get_theme_css()
335
- else:
336
- css_part2 = adjust_theme()._get_theme_css()
337
- return css_part2 + css_part1
338
-
339
- theme_handle = theme_dropdown.select(on_theme_dropdown_changed, [theme_dropdown, secret_css], [secret_css])
340
- theme_handle.then(
341
- None,
342
- [secret_css],
343
- None,
344
- _js=js_code_for_css_changing
345
  )
346
- # 随变按钮的回调函数注册
347
- def route(request: gr.Request, k, *args, **kwargs):
348
- if k in [r"打开插件列表", r"请先从插件列表中选择"]: return
349
- yield from ArgsGeneralWrapper(plugins[k]["Function"])(request, *args, **kwargs)
350
- click_handle = switchy_bt.click(route,[switchy_bt, *input_combo], output_combo)
351
- click_handle.then(on_report_generated, [cookies, file_upload, chatbot], [cookies, file_upload, chatbot])
352
- cancel_handles.append(click_handle)
353
- # 终止按钮的回调函数注册
354
- stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
355
- stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
356
- plugins_as_btn = {name:plugin for name, plugin in plugins.items() if plugin.get('Button', None)}
357
- def on_group_change(group_list):
358
- btn_list = []
359
- fns_list = []
360
- if not group_list: # 处理特殊情况:没有选择任何插件组
361
- return [*[plugin['Button'].update(visible=False) for _, plugin in plugins_as_btn.items()], gr.Dropdown.update(choices=[])]
362
- for k, plugin in plugins.items():
363
- if plugin.get("AsButton", True):
364
- btn_list.append(plugin['Button'].update(visible=match_group(plugin['Group'], group_list))) # 刷新按钮
365
- if plugin.get('AdvancedArgs', False): dropdown_fn_list.append(k) # 对于需要高级参数的插件,亦在下拉菜单中显示
366
- elif match_group(plugin['Group'], group_list): fns_list.append(k) # 刷新下拉列表
367
- return [*btn_list, gr.Dropdown.update(choices=fns_list)]
368
- plugin_group_sel.select(fn=on_group_change, inputs=[plugin_group_sel], outputs=[*[plugin['Button'] for name, plugin in plugins_as_btn.items()], dropdown])
369
- if ENABLE_AUDIO:
370
- from crazy_functions.live_audio.audio_io import RealtimeAudioDistribution
371
- rad = RealtimeAudioDistribution()
372
- def deal_audio(audio, cookies):
373
- rad.feed(cookies['uuid'].hex, audio)
374
- audio_mic.stream(deal_audio, inputs=[audio_mic, cookies])
375
-
376
-
377
- demo.load(init_cookie, inputs=[cookies], outputs=[cookies])
378
- demo.load(persistent_cookie_reload, inputs = [py_pickle_cookie, cookies],
379
- outputs = [py_pickle_cookie, cookies, *customize_btns.values(), *predefined_btns.values()], _js=js_code_for_persistent_cookie_init)
380
- demo.load(None, inputs=[dark_mode], outputs=None, _js="""(dark_mode)=>{apply_cookie_for_checkbox(dark_mode);}""") # 配置暗色主题或亮色主题
381
- demo.load(None, inputs=[gr.Textbox(LAYOUT, visible=False)], outputs=None, _js='(LAYOUT)=>{GptAcademicJavaScriptInit(LAYOUT);}')
382
-
383
- # gradio的inbrowser触发不太稳定,回滚代码到原始的浏览器打开函数
384
- def run_delayed_tasks():
385
- import threading, webbrowser, time
386
- print(f"如果浏览器没有自动打开,请复制并转到以下URL:")
387
- if DARK_MODE: print(f"\t「暗色主题已启用(支持动态切换主题)」: http://localhost:{PORT}")
388
- else: print(f"\t「亮色主题已启用(支持动态切换主题)」: http://localhost:{PORT}")
389
-
390
- def auto_updates(): time.sleep(0); auto_update()
391
- def open_browser(): time.sleep(2); webbrowser.open_new_tab(f"http://localhost:{PORT}")
392
- def warm_up_mods(): time.sleep(6); warm_up_modules()
393
-
394
- threading.Thread(target=auto_updates, name="self-upgrade", daemon=True).start() # 查看自动更新
395
- threading.Thread(target=open_browser, name="open-browser", daemon=True).start() # 打开浏览器页面
396
- threading.Thread(target=warm_up_mods, name="warm-up", daemon=True).start() # 预热tiktoken模块
397
-
398
- run_delayed_tasks()
399
- demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", share=False, favicon_path="docs/logo.png", blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])
400
-
401
-
402
- # 如果需要在二级路径下运行
403
- # CUSTOM_PATH = get_conf('CUSTOM_PATH')
404
- # if CUSTOM_PATH != "/":
405
- # from toolbox import run_gradio_in_subpath
406
- # run_gradio_in_subpath(demo, auth=AUTHENTICATION, port=PORT, custom_path=CUSTOM_PATH)
407
- # else:
408
- # demo.launch(server_name="0.0.0.0", server_port=PORT, auth=AUTHENTICATION, favicon_path="docs/logo.png",
409
- # blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile",f"{PATH_LOGGING}/admin"])
410
-
411
- if __name__ == "__main__":
412
- main()
 
1
  import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
2
+ import gradio as gr
3
+ from predict import predict
4
+ from toolbox import format_io, find_free_port
5
+
6
+ # 建议您复制一个config_private.py放自己的秘密,如API和代理网址,避免不小心传github被别人看到
7
+ try: from config_private import proxies, WEB_PORT
8
+ except: from config import proxies, WEB_PORT
9
+
10
+ # 如果WEB_PORT是-1,则随机选取WEB端口
11
+ PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
12
+
13
+ initial_prompt = "Serve me as a writing and programming assistant."
14
+ title_html = """<h1 align="center">ChatGPT 学术优化</h1>"""
15
+
16
+ # 问询记录,python 版本建议3.9+(越新越好)
17
+ import logging
18
+ os.makedirs('gpt_log', exist_ok=True)
19
+ try:logging.basicConfig(filename='gpt_log/chat_secrets.log', level=logging.INFO, encoding='utf-8')
20
+ except:logging.basicConfig(filename='gpt_log/chat_secrets.log', level=logging.INFO)
21
+ print('所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log,请注意自我隐私保护哦!')
22
+
23
+ # 一些普通功能模块
24
+ from functional import get_functionals
25
+ functional = get_functionals()
26
+
27
+ # 对一些丧心病狂的实验性功能模块进行测试
28
+ from functional_crazy import get_crazy_functionals, on_file_uploaded, on_report_generated
29
+ crazy_functional = get_crazy_functionals()
30
+
31
+ # 处理markdown文本格式的转变
32
+ gr.Chatbot.postprocess = format_io
33
+
34
+ # 做一些样式上的调整
35
+ try: set_theme = gr.themes.Default( primary_hue=gr.themes.utils.colors.orange,
36
+ font=["ui-sans-serif", "system-ui", "sans-serif", gr.themes.utils.fonts.GoogleFont("Source Sans Pro")],
37
+ font_mono=["ui-monospace", "Consolas", "monospace", gr.themes.utils.fonts.GoogleFont("IBM Plex Mono")])
38
+ except:
39
+ set_theme = None; print('gradio版本较旧,不能自定义字体和颜色')
40
+
41
+ with gr.Blocks(theme=set_theme, analytics_enabled=False) as demo:
42
+ gr.HTML(title_html)
43
+ with gr.Row():
44
+ with gr.Column(scale=2):
45
+ chatbot = gr.Chatbot()
46
+ chatbot.style(height=1000)
47
+ chatbot.style()
48
+ history = gr.State([])
49
+ TRUE = gr.State(True)
50
+ FALSE = gr.State(False)
51
+ with gr.Column(scale=1):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  with gr.Row():
53
+ with gr.Column(scale=12):
54
+ api = gr.Textbox(show_label=False, placeholder="Input OpenAI Key.").style(container=False)
55
+ with gr.Row():
56
+ with gr.Column(scale=12):
57
+ txt = gr.Textbox(show_label=False, placeholder="Input question here.").style(container=False)
58
+ with gr.Column(scale=1):
59
+ submitBtn = gr.Button("Ask", variant="primary")
60
+ with gr.Row():
61
+ for k in functional:
62
+ variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
63
+ functional[k]["Button"] = gr.Button(k, variant=variant)
64
+ with gr.Row():
65
+ gr.Markdown("以下部分实验性功能需从input框读取路径.")
66
+ with gr.Row():
67
+ for k in crazy_functional:
68
+ variant = crazy_functional[k]["Color"] if "Color" in crazy_functional[k] else "secondary"
69
+ crazy_functional[k]["Button"] = gr.Button(k, variant=variant)
70
+ with gr.Row():
71
+ gr.Markdown("上传本地文件供上面的实验性功能调用.")
72
+ with gr.Row():
73
+ file_upload = gr.Files(label='任何文件,但推荐上传压缩文件(zip, tar)', file_count="multiple")
74
+
75
+ from check_proxy import check_proxy
76
+ statusDisplay = gr.Markdown(f"{check_proxy(proxies)}")
77
+ systemPromptTxt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt).style(container=True)
78
+ #inputs, top_p, temperature, top_k, repetition_penalty
79
+ with gr.Accordion("arguments", open=False):
80
+ top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
81
+ temperature = gr.Slider(minimum=-0, maximum=5.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
82
+
83
+ txt.submit(predict, [api, txt, top_p, temperature, chatbot, history, systemPromptTxt], [chatbot, history, statusDisplay])
84
+ submitBtn.click(predict, [api, txt, top_p, temperature, chatbot, history, systemPromptTxt], [chatbot, history, statusDisplay], show_progress=True)
85
+ for k in functional:
86
+ functional[k]["Button"].click(predict,
87
+ [api, txt, top_p, temperature, chatbot, history, systemPromptTxt, TRUE, gr.State(k)], [chatbot, history, statusDisplay], show_progress=True)
88
+ file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt], [chatbot, txt])
89
+ for k in crazy_functional:
90
+ click_handle = crazy_functional[k]["Button"].click(crazy_functional[k]["Function"],
91
+ [api, txt, top_p, temperature, chatbot, history, systemPromptTxt, gr.State(PORT)], [chatbot, history, statusDisplay]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  )
93
+ try: click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
94
+ except: pass
95
+
96
+
97
+ # 延迟函数,做一些准备工作,最后尝试打开浏览器
98
+ def auto_opentab_delay():
99
+ import threading, webbrowser, time
100
+ print(f"URL http://localhost:{PORT}")
101
+ def open(): time.sleep(2)
102
+ webbrowser.open_new_tab(f'http://localhost:{PORT}')
103
+ t = threading.Thread(target=open)
104
+ t.daemon = True; t.start()
105
+
106
+ auto_opentab_delay()
107
+ demo.title = "ChatGPT 学术优化"
108
+ demo.queue().launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
appx.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ def greet(name):
4
+ return "Hello " + name + "!!"
5
+
6
+ iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
+ iface.launch()
check_proxy.py CHANGED
@@ -5,17 +5,9 @@ def check_proxy(proxies):
5
  try:
6
  response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4)
7
  data = response.json()
8
- if 'country_name' in data:
9
- country = data['country_name']
10
- result = f"代理配置 {proxies_https}, 代理所在地:{country}"
11
- elif 'error' in data:
12
- alternative = _check_with_backup_source(proxies)
13
- if alternative is None:
14
- result = f"代理配置 {proxies_https}, 代理所在地:未知,IP查询频率受限"
15
- else:
16
- result = f"代理配置 {proxies_https}, 代理所在地:{alternative}"
17
- else:
18
- result = f"代理配置 {proxies_https}, 代理数据解析失败:{data}"
19
  print(result)
20
  return result
21
  except:
@@ -23,154 +15,8 @@ def check_proxy(proxies):
23
  print(result)
24
  return result
25
 
26
- def _check_with_backup_source(proxies):
27
- import random, string, requests
28
- random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=32))
29
- try: return requests.get(f"http://{random_string}.edns.ip-api.com/json", proxies=proxies, timeout=4).json()['dns']['geo']
30
- except: return None
31
 
32
- def backup_and_download(current_version, remote_version):
33
- """
34
- 一键更新协议:备份和下载
35
- """
36
- from toolbox import get_conf
37
- import shutil
38
- import os
39
- import requests
40
- import zipfile
41
- os.makedirs(f'./history', exist_ok=True)
42
- backup_dir = f'./history/backup-{current_version}/'
43
- new_version_dir = f'./history/new-version-{remote_version}/'
44
- if os.path.exists(new_version_dir):
45
- return new_version_dir
46
- os.makedirs(new_version_dir)
47
- shutil.copytree('./', backup_dir, ignore=lambda x, y: ['history'])
48
- proxies = get_conf('proxies')
49
- try: r = requests.get('https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True)
50
- except: r = requests.get('https://public.gpt-academic.top/publish/master.zip', proxies=proxies, stream=True)
51
- zip_file_path = backup_dir+'/master.zip'
52
- with open(zip_file_path, 'wb+') as f:
53
- f.write(r.content)
54
- dst_path = new_version_dir
55
- with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
56
- for zip_info in zip_ref.infolist():
57
- dst_file_path = os.path.join(dst_path, zip_info.filename)
58
- if os.path.exists(dst_file_path):
59
- os.remove(dst_file_path)
60
- zip_ref.extract(zip_info, dst_path)
61
- return new_version_dir
62
-
63
-
64
- def patch_and_restart(path):
65
- """
66
- 一键更新协议:覆盖和重启
67
- """
68
- from distutils import dir_util
69
- import shutil
70
- import os
71
- import sys
72
- import time
73
- import glob
74
- from colorful import print亮黄, print亮绿, print亮红
75
- # if not using config_private, move origin config.py as config_private.py
76
- if not os.path.exists('config_private.py'):
77
- print亮黄('由于您没有设置config_private.py私密配置,现将您的现有配置移动至config_private.py以防止配置丢失,',
78
- '另外您可以随时在history子文件夹下找回旧版的程序。')
79
- shutil.copyfile('config.py', 'config_private.py')
80
- path_new_version = glob.glob(path + '/*-master')[0]
81
- dir_util.copy_tree(path_new_version, './')
82
- print亮绿('代码已经更新,即将更新pip包依赖……')
83
- for i in reversed(range(5)): time.sleep(1); print(i)
84
- try:
85
- import subprocess
86
- subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt'])
87
- except:
88
- print亮红('pip包依赖安装出现问题,需要手动安装新增的依赖库 `python -m pip install -r requirements.txt`,然后在用常规的`python main.py`的方式启动。')
89
- print亮绿('更新完成,您可以随时在history子文件夹下找回旧版的程序,5s之后重启')
90
- print亮红('假如重启失败,您可能需要手动安装新增的依赖库 `python -m pip install -r requirements.txt`,然后在用常规的`python main.py`的方式启动。')
91
- print(' ------------------------------ -----------------------------------')
92
- for i in reversed(range(8)): time.sleep(1); print(i)
93
- os.execl(sys.executable, sys.executable, *sys.argv)
94
-
95
-
96
- def get_current_version():
97
- import json
98
- try:
99
- with open('./version', 'r', encoding='utf8') as f:
100
- current_version = json.loads(f.read())['version']
101
- except:
102
- current_version = ""
103
- return current_version
104
-
105
-
106
- def auto_update(raise_error=False):
107
- """
108
- 一键更新协议:查询版本和用户意见
109
- """
110
- try:
111
- from toolbox import get_conf
112
- import requests
113
- import json
114
- proxies = get_conf('proxies')
115
- try: response = requests.get("https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=5)
116
- except: response = requests.get("https://public.gpt-academic.top/publish/version", proxies=proxies, timeout=5)
117
- remote_json_data = json.loads(response.text)
118
- remote_version = remote_json_data['version']
119
- if remote_json_data["show_feature"]:
120
- new_feature = "新功能:" + remote_json_data["new_feature"]
121
- else:
122
- new_feature = ""
123
- with open('./version', 'r', encoding='utf8') as f:
124
- current_version = f.read()
125
- current_version = json.loads(current_version)['version']
126
- if (remote_version - current_version) >= 0.01-1e-5:
127
- from colorful import print亮黄
128
- print亮黄(f'\n新版本可用。新版本:{remote_version},当前版本:{current_version}。{new_feature}')
129
- print('(1)Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
130
- user_instruction = input('(2)是否一键更新代码(Y+回车=确认,输入其他/无输入+回车=不更新)?')
131
- if user_instruction in ['Y', 'y']:
132
- path = backup_and_download(current_version, remote_version)
133
- try:
134
- patch_and_restart(path)
135
- except:
136
- msg = '更新失败。'
137
- if raise_error:
138
- from toolbox import trimmed_format_exc
139
- msg += trimmed_format_exc()
140
- print(msg)
141
- else:
142
- print('自动更新程序:已禁用')
143
- return
144
- else:
145
- return
146
- except:
147
- msg = '自动更新程序:已禁用。建议排查:代理网络配置。'
148
- if raise_error:
149
- from toolbox import trimmed_format_exc
150
- msg += trimmed_format_exc()
151
- print(msg)
152
-
153
- def warm_up_modules():
154
- print('正在执行一些模块的预热 ...')
155
- from toolbox import ProxyNetworkActivate
156
- from request_llms.bridge_all import model_info
157
- with ProxyNetworkActivate("Warmup_Modules"):
158
- enc = model_info["gpt-3.5-turbo"]['tokenizer']
159
- enc.encode("模块预热", disallowed_special=())
160
- enc = model_info["gpt-4"]['tokenizer']
161
- enc.encode("模块预热", disallowed_special=())
162
-
163
- def warm_up_vectordb():
164
- print('正在执行一些模块的预热 ...')
165
- from toolbox import ProxyNetworkActivate
166
- with ProxyNetworkActivate("Warmup_Modules"):
167
- import nltk
168
- with ProxyNetworkActivate("Warmup_Modules"): nltk.download("punkt")
169
-
170
-
171
  if __name__ == '__main__':
172
- import os
173
- os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
174
- from toolbox import get_conf
175
- proxies = get_conf('proxies')
176
- check_proxy(proxies)
 
5
  try:
6
  response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4)
7
  data = response.json()
8
+ print(f'查询代理的地理位置,返回的结果是{data}')
9
+ country = data['country_name']
10
+ result = f"代理配置 {proxies_https}, 代理所在地:{country}"
 
 
 
 
 
 
 
 
11
  print(result)
12
  return result
13
  except:
 
15
  print(result)
16
  return result
17
 
 
 
 
 
 
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  if __name__ == '__main__':
20
+ try: from config_private import proxies # 放自己的秘密如API和代理网址 os.path.exists('config_private.py')
21
+ except: from config import proxies
22
+ check_proxy(proxies)
 
 
colorful.py DELETED
@@ -1,61 +0,0 @@
1
- import platform
2
- from sys import stdout
3
-
4
- if platform.system()=="Linux":
5
- pass
6
- else:
7
- from colorama import init
8
- init()
9
-
10
- # Do you like the elegance of Chinese characters?
11
- def print红(*kw,**kargs):
12
- print("\033[0;31m",*kw,"\033[0m",**kargs)
13
- def print绿(*kw,**kargs):
14
- print("\033[0;32m",*kw,"\033[0m",**kargs)
15
- def print黄(*kw,**kargs):
16
- print("\033[0;33m",*kw,"\033[0m",**kargs)
17
- def print蓝(*kw,**kargs):
18
- print("\033[0;34m",*kw,"\033[0m",**kargs)
19
- def print紫(*kw,**kargs):
20
- print("\033[0;35m",*kw,"\033[0m",**kargs)
21
- def print靛(*kw,**kargs):
22
- print("\033[0;36m",*kw,"\033[0m",**kargs)
23
-
24
- def print亮红(*kw,**kargs):
25
- print("\033[1;31m",*kw,"\033[0m",**kargs)
26
- def print亮绿(*kw,**kargs):
27
- print("\033[1;32m",*kw,"\033[0m",**kargs)
28
- def print亮黄(*kw,**kargs):
29
- print("\033[1;33m",*kw,"\033[0m",**kargs)
30
- def print亮蓝(*kw,**kargs):
31
- print("\033[1;34m",*kw,"\033[0m",**kargs)
32
- def print亮紫(*kw,**kargs):
33
- print("\033[1;35m",*kw,"\033[0m",**kargs)
34
- def print亮靛(*kw,**kargs):
35
- print("\033[1;36m",*kw,"\033[0m",**kargs)
36
-
37
- # Do you like the elegance of Chinese characters?
38
- def sprint红(*kw):
39
- return "\033[0;31m"+' '.join(kw)+"\033[0m"
40
- def sprint绿(*kw):
41
- return "\033[0;32m"+' '.join(kw)+"\033[0m"
42
- def sprint黄(*kw):
43
- return "\033[0;33m"+' '.join(kw)+"\033[0m"
44
- def sprint蓝(*kw):
45
- return "\033[0;34m"+' '.join(kw)+"\033[0m"
46
- def sprint紫(*kw):
47
- return "\033[0;35m"+' '.join(kw)+"\033[0m"
48
- def sprint靛(*kw):
49
- return "\033[0;36m"+' '.join(kw)+"\033[0m"
50
- def sprint亮红(*kw):
51
- return "\033[1;31m"+' '.join(kw)+"\033[0m"
52
- def sprint亮绿(*kw):
53
- return "\033[1;32m"+' '.join(kw)+"\033[0m"
54
- def sprint亮黄(*kw):
55
- return "\033[1;33m"+' '.join(kw)+"\033[0m"
56
- def sprint亮蓝(*kw):
57
- return "\033[1;34m"+' '.join(kw)+"\033[0m"
58
- def sprint亮紫(*kw):
59
- return "\033[1;35m"+' '.join(kw)+"\033[0m"
60
- def sprint亮靛(*kw):
61
- return "\033[1;36m"+' '.join(kw)+"\033[0m"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.py CHANGED
@@ -1,370 +1,29 @@
1
- """
2
- 以下所有配置也都支持利用环境变量覆写,环境变量配置格式见docker-compose.yml。
3
- 读取优先级:环境变量 > config_private.py > config.py
4
- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
5
- All the following configurations also support using environment variables to override,
6
- and the environment variable configuration format can be seen in docker-compose.yml.
7
- Configuration reading priority: environment variable > config_private.py > config.py
8
- """
9
 
10
- # [step 1]>> API_KEY = "sk-123456789xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx123456789"。极少数情况下,还需要填写组织(格式如org-123456789abcdefghijklmno的),请向下翻,找 API_ORG 设置项
11
- API_KEY = "此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey3,azure-apikey4"
12
-
13
-
14
- # [step 1]>> API_KEY = "sk-123456789xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx123456789"。极少数情况下,还需要填写组织(格式如org-123456789abcdefghijklmno的),请向下翻,找 API_ORG 设置项
15
- API_KEY = "此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey3,azure-apikey4"
16
-
17
-
18
- # [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改;如果使用本地或无地域限制的大模型时,此处也不需要修改
19
- USE_PROXY = False
20
  if USE_PROXY:
21
- """
22
- 代理网络的地址,打开你的代理软件查看代理协议(socks5h / http)、地址(localhost)和端口(11284)
23
- 填写格式是 [协议]:// [地址] :[端口],填写之前不要忘记把USE_PROXY改成True,如果直接在海外服务器部署,此处不修改
24
- <配置教程&视频教程> https://github.com/binary-husky/gpt_academic/issues/1>
25
- [协议] 常见协议无非socks5h/http; 例如 v2**y 和 ss* 的默认本地协议是socks5h; 而cl**h 的默认本地协议是http
26
- [地址] 填localhost或者127.0.0.1(localhost意思是代理软件安装在本机上)
27
- [端口] 在代理软件的设置里找。虽然不同的代理软件界面不一样,但端口号都应该在最显眼的位置上
28
- """
29
- proxies = {
30
- # [协议]:// [地址] :[端口]
31
- "http": "socks5h://localhost:11284", # 再例如 "http": "http://127.0.0.1:7890",
32
- "https": "socks5h://localhost:11284", # 再例如 "https": "http://127.0.0.1:7890",
33
- }
34
  else:
35
  proxies = None
36
-
37
- # ------------------------------------ 以下配置可以优化体验, 但大部分场合下并不需要修改 ------------------------------------
38
-
39
- # 重新URL重新定向,实现更换API_URL的作用(高危设置! 常规情况下不要修改! 通过修改此设置,您将把您的API-KEY和对话隐私完全暴露给您设定的中间人!)
40
- # 格式: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "在这里填写重定向的api.openai.com的URL"}
41
- # 举例: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "https://reverse-proxy-url/v1/chat/completions"}
42
- API_URL_REDIRECT = {}
43
-
44
-
45
- # 多线程函数插件中,默认允许多少路线程同时访问OpenAI。Free trial users的限制是每分钟3次,Pay-as-you-go users的限制是每分钟3500次
46
- # 一言以蔽之:免费(5刀)用户填3,OpenAI绑了信用卡的用户可以填 16 或者更高。提高限制请查询:https://platform.openai.com/docs/guides/rate-limits/overview
47
- DEFAULT_WORKER_NUM = 3
48
-
49
-
50
- # 色彩主题, 可选 ["Default", "Chuanhu-Small-and-Beautiful", "High-Contrast"]
51
- # 更多主题, 请查阅Gradio主题商店: https://huggingface.co/spaces/gradio/theme-gallery 可选 ["Gstaff/Xkcd", "NoCrypt/Miku", ...]
52
- THEME = "Chuanhu-Small-and-Beautiful"
53
- AVAIL_THEMES = ["Default", "Chuanhu-Small-and-Beautiful", "High-Contrast", "Gstaff/Xkcd", "NoCrypt/Miku"]
54
-
55
-
56
- # 默认的系统提示词(system prompt)
57
- INIT_SYS_PROMPT = "Serve me as a writing and programming assistant."
58
-
59
-
60
- # 对话窗的高度 (仅在LAYOUT="TOP-DOWN"时生效)
61
- CHATBOT_HEIGHT = 1115
62
-
63
-
64
- # 代码高亮
65
- CODE_HIGHLIGHT = True
66
-
67
-
68
- # 窗口布局
69
- LAYOUT = "LEFT-RIGHT" # "LEFT-RIGHT"(左右布局) # "TOP-DOWN"(上下布局)
70
-
71
-
72
- # 暗色模式 / 亮色模式
73
- DARK_MODE = False
74
-
75
 
76
  # 发送请求到OpenAI后,等待多久判定为超时
77
- TIMEOUT_SECONDS = 30
78
-
79
 
80
  # 网页的端口, -1代表随机端口
81
  WEB_PORT = -1
82
 
83
-
84
  # 如果OpenAI不响应(网络卡顿、代理失败、KEY失效),重试的次数限制
85
  MAX_RETRY = 2
86
 
87
- # OpenAI模型选择是(gpt4现在只对申请成功的人开放)
88
- LLM_MODEL = "gpt-3.5-turbo" # 可选 "chatglm"
89
- AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo", "spark", "azure-gpt-3.5"]
90
-
91
- # 插件分类默认选项
92
- DEFAULT_FN_GROUPS = ['对话', '编程', '学术', '智能体']
93
-
94
-
95
- # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 )
96
- LLM_MODEL = "gpt-3.5-turbo-16k" # 可选 ↓↓↓
97
- AVAIL_LLM_MODELS = ["gpt-4-1106-preview", "gpt-4-turbo-preview", "gpt-4-vision-preview",
98
- "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
99
- "gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-3-turbo",
100
- "gemini-pro", "chatglm3", "claude-2"]
101
- # P.S. 其他可用的模型还包括 [
102
- # "moss", "qwen-turbo", "qwen-plus", "qwen-max"
103
- # "zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen-local", "gpt-3.5-turbo-0613",
104
- # "gpt-3.5-turbo-16k-0613", "gpt-3.5-random", "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k',
105
- # "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"
106
- # ]
107
-
108
-
109
- # 定义界面上“询问多个GPT模型”插件应该使用哪些模型,请从AVAIL_LLM_MODELS中选择,并在不同模型之间用`&`间隔,例如"gpt-3.5-turbo&chatglm3&azure-gpt-4"
110
- MULTI_QUERY_LLM_MODELS = "gpt-3.5-turbo&chatglm3"
111
-
112
-
113
- # 选择本地模型变体(只有当AVAIL_LLM_MODELS包含了对应本地模型时,才会起作用)
114
- # 如果你选择Qwen系列的模型,那么请在下面的QWEN_MODEL_SELECTION中指定具体的模型
115
- # 也可以是具体的模型路径
116
- QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"
117
-
118
-
119
- # 接入通义千问在线大模型 https://dashscope.console.aliyun.com/
120
- DASHSCOPE_API_KEY = "" # 阿里灵积云API_KEY
121
-
122
-
123
- # 百度千帆(LLM_MODEL="qianfan")
124
- BAIDU_CLOUD_API_KEY = ''
125
- BAIDU_CLOUD_SECRET_KEY = ''
126
- BAIDU_CLOUD_QIANFAN_MODEL = 'ERNIE-Bot' # 可选 "ERNIE-Bot-4"(文心大模型4.0), "ERNIE-Bot"(文心一言), "ERNIE-Bot-turbo", "BLOOMZ-7B", "Llama-2-70B-Chat", "Llama-2-13B-Chat", "Llama-2-7B-Chat"
127
-
128
-
129
- # 如果使用ChatGLM2微调模型,请把 LLM_MODEL="chatglmft",并在此处指定模型路径
130
- CHATGLM_PTUNING_CHECKPOINT = "" # 例如"/home/hmp/ChatGLM2-6B/ptuning/output/6b-pt-128-1e-2/checkpoint-100"
131
-
132
-
133
- # 本地LLM模型如ChatGLM的执行方式 CPU/GPU
134
- LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
135
- LOCAL_MODEL_QUANT = "FP16" # 默认 "FP16" "INT4" 启用量化INT4版本 "INT8" 启用量化INT8版本
136
-
137
- # 设置gradio的并行线程数(不需要修改)
138
- CONCURRENT_COUNT = 100
139
-
140
-
141
- # 是否在提交时自动清空输入框
142
- AUTO_CLEAR_TXT = False
143
-
144
-
145
- # 加一个live2d装饰
146
- ADD_WAIFU = True
147
-
148
-
149
- # 设置用户名和密码(不需要修改)(相关功能不稳定,与gradio版本和网络都相关,如果本地使用不建议加这个)
150
- # [("username", "password"), ("username2", "password2"), ...]
151
- AUTHENTICATION = []
152
-
153
-
154
- # 如果需要在二级路径下运行(常规情况下,不要修改!!)(需要配合修改main.py才能生效!)
155
- CUSTOM_PATH = "/"
156
-
157
-
158
- # HTTPS 秘钥和证书(不需要修改)
159
- SSL_KEYFILE = ""
160
- SSL_CERTFILE = ""
161
-
162
-
163
- # 极少数情况下,openai的官方KEY需要伴随组织编码(格式如org-xxxxxxxxxxxxxxxxxxxxxxxx)使用
164
- API_ORG = ""
165
-
166
-
167
- # 如果需要使用Slack Claude,使用教程详情见 request_llms/README.md
168
- SLACK_CLAUDE_BOT_ID = ''
169
- SLACK_CLAUDE_USER_TOKEN = ''
170
-
171
-
172
- # 如果需要使用AZURE(方法一:单个azure模型部署)详情请见额外文档 docs\use_azure.md
173
- AZURE_ENDPOINT = "https://你亲手写的api名称.openai.azure.com/"
174
- AZURE_API_KEY = "填入azure openai api的密钥" # 建议直接在API_KEY处填写,该选项即将被弃用
175
- AZURE_ENGINE = "填入你亲手写的部署名" # 读 docs\use_azure.md
176
-
177
-
178
- # 如果需要使用AZURE(方法二:多个azure模型部署+动态切换)详情请见额外文档 docs\use_azure.md
179
- AZURE_CFG_ARRAY = {}
180
-
181
-
182
- # 使用Newbing (不推荐使用,未来将删除)
183
- NEWBING_STYLE = "creative" # ["creative", "balanced", "precise"]
184
- NEWBING_COOKIES = """
185
- put your new bing cookies here
186
- """
187
-
188
-
189
- # 阿里云实时语音识别 配置难度较高 仅建议高手用户使用 参考 https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md
190
- ENABLE_AUDIO = False
191
- ALIYUN_TOKEN="" # 例如 f37f30e0f9934c34a992f6f64f7eba4f
192
- ALIYUN_APPKEY="" # 例如 RoPlZrM88DnAFkZK
193
- ALIYUN_ACCESSKEY="" # (无需填写)
194
- ALIYUN_SECRET="" # (无需填写)
195
-
196
-
197
- # 接入讯飞星火大模型 https://console.xfyun.cn/services/iat
198
- XFYUN_APPID = "00000000"
199
- XFYUN_API_SECRET = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
200
- XFYUN_API_KEY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
201
-
202
-
203
- # 接入智谱大模型
204
- ZHIPUAI_API_KEY = ""
205
- ZHIPUAI_MODEL = "" # 此选项已废弃,不再需要填写
206
-
207
-
208
- # # 火山引擎YUNQUE大模型
209
- # YUNQUE_SECRET_KEY = ""
210
- # YUNQUE_ACCESS_KEY = ""
211
- # YUNQUE_MODEL = ""
212
-
213
-
214
- # Claude API KEY
215
- ANTHROPIC_API_KEY = ""
216
-
217
-
218
- # Mathpix 拥有执行PDF的OCR功能,但是需要注册账号
219
- MATHPIX_APPID = ""
220
- MATHPIX_APPKEY = ""
221
-
222
-
223
- # 自定义API KEY格式
224
- CUSTOM_API_KEY_PATTERN = ""
225
-
226
-
227
- # Google Gemini API-Key
228
- GEMINI_API_KEY = ''
229
-
230
-
231
- # HUGGINGFACE的TOKEN,下载LLAMA时起作用 https://huggingface.co/docs/hub/security-tokens
232
- HUGGINGFACE_ACCESS_TOKEN = ""
233
-
234
-
235
- # GROBID服务器地址(填写多个可以均衡负载),用于高质量地读取PDF文档
236
- # 获取方法:复制以下空间https://huggingface.co/spaces/qingxu98/grobid,设为public,然后GROBID_URL = "https://(你的hf用户名如qingxu98)-(你的填写的空间名如grobid).hf.space"
237
- GROBID_URLS = [
238
- "https://qingxu98-grobid.hf.space","https://qingxu98-grobid2.hf.space","https://qingxu98-grobid3.hf.space",
239
- "https://qingxu98-grobid4.hf.space","https://qingxu98-grobid5.hf.space", "https://qingxu98-grobid6.hf.space",
240
- "https://qingxu98-grobid7.hf.space", "https://qingxu98-grobid8.hf.space",
241
- ]
242
-
243
-
244
- # 是否允许通过自然语言描述修改本页的配置,该功能具有一定的危险性,默认关闭
245
- ALLOW_RESET_CONFIG = False
246
-
247
-
248
- # 在使用AutoGen插件时,是否使用Docker容器运行代码
249
- AUTOGEN_USE_DOCKER = False
250
-
251
-
252
- # 临时的上传文件夹位置,请勿修改
253
- PATH_PRIVATE_UPLOAD = "private_upload"
254
-
255
-
256
- # 日志文件夹的位置,请勿修改
257
- PATH_LOGGING = "gpt_log"
258
-
259
-
260
- # 除了连接OpenAI之外,还有哪些场合允许使用代理,请勿修改
261
- WHEN_TO_USE_PROXY = ["Download_LLM", "Download_Gradio_Theme", "Connect_Grobid",
262
- "Warmup_Modules", "Nougat_Download", "AutoGen"]
263
-
264
-
265
- # *实验性功能*: 自动检测并屏蔽失效的KEY,请勿使用
266
- BLOCK_INVALID_APIKEY = False
267
-
268
-
269
- # 启用插件热加载
270
- PLUGIN_HOT_RELOAD = False
271
-
272
-
273
- # 自定义按钮的最大数量限制
274
- NUM_CUSTOM_BASIC_BTN = 4
275
-
276
- """
277
- 在线大模型配置关联关系示意图
278
-
279
- ├── "gpt-3.5-turbo" 等openai模型
280
- │ ├── API_KEY
281
- │ ├── CUSTOM_API_KEY_PATTERN(不常用)
282
- │ ├── API_ORG(不常用)
283
- │ └── API_URL_REDIRECT(不常用)
284
-
285
- ├── "azure-gpt-3.5" 等azure模型(单个azure模型,不需要动态切换)
286
- │ ├── API_KEY
287
- │ ├── AZURE_ENDPOINT
288
- │ ├── AZURE_API_KEY
289
- │ ├── AZURE_ENGINE
290
- │ └── API_URL_REDIRECT
291
-
292
- ├── "azure-gpt-3.5" 等azure模型(多个azure模型,需要动态切换,高优先级)
293
- │ └── AZURE_CFG_ARRAY
294
-
295
- ├── "spark" 星火认知大模型 spark & sparkv2
296
- │ ├── XFYUN_APPID
297
- │ ├── XFYUN_API_SECRET
298
- │ └── XFYUN_API_KEY
299
-
300
- ├── "claude-1-100k" 等claude模型
301
- │ └── ANTHROPIC_API_KEY
302
-
303
- ├── "stack-claude"
304
- │ ├── SLACK_CLAUDE_BOT_ID
305
- │ └── SLACK_CLAUDE_USER_TOKEN
306
-
307
- ├── "qianfan" 百度千帆大模型库
308
- │ ├── BAIDU_CLOUD_QIANFAN_MODEL
309
- │ ├── BAIDU_CLOUD_API_KEY
310
- │ └── BAIDU_CLOUD_SECRET_KEY
311
-
312
- ├── "glm-4", "glm-3-turbo", "zhipuai" 智谱AI大模型
313
- │ └── ZHIPUAI_API_KEY
314
-
315
- ├── "qwen-turbo" 等通义千问大模型
316
- │ └── DASHSCOPE_API_KEY
317
-
318
- ├── "Gemini"
319
- │ └── GEMINI_API_KEY
320
-
321
- └── "newbing" Newbing接口不再稳定,不推荐使用
322
- ├── NEWBING_STYLE
323
- └── NEWBING_COOKIES
324
-
325
-
326
- 本地大模型示意图
327
-
328
- ├── "chatglm3"
329
- ├── "chatglm"
330
- ├── "chatglm_onnx"
331
- ├── "chatglmft"
332
- ├── "internlm"
333
- ├── "moss"
334
- ├── "jittorllms_pangualpha"
335
- ├── "jittorllms_llama"
336
- ├── "deepseekcoder"
337
- ├── "qwen-local"
338
- ├── RWKV的支持见Wiki
339
- └── "llama2"
340
-
341
-
342
- 用户图形界面布局依赖关系示意图
343
-
344
- ├── CHATBOT_HEIGHT 对话窗的高度
345
- ├── CODE_HIGHLIGHT 代码高亮
346
- ├── LAYOUT 窗口布局
347
- ├── DARK_MODE 暗色模式 / 亮色模式
348
- ├── DEFAULT_FN_GROUPS 插件分类默认选项
349
- ├── THEME 色彩主题
350
- ├── AUTO_CLEAR_TXT 是否在提交时自动清空输入框
351
- ├── ADD_WAIFU 加一个live2d装饰
352
- └── ALLOW_RESET_CONFIG 是否允许通过自然语言描述修改本页的配置,该功能具有一定的危险性
353
-
354
-
355
- 插件在线服务配置依赖关系示意图
356
-
357
- ├── 语音功能
358
- │ ├── ENABLE_AUDIO
359
- │ ├── ALIYUN_TOKEN
360
- │ ├── ALIYUN_APPKEY
361
- │ ├── ALIYUN_ACCESSKEY
362
- │ └── ALIYUN_SECRET
363
-
364
- └── PDF文档精准解析
365
- ├── GROBID_URLS
366
- ├── MATHPIX_APPID
367
- └── MATHPIX_APPKEY
368
-
369
 
370
- """
 
 
 
1
+ # API_KEY = "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" 此key无效
2
+ API_KEY = "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"
3
+ API_URL = "https://api.openai.com/v1/chat/completions"
 
 
 
 
 
4
 
5
+ # 改为True应用代理
6
+ USE_PROXY = True
 
 
 
 
 
 
 
 
7
  if USE_PROXY:
8
+ # 代理网络的地址,打开你的科学上网软件查看代理的协议(socks5/http)、地址(localhost)和端口(11284)
9
+ proxies = { "http": "socks5h://localhost:10808", "https": "socks5h://localhost:10808", }
10
+ print('网络代理状态:运行。')
 
 
 
 
 
 
 
 
 
 
11
  else:
12
  proxies = None
13
+ print('网络代理状态:未配置。无代理状态下很可能无法访问。')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # 发送请求到OpenAI后,等待多久判定为超时
16
+ TIMEOUT_SECONDS = 20
 
17
 
18
  # 网页的端口, -1代表随机端口
19
  WEB_PORT = -1
20
 
 
21
  # 如果OpenAI不响应(网络卡顿、代理失败、KEY失效),重试的次数限制
22
  MAX_RETRY = 2
23
 
24
+ # 选择的OpenAI模型是(gpt4现在只对申请成功的人开放)
25
+ LLM_MODEL = "gpt-3.5-turbo"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
+ # # 检查一下是不是忘了改config
28
+ # if API_KEY == "sk-此处填API秘钥":
29
+ # assert False, "请在config文件中修改API密钥, 添加海外代理之后再运行"
core_functional.py DELETED
@@ -1,173 +0,0 @@
1
- # 'primary' 颜色对应 theme.py 中的 primary_hue
2
- # 'secondary' 颜色对应 theme.py 中的 neutral_hue
3
- # 'stop' 颜色对应 theme.py 中的 color_er
4
- import importlib
5
- from toolbox import clear_line_break
6
- from toolbox import apply_gpt_academic_string_mask_langbased
7
- from toolbox import build_gpt_academic_masked_string_langbased
8
- from textwrap import dedent
9
-
10
- def get_core_functions():
11
- return {
12
-
13
- "学术语料润色": {
14
- # [1*] 前缀字符串,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等。
15
- # 这里填一个提示词字符串就行了,这里为了区分中英文情景搞复杂了一点
16
- "Prefix": build_gpt_academic_masked_string_langbased(
17
- text_show_english=
18
- r"Below is a paragraph from an academic paper. Polish the writing to meet the academic style, "
19
- r"improve the spelling, grammar, clarity, concision and overall readability. When necessary, rewrite the whole sentence. "
20
- r"Firstly, you should provide the polished paragraph. "
21
- r"Secondly, you should list all your modification and explain the reasons to do so in markdown table.",
22
- text_show_chinese=
23
- r"作为一名中文学术论文写作改进助理,你的任务是改进所提供文本的拼写、语法、清晰、简洁和整体可读性,"
24
- r"同时分解长句,减少重复,并提供改进建议。请先提供文本的更正版本,然后在markdown表格中列出修改的内容,并给出修改的理由:"
25
- ) + "\n\n",
26
- # [2*] 后缀字符串,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来
27
- "Suffix": r"",
28
- # [3] 按钮颜色 (可选参数,默认 secondary)
29
- "Color": r"secondary",
30
- # [4] 按钮是否可见 (可选参数,默认 True,即可见)
31
- "Visible": True,
32
- # [5] 是否在触发时清除历史 (可选参数,默认 False,即不处理之前的对话历史)
33
- "AutoClearHistory": False,
34
- # [6] 文本预处理 (可选参数,默认 None,举例:写个函数移除所有的换行符)
35
- "PreProcess": None,
36
- },
37
-
38
-
39
- "总结绘制脑图": {
40
- # 前缀,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等
41
- "Prefix": r"",
42
- # 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来
43
- "Suffix":
44
- # dedent() 函数用于去除多行字符串的缩进
45
- dedent("\n"+r'''
46
- ==============================
47
-
48
- 使用mermaid flowchart对以上文本进行总结,概括上述段落的内容以及内在逻辑关系,例如:
49
-
50
- 以下是对以上文本的总结,以mermaid flowchart的形式展示:
51
- ```mermaid
52
- flowchart LR
53
- A["节点名1"] --> B("节点名2")
54
- B --> C{"节点名3"}
55
- C --> D["节点名4"]
56
- C --> |"箭头名1"| E["节点名5"]
57
- C --> |"箭头名2"| F["节点名6"]
58
- ```
59
-
60
- 警告:
61
- (1)使用中文
62
- (2)节点名字使用引号包裹,如["Laptop"]
63
- (3)`|` 和 `"`之间不要存在空格
64
- (4)根据情况选择flowchart LR(从左到右)或者flowchart TD(从上到下)
65
- '''),
66
- },
67
-
68
-
69
- "查找语法错误": {
70
- "Prefix": r"Help me ensure that the grammar and the spelling is correct. "
71
- r"Do not try to polish the text, if no mistake is found, tell me that this paragraph is good. "
72
- r"If you find grammar or spelling mistakes, please list mistakes you find in a two-column markdown table, "
73
- r"put the original text the first column, "
74
- r"put the corrected text in the second column and highlight the key words you fixed. "
75
- r"Finally, please provide the proofreaded text.""\n\n"
76
- r"Example:""\n"
77
- r"Paragraph: How is you? Do you knows what is it?""\n"
78
- r"| Original sentence | Corrected sentence |""\n"
79
- r"| :--- | :--- |""\n"
80
- r"| How **is** you? | How **are** you? |""\n"
81
- r"| Do you **knows** what **is** **it**? | Do you **know** what **it** **is** ? |""\n\n"
82
- r"Below is a paragraph from an academic paper. "
83
- r"You need to report all grammar and spelling mistakes as the example before."
84
- + "\n\n",
85
- "Suffix": r"",
86
- "PreProcess": clear_line_break, # 预处理:清除换行符
87
- },
88
-
89
-
90
- "中译英": {
91
- "Prefix": r"Please translate following sentence to English:" + "\n\n",
92
- "Suffix": r"",
93
- },
94
-
95
-
96
- "学术英中互译": {
97
- "Prefix": build_gpt_academic_masked_string_langbased(
98
- text_show_chinese=
99
- r"I want you to act as a scientific English-Chinese translator, "
100
- r"I will provide you with some paragraphs in one language "
101
- r"and your task is to accurately and academically translate the paragraphs only into the other language. "
102
- r"Do not repeat the original provided paragraphs after translation. "
103
- r"You should use artificial intelligence tools, "
104
- r"such as natural language processing, and rhetorical knowledge "
105
- r"and experience about effective writing techniques to reply. "
106
- r"I'll give you my paragraphs as follows, tell me what language it is written in, and then translate:",
107
- text_show_english=
108
- r"你是经验丰富的翻译,请把以下学术文章段落翻译成中文,"
109
- r"并同时充分考虑中文的语法、清晰、简洁和整体可读性,"
110
- r"必要时,你可以修改整个句子的顺序以确保翻译后的段落符合中文的语言习惯。"
111
- r"你需要翻译的文本如下:"
112
- ) + "\n\n",
113
- "Suffix": r"",
114
- },
115
-
116
-
117
- "英译中": {
118
- "Prefix": r"翻译成地道的中文:" + "\n\n",
119
- "Suffix": r"",
120
- "Visible": False,
121
- },
122
-
123
-
124
- "找图片": {
125
- "Prefix": r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL,"
126
- r"然后请使用Markdown格式封装,并且不要有反斜线,不要用代码块。现在,请按以下描述给我发送图片:" + "\n\n",
127
- "Suffix": r"",
128
- "Visible": False,
129
- },
130
-
131
-
132
- "解释代码": {
133
- "Prefix": r"请解释以下代码:" + "\n```\n",
134
- "Suffix": "\n```\n",
135
- },
136
-
137
-
138
- "参考文献转Bib": {
139
- "Prefix": r"Here are some bibliography items, please transform them into bibtex style."
140
- r"Note that, reference styles maybe more than one kind, you should transform each item correctly."
141
- r"Items need to be transformed:" + "\n\n",
142
- "Visible": False,
143
- "Suffix": r"",
144
- }
145
- }
146
-
147
-
148
- def handle_core_functionality(additional_fn, inputs, history, chatbot):
149
- import core_functional
150
- importlib.reload(core_functional) # 热更新prompt
151
- core_functional = core_functional.get_core_functions()
152
- addition = chatbot._cookies['customize_fn_overwrite']
153
- if additional_fn in addition:
154
- # 自定义功能
155
- inputs = addition[additional_fn]["Prefix"] + inputs + addition[additional_fn]["Suffix"]
156
- return inputs, history
157
- else:
158
- # 预制功能
159
- if "PreProcess" in core_functional[additional_fn]:
160
- if core_functional[additional_fn]["PreProcess"] is not None:
161
- inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
162
- # 为字符串加上上面定义的前缀和后缀。
163
- inputs = apply_gpt_academic_string_mask_langbased(
164
- string = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"],
165
- lang_reference = inputs,
166
- )
167
- if core_functional[additional_fn].get("AutoClearHistory", False):
168
- history = []
169
- return inputs, history
170
-
171
- if __name__ == "__main__":
172
- t = get_core_functions()["总结绘制脑图"]
173
- print(t["Prefix"] + t["Suffix"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functional.py DELETED
@@ -1,723 +0,0 @@
1
- from toolbox import HotReload # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
2
- from toolbox import trimmed_format_exc
3
-
4
-
5
- def get_crazy_functions():
6
- from crazy_functions.读文章写摘要 import 读文章写摘要
7
- from crazy_functions.生成函数注释 import 批量生成函数注释
8
- from crazy_functions.解析项目源代码 import 解析项目本身
9
- from crazy_functions.解析项目源代码 import 解析一个Python项目
10
- from crazy_functions.解析项目源代码 import 解析一个Matlab项目
11
- from crazy_functions.解析项目源代码 import 解析一个C项目的头文件
12
- from crazy_functions.解析项目源代码 import 解析一个C项目
13
- from crazy_functions.解析项目源代码 import 解析一个Golang项目
14
- from crazy_functions.解析项目源代码 import 解析一个Rust项目
15
- from crazy_functions.解析项目源代码 import 解析一个Java项目
16
- from crazy_functions.解析项目源代码 import 解析一个前端项目
17
- from crazy_functions.高级功能函数模板 import 高阶功能模板函数
18
- from crazy_functions.Latex全文润色 import Latex英文润色
19
- from crazy_functions.询问多个大语言模型 import 同时问询
20
- from crazy_functions.解析项目源代码 import 解析一个Lua项目
21
- from crazy_functions.解析项目源代码 import 解析一个CSharp项目
22
- from crazy_functions.总结word文档 import 总结word文档
23
- from crazy_functions.解析JupyterNotebook import 解析ipynb文件
24
- from crazy_functions.对话历史存档 import 对话历史存档
25
- from crazy_functions.对话历史存档 import 载入对话历史存档
26
- from crazy_functions.对话历史存档 import 删除所有本地对话历史记录
27
- from crazy_functions.辅助功能 import 清除缓存
28
- from crazy_functions.批量Markdown翻译 import Markdown英译中
29
- from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
30
- from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
31
- from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
32
- from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
33
- from crazy_functions.Latex全文润色 import Latex中文润色
34
- from crazy_functions.Latex全文润色 import Latex英文纠错
35
- from crazy_functions.批量Markdown翻译 import Markdown中译英
36
- from crazy_functions.虚空终端 import 虚空终端
37
- from crazy_functions.生成多种Mermaid图表 import 生成多种Mermaid图表
38
-
39
- function_plugins = {
40
- "虚空终端": {
41
- "Group": "对话|编程|学术|智能体",
42
- "Color": "stop",
43
- "AsButton": True,
44
- "Function": HotReload(虚空终端),
45
- },
46
- "解析整个Python项目": {
47
- "Group": "编程",
48
- "Color": "stop",
49
- "AsButton": True,
50
- "Info": "解析一个Python项目的所有源文件(.py) | 输入参数为路径",
51
- "Function": HotReload(解析一个Python项目),
52
- },
53
- "载入对话历史存档(先上传存档或输入路径)": {
54
- "Group": "对话",
55
- "Color": "stop",
56
- "AsButton": False,
57
- "Info": "载入对话历史存档 | 输入参数为路径",
58
- "Function": HotReload(载入对话历史存档),
59
- },
60
- "删除所有本地对话历史记录(谨慎操作)": {
61
- "Group": "对话",
62
- "AsButton": False,
63
- "Info": "删除所有本地对话历史记录,谨慎操作 | 不需要输入参数",
64
- "Function": HotReload(删除所有本地对话历史记录),
65
- },
66
- "清除所有缓存文件(谨慎操作)": {
67
- "Group": "对话",
68
- "Color": "stop",
69
- "AsButton": False, # 加入下拉菜单中
70
- "Info": "清除所有缓存文件,谨慎操作 | 不需要输入参数",
71
- "Function": HotReload(清除缓存),
72
- },
73
- "生成多种Mermaid图表(从当前对话或路径(.pdf/.md/.docx)中生产图表)": {
74
- "Group": "对话",
75
- "Color": "stop",
76
- "AsButton": False,
77
- "Info" : "基于当前对话或文件生成多种Mermaid图表,图表类型由模型判断",
78
- "Function": HotReload(生成多种Mermaid图表),
79
- "AdvancedArgs": True,
80
- "ArgsReminder": "请输入图类型对应的数字,不输入则为模型自行判断:1-流程图,2-序列图,3-类图,4-饼图,5-甘特图,6-状态图,7-实体关系图,8-象限提示图,9-思维导图",
81
- },
82
- "批量总结Word文档": {
83
- "Group": "学术",
84
- "Color": "stop",
85
- "AsButton": True,
86
- "Info": "批量总结word文档 | 输入参数为路径",
87
- "Function": HotReload(总结word文档),
88
- },
89
- "解析整个Matlab项目": {
90
- "Group": "编程",
91
- "Color": "stop",
92
- "AsButton": False,
93
- "Info": "解析一个Matlab项目的所有源文件(.m) | 输入参数为路径",
94
- "Function": HotReload(解析一个Matlab项目),
95
- },
96
- "解析整个C++项目头文件": {
97
- "Group": "编程",
98
- "Color": "stop",
99
- "AsButton": False, # 加入下拉菜单中
100
- "Info": "解析一个C++项目的所有头文件(.h/.hpp) | 输入参数为路径",
101
- "Function": HotReload(解析一个C项目的头文件),
102
- },
103
- "解析整个C++项目(.cpp/.hpp/.c/.h)": {
104
- "Group": "编程",
105
- "Color": "stop",
106
- "AsButton": False, # 加入下拉菜单中
107
- "Info": "解析一个C++项目的所有源文件(.cpp/.hpp/.c/.h)| 输入参数为路径",
108
- "Function": HotReload(解析一个C项目),
109
- },
110
- "解析整个Go项目": {
111
- "Group": "编程",
112
- "Color": "stop",
113
- "AsButton": False, # 加入下拉菜单中
114
- "Info": "解析一个Go项目的所有源文件 | 输入参数为路径",
115
- "Function": HotReload(解析一个Golang项目),
116
- },
117
- "解析整个Rust项目": {
118
- "Group": "编程",
119
- "Color": "stop",
120
- "AsButton": False, # 加入下拉菜单中
121
- "Info": "解析一个Rust项目的所有源文件 | 输入参数为路径",
122
- "Function": HotReload(解析一个Rust项目),
123
- },
124
- "解析整个Java项目": {
125
- "Group": "编程",
126
- "Color": "stop",
127
- "AsButton": False, # 加入下拉菜单中
128
- "Info": "解析一个Java项目的所有源文件 | 输入参数为路径",
129
- "Function": HotReload(解析一个Java项目),
130
- },
131
- "解析整个前端项目(js,ts,css等)": {
132
- "Group": "编程",
133
- "Color": "stop",
134
- "AsButton": False, # 加入下拉菜单中
135
- "Info": "解析一个前端项目的所有源文件(js,ts,css等) | 输入参数为路径",
136
- "Function": HotReload(解析一个前端项目),
137
- },
138
- "解析整个Lua项目": {
139
- "Group": "编程",
140
- "Color": "stop",
141
- "AsButton": False, # 加入下拉菜单中
142
- "Info": "解析一个Lua项目的所有源文件 | 输入参数为路径",
143
- "Function": HotReload(解析一个Lua项目),
144
- },
145
- "解析整个CSharp项目": {
146
- "Group": "编程",
147
- "Color": "stop",
148
- "AsButton": False, # 加入下拉菜单中
149
- "Info": "解析一个CSharp项目的所有源文件 | 输入参数为路径",
150
- "Function": HotReload(解析一个CSharp项目),
151
- },
152
- "解析Jupyter Notebook文件": {
153
- "Group": "编程",
154
- "Color": "stop",
155
- "AsButton": False,
156
- "Info": "解析Jupyter Notebook文件 | 输入参数为路径",
157
- "Function": HotReload(解析ipynb文件),
158
- "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
159
- "ArgsReminder": "若输入0,则不解析notebook中的Markdown块", # 高级参数输入区的显示提示
160
- },
161
- "读Tex论文写摘要": {
162
- "Group": "学术",
163
- "Color": "stop",
164
- "AsButton": False,
165
- "Info": "读取Tex论文并写摘要 | 输入参数为路径",
166
- "Function": HotReload(读文章写摘要),
167
- },
168
- "翻译README或MD": {
169
- "Group": "编程",
170
- "Color": "stop",
171
- "AsButton": True,
172
- "Info": "将Markdown翻译为中文 | 输入参数为路径或URL",
173
- "Function": HotReload(Markdown英译中),
174
- },
175
- "翻译Markdown或README(支持Github链接)": {
176
- "Group": "编程",
177
- "Color": "stop",
178
- "AsButton": False,
179
- "Info": "将Markdown或README翻译为中文 | 输入参数为路径或URL",
180
- "Function": HotReload(Markdown英译中),
181
- },
182
- "批量生成函数注释": {
183
- "Group": "编程",
184
- "Color": "stop",
185
- "AsButton": False, # 加入下拉菜单中
186
- "Info": "批量生成函数的注释 | 输入参数为路径",
187
- "Function": HotReload(批量生成函数注释),
188
- },
189
- "保存当前的对话": {
190
- "Group": "对话",
191
- "AsButton": True,
192
- "Info": "保存当前的对话 | 不需要输入参数",
193
- "Function": HotReload(对话历史存档),
194
- },
195
- "[多线程Demo]解析此项目本身(源码自译解)": {
196
- "Group": "对话|编程",
197
- "AsButton": False, # 加入下拉菜单中
198
- "Info": "多线程解析并翻译此项目的源码 | 不需要输入参数",
199
- "Function": HotReload(解析项目本身),
200
- },
201
- "历史上的今天": {
202
- "Group": "对话",
203
- "AsButton": True,
204
- "Info": "查看历史上的今天事件 (这是一个面向开发者的插件Demo) | 不需要输入参数",
205
- "Function": HotReload(高阶功能模板函数),
206
- },
207
- "精准翻译PDF论文": {
208
- "Group": "学术",
209
- "Color": "stop",
210
- "AsButton": True,
211
- "Info": "精准翻译PDF论文为中文 | 输入参数为路径",
212
- "Function": HotReload(批量翻译PDF文档),
213
- },
214
- "询问多个GPT模型": {
215
- "Group": "对话",
216
- "Color": "stop",
217
- "AsButton": True,
218
- "Function": HotReload(同时问询),
219
- },
220
- "批量总结PDF文档": {
221
- "Group": "学术",
222
- "Color": "stop",
223
- "AsButton": False, # 加入下拉菜单中
224
- "Info": "批量总结PDF文档的内容 | 输入参数为路径",
225
- "Function": HotReload(批量总结PDF文档),
226
- },
227
- "谷歌学术检索助手(输入谷歌学术搜索页url)": {
228
- "Group": "学术",
229
- "Color": "stop",
230
- "AsButton": False, # 加入下拉菜单中
231
- "Info": "使用谷歌学术检索助手搜索指定URL的结果 | 输入参数为谷歌学术搜索页的URL",
232
- "Function": HotReload(谷歌检索小助手),
233
- },
234
- "理解PDF文档内容 (模仿ChatPDF)": {
235
- "Group": "学术",
236
- "Color": "stop",
237
- "AsButton": False, # 加入下拉菜单中
238
- "Info": "理解PDF文档的内容并进行回答 | 输入参数为路径",
239
- "Function": HotReload(理解PDF文档内容标准文件输入),
240
- },
241
- "英文Latex项目全文润色(输入路径或上传压缩包)": {
242
- "Group": "学术",
243
- "Color": "stop",
244
- "AsButton": False, # 加入下拉菜单中
245
- "Info": "对英文Latex项目全文进行润色处理 | 输入参数为路径或上传压缩包",
246
- "Function": HotReload(Latex英文润色),
247
- },
248
-
249
- "中文Latex项目全文润色(输入路径或上传压缩包)": {
250
- "Group": "学术",
251
- "Color": "stop",
252
- "AsButton": False, # 加入下拉菜单中
253
- "Info": "对中文Latex项目全文进行润色处理 | 输入参数为路径或上传压缩包",
254
- "Function": HotReload(Latex中文润色),
255
- },
256
- # 已经被新插件取代
257
- # "英文Latex项目全文纠错(输入路径或上传压缩包)": {
258
- # "Group": "学术",
259
- # "Color": "stop",
260
- # "AsButton": False, # 加入下拉菜单中
261
- # "Info": "对英文Latex项目全文进行纠错处理 | 输入参数为路径或上传压缩包",
262
- # "Function": HotReload(Latex英文纠错),
263
- # },
264
- # 已经被新插件取代
265
- # "Latex项目全文中译英(输入路径或上传压缩包)": {
266
- # "Group": "学术",
267
- # "Color": "stop",
268
- # "AsButton": False, # 加入下拉菜单中
269
- # "Info": "对Latex项目全文进行中译英处理 | 输入参数为路径或上传压缩包",
270
- # "Function": HotReload(Latex中译英)
271
- # },
272
- # 已经被新插件取代
273
- # "Latex项目全文英译中(输入路径或上传压缩包)": {
274
- # "Group": "学术",
275
- # "Color": "stop",
276
- # "AsButton": False, # 加入下拉菜单中
277
- # "Info": "对Latex项目全文进行英译中处理 | 输入参数为路径或上传压缩包",
278
- # "Function": HotReload(Latex英译中)
279
- # },
280
- "批量Markdown中译英(输入路径或上传压缩包)": {
281
- "Group": "编程",
282
- "Color": "stop",
283
- "AsButton": False, # 加入下拉菜单中
284
- "Info": "批量将Markdown文件中文翻译为英文 | 输入参数为路径或上传压缩包",
285
- "Function": HotReload(Markdown中译英),
286
- },
287
- }
288
-
289
- # -=--=- 尚未充分测试的实验性插件 & 需要额外依赖的插件 -=--=-
290
- try:
291
- from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要
292
-
293
- function_plugins.update(
294
- {
295
- "一键下载arxiv论文并翻译摘要(先在input输入编号,如1812.10695)": {
296
- "Group": "学术",
297
- "Color": "stop",
298
- "AsButton": False, # 加入下拉菜单中
299
- # "Info": "下载arxiv论文并翻译摘要 | 输入参数为arxiv编号如1812.10695",
300
- "Function": HotReload(下载arxiv论文并翻译摘要),
301
- }
302
- }
303
- )
304
- except:
305
- print(trimmed_format_exc())
306
- print("Load function plugin failed")
307
-
308
- try:
309
- from crazy_functions.联网的ChatGPT import 连接网络回答问题
310
-
311
- function_plugins.update(
312
- {
313
- "连接网络回答问题(输入问题后点击该插件,需要访问谷歌)": {
314
- "Group": "对话",
315
- "Color": "stop",
316
- "AsButton": False, # 加入下拉菜单中
317
- # "Info": "连接网络回答问题(需要访问谷歌)| 输入参数是一个问题",
318
- "Function": HotReload(连接网络回答问题),
319
- }
320
- }
321
- )
322
- from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
323
-
324
- function_plugins.update(
325
- {
326
- "连接网络回答问题(中文Bing版,输入问题后点击该插件)": {
327
- "Group": "对话",
328
- "Color": "stop",
329
- "AsButton": False, # 加入下拉菜单中
330
- "Info": "连接网络回答问题(需要访问中文Bing)| 输入参数是一个问题",
331
- "Function": HotReload(连接bing搜索回答问题),
332
- }
333
- }
334
- )
335
- except:
336
- print(trimmed_format_exc())
337
- print("Load function plugin failed")
338
-
339
- try:
340
- from crazy_functions.解析项目源代码 import 解析任意code项目
341
-
342
- function_plugins.update(
343
- {
344
- "解析项目源代码(手动指定和筛选源代码文件类型)": {
345
- "Group": "编程",
346
- "Color": "stop",
347
- "AsButton": False,
348
- "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
349
- "ArgsReminder": '输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: "*.c, ^*.cpp, config.toml, ^*.toml"', # 高级参数输入区的显示提示
350
- "Function": HotReload(解析任意code项目),
351
- },
352
- }
353
- )
354
- except:
355
- print(trimmed_format_exc())
356
- print("Load function plugin failed")
357
-
358
- try:
359
- from crazy_functions.询问多个大语言模型 import 同时问询_指定模型
360
-
361
- function_plugins.update(
362
- {
363
- "询问多个GPT模型(手动指定询问哪些模型)": {
364
- "Group": "对话",
365
- "Color": "stop",
366
- "AsButton": False,
367
- "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
368
- "ArgsReminder": "支持任意数量的llm接口,用&符号分隔。例如chatglm&gpt-3.5-turbo&gpt-4", # 高级参数输入区的显示提示
369
- "Function": HotReload(同时问询_指定模型),
370
- },
371
- }
372
- )
373
- except:
374
- print(trimmed_format_exc())
375
- print("Load function plugin failed")
376
-
377
- try:
378
- from crazy_functions.图片生成 import 图片生成_DALLE2, 图片生成_DALLE3, 图片修改_DALLE2
379
-
380
- function_plugins.update(
381
- {
382
- "图片生成_DALLE2 (先切换模型到gpt-*)": {
383
- "Group": "对话",
384
- "Color": "stop",
385
- "AsButton": False,
386
- "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
387
- "ArgsReminder": "在这里输入分辨率, 如1024x1024(默认),支持 256x256, 512x512, 1024x1024", # 高级参数输入区的显示提示
388
- "Info": "使用DALLE2生成图片 | 输入参数字符串,提供图像的内容",
389
- "Function": HotReload(图片生成_DALLE2),
390
- },
391
- }
392
- )
393
- function_plugins.update(
394
- {
395
- "图片生成_DALLE3 (先切换模型到gpt-*)": {
396
- "Group": "对话",
397
- "Color": "stop",
398
- "AsButton": False,
399
- "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
400
- "ArgsReminder": "在这里输入自定义参数「分辨率-质量(可选)-风格(可选)」, 参数示例「1024x1024-hd-vivid」 || 分辨率支持 「1024x1024」(默认) /「1792x1024」/「1024x1792」 || 质量支持 「-standard」(默认) /「-hd」 || 风格支持 「-vivid」(默认) /「-natural」", # 高级参数输入区的显示提示
401
- "Info": "使用DALLE3生成图片 | 输入参数字符串,提供图像的内容",
402
- "Function": HotReload(图片生成_DALLE3),
403
- },
404
- }
405
- )
406
- function_plugins.update(
407
- {
408
- "图片修改_DALLE2 (先切换模型到gpt-*)": {
409
- "Group": "对话",
410
- "Color": "stop",
411
- "AsButton": False,
412
- "AdvancedArgs": False, # 调用时,唤起高级参数输入区(默认False)
413
- # "Info": "使用DALLE2修改图片 | 输入参数字符串,提供图像的内容",
414
- "Function": HotReload(图片修改_DALLE2),
415
- },
416
- }
417
- )
418
- except:
419
- print(trimmed_format_exc())
420
- print("Load function plugin failed")
421
-
422
- try:
423
- from crazy_functions.总结音视频 import 总结音视频
424
-
425
- function_plugins.update(
426
- {
427
- "批量总结音视频(输入路径或上传压缩包)": {
428
- "Group": "对话",
429
- "Color": "stop",
430
- "AsButton": False,
431
- "AdvancedArgs": True,
432
- "ArgsReminder": "调用openai api 使用whisper-1模型, 目前支持的格式:mp4, m4a, wav, mpga, mpeg, mp3。此处可以输入解析提示,例如:解析为简体中文(默认)。",
433
- "Info": "批量总结音频或视频 | 输入参数为路径",
434
- "Function": HotReload(总结音视频),
435
- }
436
- }
437
- )
438
- except:
439
- print(trimmed_format_exc())
440
- print("Load function plugin failed")
441
-
442
- try:
443
- from crazy_functions.数学动画生成manim import 动画生成
444
-
445
- function_plugins.update(
446
- {
447
- "数学动画生成(Manim)": {
448
- "Group": "对话",
449
- "Color": "stop",
450
- "AsButton": False,
451
- "Info": "按照自然语言描述生成一个动画 | 输入参数是一段话",
452
- "Function": HotReload(动画生成),
453
- }
454
- }
455
- )
456
- except:
457
- print(trimmed_format_exc())
458
- print("Load function plugin failed")
459
-
460
- try:
461
- from crazy_functions.批量Markdown翻译 import Markdown翻译指定语言
462
-
463
- function_plugins.update(
464
- {
465
- "Markdown翻译(指定翻译成何种语言)": {
466
- "Group": "编程",
467
- "Color": "stop",
468
- "AsButton": False,
469
- "AdvancedArgs": True,
470
- "ArgsReminder": "请输入要翻译成哪种语言,默认为Chinese。",
471
- "Function": HotReload(Markdown翻译指定语言),
472
- }
473
- }
474
- )
475
- except:
476
- print(trimmed_format_exc())
477
- print("Load function plugin failed")
478
-
479
- try:
480
- from crazy_functions.知识库问答 import 知识库文件注入
481
-
482
- function_plugins.update(
483
- {
484
- "构建知识库(先上传文件素材,再运行此插件)": {
485
- "Group": "对话",
486
- "Color": "stop",
487
- "AsButton": False,
488
- "AdvancedArgs": True,
489
- "ArgsReminder": "此处待注入的知识库名称id, 默认为default。文件进入知识库后可长期保存。可以通过再次调用本插件的方式,向知识库追加更多文档。",
490
- "Function": HotReload(知识库文件注入),
491
- }
492
- }
493
- )
494
- except:
495
- print(trimmed_format_exc())
496
- print("Load function plugin failed")
497
-
498
- try:
499
- from crazy_functions.知识库问答 import 读取知识库作答
500
-
501
- function_plugins.update(
502
- {
503
- "知识库文件注入(构建知识库后,再运行此插件)": {
504
- "Group": "对话",
505
- "Color": "stop",
506
- "AsButton": False,
507
- "AdvancedArgs": True,
508
- "ArgsReminder": "待提取的知识库名称id, 默认为default, 您需要构建知识库后再运行此插件。",
509
- "Function": HotReload(读取知识库作答),
510
- }
511
- }
512
- )
513
- except:
514
- print(trimmed_format_exc())
515
- print("Load function plugin failed")
516
-
517
- try:
518
- from crazy_functions.交互功能函数模板 import 交互功能模板函数
519
-
520
- function_plugins.update(
521
- {
522
- "交互功能模板Demo函数(查找wallhaven.cc的壁纸)": {
523
- "Group": "对话",
524
- "Color": "stop",
525
- "AsButton": False,
526
- "Function": HotReload(交互功能模板函数),
527
- }
528
- }
529
- )
530
- except:
531
- print(trimmed_format_exc())
532
- print("Load function plugin failed")
533
-
534
- try:
535
- from crazy_functions.Latex输出PDF import Latex英文纠错加PDF对比
536
- from crazy_functions.Latex输出PDF import Latex翻译中文并重新编译PDF
537
- from crazy_functions.Latex输出PDF import PDF翻译中文并重新编译PDF
538
-
539
- function_plugins.update(
540
- {
541
- "Latex英文纠错+高亮修正位置 [需Latex]": {
542
- "Group": "学术",
543
- "Color": "stop",
544
- "AsButton": False,
545
- "AdvancedArgs": True,
546
- "ArgsReminder": "如果有必要, 请在此处追加更细致的矫错指令(使用英文)。",
547
- "Function": HotReload(Latex英文纠错加PDF对比),
548
- },
549
- "Arxiv论文精细翻译(输入arxivID)[需Latex]": {
550
- "Group": "学术",
551
- "Color": "stop",
552
- "AsButton": False,
553
- "AdvancedArgs": True,
554
- "ArgsReminder": r"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "
555
- r"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: "
556
- r'If the term "agent" is used in this section, it should be translated to "智能体". ',
557
- "Info": "Arixv论文精细翻译 | 输入参数arxiv论文的ID,比如1812.10695",
558
- "Function": HotReload(Latex翻译中文并重新编译PDF),
559
- },
560
- "本地Latex论文精细翻译(上传Latex项目)[需Latex]": {
561
- "Group": "学术",
562
- "Color": "stop",
563
- "AsButton": False,
564
- "AdvancedArgs": True,
565
- "ArgsReminder": r"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "
566
- r"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: "
567
- r'If the term "agent" is used in this section, it should be translated to "智能体". ',
568
- "Info": "本地Latex论文精细翻译 | 输入参数是路径",
569
- "Function": HotReload(Latex翻译中文并重新编译PDF),
570
- },
571
- "PDF翻译中文并重新编译PDF(上传PDF)[需Latex]": {
572
- "Group": "学术",
573
- "Color": "stop",
574
- "AsButton": False,
575
- "AdvancedArgs": True,
576
- "ArgsReminder": r"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "
577
- r"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: "
578
- r'If the term "agent" is used in this section, it should be translated to "智能体". ',
579
- "Info": "PDF翻译中文,并重新编译PDF | 输入参数为路径",
580
- "Function": HotReload(PDF翻译中文并重新编译PDF)
581
- }
582
- }
583
- )
584
- except:
585
- print(trimmed_format_exc())
586
- print("Load function plugin failed")
587
-
588
- try:
589
- from toolbox import get_conf
590
-
591
- ENABLE_AUDIO = get_conf("ENABLE_AUDIO")
592
- if ENABLE_AUDIO:
593
- from crazy_functions.语音助手 import 语音助手
594
-
595
- function_plugins.update(
596
- {
597
- "实时语音对话": {
598
- "Group": "对话",
599
- "Color": "stop",
600
- "AsButton": True,
601
- "Info": "这是一个时刻聆听着的语音对话助手 | 没有输入参数",
602
- "Function": HotReload(语音助手),
603
- }
604
- }
605
- )
606
- except:
607
- print(trimmed_format_exc())
608
- print("Load function plugin failed")
609
-
610
- try:
611
- from crazy_functions.批量翻译PDF文档_NOUGAT import 批量翻译PDF文档
612
-
613
- function_plugins.update(
614
- {
615
- "精准翻译PDF文档(NOUGAT)": {
616
- "Group": "学术",
617
- "Color": "stop",
618
- "AsButton": False,
619
- "Function": HotReload(批量翻译PDF文档),
620
- }
621
- }
622
- )
623
- except:
624
- print(trimmed_format_exc())
625
- print("Load function plugin failed")
626
-
627
- try:
628
- from crazy_functions.函数动态生成 import 函数动态生成
629
-
630
- function_plugins.update(
631
- {
632
- "动态代码解释器(CodeInterpreter)": {
633
- "Group": "智能体",
634
- "Color": "stop",
635
- "AsButton": False,
636
- "Function": HotReload(函数动态生成),
637
- }
638
- }
639
- )
640
- except:
641
- print(trimmed_format_exc())
642
- print("Load function plugin failed")
643
-
644
- try:
645
- from crazy_functions.多智能体 import 多智能体终端
646
-
647
- function_plugins.update(
648
- {
649
- "AutoGen多智能体终端(仅供测试)": {
650
- "Group": "智能体",
651
- "Color": "stop",
652
- "AsButton": False,
653
- "Function": HotReload(多智能体终端),
654
- }
655
- }
656
- )
657
- except:
658
- print(trimmed_format_exc())
659
- print("Load function plugin failed")
660
-
661
- try:
662
- from crazy_functions.互动小游戏 import 随机小游戏
663
-
664
- function_plugins.update(
665
- {
666
- "随机互动小游戏(仅供测试)": {
667
- "Group": "智能体",
668
- "Color": "stop",
669
- "AsButton": False,
670
- "Function": HotReload(随机小游戏),
671
- }
672
- }
673
- )
674
- except:
675
- print(trimmed_format_exc())
676
- print("Load function plugin failed")
677
-
678
- # try:
679
- # from crazy_functions.高级功能函数模板 import 测试图表渲染
680
- # function_plugins.update({
681
- # "绘制逻辑关系(测试图表渲染)": {
682
- # "Group": "智能体",
683
- # "Color": "stop",
684
- # "AsButton": True,
685
- # "Function": HotReload(测试图表渲染)
686
- # }
687
- # })
688
- # except:
689
- # print(trimmed_format_exc())
690
- # print('Load function plugin failed')
691
-
692
- # try:
693
- # from crazy_functions.chatglm微调工具 import 微调数据集生成
694
- # function_plugins.update({
695
- # "黑盒模型学习: 微调数据集生成 (先上传数据集)": {
696
- # "Color": "stop",
697
- # "AsButton": False,
698
- # "AdvancedArgs": True,
699
- # "ArgsReminder": "针对数据集输入(如 绿帽子*深蓝色衬衫*黑色运动裤)给出指令,例如您可以将以下命令复制到下方: --llm_to_learn=azure-gpt-3.5 --prompt_prefix='根据下面的服装类型提示,想象一个穿着者,对这个人外貌、身处的环境、内心世界、过去经历进行描写。要求:100字以内,用第二人称。' --system_prompt=''",
700
- # "Function": HotReload(微调数据集生成)
701
- # }
702
- # })
703
- # except:
704
- # print('Load function plugin failed')
705
-
706
- """
707
- 设置默认值:
708
- - 默认 Group = 对话
709
- - 默认 AsButton = True
710
- - 默认 AdvancedArgs = False
711
- - 默认 Color = secondary
712
- """
713
- for name, function_meta in function_plugins.items():
714
- if "Group" not in function_meta:
715
- function_plugins[name]["Group"] = "对话"
716
- if "AsButton" not in function_meta:
717
- function_plugins[name]["AsButton"] = True
718
- if "AdvancedArgs" not in function_meta:
719
- function_plugins[name]["AdvancedArgs"] = False
720
- if "Color" not in function_meta:
721
- function_plugins[name]["Color"] = "secondary"
722
-
723
- return function_plugins
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/CodeInterpreter.py DELETED
@@ -1,232 +0,0 @@
1
- from collections.abc import Callable, Iterable, Mapping
2
- from typing import Any
3
- from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc
4
- from toolbox import promote_file_to_downloadzone, get_log_folder
5
- from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
6
- from .crazy_utils import input_clipping, try_install_deps
7
- from multiprocessing import Process, Pipe
8
- import os
9
- import time
10
-
11
- templete = """
12
- ```python
13
- import ... # Put dependencies here, e.g. import numpy as np
14
-
15
- class TerminalFunction(object): # Do not change the name of the class, The name of the class must be `TerminalFunction`
16
-
17
- def run(self, path): # The name of the function must be `run`, it takes only a positional argument.
18
- # rewrite the function you have just written here
19
- ...
20
- return generated_file_path
21
- ```
22
- """
23
-
24
- def inspect_dependency(chatbot, history):
25
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
26
- return True
27
-
28
- def get_code_block(reply):
29
- import re
30
- pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks
31
- matches = re.findall(pattern, reply) # find all code blocks in text
32
- if len(matches) == 1:
33
- return matches[0].strip('python') # code block
34
- for match in matches:
35
- if 'class TerminalFunction' in match:
36
- return match.strip('python') # code block
37
- raise RuntimeError("GPT is not generating proper code.")
38
-
39
- def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
40
- # 输入
41
- prompt_compose = [
42
- f'Your job:\n'
43
- f'1. write a single Python function, which takes a path of a `{file_type}` file as the only argument and returns a `string` containing the result of analysis or the path of generated files. \n',
44
- f"2. You should write this function to perform following task: " + txt + "\n",
45
- f"3. Wrap the output python function with markdown codeblock."
46
- ]
47
- i_say = "".join(prompt_compose)
48
- demo = []
49
-
50
- # 第一步
51
- gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
52
- inputs=i_say, inputs_show_user=i_say,
53
- llm_kwargs=llm_kwargs, chatbot=chatbot, history=demo,
54
- sys_prompt= r"You are a programmer."
55
- )
56
- history.extend([i_say, gpt_say])
57
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
58
-
59
- # 第二步
60
- prompt_compose = [
61
- "If previous stage is successful, rewrite the function you have just written to satisfy following templete: \n",
62
- templete
63
- ]
64
- i_say = "".join(prompt_compose); inputs_show_user = "If previous stage is successful, rewrite the function you have just written to satisfy executable templete. "
65
- gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
66
- inputs=i_say, inputs_show_user=inputs_show_user,
67
- llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
68
- sys_prompt= r"You are a programmer."
69
- )
70
- code_to_return = gpt_say
71
- history.extend([i_say, gpt_say])
72
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
73
-
74
- # # 第三步
75
- # i_say = "Please list to packages to install to run the code above. Then show me how to use `try_install_deps` function to install them."
76
- # i_say += 'For instance. `try_install_deps(["opencv-python", "scipy", "numpy"])`'
77
- # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
78
- # inputs=i_say, inputs_show_user=inputs_show_user,
79
- # llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
80
- # sys_prompt= r"You are a programmer."
81
- # )
82
- # # # 第三步
83
- # i_say = "Show me how to use `pip` to install packages to run the code above. "
84
- # i_say += 'For instance. `pip install -r opencv-python scipy numpy`'
85
- # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
86
- # inputs=i_say, inputs_show_user=i_say,
87
- # llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
88
- # sys_prompt= r"You are a programmer."
89
- # )
90
- installation_advance = ""
91
-
92
- return code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history
93
-
94
- def make_module(code):
95
- module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
96
- with open(f'{get_log_folder()}/{module_file}.py', 'w', encoding='utf8') as f:
97
- f.write(code)
98
-
99
- def get_class_name(class_string):
100
- import re
101
- # Use regex to extract the class name
102
- class_name = re.search(r'class (\w+)\(', class_string).group(1)
103
- return class_name
104
-
105
- class_name = get_class_name(code)
106
- return f"{get_log_folder().replace('/', '.')}.{module_file}->{class_name}"
107
-
108
- def init_module_instance(module):
109
- import importlib
110
- module_, class_ = module.split('->')
111
- init_f = getattr(importlib.import_module(module_), class_)
112
- return init_f()
113
-
114
- def for_immediate_show_off_when_possible(file_type, fp, chatbot):
115
- if file_type in ['png', 'jpg']:
116
- image_path = os.path.abspath(fp)
117
- chatbot.append(['这是一张图片, 展示如下:',
118
- f'本地文件地址: <br/>`{image_path}`<br/>'+
119
- f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
120
- ])
121
- return chatbot
122
-
123
- def subprocess_worker(instance, file_path, return_dict):
124
- return_dict['result'] = instance.run(file_path)
125
-
126
- def have_any_recent_upload_files(chatbot):
127
- _5min = 5 * 60
128
- if not chatbot: return False # chatbot is None
129
- most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
130
- if not most_recent_uploaded: return False # most_recent_uploaded is None
131
- if time.time() - most_recent_uploaded["time"] < _5min: return True # most_recent_uploaded is new
132
- else: return False # most_recent_uploaded is too old
133
-
134
- def get_recent_file_prompt_support(chatbot):
135
- most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
136
- path = most_recent_uploaded['path']
137
- return path
138
-
139
- @CatchException
140
- def 虚空终端CodeInterpreter(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
141
- """
142
- txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
143
- llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行
144
- plugin_kwargs 插件模型的参数,暂时没有用武之地
145
- chatbot 聊天显示框的句柄,用于显示给用户
146
- history 聊天历史,前情提要
147
- system_prompt 给gpt的静默提醒
148
- web_port 当前软件运行的端口号
149
- """
150
- raise NotImplementedError
151
-
152
- # 清空历史,以免输入溢出
153
- history = []; clear_file_downloadzone(chatbot)
154
-
155
- # 基本信息:功能、贡献者
156
- chatbot.append([
157
- "函数插件功能?",
158
- "CodeInterpreter开源版, 此插件处于开发阶段, 建议暂时不要使用, 插件初始化中 ..."
159
- ])
160
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
161
-
162
- if have_any_recent_upload_files(chatbot):
163
- file_path = get_recent_file_prompt_support(chatbot)
164
- else:
165
- chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
166
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
167
-
168
- # 读取文件
169
- if ("recently_uploaded_files" in plugin_kwargs) and (plugin_kwargs["recently_uploaded_files"] == ""): plugin_kwargs.pop("recently_uploaded_files")
170
- recently_uploaded_files = plugin_kwargs.get("recently_uploaded_files", None)
171
- file_path = recently_uploaded_files[-1]
172
- file_type = file_path.split('.')[-1]
173
-
174
- # 粗心检查
175
- if is_the_upload_folder(txt):
176
- chatbot.append([
177
- "...",
178
- f"请在输入框内填写需求,然后再次点击该插件(文件路径 {file_path} 已经被记忆)"
179
- ])
180
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
181
- return
182
-
183
- # 开始干正事
184
- for j in range(5): # 最多重试5次
185
- try:
186
- code, installation_advance, txt, file_type, llm_kwargs, chatbot, history = \
187
- yield from gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history)
188
- code = get_code_block(code)
189
- res = make_module(code)
190
- instance = init_module_instance(res)
191
- break
192
- except Exception as e:
193
- chatbot.append([f"第{j}次代码生成尝试,失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
194
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
195
-
196
- # 代码生成结束, 开始执行
197
- try:
198
- import multiprocessing
199
- manager = multiprocessing.Manager()
200
- return_dict = manager.dict()
201
-
202
- p = multiprocessing.Process(target=subprocess_worker, args=(instance, file_path, return_dict))
203
- # only has 10 seconds to run
204
- p.start(); p.join(timeout=10)
205
- if p.is_alive(): p.terminate(); p.join()
206
- p.close()
207
- res = return_dict['result']
208
- # res = instance.run(file_path)
209
- except Exception as e:
210
- chatbot.append(["执行失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
211
- # chatbot.append(["如果是缺乏依赖,请参考以下建议", installation_advance])
212
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
213
- return
214
-
215
- # 顺利完成,收尾
216
- res = str(res)
217
- if os.path.exists(res):
218
- chatbot.append(["执行成功了,结果是一个有效文件", "结果:" + res])
219
- new_file_path = promote_file_to_downloadzone(res, chatbot=chatbot)
220
- chatbot = for_immediate_show_off_when_possible(file_type, new_file_path, chatbot)
221
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
222
- else:
223
- chatbot.append(["执行成功了,结果是一个字符串", "结果:" + res])
224
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
225
-
226
- """
227
- 测试:
228
- 裁剪图像,保留下半部分
229
- 交换图像的蓝色通道和红色通道
230
- 将图像转为灰度图像
231
- 将csv文件转excel表格
232
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/Langchain知识库.py DELETED
@@ -1,106 +0,0 @@
1
- from toolbox import CatchException, update_ui, ProxyNetworkActivate, update_ui_lastest_msg
2
- from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
3
-
4
-
5
-
6
- @CatchException
7
- def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
8
- """
9
- txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
10
- llm_kwargs gpt模型参数, 如温度和top_p等, 一般原样传递下去就行
11
- plugin_kwargs 插件模型的参数,暂时没有用武之地
12
- chatbot 聊天显示框的句柄,用于显示给用户
13
- history 聊天历史,前情提要
14
- system_prompt 给gpt的静默提醒
15
- web_port 当前软件运行的端口号
16
- """
17
- history = [] # 清空历史,以免输入溢出
18
-
19
- # < --------------------读取参数--------------- >
20
- if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
21
- kai_id = plugin_kwargs.get("advanced_arg", 'default')
22
-
23
- chatbot.append((f"向`{kai_id}`知识库中添加文件。", "[Local Message] 从一批文件(txt, md, tex)中读取数据构建知识库, 然后进行问答。"))
24
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
25
-
26
- # resolve deps
27
- try:
28
- from zh_langchain import construct_vector_store
29
- from langchain.embeddings.huggingface import HuggingFaceEmbeddings
30
- from .crazy_utils import knowledge_archive_interface
31
- except Exception as e:
32
- chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."])
33
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
34
- from .crazy_utils import try_install_deps
35
- try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
36
- yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
37
- return
38
-
39
- # < --------------------读取文件--------------- >
40
- file_manifest = []
41
- spl = ["txt", "doc", "docx", "email", "epub", "html", "json", "md", "msg", "pdf", "ppt", "pptx", "rtf"]
42
- for sp in spl:
43
- _, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}')
44
- file_manifest += file_manifest_tmp
45
-
46
- if len(file_manifest) == 0:
47
- chatbot.append(["没有找到任何可读取文件", "当前支持的格式包括: txt, md, docx, pptx, pdf, json等"])
48
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
49
- return
50
-
51
- # < -------------------预热文本向量化模组--------------- >
52
- chatbot.append(['<br/>'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."])
53
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
54
- print('Checking Text2vec ...')
55
- from langchain.embeddings.huggingface import HuggingFaceEmbeddings
56
- with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络
57
- HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
58
-
59
- # < -------------------构建知识库--------------- >
60
- chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
61
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
62
- print('Establishing knowledge archive ...')
63
- with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络
64
- kai = knowledge_archive_interface()
65
- kai.feed_archive(file_manifest=file_manifest, id=kai_id)
66
- kai_files = kai.get_loaded_file()
67
- kai_files = '<br/>'.join(kai_files)
68
- # chatbot.append(['知识库构建成功', "正在将知识库存储至cookie中"])
69
- # yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
70
- # chatbot._cookies['langchain_plugin_embedding'] = kai.get_current_archive_id()
71
- # chatbot._cookies['lock_plugin'] = 'crazy_functions.Langchain知识库->读取知识库作答'
72
- # chatbot.append(['完成', "“根据知识库作答”函数插件已经接管问答系统, 提问吧! 但注意, 您接下来不能再使用其他插件了,刷新页面即可以退出知识库问答模式。"])
73
- chatbot.append(['构建完成', f"当前知识库内的有效文件:\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"])
74
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
75
-
76
- @CatchException
77
- def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1):
78
- # resolve deps
79
- try:
80
- from zh_langchain import construct_vector_store
81
- from langchain.embeddings.huggingface import HuggingFaceEmbeddings
82
- from .crazy_utils import knowledge_archive_interface
83
- except Exception as e:
84
- chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."])
85
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
86
- from .crazy_utils import try_install_deps
87
- try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
88
- yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
89
- return
90
-
91
- # < ------------------- --------------- >
92
- kai = knowledge_archive_interface()
93
-
94
- if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
95
- kai_id = plugin_kwargs.get("advanced_arg", 'default')
96
- resp, prompt = kai.answer_with_archive_by_id(txt, kai_id)
97
-
98
- chatbot.append((txt, f'[知识库 {kai_id}] ' + prompt))
99
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
100
- gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
101
- inputs=prompt, inputs_show_user=txt,
102
- llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
103
- sys_prompt=system_prompt
104
- )
105
- history.extend((prompt, gpt_say))
106
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/Latex全文润色.py DELETED
@@ -1,245 +0,0 @@
1
- from toolbox import update_ui, trimmed_format_exc, promote_file_to_downloadzone, get_log_folder
2
- from toolbox import CatchException, report_exception, write_history_to_file, zip_folder
3
-
4
-
5
- class PaperFileGroup():
6
- def __init__(self):
7
- self.file_paths = []
8
- self.file_contents = []
9
- self.sp_file_contents = []
10
- self.sp_file_index = []
11
- self.sp_file_tag = []
12
-
13
- # count_token
14
- from request_llms.bridge_all import model_info
15
- enc = model_info["gpt-3.5-turbo"]['tokenizer']
16
- def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
17
- self.get_token_num = get_token_num
18
-
19
- def run_file_split(self, max_token_limit=1900):
20
- """
21
- 将长文本分离开来
22
- """
23
- for index, file_content in enumerate(self.file_contents):
24
- if self.get_token_num(file_content) < max_token_limit:
25
- self.sp_file_contents.append(file_content)
26
- self.sp_file_index.append(index)
27
- self.sp_file_tag.append(self.file_paths[index])
28
- else:
29
- from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
30
- segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
31
- for j, segment in enumerate(segments):
32
- self.sp_file_contents.append(segment)
33
- self.sp_file_index.append(index)
34
- self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
35
-
36
- print('Segmentation: done')
37
- def merge_result(self):
38
- self.file_result = ["" for _ in range(len(self.file_paths))]
39
- for r, k in zip(self.sp_file_result, self.sp_file_index):
40
- self.file_result[k] += r
41
-
42
- def write_result(self):
43
- manifest = []
44
- for path, res in zip(self.file_paths, self.file_result):
45
- with open(path + '.polish.tex', 'w', encoding='utf8') as f:
46
- manifest.append(path + '.polish.tex')
47
- f.write(res)
48
- return manifest
49
-
50
- def zip_result(self):
51
- import os, time
52
- folder = os.path.dirname(self.file_paths[0])
53
- t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
54
- zip_folder(folder, get_log_folder(), f'{t}-polished.zip')
55
-
56
-
57
- def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='polish'):
58
- import time, os, re
59
- from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
60
-
61
-
62
- # <-------- 读取Latex文件,删除其中的所有注释 ---------->
63
- pfg = PaperFileGroup()
64
-
65
- for index, fp in enumerate(file_manifest):
66
- with open(fp, 'r', encoding='utf-8', errors='replace') as f:
67
- file_content = f.read()
68
- # 定义注释的正则表达式
69
- comment_pattern = r'(?<!\\)%.*'
70
- # 使用正则表达式查找注释,并替换为空字符串
71
- clean_tex_content = re.sub(comment_pattern, '', file_content)
72
- # 记录删除注释后的文本
73
- pfg.file_paths.append(fp)
74
- pfg.file_contents.append(clean_tex_content)
75
-
76
- # <-------- 拆分过长的latex文件 ---------->
77
- pfg.run_file_split(max_token_limit=1024)
78
- n_split = len(pfg.sp_file_contents)
79
-
80
-
81
- # <-------- 多线程润色开始 ---------->
82
- if language == 'en':
83
- if mode == 'polish':
84
- inputs_array = ["Below is a section from an academic paper, polish this section to meet the academic standard, " +
85
- "improve the grammar, clarity and overall readability, do not modify any latex command such as \section, \cite and equations:" +
86
- f"\n\n{frag}" for frag in pfg.sp_file_contents]
87
- else:
88
- inputs_array = [r"Below is a section from an academic paper, proofread this section." +
89
- r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
90
- r"Answer me only with the revised text:" +
91
- f"\n\n{frag}" for frag in pfg.sp_file_contents]
92
- inputs_show_user_array = [f"Polish {f}" for f in pfg.sp_file_tag]
93
- sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
94
- elif language == 'zh':
95
- if mode == 'polish':
96
- inputs_array = [f"以下是一篇学术论文中的一段内容,请将此部分润色以满足学术标准,提高语法、清晰度和整体可读性,不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
97
- f"\n\n{frag}" for frag in pfg.sp_file_contents]
98
- else:
99
- inputs_array = [f"以下是一篇学术论文中的一段内容,请对这部分内容进行语法矫正。不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
100
- f"\n\n{frag}" for frag in pfg.sp_file_contents]
101
- inputs_show_user_array = [f"润色 {f}" for f in pfg.sp_file_tag]
102
- sys_prompt_array=["你是一位专业的中文学术论文作家。" for _ in range(n_split)]
103
-
104
-
105
- gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
106
- inputs_array=inputs_array,
107
- inputs_show_user_array=inputs_show_user_array,
108
- llm_kwargs=llm_kwargs,
109
- chatbot=chatbot,
110
- history_array=[[""] for _ in range(n_split)],
111
- sys_prompt_array=sys_prompt_array,
112
- # max_workers=5, # 并行任务数量限制,最多同时执行5个,其他的排队等待
113
- scroller_max_len = 80
114
- )
115
-
116
- # <-------- 文本碎片重组为完整的tex文件,整理结果为压缩包 ---------->
117
- try:
118
- pfg.sp_file_result = []
119
- for i_say, gpt_say in zip(gpt_response_collection[0::2], gpt_response_collection[1::2]):
120
- pfg.sp_file_result.append(gpt_say)
121
- pfg.merge_result()
122
- pfg.write_result()
123
- pfg.zip_result()
124
- except:
125
- print(trimmed_format_exc())
126
-
127
- # <-------- 整理结果,退出 ---------->
128
- create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md"
129
- res = write_history_to_file(gpt_response_collection, file_basename=create_report_file_name)
130
- promote_file_to_downloadzone(res, chatbot=chatbot)
131
-
132
- history = gpt_response_collection
133
- chatbot.append((f"{fp}完成了吗?", res))
134
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
135
-
136
-
137
- @CatchException
138
- def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
139
- # 基本信息:功能、贡献者
140
- chatbot.append([
141
- "函数插件功能?",
142
- "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky。(注意,此插件不调用Latex,如果有Latex环境,请使用「Latex英文纠错+高亮修正位置(需Latex)插件」"])
143
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
144
-
145
- # 尝试导入依赖,如果缺少依赖,则给出安装建议
146
- try:
147
- import tiktoken
148
- except:
149
- report_exception(chatbot, history,
150
- a=f"解析项目: {txt}",
151
- b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
152
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
153
- return
154
- history = [] # 清空历史,以免输入溢出
155
- import glob, os
156
- if os.path.exists(txt):
157
- project_folder = txt
158
- else:
159
- if txt == "": txt = '空空如也的输入栏'
160
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
161
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
162
- return
163
- file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
164
- if len(file_manifest) == 0:
165
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
166
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
167
- return
168
- yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en')
169
-
170
-
171
-
172
-
173
-
174
-
175
- @CatchException
176
- def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
177
- # 基本信息:功能、贡献者
178
- chatbot.append([
179
- "函数插件功能?",
180
- "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky"])
181
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
182
-
183
- # 尝试导入依赖,如果缺少依赖,则给出安装建议
184
- try:
185
- import tiktoken
186
- except:
187
- report_exception(chatbot, history,
188
- a=f"解析项目: {txt}",
189
- b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
190
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
191
- return
192
- history = [] # 清空历史,以免输入溢出
193
- import glob, os
194
- if os.path.exists(txt):
195
- project_folder = txt
196
- else:
197
- if txt == "": txt = '空空如也的输入栏'
198
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
199
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
200
- return
201
- file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
202
- if len(file_manifest) == 0:
203
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
204
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
205
- return
206
- yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh')
207
-
208
-
209
-
210
-
211
- @CatchException
212
- def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
213
- # 基本信息:功能、贡献者
214
- chatbot.append([
215
- "函数插件功能?",
216
- "对整个Latex项目进行纠错。函数插件贡献者: Binary-Husky"])
217
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
218
-
219
- # 尝试导入依赖,如果缺少依赖,则给出安装建议
220
- try:
221
- import tiktoken
222
- except:
223
- report_exception(chatbot, history,
224
- a=f"解析项目: {txt}",
225
- b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
226
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
227
- return
228
- history = [] # 清空历史,以免输入溢出
229
- import glob, os
230
- if os.path.exists(txt):
231
- project_folder = txt
232
- else:
233
- if txt == "": txt = '空空如也的输入栏'
234
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
235
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
236
- return
237
- file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
238
- if len(file_manifest) == 0:
239
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
240
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
241
- return
242
- yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='proofread')
243
-
244
-
245
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/Latex全文翻译.py DELETED
@@ -1,176 +0,0 @@
1
- from toolbox import update_ui, promote_file_to_downloadzone
2
- from toolbox import CatchException, report_exception, write_history_to_file
3
- fast_debug = False
4
-
5
- class PaperFileGroup():
6
- def __init__(self):
7
- self.file_paths = []
8
- self.file_contents = []
9
- self.sp_file_contents = []
10
- self.sp_file_index = []
11
- self.sp_file_tag = []
12
-
13
- # count_token
14
- from request_llms.bridge_all import model_info
15
- enc = model_info["gpt-3.5-turbo"]['tokenizer']
16
- def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
17
- self.get_token_num = get_token_num
18
-
19
- def run_file_split(self, max_token_limit=1900):
20
- """
21
- 将长文本分离开来
22
- """
23
- for index, file_content in enumerate(self.file_contents):
24
- if self.get_token_num(file_content) < max_token_limit:
25
- self.sp_file_contents.append(file_content)
26
- self.sp_file_index.append(index)
27
- self.sp_file_tag.append(self.file_paths[index])
28
- else:
29
- from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
30
- segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
31
- for j, segment in enumerate(segments):
32
- self.sp_file_contents.append(segment)
33
- self.sp_file_index.append(index)
34
- self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
35
-
36
- print('Segmentation: done')
37
-
38
- def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'):
39
- import time, os, re
40
- from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
41
-
42
- # <-------- 读取Latex文件,删除其中的所有注释 ---------->
43
- pfg = PaperFileGroup()
44
-
45
- for index, fp in enumerate(file_manifest):
46
- with open(fp, 'r', encoding='utf-8', errors='replace') as f:
47
- file_content = f.read()
48
- # 定义注释的正则表达式
49
- comment_pattern = r'(?<!\\)%.*'
50
- # 使用正则表达式查找注释,并替换为空字符串
51
- clean_tex_content = re.sub(comment_pattern, '', file_content)
52
- # 记录删除注释后的文本
53
- pfg.file_paths.append(fp)
54
- pfg.file_contents.append(clean_tex_content)
55
-
56
- # <-------- 拆分过长的latex文件 ---------->
57
- pfg.run_file_split(max_token_limit=1024)
58
- n_split = len(pfg.sp_file_contents)
59
-
60
- # <-------- 抽取摘要 ---------->
61
- # if language == 'en':
62
- # abs_extract_inputs = f"Please write an abstract for this paper"
63
-
64
- # # 单线,获取文章meta信息
65
- # paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
66
- # inputs=abs_extract_inputs,
67
- # inputs_show_user=f"正在抽取摘要信息。",
68
- # llm_kwargs=llm_kwargs,
69
- # chatbot=chatbot, history=[],
70
- # sys_prompt="Your job is to collect information from materials。",
71
- # )
72
-
73
- # <-------- 多线程润色开始 ---------->
74
- if language == 'en->zh':
75
- inputs_array = ["Below is a section from an English academic paper, translate it into Chinese, do not modify any latex command such as \section, \cite and equations:" +
76
- f"\n\n{frag}" for frag in pfg.sp_file_contents]
77
- inputs_show_user_array = [f"翻译 {f}" for f in pfg.sp_file_tag]
78
- sys_prompt_array = ["You are a professional academic paper translator." for _ in range(n_split)]
79
- elif language == 'zh->en':
80
- inputs_array = [f"Below is a section from a Chinese academic paper, translate it into English, do not modify any latex command such as \section, \cite and equations:" +
81
- f"\n\n{frag}" for frag in pfg.sp_file_contents]
82
- inputs_show_user_array = [f"翻译 {f}" for f in pfg.sp_file_tag]
83
- sys_prompt_array = ["You are a professional academic paper translator." for _ in range(n_split)]
84
-
85
- gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
86
- inputs_array=inputs_array,
87
- inputs_show_user_array=inputs_show_user_array,
88
- llm_kwargs=llm_kwargs,
89
- chatbot=chatbot,
90
- history_array=[[""] for _ in range(n_split)],
91
- sys_prompt_array=sys_prompt_array,
92
- # max_workers=5, # OpenAI所允许的最大并行过载
93
- scroller_max_len = 80
94
- )
95
-
96
- # <-------- 整理结果,退出 ---------->
97
- create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md"
98
- res = write_history_to_file(gpt_response_collection, create_report_file_name)
99
- promote_file_to_downloadzone(res, chatbot=chatbot)
100
- history = gpt_response_collection
101
- chatbot.append((f"{fp}完成了吗?", res))
102
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
103
-
104
-
105
-
106
-
107
-
108
- @CatchException
109
- def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
110
- # 基本信息:功能、贡献者
111
- chatbot.append([
112
- "函数插件功能?",
113
- "对整个Latex项目进行翻译。函数插件贡献者: Binary-Husky"])
114
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
115
-
116
- # 尝试导入依赖,如果缺少依赖,则给出安装建议
117
- try:
118
- import tiktoken
119
- except:
120
- report_exception(chatbot, history,
121
- a=f"解析项目: {txt}",
122
- b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
123
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
124
- return
125
- history = [] # 清空历史,以免输入溢出
126
- import glob, os
127
- if os.path.exists(txt):
128
- project_folder = txt
129
- else:
130
- if txt == "": txt = '空空如也的输入栏'
131
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
132
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
133
- return
134
- file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
135
- if len(file_manifest) == 0:
136
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
137
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
138
- return
139
- yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en->zh')
140
-
141
-
142
-
143
-
144
-
145
- @CatchException
146
- def Latex中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
147
- # 基本信息:功能、贡献者
148
- chatbot.append([
149
- "函数插件功能?",
150
- "对整个Latex项目进行翻译。函数插件贡献者: Binary-Husky"])
151
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
152
-
153
- # 尝试导入依赖,如果缺少依赖,则给出安装建议
154
- try:
155
- import tiktoken
156
- except:
157
- report_exception(chatbot, history,
158
- a=f"解析项目: {txt}",
159
- b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
160
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
161
- return
162
- history = [] # 清空历史,以免输入溢出
163
- import glob, os
164
- if os.path.exists(txt):
165
- project_folder = txt
166
- else:
167
- if txt == "": txt = '空空如也的输入栏'
168
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
169
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
170
- return
171
- file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
172
- if len(file_manifest) == 0:
173
- report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
174
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
175
- return
176
- yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh->en')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/Latex输出PDF.py DELETED
@@ -1,484 +0,0 @@
1
- from toolbox import update_ui, trimmed_format_exc, get_conf, get_log_folder, promote_file_to_downloadzone
2
- from toolbox import CatchException, report_exception, update_ui_lastest_msg, zip_result, gen_time_str
3
- from functools import partial
4
- import glob, os, requests, time, json, tarfile
5
-
6
- pj = os.path.join
7
- ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
8
-
9
-
10
- # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 工具函数 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
11
- # 专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '
12
def switch_prompt(pfg, mode, more_requirement):
    """
    Generate the user prompts and system prompts for proofreading or translating.

    Args:
    - pfg: object exposing `sp_file_contents`, the list of text fragments to process.
    - mode: A string specifying the mode, either 'proofread_en' or 'translate_zh'.
    - more_requirement: extra instruction text spliced verbatim into every prompt
      (may be an empty string).

    Returns:
    - inputs_array: A list of strings, one prompt per fragment.
    - sys_prompt_array: A list of strings, one system prompt per fragment.

    Raises:
    - AssertionError: if `mode` is not one of the supported values.
    """
    fragments = pfg.sp_file_contents
    n_split = len(fragments)
    if mode == 'proofread_en':
        inputs_array = [r"Below is a section from an academic paper, proofread this section." +
                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
                        r"Answer me only with the revised text:" +
                        f"\n\n{frag}" for frag in fragments]
        sys_prompt_array = ["You are a professional academic paper writer."] * n_split
    elif mode == 'translate_zh':
        inputs_array = [
            r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement +
            r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
            r"Answer me only with the translated text:" +
            f"\n\n{frag}" for frag in fragments]
        sys_prompt_array = ["You are a professional translator."] * n_split
    else:
        # Raised explicitly (instead of `assert False`) so the check survives `python -O`;
        # the exception type seen by callers is unchanged.
        raise AssertionError("未知指令")
    return inputs_array, sys_prompt_array
40
-
41
-
42
def desend_to_extracted_folder_if_exist(project_folder):
    """
    Descend into the extracted folder if it exists, otherwise return the original folder.

    Every direct sub-directory is inspected (in sorted order, so the result does not
    depend on the arbitrary order returned by glob); the first one whose path ends
    with '.extract' wins.

    Args:
    - project_folder: A string specifying the folder path.

    Returns:
    - A string specifying the path to the extracted folder, or the original folder if there is no extracted folder.
    """
    sub_dirs = sorted(f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f))
    for candidate in sub_dirs:
        if candidate.endswith('.extract'):
            return candidate
    return project_folder
56
-
57
-
58
def move_project(project_folder, arxiv_id=None):
    """
    Create a new work folder and copy the project folder to it.

    Args:
    - project_folder: A string specifying the folder path of the project.
    - arxiv_id: optional string; when given, the work folder is
      `<ARXIV_CACHE_DIR>/<arxiv_id>/workfolder`, otherwise it is
      `<get_log_folder()>/<gen_time_str()>`.

    Returns:
    - A string specifying the path to the new work folder.
    """
    import shutil, time
    time.sleep(2)  # avoid time string conflict
    if arxiv_id is not None:
        new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
    else:
        new_workfolder = f'{get_log_folder()}/{gen_time_str()}'

    # Best-effort removal of a stale work folder; a missing folder is not an error.
    shutil.rmtree(new_workfolder, ignore_errors=True)

    # align subfolder if there is a folder wrapper
    items = [item for item in glob.glob(pj(project_folder, '*'))
             if os.path.basename(item) != '__MACOSX']
    has_top_level_tex = len(glob.glob(pj(project_folder, '*.tex'))) > 0
    if not has_top_level_tex and len(items) == 1 and os.path.isdir(items[0]):
        # The project is wrapped in a single directory: copy that directory instead.
        project_folder = items[0]

    shutil.copytree(src=project_folder, dst=new_workfolder)
    return new_workfolder
87
-
88
-
89
def arxiv_download(chatbot, history, txt, allow_cache=True):
    """
    Resolve `txt` to a local LaTeX source folder, downloading it from arxiv when needed.

    This is a generator: it yields whatever `update_ui` / `update_ui_lastest_msg`
    yield, and its return value (obtained with `yield from`) is a 2-tuple.

    Args:
    - chatbot: chat message list; progress messages are appended to it.
    - history: history object forwarded to the UI update helpers.
    - txt: an arxiv ID, an `https://arxiv.org/abs/...` URL, or a local path.
    - allow_cache: when True, an already translated PDF found in the cache is returned directly.

    Returns:
    - (txt, None) when `txt` is not an arxiv reference (treated as a local file).
    - (error message, None) when the arxiv URL is not in the `/abs/` format.
    - (path of the cached translated PDF, arxiv_id) when a cached translation is used.
    - (path of the extracted source folder, arxiv_id) otherwise.
    """
    import re

    def check_cached_translation_pdf(arxiv_id):
        # Return the cached translated PDF path (also exposing it, and the comparison PDF
        # if present, through promote_file_to_downloadzone), or False when nothing is cached.
        translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation')
        os.makedirs(translation_dir, exist_ok=True)
        target_file = pj(translation_dir, 'translate_zh.pdf')
        if os.path.exists(target_file):
            promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
            target_file_compare = pj(translation_dir, 'comparison.pdf')
            if os.path.exists(target_file_compare):
                promote_file_to_downloadzone(target_file_compare, rename_file=None, chatbot=chatbot)
            return target_file
        return False

    def is_float(s):
        try:
            float(s)
            return True
        except ValueError:
            return False

    if ('.' in txt) and ('/' not in txt) and is_float(txt):  # is arxiv ID
        txt = 'https://arxiv.org/abs/' + txt.strip()
    if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]):  # is arxiv ID
        txt = 'https://arxiv.org/abs/' + txt[:10]

    if not txt.startswith('https://arxiv.org'):
        return txt, None  # local file, skip the download

    # <-------------- inspect format ------------->
    chatbot.append([f"检测到arxiv文档连接", '尝试下载 ...'])
    yield from update_ui(chatbot=chatbot, history=history)
    time.sleep(1)  # refresh the UI

    url_ = txt  # https://arxiv.org/abs/1707.06690
    if not txt.startswith('https://arxiv.org/abs/'):
        msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}。"
        yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history)  # refresh the UI
        return msg, None

    # <-------------- set format ------------->
    arxiv_id = url_.split('/abs/')[-1]
    # Drop the version suffix. A fixed 10-character truncation is only right for
    # 5-digit IDs (e.g. "1707.06690v2"); it corrupts 4-digit IDs such as "1401.1234v2".
    new_style_id = re.match(r'\d{4}\.\d{4,5}', arxiv_id)
    if new_style_id:
        arxiv_id = new_style_id.group(0)
    else:
        arxiv_id = re.sub(r'v\d+$', '', arxiv_id)
    cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
    if cached_translation_pdf and allow_cache: return cached_translation_pdf, arxiv_id

    url_tar = url_.replace('/abs/', '/e-print/')
    translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
    extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract')
    os.makedirs(translation_dir, exist_ok=True)

    # <-------------- download arxiv source file ------------->
    dst = pj(translation_dir, arxiv_id + '.tar')
    if os.path.exists(dst):
        yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history)  # refresh the UI
    else:
        yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history)  # refresh the UI
        proxies = get_conf('proxies')
        # NOTE(review): the HTTP status is not checked, so an error page would be stored
        # (and later reused) as the .tar file — confirm whether that should be rejected here.
        r = requests.get(url_tar, proxies=proxies)
        with open(dst, 'wb+') as f:
            f.write(r.content)

    # <-------------- extract file ------------->
    yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history)  # refresh the UI
    from toolbox import extract_archive
    extract_archive(file_path=dst, dest_dir=extract_dst)
    return extract_dst, arxiv_id
154
-
155
-
156
def pdf2tex_project(pdf_file_path):
    """
    Convert a PDF into a LaTeX project through the Mathpix API.

    The PDF is uploaded, the conversion status is polled until it completes, and the
    resulting `tex.zip` is saved and unpacked under `<pdf folder>/mathpix_output/`.

    Args:
    - pdf_file_path: path of the PDF file to convert.

    Returns:
    - The directory the archive was extracted into, or None when the upload is
      rejected or Mathpix reports an error for the conversion.
    """
    # Mathpix API credentials
    app_id, app_key = get_conf('MATHPIX_APPID', 'MATHPIX_APPKEY')
    headers = {"app_id": app_id, "app_key": app_key}

    # Step 1: Send PDF file for processing
    options = {
        "conversion_formats": {"tex.zip": True},
        "math_inline_delimiters": ["$", "$"],
        "rm_spaces": True
    }

    # The context manager closes the PDF handle once the upload is finished.
    with open(pdf_file_path, "rb") as pdf_file:
        response = requests.post(url="https://api.mathpix.com/v3/pdf",
                                 headers=headers,
                                 data={"options_json": json.dumps(options)},
                                 files={"file": pdf_file})

    if not response.ok:
        print(f"Error sending PDF for processing. Status code: {response.status_code}")
        return None

    pdf_id = response.json()["pdf_id"]
    print(f"PDF processing initiated. PDF ID: {pdf_id}")

    # Step 2: Check processing status
    while True:
        conversion_response = requests.get(f"https://api.mathpix.com/v3/pdf/{pdf_id}", headers=headers)
        conversion_data = conversion_response.json()

        if conversion_data["status"] == "completed":
            print("PDF processing completed.")
            break
        elif conversion_data["status"] == "error":
            print("Error occurred during processing.")
            # Stop polling: without this the loop never terminates on a failed conversion.
            return None
        else:
            print(f"Processing status: {conversion_data['status']}")
            time.sleep(5)  # wait for a few seconds before checking again

    # Step 3: Save results to local files
    output_dir = os.path.join(os.path.dirname(pdf_file_path), 'mathpix_output')
    os.makedirs(output_dir, exist_ok=True)

    url = f"https://api.mathpix.com/v3/pdf/{pdf_id}.tex"
    response = requests.get(url, headers=headers)
    # File name without its last extension; remaining dots are replaced by underscores.
    file_name_wo_dot = '_'.join(os.path.basename(pdf_file_path).split('.')[:-1])
    output_name = f"{file_name_wo_dot}.tex.zip"
    output_path = os.path.join(output_dir, output_name)
    with open(output_path, "wb") as output_file:
        output_file.write(response.content)
    print(f"tex.zip file saved at: {output_path}")

    import zipfile
    unzip_dir = os.path.join(output_dir, file_name_wo_dot)
    with zipfile.ZipFile(output_path, 'r') as zip_ref:
        zip_ref.extractall(unzip_dir)

    return unzip_dir
215
-
216
-
217
- # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 插件主程序1 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
218
-
219
-
220
@CatchException
def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
    """
    Plugin entry point: proofread a local LaTeX project and compile the result to PDF.

    This is a generator: it yields the UI updates produced by `update_ui` and by the
    delegated helpers `Latex精细分解与转化` / `编译Latex`.

    Args:
    - txt: path of the local LaTeX project typed into the input box.
    - llm_kwargs, system_prompt: forwarded to `Latex精细分解与转化`.
    - plugin_kwargs: may carry "advanced_arg", extra requirement text added to every prompt.
    - chatbot: chat message list; status and error messages are appended to it.
    - history: incoming history; it is discarded and replaced by an empty list.
    - user_request: not used in this function.

    Returns:
    - The value returned by `编译Latex` (used here as a success flag), or None when
      the plugin stops early because of a missing dependency or invalid input.
    """
    # <-------------- information about this plugin ------------->
    chatbot.append(["函数插件功能?",
                    "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # <-------------- more requirements ------------->
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    more_req = plugin_kwargs.get("advanced_arg", "")
    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)

    # <-------------- check deps ------------->
    try:
        import glob, os, time, subprocess
        # Launching pdflatex raises if the executable cannot be found.
        subprocess.Popen(['pdflatex', '-version'])
        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
    except Exception:
        chatbot.append([f"解析项目: {txt}",
                        f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- clear history and read input ------------->
    history = []
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    file_manifest = glob.glob(f'{project_folder}/**/*.tex', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- if is a zip/tar file ------------->
    project_folder = desend_to_extracted_folder_if_exist(project_folder)

    # <-------------- move latex project away from temp folder ------------->
    project_folder = move_project(project_folder, arxiv_id=None)

    # <-------------- if merge_proofread_en is already generated, skip gpt req ------------->
    if not os.path.exists(project_folder + '/merge_proofread_en.tex'):
        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
                                       chatbot, history, system_prompt, mode='proofread_en',
                                       switch_prompt=_switch_prompt_)

    # <-------------- compile PDF ------------->
    success = yield from 编译Latex(chatbot, history, main_file_original='merge',
                                   main_file_modified='merge_proofread_en',
                                   work_folder_original=project_folder, work_folder_modified=project_folder,
                                   work_folder=project_folder)

    # <-------------- zip PDF ------------->
    zip_res = zip_result(project_folder)
    if success:
        chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
    else:
        chatbot.append((f"失败了",
                        '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
    # The archive is offered for download in both cases.
    yield from update_ui(chatbot=chatbot, history=history)
    time.sleep(1)  # refresh the UI
    promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)

    # <-------------- we are done ------------->
    return success
292
-
293
-
294
- # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 插件主程序2 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
295
-
296
- @CatchException
297
- def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
298
- # <-------------- information about this plugin ------------->
299
- chatbot.append([
300
- "函数插件功能?",
301
- "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"])
302
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
303
-
304
- # <-------------- more requirements ------------->
305
- if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
306
- more_req = plugin_kwargs.get("advanced_arg", "")
307
- no_cache = more_req.startswith("--no-cache")
308
- if no_cache: more_req.lstrip("--no-cache")
309
- allow_cache = not no_cache
310
- _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
311
-
312
- # <-------------- check deps ------------->
313
- try:
314
- import glob, os, time, subprocess
315
- subprocess.Popen(['pdflatex', '-version'])
316
- from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
317
- except Exception as e:
318
- chatbot.append([f"解析项目: {txt}",
319
- f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
320
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
321
- return
322
-
323
- # <-------------- clear history and read input ------------->
324
- history = []
325
- try:
326
- txt, arxiv_id = yield from arxiv_download(chatbot, history, txt, allow_cache)
327
- except tarfile.ReadError as e:
328
- yield from update_ui_lastest_msg(
329
- "无法自动下载该论文的Latex源码,请前往arxiv打开此论文下载页面,点other Formats,然后download source手动下载latex源码包。接下来调用本地Latex翻译插件即可。",
330
- chatbot=chatbot, history=history)
331
- return
332
-
333
- if txt.endswith('.pdf'):
334
- report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"发现已经存在翻译好的PDF文档")
335
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
336
- return
337
-
338
- if os.path.exists(txt):
339
- project_folder = txt
340
- else:
341
- if txt == "": txt = '空空如也的输入栏'
342
- report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无法处理: {txt}")
343
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
344
- return
345
-
346
- file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
347
- if len(file_manifest) == 0:
348
- report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
349
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
350
- return
351
-
352
- # <-------------- if is a zip/tar file ------------->
353
- project_folder = desend_to_extracted_folder_if_exist(project_folder)
354
-
355
- # <-------------- move latex project away from temp folder ------------->
356
- project_folder = move_project(project_folder, arxiv_id)
357
-
358
- # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
359
- if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
360
- yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
361
- chatbot, history, system_prompt, mode='translate_zh',
362
- switch_prompt=_switch_prompt_)
363
-
364
- # <-------------- compile PDF ------------->
365
- success = yield from 编译Latex(chatbot, history, main_file_original='merge',
366
- main_file_modified='merge_translate_zh', mode='translate_zh',
367
- work_folder_original=project_folder, work_folder_modified=project_folder,
368
- work_folder=project_folder)
369
-
370
- # <-------------- zip PDF ------------->
371
- zip_res = zip_result(project_folder)
372
- if success:
373
- chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
374
- yield from update_ui(chatbot=chatbot, history=history);
375
- time.sleep(1) # 刷新界面
376
- promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
377
- else:
378
- chatbot.append((f"失败了",
379
- '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux,请检查系统字体(见Github wiki) ...'))
380
- yield from update_ui(chatbot=chatbot, history=history);
381
- time.sleep(1) # 刷新界面
382
- promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
383
-
384
- # <-------------- we are done ------------->
385
- return success
386
-
387
-
388
- # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 插件主程序3 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
389
-
390
- @CatchException
391
- def PDF翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
392
- # <-------------- information about this plugin ------------->
393
- chatbot.append([
394
- "函数插件功能?",
395
- "将PDF转换为Latex项目,翻译为中文后重新编译为PDF。函数插件贡献者: Marroh。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"])
396
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
397
-
398
- # <-------------- more requirements ------------->
399
- if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
400
- more_req = plugin_kwargs.get("advanced_arg", "")
401
- no_cache = more_req.startswith("--no-cache")
402
- if no_cache: more_req.lstrip("--no-cache")
403
- allow_cache = not no_cache
404
- _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
405
-
406
- # <-------------- check deps ------------->
407
- try:
408
- import glob, os, time, subprocess
409
- subprocess.Popen(['pdflatex', '-version'])
410
- from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
411
- except Exception as e:
412
- chatbot.append([f"解析项目: {txt}",
413
- f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
414
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
415
- return
416
-
417
- # <-------------- clear history and read input ------------->
418
- if os.path.exists(txt):
419
- project_folder = txt
420
- else:
421
- if txt == "": txt = '空空如也的输入栏'
422
- report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无法处理: {txt}")
423
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
424
- return
425
-
426
- file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)]
427
- if len(file_manifest) == 0:
428
- report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.pdf文件: {txt}")
429
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
430
- return
431
- if len(file_manifest) != 1:
432
- report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"不支持同时处理多个pdf文件: {txt}")
433
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
434
- return
435
- app_id, app_key = get_conf('MATHPIX_APPID', 'MATHPIX_APPKEY')
436
- if len(app_id) == 0 or len(app_key) == 0:
437
- report_exception(chatbot, history, a="缺失 MATHPIX_APPID 和 MATHPIX_APPKEY。", b=f"请配置 MATHPIX_APPID 和 MATHPIX_APPKEY")
438
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
439
- return
440
-
441
- # <-------------- convert pdf into tex ------------->
442
- project_folder = pdf2tex_project(file_manifest[0])
443
-
444
- # Translate English Latex to Chinese Latex, and compile it
445
- file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
446
- if len(file_manifest) == 0:
447
- report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
448
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
449
- return
450
-
451
- # <-------------- if is a zip/tar file ------------->
452
- project_folder = desend_to_extracted_folder_if_exist(project_folder)
453
-
454
- # <-------------- move latex project away from temp folder ------------->
455
- project_folder = move_project(project_folder)
456
-
457
- # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
458
- if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
459
- yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
460
- chatbot, history, system_prompt, mode='translate_zh',
461
- switch_prompt=_switch_prompt_)
462
-
463
- # <-------------- compile PDF ------------->
464
- success = yield from 编译Latex(chatbot, history, main_file_original='merge',
465
- main_file_modified='merge_translate_zh', mode='translate_zh',
466
- work_folder_original=project_folder, work_folder_modified=project_folder,
467
- work_folder=project_folder)
468
-
469
- # <-------------- zip PDF ------------->
470
- zip_res = zip_result(project_folder)
471
- if success:
472
- chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
473
- yield from update_ui(chatbot=chatbot, history=history);
474
- time.sleep(1) # 刷新界面
475
- promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
476
- else:
477
- chatbot.append((f"失败了",
478
- '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux,请检查系统字体(见Github wiki) ...'))
479
- yield from update_ui(chatbot=chatbot, history=history);
480
- time.sleep(1) # 刷新界面
481
- promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
482
-
483
- # <-------------- we are done ------------->
484
- return success
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/Latex输出PDF结果.py DELETED
@@ -1,306 +0,0 @@
1
- from toolbox import update_ui, trimmed_format_exc, get_conf, get_log_folder, promote_file_to_downloadzone
2
- from toolbox import CatchException, report_exception, update_ui_lastest_msg, zip_result, gen_time_str
3
- from functools import partial
4
- import glob, os, requests, time
5
- pj = os.path.join
6
- ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
7
-
8
- # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 工具函数 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
9
- # 专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '
10
def switch_prompt(pfg, mode, more_requirement):
    """
    Build the user prompts and system prompts for proofreading or translating.

    Args:
    - pfg: Object exposing `sp_file_contents`, the sequence of text fragments;
           one prompt pair is produced per fragment.
    - mode: Either 'proofread_en' or 'translate_zh'.
    - more_requirement: Extra instruction text spliced verbatim into every user prompt.

    Returns:
    - inputs_array: A list of user prompts, one per fragment.
    - sys_prompt_array: A list of system prompts, same length as inputs_array.

    Raises:
    - AssertionError: If `mode` is not one of the supported values.
    """
    fragments = pfg.sp_file_contents
    n_split = len(fragments)
    if mode == 'proofread_en':
        inputs_array = [r"Below is a section from an academic paper, proofread this section." +
                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
                        r"Answer me only with the revised text:" +
                        f"\n\n{frag}" for frag in fragments]
        sys_prompt_array = ["You are a professional academic paper writer."] * n_split
    elif mode == 'translate_zh':
        inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement +
                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
                        r"Answer me only with the translated text:" +
                        f"\n\n{frag}" for frag in fragments]
        sys_prompt_array = ["You are a professional translator."] * n_split
    else:
        # Raised explicitly instead of `assert False`: a bare assert is stripped under
        # `python -O`, which would surface as an UnboundLocalError at the return below.
        raise AssertionError("未知指令")
    return inputs_array, sys_prompt_array
37
-
38
def desend_to_extracted_folder_if_exist(project_folder):
    """
    Return the `.extract` sub-folder of `project_folder` when it is the first
    sub-directory found, otherwise return `project_folder` unchanged.

    Args:
    - project_folder: A string specifying the folder path.

    Returns:
    - The path of the extracted folder, or the original folder when no
      sub-directory exists or the first one does not end with '.extract'.
    """
    subfolders = []
    for entry in glob.glob(f'{project_folder}/*'):
        if os.path.isdir(entry):
            subfolders.append(entry)
    # Only the first sub-directory reported by glob is inspected.
    if subfolders and subfolders[0].endswith('.extract'):
        return subfolders[0]
    return project_folder
52
-
53
def move_project(project_folder, arxiv_id=None):
    """
    Create a new work folder and copy the project folder to it.

    Args:
    - project_folder: A string specifying the folder path of the project.
    - arxiv_id: Optional identifier; when given, the work folder is placed under
                ARXIV_CACHE_DIR/<arxiv_id>/workfolder, otherwise under a
                time-stamped sub-folder of the log folder.

    Returns:
    - A string specifying the path to the new work folder.
    """
    import shutil
    import time
    time.sleep(2)  # avoid time string conflict
    if arxiv_id is not None:
        new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
    else:
        new_workfolder = f'{get_log_folder()}/{gen_time_str()}'
    # Remove any stale copy. A missing folder is the normal case, so removal errors
    # are ignored; unlike a bare `except:`, this no longer swallows KeyboardInterrupt
    # or programming errors.
    shutil.rmtree(new_workfolder, ignore_errors=True)

    # align subfolder if there is a folder wrapper
    items = [item for item in glob.glob(pj(project_folder, '*'))
             if os.path.basename(item) != '__MACOSX']
    has_no_top_level_tex = len(glob.glob(pj(project_folder, '*.tex'))) == 0
    if has_no_top_level_tex and len(items) == 1 and os.path.isdir(items[0]):
        project_folder = items[0]

    shutil.copytree(src=project_folder, dst=new_workfolder)
    return new_workfolder
82
-
83
def arxiv_download(chatbot, history, txt, allow_cache=True):
    """
    Generator that resolves an arXiv ID / URL into a local LaTeX source folder.

    UI updates are yielded while it runs; the result is delivered as the generator's
    return value (use `result = yield from arxiv_download(...)`).

    Args:
    - chatbot: Chat display handle; progress messages are appended to it.
    - history: Chat history forwarded to the UI update helpers.
    - txt: User input. A bare arXiv ID (e.g. "1707.06690"), an
           "https://arxiv.org/abs/..." URL, or anything else (returned untouched).
    - allow_cache: When true, an already translated PDF found in the cache is returned
                   instead of downloading the source again.

    Returns (as a 2-tuple):
    - (txt, None) when the input is not an arXiv reference.
    - (error_message, None) when the arXiv URL is not of the /abs/ form.
    - (cached_pdf_path, arxiv_id) when a cached translation exists and allow_cache is set.
    - (extract_folder, arxiv_id) after downloading (or reusing) and extracting the source.

    Raises:
    - requests.HTTPError: If the e-print download answers with an HTTP error status.
    """
    def check_cached_translation_pdf(arxiv_id):
        # Returns the cached translated PDF path (after promoting it, and the comparison
        # PDF when present, to the download zone), or False when nothing is cached.
        translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation')
        if not os.path.exists(translation_dir):
            os.makedirs(translation_dir)
        target_file = pj(translation_dir, 'translate_zh.pdf')
        if os.path.exists(target_file):
            promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
            target_file_compare = pj(translation_dir, 'comparison.pdf')
            if os.path.exists(target_file_compare):
                promote_file_to_downloadzone(target_file_compare, rename_file=None, chatbot=chatbot)
            return target_file
        return False

    def is_float(s):
        # arXiv IDs such as "1707.06690" parse as floats; used as a cheap format check.
        try:
            float(s)
            return True
        except ValueError:
            return False

    if ('.' in txt) and ('/' not in txt) and is_float(txt):  # is arxiv ID
        txt = 'https://arxiv.org/abs/' + txt.strip()
    if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]):  # is arxiv ID
        txt = 'https://arxiv.org/abs/' + txt[:10]
    if not txt.startswith('https://arxiv.org'):
        return txt, None

    # <-------------- inspect format ------------->
    chatbot.append([f"检测到arxiv文档连接", '尝试下载 ...'])
    yield from update_ui(chatbot=chatbot, history=history)
    time.sleep(1)  # refresh the UI

    url_ = txt  # https://arxiv.org/abs/1707.06690
    if not txt.startswith('https://arxiv.org/abs/'):
        msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}。"
        yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history)  # refresh the UI
        return msg, None

    # <-------------- set format ------------->
    arxiv_id = url_.split('/abs/')[-1]
    if 'v' in arxiv_id: arxiv_id = arxiv_id[:10]  # drop the version suffix
    cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
    if cached_translation_pdf and allow_cache: return cached_translation_pdf, arxiv_id

    url_tar = url_.replace('/abs/', '/e-print/')
    translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
    extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract')
    os.makedirs(translation_dir, exist_ok=True)

    # <-------------- download arxiv source file ------------->
    dst = pj(translation_dir, arxiv_id + '.tar')
    if os.path.exists(dst):
        yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history)  # refresh the UI
    else:
        yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history)  # refresh the UI
        proxies = get_conf('proxies')
        r = requests.get(url_tar, proxies=proxies)
        # Fail before touching the cache: otherwise an HTTP error page would be stored as
        # the ".tar" file and silently reused ("调用缓存") on every later run.
        r.raise_for_status()
        with open(dst, 'wb+') as f:
            f.write(r.content)

    # <-------------- extract file ------------->
    yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history)  # refresh the UI
    from toolbox import extract_archive
    extract_archive(file_path=dst, dest_dir=extract_dst)
    return extract_dst, arxiv_id
145
- # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 插件主程序1 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
146
-
147
-
148
@CatchException
def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point (generator): proofread a local LaTeX project and compile the result.

    Args:
    - txt: Path of the local project folder typed by the user.
    - llm_kwargs / plugin_kwargs: Passed through to the LaTeX conversion helper;
      plugin_kwargs["advanced_arg"], when non-empty, is appended to the prompts.
    - chatbot, history: UI handles used for progress reporting.
    - system_prompt: Passed through to the LaTeX conversion helper.
    - web_port: Not used by this function.

    Returns:
    - The success flag produced by the compile step, or None on an early exit.
    """
    # <-------------- information about this plugin ------------->
    chatbot.append([ "函数插件功能?",
        "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # <-------------- more requirements ------------->
    # An empty advanced argument is removed so it is treated as "not provided".
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""):
        plugin_kwargs.pop("advanced_arg")
    extra_requirement = plugin_kwargs.get("advanced_arg", "")
    _switch_prompt_ = partial(switch_prompt, more_requirement=extra_requirement)

    # <-------------- check deps ------------->
    try:
        import glob, os, time, subprocess
        subprocess.Popen(['pdflatex', '-version'])
        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
    except Exception:
        chatbot.append([ f"解析项目: {txt}",
            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- clear history and read input ------------->
    history = []
    if not os.path.exists(txt):
        if txt == "":
            txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    file_manifest = glob.glob(f'{project_folder}/**/*.tex', recursive=True)
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- if is a zip/tar file ------------->
    project_folder = desend_to_extracted_folder_if_exist(project_folder)

    # <-------------- move latex project away from temp folder ------------->
    project_folder = move_project(project_folder, arxiv_id=None)

    # <-------------- if merge_proofread_en is already generated, skip gpt req ------------->
    already_proofread = os.path.exists(project_folder + '/merge_proofread_en.tex')
    if not already_proofread:
        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
                                chatbot, history, system_prompt, mode='proofread_en', switch_prompt=_switch_prompt_)

    # <-------------- compile PDF ------------->
    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread_en',
                                 work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)

    # <-------------- zip PDF ------------->
    zip_res = zip_result(project_folder)
    if success:
        outcome = (f"成功啦", '请查收结果(压缩包)...')
    else:
        outcome = (f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')
    # Both outcomes report, refresh the UI, then publish the archive for download.
    chatbot.append(outcome)
    yield from update_ui(chatbot=chatbot, history=history)
    time.sleep(1)  # refresh the UI
    promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)

    # <-------------- we are done ------------->
    return success
220
-
221
- # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 插件主程序2 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
222
-
223
@CatchException
def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point (generator): translate a LaTeX project into Chinese and compile it.

    Args:
    - txt: A local project folder, an arXiv ID, or an arXiv /abs/ URL.
    - llm_kwargs / plugin_kwargs: Passed through to the LaTeX conversion helper.
      plugin_kwargs["advanced_arg"] may start with "--no-cache" to bypass a cached
      translation; the remaining text is appended to the prompts.
    - chatbot, history: UI handles used for progress reporting.
    - system_prompt: Passed through to the LaTeX conversion helper.
    - web_port: Not used by this function.

    Returns:
    - The success flag produced by the compile step, or None on an early exit.
    """
    # <-------------- information about this plugin ------------->
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # <-------------- more requirements ------------->
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    more_req = plugin_kwargs.get("advanced_arg", "")
    no_cache_flag = "--no-cache"
    no_cache = more_req.startswith(no_cache_flag)
    if no_cache:
        # Remove the flag itself so it does not leak into the LLM prompt. The previous
        # `more_req.lstrip("--no-cache")` discarded its result, and `lstrip` strips a
        # *set of characters* rather than a prefix, so it could not have done this anyway.
        more_req = more_req[len(no_cache_flag):].lstrip()
    allow_cache = not no_cache
    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)

    # <-------------- check deps ------------->
    try:
        import glob, os, time, subprocess
        subprocess.Popen(['pdflatex', '-version'])
        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
    except Exception:
        chatbot.append([ f"解析项目: {txt}",
            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- clear history and read input ------------->
    history = []
    txt, arxiv_id = yield from arxiv_download(chatbot, history, txt, allow_cache)
    if txt.endswith('.pdf'):
        # arxiv_download handed back a cached translated PDF; nothing left to do.
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"发现已经存在翻译好的PDF文档")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无法处理: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    file_manifest = glob.glob(f'{project_folder}/**/*.tex', recursive=True)
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- if is a zip/tar file ------------->
    project_folder = desend_to_extracted_folder_if_exist(project_folder)

    # <-------------- move latex project away from temp folder ------------->
    project_folder = move_project(project_folder, arxiv_id)

    # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
    if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
                                chatbot, history, system_prompt, mode='translate_zh', switch_prompt=_switch_prompt_)

    # <-------------- compile PDF ------------->
    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', mode='translate_zh',
                                 work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)

    # <-------------- zip PDF ------------->
    zip_res = zip_result(project_folder)
    if success:
        chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1)  # refresh the UI
        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
    else:
        chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux,请检查系统字体(见Github wiki) ...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1)  # refresh the UI
        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)

    # <-------------- we are done ------------->
    return success
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/__init__.py DELETED
File without changes
crazy_functions/agent_fns/auto_agent.py DELETED
@@ -1,23 +0,0 @@
1
- from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate
2
- from toolbox import report_exception, get_log_folder, update_ui_lastest_msg, Singleton
3
- from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
4
- from crazy_functions.agent_fns.general import AutoGenGeneral
5
-
6
-
7
-
8
class AutoGenMath(AutoGenGeneral):
    """AutoGen plugin variant that pairs one assistant agent with one user-proxy agent."""

    def define_agents(self):
        """Return the agent specifications (name, class, extra kwargs) for this plugin."""
        from autogen import AssistantAgent, UserProxyAgent

        assistant_spec = {
            "name": "assistant",            # name of the agent.
            "cls": AssistantAgent,          # class of the agent.
        }
        user_proxy_spec = {
            "name": "user_proxy",           # name of the agent.
            "cls": UserProxyAgent,          # class of the agent.
            "human_input_mode": "ALWAYS",   # always ask for human input.
            "llm_config": False,            # disables llm-based auto reply.
        }
        return [assistant_spec, user_proxy_spec]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/agent_fns/echo_agent.py DELETED
@@ -1,19 +0,0 @@
1
- from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
2
-
3
class EchoDemo(PluginMultiprocessManager):
    """Demo worker that echoes every user input back to the main process."""

    def subprocess_worker(self, child_conn):
        """Subprocess loop: echo each "user_input" message until told to terminate or a wait times out."""
        # ⭐⭐ runs in the subprocess
        self.child_conn = child_conn
        while True:
            msg = self.child_conn.recv()  # PipeCom
            if msg.cmd == "terminate":
                self.child_conn.send(PipeCom("done", ""))
                break
            if msg.cmd == "user_input":
                # echo, then wait for further user input
                self.child_conn.send(PipeCom("show", msg.content))
                got_feedback = self.subprocess_worker_wait_user_feedback(wait_msg="我准备好处理下一个问题了.")
                if not got_feedback:
                    # wait timed out, terminate this subprocess_worker
                    break
        print('[debug] subprocess_worker terminated')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/agent_fns/general.py DELETED
@@ -1,138 +0,0 @@
1
- from toolbox import trimmed_format_exc, get_conf, ProxyNetworkActivate
2
- from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
3
- from request_llms.bridge_all import predict_no_ui_long_connection
4
- import time
5
-
6
def gpt_academic_generate_oai_reply(
    self,
    messages,
    sender,
    config,
):
    """
    Reply function that answers through `predict_no_ui_long_connection`.

    Args:
    - self: The agent the function is attached to; `llm_config`, `_oai_messages`
            and `_oai_system_message` are read from it.
    - messages: List of message dicts (each with a 'content' key), or None to use
                the agent's stored messages for `sender`.
    - sender: Key into `self._oai_messages` when `messages` is None.
    - config: LLM configuration; None falls back to `self.llm_config`, and
              False disables the reply.

    Returns:
    - (False, None) when the LLM configuration is False.
    - (True, reply) otherwise, where reply is the model output.

    Raises:
    - AssertionError: If the last message carries a 'context' entry (not implemented).
    """
    llm_config = self.llm_config if config is None else config
    if llm_config is False:
        return False, None
    if messages is None:
        messages = self._oai_messages[sender]

    # The last message is the query; everything before it becomes the history.
    inputs = messages[-1]['content']
    history = [message['content'] for message in messages[:-1]]
    context = messages[-1].pop("context", None)
    assert context is None, "预留参数 context 未实现"

    reply = predict_no_ui_long_connection(
        inputs=inputs,
        llm_kwargs=llm_config,
        history=history,
        sys_prompt=self._oai_system_message[0]['content'],
        console_slience=True
    )
    return True, reply
34
-
35
class AutoGenGeneral(PluginMultiprocessManager):
    """Runs a two-agent AutoGen chat (assistant + user_proxy) inside the plugin subprocess."""

    def gpt_academic_print_override(self, user_proxy, message, sender):
        """Forward a received agent message to the main process as a "show" command."""
        # ⭐⭐ run in subprocess
        separator = "\n\n---\n\n"
        try:
            print_msg = sender.name + separator + message["content"]
        except:
            # fall back to using `message` itself when it cannot be indexed by "content"
            print_msg = sender.name + separator + message
        self.child_conn.send(PipeCom("show", print_msg))

    def gpt_academic_get_human_input(self, user_proxy, message):
        """
        Ask the main process for user input and wait (polling every 0.5s) for the answer.

        Returns the `content` of the received message.
        Raises TimeoutError after 300 seconds without an answer (a "done" command is sent first).
        """
        # ⭐⭐ run in subprocess
        patience = 300
        started_at = time.time()
        self.child_conn.send(PipeCom("interact", message))
        while True:
            time.sleep(0.5)
            if self.child_conn.poll():
                return self.child_conn.recv().content
            if time.time() - started_at > patience:
                self.child_conn.send(PipeCom("done", ""))
                raise TimeoutError("等待用户输入超时")

    def define_agents(self):
        """Return the list of agent specification dicts; must be provided by subclasses."""
        raise NotImplementedError

    def exe_autogen(self, input):
        """Build the agents from `define_agents()` and start the chat with `input.content`."""
        # ⭐⭐ run in subprocess
        task = input.content
        code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
        user_proxy = None
        assistant = None
        for agent_kwargs in self.define_agents():
            agent_cls = agent_kwargs.pop('cls')
            # defaults first, so entries of the agent spec override them
            kwargs = {
                'llm_config': self.llm_kwargs,
                'code_execution_config': code_execution_config,
            }
            kwargs.update(agent_kwargs)
            agent_handle = agent_cls(**kwargs)
            # route the agent's received-message printing through the pipe
            agent_handle._print_received_message = lambda a, b: self.gpt_academic_print_override(agent_kwargs, a, b)
            # swap the stock 'generate_oai_reply' reply function for the project's bridge
            for reply_entry in agent_handle._reply_func_list:
                reply_func = reply_entry['reply_func']
                if hasattr(reply_func, '__name__') and reply_func.__name__ == 'generate_oai_reply':
                    reply_entry['reply_func'] = gpt_academic_generate_oai_reply
            agent_name = agent_kwargs['name']
            if agent_name == 'user_proxy':
                agent_handle.get_human_input = lambda a: self.gpt_academic_get_human_input(user_proxy, a)
                user_proxy = agent_handle
            if agent_name == 'assistant':
                assistant = agent_handle
        try:
            if user_proxy is None or assistant is None:
                raise Exception("用户代理或助理代理未定义")
            with ProxyNetworkActivate("AutoGen"):
                user_proxy.initiate_chat(assistant, message=task)
        except Exception:
            tb_str = '```\n' + trimmed_format_exc() + '```'
            self.child_conn.send(PipeCom("done", "AutoGen 执行失败: \n\n" + tb_str))

    def subprocess_worker(self, child_conn):
        """Subprocess main loop: run `exe_autogen` for every message received on the pipe."""
        # ⭐⭐ run in subprocess
        self.child_conn = child_conn
        while True:
            incoming = self.child_conn.recv()  # PipeCom
            self.exe_autogen(incoming)
103
-
104
-
105
class AutoGenGroupChat(AutoGenGeneral):
    """Runs an AutoGen group chat (several agents plus a chat manager) in the plugin subprocess."""

    def exe_autogen(self, input):
        """
        Build all agents from `define_agents()`, wrap them in a group chat and let the
        'user_proxy' agent start the conversation with `input.content`.

        Any exception is reported to the main process as a "done" command carrying the traceback.
        """
        # ⭐⭐ run in subprocess
        import autogen

        input = input.content
        with ProxyNetworkActivate("AutoGen"):
            code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
            agents = self.define_agents()
            agents_instances = []
            # Must be initialised: without it the `user_proxy is None` guard below raised
            # UnboundLocalError instead of the intended error when no agent is named 'user_proxy'.
            user_proxy = None
            for agent_kwargs in agents:
                agent_cls = agent_kwargs.pop("cls")
                kwargs = {"code_execution_config": code_execution_config}
                kwargs.update(agent_kwargs)
                agent_handle = agent_cls(**kwargs)
                # route the agent's received-message printing through the pipe
                agent_handle._print_received_message = lambda a, b: self.gpt_academic_print_override(agent_kwargs, a, b)
                agents_instances.append(agent_handle)
                if agent_kwargs["name"] == "user_proxy":
                    user_proxy = agent_handle
                    user_proxy.get_human_input = lambda a: self.gpt_academic_get_human_input(user_proxy, a)
            try:
                groupchat = autogen.GroupChat(agents=agents_instances, messages=[], max_round=50)
                manager = autogen.GroupChatManager(groupchat=groupchat, **self.define_group_chat_manager_config())
                manager._print_received_message = lambda a, b: self.gpt_academic_print_override(agent_kwargs, a, b)
                manager.get_human_input = lambda a: self.gpt_academic_get_human_input(manager, a)
                if user_proxy is None:
                    raise Exception("user_proxy is not defined")
                user_proxy.initiate_chat(manager, message=input)
            except Exception:
                tb_str = "```\n" + trimmed_format_exc() + "```"
                self.child_conn.send(PipeCom("done", "AutoGen exe failed: \n\n" + tb_str))

    def define_group_chat_manager_config(self):
        """Return the keyword arguments for `autogen.GroupChatManager`; must be provided by subclasses."""
        raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/agent_fns/persistent.py DELETED
@@ -1,16 +0,0 @@
1
- from toolbox import Singleton
2
@Singleton
class GradioMultiuserManagerForPersistentClasses():
    """Key -> object registry for instances that must persist between calls."""

    def __init__(self):
        # registry of stored objects, indexed by caller-chosen key
        self.mapping = {}

    def already_alive(self, key):
        """Return whether `key` is registered and its object reports `is_alive()`."""
        if key not in self.mapping:
            return False
        return self.mapping[key].is_alive()

    def set(self, key, x):
        """Store `x` under `key` and return the stored entry."""
        self.mapping[key] = x
        return self.mapping[key]

    def get(self, key):
        """Return the object stored under `key` (KeyError when absent)."""
        return self.mapping[key]
16
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/agent_fns/pipe.py DELETED
@@ -1,194 +0,0 @@
1
- from toolbox import get_log_folder, update_ui, gen_time_str, get_conf, promote_file_to_downloadzone
2
- from crazy_functions.agent_fns.watchdog import WatchDog
3
- import time, os
4
-
5
class PipeCom:
    """Message exchanged over the pipe: a command name plus its payload."""

    def __init__(self, cmd, content) -> None:
        # cmd: command identifier; content: payload attached to it
        self.cmd, self.content = cmd, content
9
-
10
-
11
class PluginMultiprocessManager:
    """
    Base class that runs a plugin worker in a subprocess and drives the chat UI from
    the main process, exchanging `PipeCom` messages over a pipe.

    Subclasses implement `subprocess_worker`.
    """

    def __init__(self, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
        # ⭐ run in main process
        self.autogen_work_dir = os.path.join(get_log_folder("autogen"), gen_time_str())
        self.previous_work_dir_files = {}   # file path -> last seen modification time
        self.llm_kwargs = llm_kwargs
        self.plugin_kwargs = plugin_kwargs
        self.chatbot = chatbot
        self.history = history
        self.system_prompt = system_prompt
        # user_request is accepted but not stored
        self.alive = True
        self.use_docker = get_conf("AUTOGEN_USE_DOCKER")
        self.last_user_input = ""
        # watchdog thread: terminates this instance when it is not fed for `timeout_seconds`
        timeout_seconds = 5 * 60
        self.heartbeat_watchdog = WatchDog(timeout=timeout_seconds, bark_fn=self.terminate, interval=5)
        self.heartbeat_watchdog.begin_watch()

    def feed_heartbeat_watchdog(self):
        """Reset the watchdog timer so it does not call `terminate`."""
        self.heartbeat_watchdog.feed()

    def is_alive(self):
        """Return the `alive` flag (cleared by `terminate`)."""
        return self.alive

    def launch_subprocess_with_pipe(self):
        """Start `subprocess_worker` in a daemon process and return the parent end of the pipe."""
        # ⭐ run in main process
        from multiprocessing import Process, Pipe

        parent_conn, child_conn = Pipe()
        self.p = Process(target=self.subprocess_worker, args=(child_conn,))
        self.p.daemon = True
        self.p.start()
        return parent_conn

    def terminate(self):
        """Terminate the worker process and mark this instance as no longer alive."""
        self.p.terminate()
        self.alive = False
        print("[debug] instance terminated")

    def subprocess_worker(self, child_conn):
        """Worker body executed in the subprocess; must be provided by subclasses."""
        # ⭐⭐ run in subprocess
        raise NotImplementedError

    def send_command(self, cmd):
        """
        Send `cmd` to the worker as a "user_input" message.

        An input equal to the previous one is replaced by an empty string.
        Returns (repeated, cmd_actually_sent).
        """
        # ⭐ run in main process
        repeated = False
        if cmd == self.last_user_input:
            repeated, cmd = True, ""
        else:
            self.last_user_input = cmd
        self.parent_conn.send(PipeCom("user_input", cmd))
        return repeated, cmd

    def immediate_showoff_when_possible(self, fp):
        """Generator: when `fp` has a png/jpg extension, show an inline preview in the chat."""
        # ⭐ main process
        extension = fp.split('.')[-1]
        if extension.lower() not in ['png', 'jpg']:
            return
        image_path = os.path.abspath(fp)
        self.chatbot.append([
            '检测到新生图像:',
            f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
        ])
        yield from update_ui(chatbot=self.chatbot, history=self.history)

    def overwatch_workdir_file_change(self):
        """
        Generator: scan the work directory for new or modified files.

        Every file whose path is unknown, or whose modification time differs from the
        recorded one, is recorded in `previous_work_dir_files`, promoted to the
        download zone and listed in the chat. Returns the list of changed paths.
        """
        # ⭐ main process: monitors the work folder
        known_files = self.previous_work_dir_files
        change_list = []
        for root, dirs, files in os.walk(self.autogen_work_dir):
            for file in files:
                file_path = os.path.join(root, file)
                last_modified_time = os.stat(file_path).st_mtime
                if file_path not in known_files or last_modified_time != known_files[file_path]:
                    known_files[file_path] = last_modified_time
                    change_list.append(file_path)
        if len(change_list) > 0:
            file_links = ""
            for changed in change_list:
                res = promote_file_to_downloadzone(changed)
                file_links += f'<br/><a href="file={res}" target="_blank">{res}</a>'
                yield from self.immediate_showoff_when_possible(changed)

            self.chatbot.append(['检测到新生文档.', f'文档清单如下: {file_links}'])
            yield from update_ui(chatbot=self.chatbot, history=self.history)
        return change_list

    def main_process_ui_control(self, txt, create_or_resume) -> str:
        """
        Generator: forward `txt` to the worker and relay its messages to the chat UI.

        Args:
        - txt: User input; 'exit' terminates the worker.
        - create_or_resume: 'create' launches a new subprocess first; any other value
                            reuses the existing pipe.

        Returns "wait_feedback" when the worker asks for user input (the worker stays
        alive), otherwise "terminate".
        """
        # ⭐ main process
        if create_or_resume == 'create':
            self.cnt = 1
            self.parent_conn = self.launch_subprocess_with_pipe()  # ⭐⭐⭐
        repeated, cmd_to_autogen = self.send_command(txt)
        if txt == 'exit':
            self.chatbot.append([f"结束", "结束信号已明确,终止AutoGen程序。"])
            yield from update_ui(chatbot=self.chatbot, history=self.history)
            self.terminate()
            return "terminate"

        while True:
            time.sleep(0.5)
            if not self.alive:
                # the heartbeat watchdog might have it killed
                self.terminate()
                return "terminate"

            if not self.parent_conn.poll():
                # nothing received yet: keep the watchdog fed and animate the waiting line
                self.feed_heartbeat_watchdog()
                if '[GPT-Academic] 等待中' not in self.chatbot[-1][-1]:
                    self.chatbot.append(["[GPT-Academic] 等待AutoGen执行结果 ...", "[GPT-Academic] 等待中"])
                waiting_title = self.chatbot[-1][0]
                waiting_text = self.chatbot[-1][1].replace("[GPT-Academic] 等待中", "[GPT-Academic] 等待中.")
                self.chatbot[-1] = [waiting_title, waiting_text]
                yield from update_ui(chatbot=self.chatbot, history=self.history)
                continue

            self.feed_heartbeat_watchdog()
            # drop trailing placeholder lines (waiting / awaiting-instruction) before showing news
            for marker in ("[GPT-Academic] 等待中", "等待您的进一步指令", '[GPT-Academic] 等待中'):
                if marker in self.chatbot[-1][-1]:
                    self.chatbot.pop(-1)  # remove the last line
            msg = self.parent_conn.recv()  # PipeCom
            if msg.cmd == "done":
                self.chatbot.append([f"结束", msg.content])
                self.cnt += 1
                yield from update_ui(chatbot=self.chatbot, history=self.history)
                self.terminate()
                break
            elif msg.cmd == "show":
                yield from self.overwatch_workdir_file_change()
                notice = "(自动忽略重复的输入)" if repeated else ""
                self.chatbot.append([f"运行阶段-{self.cnt}(上次用户反馈输入为: 「{cmd_to_autogen}」{notice}", msg.content])
                self.cnt += 1
                yield from update_ui(chatbot=self.chatbot, history=self.history)
            elif msg.cmd == "interact":
                yield from self.overwatch_workdir_file_change()
                self.chatbot.append([f"程序抵达用户反馈节点.", msg.content +
                                     "\n\n等待您的进一步指令." +
                                     "\n\n(1) 一般情况下您不需要说什么, 清空输入区, 然后直接点击“提交”以继续. " +
                                     "\n\n(2) 如果您需要补充些什么, 输入要反馈的内容, 直接点击“提交”以继续. " +
                                     "\n\n(3) 如果您想终止程序, 输入exit, 直接点击“提交”以终止AutoGen并解锁. "
                ])
                yield from update_ui(chatbot=self.chatbot, history=self.history)
                # do not terminate here, leave the subprocess_worker instance alive
                return "wait_feedback"

        self.terminate()
        return "terminate"

    def subprocess_worker_wait_user_feedback(self, wait_msg="wait user feedback"):
        """
        Send an "interact" message and poll (every 0.5s) until the main process answers.

        Returns True when data is available on the pipe, or False after 5 minutes
        (a "done" message is sent in that case).
        """
        # ⭐⭐ run in subprocess
        patience = 5 * 60
        started_at = time.time()
        self.child_conn.send(PipeCom("interact", wait_msg))
        while True:
            time.sleep(0.5)
            if self.child_conn.poll():
                return True
            if time.time() - started_at > patience:
                self.child_conn.send(PipeCom("done", ""))
                return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/agent_fns/watchdog.py DELETED
@@ -1,28 +0,0 @@
1
- import threading, time
2
-
3
class WatchDog():
    """Timer that calls `bark_fn` once when it has not been fed for longer than `timeout` seconds."""

    def __init__(self, timeout, bark_fn, interval=3, msg="") -> None:
        self.last_feed = None       # time of the last feed; set by begin_watch()/feed()
        self.timeout = timeout      # seconds without feeding before barking
        self.bark_fn = bark_fn      # callback invoked on timeout
        self.interval = interval    # seconds slept between checks
        self.msg = msg              # printed before barking when non-empty
        self.kill_dog = False       # set to True to stop watching without barking

    def watch(self):
        """Polling loop: stop when killed, or bark once and stop when the timeout is exceeded."""
        while not self.kill_dog:
            starved_for = time.time() - self.last_feed
            if starved_for > self.timeout:
                if len(self.msg) > 0:
                    print(self.msg)
                self.bark_fn()
                return
            time.sleep(self.interval)

    def begin_watch(self):
        """Record the current time as the last feed and start `watch` in a daemon thread."""
        self.last_feed = time.time()
        watcher = threading.Thread(target=self.watch)
        watcher.daemon = True
        watcher.start()

    def feed(self):
        """Record the current time as the last feed."""
        self.last_feed = time.time()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/chatglm微调工具.py DELETED
@@ -1,141 +0,0 @@
1
- from toolbox import CatchException, update_ui, promote_file_to_downloadzone
2
- from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
3
- import datetime, json
4
-
5
def fetch_items(list_of_items, batch_size):
    """Yield consecutive slices of ``list_of_items`` holding at most ``batch_size`` items each."""
    batch_starts = range(0, len(list_of_items), batch_size)
    yield from (list_of_items[begin:begin + batch_size] for begin in batch_starts)
8
-
9
def string_to_options(arguments):
    """Parse a shell-style option string into an ``argparse.Namespace``.

    Args:
        arguments: one string such as ``'--batch 10 --prompt_prefix "..."'``;
            it is split with ``shlex.split`` before parsing.

    Returns:
        The parsed namespace with attributes ``llm_to_learn``, ``prompt_prefix``,
        ``system_prompt``, ``batch``, ``pre_seq_len``, ``learning_rate``,
        ``num_gpus``, ``json_dataset`` and ``ptuning_directory``.

    Note:
        ``argparse`` exits the process (``SystemExit``) on unknown or malformed
        options.
    """
    import argparse
    import shlex

    # Create an argparse.ArgumentParser instance
    parser = argparse.ArgumentParser()

    # Add command-line arguments
    parser.add_argument("--llm_to_learn", type=str, help="LLM model to learn", default="gpt-3.5-turbo")
    parser.add_argument("--prompt_prefix", type=str, help="Prompt prefix", default='')
    parser.add_argument("--system_prompt", type=str, help="System prompt", default='')
    # The help text here used to be a copy-paste of "System prompt".
    parser.add_argument("--batch", type=int, help="Batch size", default=50)
    parser.add_argument("--pre_seq_len", type=int, help="pre_seq_len", default=50)
    parser.add_argument("--learning_rate", type=float, help="learning_rate", default=2e-2)
    parser.add_argument("--num_gpus", type=int, help="num_gpus", default=1)
    parser.add_argument("--json_dataset", type=str, help="json_dataset", default="")
    parser.add_argument("--ptuning_directory", type=str, help="ptuning_directory", default="")

    # Parse the arguments
    args = parser.parse_args(shlex.split(arguments))

    return args
33
-
34
@CatchException
def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
    """Generate a fine-tuning dataset by asking an LLM to answer every record of a JSONL file.

    Args:
        txt: text from the input box; here it is used as the path of a JSONL
            file whose records each carry a ``"content"`` field.
        llm_kwargs: model parameters (temperature, top_p, ...). Its
            ``'llm_model'`` entry is overwritten with ``--llm_to_learn``.
        plugin_kwargs: plugin parameters; ``"advanced_arg"`` holds the option
            string parsed by ``string_to_options``.
        chatbot: handle of the chat display, used to show messages to the user.
        history: chat history; it is cleared at the start.
        system_prompt: silent prompt for the model (not used here; the
            ``--system_prompt`` option is used instead).
        user_request: information about the current user request.

    Results are appended, one JSON object per line with ``"content"`` and
    ``"summary"`` keys, to ``txt + '.generated.json'``.
    """
    history = []    # clear the history to avoid overflowing the input
    chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""):
        plugin_kwargs.pop("advanced_arg")
    args = plugin_kwargs.get("advanced_arg", None)
    if args is None:
        chatbot.append(("没给定指令", "退出"))
        yield from update_ui(chatbot=chatbot, history=history)
        return
    arguments = string_to_options(arguments=args)

    dat = []
    with open(txt, 'r', encoding='utf8') as f:
        for line in f:
            # Blank lines (for example a trailing empty line) are not valid
            # JSON and used to abort the whole run.
            if not line.strip():
                continue
            json_dat = json.loads(line)
            dat.append(json_dat["content"])

    llm_kwargs['llm_model'] = arguments.llm_to_learn
    for batch in fetch_items(dat, arguments.batch):
        res = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
            inputs_array=[f"{arguments.prompt_prefix}\n\n{b}" for b in batch],
            inputs_show_user_array=["Show Nothing" for _ in batch],
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=[[] for _ in batch],
            sys_prompt_array=[arguments.system_prompt for _ in batch],
            max_workers=10  # maximum parallel load allowed by OpenAI
        )

        # ``res`` alternates shown-input / response, so the odd positions are the answers.
        with open(txt+'.generated.json', 'a+', encoding='utf8') as f:
            for b, r in zip(batch, res[1::2]):
                f.write(json.dumps({"content":b, "summary":r}, ensure_ascii=False)+'\n')

    promote_file_to_downloadzone(txt+'.generated.json', rename_file='generated.json', chatbot=chatbot)
    return
79
-
80
-
81
-
82
@CatchException
def 启动微调(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
    """Launch a ChatGLM2 p-tuning run with ``torchrun`` and wait for it to finish.

    Args:
        txt: text from the input box (not used by this plugin).
        llm_kwargs: model parameters (not used by this plugin).
        plugin_kwargs: plugin parameters; ``"advanced_arg"`` holds the option
            string parsed by ``string_to_options`` (``--pre_seq_len``,
            ``--learning_rate``, ``--num_gpus``, ``--json_dataset``,
            ``--ptuning_directory``).
        chatbot: handle of the chat display, used to show messages to the user.
        history: chat history; it is cleared at the start.
        system_prompt: silent prompt for the model (not used by this plugin).
        user_request: information about the current user request.

    The training process runs with ``ptuning_directory`` as its working
    directory and is killed if it is still running after 24 hours.
    """
    import subprocess
    history = []    # clear the history to avoid overflowing the input
    chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""):
        plugin_kwargs.pop("advanced_arg")
    args = plugin_kwargs.get("advanced_arg", None)
    if args is None:
        chatbot.append(("没给定指令", "退出"))
        yield from update_ui(chatbot=chatbot, history=history)
        return
    arguments = string_to_options(arguments=args)

    pre_seq_len = arguments.pre_seq_len                 # e.g. 128
    learning_rate = arguments.learning_rate             # e.g. 2e-2
    num_gpus = arguments.num_gpus                       # e.g. 1
    json_dataset = arguments.json_dataset               # e.g. 't_code.json'
    ptuning_directory = arguments.ptuning_directory     # e.g. '/home/hmp/ChatGLM2-6B/ptuning'

    # The option values come from user input, so the command is passed as an
    # argument list (no shell). Interpolating them into a shell string would
    # let a value such as "x; rm -rf ~" run arbitrary commands.
    command = [
        "torchrun", "--standalone", "--nnodes=1", f"--nproc-per-node={num_gpus}", "main.py",
        "--do_train",
        "--train_file", f"AdvertiseGen/{json_dataset}",
        "--validation_file", f"AdvertiseGen/{json_dataset}",
        "--preprocessing_num_workers", "20",
        "--prompt_column", "content",
        "--response_column", "summary",
        "--overwrite_cache",
        "--model_name_or_path", "THUDM/chatglm2-6b",
        "--output_dir", f"output/clothgen-chatglm2-6b-pt-{pre_seq_len}-{learning_rate}",
        "--overwrite_output_dir",
        "--max_source_length", "256",
        "--max_target_length", "256",
        "--per_device_train_batch_size", "1",
        "--per_device_eval_batch_size", "1",
        "--gradient_accumulation_steps", "16",
        "--predict_with_generate",
        "--max_steps", "100",
        "--logging_steps", "10",
        "--save_steps", "20",
        "--learning_rate", str(learning_rate),
        "--pre_seq_len", str(pre_seq_len),
        "--quantization_bit", "4",
    ]

    process = subprocess.Popen(command, cwd=ptuning_directory)
    try:
        process.communicate(timeout=3600*24)
    except subprocess.TimeoutExpired:
        process.kill()
        process.communicate()   # reap the killed child so it does not linger as a zombie
    return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/crazy_functions_test.py DELETED
@@ -1,231 +0,0 @@
1
- """
2
- 这是什么?
3
- 这个文件用于函数插件的单元测试
4
- 运行方法 python crazy_functions/crazy_functions_test.py
5
- """
6
-
7
- # ==============================================================================================================================
8
-
9
def validate_path():
    """Switch to the parent of this file's directory and add it to ``sys.path``.

    The parent directory is assumed to be the project root, so the script can
    import project modules regardless of where it is started from.
    """
    import os, sys
    # Resolve __file__ first: when it has no directory component,
    # dirname(__file__) is '' and the old "dirname + '/..'" expression
    # collapsed to the filesystem root.
    this_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir_assume = os.path.dirname(this_dir)
    os.chdir(root_dir_assume)
    sys.path.append(root_dir_assume)
15
- validate_path() # validate path so you can run from base directory
16
-
17
- # ==============================================================================================================================
18
-
19
- from colorful import *
20
- from toolbox import get_conf, ChatBotWithCookies
21
- import contextlib
22
- import os
23
- import sys
24
- from functools import wraps
25
# Settings read from the project configuration.
(
    proxies,
    WEB_PORT,
    LLM_MODEL,
    CONCURRENT_COUNT,
    AUTHENTICATION,
    CHATBOT_HEIGHT,
    LAYOUT,
    API_KEY,
) = get_conf(
    'proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT',
    'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY',
)

# Shared arguments handed to every plugin call in this script.
llm_kwargs = dict(
    api_key=API_KEY,
    llm_model=LLM_MODEL,
    top_p=1.0,
    max_length=None,
    temperature=1.0,
)
plugin_kwargs = {}
chatbot = ChatBotWithCookies(llm_kwargs)
history = []
system_prompt = "Serve me as a writing and programming assistant."
web_port = 1024
40
-
41
- # ==============================================================================================================================
42
-
43
def silence_stdout(func):
    """Wrap a generator function so that whatever it prints is discarded.

    While the wrapped generator is running, ``sys.stdout`` points at
    ``os.devnull``; while control is with the consumer (at each ``yield``) the
    original stream is put back, so the consumer can print normally.

    Args:
        func: a generator function.

    Returns:
        A generator function with the same signature that yields the same items.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        original_stdout = sys.stdout
        # One devnull handle for the whole run; the previous version opened a
        # new one after every item and closed only the last.
        with open(os.devnull, 'w') as devnull:
            sys.stdout = devnull
            try:
                for item in func(*args, **kwargs):
                    sys.stdout = original_stdout
                    yield item
                    sys.stdout = devnull
            finally:
                # Also restores stdout when func raises or the generator is
                # closed early, which used to leave it redirected.
                sys.stdout = original_stdout
    return wrapper
55
-
56
class CLI_Printer():
    """Print a chat transcript incrementally on the command line.

    Remembers the previously rendered transcript so that, when the new one
    merely extends it, only the added tail is written.
    """

    def __init__(self) -> None:
        self.pre_buf = ""   # transcript text rendered by the previous print() call

    def print(self, buf):
        """Render ``buf`` (a sequence of (question, answer) pairs) and print what is new."""
        rendered = '\n'.join(
            sprint亮靛('[Me]:' + question) + '\n' + '[GPT]:' + answer
            for question, answer in buf
        )

        previous = self.pre_buf
        if previous and rendered.startswith(previous):
            # Same conversation, just longer: emit only the appended part.
            print(rendered[len(previous):], end='')
        else:
            # Different content: push the old output away with blank lines and reprint.
            print('\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n' + rendered, end='')
        self.pre_buf = rendered
75
-
76
- cli_printer = CLI_Printer()
77
- # ==============================================================================================================================
78
def test_解析一个Python项目():
    """Run the 解析一个Python项目 plugin on crazy_functions/test_project/python/dqn and print every chatbot state."""
    from crazy_functions.解析项目源代码 import 解析一个Python项目
    project_path = "crazy_functions/test_project/python/dqn"
    updates = 解析一个Python项目(project_path, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, chat_state, _hist, _msg in updates:
        print(chat_state)
83
-
84
def test_解析一个Cpp项目():
    """Run the 解析一个C项目 plugin on crazy_functions/test_project/cpp/cppipc and print every chatbot state."""
    from crazy_functions.解析项目源代码 import 解析一个C项目
    project_path = "crazy_functions/test_project/cpp/cppipc"
    updates = 解析一个C项目(project_path, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, chat_state, _hist, _msg in updates:
        print(chat_state)
89
-
90
def test_Latex英文润色():
    """Run the Latex英文润色 plugin on crazy_functions/test_project/latex/attention and print every chatbot state."""
    from crazy_functions.Latex全文润色 import Latex英文润色
    project_path = "crazy_functions/test_project/latex/attention"
    updates = Latex英文润色(project_path, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, chat_state, _hist, _msg in updates:
        print(chat_state)
95
-
96
def test_Markdown中译英():
    """Run the Markdown中译英 plugin on README.md and print every chatbot state."""
    from crazy_functions.批量Markdown翻译 import Markdown中译英
    markdown_path = "README.md"
    updates = Markdown中译英(markdown_path, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, chat_state, _hist, _msg in updates:
        print(chat_state)
101
-
102
def test_批量翻译PDF文档():
    """Run the 批量翻译PDF文档 plugin on crazy_functions/test_project/pdf_and_word and print every chatbot state."""
    from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
    folder = "crazy_functions/test_project/pdf_and_word"
    updates = 批量翻译PDF文档(folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, chat_state, _hist, _msg in updates:
        print(chat_state)
107
-
108
def test_谷歌检索小助手():
    """Run the 谷歌检索小助手 plugin on a Google Scholar search URL and print every chatbot state."""
    from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
    scholar_url = "https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q=auto+reinforcement+learning&btnG="
    updates = 谷歌检索小助手(scholar_url, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, chat_state, _hist, _msg in updates:
        print(chat_state)
113
-
114
def test_总结word文档():
    """Run the 总结word文档 plugin on crazy_functions/test_project/pdf_and_word and print every chatbot state."""
    from crazy_functions.总结word文档 import 总结word文档
    folder = "crazy_functions/test_project/pdf_and_word"
    updates = 总结word文档(folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, chat_state, _hist, _msg in updates:
        print(chat_state)
119
-
120
def test_下载arxiv论文并翻译摘要():
    """Run the 下载arxiv论文并翻译摘要 plugin with the identifier 1812.10695 and print every chatbot state."""
    from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要
    arxiv_id = "1812.10695"
    updates = 下载arxiv论文并翻译摘要(arxiv_id, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, chat_state, _hist, _msg in updates:
        print(chat_state)
125
-
126
def test_联网回答问题():
    """Ask the 连接网络回答问题 plugin one question, printing progress and then the final conversation in colour."""
    from crazy_functions.联网的ChatGPT import 连接网络回答问题
    # Questions tried earlier, with the answers that were observed:
    #   "谁是应急食品?"
    #     >> '根据以上搜索结果可以得知,应急食品是“原神”游戏中的角色派蒙的外号。'
    #   "道路千万条,安全第一条。后面两句是?"
    #     >> '行车不规范,亲人两行泪。'
    #   "You should have gone for the head. What does that mean?"
    #     >> The phrase "You should have gone for the head" is a quote from the Marvel movies,
    #        Avengers: Infinity War and Avengers: Endgame. It was spoken by the character Thanos
    #        in Infinity War and by Thor in Endgame.
    question = "AutoGPT是什么?"
    updates = 连接网络回答问题(question, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for cookies, cb, hist, msg in updates:
        latest_answer = cb[-1][-1]
        print("当前问答:", latest_answer.replace("\n", " "))
    # Dump the final chat state: questions in bright blue, answers in bright yellow.
    for asked, answered in ((pair[0], pair[1]) for pair in cb):
        print亮蓝(asked)
        print亮黄(answered)
138
-
139
def test_解析ipynb文件():
    """Run the 解析ipynb文件 plugin on crazy_functions/test_samples and print every chatbot state."""
    from crazy_functions.解析JupyterNotebook import 解析ipynb文件
    folder = "crazy_functions/test_samples"
    updates = 解析ipynb文件(folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, chat_state, _hist, _msg in updates:
        print(chat_state)
144
-
145
-
146
def test_数学动画生成manim():
    """Run the 动画生成 plugin with a short English scene description and print every chatbot state."""
    from crazy_functions.数学动画生成manim import 动画生成
    scene_description = "A ball split into 2, and then split into 4, and finally split into 8."
    updates = 动画生成(scene_description, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, chat_state, _hist, _msg in updates:
        print(chat_state)
151
-
152
-
153
-
154
def test_Markdown多语言():
    """Run the Markdown翻译指定语言 plugin on README.md once per target language, printing every chatbot state."""
    from crazy_functions.批量Markdown翻译 import Markdown翻译指定语言
    markdown_path = "README.md"
    history = []
    target_languages = ("English", "French", "Japanese", "Korean", "Russian",
                        "Italian", "German", "Portuguese", "Arabic")
    for language in target_languages:
        # The target language is passed to the plugin through "advanced_arg".
        plugin_kwargs = {"advanced_arg": language}
        updates = Markdown翻译指定语言(markdown_path, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
        for _cookies, chat_state, _hist, _msg in updates:
            print(chat_state)
162
-
163
def test_Langchain知识库():
    """Run 知识库问答 on "./", then ask 读取知识库作答 a question using the cookies from the first run."""
    from crazy_functions.Langchain知识库 import 知识库问答
    source_dir = "./"
    chatbot = ChatBotWithCookies(llm_kwargs)
    build_updates = silence_stdout(知识库问答)(source_dir, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for cookies, cb, hist, msg in build_updates:
        cli_printer.print(cb)

    # Carry over the cookies produced by the last update of the first run.
    chatbot = ChatBotWithCookies(cookies)
    from crazy_functions.Langchain知识库 import 读取知识库作答
    question = "What is the installation method?"
    answer_updates = silence_stdout(读取知识库作答)(question, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for cookies, cb, hist, msg in answer_updates:
        cli_printer.print(cb)
175
-
176
def test_Langchain知识库读取():
    """Ask the 读取知识库作答 plugin one question and print the chat incrementally."""
    from crazy_functions.Langchain知识库 import 读取知识库作答
    question = "远程云服务器部署?"
    quiet_plugin = silence_stdout(读取知识库作答)
    updates = quiet_plugin(question, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, chat_state, _hist, _msg in updates:
        cli_printer.print(chat_state)
181
-
182
def test_Latex():
    """Run the Latex翻译中文并重新编译PDF plugin on one arXiv paper and print the chat incrementally."""
    from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比, Latex翻译中文并重新编译PDF

    # Other inputs used before (swap in to try them):
    #   r"https://arxiv.org/abs/1706.03762"
    #   r"https://arxiv.org/abs/1902.03185"
    #   r"https://arxiv.org/abs/2305.18290"
    #   r"https://arxiv.org/abs/2305.17608"
    #   r"https://arxiv.org/abs/2211.16068"                            # ACE
    #   r"C:\Users\x\arxiv_cache\2211.16068\workfolder"               # ACE
    #   r"https://arxiv.org/abs/2002.09253"
    #   r"https://arxiv.org/abs/2306.07831"
    #   r"https://arxiv.org/abs/2212.10156"
    #   r"https://arxiv.org/abs/2211.11559"
    #   r"https://arxiv.org/abs/2303.08774"
    #   r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
    paper = r"https://arxiv.org/abs/2303.12712"

    updates = Latex翻译中文并重新编译PDF(paper, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    for _cookies, chat_state, _hist, _msg in updates:
        cli_printer.print(chat_state)
202
-
203
-
204
-
205
- # txt = "2302.02948.tar"
206
- # print(txt)
207
- # main_tex, work_folder = Latex预处理(txt)
208
- # print('main tex:', main_tex)
209
- # res = 编译Latex(main_tex, work_folder)
210
- # # for cookies, cb, hist, msg in silence_stdout(编译Latex)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
211
- # cli_printer.print(cb) # print(cb)
212
-
213
-
214
-
215
- # test_解析一个Python项目()
216
- # test_Latex英文润色()
217
- # test_Markdown中译英()
218
- # test_批量翻译PDF文档()
219
- # test_谷歌检索小助手()
220
- # test_总结word文档()
221
- # test_下载arxiv论文并翻译摘要()
222
- # test_解析一个Cpp项目()
223
- # test_联网回答问题()
224
- # test_解析ipynb文件()
225
- # test_数学动画生成manim()
226
- # test_Langchain知识库()
227
- # test_Langchain知识库读取()
228
- if __name__ == "__main__":
229
- test_Latex()
230
- input("程序完成,回车退出。")
231
- print("退出。")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/crazy_utils.py DELETED
@@ -1,608 +0,0 @@
1
- from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token, Singleton
2
- import threading
3
- import os
4
- import logging
5
-
6
def input_clipping(inputs, history, max_token_limit):
    """Shorten ``inputs`` and/or ``history`` until their joined token count fits ``max_token_limit``.

    Tokens are counted with the tokenizer registered for "gpt-3.5-turbo" in
    ``model_info``. If the input alone takes less than half of the limit, it is
    left untouched and only the history is clipped (against the remaining
    budget). Otherwise the input and the history are clipped together. In both
    modes the entry with the most tokens is trimmed first, repeatedly.

    Args:
        inputs: the current input text.
        history: list of history strings.
        max_token_limit: token budget for input plus history.

    Returns:
        ``(inputs, history)`` after clipping.
    """
    import numpy as np
    from request_llms.bridge_all import model_info
    enc = model_info["gpt-3.5-turbo"]['tokenizer']
    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

    mode = 'input-and-history'
    # When the input takes less than half of the budget, clip the history only.
    input_token_num = get_token_num(inputs)
    if input_token_num < max_token_limit//2:
        mode = 'only-history'
        max_token_limit = max_token_limit - input_token_num

    everything = [inputs] if mode == 'input-and-history' else ['']
    everything.extend(history)
    n_token = get_token_num('\n'.join(everything))
    everything_token = [get_token_num(e) for e in everything]
    delta = max(everything_token) // 16    # granularity of each truncation step

    while n_token > max_token_limit:
        where = np.argmax(everything_token)
        if everything_token[where] == 0:
            # Every entry is already empty, so nothing is left to clip. Without
            # this guard the loop never ended when the separators alone
            # exceeded the limit (e.g. a zero or negative budget).
            break
        encoded = enc.encode(everything[where], disallowed_special=())
        clipped_encoded = encoded[:len(encoded)-delta]
        everything[where] = enc.decode(clipped_encoded)[:-1]    # -1 to remove the may-be illegal char
        everything_token[where] = get_token_num(everything[where])
        n_token = get_token_num('\n'.join(everything))

    if mode == 'input-and-history':
        inputs = everything[0]
    history = everything[1:]
    return inputs, history
39
-
40
def request_gpt_model_in_new_thread_with_ui_alive(
        inputs, inputs_show_user, llm_kwargs,
        chatbot, history, sys_prompt, refresh_interval=0.2,
        handle_token_exceed=True,
        retry_times_at_unknown_error=2,
        ):
    """Request the model in a worker thread while keeping the user interface refreshed.

    This is a generator: it yields UI updates while waiting and returns the
    final text (available as the value of ``yield from``).

    Args:
        inputs (str): the text sent to the model.
        inputs_show_user (str): the text shown in the chat window instead of
            ``inputs`` (lets the report hide a verbose real input).
        llm_kwargs: model parameters, passed through to the request.
        chatbot: chat window handle; a new row is appended and updated in place.
        history (list): chat history sent with the request.
        sys_prompt (str): system prompt sent with the request.
        refresh_interval (float, optional): seconds between UI refreshes
            (default 0.2). Must stay well below the 5-second watchdog patience.
        handle_token_exceed (bool, optional): when True, a token overflow
            (signalled by ``ConnectionAbortedError``) is handled by clipping the
            input/history and retrying; when False the request is given up.
        retry_times_at_unknown_error (int, optional): number of retries after
            any other error.

    Returns:
        The model reply, or the accumulated warning/traceback text if the
        request was given up.
    """
    import time
    from concurrent.futures import ThreadPoolExecutor
    from request_llms.bridge_all import predict_no_ui_long_connection
    # Feedback to the user
    chatbot.append([inputs_show_user, ""])
    yield from update_ui(chatbot=chatbot, history=[])  # refresh the UI
    executor = ThreadPoolExecutor(max_workers=16)
    # Shared with the worker: [text received so far, time of last feed, (unused here)]
    mutable = ["", time.time(), ""]
    # Watchdog patience in seconds
    watch_dog_patience = 5

    # The request task
    def _req_gpt(inputs, history, sys_prompt):
        retry_op = retry_times_at_unknown_error
        exceeded_cnt = 0
        while True:
            # Watchdog: the consumer stopped feeding, so stop working.
            if len(mutable) >= 2 and (time.time()-mutable[1]) > watch_dog_patience:
                raise RuntimeError("检测到程序终止。")
            try:
                # Case 1: completed successfully
                result = predict_no_ui_long_connection(
                    inputs=inputs, llm_kwargs=llm_kwargs,
                    history=history, sys_prompt=sys_prompt, observe_window=mutable)
                return result
            except ConnectionAbortedError as token_exceeded_error:
                # Case 2: token overflow
                if handle_token_exceed:
                    exceeded_cnt += 1
                    # Clip the text, keeping as much as possible; the margin grows on every overflow.
                    from toolbox import get_reduce_token_percent
                    _, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
                    MAX_TOKEN = get_max_token(llm_kwargs)
                    EXCEED_ALLO = 512 + 512 * exceeded_cnt
                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
                    mutable[0] += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
                    continue  # retry
                else:
                    # Give up
                    tb_str = '```\n' + trimmed_format_exc() + '```'
                    mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                    return mutable[0]
            except Exception:
                # Case 3: any other error, retry a few times
                tb_str = '```\n' + trimmed_format_exc() + '```'
                print(tb_str)
                mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                if retry_op > 0:
                    retry_op -= 1
                    mutable[0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}:\n\n"
                    if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                        time.sleep(30)
                    time.sleep(5)
                    continue  # retry
                else:
                    time.sleep(5)
                    return mutable[0]  # give up

    # Submit the task
    future = executor.submit(_req_gpt, inputs, history, sys_prompt)
    try:
        while True:
            # Yield once per interval to refresh the front end
            time.sleep(refresh_interval)
            # Feed the watchdog
            mutable[1] = time.time()
            if future.done():
                break
            chatbot[-1] = [chatbot[-1][0], mutable[0]]
            yield from update_ui(chatbot=chatbot, history=[])  # refresh the UI

        final_result = future.result()
    finally:
        # Release the pool without blocking, including when this generator is
        # closed early; a still-running worker then stops via the watchdog.
        executor.shutdown(wait=False)
    chatbot[-1] = [chatbot[-1][0], final_result]
    yield from update_ui(chatbot=chatbot, history=[])  # on success this replaces any error text shown so far
    return final_result
137
-
138
def can_multi_process(llm):
    """Return True if the model name starts with a prefix for which multi-threaded requests are allowed."""
    parallel_prefixes = ('gpt-', 'api2d-', 'azure-', 'spark', 'zhipuai', 'glm-')
    return llm.startswith(parallel_prefixes)
145
-
146
def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array, inputs_show_user_array, llm_kwargs,
        chatbot, history_array, sys_prompt_array,
        refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
        handle_token_exceed=True, show_user_at_complete=False,
        retry_times_at_unknown_error=2,
        ):
    """Request the model for many sub-tasks in a thread pool while streaming progress to the UI.

    Features:
        - shows the incoming data of every sub-task in the UI in real time;
        - uses a thread pool whose size can be limited to avoid rate-limit errors;
        - stops the workers when the consumer stops iterating (watchdog);
        - on failures, puts the traceback and the data already received into the output.

    This is a generator: it yields UI updates and returns the collected results.

    Args (the ``*_array`` arguments are lists with one entry per sub-task):
        inputs_array (list): input text of every sub-task.
        inputs_show_user_array (list): text shown in the report for every
            sub-task instead of the real input.
        llm_kwargs: model parameters, passed through to every request.
        chatbot: chat window handle, used to visualise the data stream.
        history_array (list): list of chat histories, one list per sub-task.
        sys_prompt_array (list): system prompt of every sub-task.
        refresh_interval (float, optional): seconds between UI refreshes
            (default 0.2). Must stay well below the 5-second watchdog patience.
        max_workers (int, optional): maximum number of threads; -1 reads
            ``DEFAULT_WORKER_NUM`` from the configuration (falling back to 8).
            Forced to 1 for models rejected by ``can_multi_process``.
        scroller_max_len (int, optional): how many trailing characters of each
            stream are displayed (default 30).
        handle_token_exceed (bool, optional): when True, a token overflow is
            handled by clipping the text and retrying; otherwise the sub-task
            is given up.
        show_user_at_complete (bool, optional): when True, append every
            input/output pair to the chat window at the end.
        retry_times_at_unknown_error (int, optional): retries per sub-task
            after an unknown error.

    Returns:
        list: flat list alternating ``inputs_show_user`` and the corresponding
        response. A failed sub-task's response carries the traceback text.
    """
    import time, random
    from concurrent.futures import ThreadPoolExecutor
    from request_llms.bridge_all import predict_no_ui_long_connection
    assert len(inputs_array) == len(history_array)
    assert len(inputs_array) == len(sys_prompt_array)
    if max_workers == -1:  # read the configuration
        try: max_workers = get_conf('DEFAULT_WORKER_NUM')
        except Exception: max_workers = 8
        if max_workers <= 0: max_workers = 3
    # Disable multi-threading for models such as chatglm; it may cause severe stalls
    if not can_multi_process(llm_kwargs['llm_model']):
        max_workers = 1

    executor = ThreadPoolExecutor(max_workers=max_workers)
    n_frag = len(inputs_array)
    # Feedback to the user
    chatbot.append(["请开始多线程操作。", ""])
    yield from update_ui(chatbot=chatbot, history=[])  # refresh the UI
    # Shared across threads, one entry per sub-task: [text received so far, time of last feed, status text]
    mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]

    # Watchdog patience in seconds
    watch_dog_patience = 5

    # Worker task
    def _req_gpt(index, inputs, history, sys_prompt):
        gpt_say = ""
        retry_op = retry_times_at_unknown_error
        exceeded_cnt = 0
        mutable[index][2] = "执行中"
        detect_timeout = lambda: len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > watch_dog_patience
        while True:
            # Watchdog: the consumer stopped feeding, so stop working.
            if detect_timeout(): raise RuntimeError("检测到程序终止。")
            try:
                # Case 1: completed successfully
                gpt_say = predict_no_ui_long_connection(
                    inputs=inputs, llm_kwargs=llm_kwargs, history=history,
                    sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
                )
                mutable[index][2] = "已成功"
                return gpt_say
            except ConnectionAbortedError as token_exceeded_error:
                # Case 2: token overflow
                if handle_token_exceed:
                    exceeded_cnt += 1
                    # Clip the text, keeping as much as possible; the margin grows on every overflow.
                    from toolbox import get_reduce_token_percent
                    _, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
                    MAX_TOKEN = get_max_token(llm_kwargs)
                    EXCEED_ALLO = 512 + 512 * exceeded_cnt
                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
                    gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
                    mutable[index][2] = "截断重试"
                    continue  # retry
                else:
                    # Give up
                    tb_str = '```\n' + trimmed_format_exc() + '```'
                    gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                    if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
                    mutable[index][2] = "输入过长已放弃"
                    return gpt_say
            except Exception:
                # Case 3: any other error
                if detect_timeout(): raise RuntimeError("检测到程序终止。")
                tb_str = '```\n' + trimmed_format_exc() + '```'
                print(tb_str)
                gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
                if retry_op > 0:
                    retry_op -= 1
                    wait = random.randint(5, 20)
                    if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                        wait = wait * 3
                        fail_info = "OpenAI绑定信用卡可解除频率限制 "
                    else:
                        fail_info = ""
                    # Waiting a while may let the situation improve; show a countdown as status.
                    for i in range(wait):
                        mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
                    # Start the retry
                    if detect_timeout(): raise RuntimeError("检测到程序终止。")
                    mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
                    continue  # retry
                else:
                    mutable[index][2] = "已失败"
                    time.sleep(5)
                    return gpt_say  # give up

    # Start the asynchronous tasks
    futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
        range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
    cnt = 0
    while True:
        # Yield once per interval to refresh the front end
        time.sleep(refresh_interval)
        cnt += 1
        worker_done = [h.done() for h in futures]
        # Per-thread preview strings for the UI
        observe_win = []
        # Feed the watchdog of every thread
        for thread_index, _ in enumerate(worker_done):
            mutable[thread_index][1] = time.time()
        # Build a short, markdown-safe preview of the tail of every stream
        for thread_index, _ in enumerate(worker_done):
            print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
                replace('\n', '').replace('`', '.').replace(' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
            observe_win.append(print_something_really_funny)
        # Status line per thread; finished threads show the status only
        stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
                            if not done else f'`{mutable[thread_index][2]}`\n\n'
                            for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
        # Animated dots show that the UI is alive
        chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
        yield from update_ui(chatbot=chatbot, history=[])  # refresh the UI
        if all(worker_done):
            executor.shutdown()
            break

    # Asynchronous tasks finished: collect [shown input, response, shown input, response, ...]
    gpt_response_collection = []
    for inputs_show_user, f in zip(inputs_show_user_array, futures):
        gpt_res = f.result()
        gpt_response_collection.extend([inputs_show_user, gpt_res])

    # Optionally show the results in the chat window at the end
    if show_user_at_complete:
        for inputs_show_user, f in zip(inputs_show_user_array, futures):
            gpt_res = f.result()
            chatbot.append([inputs_show_user, gpt_res])
            yield from update_ui(chatbot=chatbot, history=[])  # refresh the UI
            time.sleep(0.5)
    return gpt_response_collection
314
-
315
-
316
-
317
- def read_and_clean_pdf_text(fp):
318
- """
319
- 这个函数用于分割pdf,用了很多trick,逻辑较乱,效果奇好
320
-
321
- **输入参数说明**
322
- - `fp`:需要读取和清理文本的pdf文件路径
323
-
324
- **输出参数说明**
325
- - `meta_txt`:清理后的文本内容字符串
326
- - `page_one_meta`:第一页清理后的文本内容列表
327
-
328
- **函数功能**
329
- 读取pdf文件并清理其中的文本内容,清理规则包括:
330
- - 提取所有块元的文本信息,并合并为一个字符串
331
- - 去除短块(字符数小于100)并替换为回车符
332
- - 清理多余的空行
333
- - 合并小写字母开头的段落块并替换为空格
334
- - 清除重复的换行
335
- - 将每个换行符替换为两个换行符,使每个段落之间有两个换行符分隔
336
- """
337
- import fitz, copy
338
- import re
339
- import numpy as np
340
- from colorful import print亮黄, print亮绿
341
- fc = 0 # Index 0 文本
342
- fs = 1 # Index 1 字体
343
- fb = 2 # Index 2 框框
344
- REMOVE_FOOT_NOTE = True # 是否丢弃掉 不是正文的内容 (比正文字体小,如参考文献、脚注、图注等)
345
- REMOVE_FOOT_FFSIZE_PERCENT = 0.95 # 小于正文的?时,判定为不是正文(有些文章的正文部分字体大小不是100%统一的,有肉眼不可见的小变化)
346
- def primary_ffsize(l):
347
- """
348
- 提取文本块主字体
349
- """
350
- fsize_statiscs = {}
351
- for wtf in l['spans']:
352
- if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
353
- fsize_statiscs[wtf['size']] += len(wtf['text'])
354
- return max(fsize_statiscs, key=fsize_statiscs.get)
355
-
356
- def ffsize_same(a,b):
357
- """
358
- 提取字体大小是否近似相等
359
- """
360
- return abs((a-b)/max(a,b)) < 0.02
361
-
362
- with fitz.open(fp) as doc:
363
- meta_txt = []
364
- meta_font = []
365
-
366
- meta_line = []
367
- meta_span = []
368
- ############################## <第 1 步,搜集初始信息> ##################################
369
- for index, page in enumerate(doc):
370
- # file_content += page.get_text()
371
- text_areas = page.get_text("dict") # 获取页面上的文本信息
372
- for t in text_areas['blocks']:
373
- if 'lines' in t:
374
- pf = 998
375
- for l in t['lines']:
376
- txt_line = "".join([wtf['text'] for wtf in l['spans']])
377
- if len(txt_line) == 0: continue
378
- pf = primary_ffsize(l)
379
- meta_line.append([txt_line, pf, l['bbox'], l])
380
- for wtf in l['spans']: # for l in t['lines']:
381
- meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
382
- # meta_line.append(["NEW_BLOCK", pf])
383
- # 块元提取 for each word segment with in line for each line cross-line words for each block
384
- meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
385
- '- ', '') for t in text_areas['blocks'] if 'lines' in t])
386
- meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']])
387
- for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
388
- if index == 0:
389
- page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
390
- '- ', '') for t in text_areas['blocks'] if 'lines' in t]
391
-
392
- ############################## <第 2 步,获取正文主字体> ##################################
393
- try:
394
- fsize_statiscs = {}
395
- for span in meta_span:
396
- if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0
397
- fsize_statiscs[span[1]] += span[2]
398
- main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
399
- if REMOVE_FOOT_NOTE:
400
- give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT
401
- except:
402
- raise RuntimeError(f'抱歉, 我们暂时无法解析此PDF文档: {fp}。')
403
- ############################## <第 3 步,切分和重新整合> ##################################
404
- mega_sec = []
405
- sec = []
406
- for index, line in enumerate(meta_line):
407
- if index == 0:
408
- sec.append(line[fc])
409
- continue
410
- if REMOVE_FOOT_NOTE:
411
- if meta_line[index][fs] <= give_up_fize_threshold:
412
- continue
413
- if ffsize_same(meta_line[index][fs], meta_line[index-1][fs]):
414
- # 尝试识别段落
415
- if meta_line[index][fc].endswith('.') and\
416
- (meta_line[index-1][fc] != 'NEW_BLOCK') and \
417
- (meta_line[index][fb][2] - meta_line[index][fb][0]) < (meta_line[index-1][fb][2] - meta_line[index-1][fb][0]) * 0.7:
418
- sec[-1] += line[fc]
419
- sec[-1] += "\n\n"
420
- else:
421
- sec[-1] += " "
422
- sec[-1] += line[fc]
423
- else:
424
- if (index+1 < len(meta_line)) and \
425
- meta_line[index][fs] > main_fsize:
426
- # 单行 + 字体大
427
- mega_sec.append(copy.deepcopy(sec))
428
- sec = []
429
- sec.append("# " + line[fc])
430
- else:
431
- # 尝试识别section
432
- if meta_line[index-1][fs] > meta_line[index][fs]:
433
- sec.append("\n" + line[fc])
434
- else:
435
- sec.append(line[fc])
436
- mega_sec.append(copy.deepcopy(sec))
437
-
438
- finals = []
439
- for ms in mega_sec:
440
- final = " ".join(ms)
441
- final = final.replace('- ', ' ')
442
- finals.append(final)
443
- meta_txt = finals
444
-
445
- ############################## <第 4 步,乱七八糟的后处理> ##################################
446
- def 把字符太少的块清除为回车(meta_txt):
447
- for index, block_txt in enumerate(meta_txt):
448
- if len(block_txt) < 100:
449
- meta_txt[index] = '\n'
450
- return meta_txt
451
- meta_txt = 把字符太少的块清除为回车(meta_txt)
452
-
453
- def 清理多余的空行(meta_txt):
454
- for index in reversed(range(1, len(meta_txt))):
455
- if meta_txt[index] == '\n' and meta_txt[index-1] == '\n':
456
- meta_txt.pop(index)
457
- return meta_txt
458
- meta_txt = 清理多余的空行(meta_txt)
459
-
460
- def 合并小写开头的段落块(meta_txt):
461
- def starts_with_lowercase_word(s):
462
- pattern = r"^[a-z]+"
463
- match = re.match(pattern, s)
464
- if match:
465
- return True
466
- else:
467
- return False
468
- # 对于某些PDF会有第一个段落就以小写字母开头,为了避免索引错误将其更改为大写
469
- if starts_with_lowercase_word(meta_txt[0]):
470
- meta_txt[0] = meta_txt[0].capitalize()
471
- for _ in range(100):
472
- for index, block_txt in enumerate(meta_txt):
473
- if starts_with_lowercase_word(block_txt):
474
- if meta_txt[index-1] != '\n':
475
- meta_txt[index-1] += ' '
476
- else:
477
- meta_txt[index-1] = ''
478
- meta_txt[index-1] += meta_txt[index]
479
- meta_txt[index] = '\n'
480
- return meta_txt
481
- meta_txt = 合并小写开头的段落块(meta_txt)
482
- meta_txt = 清理多余的空行(meta_txt)
483
-
484
- meta_txt = '\n'.join(meta_txt)
485
- # 清除重复的换行
486
- for _ in range(5):
487
- meta_txt = meta_txt.replace('\n\n', '\n')
488
-
489
- # 换行 -> 双换行
490
- meta_txt = meta_txt.replace('\n', '\n\n')
491
-
492
- ############################## <第 5 步,展示分割效果> ##################################
493
- # for f in finals:
494
- # print亮黄(f)
495
- # print亮绿('***************************')
496
-
497
- return meta_txt, page_one_meta
498
-
499
-
500
- def get_files_from_everything(txt, type): # type='.md'
501
- """
502
- 这个函数是用来获取指定目录下所有指定类型(如.md)的文件,并且对于网络上的文件,也可以获取它。
503
- 下面是对每个参数和返回值的说明:
504
- 参数
505
- - txt: 路径或网址,表示要搜索的文件或者文件夹路径或网络上的文件。
506
- - type: 字符串,表示要搜索的文件类型。默认是.md。
507
- 返回值
508
- - success: 布尔值,表示函数是否成功执行。
509
- - file_manifest: 文件路径列表,里面包含以指定类型为后缀名的所有文件的绝对路径。
510
- - project_folder: 字符串,表示文件所在的文件夹路径。如果是网络上的文件,就是临时文件夹的路径。
511
- 该函数详细注释已添加,请确认是否满足您的需要。
512
- """
513
- import glob, os
514
-
515
- success = True
516
- if txt.startswith('http'):
517
- # 网络的远程文件
518
- import requests
519
- from toolbox import get_conf
520
- from toolbox import get_log_folder, gen_time_str
521
- proxies = get_conf('proxies')
522
- try:
523
- r = requests.get(txt, proxies=proxies)
524
- except:
525
- raise ConnectionRefusedError(f"无法下载资源{txt},请检查。")
526
- path = os.path.join(get_log_folder(plugin_name='web_download'), gen_time_str()+type)
527
- with open(path, 'wb+') as f: f.write(r.content)
528
- project_folder = get_log_folder(plugin_name='web_download')
529
- file_manifest = [path]
530
- elif txt.endswith(type):
531
- # 直接给定文件
532
- file_manifest = [txt]
533
- project_folder = os.path.dirname(txt)
534
- elif os.path.exists(txt):
535
- # 本地路径,递归搜索
536
- project_folder = txt
537
- file_manifest = [f for f in glob.glob(f'{project_folder}/**/*'+type, recursive=True)]
538
- if len(file_manifest) == 0:
539
- success = False
540
- else:
541
- project_folder = None
542
- file_manifest = []
543
- success = False
544
-
545
- return success, file_manifest, project_folder
546
-
547
-
548
-
549
- @Singleton
550
- class nougat_interface():
551
- def __init__(self):
552
- self.threadLock = threading.Lock()
553
-
554
- def nougat_with_timeout(self, command, cwd, timeout=3600):
555
- import subprocess
556
- from toolbox import ProxyNetworkActivate
557
- logging.info(f'正在执行命令 {command}')
558
- with ProxyNetworkActivate("Nougat_Download"):
559
- process = subprocess.Popen(command, shell=True, cwd=cwd, env=os.environ)
560
- try:
561
- stdout, stderr = process.communicate(timeout=timeout)
562
- except subprocess.TimeoutExpired:
563
- process.kill()
564
- stdout, stderr = process.communicate()
565
- print("Process timed out!")
566
- return False
567
- return True
568
-
569
-
570
- def NOUGAT_parse_pdf(self, fp, chatbot, history):
571
- from toolbox import update_ui_lastest_msg
572
-
573
- yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...",
574
- chatbot=chatbot, history=history, delay=0)
575
- self.threadLock.acquire()
576
- import glob, threading, os
577
- from toolbox import get_log_folder, gen_time_str
578
- dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
579
- os.makedirs(dst)
580
-
581
- yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
582
- chatbot=chatbot, history=history, delay=0)
583
- self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600)
584
- res = glob.glob(os.path.join(dst,'*.mmd'))
585
- if len(res) == 0:
586
- self.threadLock.release()
587
- raise RuntimeError("Nougat解析论文失败。")
588
- self.threadLock.release()
589
- return res[0]
590
-
591
-
592
-
593
-
594
- def try_install_deps(deps, reload_m=[]):
595
- import subprocess, sys, importlib
596
- for dep in deps:
597
- subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep])
598
- import site
599
- importlib.reload(site)
600
- for m in reload_m:
601
- importlib.reload(__import__(m))
602
-
603
-
604
- def get_plugin_arg(plugin_kwargs, key, default):
605
- # 如果参数是空的
606
- if (key in plugin_kwargs) and (plugin_kwargs[key] == ""): plugin_kwargs.pop(key)
607
- # 正常情况
608
- return plugin_kwargs.get(key, default)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/diagram_fns/file_tree.py DELETED
@@ -1,122 +0,0 @@
1
- import os
2
- from textwrap import indent
3
-
4
- class FileNode:
5
- def __init__(self, name):
6
- self.name = name
7
- self.children = []
8
- self.is_leaf = False
9
- self.level = 0
10
- self.parenting_ship = []
11
- self.comment = ""
12
- self.comment_maxlen_show = 50
13
-
14
- @staticmethod
15
- def add_linebreaks_at_spaces(string, interval=10):
16
- return '\n'.join(string[i:i+interval] for i in range(0, len(string), interval))
17
-
18
- def sanitize_comment(self, comment):
19
- if len(comment) > self.comment_maxlen_show: suf = '...'
20
- else: suf = ''
21
- comment = comment[:self.comment_maxlen_show]
22
- comment = comment.replace('\"', '').replace('`', '').replace('\n', '').replace('`', '').replace('$', '')
23
- comment = self.add_linebreaks_at_spaces(comment, 10)
24
- return '`' + comment + suf + '`'
25
-
26
- def add_file(self, file_path, file_comment):
27
- directory_names, file_name = os.path.split(file_path)
28
- current_node = self
29
- level = 1
30
- if directory_names == "":
31
- new_node = FileNode(file_name)
32
- current_node.children.append(new_node)
33
- new_node.is_leaf = True
34
- new_node.comment = self.sanitize_comment(file_comment)
35
- new_node.level = level
36
- current_node = new_node
37
- else:
38
- dnamesplit = directory_names.split(os.sep)
39
- for i, directory_name in enumerate(dnamesplit):
40
- found_child = False
41
- level += 1
42
- for child in current_node.children:
43
- if child.name == directory_name:
44
- current_node = child
45
- found_child = True
46
- break
47
- if not found_child:
48
- new_node = FileNode(directory_name)
49
- current_node.children.append(new_node)
50
- new_node.level = level - 1
51
- current_node = new_node
52
- term = FileNode(file_name)
53
- term.level = level
54
- term.comment = self.sanitize_comment(file_comment)
55
- term.is_leaf = True
56
- current_node.children.append(term)
57
-
58
- def print_files_recursively(self, level=0, code="R0"):
59
- print(' '*level + self.name + ' ' + str(self.is_leaf) + ' ' + str(self.level))
60
- for j, child in enumerate(self.children):
61
- child.print_files_recursively(level=level+1, code=code+str(j))
62
- self.parenting_ship.extend(child.parenting_ship)
63
- p1 = f"""{code}[\"🗎{self.name}\"]""" if self.is_leaf else f"""{code}[[\"📁{self.name}\"]]"""
64
- p2 = """ --> """
65
- p3 = f"""{code+str(j)}[\"🗎{child.name}\"]""" if child.is_leaf else f"""{code+str(j)}[[\"📁{child.name}\"]]"""
66
- edge_code = p1 + p2 + p3
67
- if edge_code in self.parenting_ship:
68
- continue
69
- self.parenting_ship.append(edge_code)
70
- if self.comment != "":
71
- pc1 = f"""{code}[\"🗎{self.name}\"]""" if self.is_leaf else f"""{code}[[\"📁{self.name}\"]]"""
72
- pc2 = f""" -.-x """
73
- pc3 = f"""C{code}[\"{self.comment}\"]:::Comment"""
74
- edge_code = pc1 + pc2 + pc3
75
- self.parenting_ship.append(edge_code)
76
-
77
-
78
- MERMAID_TEMPLATE = r"""
79
- ```mermaid
80
- flowchart LR
81
- %% <gpt_academic_hide_mermaid_code> 一个特殊标记,用于在生成mermaid图表时隐藏代码块
82
- classDef Comment stroke-dasharray: 5 5
83
- subgraph {graph_name}
84
- {relationship}
85
- end
86
- ```
87
- """
88
-
89
- def build_file_tree_mermaid_diagram(file_manifest, file_comments, graph_name):
90
- # Create the root node
91
- file_tree_struct = FileNode("root")
92
- # Build the tree structure
93
- for file_path, file_comment in zip(file_manifest, file_comments):
94
- file_tree_struct.add_file(file_path, file_comment)
95
- file_tree_struct.print_files_recursively()
96
- cc = "\n".join(file_tree_struct.parenting_ship)
97
- ccc = indent(cc, prefix=" "*8)
98
- return MERMAID_TEMPLATE.format(graph_name=graph_name, relationship=ccc)
99
-
100
- if __name__ == "__main__":
101
- # File manifest
102
- file_manifest = [
103
- "cradle_void_terminal.ipynb",
104
- "tests/test_utils.py",
105
- "tests/test_plugins.py",
106
- "tests/test_llms.py",
107
- "config.py",
108
- "build/ChatGLM-6b-onnx-u8s8/chatglm-6b-int8-onnx-merged/model_weights_0.bin",
109
- "crazy_functions/latex_fns/latex_actions.py",
110
- "crazy_functions/latex_fns/latex_toolbox.py"
111
- ]
112
- file_comments = [
113
- "根据位置和名称,可能是一个模块的初始化文件根据位置和名称,可能是一个模块的初始化文件根据位置和名称,可能是一个模块的初始化文件",
114
- "包含一些用于文本处理和模型微调的函数和装饰器包含一些用于文本处理和模型微调的函数和装饰器包含一些用于文本处理和模型微调的函数和装饰器",
115
- "用于构建HTML报告的类和方法用于构建HTML报告的类和方法��于构建HTML报告的类和方法",
116
- "包含了用于文本切分的函数,以及处理PDF文件的示例代码包含了用于文本切分的函数,以及处理PDF文件的示例代码包含了用于文本切分的函数,以及处理PDF文件的示例代码",
117
- "用于解析和翻译PDF文件的功能和相关辅助函数用于解析和翻译PDF文件的功能和相关辅助函数用于解析和翻译PDF文件的功能和相关辅助函数",
118
- "是一个包的初始化文件,用于初始化包的属性和导入模块是一个包的初始化文件,用于初始化包的属性和导入模块是一个包的初始化文件,用于初始化包的属性和导入模块",
119
- "用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器",
120
- "包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类",
121
- ]
122
- print(build_file_tree_mermaid_diagram(file_manifest, file_comments, "项目文件树"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/game_fns/game_ascii_art.py DELETED
@@ -1,42 +0,0 @@
1
- from toolbox import CatchException, update_ui, update_ui_lastest_msg
2
- from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
3
- from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
4
- from request_llms.bridge_all import predict_no_ui_long_connection
5
- from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
6
- import random
7
-
8
-
9
- class MiniGame_ASCII_Art(GptAcademicGameBaseState):
10
- def step(self, prompt, chatbot, history):
11
- if self.step_cnt == 0:
12
- chatbot.append(["我画你猜(动物)", "请稍等..."])
13
- else:
14
- if prompt.strip() == 'exit':
15
- self.delete_game = True
16
- yield from update_ui_lastest_msg(lastmsg=f"谜底是{self.obj},游戏结束。", chatbot=chatbot, history=history, delay=0.)
17
- return
18
- chatbot.append([prompt, ""])
19
- yield from update_ui(chatbot=chatbot, history=history)
20
-
21
- if self.step_cnt == 0:
22
- self.lock_plugin(chatbot)
23
- self.cur_task = 'draw'
24
-
25
- if self.cur_task == 'draw':
26
- avail_obj = ["狗","猫","鸟","鱼","老鼠","蛇"]
27
- self.obj = random.choice(avail_obj)
28
- inputs = "I want to play a game called Guess the ASCII art. You can draw the ASCII art and I will try to guess it. " + \
29
- f"This time you draw a {self.obj}. Note that you must not indicate what you have draw in the text, and you should only produce the ASCII art wrapped by ```. "
30
- raw_res = predict_no_ui_long_connection(inputs=inputs, llm_kwargs=self.llm_kwargs, history=[], sys_prompt="")
31
- self.cur_task = 'identify user guess'
32
- res = get_code_block(raw_res)
33
- history += ['', f'the answer is {self.obj}', inputs, res]
34
- yield from update_ui_lastest_msg(lastmsg=res, chatbot=chatbot, history=history, delay=0.)
35
-
36
- elif self.cur_task == 'identify user guess':
37
- if is_same_thing(self.obj, prompt, self.llm_kwargs):
38
- self.delete_game = True
39
- yield from update_ui_lastest_msg(lastmsg="你猜对了!", chatbot=chatbot, history=history, delay=0.)
40
- else:
41
- self.cur_task = 'identify user guess'
42
- yield from update_ui_lastest_msg(lastmsg="猜错了,再试试,输入“exit”获取答案。", chatbot=chatbot, history=history, delay=0.)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/game_fns/game_interactive_story.py DELETED
@@ -1,212 +0,0 @@
1
- prompts_hs = """ 请以“{headstart}”为开头,编写一个小说的第一幕。
2
-
3
- - 尽量短,不要包含太多情节,因为你接下来将会与用户互动续写下面的情节,要留出足够的互动空间。
4
- - 出现人物时,给出人物的名字。
5
- - 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
6
- - 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
7
- - 字数要求:第一幕的字数少于300字,且少于2个段落。
8
- """
9
-
10
- prompts_interact = """ 小说的前文回顾:
11
-
12
- {previously_on_story}
13
-
14
-
15
- 你是一个作家,根据以上的情节,给出4种不同的后续剧情发展方向,每个发展方向都精明扼要地用一句话说明。稍后,我将在这4个选择中,挑选一种剧情发展。
16
-
17
- 输出格式例如:
18
- 1. 后续剧情发展1
19
- 2. 后续剧情发展2
20
- 3. 后续剧情发展3
21
- 4. 后续剧情发展4
22
- """
23
-
24
-
25
- prompts_resume = """小说的前文回顾:
26
-
27
- {previously_on_story}
28
-
29
-
30
- 你是一个作家,我们正在互相讨论,确定后续剧情的发展。
31
- 在以下的剧情发展中,
32
-
33
- {choice}
34
-
35
- 我认为更合理的是:{user_choice}。
36
- 请在前文的基础上(不要重复前文),围绕我选定的剧情情节,编写小说的下一幕。
37
-
38
- - 禁止杜撰不符合我选择的剧情。
39
- - 尽量短,不要包含太多情节,因为你接下来将会与用户互动续写下面的情节,要留出足够的互动空间。
40
- - 不要重复前文。
41
- - 出现人物时,给出人物的名字。
42
- - 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
43
- - 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
44
- - 小说的下一幕字数少于300字,且少于2个段落。
45
- """
46
-
47
-
48
- prompts_terminate = """小说的前文回顾:
49
-
50
- {previously_on_story}
51
-
52
-
53
- 你是一个作家,我们正在互相讨论,确定后续剧情的发展。
54
- 现在,故事该结束了,我认为最合理的故事结局是:{user_choice}。
55
-
56
- 请在前文的基础上(不要重复前文),编写小说的最后一幕。
57
-
58
- - 不要重复前文。
59
- - 出现人物时,给出人物的名字。
60
- - 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
61
- - 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
62
- - 字数要求:最后一幕的字数少于1000字。
63
- """
64
-
65
-
66
- from toolbox import CatchException, update_ui, update_ui_lastest_msg
67
- from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
68
- from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
69
- from request_llms.bridge_all import predict_no_ui_long_connection
70
- from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
71
- import random
72
-
73
-
74
- class MiniGame_ResumeStory(GptAcademicGameBaseState):
75
- story_headstart = [
76
- '先行者知道,他现在是全宇宙中唯一的一个人了。',
77
- '深夜,一个年轻人穿过天安门广场向纪念堂走去。在二十二世纪编年史中,计算机把他的代号定为M102。',
78
- '他知道,这最后一课要提前讲了。又一阵剧痛从肝部袭来,几乎使他晕厥过去。',
79
- '在距地球五万光年的远方,在银河系的中心,一场延续了两万年的星际战争已接近尾声。那里的太空中渐渐隐现出一个方形区域,仿佛灿烂的群星的背景被剪出一个方口。',
80
- '伊依一行三人乘坐一艘游艇在南太平洋上做吟诗航行,他们的目的地是南极,如果几天后能顺利到达那里,他们将钻出地壳去看诗云。',
81
- '很多人生来就会莫名其妙地迷上一样东西,仿佛他的出生就是要和这东西约会似的,正是这样,圆圆迷上了肥皂泡。'
82
- ]
83
-
84
-
85
- def begin_game_step_0(self, prompt, chatbot, history):
86
- # init game at step 0
87
- self.headstart = random.choice(self.story_headstart)
88
- self.story = []
89
- chatbot.append(["互动写故事", f"这次的故事开头是:{self.headstart}"])
90
- self.sys_prompt_ = '你是一个想象力丰富的杰出作家。正在与你的朋友互动,一起写故事,因此你每次写的故事段落应少于300字(结局除外)。'
91
-
92
-
93
- def generate_story_image(self, story_paragraph):
94
- try:
95
- from crazy_functions.图片生成 import gen_image
96
- prompt_ = predict_no_ui_long_connection(inputs=story_paragraph, llm_kwargs=self.llm_kwargs, history=[], sys_prompt='你需要根据用户给出的小说段落,进行简短的环境描写。要求:80字以内。')
97
- image_url, image_path = gen_image(self.llm_kwargs, prompt_, '512x512', model="dall-e-2", quality='standard', style='natural')
98
- return f'<br/><div align="center"><img src="file={image_path}"></div>'
99
- except:
100
- return ''
101
-
102
- def step(self, prompt, chatbot, history):
103
-
104
- """
105
- 首先,处理游戏初始化等特殊情况
106
- """
107
- if self.step_cnt == 0:
108
- self.begin_game_step_0(prompt, chatbot, history)
109
- self.lock_plugin(chatbot)
110
- self.cur_task = 'head_start'
111
- else:
112
- if prompt.strip() == 'exit' or prompt.strip() == '结束剧情':
113
- # should we terminate game here?
114
- self.delete_game = True
115
- yield from update_ui_lastest_msg(lastmsg=f"游戏结束。", chatbot=chatbot, history=history, delay=0.)
116
- return
117
- if '剧情收尾' in prompt:
118
- self.cur_task = 'story_terminate'
119
- # # well, game resumes
120
- # chatbot.append([prompt, ""])
121
- # update ui, don't keep the user waiting
122
- yield from update_ui(chatbot=chatbot, history=history)
123
-
124
-
125
- """
126
- 处理游戏的主体逻辑
127
- """
128
- if self.cur_task == 'head_start':
129
- """
130
- 这是游戏的第一步
131
- """
132
- inputs_ = prompts_hs.format(headstart=self.headstart)
133
- history_ = []
134
- story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
135
- inputs_, '故事开头', self.llm_kwargs,
136
- chatbot, history_, self.sys_prompt_
137
- )
138
- self.story.append(story_paragraph)
139
- # # 配图
140
- yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
141
- yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.)
142
-
143
- # # 构建后续剧情引导
144
- previously_on_story = ""
145
- for s in self.story:
146
- previously_on_story += s + '\n'
147
- inputs_ = prompts_interact.format(previously_on_story=previously_on_story)
148
- history_ = []
149
- self.next_choices = yield from request_gpt_model_in_new_thread_with_ui_alive(
150
- inputs_, '请在以下几种故事走向中,选择一种(当然,您也可以选择给出其他故事走向):', self.llm_kwargs,
151
- chatbot,
152
- history_,
153
- self.sys_prompt_
154
- )
155
- self.cur_task = 'user_choice'
156
-
157
-
158
- elif self.cur_task == 'user_choice':
159
- """
160
- 根据用户的提示,确定故事的下一步
161
- """
162
- if '请在以下几种故事走向中,选择一种' in chatbot[-1][0]: chatbot.pop(-1)
163
- previously_on_story = ""
164
- for s in self.story:
165
- previously_on_story += s + '\n'
166
- inputs_ = prompts_resume.format(previously_on_story=previously_on_story, choice=self.next_choices, user_choice=prompt)
167
- history_ = []
168
- story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
169
- inputs_, f'下一段故事(您的选择是:{prompt})。', self.llm_kwargs,
170
- chatbot, history_, self.sys_prompt_
171
- )
172
- self.story.append(story_paragraph)
173
- # # 配图
174
- yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
175
- yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.)
176
-
177
- # # 构建后续剧情引导
178
- previously_on_story = ""
179
- for s in self.story:
180
- previously_on_story += s + '\n'
181
- inputs_ = prompts_interact.format(previously_on_story=previously_on_story)
182
- history_ = []
183
- self.next_choices = yield from request_gpt_model_in_new_thread_with_ui_alive(
184
- inputs_,
185
- '请在以下几种故事走向中,选择一种。当然,您也可以给出您心中的其他故事走向。另外,如果您希望剧情立即收尾,请输入剧情走向,并以“剧情收尾”四个字提示程序。', self.llm_kwargs,
186
- chatbot,
187
- history_,
188
- self.sys_prompt_
189
- )
190
- self.cur_task = 'user_choice'
191
-
192
-
193
- elif self.cur_task == 'story_terminate':
194
- """
195
- 根据用户的提示,确定故事的结局
196
- """
197
- previously_on_story = ""
198
- for s in self.story:
199
- previously_on_story += s + '\n'
200
- inputs_ = prompts_terminate.format(previously_on_story=previously_on_story, user_choice=prompt)
201
- history_ = []
202
- story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
203
- inputs_, f'故事收尾(您的选择是:{prompt})。', self.llm_kwargs,
204
- chatbot, history_, self.sys_prompt_
205
- )
206
- # # 配图
207
- yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
208
- yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.)
209
-
210
- # terminate game
211
- self.delete_game = True
212
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/game_fns/game_utils.py DELETED
@@ -1,35 +0,0 @@
1
-
2
- from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
3
- from request_llms.bridge_all import predict_no_ui_long_connection
4
- def get_code_block(reply):
5
- import re
6
- pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks
7
- matches = re.findall(pattern, reply) # find all code blocks in text
8
- if len(matches) == 1:
9
- return "```" + matches[0] + "```" # code block
10
- raise RuntimeError("GPT is not generating proper code.")
11
-
12
- def is_same_thing(a, b, llm_kwargs):
13
- from pydantic import BaseModel, Field
14
- class IsSameThing(BaseModel):
15
- is_same_thing: bool = Field(description="determine whether two objects are same thing.", default=False)
16
-
17
- def run_gpt_fn(inputs, sys_prompt, history=[]):
18
- return predict_no_ui_long_connection(
19
- inputs=inputs, llm_kwargs=llm_kwargs,
20
- history=history, sys_prompt=sys_prompt, observe_window=[]
21
- )
22
-
23
- gpt_json_io = GptJsonIO(IsSameThing)
24
- inputs_01 = "Identity whether the user input and the target is the same thing: \n target object: {a} \n user input object: {b} \n\n\n".format(a=a, b=b)
25
- inputs_01 += "\n\n\n Note that the user may describe the target object with a different language, e.g. cat and 猫 are the same thing."
26
- analyze_res_cot_01 = run_gpt_fn(inputs_01, "", [])
27
-
28
- inputs_02 = inputs_01 + gpt_json_io.format_instructions
29
- analyze_res = run_gpt_fn(inputs_02, "", [inputs_01, analyze_res_cot_01])
30
-
31
- try:
32
- res = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
33
- return res.is_same_thing
34
- except JsonStringError as e:
35
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/gen_fns/gen_fns_shared.py DELETED
@@ -1,70 +0,0 @@
1
- import time
2
- import importlib
3
- from toolbox import trimmed_format_exc, gen_time_str, get_log_folder
4
- from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, is_the_upload_folder
5
- from toolbox import promote_file_to_downloadzone, get_log_folder, update_ui_lastest_msg
6
- import multiprocessing
7
-
8
- def get_class_name(class_string):
9
- import re
10
- # Use regex to extract the class name
11
- class_name = re.search(r'class (\w+)\(', class_string).group(1)
12
- return class_name
13
-
14
- def try_make_module(code, chatbot):
15
- module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
16
- fn_path = f'{get_log_folder(plugin_name="gen_plugin_verify")}/{module_file}.py'
17
- with open(fn_path, 'w', encoding='utf8') as f: f.write(code)
18
- promote_file_to_downloadzone(fn_path, chatbot=chatbot)
19
- class_name = get_class_name(code)
20
- manager = multiprocessing.Manager()
21
- return_dict = manager.dict()
22
- p = multiprocessing.Process(target=is_function_successfully_generated, args=(fn_path, class_name, return_dict))
23
- # only has 10 seconds to run
24
- p.start(); p.join(timeout=10)
25
- if p.is_alive(): p.terminate(); p.join()
26
- p.close()
27
- return return_dict["success"], return_dict['traceback']
28
-
29
- # check is_function_successfully_generated
30
- def is_function_successfully_generated(fn_path, class_name, return_dict):
31
- return_dict['success'] = False
32
- return_dict['traceback'] = ""
33
- try:
34
- # Create a spec for the module
35
- module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
36
- # Load the module
37
- example_module = importlib.util.module_from_spec(module_spec)
38
- module_spec.loader.exec_module(example_module)
39
- # Now you can use the module
40
- some_class = getattr(example_module, class_name)
41
- # Now you can create an instance of the class
42
- instance = some_class()
43
- return_dict['success'] = True
44
- return
45
- except:
46
- return_dict['traceback'] = trimmed_format_exc()
47
- return
48
-
49
- def subprocess_worker(code, file_path, return_dict):
50
- return_dict['result'] = None
51
- return_dict['success'] = False
52
- return_dict['traceback'] = ""
53
- try:
54
- module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
55
- fn_path = f'{get_log_folder(plugin_name="gen_plugin_run")}/{module_file}.py'
56
- with open(fn_path, 'w', encoding='utf8') as f: f.write(code)
57
- class_name = get_class_name(code)
58
- # Create a spec for the module
59
- module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
60
- # Load the module
61
- example_module = importlib.util.module_from_spec(module_spec)
62
- module_spec.loader.exec_module(example_module)
63
- # Now you can use the module
64
- some_class = getattr(example_module, class_name)
65
- # Now you can create an instance of the class
66
- instance = some_class()
67
- return_dict['result'] = instance.run(file_path)
68
- return_dict['success'] = True
69
- except:
70
- return_dict['traceback'] = trimmed_format_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/ipc_fns/mp.py DELETED
@@ -1,37 +0,0 @@
1
- import platform
2
- import pickle
3
- import multiprocessing
4
-
5
- def run_in_subprocess_wrapper_func(v_args):
6
- func, args, kwargs, return_dict, exception_dict = pickle.loads(v_args)
7
- import sys
8
- try:
9
- result = func(*args, **kwargs)
10
- return_dict['result'] = result
11
- except Exception as e:
12
- exc_info = sys.exc_info()
13
- exception_dict['exception'] = exc_info
14
-
15
- def run_in_subprocess_with_timeout(func, timeout=60):
16
- if platform.system() == 'Linux':
17
- def wrapper(*args, **kwargs):
18
- return_dict = multiprocessing.Manager().dict()
19
- exception_dict = multiprocessing.Manager().dict()
20
- v_args = pickle.dumps((func, args, kwargs, return_dict, exception_dict))
21
- process = multiprocessing.Process(target=run_in_subprocess_wrapper_func, args=(v_args,))
22
- process.start()
23
- process.join(timeout)
24
- if process.is_alive():
25
- process.terminate()
26
- raise TimeoutError(f'功能单元{str(func)}未能在规定时间内完成任务')
27
- process.close()
28
- if 'exception' in exception_dict:
29
- # ooops, the subprocess ran into an exception
30
- exc_info = exception_dict['exception']
31
- raise exc_info[1].with_traceback(exc_info[2])
32
- if 'result' in return_dict.keys():
33
- # If the subprocess ran successfully, return the result
34
- return return_dict['result']
35
- return wrapper
36
- else:
37
- return func
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/json_fns/pydantic_io.py DELETED
@@ -1,111 +0,0 @@
1
- """
2
- https://github.com/langchain-ai/langchain/blob/master/docs/extras/modules/model_io/output_parsers/pydantic.ipynb
3
-
4
- Example 1.
5
-
6
- # Define your desired data structure.
7
- class Joke(BaseModel):
8
- setup: str = Field(description="question to set up a joke")
9
- punchline: str = Field(description="answer to resolve the joke")
10
-
11
- # You can add custom validation logic easily with Pydantic.
12
- @validator("setup")
13
- def question_ends_with_question_mark(cls, field):
14
- if field[-1] != "?":
15
- raise ValueError("Badly formed question!")
16
- return field
17
-
18
-
19
- Example 2.
20
-
21
- # Here's another example, but with a compound typed field.
22
- class Actor(BaseModel):
23
- name: str = Field(description="name of an actor")
24
- film_names: List[str] = Field(description="list of names of films they starred in")
25
- """
26
-
27
- import json, re, logging
28
-
29
-
30
# Prompt template with a worked example; ``{schema}`` receives the JSON schema.
PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.

Here is the output schema:
```
{schema}
```"""


# Shorter prompt template without the worked example.
PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
```
{schema}
```"""


class JsonStringError(Exception):
    """Raised when a response cannot be parsed, even after one repair attempt."""


class GptJsonIO():
    """Build JSON format instructions from a schema class and parse replies.

    ``schema`` must provide ``schema()`` (returning a JSON-schema dict) and
    ``parse_obj(obj)``; these are the only two members used here.
    """

    def __init__(self, schema, example_instruction=True):
        """
        schema: class exposing ``schema()`` and ``parse_obj()``
        example_instruction: use the template with the worked example when
            True, the short template otherwise
        """
        self.pydantic_object = schema
        self.example_instruction = example_instruction
        self.format_instructions = self.generate_format_instructions()

    def generate_format_instructions(self):
        """Return the prompt text that embeds the schema as a JSON string."""
        schema = self.pydantic_object.schema()

        # Drop the extraneous top-level fields on a copy, so the dict handed
        # out by ``schema()`` is left untouched.
        reduced_schema = {
            key: value for key, value in schema.items() if key not in ("title", "type")
        }
        # json.dumps guarantees well-formed JSON with double quotes. It is
        # computed before choosing the template so both templates can use it.
        schema_str = json.dumps(reduced_schema)
        if self.example_instruction:
            return PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str)
        return PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE.format(schema=schema_str)

    def generate_output(self, text):
        """Extract the first ``{...}`` span from ``text`` and parse it.

        Raises whatever ``json.loads`` or ``parse_obj`` raise; when no brace
        pair is found, ``json.loads("")`` raises.
        """
        # Greedy search: from the first "{" to the last "}".
        match = re.search(
            r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL
        )
        json_str = ""
        if match: json_str = match.group()
        json_object = json.loads(json_str, strict=False)
        final_object = self.pydantic_object.parse_obj(json_object)
        return final_object

    def generate_repair_prompt(self, broken_json, error):
        """Return a prompt asking the model to fix ``broken_json`` given ``error``."""
        prompt = "Fix a broken json string.\n\n" + \
            "(1) The broken json string need to fix is: \n\n" + \
            "```" + "\n" + \
            broken_json + "\n" + \
            "```" + "\n\n" + \
            "(2) The error message is: \n\n" + \
            error + "\n\n" + \
            "Now, fix this json string. \n\n"
        return prompt

    def generate_output_auto_repair(self, response, gpt_gen_fn):
        """
        response: string containing candidate json
        gpt_gen_fn: gpt_gen_fn(inputs, sys_prompt)

        Parse ``response``; on failure ask ``gpt_gen_fn`` once for a repaired
        string and parse that. Raises JsonStringError if the repair fails too.
        """
        try:
            result = self.generate_output(response)
        except Exception as e:
            try:
                logging.info(f'Repairing json:{response}')
                repair_prompt = self.generate_repair_prompt(broken_json = response, error=repr(e))
                result = self.generate_output(gpt_gen_fn(repair_prompt, self.format_instructions))
                logging.info('Repaire json success.')
            except Exception as repair_error:
                # Out of options: give up, keeping the underlying error chained.
                logging.info('Repaire json fail.')
                raise JsonStringError('Cannot repair json.', str(repair_error)) from repair_error
        return result
111
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/latex_fns/latex_actions.py DELETED
@@ -1,467 +0,0 @@
1
- from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
2
- from toolbox import get_conf, objdump, objload, promote_file_to_downloadzone
3
- from .latex_toolbox import PRESERVE, TRANSFORM
4
- from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
5
- from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
6
- from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
7
- from .latex_toolbox import find_title_and_abs
8
-
9
- import os, shutil
10
- import re
11
- import numpy as np
12
-
13
- pj = os.path.join
14
-
15
-
16
def split_subprocess(txt, project_folder, return_dict, opts):
    """
    Break a LaTeX source string down into a linked list of nodes; each node
    carries a preserve flag indicating whether it should be processed by GPT.

    txt: full LaTeX source text.
    project_folder: folder that receives the ``debug_log.html`` dump.
    return_dict: dict-like object filled with ``'nodes'`` (list of nodes) and
        ``'segment_parts_for_gpt'`` (strings of the non-preserved nodes).
    opts: not used in this function body.

    Returns ``return_dict``.
    """
    text = txt
    # Every character starts out as TRANSFORM.
    # NOTE(review): the helpers below are defined in latex_toolbox; presumably
    # each pass marks the spans it matches as PRESERVE — confirm there.
    mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM

    # Absorb everything above the title / authors
    text, mask = set_forbidden_text(text, mask, r"^(.*?)\\maketitle", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"^(.*?)\\begin{document}", re.DOTALL)
    # Absorb \iffalse ... \fi comments
    text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
    # Absorb begin-end pairs spanning at most 42 lines
    text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42)
    # Absorb anonymous (display) formulas
    text, mask = set_forbidden_text(text, mask, [ r"\$\$([^$]+)\$\$", r"\\\[.*?\\\]" ], re.DOTALL)
    # Absorb other miscellaneous commands and environments
    text, mask = set_forbidden_text(text, mask, [ r"\\section\{(.*?)\}", r"\\section\*\{(.*?)\}", r"\\subsection\{(.*?)\}", r"\\subsubsection\{(.*?)\}" ])
    text, mask = set_forbidden_text(text, mask, [ r"\\bibliography\{(.*?)\}", r"\\bibliographystyle\{(.*?)\}" ])
    text, mask = set_forbidden_text(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{figure\}(.*?)\\end\{figure\}", r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{multline\}(.*?)\\end\{multline\}", r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{table\}(.*?)\\end\{table\}", r"\\begin\{table\*\}(.*?)\\end\{table\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{minipage\}(.*?)\\end\{minipage\}", r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{align\*\}(.*?)\\end\{align\*\}", r"\\begin\{align\}(.*?)\\end\{align\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{equation\}(.*?)\\end\{equation\}", r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\includepdf\[(.*?)\]\{(.*?)\}", r"\\clearpage", r"\\newpage", r"\\appendix", r"\\tableofcontents", r"\\include\{(.*?)\}"])
    text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}", r"\\item "])
    text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
    # The reverse operations must come last
    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
    text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
    root = convert_to_linklist(text, mask)

    # Final post-processing step, to improve robustness
    root = post_process(root)

    # Write an HTML debug file: preserved (PRESERVE) segments are shown in red,
    # segments to be transformed (TRANSFORM) in black
    with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
        segment_parts_for_gpt = []
        nodes = []
        node = root
        # Walk the linked list, collecting every node in order.
        while True:
            nodes.append(node)
            show_html = node.string.replace('\n','<br/>')
            if not node.preserve:
                segment_parts_for_gpt.append(node.string)
                f.write(f'<p style="color:black;">#{node.range}{show_html}#</p>')
            else:
                f.write(f'<p style="color:red;">{show_html}</p>')
            node = node.next
            if node is None: break

    # Break the links between nodes; the order is kept by the list itself.
    # NOTE(review): presumably done so the nodes serialize without following
    # the whole chain — confirm.
    for n in nodes: n.next = None # break
    return_dict['nodes'] = nodes
    return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
    return return_dict
80
-
81
class LatexPaperSplit():
    """
    Break a LaTeX document down into nodes, each flagged as preserved or as
    text to be processed by GPT, and merge the processed pieces back.
    """
    def __init__(self) -> None:
        self.nodes = None
        self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \
            "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
            "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
        # Please do not delete or modify this warning unless you are the
        # original author of the paper (if you are, you are welcome to contact
        # the developers through the QQ group listed in the README).
        self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
        self.title = "unknown"
        self.abstract = "unknown"

    @staticmethod
    def _flatten(text):
        """Turn LaTeX line breaks into spaces and collapse whitespace runs."""
        return ' '.join(text.replace('\\\\', ' ').split())

    def read_title_and_abstract(self, txt):
        """Best-effort extraction of title and abstract from ``txt``.

        On any failure ``self.title`` / ``self.abstract`` keep their current
        values.
        """
        try:
            title, abstract = find_title_and_abs(txt)
        except Exception:
            # This metadata is optional; never fail the pipeline over it.
            return
        # Collapse whitespace instead of deleting it, so words stay separated.
        if title is not None:
            self.title = self._flatten(title)
        if abstract is not None:
            self.abstract = self._flatten(abstract)

    def merge_result(self, arr, mode, msg, buggy_lines=None, buggy_line_surgery_n_lines=10):
        """
        Merge the result after the GPT process completed.

        arr: processed strings, one per non-preserved node, in node order.
        mode: when ``'translate_zh'``, the warning ``self.msg + msg +
            self.msg_declare`` is inserted at the start of the abstract.
        msg: extra text placed inside the warning.
        buggy_lines: line numbers known to break compilation; a processed node
            whose line range (widened by ``buggy_line_surgery_n_lines`` on
            both sides) covers one of them is reverted to its original text.
        buggy_line_surgery_n_lines: widening margin, in lines.

        Returns the merged document as one string.
        Raises ValueError in ``'translate_zh'`` mode when the document has
        neither ``\\begin{abstract}`` nor ``\\abstract{...}``.
        """
        buggy_lines = [] if buggy_lines is None else buggy_lines
        pieces = []
        node_cnt = 0
        line_cnt = 0

        for node in self.nodes:
            if node.preserve:
                line_cnt += node.string.count('\n')
                pieces.append(node.string)
                continue

            translated_txt = fix_content(arr[node_cnt], node.string)
            begin_line = line_cnt - buggy_line_surgery_n_lines
            end_line = line_cnt + translated_txt.count('\n') + buggy_line_surgery_n_lines

            # Revert to the original text if this node overlaps a buggy line.
            if any(begin_line <= b_line <= end_line for b_line in buggy_lines):
                translated_txt = node.string

            pieces.append(translated_txt)
            node_cnt += 1
            line_cnt += translated_txt.count('\n')

        result_string = "".join(pieces)

        if mode == 'translate_zh':
            # Form 1: \begin{abstract}xxxx\end{abstract} -> insert after the
            # \begin{abstract} line.
            match = re.search(r'\\begin\{abstract\}.*\n', result_string)
            if match:
                position = match.end()
            else:
                # Form 2: \abstract{xxxx} -> insert right after the brace.
                match = re.search(r"\\abstract\{(.*?)\}", result_string, flags=re.DOTALL)
                if match is None:
                    raise ValueError(
                        "cannot insert the warning message: no \\begin{abstract} "
                        "or \\abstract{...} found in the merged document"
                    )
                position = match.start(1)
            result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
        return result_string

    def split(self, txt, project_folder, opts):
        """
        Break ``txt`` down into nodes by running ``split_subprocess`` in a
        child process (multiprocessing is used to avoid timeout errors).

        Stores the nodes in ``self.nodes`` and returns the list of strings
        that should be processed by GPT (also kept in ``self.sp``).
        """
        import multiprocessing
        # The context manager shuts the manager's server process down once
        # the results have been copied out of the proxy dict.
        with multiprocessing.Manager() as manager:
            return_dict = manager.dict()
            p = multiprocessing.Process(
                target=split_subprocess,
                args=(txt, project_folder, return_dict, opts))
            p.start()
            p.join()
            p.close()
            self.nodes = return_dict['nodes']
            self.sp = return_dict['segment_parts_for_gpt']
        return self.sp
166
-
167
-
168
class LatexPaperFileGroup():
    """
    Holds a group of text fragments and splits them with a tokenizer so that
    every piece stays under a token limit.
    """
    def __init__(self):
        self.file_paths = []
        self.file_contents = []
        self.sp_file_contents = []
        self.sp_file_index = []
        self.sp_file_tag = []
        # token counter backed by the gpt-3.5-turbo tokenizer
        from request_llms.bridge_all import model_info
        tokenizer = model_info["gpt-3.5-turbo"]['tokenizer']

        def count_tokens(txt):
            return len(tokenizer.encode(txt, disallowed_special=()))

        self.get_token_num = count_tokens

    def run_file_split(self, max_token_limit=1900):
        """
        Fill the ``sp_file_*`` lists: a content below ``max_token_limit``
        tokens is kept whole, anything else is broken into tagged parts.
        """
        for position, content in enumerate(self.file_contents):
            if not self.get_token_num(content) < max_token_limit:
                from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
                parts = breakdown_text_to_satisfy_token_limit(content, max_token_limit)
                for part_no, part in enumerate(parts):
                    self.sp_file_contents.append(part)
                    self.sp_file_index.append(position)
                    self.sp_file_tag.append(self.file_paths[position] + f".part-{part_no}.tex")
                continue
            self.sp_file_contents.append(content)
            self.sp_file_index.append(position)
            self.sp_file_tag.append(self.file_paths[position])

    def merge_result(self):
        """Concatenate the split results back into one string per file."""
        self.file_result = [""] * len(self.file_paths)
        for piece, owner in zip(self.sp_file_result, self.sp_file_index):
            self.file_result[owner] = self.file_result[owner] + piece

    def write_result(self):
        """Write each merged result to ``<path>.polish.tex``; return the paths."""
        written = []
        for source_path, merged_text in zip(self.file_paths, self.file_result):
            target = source_path + '.polish.tex'
            with open(target, 'w', encoding='utf8') as handle:
                written.append(target)
                handle.write(merged_text)
        return written
213
-
214
-
215
def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
    """
    Generator: merge a multi-file LaTeX project into one file, split it into
    fragments, send the fragments to the LLM and write the merged result.

    file_manifest: passed to ``find_main_tex_file`` to locate the main file.
    project_folder: folder holding the project; all outputs are written here.
    llm_kwargs: forwarded to the LLM request helper; ``'llm_model'`` and
        ``'temperature'`` are also read for the status message.
    plugin_kwargs, system_prompt: not used in this function body.
    chatbot, history: UI state, updated and yielded through ``update_ui``.
    mode: processing mode; used in output file names and passed to helpers.
    switch_prompt: callable ``switch_prompt(pfg, mode)`` returning
        ``(inputs_array, sys_prompt_array)``. It is called unconditionally,
        so the default ``None`` cannot be used — callers must supply it.
    opts: forwarded to ``LatexPaperSplit.split``.

    Returns (as the generator's return value) the path
    ``project_folder + '/merge_{mode}.tex'``.
    """
    import time, os, re
    from ..crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
    from .latex_actions import LatexPaperFileGroup, LatexPaperSplit

    # <-------- locate the main tex file ---------->
    maintex = find_main_tex_file(file_manifest, mode)
    chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
    time.sleep(3)

    # <-------- read the LaTeX files and merge the multi-file project into one big tex ---------->
    main_tex_basename = os.path.basename(maintex)
    assert main_tex_basename.endswith('.tex')
    main_tex_basename_bare = main_tex_basename[:-4]
    may_exist_bbl = pj(project_folder, f'{main_tex_basename_bare}.bbl')
    if os.path.exists(may_exist_bbl):
        # Reuse an existing bibliography file under each output name.
        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge.bbl'))
        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_{mode}.bbl'))
        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_diff.bbl'))

    with open(maintex, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()
        merged_content = merge_tex_files(project_folder, content, mode)

    with open(project_folder + '/merge.tex', 'w', encoding='utf-8', errors='replace') as f:
        f.write(merged_content)

    # <-------- fine-grained split of the latex file ---------->
    chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。'))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
    lps = LatexPaperSplit()
    lps.read_title_and_abstract(merged_content)
    res = lps.split(merged_content, project_folder, opts) # time-consuming call
    # <-------- break down latex fragments that are too long ---------->
    pfg = LatexPaperFileGroup()
    for index, r in enumerate(res):
        pfg.file_paths.append('segment-' + str(index))
        pfg.file_contents.append(r)

    pfg.run_file_split(max_token_limit=1024)
    n_split = len(pfg.sp_file_contents)

    # <-------- switch the prompt as required ---------->
    inputs_array, sys_prompt_array = switch_prompt(pfg, mode)
    inputs_show_user_array = [f"{mode} {f}" for f in pfg.sp_file_tag]

    if os.path.exists(pj(project_folder,'temp.pkl')):

        # <-------- [debug only] if a debug cache file exists, skip the GPT requests ---------->
        pfg = objload(file=pj(project_folder,'temp.pkl'))

    else:
        # <-------- multi-threaded gpt requests ---------->
        history_array = [[""] for _ in range(n_split)]
        # Disabled experimental code, kept for reference:
        # LATEX_EXPERIMENTAL, = get_conf('LATEX_EXPERIMENTAL')
        # if LATEX_EXPERIMENTAL:
        # paper_meta = f"The paper you processing is `{lps.title}`, a part of the abstraction is `{lps.abstract}`"
        # paper_meta_max_len = 888
        # history_array = [[ paper_meta[:paper_meta_max_len] + '...', "Understand, what should I do?"] for _ in range(n_split)]

        gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
            inputs_array=inputs_array,
            inputs_show_user_array=inputs_show_user_array,
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=history_array,
            sys_prompt_array=sys_prompt_array,
            # max_workers=5, # limit on parallel tasks: at most 5 run at once, the rest wait in a queue
            scroller_max_len = 40
        )

        # <-------- reassemble the text fragments into complete tex pieces ---------->
        pfg.sp_file_result = []
        # The collection is read as alternating entries: even indexes are the
        # inputs, odd indexes the replies; only the replies are kept.
        for i_say, gpt_say, orig_content in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], pfg.sp_file_contents):
            pfg.sp_file_result.append(gpt_say)
        pfg.merge_result()

        # <-------- temporary dump used for debugging ---------->
        # The token counter is cleared before dumping.
        # NOTE(review): presumably because the nested function cannot be pickled — confirm.
        pfg.get_token_num = None
        objdump(pfg, file=pj(project_folder,'temp.pkl'))

    write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder)

    # <-------- write the output file ---------->
    msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。"
    final_tex = lps.merge_result(pfg.file_result, mode, msg)
    objdump((lps, pfg.file_result, mode, msg), file=pj(project_folder,'merge_result.pkl'))

    with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
        # In translate_zh mode the text is only written when it contains
        # "binary"; otherwise the file is left empty.
        # NOTE(review): presumably a check that the warning message (whose URL
        # contains "binary-husky") was inserted — confirm.
        if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex)


    # <-------- wrap up and exit ---------->
    chatbot.append((f"完成了吗?", 'GPT结果已输出, 即将编译PDF'))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

    # <-------- return ---------->
    return project_folder + f'/merge_{mode}.tex'
314
-
315
-
316
def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified, fixed_line=None):
    """
    Read the LaTeX log, find the first line reported as erroneous and write a
    new tex file in which the fragments around the known buggy lines are
    reverted to their original text.

    file_path: not used in this function body.
    log_path: path of the LaTeX ``.log`` file to scan for ``<tex_name>:<line>:``.
    tex_name: file name as it appears in the log entries (matched literally).
    tex_name_pure: base name used to build the output ``*_fix_{n_fix}.tex``.
    n_fix: attempt counter; also widens the reverted range (``5 * n_fix`` lines).
    work_folder_modified: folder holding ``merge_result.pkl`` and the output.
    fixed_line: list of zero-based line numbers already handled. It is
        updated in place so a caller can accumulate across attempts; a fresh
        list is used when omitted.

    Returns ``(True, new_tex_base_name, buggy_lines)`` on success and
    ``(False, -1, [-1])`` when anything goes wrong (best effort).
    """
    import re
    if fixed_line is None:
        fixed_line = []
    try:
        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
            log = f.read()
        # Escape the file name: it contains "." which is a regex wildcard.
        buggy_lines = sorted(
            int(l) for l in re.findall(re.escape(tex_name) + ':([0-9]{1,5}):', log)
        )
        # An empty list raises IndexError here and lands in the handler below.
        buggy_line = buggy_lines[0]-1
        print("reversing tex line that has errors", buggy_line)

        # Rebuild the document, reverting the paragraphs that fail to compile.
        if buggy_line not in fixed_line:
            fixed_line.append(buggy_line)

        lps, file_result, mode, msg = objload(file=pj(work_folder_modified,'merge_result.pkl'))
        final_tex = lps.merge_result(file_result, mode, msg, buggy_lines=fixed_line, buggy_line_surgery_n_lines=5*n_fix)

        with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f:
            f.write(final_tex)

        return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
    except Exception:
        # Best effort: report failure to the caller instead of raising.
        print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
        return False, -1, [-1]
341
-
342
-
343
def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
    """
    Generator: compile the original and the modified LaTeX files to PDF,
    retrying with ``remove_buggy_lines`` when the modified file fails.

    chatbot, history: UI state, updated through ``update_ui`` /
        ``update_ui_lastest_msg``.
    main_file_original, main_file_modified: tex base names (no extension).
    work_folder_original, work_folder_modified: folders the two files are
        compiled in.
    work_folder: folder used for the ``merge_diff`` comparison document.
    mode: when not ``'translate_zh'``, a latexdiff comparison PDF is built too.

    Returns (as the generator's return value) True once the modified PDF
    exists, False after ``max_try`` attempts or when no retry is possible.
    """
    import os, time
    n_fix = 1
    fixed_line = []
    max_try = 32
    chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
    chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # refresh the UI
    yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # refresh the Gradio front-end

    while True:
        import os
        # Reuse merge.bbl as the bibliography of the current modified file.
        may_exist_bbl = pj(work_folder_modified, f'merge.bbl')
        target_bbl = pj(work_folder_modified, f'{main_file_modified}.bbl')
        if os.path.exists(may_exist_bbl) and not os.path.exists(target_bbl):
            shutil.copyfile(may_exist_bbl, target_bbl)

        # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # refresh the Gradio front-end
        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)

        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # refresh the Gradio front-end
        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)

        if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
            # Only continue with the steps below if the second step succeeded
            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # refresh the Gradio front-end
            if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
                ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux', work_folder_original)
            if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
                ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)

            # Two more pdflatex passes per document to resolve the references.
            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # refresh the Gradio front-end
            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)

            if mode!='translate_zh':
                yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # refresh the Gradio front-end
                print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
                ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex', os.getcwd())

                yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # refresh the Gradio front-end
                ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
                ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
                ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
                ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)

        # <---------- check the results ----------->
        # Success is judged by the presence of the PDF files, not by ``ok``.
        results_ = ""
        original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
        modified_pdf_success = os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf'))
        diff_pdf_success = os.path.exists(pj(work_folder, f'merge_diff.pdf'))
        results_ += f"原始PDF编译是否成功: {original_pdf_success};"
        results_ += f"转化PDF编译是否成功: {modified_pdf_success};"
        results_ += f"对比PDF编译是否成功: {diff_pdf_success};"
        yield from update_ui_lastest_msg(f'第{n_fix}编译结束:<br/>{results_}...', chatbot, history) # refresh the Gradio front-end

        if diff_pdf_success:
            # NOTE(review): existence was checked under work_folder, but the
            # path promoted here is under work_folder_modified — confirm that
            # both point at the same directory.
            result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path
            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
        if modified_pdf_success:
            yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 正在尝试生成对比PDF, 请稍候 ...', chatbot, history) # refresh the Gradio front-end
            result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
            origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path
            if os.path.exists(pj(work_folder, '..', 'translation')):
                shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
            # Concatenate the two PDFs (best effort: failures are only printed)
            if original_pdf_success:
                try:
                    from .latex_toolbox import merge_pdfs
                    concat_pdf = pj(work_folder_modified, f'comparison.pdf')
                    merge_pdfs(origin_pdf, result_pdf, concat_pdf)
                    if os.path.exists(pj(work_folder, '..', 'translation')):
                        shutil.copyfile(concat_pdf, pj(work_folder, '..', 'translation', 'comparison.pdf'))
                    promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
                except Exception as e:
                    print(e)
                    pass
            return True # success
        else:
            if n_fix>=max_try: break
            n_fix += 1
            # Retry with a new tex file in which the buggy lines are reverted;
            # main_file_modified is rebound to the new base name.
            can_retry, main_file_modified, buggy_lines = remove_buggy_lines(
                file_path=pj(work_folder_modified, f'{main_file_modified}.tex'),
                log_path=pj(work_folder_modified, f'{main_file_modified}.log'),
                tex_name=f'{main_file_modified}.tex',
                tex_name_pure=f'{main_file_modified}',
                n_fix=n_fix,
                work_folder_modified=work_folder_modified,
                fixed_line=fixed_line
            )
            yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # refresh the Gradio front-end
            if not can_retry: break

    return False # failure
440
-
441
-
442
def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
    """
    Write an HTML report with one row per (original fragment, processed
    fragment) pair, copy it into ``project_folder`` and promote it to the
    download zone.

    sp_file_contents: original text fragments.
    sp_file_result: processed fragments, paired with the originals by position.
    chatbot: UI object forwarded to ``promote_file_to_downloadzone``.
    project_folder: folder that receives a copy of the report.

    Best effort: a failure is printed and does not abort the caller.
    """
    try:
        import shutil
        from crazy_functions.pdf_fns.report_gen_html import construct_html
        from toolbox import gen_time_str
        ch = construct_html()
        # Pair each original fragment directly with its processed counterpart.
        for orig, trans in zip(sp_file_contents, sp_file_result):
            ch.add_row(a=orig, b=trans)
        create_report_file_name = f"{gen_time_str()}.trans.html"
        res = ch.save_file(create_report_file_name)
        shutil.copyfile(res, pj(project_folder, create_report_file_name))
        promote_file_to_downloadzone(file=res, chatbot=chatbot)
    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        from toolbox import trimmed_format_exc
        print('writing html result failed:', trimmed_format_exc())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/latex_fns/latex_toolbox.py DELETED
@@ -1,694 +0,0 @@
1
- import os, shutil
2
- import re
3
- import numpy as np
4
-
5
- PRESERVE = 0
6
- TRANSFORM = 1
7
-
8
- pj = os.path.join
9
-
10
-
11
class LinkedListNode:
    """
    One node of a singly linked list of text segments.
    """

    def __init__(self, string, preserve=True) -> None:
        self.next = None  # following node; None at the tail
        self.range = None  # not set here; post_process() stores a line range
        self.preserve = preserve  # True: segment must be kept unchanged
        self.string = string  # text carried by this segment
23
-
24
-
25
def convert_to_linklist(text, mask):
    """
    Split `text` into a linked list of runs that share one mask value.

    Characters are consumed together with their mask entry. A character whose
    mask agrees with the current node (PRESERVE for a preserved node,
    TRANSFORM for a transformable one) is appended to it; otherwise a new node
    is started. The returned root is a preserved node that may be empty.
    """
    root = LinkedListNode("", preserve=True)
    tail = root
    for char, flag in zip(text, mask):
        if tail.preserve:
            same_kind = flag == PRESERVE
        else:
            same_kind = flag == TRANSFORM
        if same_kind:
            tail.string += char
            continue
        # the mask changed: open a new segment starting with this character
        tail.next = LinkedListNode(char, preserve=(flag == PRESERVE))
        tail = tail.next
    return root
38
-
39
-
40
def post_process(root):
    """
    Clean up, in place, the segment list built by `convert_to_linklist`.

    Passes, in order:
    1. split a transformable node at the first "}" that closes nothing opened
       inside that node, so brace nesting is not broken across segments;
    2. mark nodes whose text (ignoring surrounding newlines) is shorter than
       42 characters as preserved;
    3. merge a preserved node with the preserved node that follows it;
    4. move leading / trailing whitespace of transformable nodes into the
       neighbouring preserved nodes;
    5. store on every node the line range it covers, widened by 2 lines on
       each side.

    Returns `root`.
    """

    def break_check(string):
        # index of the first "}" with no matching "{" before it, else -1
        depth = 0
        for i, c in enumerate(string):
            if c == "{":
                depth += 1
            elif c == "}":
                if depth == 0:
                    print("stack fix")
                    return i
                depth -= 1
        return -1

    # 1. repair braces
    node = root
    while node is not None:
        if not node.preserve:
            string = node.string
            bp = break_check(string)
            if bp != -1:
                # a leading "}" stays alone in this node, the rest is split off
                cut = 1 if bp == 0 else bp
                node.string = string[:cut]
                q = LinkedListNode(string[cut:], False)
                q.next = node.next
                node.next = q
        node = node.next

    # 2. protect empty lines and very short sentences
    node = root
    while node is not None:
        if len(node.string.strip("\n")) < 42:
            node.preserve = True
        node = node.next

    # 3. merge adjacent preserved nodes
    # NOTE: after a merge the walk moves on to the next node, so a third
    # consecutive preserved node is not folded into the first one.
    node = root
    while node is not None:
        if node.next and node.preserve and node.next.preserve:
            node.string += node.next.string
            node.next = node.next.next
        node = node.next

    # 4. detach surrounding line breaks / whitespace from transformable nodes
    node = root
    prev_node = None
    while node is not None:
        if not node.preserve:
            lstriped_ = node.string.lstrip().lstrip("\n")
            if (
                prev_node is not None
                and prev_node.preserve
                and len(lstriped_) != len(node.string)
            ):
                # count from the left: `[:-len(x)]` would be empty if x == ""
                n_lead = len(node.string) - len(lstriped_)
                prev_node.string += node.string[:n_lead]
                node.string = lstriped_
            rstriped_ = node.string.rstrip().rstrip("\n")
            if (
                node.next is not None
                and node.next.preserve
                and len(rstriped_) != len(node.string)
            ):
                node.next.string = node.string[len(rstriped_):] + node.next.string
                node.string = rstriped_
        prev_node = node
        node = node.next

    # 5. annotate each node with the range of lines it covers
    node = root
    n_line = 0
    expansion = 2
    while node is not None:
        n_l = node.string.count("\n")
        # range that is reverted when compilation of this node fails
        node.range = [n_line - expansion, n_line + n_l + expansion]
        n_line = n_line + n_l
        node = node.next
    return root
142
-
143
-
144
- """
145
- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
146
- Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
147
- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
148
- """
149
-
150
-
151
def set_forbidden_text(text, mask, pattern, flags=0):
    r"""
    Mark every match of `pattern` in `text` as PRESERVE in `mask`.

    `pattern` is a regular expression, or a list of regular expressions that
    is joined with "|". For example, the pattern
    r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}" protects every algorithm
    environment from being edited by GPT.

    `mask` is updated through slice assignment and returned with `text`.
    """
    regex = "|".join(pattern) if isinstance(pattern, list) else pattern
    for hit in re.compile(regex, flags).finditer(text):
        start, stop = hit.span()
        mask[start:stop] = PRESERVE
    return text, mask
164
-
165
-
166
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
    r"""
    Move matched areas out of the preserved area (editable for GPT).

    With `forbid_wrapper` set, only capture group 1 of each match becomes
    TRANSFORM and the text around it inside the match stays PRESERVE, e.g.
    for \begin{abstract} blablabla. \end{abstract} only the body is released.
    Without it the whole match becomes TRANSFORM.

    `pattern` may be a list of regular expressions, joined with "|".
    Returns `(text, mask)`; `mask` is updated through slice assignment.
    """
    regex = "|".join(pattern) if isinstance(pattern, list) else pattern
    for hit in re.compile(regex, flags).finditer(text):
        outer_start, outer_stop = hit.span()
        if forbid_wrapper:
            inner_start, inner_stop = hit.span(1)
            mask[outer_start:inner_start] = PRESERVE  # e.g. '\begin{abstract}'
            mask[inner_start:inner_stop] = TRANSFORM  # the body
            mask[inner_stop:outer_stop] = PRESERVE  # e.g. '\end{abstract}'
        else:
            mask[outer_start:outer_stop] = TRANSFORM
    return text, mask
184
-
185
-
186
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
    r"""
    Mark each match of `pattern`, extended to its balancing "}", as PRESERVE.

    From the start of every match the text is scanned while counting braces,
    so nested groups such as
    \caption{blablabla\textbf{blablabla}blablabla.} are covered up to the
    brace that closes the first "{" of the match.

    The scan stops after 1024 * 16 characters or at the end of `text`
    (unbalanced braces); whatever was scanned up to then is preserved.
    Returns `(text, mask)`; `mask` is updated through slice assignment.
    """
    pattern_compile = re.compile(pattern, flags)
    text_len = len(text)
    for res in pattern_compile.finditer(text):
        # start at -1 so the first "{" of the match brings the level to 0
        brace_level = -1
        p = begin = res.start()
        for _ in range(1024 * 16):
            if p >= text_len:
                break  # unbalanced input: stop instead of raising IndexError
            ch = text[p]
            if ch == "}" and brace_level == 0:
                break
            elif ch == "}":
                brace_level -= 1
            elif ch == "{":
                brace_level += 1
            p += 1
        end = p + 1  # include the closing brace
        mask[begin:end] = PRESERVE
    return text, mask
208
-
209
-
210
def reverse_forbidden_text_careful_brace(
    text, mask, pattern, flags=0, forbid_wrapper=True
):
    r"""
    Release the brace-balanced argument of each match for editing.

    The scan starts where capture group 1 begins and runs to the "}" that
    balances it, so nested groups such as
    \caption{blablabla\textbf{blablabla}blablabla.} are released as a whole
    (TRANSFORM). With `forbid_wrapper` set, the rest of the match around that
    span is marked PRESERVE.

    The scan stops after 1024 * 16 characters or at the end of `text`
    (unbalanced braces). Returns `(text, mask)`; `mask` is updated through
    slice assignment.
    """
    pattern_compile = re.compile(pattern, flags)
    text_len = len(text)
    for res in pattern_compile.finditer(text):
        brace_level = 0
        p = begin = res.start(1)
        for _ in range(1024 * 16):
            if p >= text_len:
                break  # unbalanced input: stop instead of raising IndexError
            ch = text[p]
            if ch == "}" and brace_level == 0:
                break
            elif ch == "}":
                brace_level -= 1
            elif ch == "{":
                brace_level += 1
            p += 1
        end = p  # the closing brace itself is not released
        mask[begin:end] = TRANSFORM
        if forbid_wrapper:
            mask[res.start():begin] = PRESERVE
            mask[end:res.end()] = PRESERVE
    return text, mask
237
-
238
-
239
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
    r"""
    Preserve every \begin{} ... \end{} block shorter than `limit_n_lines` lines.

    `pattern` must expose the environment name as group 1 and the block body
    as group 2. Blocks whose name is white-listed, or whose body has at least
    `limit_n_lines` line breaks, are not preserved as a whole; their body is
    searched recursively instead.
    """
    pattern_compile = re.compile(pattern, flags)
    # environments that are always descended into
    white_list = (
        "document",
        "abstract",
        "lemma",
        "definition",
        "sproof",
        "em",
        "emph",
        "textit",
        "textbf",
        "itemize",
        "enumerate",
    )

    def search_with_line_limit(text, mask):
        for res in pattern_compile.finditer(text):
            env_name = res.group(1)
            body = res.group(2)
            body_start, body_stop = res.span(2)
            if env_name in white_list or body.count("\n") >= limit_n_lines:
                inner_mask = mask[body_start:body_stop]
                _, inner_mask = search_with_line_limit(body, inner_mask)
                mask[body_start:body_stop] = inner_mask
            else:
                start, stop = res.span()
                mask[start:stop] = PRESERVE
        return text, mask

    return search_with_line_limit(text, mask)
274
-
275
-
276
- """
277
- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
278
- Latex Merge File
279
- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
280
- """
281
-
282
-
283
def find_main_tex_file(file_manifest, mode):
    r"""
    Pick the main file of a multi-file Tex project.

    Candidates are the files that contain \documentclass; files whose name
    starts with "merge" are ignored. With several candidates each one is
    scored on its comment-free content: -1 for every word typical of a Latex
    template, +1 for every command typical of a real paper, and the first
    candidate with the highest score is returned.

    Raises RuntimeError when no candidate exists. `mode` is not used.
    """

    def read_text(path):
        with open(path, "r", encoding="utf8", errors="ignore") as f:
            return f.read()

    canidates = [
        texf
        for texf in file_manifest
        if not os.path.basename(texf).startswith("merge")
        and r"\documentclass" in read_text(texf)
    ]

    if not canidates:
        raise RuntimeError("无法找到一个主Tex文件(包含documentclass关键字)")
    if len(canidates) == 1:
        return canidates[0]

    # words that hint at a template document (penalty)
    unexpected_words = [
        "\\LaTeX",
        "manuscript",
        "Guidelines",
        "font",
        "citations",
        "rejected",
        "blind review",
        "reviewers",
    ]
    # commands that hint at a real paper (bonus)
    expected_words = ["\\input", "\\ref", "\\cite"]

    canidates_score = []
    for texf in canidates:
        file_content = rm_comments(read_text(texf))
        penalty = sum(1 for word in unexpected_words if word in file_content)
        bonus = sum(1 for word in expected_words if word in file_content)
        canidates_score.append(bonus - penalty)
    select = np.argmax(canidates_score)  # first candidate with the best score
    return canidates[select]
330
-
331
-
332
def rm_comments(main_file):
    """
    Strip Latex comments from `main_file` and return the result.

    Lines that consist of a comment only are dropped entirely; on the other
    lines everything from an unescaped "%" to the end of the line is removed.
    """
    kept_lines = [
        line for line in main_file.splitlines() if not line.lstrip().startswith("%")
    ]
    # remove trailing (half-line) comments; "\%" is a literal percent sign
    return re.sub(r"(?<!\\)%.*", "", "\n".join(kept_lines))
344
-
345
-
346
def find_tex_file_ignore_case(fp):
    """
    Resolve `fp` to an existing Tex file, or return None.

    Tried in order: `fp` as given; `fp` with ".tex" appended (when it does not
    already end with it); then any "*.tex" file in the same directory whose
    name equals one of those two names when case is ignored.
    """
    import glob

    dir_name = os.path.dirname(fp)
    base_name = os.path.basename(fp)

    names = [base_name]
    if not base_name.endswith(".tex"):
        names.append(base_name + ".tex")

    # exact matches first
    for name in names:
        candidate = os.path.join(dir_name, name)
        if os.path.isfile(candidate):
            return candidate

    # then lift the case restriction. os.path.join keeps an empty dir_name
    # relative (a plain "/" + "*.tex" would search the filesystem root), and
    # glob.escape keeps "[", "?" or "*" in the directory from acting as
    # wildcards.
    wanted = {name.lower() for name in names}
    for f in glob.glob(os.path.join(glob.escape(dir_name), "*.tex")):
        if os.path.basename(f).lower() in wanted:
            return f
    return None
371
-
372
-
373
def merge_tex_files_(project_foler, main_file, mode):
    r"""
    Recursively inline every \input{...} of `main_file`.

    Comments are removed first. Each \input target is resolved relative to
    `project_foler` with `find_tex_file_ignore_case`; its content is merged
    recursively and substituted for the \input command.

    Raises RuntimeError when a target cannot be resolved. A target that is
    resolved but cannot be read is replaced by a warning text. `mode` is only
    passed on to the recursive call.
    """
    main_file = rm_comments(main_file)
    # walk the matches back to front so earlier spans stay valid while splicing
    for s in reversed(list(re.finditer(r"\\input\{(.*?)\}", main_file, re.M))):
        f = s.group(1)
        fp = os.path.join(project_foler, f)
        fp_ = find_tex_file_ignore_case(fp)
        if not fp_:
            raise RuntimeError(f"找不到{fp},Tex源文件缺失!")
        try:
            with open(fp_, "r", encoding="utf-8", errors="replace") as fx:
                c = fx.read()
        except (OSError, ValueError):
            # the file vanished or is unreadable: keep going with a marker
            c = "\n\nWarning from GPT-Academic: LaTex source file is missing!\n\n"
        c = merge_tex_files_(project_foler, c, mode)
        main_file = main_file[: s.start()] + c + main_file[s.end():]
    return main_file
393
-
394
-
395
def find_title_and_abs(main_file):
    r"""
    Extract the title and the abstract from Latex source.

    The abstract is taken from \abstract{...} when present, otherwise from a
    \begin{abstract} ... \end{abstract} environment. The title is taken from
    \title{...}. Matching is non-greedy and stops at the first "}".

    Returns `(title, abstract)`; an element is None when it was not found.
    """

    def first_group(pattern):
        found = re.search(pattern, main_file, re.DOTALL)
        return found.group(1) if found else None

    abstract = first_group(r"\\abstract\{(.*?)\}")
    if abstract is None:
        abstract = first_group(r"\\begin\{abstract\}(.*?)\\end\{abstract\}")
    return first_group(r"\\title\{(.*?)\}"), abstract
426
-
427
-
428
def merge_tex_files(project_foler, main_file, mode):
    r"""
    Merge a Tex project into one source string and strip its comments.

    When `mode` is "translate_zh" the result is additionally prepared for
    Chinese output: \usepackage{ctex} (and \usepackage{url} unless "{url}"
    already occurs) is inserted after the \documentclass line, the document
    class gets the options "fontset=windows,UTF8", and a placeholder abstract
    is inserted when the paper has none.

    Raises AssertionError when no \documentclass statement, or (after the
    placeholder was tried) no abstract can be found.
    """
    main_file = merge_tex_files_(project_foler, main_file, mode)
    main_file = rm_comments(main_file)

    if mode == "translate_zh":
        # find paper documentclass
        match = re.search(r"\\documentclass.*\n", main_file)
        if match is None:
            # raised explicitly: an assert statement disappears under -O
            raise AssertionError("Cannot find documentclass statement!")
        position = match.end()
        add_ctex = "\\usepackage{ctex}\n"
        add_url = "\\usepackage{url}\n" if "{url}" not in main_file else ""
        main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
        # fontset=windows
        main_file = re.sub(
            r"\\documentclass\[(.*?)\]{(.*?)}",
            r"\\documentclass[\1,fontset=windows,UTF8]{\2}",
            main_file,
        )
        main_file = re.sub(
            r"\\documentclass{(.*?)}",
            r"\\documentclass[fontset=windows,UTF8]{\1}",
            main_file,
        )
        # find paper abstract
        pattern_opt1 = re.compile(r"\\begin\{abstract\}.*\n")
        pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)

        def has_abstract(source):
            return (
                pattern_opt1.search(source) is not None
                or pattern_opt2.search(source) is not None
            )

        if not has_abstract(main_file):
            main_file = insert_abstract(main_file)
        if not has_abstract(main_file):
            raise AssertionError("Cannot find paper abstract section!")
    return main_file
473
-
474
-
475
insert_missing_abs_str = r"""
\begin{abstract}
The GPT-Academic program cannot find abstract section in this paper.
\end{abstract}
"""


def insert_abstract(tex_content):
    r"""
    Insert the placeholder abstract `insert_missing_abs_str` into a paper.

    The placeholder goes on the line after the first \maketitle, or, when
    there is none, after the first \begin{document}. When neither is present
    `tex_content` is returned unchanged.
    """
    for anchor in ("\\maketitle", r"\begin{document}"):
        find_index = tex_content.find(anchor)
        if find_index == -1:
            continue
        # end of the line that holds the anchor
        end_line_index = tex_content.find("\n", find_index)
        if end_line_index == -1:
            # the anchor is on the last line, which has no line break: append
            # after it (offset -1 + 1 == 0 would insert at the very top)
            head, tail = tex_content + "\n", ""
        else:
            head = tex_content[: end_line_index + 1]
            tail = tex_content[end_line_index + 1:]
        return head + "\n\n" + insert_missing_abs_str + "\n\n" + tail
    return tex_content
513
-
514
-
515
- """
516
- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
517
- Post process
518
- =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
519
- """
520
-
521
-
522
def mod_inbraket(match):
    r"""
    Rebuild a "\cmd{argument}" match with Chinese punctuation made ASCII.

    ChatGPT tends to replace the commas inside \cite{...} with Chinese
    full-width commas, which breaks the command. `match` must provide the
    command name as group 1 and the argument as group 2.
    """
    cmd = match.group(1)
    str_to_modify = match.group(2)
    # written as escapes so the full-width characters cannot be lost to
    # punctuation normalization of this source file
    str_to_modify = str_to_modify.replace("\uff1a", ":")  # full-width colon
    str_to_modify = str_to_modify.replace("\uff0c", ",")  # full-width comma
    return "\\" + cmd + "{" + str_to_modify + "}"
534
-
535
-
536
def fix_content(final_tex, node_string):
    r"""
    Fix common GPT errors in a translated segment to raise the success rate.

    `final_tex` is the text returned by GPT, `node_string` the original text
    of the same segment. Applied repairs: escape bare "%", remove a space put
    between a command and its "{" or after its backslash, restore ASCII
    punctuation inside command arguments, and escape bare "_" when escaped
    underscores were lost. The original text is restored when the answer
    carries an error report or a different number of "\begin"; when only the
    brace balance differs, the answer is completed with the tail of the
    original.
    """
    final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
    final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
    final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
    final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)

    if "Traceback" in final_tex and "[Local Message]" in final_tex:
        final_tex = node_string  # something went wrong, restore the original
    if node_string.count("\\begin") != final_tex.count("\\begin"):
        final_tex = node_string  # something went wrong, restore the original
    escaped_in_original = node_string.count("\\_")
    if escaped_in_original > 0 and escaped_in_original > final_tex.count("\\_"):
        # walk and replace any _ without \
        final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)

    def compute_brace_level(string):
        # number of "{" minus number of "}"
        return string.count("{") - string.count("}")

    def join_most(tex_t, tex_o):
        # Follow the braces of the original and look up the same brace in the
        # translation; keep the translation up to the last brace found in
        # both and finish with the rest of the original.
        p_t = 0
        p_o = 0

        def find_next(string, chars, begin):
            for p in range(begin, len(string)):
                if string[p] in chars:
                    return p, string[p]
            return None, None

        while True:
            res1, char = find_next(tex_o, ["{", "}"], p_o)
            if res1 is None:
                break
            res2, char = find_next(tex_t, [char], p_t)
            if res2 is None:
                break
            p_o = res1 + 1
            p_t = res2 + 1
        return tex_t[:p_t] + tex_o[p_o:]

    if compute_brace_level(final_tex) != compute_brace_level(node_string):
        # something went wrong: restore part of the original so braces match
        final_tex = join_most(final_tex, node_string)
    return final_tex
591
-
592
-
593
def compile_latex_with_timeout(command, cwd, timeout=60):
    """
    Run shell `command` in directory `cwd`, giving up after `timeout` seconds.

    Output of the command is captured and discarded. Returns False when the
    command was killed because of the timeout and True otherwise; the exit
    status of the command is not inspected.
    """
    import subprocess

    try:
        # run() kills the child and collects its output when the timeout hits
        subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=cwd,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print("Process timed out!")
        return False
    return True
607
-
608
-
609
def run_in_subprocess_wrapper_func(func, args, kwargs, return_dict, exception_dict):
    """
    Call `func(*args, **kwargs)` and report the outcome through two dicts.

    On success the return value is stored as `return_dict["result"]`. When
    `func` raises an Exception, `exception_dict["exception"]` is set to the
    tuple `(type, value, None)`.

    The traceback slot is None on purpose: traceback objects cannot be
    pickled, so a tuple that contains one cannot be stored in a dict that is
    shared between processes.
    """
    try:
        return_dict["result"] = func(*args, **kwargs)
    except Exception as e:
        exception_dict["exception"] = (type(e), e, None)
618
-
619
-
620
def run_in_subprocess(func):
    """
    Wrap `func` so that every call runs in a separate process.

    The wrapper starts a process that runs `run_in_subprocess_wrapper_func`,
    waits for it and returns the result of `func`. An exception reported by
    the child is raised again in the caller. None is returned when the child
    reported neither a result nor an exception.
    """
    import multiprocessing

    def wrapper(*args, **kwargs):
        # one manager for both dicts; leaving the with-block shuts its server
        # process down instead of leaking it on every call
        with multiprocessing.Manager() as manager:
            return_dict = manager.dict()
            exception_dict = manager.dict()
            process = multiprocessing.Process(
                target=run_in_subprocess_wrapper_func,
                args=(func, args, kwargs, return_dict, exception_dict),
            )
            process.start()
            process.join()
            process.close()
            if "exception" in exception_dict:
                # ooops, the subprocess ran into an exception
                exc_info = exception_dict["exception"]
                raise exc_info[1].with_traceback(exc_info[2])
            if "result" in return_dict.keys():
                # the subprocess ran successfully, return the result
                return return_dict["result"]

    return wrapper
642
-
643
-
644
def _merge_pdfs(pdf1_path, pdf2_path, output_path):
    """
    Write a PDF that shows the pages of two PDFs side by side.

    Page i of the output holds page i of `pdf1_path` on the left and page i
    of `pdf2_path` on the right; the right page overlaps the left one by 5%
    of its own width. A blank page stands in when one document is shorter.
    """
    # PyPDF2 has a serious memory leak; this function is meant to run in a
    # subprocess so that the memory is released afterwards
    import PyPDF2

    Percent = 0.95
    with open(pdf1_path, "rb") as pdf1_file:
        pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
        with open(pdf2_path, "rb") as pdf2_file:
            pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
            output_writer = PyPDF2.PdfFileWriter()

            def page_or_blank(reader, index):
                # blank pages are always derived from the first document
                if index < reader.numPages:
                    return reader.getPage(index)
                return PyPDF2.PageObject.createBlankPage(pdf1_reader)

            num_pages = max(pdf1_reader.numPages, pdf2_reader.numPages)
            for page_num in range(num_pages):
                page1 = page_or_blank(pdf1_reader, page_num)
                page2 = page_or_blank(pdf2_reader, page_num)
                left_width = int(page1.mediaBox.getWidth())
                right_width = int(page2.mediaBox.getWidth())
                # new empty page, almost as wide as both pages together
                new_page = PyPDF2.PageObject.createBlankPage(
                    width=int(left_width + right_width * Percent),
                    height=max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight()),
                )
                new_page.mergeTranslatedPage(page1, 0, 0)
                new_page.mergeTranslatedPage(
                    page2,
                    int(left_width - right_width * (1 - Percent)),
                    0,
                )
                output_writer.addPage(new_page)
            # save while both input files are still open
            with open(output_path, "wb") as output_file:
                output_writer.write(output_file)


# run the merge in a subprocess so the memory used by PyPDF2 is released
merge_pdfs = run_in_subprocess(_merge_pdfs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/latex_utils.py DELETED
@@ -1,788 +0,0 @@
1
- from toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界面
2
- from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
3
- import os, shutil
4
- import re
5
- import numpy as np
6
- pj = os.path.join
7
-
8
- """
9
- ========================================================================
10
- Part One
11
- Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
12
- ========================================================================
13
- """
14
- PRESERVE = 0
15
- TRANSFORM = 1
16
-
17
def set_forbidden_text(text, mask, pattern, flags=0):
    r"""
    Add every match of `pattern` to the preserved area of `mask`.

    A list of patterns is accepted and combined with "|". Example: the
    pattern r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}" makes every
    algorithm environment untouchable for GPT.

    Returns `(text, mask)`; `mask` is updated through slice assignment.
    """
    if isinstance(pattern, list):
        pattern = '|'.join(pattern)
    for found in re.finditer(pattern, text, flags):
        mask[found.start():found.end()] = PRESERVE
    return text, mask
29
-
30
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
    r"""
    Take matched areas out of the preserved area (editable for GPT).

    `forbid_wrapper=True` releases only capture group 1 and keeps the rest of
    the match preserved, e.g. the body of
    \begin{abstract} blablabla. \end{abstract} without the two commands.
    `forbid_wrapper=False` releases the whole match.

    A list of patterns is accepted and combined with "|".
    Returns `(text, mask)`; `mask` is updated through slice assignment.
    """
    if isinstance(pattern, list):
        pattern = '|'.join(pattern)
    for found in re.finditer(pattern, text, flags):
        whole_start, whole_stop = found.span()
        if not forbid_wrapper:
            mask[whole_start:whole_stop] = TRANSFORM
            continue
        body_start, body_stop = found.span(1)
        mask[whole_start:body_start] = PRESERVE  # e.g. '\begin{abstract}'
        mask[body_start:body_stop] = TRANSFORM  # the body
        mask[body_stop:whole_stop] = PRESERVE  # e.g. '\end{abstract}'
    return text, mask
47
-
48
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
    r"""
    Preserve each match of `pattern` up to its balancing closing brace.

    Braces are counted from the start of the match so that a complete
    argument such as \caption{blablabla\textbf{blablabla}blablabla.} is
    covered, including nested groups.

    Scanning ends after 1024*16 characters or at the end of `text` (braces
    not balanced); the part scanned up to then is preserved.
    Returns `(text, mask)`; `mask` is updated through slice assignment.
    """
    pattern_compile = re.compile(pattern, flags)
    n_chars = len(text)
    for res in pattern_compile.finditer(text):
        # -1: the first '{' of the match raises the level to 0
        brace_level = -1
        p = begin = res.start()
        for _ in range(1024*16):
            if p >= n_chars:
                break  # ran off the text: stop instead of raising IndexError
            if text[p] == '}' and brace_level == 0:
                break
            elif text[p] == '}':
                brace_level -= 1
            elif text[p] == '{':
                brace_level += 1
            p += 1
        end = p+1  # include the closing brace
        mask[begin:end] = PRESERVE
    return text, mask
67
-
68
def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
    r"""
    Make the brace-balanced argument of each match editable for GPT.

    Braces are counted from the start of capture group 1 up to the brace that
    balances it, e.g. the complete argument of
    \caption{blablabla\textbf{blablabla}blablabla.}; that span becomes
    TRANSFORM. With `forbid_wrapper` set, the remainder of the match on both
    sides is marked PRESERVE.

    Scanning ends after 1024*16 characters or at the end of `text` (braces
    not balanced). Returns `(text, mask)`; `mask` is updated through slice
    assignment.
    """
    pattern_compile = re.compile(pattern, flags)
    n_chars = len(text)
    for res in pattern_compile.finditer(text):
        brace_level = 0
        p = begin = res.start(1)
        for _ in range(1024*16):
            if p >= n_chars:
                break  # ran off the text: stop instead of raising IndexError
            if text[p] == '}' and brace_level == 0:
                break
            elif text[p] == '}':
                brace_level -= 1
            elif text[p] == '{':
                brace_level += 1
            p += 1
        end = p  # the closing brace itself stays outside
        mask[begin:end] = TRANSFORM
        if forbid_wrapper:
            mask[res.start():begin] = PRESERVE
            mask[end:res.end()] = PRESERVE
    return text, mask
90
-
91
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
    r"""
    Add short \begin{} ... \end{} blocks to the preserved area.

    `pattern` must capture the environment name (group 1) and the block body
    (group 2). A block is preserved as a whole unless its name is on the
    white list or its body contains at least `limit_n_lines` line breaks; in
    those cases the body is searched again for nested blocks.
    """
    pattern_compile = re.compile(pattern, flags)
    # environments whose body is always searched instead of preserved
    white_list = ('document', 'abstract', 'lemma', 'definition', 'sproof',
                  'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate')

    def search_with_line_limit(text, mask):
        for res in pattern_compile.finditer(text):
            name, content = res.group(1), res.group(2)
            lo, hi = res.span(2)
            descend = name in white_list or content.count('\n') >= limit_n_lines
            if not descend:
                mask[res.start():res.end()] = PRESERVE
                continue
            sub_mask = mask[lo:hi]
            _, sub_mask = search_with_line_limit(content, sub_mask)
            mask[lo:hi] = sub_mask
        return text, mask

    return search_with_line_limit(text, mask)
111
-
112
class LinkedListNode():
    """
    Node of a singly linked list of text segments.
    """
    def __init__(self, string, preserve=True) -> None:
        self.next = None  # following node; None at the tail
        self.preserve = preserve  # True: segment must be kept unchanged
        self.string = string  # text carried by this segment
122
-
123
def convert_to_linklist(text, mask):
    """
    Turn `text` into a linked list of segments with a uniform mask value.

    Every character goes into the current node while its mask entry matches
    the kind of that node (PRESERVE for preserved, TRANSFORM for editable);
    any other entry opens a new node. The root is a preserved node that may
    stay empty.
    """
    root = LinkedListNode("", preserve=True)
    last = root
    for ch, kind in zip(text, mask):
        expected = PRESERVE if last.preserve else TRANSFORM
        if kind == expected:
            last.string += ch
        else:
            last.next = LinkedListNode(ch, preserve=(kind == PRESERVE))
            last = last.next
    return root
135
- """
136
- ========================================================================
137
- Latex Merge File
138
- ========================================================================
139
- """
140
-
141
def 寻找Latex主文件(file_manifest, mode):
    r"""
    Find the main file of a multi-file Tex project.

    The main file must contain \documentclass; files whose name starts with
    'merge' are ignored. When several files qualify, each is scored: -1 for
    every word typical of a Latex template (which normally does not occur in
    a paper), +1 for every command typical of a paper. The first file with
    the highest score is returned.

    Raises RuntimeError when no file contains \documentclass.
    `mode` is not used.
    """
    canidates = []
    for texf in file_manifest:
        if os.path.basename(texf).startswith('merge'):
            continue
        with open(texf, 'r', encoding='utf8') as f:
            file_content = f.read()
        if r'\documentclass' in file_content:
            canidates.append(texf)

    if len(canidates) == 0:
        raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)')
    elif len(canidates) == 1:
        return canidates[0]

    # Two or more candidates: score them and return the best one.
    # Raw strings are required here: in a normal string '\ref' is a carriage
    # return followed by 'ef' and would never be found in a Tex file.
    unexpected_words = [r'\LaTeX', 'manuscript', 'Guidelines', 'font',
                        'citations', 'rejected', 'blind review', 'reviewers']
    expected_words = [r'\input', r'\ref', r'\cite']
    canidates_score = []
    for texf in canidates:
        with open(texf, 'r', encoding='utf8') as f:
            file_content = f.read()
        score = 0
        for uw in unexpected_words:
            if uw in file_content:
                score -= 1
        for ew in expected_words:
            if ew in file_content:
                score += 1
        canidates_score.append(score)
    select = np.argmax(canidates_score)  # first file with the highest score
    return canidates[select]
178
-
179
def rm_comments(main_file):
    """
    Return `main_file` without Latex comments.

    Whole-line comments are removed together with their line; trailing
    comments are cut off from an unescaped "%" to the end of the line.
    """
    def is_code_line(line):
        return not line.lstrip().startswith("%")

    without_comment_lines = '\n'.join(filter(is_code_line, main_file.splitlines()))
    # cut trailing comments; "\%" is a literal percent sign and is kept
    return re.sub(r'(?<!\\)%.*', '', without_comment_lines)
191
-
192
def merge_tex_files_(project_foler, main_file, mode):
    r"""
    Recursively inline every \input{...} of `main_file`.

    Comments are removed first. A target is read from `project_foler`, either
    under the given name or with '.tex' appended, merged recursively and
    substituted for the \input command. A missing target makes `open` raise.
    `mode` is only passed on to the recursive call.
    """
    main_file = rm_comments(main_file)
    # walk the matches back to front so earlier spans stay valid while splicing
    for s in reversed(list(re.finditer(r"\\input\{(.*?)\}", main_file, re.M))):
        f = s.group(1)
        fp = os.path.join(project_foler, f)
        # isfile, not exists: a directory named like the target (sec/ next to
        # sec.tex) must not be opened as a file
        if not os.path.isfile(fp):
            # e.g., \input{srcs/07_appendix}
            fp = fp + '.tex'
        # otherwise e.g., \input{srcs/07_appendix.tex}
        with open(fp, 'r', encoding='utf-8', errors='replace') as fx:
            c = fx.read()
        c = merge_tex_files_(project_foler, c, mode)
        main_file = main_file[:s.start()] + c + main_file[s.end():]
    return main_file
211
-
212
def merge_tex_files(project_foler, main_file, mode):
    r"""
    Merge a Tex project into one source string and strip its comments.

    When `mode` is 'translate_zh', \usepackage{ctex} (and \usepackage{url}
    unless '{url}' already occurs) is inserted after the \documentclass line
    and the document class gets the options 'fontset=windows,UTF8', to
    support Chinese output.

    Raises AssertionError in that mode when no \documentclass statement or no
    abstract can be found.
    """
    main_file = merge_tex_files_(project_foler, main_file, mode)
    main_file = rm_comments(main_file)

    if mode == 'translate_zh':
        # find paper documentclass
        match = re.search(r'\\documentclass.*\n', main_file)
        if match is None:
            # raised explicitly: an assert statement disappears under -O
            raise AssertionError("Cannot find documentclass statement!")
        position = match.end()
        add_ctex = '\\usepackage{ctex}\n'
        add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
        main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
        # fontset=windows
        main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}", main_file)
        main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}", main_file)
        # find paper abstract
        match_opt1 = re.search(r'\\begin\{abstract\}.*\n', main_file)
        match_opt2 = re.search(r"\\abstract\{(.*?)\}", main_file, flags=re.DOTALL)
        if match_opt1 is None and match_opt2 is None:
            raise AssertionError("Cannot find paper abstract section!")
    return main_file
241
-
242
-
243
-
244
- """
245
- ========================================================================
246
- Post process
247
- ========================================================================
248
- """
249
- def mod_inbraket(match):
250
- """
251
- 为啥chatgpt会把cite里面的逗号换成中文逗号呀
252
- """
253
- # get the matched string
254
- cmd = match.group(1)
255
- str_to_modify = match.group(2)
256
- # modify the matched string
257
- str_to_modify = str_to_modify.replace(':', ':') # 前面是中文冒号,后面是英文冒号
258
- str_to_modify = str_to_modify.replace(',', ',') # 前面是中文逗号,后面是英文逗号
259
- # str_to_modify = 'BOOM'
260
- return "\\" + cmd + "{" + str_to_modify + "}"
261
-
262
- def fix_content(final_tex, node_string):
263
- """
264
- Fix common GPT errors to increase success rate
265
- """
266
- final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
267
- final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
268
- final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
269
- final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
270
-
271
- if "Traceback" in final_tex and "[Local Message]" in final_tex:
272
- final_tex = node_string # 出问题了,还原原文
273
- if node_string.count('\\begin') != final_tex.count('\\begin'):
274
- final_tex = node_string # 出问题了,还原原文
275
- if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
276
- # walk and replace any _ without \
277
- final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
278
-
279
- def compute_brace_level(string):
280
- # this function count the number of { and }
281
- brace_level = 0
282
- for c in string:
283
- if c == "{": brace_level += 1
284
- elif c == "}": brace_level -= 1
285
- return brace_level
286
- def join_most(tex_t, tex_o):
287
- # this function join translated string and original string when something goes wrong
288
- p_t = 0
289
- p_o = 0
290
- def find_next(string, chars, begin):
291
- p = begin
292
- while p < len(string):
293
- if string[p] in chars: return p, string[p]
294
- p += 1
295
- return None, None
296
- while True:
297
- res1, char = find_next(tex_o, ['{','}'], p_o)
298
- if res1 is None: break
299
- res2, char = find_next(tex_t, [char], p_t)
300
- if res2 is None: break
301
- p_o = res1 + 1
302
- p_t = res2 + 1
303
- return tex_t[:p_t] + tex_o[p_o:]
304
-
305
- if compute_brace_level(final_tex) != compute_brace_level(node_string):
306
- # 出问题了,还原部分原文,保证括号正确
307
- final_tex = join_most(final_tex, node_string)
308
- return final_tex
309
-
310
- def split_subprocess(txt, project_folder, return_dict, opts):
311
- """
312
- break down latex file to a linked list,
313
- each node use a preserve flag to indicate whether it should
314
- be proccessed by GPT.
315
- """
316
- text = txt
317
- mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM
318
-
319
- # 吸收title与作者以上的部分
320
- text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL)
321
- # 吸收iffalse注释
322
- text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
323
- # 吸收在42行以内的begin-end组合
324
- text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42)
325
- # 吸收匿名公式
326
- text, mask = set_forbidden_text(text, mask, [ r"\$\$(.*?)\$\$", r"\\\[.*?\\\]" ], re.DOTALL)
327
- # 吸收其他杂项
328
- text, mask = set_forbidden_text(text, mask, [ r"\\section\{(.*?)\}", r"\\section\*\{(.*?)\}", r"\\subsection\{(.*?)\}", r"\\subsubsection\{(.*?)\}" ])
329
- text, mask = set_forbidden_text(text, mask, [ r"\\bibliography\{(.*?)\}", r"\\bibliographystyle\{(.*?)\}" ])
330
- text, mask = set_forbidden_text(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL)
331
- text, mask = set_forbidden_text(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
332
- text, mask = set_forbidden_text(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
333
- text, mask = set_forbidden_text(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
334
- text, mask = set_forbidden_text(text, mask, [r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}"], re.DOTALL)
335
- text, mask = set_forbidden_text(text, mask, [r"\\begin\{figure\}(.*?)\\end\{figure\}", r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}"], re.DOTALL)
336
- text, mask = set_forbidden_text(text, mask, [r"\\begin\{multline\}(.*?)\\end\{multline\}", r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}"], re.DOTALL)
337
- text, mask = set_forbidden_text(text, mask, [r"\\begin\{table\}(.*?)\\end\{table\}", r"\\begin\{table\*\}(.*?)\\end\{table\*\}"], re.DOTALL)
338
- text, mask = set_forbidden_text(text, mask, [r"\\begin\{minipage\}(.*?)\\end\{minipage\}", r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}"], re.DOTALL)
339
- text, mask = set_forbidden_text(text, mask, [r"\\begin\{align\*\}(.*?)\\end\{align\*\}", r"\\begin\{align\}(.*?)\\end\{align\}"], re.DOTALL)
340
- text, mask = set_forbidden_text(text, mask, [r"\\begin\{equation\}(.*?)\\end\{equation\}", r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}"], re.DOTALL)
341
- text, mask = set_forbidden_text(text, mask, [r"\\includepdf\[(.*?)\]\{(.*?)\}", r"\\clearpage", r"\\newpage", r"\\appendix", r"\\tableofcontents", r"\\include\{(.*?)\}"])
342
- text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}", r"\\item "])
343
- text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
344
- # reverse 操作必须放在最后
345
- text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
346
- text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
347
- text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
348
- root = convert_to_linklist(text, mask)
349
-
350
- # 修复括号
351
- node = root
352
- while True:
353
- string = node.string
354
- if node.preserve:
355
- node = node.next
356
- if node is None: break
357
- continue
358
- def break_check(string):
359
- str_stack = [""] # (lv, index)
360
- for i, c in enumerate(string):
361
- if c == '{':
362
- str_stack.append('{')
363
- elif c == '}':
364
- if len(str_stack) == 1:
365
- print('stack fix')
366
- return i
367
- str_stack.pop(-1)
368
- else:
369
- str_stack[-1] += c
370
- return -1
371
- bp = break_check(string)
372
-
373
- if bp == -1:
374
- pass
375
- elif bp == 0:
376
- node.string = string[:1]
377
- q = LinkedListNode(string[1:], False)
378
- q.next = node.next
379
- node.next = q
380
- else:
381
- node.string = string[:bp]
382
- q = LinkedListNode(string[bp:], False)
383
- q.next = node.next
384
- node.next = q
385
-
386
- node = node.next
387
- if node is None: break
388
-
389
- # 屏蔽空行和太短的句子
390
- node = root
391
- while True:
392
- if len(node.string.strip('\n').strip(''))==0: node.preserve = True
393
- if len(node.string.strip('\n').strip(''))<42: node.preserve = True
394
- node = node.next
395
- if node is None: break
396
- node = root
397
- while True:
398
- if node.next and node.preserve and node.next.preserve:
399
- node.string += node.next.string
400
- node.next = node.next.next
401
- node = node.next
402
- if node is None: break
403
-
404
- # 将前后断行符脱离
405
- node = root
406
- prev_node = None
407
- while True:
408
- if not node.preserve:
409
- lstriped_ = node.string.lstrip().lstrip('\n')
410
- if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
411
- prev_node.string += node.string[:-len(lstriped_)]
412
- node.string = lstriped_
413
- rstriped_ = node.string.rstrip().rstrip('\n')
414
- if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
415
- node.next.string = node.string[len(rstriped_):] + node.next.string
416
- node.string = rstriped_
417
- # =====
418
- prev_node = node
419
- node = node.next
420
- if node is None: break
421
- # 输出html调试文件,用红色标注处保留区(PRESERVE),用黑色标注转换区(TRANSFORM)
422
- with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
423
- segment_parts_for_gpt = []
424
- nodes = []
425
- node = root
426
- while True:
427
- nodes.append(node)
428
- show_html = node.string.replace('\n','<br/>')
429
- if not node.preserve:
430
- segment_parts_for_gpt.append(node.string)
431
- f.write(f'<p style="color:black;">#{show_html}#</p>')
432
- else:
433
- f.write(f'<p style="color:red;">{show_html}</p>')
434
- node = node.next
435
- if node is None: break
436
-
437
- for n in nodes: n.next = None # break
438
- return_dict['nodes'] = nodes
439
- return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
440
- return return_dict
441
-
442
-
443
-
444
- class LatexPaperSplit():
445
- """
446
- break down latex file to a linked list,
447
- each node use a preserve flag to indicate whether it should
448
- be proccessed by GPT.
449
- """
450
- def __init__(self) -> None:
451
- self.nodes = None
452
- self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \
453
- "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
454
- "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
455
- # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者)
456
- self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
457
-
458
- def merge_result(self, arr, mode, msg):
459
- """
460
- Merge the result after the GPT process completed
461
- """
462
- result_string = ""
463
- p = 0
464
- for node in self.nodes:
465
- if node.preserve:
466
- result_string += node.string
467
- else:
468
- result_string += fix_content(arr[p], node.string)
469
- p += 1
470
- if mode == 'translate_zh':
471
- pattern = re.compile(r'\\begin\{abstract\}.*\n')
472
- match = pattern.search(result_string)
473
- if not match:
474
- # match \abstract{xxxx}
475
- pattern_compile = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
476
- match = pattern_compile.search(result_string)
477
- position = match.regs[1][0]
478
- else:
479
- # match \begin{abstract}xxxx\end{abstract}
480
- position = match.end()
481
- result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
482
- return result_string
483
-
484
- def split(self, txt, project_folder, opts):
485
- """
486
- break down latex file to a linked list,
487
- each node use a preserve flag to indicate whether it should
488
- be proccessed by GPT.
489
- P.S. use multiprocessing to avoid timeout error
490
- """
491
- import multiprocessing
492
- manager = multiprocessing.Manager()
493
- return_dict = manager.dict()
494
- p = multiprocessing.Process(
495
- target=split_subprocess,
496
- args=(txt, project_folder, return_dict, opts))
497
- p.start()
498
- p.join()
499
- p.close()
500
- self.nodes = return_dict['nodes']
501
- self.sp = return_dict['segment_parts_for_gpt']
502
- return self.sp
503
-
504
-
505
-
506
- class LatexPaperFileGroup():
507
- """
508
- use tokenizer to break down text according to max_token_limit
509
- """
510
- def __init__(self):
511
- self.file_paths = []
512
- self.file_contents = []
513
- self.sp_file_contents = []
514
- self.sp_file_index = []
515
- self.sp_file_tag = []
516
-
517
- # count_token
518
- from request_llm.bridge_all import model_info
519
- enc = model_info["gpt-3.5-turbo"]['tokenizer']
520
- def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
521
- self.get_token_num = get_token_num
522
-
523
- def run_file_split(self, max_token_limit=1900):
524
- """
525
- use tokenizer to break down text according to max_token_limit
526
- """
527
- for index, file_content in enumerate(self.file_contents):
528
- if self.get_token_num(file_content) < max_token_limit:
529
- self.sp_file_contents.append(file_content)
530
- self.sp_file_index.append(index)
531
- self.sp_file_tag.append(self.file_paths[index])
532
- else:
533
- from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
534
- segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
535
- for j, segment in enumerate(segments):
536
- self.sp_file_contents.append(segment)
537
- self.sp_file_index.append(index)
538
- self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
539
- print('Segmentation: done')
540
-
541
- def merge_result(self):
542
- self.file_result = ["" for _ in range(len(self.file_paths))]
543
- for r, k in zip(self.sp_file_result, self.sp_file_index):
544
- self.file_result[k] += r
545
-
546
- def write_result(self):
547
- manifest = []
548
- for path, res in zip(self.file_paths, self.file_result):
549
- with open(path + '.polish.tex', 'w', encoding='utf8') as f:
550
- manifest.append(path + '.polish.tex')
551
- f.write(res)
552
- return manifest
553
-
554
- def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
555
-
556
- # write html
557
- try:
558
- import shutil
559
- from .crazy_utils import construct_html
560
- from toolbox import gen_time_str
561
- ch = construct_html()
562
- orig = ""
563
- trans = ""
564
- final = []
565
- for c,r in zip(sp_file_contents, sp_file_result):
566
- final.append(c)
567
- final.append(r)
568
- for i, k in enumerate(final):
569
- if i%2==0:
570
- orig = k
571
- if i%2==1:
572
- trans = k
573
- ch.add_row(a=orig, b=trans)
574
- create_report_file_name = f"{gen_time_str()}.trans.html"
575
- ch.save_file(create_report_file_name)
576
- shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
577
- promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
578
- except:
579
- from toolbox import trimmed_format_exc
580
- print('writing html result failed:', trimmed_format_exc())
581
-
582
- def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
583
- import time, os, re
584
- from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
585
- from .latex_utils import LatexPaperFileGroup, merge_tex_files, LatexPaperSplit, 寻找Latex主文件
586
-
587
- # <-------- 寻找主tex文件 ---------->
588
- maintex = 寻找Latex主文件(file_manifest, mode)
589
- chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
590
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
591
- time.sleep(3)
592
-
593
- # <-------- 读取Latex文件, 将多文件tex工程融合为一个巨型tex ---------->
594
- main_tex_basename = os.path.basename(maintex)
595
- assert main_tex_basename.endswith('.tex')
596
- main_tex_basename_bare = main_tex_basename[:-4]
597
- may_exist_bbl = pj(project_folder, f'{main_tex_basename_bare}.bbl')
598
- if os.path.exists(may_exist_bbl):
599
- shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge.bbl'))
600
- shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_{mode}.bbl'))
601
- shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_diff.bbl'))
602
-
603
- with open(maintex, 'r', encoding='utf-8', errors='replace') as f:
604
- content = f.read()
605
- merged_content = merge_tex_files(project_folder, content, mode)
606
-
607
- with open(project_folder + '/merge.tex', 'w', encoding='utf-8', errors='replace') as f:
608
- f.write(merged_content)
609
-
610
- # <-------- 精细切分latex文件 ---------->
611
- chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。'))
612
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
613
- lps = LatexPaperSplit()
614
- res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数
615
-
616
- # <-------- 拆分过长的latex片段 ---------->
617
- pfg = LatexPaperFileGroup()
618
- for index, r in enumerate(res):
619
- pfg.file_paths.append('segment-' + str(index))
620
- pfg.file_contents.append(r)
621
-
622
- pfg.run_file_split(max_token_limit=1024)
623
- n_split = len(pfg.sp_file_contents)
624
-
625
- # <-------- 根据需要切换prompt ---------->
626
- inputs_array, sys_prompt_array = switch_prompt(pfg, mode)
627
- inputs_show_user_array = [f"{mode} {f}" for f in pfg.sp_file_tag]
628
-
629
- if os.path.exists(pj(project_folder,'temp.pkl')):
630
-
631
- # <-------- 【仅调试】如果存在调试缓存文件,则跳过GPT请求环节 ---------->
632
- pfg = objload(file=pj(project_folder,'temp.pkl'))
633
-
634
- else:
635
- # <-------- gpt 多线程请求 ---------->
636
- gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
637
- inputs_array=inputs_array,
638
- inputs_show_user_array=inputs_show_user_array,
639
- llm_kwargs=llm_kwargs,
640
- chatbot=chatbot,
641
- history_array=[[""] for _ in range(n_split)],
642
- sys_prompt_array=sys_prompt_array,
643
- # max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待
644
- scroller_max_len = 40
645
- )
646
-
647
- # <-------- 文本碎片重组为完整的tex片段 ---------->
648
- pfg.sp_file_result = []
649
- for i_say, gpt_say, orig_content in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], pfg.sp_file_contents):
650
- pfg.sp_file_result.append(gpt_say)
651
- pfg.merge_result()
652
-
653
- # <-------- 临时存储用于调试 ---------->
654
- pfg.get_token_num = None
655
- objdump(pfg, file=pj(project_folder,'temp.pkl'))
656
-
657
- write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder)
658
-
659
- # <-------- 写出文件 ---------->
660
- msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。"
661
- final_tex = lps.merge_result(pfg.file_result, mode, msg)
662
- with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
663
- if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex)
664
-
665
-
666
- # <-------- 整理结果, 退出 ---------->
667
- chatbot.append((f"完成了吗?", 'GPT结果已输出, 正在编译PDF'))
668
- yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
669
-
670
- # <-------- 返回 ---------->
671
- return project_folder + f'/merge_{mode}.tex'
672
-
673
-
674
-
675
- def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified):
676
- try:
677
- with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
678
- log = f.read()
679
- with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
680
- file_lines = f.readlines()
681
- import re
682
- buggy_lines = re.findall(tex_name+':([0-9]{1,5}):', log)
683
- buggy_lines = [int(l) for l in buggy_lines]
684
- buggy_lines = sorted(buggy_lines)
685
- print("removing lines that has errors", buggy_lines)
686
- file_lines.pop(buggy_lines[0]-1)
687
- with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f:
688
- f.writelines(file_lines)
689
- return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
690
- except:
691
- print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
692
- return False, -1, [-1]
693
-
694
- def compile_latex_with_timeout(command, cwd, timeout=60):
695
- import subprocess
696
- process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
697
- try:
698
- stdout, stderr = process.communicate(timeout=timeout)
699
- except subprocess.TimeoutExpired:
700
- process.kill()
701
- stdout, stderr = process.communicate()
702
- print("Process timed out!")
703
- return False
704
- return True
705
-
706
- def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
707
- import os, time
708
- current_dir = os.getcwd()
709
- n_fix = 1
710
- max_try = 32
711
- chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
712
- chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
713
- yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # 刷新Gradio前端界面
714
-
715
- while True:
716
- import os
717
-
718
- # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
719
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
720
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
721
-
722
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
723
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
724
-
725
- if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
726
- # 只有第二步成功,才能继续下面的步骤
727
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面
728
- if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
729
- ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux', work_folder_original)
730
- if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
731
- ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)
732
-
733
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
734
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
735
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
736
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
737
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
738
-
739
- if mode!='translate_zh':
740
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
741
- print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
742
- ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
743
-
744
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
745
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
746
- ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
747
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
748
- ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
749
-
750
-
751
- # <---------- 检查结果 ----------->
752
- results_ = ""
753
- original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
754
- modified_pdf_success = os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf'))
755
- diff_pdf_success = os.path.exists(pj(work_folder, f'merge_diff.pdf'))
756
- results_ += f"原始PDF编译是否成功: {original_pdf_success};"
757
- results_ += f"转化PDF编译是否成功: {modified_pdf_success};"
758
- results_ += f"对比PDF编译是否成功: {diff_pdf_success};"
759
- yield from update_ui_lastest_msg(f'第{n_fix}编译结束:<br/>{results_}...', chatbot, history) # 刷新Gradio前端界面
760
-
761
- if diff_pdf_success:
762
- result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path
763
- promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
764
- if modified_pdf_success:
765
- yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面
766
- result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
767
- if os.path.exists(pj(work_folder, '..', 'translation')):
768
- shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
769
- promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
770
- return True # 成功啦
771
- else:
772
- if n_fix>=max_try: break
773
- n_fix += 1
774
- can_retry, main_file_modified, buggy_lines = remove_buggy_lines(
775
- file_path=pj(work_folder_modified, f'{main_file_modified}.tex'),
776
- log_path=pj(work_folder_modified, f'{main_file_modified}.log'),
777
- tex_name=f'{main_file_modified}.tex',
778
- tex_name_pure=f'{main_file_modified}',
779
- n_fix=n_fix,
780
- work_folder_modified=work_folder_modified,
781
- )
782
- yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面
783
- if not can_retry: break
784
-
785
- return False # 失败啦
786
-
787
-
788
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/live_audio/aliyunASR.py DELETED
@@ -1,261 +0,0 @@
1
- import time, logging, json, sys, struct
2
- import numpy as np
3
- from scipy.io.wavfile import WAVE_FORMAT
4
-
5
def write_numpy_to_wave(filename, rate, data, add_header=False):
    """
    Serialize a NumPy array as WAV-style audio.

    Args:
        filename: a path to open in binary write mode, or any object that
            already exposes ``write`` (used as-is and rewound to offset 0
            when done instead of being closed).
        rate: sample rate written into the ``fmt`` chunk.
        data: array of signed integers, floats, or unsigned 8-bit integers.
        add_header: when true, the RIFF/WAVE/fmt (and fact) header is written
            and the RIFF size field is patched afterwards. When false only the
            ``data`` tag, the payload size and the raw samples are written.

    Raises:
        ValueError: for unsupported dtypes or payloads that exceed the
            32-bit WAV size limit.
    """
    owns_handle = not hasattr(filename, 'write')
    fid = open(filename, 'wb') if owns_handle else filename

    try:
        kind = data.dtype.kind
        sample_bytes = data.dtype.itemsize
        supported = kind in ('i', 'f') or (kind == 'u' and sample_bytes == 1)
        if not supported:
            raise ValueError("Unsupported data type '%s'" % data.dtype)

        is_pcm = kind in ('i', 'u')
        format_tag = WAVE_FORMAT.IEEE_FLOAT if kind == 'f' else WAVE_FORMAT.PCM
        channels = 1 if data.ndim == 1 else data.shape[1]
        bit_depth = sample_bytes * 8
        bytes_per_second = rate * (bit_depth // 8) * channels
        block_align = channels * (bit_depth // 8)

        # fmt chunk; non-PCM payloads carry a trailing (zero) cbSize field
        fmt_chunk = struct.pack('<HHIIHH', format_tag, channels, rate,
                                bytes_per_second, block_align, bit_depth)
        if not is_pcm:
            fmt_chunk += b'\x00\x00'

        # RIFF size is a placeholder here and patched once everything is written
        pieces = [b'RIFF', b'\x00\x00\x00\x00', b'WAVE',
                  b'fmt ', struct.pack('<I', len(fmt_chunk)), fmt_chunk]
        if not is_pcm:
            # fact chunk (non-PCM files)
            pieces.append(b'fact')
            pieces.append(struct.pack('<II', 4, data.shape[0]))
        header = b''.join(pieces)

        # size check must account for the data chunk tag and length field
        if ((len(header) - 4 - 4) + (4 + 4 + data.nbytes)) > 0xFFFFFFFF:
            raise ValueError("Data exceeds wave file size limit")

        if add_header:
            fid.write(header)

        # data chunk (tag and size are emitted even without the RIFF header)
        fid.write(b'data')
        fid.write(struct.pack('<I', data.nbytes))
        order = data.dtype.byteorder
        if order == '>' or (order == '=' and sys.byteorder == 'big'):
            data = data.byteswap()
        # ravel gives a c-contiguous buffer
        fid.write(data.ravel().view('b').data)

        if add_header:
            # patch the total size into the RIFF header at offset 4
            total = fid.tell()
            fid.seek(4)
            fid.write(struct.pack('<I', total - 8))
    finally:
        if owns_handle:
            fid.close()
        else:
            fid.seek(0)
85
-
86
def is_speaker_speaking(vad, data, sample_rate):
    """
    Run voice-activity detection over consecutive 30 ms frames of ``data``.

    The WebRTC VAD only accepts 16-bit mono PCM audio sampled at 8000, 16000,
    32000 or 48000 Hz, and a frame must be 10, 20 or 30 ms long.

    Args:
        vad: object exposing ``is_speech(frame_bytes, sample_rate)``.
        data: raw audio bytes.
        sample_rate: sample rate of ``data`` in Hz.

    Returns:
        ``(speaking, info)`` where ``speaking`` is True if any complete frame
        was classified as speech and ``info`` is a string of at most ten
        characters, ``'^'`` for a speech frame and ``'.'`` otherwise.
    """
    frame_duration = 30
    # x2 because audio is 16 bit (2 bytes per sample)
    frame_bytes = int(sample_rate * frame_duration / 1000) * 2

    # Step frame by frame up to and including len(data): the previous per-byte
    # loop stopped at len(data) - 1 and therefore never classified the last
    # complete frame (a buffer of exactly one frame produced no verdict).
    res_list = [
        vad.is_speech(data[end - frame_bytes:end], sample_rate)
        for end in range(frame_bytes, len(data) + 1, frame_bytes)
    ]

    info = ''.join('^' if r else '.' for r in res_list)[:10]
    return any(res_list), info
104
-
105
-
106
class AliyunASR():
    """
    Streaming speech-to-text client built on the Aliyun NLS transcriber.

    The class only provides callbacks and the worker loop. The attributes it
    reads or sets (``stop``, ``event_on_entence_end``, ``event_on_result_chg``
    and so on) are expected to be created by whoever drives it; this block
    does not initialise them.
    """

    def test_on_sentence_begin(self, message, *args):
        # print("test_on_sentence_begin:{}".format(message))
        pass

    def test_on_sentence_end(self, message, *args):
        """Store the finished sentence and signal ``event_on_entence_end``."""
        # print("test_on_sentence_end:{}".format(message))
        message = json.loads(message)
        self.parsed_sentence = message['payload']['result']
        # NOTE: the attribute name (with its typo) is part of the contract
        # with the code that creates the event, so it is kept as-is.
        self.event_on_entence_end.set()
        # print(self.parsed_sentence)

    def test_on_start(self, message, *args):
        # print("test_on_start:{}".format(message))
        pass

    def test_on_error(self, message, *args):
        """Log the callback arguments of a service-side error."""
        logging.error("on_error args=>{}".format(args))

    def test_on_close(self, *args):
        """Mark the service as down so the worker loop terminates."""
        self.aliyun_service_ok = False

    def test_on_result_chg(self, message, *args):
        """Store the intermediate transcript and signal ``event_on_result_chg``."""
        # print("test_on_chg:{}".format(message))
        message = json.loads(message)
        self.parsed_text = message['payload']['result']
        self.event_on_result_chg.set()

    def test_on_completed(self, message, *args):
        # print("on_completed:args=>{} message=>{}".format(args, message))
        pass

    def audio_convertion_thread(self, uuid):
        """
        Worker loop: pull captured audio for ``uuid``, resample it to 16 kHz,
        and forward it to the Aliyun transcriber while speech is detected.

        Runs until ``self.stop`` becomes true or the service reports closure,
        in which case ``self.stop_msg`` is filled with a user-facing message.
        """
        # audio is collected in a background thread
        import nls  # pip install git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
        import tempfile
        from toolbox import get_conf
        from .audio_io import change_sample_rate
        from .audio_io import RealtimeAudioDistribution
        NEW_SAMPLERATE = 16000
        rad = RealtimeAudioDistribution()
        rad.clean_up()
        temp_folder = tempfile.gettempdir()
        TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY')
        if len(TOKEN) == 0:
            # no static token configured: request one from the access key pair
            TOKEN = self.get_token()
        self.aliyun_service_ok = True
        URL="wss://nls-gateway.aliyuncs.com/ws/v1"
        sr = nls.NlsSpeechTranscriber(
                    url=URL,
                    token=TOKEN,
                    appkey=APPKEY,
                    on_sentence_begin=self.test_on_sentence_begin,
                    on_sentence_end=self.test_on_sentence_end,
                    on_start=self.test_on_start,
                    on_result_changed=self.test_on_result_chg,
                    on_completed=self.test_on_completed,
                    on_error=self.test_on_error,
                    on_close=self.test_on_close,
                    callback_args=[uuid.hex]
                )
        timeout_limit_second = 20
        r = sr.start(aformat="pcm",
                timeout=timeout_limit_second,
                enable_intermediate_result=True,
                enable_punctuation_prediction=True,
                enable_inverse_text_normalization=True)

        import webrtcvad
        vad = webrtcvad.Vad()
        vad.set_mode(1)

        is_previous_frame_transmitted = False   # whether the previous chunk was sent
        # NOTE(review): previous_frame_data is never assigned anything but
        # None in this loop, so the "prepend previous chunk" branch below is
        # currently inert -- confirm whether that is intended.
        previous_frame_data = None
        echo_cnt = 0        # remaining chunks to keep sending after speech stops
        echo_cnt_max = 4    # how many chunks to keep sending after speech stops
        keep_alive_last_send_time = time.time()
        while not self.stop:
            # time.sleep(self.capture_interval)
            audio = rad.read(uuid.hex)
            if audio is not None:
                # convert to pcm file
                temp_file = f'{temp_folder}/{uuid.hex}.pcm' #
                dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE) # 48000 --> 16000
                write_numpy_to_wave(temp_file, NEW_SAMPLERATE, dsdata)
                # read pcm binary
                with open(temp_file, "rb") as f: data = f.read()
                is_speaking, info = is_speaker_speaking(vad, data, NEW_SAMPLERATE)

                if is_speaking or echo_cnt > 0:
                    # microphone active, or still in the trailing "echo" phase
                    echo_cnt -= 1
                    if not is_previous_frame_transmitted:
                        # previous chunk had no voice, but prepend it anyway
                        if previous_frame_data is not None: data = previous_frame_data + data
                    if is_speaking:
                        echo_cnt = echo_cnt_max
                    slices = zip(*(iter(data),) * 640)      # groups of 640 bytes
                    for i in slices: sr.send_audio(bytes(i))
                    keep_alive_last_send_time = time.time()
                    is_previous_frame_transmitted = True
                else:
                    is_previous_frame_transmitted = False
                    echo_cnt = 0
                    # keep the connection alive: even in silence, send some
                    # audio once half the service timeout has elapsed
                    if time.time() - keep_alive_last_send_time > timeout_limit_second/2:
                        slices = zip(*(iter(data),) * 640)    # groups of 640 bytes
                        for i in slices: sr.send_audio(bytes(i))
                        keep_alive_last_send_time = time.time()
                        is_previous_frame_transmitted = True
                self.audio_shape = info
            else:
                time.sleep(0.1)

            if not self.aliyun_service_ok:
                self.stop = True
                self.stop_msg = 'Aliyun音频服务异常,请检查ALIYUN_TOKEN和ALIYUN_APPKEY是否过期。'
        r = sr.stop()

    def get_token(self):
        """
        Request a temporary NLS token using the configured access key pair.

        Returns:
            The token id string from the ``CreateToken`` response.

        Raises:
            RuntimeError: if the request fails or the response carries no
                token. Previously this path fell through to ``return token``
                with the name unbound and surfaced as an UnboundLocalError.
        """
        from toolbox import get_conf
        from aliyunsdkcore.request import CommonRequest
        from aliyunsdkcore.client import AcsClient
        AccessKey_ID, AccessKey_secret = get_conf('ALIYUN_ACCESSKEY', 'ALIYUN_SECRET')

        # create the AcsClient instance
        client = AcsClient(
            AccessKey_ID,
            AccessKey_secret,
            "cn-shanghai"
        )

        # build the request and set its parameters
        request = CommonRequest()
        request.set_method('POST')
        request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
        request.set_version('2019-02-28')
        request.set_action_name('CreateToken')

        try:
            response = client.do_action_with_exception(request)
            jss = json.loads(response)
        except Exception as e:
            raise RuntimeError("Aliyun CreateToken request failed") from e

        if 'Token' not in jss or 'Id' not in jss['Token']:
            raise RuntimeError("Aliyun CreateToken response does not contain a token")

        token = jss['Token']['Id']
        # the token itself is a credential, so only its expiry is logged
        logging.info("Aliyun token acquired, expireTime = %s", jss['Token'].get('ExpireTime'))
        return token
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/live_audio/audio_io.py DELETED
@@ -1,51 +0,0 @@
1
- import numpy as np
2
- from scipy import interpolate
3
-
4
def Singleton(cls):
    """
    Class decorator that turns ``cls`` into a lazily created singleton.

    The returned callable builds the instance on its first call (using that
    call's arguments) and hands back the same object on every later call;
    arguments of later calls are ignored.
    """
    instances = {}

    def _singleton(*args, **kargs):
        try:
            return instances[cls]
        except KeyError:
            created = instances[cls] = cls(*args, **kargs)
            return created

    return _singleton
13
-
14
-
15
@Singleton
class RealtimeAudioDistribution():
    """Per-client buffer of captured audio, shared as a single instance."""

    def __init__(self) -> None:
        self.data = {}
        self.max_len = 1024*1024
        self.rate = 48000   # read-only for consumers: samples per second

    def clean_up(self):
        """Drop every buffered chunk."""
        self.data = {}

    def feed(self, uuid, audio):
        """
        Append a ``(rate, samples)`` pair to the buffer kept for ``uuid``.

        The buffer is capped at ``max_len`` entries; the oldest samples are
        discarded first. ``self.rate`` is overwritten with the incoming rate.
        """
        rate, chunk = audio
        self.rate = rate
        # print('feed', len(chunk), chunk[-25:])
        if uuid in self.data:
            merged = np.concatenate((self.data[uuid], chunk))
            if len(merged) > self.max_len:
                merged = merged[-self.max_len:]
            self.data[uuid] = merged
        else:
            self.data[uuid] = chunk

    def read(self, uuid):
        """Remove and return the buffer for ``uuid``, or None if there is none."""
        return self.data.pop(uuid, None)
42
-
43
def change_sample_rate(audio, old_sr, new_sr):
    """
    Resample ``audio`` from ``old_sr`` to ``new_sr`` by linear interpolation.

    Interpolation runs along the first axis; the result is cast to int16.
    """
    n_old = audio.shape[0]
    duration = n_old / old_sr
    n_new = int(n_old * new_sr / old_sr)

    # both time axes span the same interval, only the point count differs
    time_old = np.linspace(0, duration, n_old)
    time_new = np.linspace(0, duration, n_new)

    resample = interpolate.interp1d(time_old, audio.T)
    return resample(time_new).T.astype(np.int16)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/multi_stage/multi_stage_utils.py DELETED
@@ -1,93 +0,0 @@
1
- from pydantic import BaseModel, Field
2
- from typing import List
3
- from toolbox import update_ui_lastest_msg, disable_auto_promotion
4
- from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
5
- from request_llms.bridge_all import predict_no_ui_long_connection
6
- from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
7
- import time
8
- import pickle
9
-
10
def have_any_recent_upload_files(chatbot):
    """
    Tell whether the chatbot's cookies record an upload from the last 5 minutes.

    Returns False when ``chatbot`` is falsy or no ``most_recent_uploaded``
    record exists.
    """
    if not chatbot:
        return False    # chatbot is None
    latest = chatbot._cookies.get("most_recent_uploaded", None)
    if not latest:
        return False    # nothing was uploaded
    age = time.time() - latest["time"]
    return age < 5 * 60
17
-
18
class GptAcademicState():
    """
    Plugin state persisted as a pickle in ``chatbot._cookies['plugin_state']``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Hook for subclasses to (re)initialise their fields."""
        pass

    def __getstate__(self):
        # The chatbot handle is re-attached by get_state() after loading.
        # Pickling it would embed the cookies -- and with them the previous
        # pickle of this very state -- so the blob would grow on every dump.
        state = self.__dict__.copy()
        state.pop('chatbot', None)
        return state

    def dump_state(self, chatbot):
        """Serialise this state into the chatbot cookies."""
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    def set_state(self, chatbot, key, value):
        """Set attribute ``key`` to ``value`` and persist the state."""
        setattr(self, key, value)
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    @staticmethod
    def get_state(chatbot, cls=None):
        """
        Load the state stored in the chatbot cookies, or create a fresh one.

        Args:
            chatbot: object carrying the ``_cookies`` dict.
            cls: class to instantiate when nothing is stored; defaults to
                ``GptAcademicState``.

        Returns:
            The state object with ``state.chatbot`` set to ``chatbot``.
        """
        state = chatbot._cookies.get('plugin_state', None)
        # NOTE(security): pickle.loads executes arbitrary code if the cookie
        # content can be influenced from outside -- verify the cookies never
        # carry client-supplied bytes.
        if state is not None:   state = pickle.loads(state)
        elif cls is not None:   state = cls()
        else:                   state = GptAcademicState()
        state.chatbot = chatbot
        return state
39
-
40
-
41
class GptAcademicGameBaseState():
    """
    Base class for multi-step "game" plugins whose state lives in the
    chatbot cookies under ``plugin_state/<plugin_name>``.

    1. first init: __init__ ->

    Subclasses are expected to provide ``step(prompt, chatbot, history)``,
    which ``continue_game`` delegates to.
    """
    def init_game(self, chatbot, lock_plugin):
        """Reset the bookkeeping fields of a freshly created game."""
        self.plugin_name = None
        self.callback_fn = None
        self.delete_game = False
        self.step_cnt = 0

    def __getstate__(self):
        # The chatbot handle is re-attached by sync_state() after loading.
        # Pickling it would embed the cookies -- and with them the previous
        # pickle of this very state -- so the blob would grow on every dump.
        state = self.__dict__.copy()
        state.pop('chatbot', None)
        return state

    def lock_plugin(self, chatbot):
        """
        Record ``callback_fn`` under ``lock_plugin`` in the cookies and
        persist the state.

        Raises:
            ValueError: if ``callback_fn`` has not been set.
        """
        if self.callback_fn is None:
            raise ValueError("callback_fn is None")
        chatbot._cookies['lock_plugin'] = self.callback_fn
        self.dump_state(chatbot)

    def get_plugin_name(self):
        """Return the plugin name; raise ValueError if it is unset."""
        if self.plugin_name is None:
            raise ValueError("plugin_name is None")
        return self.plugin_name

    def dump_state(self, chatbot):
        """Serialise this state into the chatbot cookies."""
        chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)

    def set_state(self, chatbot, key, value):
        """Set attribute ``key`` to ``value`` and persist the state."""
        setattr(self, key, value)
        chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)

    @staticmethod
    def sync_state(chatbot, llm_kwargs, cls, plugin_name, callback_fn, lock_plugin=True):
        """
        Load the stored game for ``plugin_name`` or create one from ``cls``.

        The per-call context (plugin name, llm kwargs, chatbot, callback) is
        refreshed on the returned object in either case.
        """
        state = chatbot._cookies.get(f'plugin_state/{plugin_name}', None)
        if state is not None:
            # NOTE(security): pickle.loads executes arbitrary code if the
            # cookie content can be influenced from outside -- verify the
            # cookies never carry client-supplied bytes.
            state = pickle.loads(state)
        else:
            state = cls()
            state.init_game(chatbot, lock_plugin)
        state.plugin_name = plugin_name
        state.llm_kwargs = llm_kwargs
        state.chatbot = chatbot
        state.callback_fn = callback_fn
        return state

    def continue_game(self, prompt, chatbot, history):
        """Run one game step, persist the state, and clean up when finished."""
        # main body of the game
        yield from self.step(prompt, chatbot, history)
        self.step_cnt += 1
        # save state, wrap up
        self.dump_state(chatbot)
        # if the game is over, clear the lock and the stored state
        if self.delete_game:
            chatbot._cookies['lock_plugin'] = None
            chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = None
        yield from update_ui(chatbot=chatbot, history=history)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/pdf_fns/breakdown_txt.py DELETED
@@ -1,125 +0,0 @@
1
- from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout
2
-
3
def force_breakdown(txt, limit, get_token_fn):
    """
    Last-resort split used when punctuation and blank lines cannot be used.

    Scans cut positions from the end of ``txt`` towards the start and returns
    ``(head, tail)`` for the first position whose head has fewer than
    ``limit`` tokens. If no position qualifies, a pair of error strings is
    returned.
    """
    cut_at = len(txt) - 1
    while cut_at >= 0:
        head = txt[:cut_at]
        if get_token_fn(head) < limit:
            return head, txt[cut_at:]
        cut_at -= 1
    return "Tiktoken未知错误", "Tiktoken未知错误"
10
-
11
-
12
def maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage):
    """
    Keep the working text within a bounded size to speed up tokenisation.

    When the working text is shorter than 50,000 characters and the storage
    is non-empty, the storage is appended to it. When the working text is
    longer than 100,000 characters, everything past that point is moved to
    the front of the storage.

    Returns:
        ``(working_text, storage)`` after rebalancing.
    """
    low_water = int(5e4)
    high_water = int(1e5)
    active, parked = remain_txt_to_cut, remain_txt_to_cut_storage
    # print(len(active), len(parked))
    if len(parked) > 0 and len(active) < low_water:
        active, parked = active + parked, ""
    if len(active) > high_water:
        active, parked = active[:high_water], active[high_water:] + parked
    return active, parked
26
-
27
-
28
def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=False):
    """
    Split ``txt_tocut`` into fragments that satisfy the token limit.

    Args:
        limit: token limit for a fragment.
        get_token_fn: callable returning the token count of a string.
        txt_tocut: the text to split.
        must_break_at_empty_line: only allow cuts at empty lines.
        break_anyway: fall back to ``force_breakdown`` when no line-based
            cut point is found, instead of raising.

    Returns:
        List of fragments in order.

    Raises:
        RuntimeError: when no cut point is found and ``break_anyway`` is off.
    """
    pieces = []
    total_len = len(txt_tocut)
    done_len = 0
    # to speed things up, only a bounded window of text is processed at a time;
    # the overflow is parked in `parked` by maintain_storage
    remaining, parked = maintain_storage(txt_tocut, "")

    while True:
        if get_token_fn(remaining) <= limit:
            # what is left already fits: no more cutting needed
            pieces.append(remaining)
            done_len += len(remaining)
            break

        # what is left exceeds the limit: cut it
        lines = remaining.split('\n')

        # estimate a cut position proportional to the token budget
        estimated_line_cut = int(limit / get_token_fn(remaining) * len(lines))

        # walk backwards from the estimate looking for a usable cut line (cnt)
        cnt = 0
        for cnt in reversed(range(estimated_line_cut)):
            # in strict mode only an empty line may serve as the cut point
            if must_break_at_empty_line and lines[cnt] != "":
                continue
            prev = "\n".join(lines[:cnt])
            post = "\n".join(lines[cnt:])
            if get_token_fn(prev) < limit:
                break

        if cnt == 0:
            # no suitable cut point was found
            if not break_anyway:
                # brute force is not allowed: report the failure
                raise RuntimeError(f"存在一行极长的文本!{remaining}")
            # brute-force split
            prev, post = force_breakdown(remaining, limit, get_token_fn)

        # record the fragment
        pieces.append(prev)
        done_len += len(prev)
        # prepare the next iteration
        remaining, parked = maintain_storage(post, parked)
        process = done_len/total_len
        print(f'正在文本切分 {int(process*100)}%')
        if len(remaining.strip()) == 0:
            break
    return pieces
83
-
84
-
85
def breakdown_text_to_satisfy_token_limit_(txt, limit, llm_model="gpt-3.5-turbo"):
    """
    Split ``txt`` into fragments within ``limit`` tokens, trying
    progressively coarser strategies until one succeeds.

    Token counts come from the tokenizer registered for ``llm_model``.
    """
    from request_llms.bridge_all import model_info
    enc = model_info[llm_model]['tokenizer']
    def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))

    # attempt 1: cut at empty lines (\n\n)
    try:
        return cut(limit, get_token_fn, txt, must_break_at_empty_line=True)
    except RuntimeError:
        pass

    # attempt 2: cut at any line break (\n)
    try:
        return cut(limit, get_token_fn, txt, must_break_at_empty_line=False)
    except RuntimeError:
        pass

    # attempt 3: cut at English full stops (.); the Chinese full stop is used
    # on purpose as a marker so the change can be reverted afterwards
    try:
        res = cut(limit, get_token_fn, txt.replace('.', '。\n'), must_break_at_empty_line=False)
        return [r.replace('。\n', '.') for r in res]
    except RuntimeError:
        pass

    # attempt 4: cut at Chinese full stops (。)
    try:
        res = cut(limit, get_token_fn, txt.replace('。', '。。\n'), must_break_at_empty_line=False)
        return [r.replace('。。\n', '。') for r in res]
    except RuntimeError:
        pass

    # attempt 5: nothing else worked, cut wherever necessary
    return cut(limit, get_token_fn, txt, must_break_at_empty_line=False, break_anyway=True)
111
-
112
# public entry point: the splitter wrapped by run_in_subprocess_with_timeout
breakdown_text_to_satisfy_token_limit = run_in_subprocess_with_timeout(
    breakdown_text_to_satisfy_token_limit_,
    timeout=60,
)

if __name__ == '__main__':
    # manual smoke test on a sample PDF
    from crazy_functions.crazy_utils import read_and_clean_pdf_text
    file_content, page_one = read_and_clean_pdf_text("build/assets/at.pdf")

    from request_llms.bridge_all import model_info
    # double the text five times to obtain a large input
    for _ in range(5):
        file_content = file_content + file_content

    print(len(file_content))
    TOKEN_LIMIT_PER_FRAGMENT = 2500
    res = breakdown_text_to_satisfy_token_limit(file_content, TOKEN_LIMIT_PER_FRAGMENT)
125
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/pdf_fns/parse_pdf.py DELETED
@@ -1,171 +0,0 @@
1
- from functools import lru_cache
2
- from toolbox import gen_time_str
3
- from toolbox import promote_file_to_downloadzone
4
- from toolbox import write_history_to_file, promote_file_to_downloadzone
5
- from toolbox import get_conf
6
- from toolbox import ProxyNetworkActivate
7
- from colorful import *
8
- import requests
9
- import random
10
- import copy
11
- import os
12
- import math
13
-
14
class GROBID_OFFLINE_EXCEPTION(Exception):
    """Error type used to report that the GROBID service is unavailable."""
15
-
16
def get_avail_grobid_url():
    """
    Pick one configured GROBID endpoint at random and check that it is alive.

    Returns:
        The chosen URL without trailing slashes if its ``/api/isalive``
        endpoint answers ``true``; otherwise None. None is also returned when
        no URL is configured or the probe fails for any reason.
    """
    GROBID_URLS = get_conf('GROBID_URLS')
    if len(GROBID_URLS) == 0: return None
    try:
        _grobid_url = random.choice(GROBID_URLS).rstrip('/')  # random load balancing
        with ProxyNetworkActivate('Connect_Grobid'):
            # NOTE(review): no timeout is passed, so an unresponsive host can
            # block this probe -- consider adding one.
            res = requests.get(_grobid_url+'/api/isalive')
        if res.text=='true': return _grobid_url
        else: return None
    except Exception:
        # best-effort probe: any failure means "not available", but
        # KeyboardInterrupt/SystemExit are no longer swallowed
        return None
28
-
29
@lru_cache(maxsize=32)
def parse_pdf(pdf_path, grobid_url):
    """
    Parse a PDF into a dictionary via ``scipdf`` and a GROBID server.

    Results are memoised per ``(pdf_path, grobid_url)`` pair.

    Raises:
        GROBID_OFFLINE_EXCEPTION: re-raised with a user-facing message when
            that exception type surfaces during parsing.
        RuntimeError: for any other parsing failure; the original error is
            attached as ``__cause__``.
    """
    import scipdf   # pip install scipdf_parser
    grobid_url = grobid_url.rstrip('/')
    try:
        with ProxyNetworkActivate('Connect_Grobid'):
            article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
    except GROBID_OFFLINE_EXCEPTION as e:
        raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。") from e
    except Exception as e:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate; chaining keeps the root cause visible
        raise RuntimeError("解析PDF失败,请检查PDF是否损坏。") from e
    return article_dict
41
-
42
-
43
def produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files):
    """
    Write two Markdown reports and register them for download.

    The first file holds the full collection (inputs and responses
    interleaved); the second holds only the odd-indexed entries, each section
    title emitted once. Both paths are appended to
    ``generated_conclusion_files``.

    Returns:
        Path of the second (translation-only) file.
    """
    preface = meta + ["# Meta Translation" , paper_meta_info]

    # -=-=-=-=-=-=-=-= file 1: original and translation mixed -=-=-=-=-=-=-=-=
    res_path = write_history_to_file(preface + gpt_response_collection, file_basename=f"{gen_time_str()}translated_and_original.md", file_fullname=None)
    promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
    generated_conclusion_files.append(res_path)

    # -=-=-=-=-=-=-=-= file 2: translated text only -=-=-=-=-=-=-=-=
    translated_res_array = []
    # title of the section currently being emitted
    last_section_name = ""
    # odd positions hold the responses; the entry just before each one
    # starts with the corresponding heading
    for index in range(1, len(gpt_response_collection), 2):
        section_name = gpt_response_collection[index-1].split('\n')[0].split(" Part")[0]
        if section_name == last_section_name:
            # continuation of the same section: do not repeat the title
            piece = ""
        else:
            piece = section_name + '\n'
            last_section_name = section_name
        translated_res_array.append(piece + gpt_response_collection[index])

    res_path = write_history_to_file(
        preface + translated_res_array,
        file_basename = f"{gen_time_str()}-translated_only.md",
        file_fullname = None,
        auto_caption = False,
    )
    promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
    generated_conclusion_files.append(res_path)
    return res_path
74
-
75
def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG):
    """
    Translate a parsed paper section by section and emit Markdown/HTML reports.

    This is a generator: it yields whatever the underlying request helpers
    yield, so it must be driven with ``yield from``.

    Args:
        article_dict: parsed paper; the keys ``title``, ``authors``,
            ``abstract`` and ``sections`` (items with ``heading``/``text``)
            are read here.
        llm_kwargs: model settings; ``llm_kwargs['llm_model']`` selects the
            tokenizer.
        chatbot: UI handle forwarded to the request and download helpers.
        fp: path of the source PDF, used to name the HTML report.
        generated_conclusion_files: list that receives the produced paths.
        TOKEN_LIMIT_PER_FRAGMENT: maximum tokens per translated fragment.
        DST_LANG: target language, used for the metadata prompt and for the
            per-section system prompt.
    """
    from crazy_functions.pdf_fns.report_gen_html import construct_html
    from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
    from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    prompt = "以下是一篇学术论文的基本信息:\n"
    # title
    title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n'
    # authors
    authors = article_dict.get('authors', '无法获取 authors')[:100]; prompt += f'authors:{authors}\n\n'
    # abstract
    abstract = article_dict.get('abstract', '无法获取 abstract'); prompt += f'abstract:{abstract}\n\n'
    # command
    prompt += f"请将题目和摘要翻译为{DST_LANG}。"
    meta = [f'# Title:\n\n', title, f'# Abstract:\n\n', abstract ]

    # single request: translate the paper's metadata
    paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt,
        inputs_show_user=prompt,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot, history=[],
        sys_prompt="You are an academic paper reader。",
    )

    # batched requests: translate the body
    inputs_array = []
    inputs_show_user_array = []

    # get_token_num
    from request_llms.bridge_all import model_info
    enc = model_info[llm_kwargs['llm_model']]['tokenizer']
    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

    def break_down(txt):
        raw_token_num = get_token_num(txt)
        if raw_token_num <= TOKEN_LIMIT_PER_FRAGMENT:
            return [txt]
        else:
            # raw_token_num > TOKEN_LIMIT_PER_FRAGMENT
            # find a smooth token limit to achieve even separation
            count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))
            token_limit_smooth = raw_token_num // count + count
            return breakdown_text_to_satisfy_token_limit(txt, limit=token_limit_smooth, llm_model=llm_kwargs['llm_model'])

    for section in article_dict.get('sections'):
        if len(section['text']) == 0: continue
        section_frags = break_down(section['text'])
        for i, fragment in enumerate(section_frags):
            heading = section['heading']
            if len(section_frags) > 1: heading += f' Part-{i+1}'
            inputs_array.append(
                f"你需要翻译{heading}章节,内容如下: \n\n{fragment}"
            )
            inputs_show_user_array.append(
                f"# {heading}\n\n{fragment}"
            )

    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[meta for _ in inputs_array],
        # The target language used to be hard-coded to Chinese here even
        # though the metadata prompt above honours DST_LANG; for
        # DST_LANG == "中文" the prompt is unchanged.
        sys_prompt_array=[
            f"请你作为一个学术翻译,负责把学术论文准确翻译成{DST_LANG}。注意文章中的每一句话都要翻译。" for _ in inputs_array],
    )
    # -=-=-=-=-=-=-=-= write the Markdown files -=-=-=-=-=-=-=-=
    produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files)

    # -=-=-=-=-=-=-=-= write the HTML file -=-=-=-=-=-=-=-=
    ch = construct_html()
    orig = ""
    trans = ""
    gpt_response_collection_html = copy.deepcopy(gpt_response_collection)
    for i,k in enumerate(gpt_response_collection_html):
        if i%2==0:
            # even slots: show the user-facing input for that request
            gpt_response_collection_html[i] = inputs_show_user_array[i//2]
        else:
            # odd slots: prefix the response with its section title, taken
            # from the first line of the preceding entry
            cur_section_name = gpt_response_collection[i-1].split('\n')[0].split(" Part")[0]
            cur_value = cur_section_name + "\n" + gpt_response_collection_html[i]
            gpt_response_collection_html[i] = cur_value

    final = ["", "", "一、论文概况", "", "Abstract", paper_meta_info, "二、论文翻译", ""]
    final.extend(gpt_response_collection_html)
    # consecutive (even, odd) entries form one (original, translation) row
    for i, k in enumerate(final):
        if i%2==0:
            orig = k
        if i%2==1:
            trans = k
            ch.add_row(a=orig, b=trans)
    create_report_file_name = f"{os.path.basename(fp)}.trans.html"
    html_file = ch.save_file(create_report_file_name)
    generated_conclusion_files.append(html_file)
    promote_file_to_downloadzone(html_file, rename_file=os.path.basename(html_file), chatbot=chatbot)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/pdf_fns/parse_word.py DELETED
@@ -1,85 +0,0 @@
1
- from crazy_functions.crazy_utils import read_and_clean_pdf_text, get_files_from_everything
2
- import os
3
- import re
4
def extract_text_from_files(txt, chatbot, history):
    """
    Locate pdf/md/docx files referenced by ``txt`` and return their text.

    Args:
        txt: content of the input area (looked up as a file reference).
        chatbot: chatbot inputs and outputs (UI handle; not used here).
        history (list): list of chat history (not used here).

    Returns:
        A 5-tuple ``(found, final_result, page_one, file_manifest, excption)``:
            found (bool): whether any file content was extracted.
            final_result (list): text of each file, or ``[txt]`` when the
                input does not refer to any file.
            page_one (list): first page / summary of each file.
            file_manifest (list): path of each file relative to its folder.
            excption (str): marker for a condition the user must resolve
                (``"word"``, ``"pdf"`` or ``"word_pip"``); empty if none.
    """

    final_result = []
    page_one = []
    file_manifest = []
    excption = ""

    if txt == "":
        final_result.append(txt)
        return False, final_result, page_one, file_manifest, excption   # input is not a file: return it unchanged

    # look for files referenced by the input
    file_pdf,pdf_manifest,folder_pdf = get_files_from_everything(txt, '.pdf')
    file_md,md_manifest,folder_md = get_files_from_everything(txt, '.md')
    file_word,word_manifest,folder_word = get_files_from_everything(txt, '.docx')
    file_doc,doc_manifest,folder_doc = get_files_from_everything(txt, '.doc')

    if file_doc:
        # a .doc lookup hit is reported to the caller instead of being parsed
        excption = "word"
        return False, final_result, page_one, file_manifest, excption

    file_num = len(pdf_manifest) + len(md_manifest) + len(word_manifest)
    if file_num == 0:
        final_result.append(txt)
        return False, final_result, page_one, file_manifest, excption   # input is not a file: return it unchanged

    if file_pdf:
        try:    # probe the dependency; if missing, let the caller suggest installing it
            import fitz
        except Exception:
            # `except Exception` rather than a bare except, so that
            # KeyboardInterrupt/SystemExit are not swallowed; kept broader
            # than ImportError so any import-time failure is still reported
            excption = "pdf"
            return False, final_result, page_one, file_manifest, excption
        for index, fp in enumerate(pdf_manifest):
            file_content, pdf_one = read_and_clean_pdf_text(fp) # (try to) split the PDF by section
            file_content = file_content.encode('utf-8', 'ignore').decode()   # avoid reading non-utf8 chars
            pdf_one = str(pdf_one).encode('utf-8', 'ignore').decode()  # avoid reading non-utf8 chars
            final_result.append(file_content)
            page_one.append(pdf_one)
            file_manifest.append(os.path.relpath(fp, folder_pdf))

    if file_md:
        for index, fp in enumerate(md_manifest):
            with open(fp, 'r', encoding='utf-8', errors='replace') as f:
                file_content = f.read()
            file_content = file_content.encode('utf-8', 'ignore').decode()
            # use the markdown headings matched by the pattern as the summary
            headers = re.findall(r'^#\s(.*)$', file_content, re.MULTILINE)
            if len(headers) > 0:
                page_one.append("\n".join(headers)) # join all headings with newlines
            else:
                page_one.append("")
            final_result.append(file_content)
            file_manifest.append(os.path.relpath(fp, folder_md))

    if file_word:
        try:    # probe the dependency; if missing, let the caller suggest installing it
            from docx import Document
        except Exception:
            excption = "word_pip"
            return False, final_result, page_one, file_manifest, excption
        for index, fp in enumerate(word_manifest):
            doc = Document(fp)
            file_content = '\n'.join([p.text for p in doc.paragraphs])
            file_content = file_content.encode('utf-8', 'ignore').decode()
            page_one.append(file_content[:200])
            final_result.append(file_content)
            file_manifest.append(os.path.relpath(fp, folder_word))

    return True, final_result, page_one, file_manifest, excption
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
crazy_functions/pdf_fns/report_gen_html.py DELETED
@@ -1,58 +0,0 @@
1
- from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder
2
- import os
3
-
4
-
5
-
6
-
7
- class construct_html():
8
- def __init__(self) -> None:
9
- self.html_string = ""
10
-
11
- def add_row(self, a, b):
12
- from toolbox import markdown_convertion
13
- template = """
14
- {
15
- primary_col: {
16
- header: String.raw`__PRIMARY_HEADER__`,
17
- msg: String.raw`__PRIMARY_MSG__`,
18
- },
19
- secondary_rol: {
20
- header: String.raw`__SECONDARY_HEADER__`,
21
- msg: String.raw`__SECONDARY_MSG__`,
22
- }
23
- },
24
- """
25
- def std(str):
26
- str = str.replace(r'`',r'&#96;')
27
- if str.endswith("\\"): str += ' '
28
- if str.endswith("}"): str += ' '
29
- if str.endswith("$"): str += ' '
30
- return str
31
-
32
- template_ = template
33
- a_lines = a.split('\n')
34
- b_lines = b.split('\n')
35
-
36
- if len(a_lines) == 1 or len(a_lines[0]) > 50:
37
- template_ = template_.replace("__PRIMARY_HEADER__", std(a[:20]))
38
- template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion(a)))
39
- else:
40
- template_ = template_.replace("__PRIMARY_HEADER__", std(a_lines[0]))
41
- template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion('\n'.join(a_lines[1:]))))
42
-
43
- if len(b_lines) == 1 or len(b_lines[0]) > 50:
44
- template_ = template_.replace("__SECONDARY_HEADER__", std(b[:20]))
45
- template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion(b)))
46
- else:
47
- template_ = template_.replace("__SECONDARY_HEADER__", std(b_lines[0]))
48
- template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion('\n'.join(b_lines[1:]))))
49
- self.html_string += template_
50
-
51
- def save_file(self, file_name):
52
- from toolbox import get_log_folder
53
- with open('crazy_functions/pdf_fns/report_template.html', 'r', encoding='utf8') as f:
54
- html_template = f.read()
55
- html_template = html_template.replace("__TF_ARR__", self.html_string)
56
- with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f:
57
- f.write(html_template.encode('utf-8', 'ignore').decode())
58
- return os.path.join(get_log_folder(), file_name)