Commit b22f922 · cxumol committed
preprocess ok

Files changed:
- .gitattributes +3 -0
- .gitignore +162 -0
- .lightning_studio/.studiorc +1 -0
- .lightning_studio/on_start.sh +13 -0
- .litng.gradio.sh +3 -0
- .vscode/settings.json +75 -0
- app.py +152 -0
- config.py +20 -0
- config_secret.tmpl.py +3 -0
- data_test.py +67 -0
- pyproject.toml +31 -0
- requirements.txt +12 -0
- run.sh +2 -0
- setup.sh +3 -0
- taskAI.py +74 -0
- taskNonAI.py +32 -0
- test.py +32 -0
- util.py +44 -0
.gitattributes ADDED
@@ -0,0 +1,3 @@
+*.ttf filter=lfs diff=lfs merge=lfs -text
+*.otf filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,162 @@
+*_secret.py
+*_secret.py*
+
+.local/
+.ruff_cache/
+bin/
+*.deb
+*.bin
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
.lightning_studio/.studiorc ADDED
@@ -0,0 +1 @@
+/settings/.studiorc
.lightning_studio/on_start.sh ADDED
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# This script runs every time your Studio starts, from your home directory.
+
+# List files under fast_load that need to load quickly on start (e.g. model checkpoints).
+#
+# ! fast_load
+# <your file here>
+
+# Add your startup commands below.
+#
+# Example: streamlit run my_app.py
+# Example: gradio my_app.py
.litng.gradio.sh ADDED
@@ -0,0 +1,3 @@
+#!/bin/bash
+pip install -U "gradio>=4,<=5"
+GRADIO_SERVER_PORT=7860 gradio app.py --watch-dirs .
.vscode/settings.json ADDED
@@ -0,0 +1,75 @@
+{
+    "python.defaultInterpreterPath": "/home/zeus/miniconda3/envs/cloudspace/bin/python",
+    "workbench.startupEditor": "none",
+    "python.terminal.activateEnvironment": false,
+    "terminal.integrated.drawBoldTextInBrightColors": false,
+    "terminal.integrated.gpuAcceleration": "on",
+    "terminal.integrated.localEchoLatencyThreshold": 0,
+    "terminal.integrated.localEchoEnabled": "off",
+    "terminal.integrated.localEchoStyle": "#000000",
+    "remote.autoForwardPorts": false,
+    "terminal.integrated.defaultProfile.linux": "zsh",
+    "terminal.integrated.tabs.title": "${process}${separator}${task}",
+    "jupyter.notebookFileRoot": "${workspaceFolder}",
+    "terminal.integrated.enableMultiLinePasteWarning": false,
+    "files.exclude": {
+        "venv": true,
+        "lightning-annotations.json": true,
+        ".lightning-app-sync": true,
+        ".lighting-app-sync": true,
+        ".lightning-app-run": true,
+        "**/*.lightning_upload": true,
+        ".ssh*/**": true,
+        ".conda*/**": true,
+        ".config*/**": true,
+        ".npm*/**": true,
+        ".nvm*/**": true,
+        ".bower*/**": true,
+        ".ipython": true,
+        ".local*/**": true,
+        ".oh-my-zsh*/**": true,
+        ".cache": true,
+        "miniconda3": true,
+        ".condarc": true,
+        ".gitconfig": true,
+        ".hushlogin": true,
+        ".profile": true,
+        ".screenrc": true,
+        ".sudo_as_admin_successful": true,
+        ".zcompdump*": true,
+        ".bash_history": true,
+        ".bashrc": true,
+        ".zsh_history": true,
+        ".zshrc": true,
+        ".zshenv": true,
+        ".zlogin": true,
+        ".zprofile": true,
+        ".zlogout": true,
+        ".python_history": true,
+        ".lightningignore": true,
+        ".nv": true,
+        ".docker": true,
+        ".jupyter": true,
+        ".lightning": true,
+        ".vscode": true,
+        ".vscode/**": true,
+        ".wget-hsts": true,
+        ".vscode-server/**": true,
+        ".vscode-server-insiders/**": true,
+        ".tmplaigit": true,
+        ".viminfo": true
+    },
+    "files.associations": {
+        "*.studiorc": "shellscript"
+    },
+    "jupyter.kernels.excludePythonEnvironments": [
+        "/commands/python3",
+        "/commands/python",
+        "/bin/python3.10",
+        "/usr/bin/python3.10",
+        "/bin/python3",
+        "/usr/bin/python3",
+        "/bin/python3.8",
+        "/usr/bin/python3.8"
+    ]
+}
app.py ADDED
@@ -0,0 +1,152 @@
+from config import DEMO_TITLE, IS_SHARE, CV_EXT, EXT_TXT
+from config import CHEAP_API_BASE, CHEAP_API_KEY, CHEAP_MODEL
+from config import STRONG_API_BASE, STRONG_API_KEY, STRONG_MODEL
+from util import is_valid_url
+from util import mylogger
+from taskNonAI import extract_url, file_to_html
+from taskAI import TaskAI
+## load data
+from data_test import mock_jd, mock_cv
+## ui
+import gradio as gr
+## dependency
+from pypandoc.pandoc_download import download_pandoc
+## std
+import os
+
+
+logger = mylogger(__name__, "%(asctime)s:%(levelname)s:%(message)s")
+info = logger.info
+
+def init():
+    os.system("shot-scraper install -b firefox")
+    download_pandoc()
+
+
+def run_refine(api_base, api_key, api_model, jd_info, cv_file: str, cv_text):
+    if jd_info:
+        if is_valid_url(jd_info):
+            jd = extract_url(jd_info)
+        else:
+            jd = jd_info
+    else:
+        jd = mock_jd
+
+    if cv_text:
+        cv = cv_text
+    elif cv_file:
+        if any([cv_file.endswith(ext) for ext in EXT_TXT]):
+            with open(cv_file, "r", encoding="utf8") as f:
+                cv = f.read()
+        else:
+            cv = file_to_html(cv_file)
+    else:
+        cv = mock_cv
+    cheapAPI = {"base": api_base, "key": api_key, "model": api_model}
+    taskAI = TaskAI(cheapAPI, temperature=0.2, max_tokens=2048)  # max_tokens=2048
+    info("API initialized")
+    gen = (
+        taskAI.jd_preprocess(topic="job description", input=jd),
+        taskAI.cv_preprocess(input=cv),
+    )
+    info("tasks initialized")
+    result = [""] * 2
+    while 1:
+        stop: bool = True
+        for i in range(len(gen)):
+            try:
+                result[i] += next(gen[i]).delta
+                stop = False
+            except StopIteration:
+                # info(f"gen[{i}] exhausted")
+                pass
+        yield result
+        if stop:
+            info("tasks done")
+            break
+
+def run_compose(api_base, api_key, api_model, min_jd, min_cv):  # not yet wired to the UI in this commit
+    strongAPI = {"base": api_base, "key": api_key, "model": api_model}
+    taskAI = TaskAI(strongAPI, temperature=0.5, max_tokens=2048)
+    info("API initialized")
+
+
+with gr.Blocks(
+    title=DEMO_TITLE,
+    theme=gr.themes.Base(primary_hue="blue", secondary_hue="sky", neutral_hue="slate"),
+) as demo:
+    intro = f"""# {DEMO_TITLE}
+> You provide the job description and résumé. I write a cover letter for you!
+Before you start, please set up an OpenAI-like API for the two AI agents: Cheap AI and Strong AI.
+"""
+    gr.Markdown(intro)
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            with gr.Accordion("AI setup (OpenAI-like API)", open=False):
+                gr.Markdown(
+                    "**Cheap AI**, an honest format converter and refinery machine, extracts essential info from the job description and résumé, to reduce the subsequent cost on Strong AI."
+                )
+                with gr.Group():
+                    weak_base = gr.Textbox(
+                        value=CHEAP_API_BASE, label="API BASE"
+                    )
+                    weak_key = gr.Textbox(value=CHEAP_API_KEY, label="API key")
+                    weak_model = gr.Textbox(value=CHEAP_MODEL, label="Model ID")
+                gr.Markdown(
+                    "---\n**Strong AI**, a thoughtful wordsmith, generates perfect cover letters to make both you and recruiters happy."
+                )
+                with gr.Group():
+                    strong_base = gr.Textbox(
+                        value=STRONG_API_BASE, label="API BASE"
+                    )
+                    strong_key = gr.Textbox(
+                        value=STRONG_API_KEY, label="API key", type="password"
+                    )
+                    strong_model = gr.Textbox(value=STRONG_MODEL, label="Model ID")
+            with gr.Group():
+                gr.Markdown("## Employer - Job Description")
+                jd_info = gr.Textbox(
+                    label="Job Description",
+                    placeholder="Paste as Full Text (recommended) or URL (may fail)",
+                    lines=5,
+                )
+            with gr.Group():
+                gr.Markdown("## Applicant - CV / Résumé")
+                with gr.Row():
+                    cv_file = gr.File(
+                        label="Allowed formats: " + " ".join(CV_EXT),
+                        file_count="single",
+                        file_types=CV_EXT,
+                        type="filepath",
+                    )
+                    cv_text = gr.TextArea(
+                        label="Or enter text",
+                        placeholder="If you both upload a file and enter text, only this text will be used.",
+                    )
+        with gr.Column(scale=2):
+            gr.Markdown("## Result")
+            with gr.Row():
+                min_jd = gr.TextArea(label="Minimized Job Description")
+                min_cv = gr.TextArea(label="Minimized CV / Résumé")
+            cover_letter_text = gr.TextArea(label="Cover Letter")
+            cover_letter_pdf = gr.File(
+                label="Cover Letter PDF",
+                file_count="single",
+                file_types=[".pdf"],
+                type="filepath",
+            )
+    infer_btn = gr.Button("Go!", variant="primary")
+    infer_btn.click(
+        fn=run_refine,
+        inputs=[weak_base, weak_key, weak_model, jd_info, cv_file, cv_text],
+        outputs=[min_jd, min_cv],
+        concurrency_limit=5,
+    )
+
+
+if __name__ == "__main__":
+    init()
+    demo.queue(max_size=10).launch(
+        show_error=True, debug=True, share=IS_SHARE
+    )
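A minimal sketch (not part of the commit) of driving run_refine outside the Gradio UI: it is a generator that repeatedly yields the partially streamed pair [minimized JD, minimized CV]. The endpoint, key, and model below are placeholders.

    # Sketch only: run the refine step without the UI (placeholder credentials).
    from app import run_refine

    stream = run_refine(
        api_base="https://api.openai.com/v1",  # placeholder; any OpenAI-like endpoint
        api_key="sk-placeholder",
        api_model="gpt-3.5-turbo",
        jd_info="",    # empty -> falls back to mock_jd from data_test.py
        cv_file=None,  # no file upload
        cv_text="",    # empty -> falls back to mock_cv
    )
    for min_jd, min_cv in stream:
        pass  # each yield carries the text streamed so far
    print(min_jd)
    print(min_cv)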
config.py ADDED
@@ -0,0 +1,20 @@
+import os
+
+OPENAI_API_BASE = os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1"
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or ""
+
+CHEAP_API_BASE = os.getenv("CHEAP_API_BASE") or OPENAI_API_BASE
+CHEAP_API_KEY = os.getenv("CHEAP_API_KEY") or OPENAI_API_KEY
+CHEAP_MODEL = os.getenv("CHEAP_MODEL") or "gpt-3.5-turbo"
+
+STRONG_API_BASE = os.getenv("STRONG_API_BASE") or OPENAI_API_BASE
+STRONG_API_KEY = os.getenv("STRONG_API_KEY") or OPENAI_API_KEY
+STRONG_MODEL = os.getenv("STRONG_MODEL") or "mixtral-8x7b"
+
+IS_SHARE = bool(os.getenv("IS_SHARE"))
+
+DEMO_TITLE = "Cover Letter Generator"
+DEMO_DESCRIPTION = "Generates a cover letter tailored to a given job description and the applicant's CV, using OpenAI-compatible APIs."
+
+CV_EXT = [".typ", ".tex", ".html", ".docx", ".rst", ".rtf", ".odt", ".txt", ".md"]
+EXT_TXT = [".txt", ".md"]
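Because every setting in config.py falls back through os.getenv, the app can be pointed at other endpoints without editing code. A minimal sketch (assumption, not part of the commit; the endpoint and model names are placeholders) of overriding the values before the module is imported:

    # Sketch only: environment variables must be set before `import config`.
    import os

    os.environ["CHEAP_API_BASE"] = "https://api.openai.com/v1"  # placeholder endpoint
    os.environ["CHEAP_MODEL"] = "gpt-3.5-turbo"
    os.environ["STRONG_MODEL"] = "mixtral-8x7b"
    os.environ["IS_SHARE"] = "1"  # any non-empty value -> True

    import config  # values are resolved once, at import time
    print(config.CHEAP_MODEL, config.STRONG_MODEL, config.IS_SHARE)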
config_secret.tmpl.py ADDED
@@ -0,0 +1,3 @@
+from util import zip_api
+
+api_test = zip_api("https://api.example.com/v1", "sk-...", "model-id")  # fill in your own values
data_test.py ADDED
@@ -0,0 +1,67 @@
+mock_jd = """
+Queen of Hearts' Garden - Card Guards Job Description
+
+The Queen of Hearts' Garden is seeking a compassionate and highly motivated individual to join our team of Card Guards. As a Card Guard, you will be the guardian of the hearts of our beloved playing cards, ensuring their safety and integrity.
+
+Responsibilities:
+
+Guardian of the Cards: You will be responsible for safeguarding the Queen's precious playing cards from damage, loss, or misuse. This includes meticulously shuffling, dealing, and returning the cards to their proper place after use.
+Keeper of Secrets: You will be privy to the secrets of the cards, their values, and their histories. This knowledge will allow you to provide a more engaging and personalized experience for our guests.
+Master of the Deal: You will be adept at dealing cards with precision and fairness, ensuring that each player has an equal opportunity to win.
+Embracer of Joy: You will be the embodiment of joy and happiness, spreading smiles across the faces of our guests as you interact with them and ensure their cards are treated with the utmost respect.
+
+Qualifications:
+
+Passion for Playing Cards: You have a deep love for playing cards and a strong understanding of their rules and history.
+Exceptional Customer Service: You are able to interact with guests professionally and with empathy, ensuring their experience is enjoyable and memorable.
+Attention to Detail: You have a meticulous eye for detail and are able to handle delicate cards with the utmost care.
+Strong Communication Skills: You are able to communicate clearly and effectively with guests, resolving any issues with patience and understanding.
+
+Additional Desired Qualities:
+
+Creativity: You have a creative spirit and are able to find innovative solutions to challenges.
+Flexibility: You are adaptable and can easily adjust to new situations and changes.
+Positive Attitude: You have a positive attitude and are able to spread joy to others.
+
+Please send your resume and cover letter to [email address] to apply. We look forward to meeting you and learning more about your passion for playing cards and your ability to bring joy to our guests.
+"""
+mock_cv = """
+Dorothy Gale
+
+123 Main St., Emerald City, KS 12345, (123) 456-7890, [email protected]
+
+Summary
+
+Highly motivated and resourceful individual with a strong work ethic and a positive attitude. Proven ability to manage multiple tasks effectively and work independently. Skilled in a variety of industries, including retail, hospitality, and education.
+
+Skills
+
+Strong communication and interpersonal skills
+Excellent organizational and time management skills
+Proficiency in Microsoft Office Suite
+Strong customer service orientation
+Ability to work independently and as part of a team
+Creativity and problem-solving abilities
+
+Experience
+
+Teacher Emerald City Elementary School, Emerald City, KS 2000-2006
+
+Provided a safe and engaging learning environment for students
+Developed and implemented lesson plans that aligned with curriculum standards
+Mentored and guided other teachers
+
+Retail Sales Associate The Ruby Slipper Shop, Emerald City, KS 1998-2000
+
+Provided excellent customer service to clients
+Managed inventory and maintained store displays
+Achieved high sales goals
+
+Education
+
+Bachelor of Arts in Education, University of Kansas Master of Arts in Education, University of Kansas
+
+References
+
+Available upon request.
+"""
pyproject.toml ADDED
@@ -0,0 +1,31 @@
+[tool.ruff]
+# Exclude a variety of commonly ignored directories.
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".git-rewrite",
+    ".hg",
+    ".ipynb_checkpoints",
+    ".mypy_cache",
+    ".nox",
+    ".pants.d",
+    ".pyenv",
+    ".pytest_cache",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    ".vscode",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "site-packages",
+    "venv",
+    ".local",
+]
requirements.txt ADDED
@@ -0,0 +1,12 @@
+# UI
+gradio>=4.0,<=5.0
+# external tool
+pypandoc
+shot-scraper
+# LLM related
+llama-index-llms-openai-like
+tiktoken
+# lib
+requests
+# dev tools
+ruff
run.sh ADDED
@@ -0,0 +1,2 @@
+#!/bin/bash
+IS_SHARE=1 python app.py
setup.sh ADDED
@@ -0,0 +1,3 @@
+pip install -r requirements.txt
+ruff check
+ruff format
taskAI.py ADDED
@@ -0,0 +1,74 @@
+from llama_index.llms.openai_like import OpenAILike
+from llama_index.core.llms import ChatMessage  # , MessageRole
+from llama_index.core import ChatPromptTemplate
+
+from util import mylogger
+
+logger = mylogger(__name__, "%(asctime)s:%(levelname)s:%(message)s")
+info = logger.info
+
+
+## define templates
+
+### topic,input
+JD_PREPROCESS = ChatPromptTemplate(
+    [
+        ChatMessage(
+            role="system",
+            content="You are a content extractor. You never paraphrase; you only reduce content at the sentence level. Your mission is to extract information directly related to {topic} from user input. Make sure output contains complete information.",
+        ),
+        ChatMessage(role="user", content="{input}"),
+    ]
+)
+
+### input
+CV_PREPROCESS = ChatPromptTemplate(
+    [
+        ChatMessage(
+            role="system",
+            content="You are an AI text converter alternative to pandoc. Your mission is to convert the input content into markdown. Regarding styles, only keep headers, lists and links, and remove other styles.",
+        ),
+        ChatMessage(role="user", content="{input}"),
+    ]
+)
+
+## basic func
+
+
+def oai(base: str, key: str, model: str, window_size: int = 3900, **kwargs) -> OpenAILike:
+    return OpenAILike(
+        api_base=base,
+        api_key=key,
+        model=model,
+        is_chat_model=True,
+        context_window=window_size,
+        **kwargs,
+    )
+
+
+## tasks
+class TaskAI(OpenAILike):
+    def __init__(self, api: dict[str, str], **kwargs):
+        def guess_window_size(model=api["model"]):
+            _mid = model.lower()
+            windows: dict = {
+                8000: ["gemma", "8k"],
+                16000: ["16k"],
+                32000: ["mistral", "mixtral", "32k"],
+            }
+            window_size = 3900
+            for ws, names in windows.items():
+                if any([n in _mid for n in names]):
+                    window_size = ws
+            info(f"use context window size: {window_size} for {model}")
+            return window_size
+
+        super().__init__(
+            api_base=api["base"], api_key=api["key"], model=api["model"], is_chat_model=True, context_window=guess_window_size(), **kwargs
+        )
+
+    def jd_preprocess(self, topic: str, input: str):
+        return self.stream_chat(JD_PREPROCESS.format_messages(topic=topic, input=input))
+
+    def cv_preprocess(self, input: str):
+        return self.stream_chat(CV_PREPROCESS.format_messages(input=input))
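A minimal usage sketch (assumption, not part of the commit): TaskAI wraps an OpenAI-like endpoint, and its preprocessing methods return llama-index chat streams whose chunks expose a .delta field, which is how app.py consumes them. The credentials and model name below are placeholders.

    # Sketch only: stream the cheap-model preprocessing step (placeholder API values).
    from taskAI import TaskAI

    api = {"base": "https://api.openai.com/v1", "key": "sk-placeholder", "model": "gpt-3.5-turbo"}
    task_ai = TaskAI(api, temperature=0.2, max_tokens=2048)

    for chunk in task_ai.jd_preprocess(topic="job description", input="We need a Card Guard..."):
        print(chunk.delta, end="", flush=True)  # incremental text, same field app.run_refine reads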
taskNonAI.py ADDED
@@ -0,0 +1,32 @@
+import pypandoc
+## stdlib
+import subprocess
+import json
+from typing import Optional
+
+def file_to_html(file_path: str) -> str:
+    return pypandoc.convert_file(file_path, "html")
+
+
+def extract_url(url: str) -> Optional[str]:
+    cmd = f"""shot-scraper javascript -b firefox \
+"{url}" "
+async () => {{
+  const readability = await import('https://cdn.skypack.dev/@mozilla/readability');
+  return (new readability.Readability(document)).parse();
+}}"
+"""
+    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+    try:
+        result.check_returncode()
+    except subprocess.CalledProcessError:
+        raise Exception(
+            f"Please try copy-paste as input. Failed to extract content from url: {url}. Error: {result.stderr}"
+        )
+    result = json.loads(result.stdout)
+    try:
+        return result["textContent"]
+    except (KeyError, TypeError):
+        raise Exception(
+            f"Please try copy-paste as input. Failed to extract content from: {url}. Didn't find content from given URL!"
+        )
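A minimal sketch (assumption) of how the two non-AI helpers are called, matching their use in app.run_refine. Both rely on external tools installed by app.init(): pandoc for file_to_html, and shot-scraper with Firefox for extract_url. The file name and URL are placeholders.

    # Sketch only: requires pandoc and `shot-scraper install -b firefox` beforehand.
    from taskNonAI import extract_url, file_to_html

    html = file_to_html("resume.docx")                          # placeholder file, converted to HTML
    text = extract_url("https://example.com/jobs/card-guard")   # Readability-extracted main text
    print(html[:200], text[:200])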
test.py ADDED
@@ -0,0 +1,32 @@
+from taskAI import TaskAI
+from data_test import mock_jd, mock_cv
+from config_secret import api_test
+
+from llama_index.llms.openai_like import OpenAILike
+from llama_index.core.llms import ChatMessage
+
+
+def test_integration():
+    messages = [
+        ChatMessage(role="system", content="You are a helpful assistant"),
+        ChatMessage(role="user", content="What is your name"),
+    ]
+    print("Testing integration:")
+    response = OpenAILike(
+        model=api_test["model"],
+        api_key=api_test["key"],
+        api_base=api_test["base"],
+        max_retries=0,
+        is_chat_model=True,
+    ).chat(messages)
+    print(response)
+
+def test_taskAI():
+    taskAI = TaskAI(api_test)
+    gen = taskAI.cv_preprocess(mock_cv)
+    for chunk in gen:
+        print(chunk)
+
+if __name__ == "__main__":
+    # test_integration()
+    test_taskAI()
util.py ADDED
@@ -0,0 +1,44 @@
+import tiktoken
+
+from urllib.parse import urlparse
+import requests
+
+import logging
+
+def mylogger(name, format, level=logging.INFO):
+    # Create a custom logger
+    logger = logging.getLogger(name)
+    logger.setLevel(level)
+    # Configure the custom logger with the desired settings
+    formatter = logging.Formatter(format)
+    c_handler = logging.StreamHandler()
+    c_handler.setFormatter(formatter)
+    # file_handler = logging.FileHandler('custom_logs.log')
+    # file_handler.setFormatter(formatter)
+    logger.addHandler(c_handler)
+
+    return logger
+
+
+def count_token(text, encoding="cl100k_base"):
+    return len(tiktoken.get_encoding(encoding).encode(text))
+
+
+def is_valid_url(url: str) -> bool:
+    try:
+        result = urlparse(url)
+        return all([result.scheme, result.netloc])
+    except ValueError:
+        return False
+
+
+def is_valid_openai_api_key(api_base: str, api_key: str) -> bool:
+    headers = {"Authorization": f"Bearer {api_key}"}
+
+    response = requests.get(api_base, headers=headers)
+
+    return response.status_code == 200
+
+
+def zip_api(api_base: str, api_key: str, model: str) -> dict[str, str]:
+    return {"base": api_base, "key": api_key, "model": model}