trducng committed on
Commit
ffbe1f3
1 Parent(s): e412ef8

initial commit

Browse files
Files changed (3) hide show
  1. Dockerfile +21 -0
  2. flowsettings.py +243 -0
  3. ktem_app_data.zip +3 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # syntax=docker/dockerfile:1.0.0-experimental
2
+ FROM taprosoft/kotaemon:v1.0 as base_image
3
+ ARG APP_DATA_DIR=/home/ubuntu/lib-knowledgehub/kotaemon/
4
+
5
+ RUN apt update -qqy \
6
+ && apt install -y unzip \
7
+ && \
8
+ apt-get clean && \
9
+ apt-get autoremove
10
+
11
+ WORKDIR /app
12
+ RUN --mount=type=ssh mkdir -p ${APP_DATA_DIR}
13
+
14
+ COPY ktem_app_data.zip ${APP_DATA_DIR}
15
+ COPY flowsettings.py /app
16
+
17
+ RUN --mount=type=ssh cd ${APP_DATA_DIR} \
18
+ && unzip ktem_app_data.zip \
19
+ && rm ktem_app_data.zip
20
+
21
+ ENTRYPOINT ["python", "app.py"]
flowsettings.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from importlib.metadata import version
3
+ from inspect import currentframe, getframeinfo
4
+ from pathlib import Path
5
+
6
+ from decouple import config
7
+ from theflow.settings.default import * # noqa
8
+
9
+ cur_frame = currentframe()
10
+ if cur_frame is None:
11
+ raise ValueError("Cannot get the current frame.")
12
+ this_file = getframeinfo(cur_frame).filename
13
+ this_dir = Path(this_file).parent
14
+
15
+ # change this if your app uses a different name
16
+ KH_PACKAGE_NAME = "kotaemon_app"
17
+
18
+ KH_APP_VERSION = config("KH_APP_VERSION", "local")
19
+ if not KH_APP_VERSION:
20
+ try:
21
+ # Caution: This might produce the wrong version
22
+ # https://stackoverflow.com/a/59533071
23
+ KH_APP_VERSION = version(KH_PACKAGE_NAME)
24
+ except Exception as e:
25
+ print(f"Failed to get app version: {e}")
26
+
27
+ # App can be run from anywhere and it's not trivial to decide where to store app data.
28
+ # So let's use the same directory as the flowsettings.py file.
29
+ # KH_APP_DATA_DIR = this_dir / "ktem_app_data"
30
+
31
+ # override app data dir to fit preview data
32
+ KH_APP_DATA_DIR = Path("/home/ubuntu/lib-knowledgehub/kotaemon/ktem_app_data")
33
+ KH_APP_DATA_DIR.mkdir(parents=True, exist_ok=True)
34
+
35
+ # User data directory
36
+ KH_USER_DATA_DIR = KH_APP_DATA_DIR / "user_data"
37
+ KH_USER_DATA_DIR.mkdir(parents=True, exist_ok=True)
38
+
39
+ # markdown output directory
40
+ KH_MARKDOWN_OUTPUT_DIR = KH_APP_DATA_DIR / "markdown_cache_dir"
41
+ KH_MARKDOWN_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
42
+
43
+ # chunks output directory
44
+ KH_CHUNKS_OUTPUT_DIR = KH_APP_DATA_DIR / "chunks_cache_dir"
45
+ KH_CHUNKS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
46
+
47
+ # zip output directory
48
+ KH_ZIP_OUTPUT_DIR = KH_APP_DATA_DIR / "zip_cache_dir"
49
+ KH_ZIP_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
50
+
51
+ # zip input directory
52
+ KH_ZIP_INPUT_DIR = KH_APP_DATA_DIR / "zip_cache_dir_in"
53
+ KH_ZIP_INPUT_DIR.mkdir(parents=True, exist_ok=True)
54
+
55
+ # HF models can be big, let's store them in the app data directory so that it's easier
56
+ # for users to manage their storage.
57
+ # ref: https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache
58
+ os.environ["HF_HOME"] = str(KH_APP_DATA_DIR / "huggingface")
59
+ os.environ["HF_HUB_CACHE"] = str(KH_APP_DATA_DIR / "huggingface")
60
+
61
+ # doc directory
62
+ KH_DOC_DIR = this_dir / "docs"
63
+
64
+ KH_MODE = "dev"
65
+ KH_FEATURE_USER_MANAGEMENT = False
66
+ KH_USER_CAN_SEE_PUBLIC = None
67
+ KH_FEATURE_USER_MANAGEMENT_ADMIN = str(
68
+ config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin")
69
+ )
70
+ KH_FEATURE_USER_MANAGEMENT_PASSWORD = str(
71
+ config("KH_FEATURE_USER_MANAGEMENT_PASSWORD", default="admin")
72
+ )
73
+ KH_ENABLE_ALEMBIC = False
74
+ KH_DATABASE = f"sqlite:///file:{KH_USER_DATA_DIR / 'sql.db?mode=ro&uri=true'}"
75
+ KH_FILESTORAGE_PATH = str(KH_USER_DATA_DIR / "files")
76
+
77
+ KH_DOCSTORE = {
78
+ # "__type__": "kotaemon.storages.ElasticsearchDocumentStore",
79
+ # "__type__": "kotaemon.storages.SimpleFileDocumentStore",
80
+ "__type__": "kotaemon.storages.LanceDBDocumentStore",
81
+ "path": str(KH_USER_DATA_DIR / "docstore"),
82
+ }
83
+ KH_VECTORSTORE = {
84
+ # "__type__": "kotaemon.storages.LanceDBVectorStore",
85
+ "__type__": "kotaemon.storages.ChromaVectorStore",
86
+ "path": str(KH_USER_DATA_DIR / "vectorstore"),
87
+ }
88
+ KH_LLMS = {}
89
+ KH_EMBEDDINGS = {}
90
+
91
+ # populate options from config
92
+ if config("AZURE_OPENAI_API_KEY", default="") and config(
93
+ "AZURE_OPENAI_ENDPOINT", default=""
94
+ ):
95
+ if config("AZURE_OPENAI_CHAT_DEPLOYMENT", default=""):
96
+ KH_LLMS["azure"] = {
97
+ "spec": {
98
+ "__type__": "kotaemon.llms.AzureChatOpenAI",
99
+ "temperature": 0,
100
+ "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
101
+ "api_key": config("AZURE_OPENAI_API_KEY", default=""),
102
+ "api_version": config("OPENAI_API_VERSION", default="")
103
+ or "2024-02-15-preview",
104
+ "azure_deployment": config("AZURE_OPENAI_CHAT_DEPLOYMENT", default=""),
105
+ "timeout": 20,
106
+ },
107
+ "default": False,
108
+ }
109
+ if config("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""):
110
+ KH_EMBEDDINGS["azure"] = {
111
+ "spec": {
112
+ "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
113
+ "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
114
+ "api_key": config("AZURE_OPENAI_API_KEY", default=""),
115
+ "api_version": config("OPENAI_API_VERSION", default="")
116
+ or "2024-02-15-preview",
117
+ "azure_deployment": config(
118
+ "AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""
119
+ ),
120
+ "timeout": 10,
121
+ },
122
+ "default": False,
123
+ }
124
+
125
+ if config("OPENAI_API_KEY", default=""):
126
+ KH_LLMS["openai"] = {
127
+ "spec": {
128
+ "__type__": "kotaemon.llms.ChatOpenAI",
129
+ "temperature": 0,
130
+ "base_url": config("OPENAI_API_BASE", default="")
131
+ or "https://api.openai.com/v1",
132
+ "api_key": config("OPENAI_API_KEY", default=""),
133
+ "model": config("OPENAI_CHAT_MODEL", default="gpt-3.5-turbo"),
134
+ "timeout": 20,
135
+ },
136
+ "default": True,
137
+ }
138
+ KH_EMBEDDINGS["openai"] = {
139
+ "spec": {
140
+ "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
141
+ "base_url": config("OPENAI_API_BASE", default="https://api.openai.com/v1"),
142
+ "api_key": config("OPENAI_API_KEY", default=""),
143
+ "model": config(
144
+ "OPENAI_EMBEDDINGS_MODEL", default="text-embedding-ada-002"
145
+ ),
146
+ "timeout": 10,
147
+ "context_length": 8191,
148
+ },
149
+ "default": True,
150
+ }
151
+
152
+ if config("LOCAL_MODEL", default=""):
153
+ KH_LLMS["ollama"] = {
154
+ "spec": {
155
+ "__type__": "kotaemon.llms.ChatOpenAI",
156
+ "base_url": "http://localhost:11434/v1/",
157
+ "model": config("LOCAL_MODEL", default="llama3.1:8b"),
158
+ },
159
+ "default": False,
160
+ }
161
+ KH_EMBEDDINGS["ollama"] = {
162
+ "spec": {
163
+ "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
164
+ "base_url": "http://localhost:11434/v1/",
165
+ "model": config("LOCAL_MODEL_EMBEDDINGS", default="nomic-embed-text"),
166
+ },
167
+ "default": False,
168
+ }
169
+
170
+ KH_EMBEDDINGS["local-bge-en"] = {
171
+ "spec": {
172
+ "__type__": "kotaemon.embeddings.FastEmbedEmbeddings",
173
+ "model_name": "BAAI/bge-base-en-v1.5",
174
+ },
175
+ "default": False,
176
+ }
177
+
178
+ KH_REASONINGS = [
179
+ "ktem.reasoning.simple.FullQAPipeline",
180
+ "ktem.reasoning.simple.FullDecomposeQAPipeline",
181
+ "ktem.reasoning.react.ReactAgentPipeline",
182
+ "ktem.reasoning.rewoo.RewooAgentPipeline",
183
+ ]
184
+ KH_REASONINGS_USE_MULTIMODAL = False
185
+ KH_VLM_ENDPOINT = "{0}/openai/deployments/{1}/chat/completions?api-version={2}".format(
186
+ config("AZURE_OPENAI_ENDPOINT", default=""),
187
+ config("OPENAI_VISION_DEPLOYMENT_NAME", default="gpt-4o"),
188
+ config("OPENAI_API_VERSION", default=""),
189
+ )
190
+
191
+
192
+ SETTINGS_APP: dict[str, dict] = {}
193
+
194
+
195
+ SETTINGS_REASONING = {
196
+ "use": {
197
+ "name": "Reasoning options",
198
+ "value": None,
199
+ "choices": [],
200
+ "component": "radio",
201
+ },
202
+ "lang": {
203
+ "name": "Language",
204
+ "value": "en",
205
+ "choices": [("English", "en"), ("Japanese", "ja"), ("Vietnamese", "vi")],
206
+ "component": "dropdown",
207
+ },
208
+ "max_context_length": {
209
+ "name": "Max context length (LLM)",
210
+ "value": 32000,
211
+ "component": "number",
212
+ },
213
+ }
214
+
215
+
216
+ KH_INDEX_TYPES = [
217
+ "ktem.index.file.FileIndex",
218
+ "ktem.index.file.graph.GraphRAGIndex",
219
+ ]
220
+ KH_INDICES = [
221
+ {
222
+ "name": "File",
223
+ "config": {
224
+ "supported_file_types": (
225
+ ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
226
+ ".pptx, .csv, .html, .mhtml, .txt, .zip"
227
+ ),
228
+ "private": False,
229
+ },
230
+ "index_type": "ktem.index.file.FileIndex",
231
+ },
232
+ {
233
+ "name": "GraphRAG",
234
+ "config": {
235
+ "supported_file_types": (
236
+ ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
237
+ ".pptx, .csv, .html, .mhtml, .txt, .zip"
238
+ ),
239
+ "private": False,
240
+ },
241
+ "index_type": "ktem.index.file.graph.GraphRAGIndex",
242
+ },
243
+ ]
ktem_app_data.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:939929c877f3ba49cb82fbf7743e4ab383a3e60d8c4581c521713fa240a96e17
3
+ size 330285315