Commit
·
4ba83ab
0
Parent(s):
Duplicate from limcheekin/orca_mini_v3_7B-GGUF
Browse files
Co-authored-by: Lim Chee Kin <[email protected]>
- .gitattributes +35 -0
- Dockerfile +35 -0
- LICENSE +21 -0
- README.md +21 -0
- index.html +37 -0
- main.py +27 -0
- start_server.sh +6 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Grab a fresh copy of the Python image
FROM python:3.10-slim

# Install build and runtime dependencies.
# The apt package lists are removed in the same layer so they do not bloat the image.
RUN apt-get update && \
    apt-get install -y \
    libopenblas-dev \
    ninja-build \
    build-essential \
    pkg-config \
    curl && \
    rm -rf /var/lib/apt/lists/*

# Build llama-cpp-python from source against OpenBLAS.
# The requirement is quoted so the shell never treats "[server]" as a glob pattern,
# and --no-cache-dir keeps pip's download cache out of the image.
RUN pip install --no-cache-dir -U pip setuptools wheel && \
    CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 pip install --no-cache-dir --verbose "llama-cpp-python[server]"

# Download model.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as the model file, so a broken download fails the build immediately.
RUN mkdir model && \
    curl --fail -L https://huggingface.co/TheBloke/orca_mini_v3_7B-GGUF/resolve/main/orca_mini_v3_7b.Q4_K_M.gguf -o model/gguf-model.bin

COPY ./start_server.sh ./
COPY ./main.py ./
COPY ./index.html ./

# Make the server start script executable
RUN chmod +x ./start_server.sh

# Set environment variable for the host
ENV HOST=0.0.0.0
ENV PORT=7860

# Expose a port for the server
EXPOSE ${PORT}

# Run the server start script
CMD ["/bin/sh", "./start_server.sh"]
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 Lim Chee Kin
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: orca_mini_v3_7B-GGUF (Q4_K_M)
|
3 |
+
colorFrom: purple
|
4 |
+
colorTo: blue
|
5 |
+
sdk: docker
|
6 |
+
models:
|
7 |
+
- psmathur/orca_mini_v3_7b
|
8 |
+
- TheBloke/orca_mini_v3_7B-GGUF
|
9 |
+
tags:
|
10 |
+
- inference api
|
11 |
+
- openai-api compatible
|
12 |
+
- llama-cpp-python
|
13 |
+
- orca_mini_v3_7B
|
14 |
+
- gguf
|
15 |
+
pinned: false
|
16 |
+
duplicated_from: limcheekin/orca_mini_v3_7B-GGUF
|
17 |
+
---
|
18 |
+
|
19 |
+
# orca_mini_v3_7B-GGUF (Q4_K_M)
|
20 |
+
|
21 |
+
Please refer to the [index.html](index.html) for more information.
|
index.html
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
<html lang="en">
  <head>
    <!-- Declare the encoding and viewport so the page renders consistently across browsers and devices. -->
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <title>orca_mini_v3_7B-GGUF (Q4_K_M)</title>
  </head>
  <body>
    <h1>orca_mini_v3_7B-GGUF (Q4_K_M)</h1>
    <p>
      With the utilization of the
      <a href="https://github.com/abetlen/llama-cpp-python">llama-cpp-python</a>
      package, we are excited to introduce the GGUF model hosted in the Hugging
      Face Docker Spaces, made accessible through an OpenAI-compatible API. This
      space includes comprehensive API documentation to facilitate seamless
      integration.
    </p>
    <ul>
      <li>
        The API endpoint:
        <a href="https://limcheekin-orca-mini-v3-7b-gguf.hf.space/v1"
          >https://limcheekin-orca-mini-v3-7b-gguf.hf.space/v1</a
        >
      </li>
      <li>
        The API doc:
        <a href="https://limcheekin-orca-mini-v3-7b-gguf.hf.space/docs"
          >https://limcheekin-orca-mini-v3-7b-gguf.hf.space/docs</a
        >
      </li>
    </ul>
    <p>
      If you find this resource valuable, your support in the form of starring
      the space would be greatly appreciated. Your engagement plays a vital role
      in furthering the application for a community GPU grant, ultimately
      enhancing the capabilities and accessibility of this space.
    </p>
  </body>
</html>
|
main.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Serve the GGUF model through llama-cpp-python and a landing page at "/"."""
from llama_cpp.server.app import create_app, Settings
from fastapi.responses import HTMLResponse
import os

# Build the llama-cpp-python server application for the bundled model.
app = create_app(
    Settings(
        n_threads=2,  # set to number of cpu cores
        model="model/gguf-model.bin",
        embedding=False
    )
)

# Read the content of index.html once and store it in memory.
# The encoding is explicit so decoding does not depend on the process locale.
with open("index.html", "r", encoding="utf-8") as f:
    content = f.read()


@app.get("/", response_class=HTMLResponse)
async def read_items():
    """Return the landing page HTML that was cached at import time."""
    return content


if __name__ == "__main__":
    import uvicorn

    # HOST and PORT are normally provided by the Dockerfile; fall back to the
    # same values so the script also starts when they are not set.
    uvicorn.run(
        app,
        host=os.environ.get("HOST", "0.0.0.0"),
        port=int(os.environ.get("PORT", "7860")),
    )
|
start_server.sh
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/sh

# For mlock support
ulimit -l unlimited

# Replace the shell with the Python process so that signals sent to the
# container (e.g. SIGTERM on stop) are delivered to the server directly.
exec python3 -B main.py
|