Spaces:
Sleeping
Sleeping
shethjenil
committed on
Commit
•
2b1406e
1
Parent(s):
0493cdd
Upload 3 files
Browse files- .gitattributes +1 -0
- Dockerfile +8 -0
- allbook.book +3 -0
- app.py +76 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
allbook.book filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Container image for the book-retrieval Flask API (app.py listens on 7860).
FROM python:3.9

# Run the app as an unprivileged user.
RUN useradd -ms /bin/bash myuser

WORKDIR /code

# Install dependencies BEFORE copying sources so the expensive pip layer is
# cached across code-only rebuilds (the original ran COPY first, invalidating
# it on every change); --no-cache-dir keeps the image smaller.
RUN pip install --no-cache-dir llama-index flask requests chromadb llama-index-embeddings-huggingface-api llama-index-vector-stores-chroma

COPY . .
RUN chown -R myuser:myuser /code

USER myuser
CMD ["python", "app.py"]
|
allbook.book
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f43cb55656b6e5c510c478656d38bda6c769b9e6483410373789d50bf2fe15d
|
3 |
+
size 16263962
|
app.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict
|
2 |
+
from llama_index.core import StorageContext , VectorStoreIndex
|
3 |
+
from llama_index.core.base.base_retriever import BaseRetriever
|
4 |
+
from llama_index.core.schema import TextNode , NodeRelationship
|
5 |
+
from llama_index.embeddings.huggingface_api import HuggingFaceInferenceAPIEmbedding
|
6 |
+
from json import loads
|
7 |
+
from chromadb import EphemeralClient
|
8 |
+
from llama_index.vector_stores.chroma import ChromaVectorStore
|
9 |
+
import requests
|
10 |
+
import nest_asyncio
|
11 |
+
import os
|
12 |
+
import pickle
|
13 |
+
import flask
|
14 |
+
# Flask application object; routes are registered below and the server is
# started in the __main__ guard at the bottom of the file.
app = flask.Flask(__name__)

# llama-index runs asyncio work internally; nest_asyncio patches the event
# loop so that works inside Flask's synchronous request handlers.
nest_asyncio.apply()
|
17 |
+
class AIBook:
    """Retrieval wrapper around a pickled set of llama-index text nodes.

    The node store ("allbook.book") holds English text nodes; each node's
    parent relationship (``NodeRelationship("1")``) carries metadata with the
    original Gujarati text ("maintext"), a "page" number and a "book" name —
    assumed from the accessors below; TODO confirm against the pickle.
    """

    # String annotations: the originals evaluated these names at class-body
    # time and raised NameError unless every llama-index import succeeded.
    embed_model: "HuggingFaceInferenceAPIEmbedding"
    index: "VectorStoreIndex"
    retriever: "BaseRetriever"

    def __init__(self, token=None, srcnum=2):
        """Build the in-memory Chroma-backed vector index from allbook.book.

        token: HuggingFace API token; defaults to the ``hf_api`` env var.
               Read lazily here (the original read it in the default-argument
               expression, so merely importing the module raised KeyError
               when the variable was unset).
        srcnum: number of top-similarity nodes returned per query.
        """
        if token is None:
            token = os.environ["hf_api"]
        self.embed_model = HuggingFaceInferenceAPIEmbedding(
            model_name="BAAI/bge-large-en-v1.5", token=token
        )
        # with-block closes the handle (the original leaked an open file).
        # NOTE(review): pickle of a bundled local file — never point this
        # at untrusted data.
        with open("allbook.book", "rb") as f:
            nodes = pickle.load(f)
        self.index = VectorStoreIndex(
            nodes=nodes,
            embed_model=self.embed_model,
            storage_context=StorageContext.from_defaults(
                vector_store=ChromaVectorStore(
                    chroma_collection=EphemeralClient().get_or_create_collection("jainbook")
                )
            ),
        )
        self.retriever = self.index.as_retriever(
            similarity_top_k=srcnum, vector_store_query_mode="default"
        )

    def changeToken(self, token: str):
        """Swap in a new embedding model only if the token actually changed."""
        if self.embed_model.token != token:
            self.embed_model = HuggingFaceInferenceAPIEmbedding(
                model_name="BAAI/bge-large-en-v1.5", token=token
            )

    def changesrcnum(self, srcnum: int):
        """Rebuild the retriever with a new top-k."""
        self.retriever = self.index.as_retriever(
            similarity_top_k=srcnum, vector_store_query_mode="default"
        )

    def retrieve(self, query: str):
        """Return the top-k scored nodes for *query* (English text)."""
        return self.retriever.retrieve(query)

    @classmethod
    def nodes_to_guj(cls, nodes) -> list:
        """Gujarati source text of each retrieved node (from parent metadata)."""
        # *nodes* are NodeWithScore-style wrappers (.node), per retrieve().
        return [node.node.relationships[NodeRelationship("1")].metadata["maintext"] for node in nodes]

    @classmethod
    def nodes_to_eng(cls, nodes) -> list:
        """English text of each retrieved node."""
        return [node.node.text for node in nodes]

    @classmethod
    def nodes_to_page_with_bookname(cls, nodes) -> list:
        """Page number and book name for each retrieved node."""
        return [
            {
                "page": int(node.node.relationships[NodeRelationship("1")].metadata["page"]),
                "bookname": node.node.relationships[NodeRelationship("1")].metadata["book"],
            }
            for node in nodes
        ]

    @classmethod
    def translate_to_eng(cls, text: str) -> str:
        """Translate Gujarati *text* to English via the unofficial Google endpoint.

        Passes the query through ``params=`` so it is URL-encoded; the
        original interpolated *text* into the URL with an f-string, which
        broke on questions containing '&', '?', '#' or '+'.
        """
        resp = requests.get(
            "https://translate.googleapis.com/translate_a/single",
            params={"client": "gtx", "sl": "gu", "tl": "en", "dt": "t", "q": text},
        )
        # Response shape: [[["translated", "source", ...], ...], ...]
        return "".join(segment[0] for segment in resp.json()[0])
|
45 |
+
|
46 |
+
# Single shared index/retriever, built once at startup.
book = AIBook()

@app.route("/")
def function():
    """Answer a Gujarati question with the matching Gujarati passages.

    The question is translated to English, used for vector retrieval, and
    the Gujarati source text of each hit is returned, blank-line separated.
    With no ?question= parameter, a usage page is shown instead.
    """
    question = flask.request.args.get("question")
    if question is None:
        # Fix: the form now posts to the registered /insert_nodes route —
        # the original's action="/insert_node" always 404'd.
        # SECURITY NOTE(review): this help text embeds what looks like a real
        # HuggingFace token (hf_...); it should be rotated and removed.
        return """Please provide a question as 'https://shethjenil-apiofbookai.hf.space?question=પ્રભુ છે કે નહિ'<br>Change Token as https://shethjenil-apiofbookai.hf.space/changeToken?token=hf_<b>rgPNhjnXpLSodIphwjmRvPbvrovNYnQavj</b><br>Change srcnum as https://shethjenil-apiofbookai.hf.space/changesrcnum?srcnum=2<br><br><form action="/insert_nodes" method="post" enctype="multipart/form-data"><input type="file" name="file" accept=".book"><input type="submit" value="Upload Book File"></form>"""
    if question == "":
        return ""
    return "\n\n".join(AIBook.nodes_to_guj(book.retrieve(AIBook.translate_to_eng(question))))
|
55 |
+
|
56 |
+
@app.route("/changeToken")
def function2():
    """Replace the HuggingFace API token used for embeddings."""
    token = flask.request.args.get("token")
    if not token:
        # The original passed None straight into changeToken(), silently
        # rebuilding the embedding model with a null token.
        return "Please provide a token as ?token=hf_..."
    book.changeToken(token)
    return "Token changed"
|
60 |
+
@app.route("/changesrcnum")
def function3():
    """Change how many passages each query returns."""
    raw = flask.request.args.get("srcnum")
    try:
        srcnum = int(raw)
    except (TypeError, ValueError):
        # Missing or non-numeric ?srcnum= previously crashed with a 500.
        return "Please provide an integer as ?srcnum=2"
    book.changesrcnum(srcnum)
    return "srcnum changed"
|
64 |
+
@app.route("/insert_nodes", methods=["POST"])
def function4():
    """Add content from an uploaded .book file to the running index."""
    # Guard clauses: reject requests without a file part or with the
    # wrong extension before touching the index.
    if 'file' not in flask.request.files:
        return "Error"
    file = flask.request.files['file']
    if not file.filename.endswith(".book"):
        return "not inserted because file is not a book"
    # SECURITY NOTE(review): unpickling an arbitrary upload allows remote
    # code execution — this endpoint needs authentication or a safe format.
    # NOTE(review): index.insert takes a Document; if .book files hold a
    # node list (as allbook.book appears to), insert_nodes may be intended —
    # confirm against the llama-index API.
    book.index.insert(pickle.load(file))
    return "inserted" + file.filename
|
74 |
+
|
75 |
+
if __name__ == '__main__':
    # Listen on all interfaces on the Hugging Face Spaces default port.
    app.run(host="0.0.0.0", port=7860)
|