mgchavez committed on
Commit
4b8b6dd
·
verified ·
1 Parent(s): 556a2ff

Upload 2 files

Browse files
Files changed (2) hide show
  1. Dataset-10k.zip +3 -0
  2. app.py +74 -0
Dataset-10k.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2aaba7baab33f913f9e984d8a247052c1203f71cdb2a05eb2a98708044fbfa4
3
+ size 5198341
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ ## Setup
4
+ # Import the necessary libraries
5
+ import json
6
+ import tiktoken
7
+ import pandas as pd
8
+ from openai import OpenAI
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain_community.document_loaders import PyPDFDirectoryLoader
11
+ from langchain_community.embeddings.sentence_transformer import (
12
+ SentenceTransformerEmbeddings
13
+ )
14
+ from langchain_community.vectorstores import Chroma
15
+
16
+ import os
17
+ import uuid
18
+ import joblib
19
+ import json
20
+
21
+ import gradio as gr
22
+
23
+
24
+ from huggingface_hub import CommitScheduler
25
+ from pathlib import Path
26
+
27
+ # Create Client
28
+ client = OpenAI(
29
+ base_url="https://api.endpoints.anyscale.com/v1",
30
+ api_key=secret_key
31
+ )
32
+
33
+ # Define the embedding model (the vectorstore itself is not created in this file)
34
+ embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
35
+
36
+ # Location of the persisted vectorDB (path only; the DB is not loaded here)
37
+ persisted_vectordb_location = './proj3_db'
38
+
39
+ # Prepare the logging functionality
40
+
41
+ log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
42
+ log_folder = log_file.parent
43
+
44
+ scheduler = CommitScheduler(
45
+ repo_id="---------",
46
+ repo_type="dataset",
47
+ folder_path=log_folder,
48
+ path_in_repo="data",
49
+ every=2
50
+ )
51
+
52
+ # Define the Q&A system message
53
+ qna_system_message = """
54
+
55
+ User input will have the context required by you to answer user questions.
56
+ This context will begin with the token: ###Context
57
+ The context contains references to specific portions of a document relevant to the user query.
58
+
59
+ User questions will begin with the token: ###Question
60
+
61
+ Please answer only using the context provided in the input. Do not mention anything about the context in your final answer.
62
+
63
+ If the answer is not found in the context, respond "I don't know".
64
+ """
65
+
66
+ # Define the user message template
67
+ qna_user_message_template = """
68
+ ###Context
69
+ Here are some documents that are relevant to the question mentioned below.
70
+ {context}
71
+
72
+ ###Question
73
+ {question}
74
+ """