Spaces:

jaywadekar
/

gwIAS

Sleeping

App Files Files Community

JayWadekar committed 27 days ago

Commit

6855cb4

1 Parent(s): b3882e1

First commit

Browse files

Files changed (9) hide show

README.md +8 -7
app.py +59 -51
cookies.txt +4 -0
gitattributes +35 -0
ims/gwIASlogo.jpg +0 -0
ims/userpic.png +0 -0
rag.py +57 -0
requirements.txt +10 -1
urls.txt +19 -0

README.md CHANGED Viewed

@@ -1,13 +1,14 @@
 ---
-title: GwIAS
-emoji: 💬
-colorFrom: yellow
-colorTo: purple
 sdk: gradio
-sdk_version: 5.0.1
 app_file: app.py
 pinned: false
-short_description: LLM for the IAS-HM gravitational wave search pipeline
 ---
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

 ---
+title: gwIAS DocBot
+emoji: 🐨
+colorFrom: pink
+colorTo: blue
 sdk: gradio
+sdk_version: 5.5.0
 app_file: app.py
 pinned: false
+license: mit
+short_description: Chatbot assistant for the gwIAS search pipeline
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,64 +1,72 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-if __name__ == "__main__":
     demo.launch()

+# AI assistant with a RAG system to query information from
+#  the gwIAS search pipeline
+# using Langchain and deployed with Gradio
+# Thanks to Pablo Villanueva Domingo for sharing his CAMELS template
+# https://huggingface.co/spaces/PabloVD/CAMELSDocBot
+from rag import RAG, load_docs
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings
+from langchain.chat_models import ChatOpenAI
+import gradio as gr
+# Fetch the documentation pages listed in urls.txt (this runs when the module is loaded)
+docs = load_docs()
+print("Pages loaded:", len(docs))
+# Chat model that generates the answers
+# NOTE(review): presumably needs an OpenAI API key in the environment - confirm
+llm = ChatOpenAI(model="gpt-4o-mini")
+# Name of the embedding model used to index the documentation
+embed_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
+# Alternative embedding model, currently disabled:
+# embed_model = "nvidia/NV-Embed-v2"
+embeddings = HuggingFaceInstructEmbeddings(model_name=embed_model)
+# Build the RAG chain once at start-up; handle_prompt streams from it for every message
+rag_chain = RAG(llm, docs, embeddings)
+# Function to handle prompt and query the RAG chain
+def handle_prompt(message, history):
+    """Stream the RAG chain's answer to ``message`` for the chat interface.
+
+    Args:
+        message: The user's latest prompt; passed to ``rag_chain.stream``.
+        history: Earlier chat turns handed over by the chat interface.
+            Unused: only ``message`` is sent to the chain.
+
+    Yields:
+        The answer accumulated so far, extended by one streamed chunk
+        at each step.
+
+    Raises:
+        gr.Error: If anything fails while streaming. The original
+            exception is chained as the cause so the root error is kept
+            in the traceback.
+    """
+    try:
+        # Yield the running text, not the individual chunk, so each
+        # yielded value is the full reply so far
+        out = ""
+        for chunk in rag_chain.stream(message):
+            out += chunk
+            yield out
+    except Exception as e:
+        raise gr.Error(f"An error occurred: {e}") from e
+if __name__ == "__main__":
+    # Sample prompts offered to the user in the interface
+    sample_prompts = [
+        "Can you give me the code for calculating coherent score?",
+        "Which module in the code is used for collecting coincident triggers?",
+        "How are template banks constructed?"
+    ]
+    # Opening assistant message and the blurb shown with the interface
+    greeting = "Hi, I'm the gwIAS Bot, I'm here to assist you with the search pipeline."
+    blurb = "AI powered assistant to help with [gwIAS](https://github.com/JayWadekar/gwIAS-HM) gravitational wave search pipeline."
+    # Chat window pre-filled with the greeting, using custom avatar images
+    opening_turn = {"role": "assistant", "content": greeting}
+    chat_window = gr.Chatbot(
+        [opening_turn],
+        type="messages",
+        avatar_images=["ims/userpic.png", "ims/gwIASlogo.jpg"],
+        height="60vh",
+    )
+    # Chat interface wired to handle_prompt; cache_examples is deliberately
+    # not passed, so the library default applies
+    demo = gr.ChatInterface(
+        handle_prompt,
+        type="messages",
+        title="gwIAS DocBot",
+        fill_height=True,
+        examples=sample_prompts,
+        theme=gr.themes.Soft(),
+        description=blurb,
+        chatbot=chat_window,
+    )
     demo.launch()
+# https://arxiv.org/html/2405.17400v2
+# https://arxiv.org/html/2312.06631v1
+# https://arxiv.org/html/2310.15233v2

cookies.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+# Netscape HTTP Cookie File
+# https://curl.haxx.se/docs/http-cookies.html
+# This file was generated by libcurl! Edit at your own risk.

gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

ims/gwIASlogo.jpg ADDED Viewed

ims/userpic.png ADDED Viewed

rag.py ADDED Viewed

	@@ -0,0 +1,57 @@

+# Utilities to build a RAG system to query information from the
+#  gwIAS search pipeline using Langchain
+# Thanks to Pablo Villanueva Domingo for sharing his CAMELS template
+# https://huggingface.co/spaces/PabloVD/CAMELSDocBot
+from langchain import hub
+from langchain_chroma import Chroma
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import WebBaseLoader
+# Load documentation from urls
+def load_docs():
+    """Load every page listed in ``urls.txt`` and return the documents.
+
+    ``urls.txt`` is read from the current working directory, one URL per
+    line. Surrounding whitespace (including a trailing carriage return)
+    is stripped and blank lines are skipped, so a stray empty line does
+    not reach the loader as an empty URL.
+
+    Returns:
+        The result of ``WebBaseLoader(urls).load()`` for those URLs.
+    """
+    # The context manager closes the file even if reading raises
+    with open("urls.txt", encoding="utf-8") as urlsfile:
+        urls = [line.strip() for line in urlsfile if line.strip()]
+    # Load the contents of the listed pages
+    loader = WebBaseLoader(urls)
+    return loader.load()
+# Merge the text of several documents into one string
+def format_docs(docs):
+    """Return the ``page_content`` of each item in ``docs``, joined by blank lines."""
+    contents = [doc.page_content for doc in docs]
+    return "\n\n".join(contents)
+# Create a RAG chain
+def RAG(llm, docs, embeddings):
+    """Assemble a retrieval-augmented generation chain over ``docs``.
+
+    Args:
+        llm: Language model placed after the prompt in the chain.
+        docs: Documents to split and index.
+        embeddings: Embedding model handed to the Chroma vector store.
+
+    Returns:
+        A chain that takes a question, retrieves matching chunks as
+        context, fills the prompt, calls ``llm`` and parses the output
+        with ``StrOutputParser``.
+    """
+    # Cut the documents into 1000-character chunks that overlap by 200
+    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    chunks = splitter.split_documents(docs)
+    # Index the chunks in a Chroma vector store and expose it as a retriever
+    retriever = Chroma.from_documents(documents=chunks, embedding=embeddings).as_retriever()
+    # Prompt basis example for RAG systems, pulled from the hub
+    rag_prompt = hub.pull("rlm/rag-prompt")
+    # The retrieved chunks become "context"; the question is passed through unchanged
+    prompt_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}
+    return prompt_inputs | rag_prompt | llm | StrOutputParser()

requirements.txt CHANGED Viewed

	@@ -1 +1,10 @@
-huggingface_hub==0.25.2

+langchain
+langchain-community
+langchain-chroma
+langchain-mistralai
+beautifulsoup4
+pypdf==5.0.1
+sentence-transformers==2.2.2
+huggingface_hub==0.25.2
+InstructorEmbedding
+openai

urls.txt ADDED Viewed

	@@ -0,0 +1,19 @@

+https://github.com/JayWadekar/gwIAS-HM/tree/main
+https://github.com/JayWadekar/gwIAS-HM/tree/main/Pipeline
+https://github.com/JayWadekar/gwIAS-HM/blob/main/README.md
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/ML_modules.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/coherent_score_hm_search.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/coherent_score_mz_fast.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/coincidence_HM.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/data_operations.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/download_data.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/gw_detect_file.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/params.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/python_utils.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/ranking_HM.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/readligo.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/template_bank_generator_HM.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/template_bank_params_O3a_HM.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/triggering_on_cluster.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/triggers_single_detector_HM.py
+https://github.com/JayWadekar/gwIAS-HM/blob/main/Pipeline/utils.py