abdulazeezoj committed
Commit
be9a30f
0 Parent(s):

Add alaroye

.gitattributes ADDED
@@ -0,0 +1,37 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.docx filter=lfs diff=lfs merge=lfs -text
+ docs/**/* filter=lfs diff=lfs merge=lfs -text
+ alaroye/alaroyedb/**/* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,7 @@
+ **/.vscode
+ **/coverage
+ **/.env
+ **/.aws
+ **/.ssh
+ **/.DS_Store
+ **/__pycache__
Dockerfile ADDED
@@ -0,0 +1,44 @@
+ FROM python:3.11.3
+
+ # Set the working directory to /code
+ WORKDIR /code
+
+ # Copy Pipfile and Pipfile.lock
+ COPY Pipfile Pipfile.lock ./
+
+ # Upgrade pip and install pipenv
+ RUN pip install --no-cache-dir --upgrade pip \
+     && pip install --no-cache-dir pipenv
+
+ # Install Python dependencies
+ RUN pipenv install --system --deploy --ignore-pipfile
+
+ # Set up a new user named "alaye" with user ID 1000
+ RUN useradd -m -u 1000 alaye
+
+ # Switch to the "alaye" user
+ USER alaye
+
+ # Set HOME to alaye's home directory
+ ENV HOME=/home/alaye \
+     PATH=/home/alaye/.local/bin:$PATH \
+     PYTHONPATH=$HOME/omdenabot \
+     PYTHONUNBUFFERED=1 \
+     PYTHONDONTWRITEBYTECODE=1 \
+     GRADIO_ALLOW_FLAGGING=never \
+     GRADIO_NUM_PORTS=1 \
+     GRADIO_SERVER_NAME=0.0.0.0 \
+     GRADIO_THEME=huggingface \
+     SYSTEM=spaces
+
+ # Set the working directory to the user's home directory
+ WORKDIR $HOME/omdenabot
+
+ # Copy the project
+ COPY --chown=alaye . $HOME/omdenabot
+
+ # Expose port
+ EXPOSE 7860
+
+ # Run entrypoint
+ CMD ["python", "src/web.py"]
Makefile ADDED
@@ -0,0 +1,11 @@
+ include .env
+
+ DOCKER_IMAGE ?= alaroye
+ DOCKER_CONTAINER ?= alaroye
+ DOCKER_TAG ?= latest
+
+ docker-build:
+ 	docker build -t $(DOCKER_IMAGE):$(DOCKER_TAG) .
+
+ docker-run:
+ 	docker run --name $(DOCKER_CONTAINER) -it --rm -p 7860:7860 -e OPENAI_API_KEY=$(OPENAI_API_KEY) $(DOCKER_IMAGE):$(DOCKER_TAG)
Pipfile ADDED
@@ -0,0 +1,24 @@
+ [[source]]
+ url = "https://pypi.org/simple"
+ verify_ssl = true
+ name = "pypi"
+
+ [packages]
+ gtts = "*"
+ speechrecognition = "*"
+ pyaudio = "*"
+ langchain = "*"
+ openai = "*"
+ gradio = "*"
+ chromadb = "*"
+ pydub = "*"
+ tiktoken = "*"
+
+ [dev-packages]
+ flake8 = "*"
+ black = "*"
+ ipykernel = "*"
+
+ [requires]
+ python_version = "3.10"
+ python_full_version = "3.10.10"
Pipfile.lock ADDED
The diff for this file is too large to render. See raw diff
 
alaroye/__init__.py ADDED
File without changes
alaroye/alaroye.py ADDED
@@ -0,0 +1,363 @@
+ import os
+ import sys
+ import textwrap
+ import pathlib
+ from typing import Any, Dict, List
+
+ import gradio as gr
+ from gtts import gTTS
+ from pydub import AudioSegment
+ from pydub.playback import play
+ from io import BytesIO
+ from dotenv import find_dotenv, load_dotenv
+ from langchain.chains import RetrievalQA
+ from langchain.chains.retrieval_qa.base import BaseRetrievalQA
+ from langchain.docstore.document import Document
+ from langchain.document_loaders import DirectoryLoader, TextLoader
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.llms import OpenAI
+ from langchain.memory import ConversationBufferWindowMemory
+ from langchain.prompts import PromptTemplate
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.vectorstores import Chroma
+ import speech_recognition as sr
+
+
+ # Load environment variables
+ load_dotenv(find_dotenv())
+
+
+ class Alaroye:
+     """
+     A bot that answers questions about Omdena
+     """
+
+     def __init__(self, version: str = "v0.0.0"):
+         """
+         Initialize the Alaroye.
+
+         Parameters
+         ----------
+         version : str (default="v0.0.0")
+             The version of the Alaroye.
+         """
+
+         self.embeddings = OpenAIEmbeddings()  # type: ignore
+         self.llm = OpenAI(temperature=0.1)  # type: ignore
+         self.vector_store: Chroma | None = None
+         self.speech_recognizer = sr.Recognizer()
+         self.retrieval_qa: BaseRetrievalQA | None = None
+         self.persist_directory = os.path.join(os.path.dirname(__file__), "alaroyedb")
+         self.prompt_template = """
+         Use the following pieces of context to answer the question delimited by <<< >>>. If you don't know the answer, \
+         just say `I don't know, rephrase the question or contact omdena support on slack or email ([email protected])` \
+         Don't try to make up an answer.
+
+         {context}
+
+         <<<{question}>>>
+         """
+
+         self.prompt = PromptTemplate(
+             template=self.prompt_template, input_variables=["context", "question"]
+         )
+
+         # Load initialization variables
+         self.version = version
+
+     def train(
+         self, doc_dir: str, chunk: bool = True, chunk_size: int = 1000, chunk_overlap: int = 0
+     ) -> None:
+         """
+         Train the Alaroye.
+
+         Parameters
+         ----------
+         doc_dir : str
+             The directory containing the documents to train the Alaroye on.
+         chunk : bool (default=True)
+             Whether to chunk the documents.
+         chunk_size : int (default=1000)
+             The size of the chunks.
+         chunk_overlap : int (default=0)
+             The overlap between chunks.
+
+         """
+
+         # Get the documents to train on
+         documents = self._get_documents(
+             doc_dir, chunk=chunk, chunk_size=chunk_size, chunk_overlap=chunk_overlap
+         )
+
+         # Train the Alaroye
+         self.vector_store = Chroma.from_documents(
+             documents=documents,
+             embedding=self.embeddings,
+             collection_name=f"alaroye-{self.version}",
+             persist_directory=self.persist_directory,
+         )
+
+         # Persist the vectorstore
+         self.vector_store.persist()
+
+         # Create a retrieval QA chain
+         self.retrieval_qa = RetrievalQA.from_chain_type(
+             llm=self.llm,
+             chain_type="stuff",
+             retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
+             return_source_documents=True,
+             chain_type_kwargs={
+                 "prompt": self.prompt,
+             },
+         )
+
+     def load(self) -> None:
+         """
+         Load the vectorstore.
+         """
+
+         # Load the vectorstore
+         self.vector_store = Chroma(
+             embedding_function=self.embeddings,
+             collection_name=f"alaroye-{self.version}",
+             persist_directory=self.persist_directory,
+         )
+
+         # Create a retrieval QA chain
+         self.retrieval_qa = RetrievalQA.from_chain_type(
+             llm=self.llm,
+             chain_type="stuff",
+             retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
+             return_source_documents=True,
+             chain_type_kwargs={
+                 "prompt": self.prompt,
+             },
+         )
+
+     def ask(self, question: str, verbose: bool = False) -> dict[str, Any]:
+         """
+         Ask the Alaroye a question.
+
+         Parameters
+         ----------
+         question : str
+             The question to ask the Alaroye.
+         verbose : bool (default=False)
+             Whether to print the answer.
+
+         Returns
+         -------
+         dict[str, Any]
+             The answer to the question.
+         """
+
+         # Check if the Alaroye has been trained or loaded
+         if self.vector_store is None or self.retrieval_qa is None:
+             raise ValueError("The Alaroye has not been trained or loaded.")
+
+         # Get the answer
+         answer = self.retrieval_qa(question)
+
+         # Format the answer
+         formatted_answer = self._format_answer(answer, verbose=verbose)
+
+         # Return the formatted answer
+         return formatted_answer
+
+     def _listen(self) -> str:
+         """
+         Listen to the user.
+
+         Returns
+         -------
+         str
+             The user's input.
+         """
+
+         # Initialize the microphone
+         mic = sr.Microphone()
+
+         # Listen to the user
+         with mic as source:
+             print("Calibrating microphone...")
+             self.speech_recognizer.adjust_for_ambient_noise(source, duration=5)
+
+             print("Listening...")
+             try:
+                 audio = self.speech_recognizer.listen(source, timeout=5)
+                 print("Recognizing...")
+
+                 text = self.speech_recognizer.recognize_google(audio)
+
+                 return text
+             except Exception:
+                 bad_response = "Sorry, I didn't catch that. Could you repeat yourself?"
+
+                 return bad_response
+
+     def _speak(self, text: str) -> None:
+         """
+         Speak to the user.
+
+         Parameters
+         ----------
+         text : str
+             The text to speak to the user.
+         """
+
+         # Generate speech using gTTS
+         tts = gTTS(text=text, lang="de")
+
+         # Write the speech to bytes
+         mp3_fp = BytesIO()
+         tts.write_to_fp(mp3_fp)
+
+         # Rewind the buffer and play the speech
+         mp3_fp.seek(0)
+         stream = BytesIO(mp3_fp.read())
+         mp3_fp.close()
+         audio = AudioSegment.from_file(stream, format="mp3")
+         play(audio)
+
+     @staticmethod
+     def _get_documents(
+         doc_dir: str, chunk: bool = True, chunk_size: int = 1000, chunk_overlap: int = 0
+     ) -> List[Document]:
+         """
+         Get the documents to train the Alaroye on.
+
+         Parameters
+         ----------
+         doc_dir : str
+             The directory containing the documents to train the Alaroye on.
+         chunk : bool (default=True)
+             Whether to split the documents into chunks.
+         chunk_size : int (default=1000)
+             The size of each chunk in characters.
+         chunk_overlap : int (default=0)
+             The number of characters to overlap between chunks.
+
+         Returns
+         -------
+         List[Document]
+             The documents to train the Alaroye on.
+         """
+
+         # Load the documents
+         docx_loader = DirectoryLoader(
+             doc_dir,
+             glob="./*.txt",
+             loader_cls=TextLoader,  # type: ignore
+         )
+         documents: List[Document] = docx_loader.load()
+
+         # Split the documents into chunks
+         if chunk:
+             document_chunks = Alaroye._split_documents(documents, chunk_size, chunk_overlap)
+
+             return document_chunks
+
+         return documents
+
+     @staticmethod
+     def _split_documents(
+         documents: List[Document], chunk_size: int, chunk_overlap: int
+     ) -> List[Document]:
+         """
+         Split the documents into chunks.
+
+         Parameters
+         ----------
+         documents : List[Document]
+             The documents to split.
+         chunk_size : int
+             The size of each chunk in characters.
+         chunk_overlap : int
+             The number of characters to overlap between chunks.
+
+         Returns
+         -------
+         List[Document]
+             The chunked documents to train the Alaroye on.
+         """
+
+         # Create a text splitter
+         text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+
+         # Split the documents into chunks
+         document_chunks: List[Document] = text_splitter.split_documents(documents)
+
+         return document_chunks
+
+     @staticmethod
+     def _wrap_text(text: str, width: int = 80) -> str:
+         """
+         Wrap text to a specified width while preserving newlines.
+
+         Parameters
+         ----------
+         text : str
+             The text to wrap.
+         width : int (default=80)
+             The maximum width of a line in characters.
+
+         Returns
+         -------
+         str
+             The wrapped text.
+         """
+
+         # Split the input text into lines based on newline characters
+         lines = text.strip().split("\n")
+
+         # Wrap each line individually
+         wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
+
+         # Join the wrapped lines back together using newline characters
+         wrapped_text = "\n".join(wrapped_lines)
+
+         return wrapped_text
+
+     @staticmethod
+     def _format_answer(response: Dict[str, Any], verbose: bool = False) -> dict[str, Any]:
+         """
+         Parse the response from the Alaroye.
+
+         Parameters
+         ----------
+         response : Dict[str, Any]
+             The response from the Alaroye.
+         verbose : bool (default=False)
+             Whether to print the answer and source documents.
+
+         Returns
+         -------
+         dict[str, Any]
+             The parsed response.
+         """
+
+         # Get the answer from the response
+         answer = response["result"]
+         answer = answer.strip()
+
+         # Get the source documents from the response
+         source_documents: List[str] = [
+             source.metadata["source"] for source in response["source_documents"]
+         ]
+
+         # Extract the source document file names
+         source_documents = [os.path.basename(source) for source in source_documents]
+
+         # Filter out duplicate source documents
+         source_documents = list(set(source_documents))
+
+         # Print the answer and source documents
+         if verbose:
+             print(f"Answer: \n{Alaroye._wrap_text(answer)}")
+             print("\n\nSource documents:")
+             for source_document in source_documents:
+                 print(f" - {source_document}")
+
+         return {
+             "answer": answer,
+             "source_documents": source_documents,
+         }
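A minimal usage sketch of the class above (illustrative, not part of the commit): it assumes `OPENAI_API_KEY` is available via the environment or a `.env` file, and that `doc_dir` points at a folder of `.txt` files matching the loader's glob.

```python
from alaroye.alaroye import Alaroye

# One-off: embed the documents and persist the Chroma store under alaroye/alaroyedb/.
bot = Alaroye(version="v0.0.0")
bot.train(doc_dir="docs/", chunk=True, chunk_size=1000, chunk_overlap=0)

# Later runs: reuse the persisted store instead of re-embedding.
bot = Alaroye(version="v0.0.0")
bot.load()

# ask() returns a dict with the answer text and the deduplicated source file names.
result = bot.ask("What is a top talent project?", verbose=True)
print(result["answer"])
print(result["source_documents"])
```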
alaroye/alaroyedb/chroma-collections.parquet ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba8f8c7ce3183dcece433278eda253ac414602bbf8fc056492f88fcb0f72dc41
+ size 582
alaroye/alaroyedb/chroma-embeddings.parquet ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4c63ddf58e8724ec72057ac7ff59a8247bd51e0053e37fadc1c33c72e5cce719
+ size 133228
alaroye/alaroyedb/index/id_to_uuid_676957b6-5b85-4306-bd90-8fbd6a25173a.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4c9b3099292e9a7715bec1415f43620f076b4815fd3b462df83cf7302b9a3030
+ size 350
alaroye/alaroyedb/index/index_676957b6-5b85-4306-bd90-8fbd6a25173a.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05dacfe0ea852273fa006eba491d99cce965b33f328da42cb36befcf4aa99948
+ size 63044
alaroye/alaroyedb/index/index_metadata_676957b6-5b85-4306-bd90-8fbd6a25173a.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2c6688716b16bb645997c219107f1a6f13c20d5ea26fe4165014c682b08f29b5
+ size 103
alaroye/alaroyedb/index/uuid_to_id_676957b6-5b85-4306-bd90-8fbd6a25173a.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d5652d13e72f5f9fbf7d9d28dba4f216c927b0c28cd6722c8061f31d549d6d3e
+ size 386
docs/state_of_the_union.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2e70b0de092c9cfe2b6158ab1d24b5f472bc879340d4a199dd6f4bf8b5d5091f
+ size 13111
notebooks/0-alaroye-v0.0.0.ipynb ADDED
@@ -0,0 +1,498 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "attachments": {},
5
+ "cell_type": "markdown",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Alaroye\n",
9
+ "\n",
10
+ "```\n",
11
+ "Version: 0.0.0\n",
12
+ "```"
13
+ ]
14
+ },
15
+ {
16
+ "attachments": {},
17
+ "cell_type": "markdown",
18
+ "metadata": {},
19
+ "source": [
20
+ "## Create Alaroye"
21
+ ]
22
+ },
23
+ {
24
+ "attachments": {},
25
+ "cell_type": "markdown",
26
+ "metadata": {},
27
+ "source": [
28
+ "### Import libraries\n"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 1,
34
+ "metadata": {},
35
+ "outputs": [
36
+ {
37
+ "name": "stdout",
38
+ "output_type": "stream",
39
+ "text": [
40
+ "['/Users/abdulazeezoj/Desktop/Devspace/Play/alaroye', '/Users/abdulazeezoj/Desktop/Devspace/Play/alaroye/notebooks', '/Users/abdulazeezoj/.pyenv/versions/3.10.10/lib/python310.zip', '/Users/abdulazeezoj/.pyenv/versions/3.10.10/lib/python3.10', '/Users/abdulazeezoj/.pyenv/versions/3.10.10/lib/python3.10/lib-dynload', '', '/Users/abdulazeezoj/.local/share/virtualenvs/alaroye-YVTdCuTc/lib/python3.10/site-packages']\n"
41
+ ]
42
+ },
43
+ {
44
+ "name": "stderr",
45
+ "output_type": "stream",
46
+ "text": [
47
+ "/Users/abdulazeezoj/.local/share/virtualenvs/alaroye-YVTdCuTc/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
48
+ " from .autonotebook import tqdm as notebook_tqdm\n"
49
+ ]
50
+ }
51
+ ],
52
+ "source": [
53
+ "import os\n",
54
+ "import sys\n",
55
+ "\n",
56
+ "# Add the parent directory to the path\n",
57
+ "sys.path.insert(0, os.path.abspath(\"../\"))\n",
58
+ "print(sys.path)\n",
59
+ "\n",
60
+ "from alaroye.alaroye import Alaroye"
61
+ ]
62
+ },
63
+ {
64
+ "attachments": {},
65
+ "cell_type": "markdown",
66
+ "metadata": {},
67
+ "source": [
68
+ "### Create Alaroye"
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": 2,
74
+ "metadata": {},
75
+ "outputs": [
76
+ {
77
+ "name": "stderr",
78
+ "output_type": "stream",
79
+ "text": [
80
+ "Created a chunk of size 1773, which is longer than the specified 1000\n",
81
+ "Created a chunk of size 1074, which is longer than the specified 1000\n",
82
+ "Created a chunk of size 2134, which is longer than the specified 1000\n",
83
+ "Created a chunk of size 1013, which is longer than the specified 1000\n"
84
+ ]
85
+ }
86
+ ],
87
+ "source": [
88
+ "alaroye = Alaroye(version=\"v0.0.0\")\n",
89
+ "\n",
90
+ "# Train the Osanyin\n",
91
+ "alaroye.train(doc_dir=\"../docs/\", chunk=True, chunk_size=1000, chunk_overlap=0)"
92
+ ]
93
+ },
94
+ {
95
+ "attachments": {},
96
+ "cell_type": "markdown",
97
+ "metadata": {},
98
+ "source": [
99
+ "### Test Alaroye"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": 3,
105
+ "metadata": {},
106
+ "outputs": [
107
+ {
108
+ "name": "stdout",
109
+ "output_type": "stream",
110
+ "text": [
111
+ "Answer: \n",
112
+ "Alles gut so weit. Ich habe ein ganz wichtiges Anliegen mit ihnen zu besprechen.\n",
113
+ "\n",
114
+ "\n",
115
+ "Source documents:\n",
116
+ " - state_of_the_union.txt\n"
117
+ ]
118
+ }
119
+ ],
120
+ "source": [
121
+ "# Test the model\n",
122
+ "query = \"Hallo, wie geht es Ihnen? Gut, sehen sie aus. Geht es ihnen gut?\"\n",
123
+ "response = alaroye.ask(query, verbose=True)\n"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "code",
128
+ "execution_count": 4,
129
+ "metadata": {},
130
+ "outputs": [
131
+ {
132
+ "name": "stdout",
133
+ "output_type": "stream",
134
+ "text": [
135
+ "Calibrating microphone...\n",
136
+ "Listening...\n",
137
+ "Recognizing...\n",
138
+ "hello\n"
139
+ ]
140
+ }
141
+ ],
142
+ "source": [
143
+ "# Test the model listening to the microphone\n",
144
+ "query = alaroye._listen()\n",
145
+ "print(query)\n",
146
+ "# response = alaroye.ask(query, verbose=True)"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": 5,
152
+ "metadata": {},
153
+ "outputs": [
154
+ {
155
+ "name": "stdout",
156
+ "output_type": "stream",
157
+ "text": [
158
+ "Answer: \n",
159
+ "Omdena's terms and conditions state that applicants must be chosen and verified\n",
160
+ "before they can become collaborators, and that Omdena may collect and use\n",
161
+ "personal information for the purposes of administering the project. If there is\n",
162
+ "a conflict between the terms and conditions and the Code of Conduct, the last\n",
163
+ "appearing in the list will take precedence. Applicants must also represent,\n",
164
+ "warrant and undertake that their institution has authorized their entry into the\n",
165
+ "project, and must cease use of the project website and not participate if it is\n",
166
+ "contrary to their institution's policies. They may also be requested to sign a\n",
167
+ "Code of Conduct if chosen as a project participant.\n",
168
+ "\n",
169
+ "\n",
170
+ "Source documents:\n",
171
+ " - Omdena Terms and Conditions.docx\n"
172
+ ]
173
+ }
174
+ ],
175
+ "source": [
176
+ "# Test the model\n",
177
+ "query = \"Summarize the omdena terms and conditions.\"\n",
178
+ "response = alaroye.ask(query, verbose=True)"
179
+ ]
180
+ },
181
+ {
182
+ "cell_type": "code",
183
+ "execution_count": 6,
184
+ "metadata": {},
185
+ "outputs": [
186
+ {
187
+ "name": "stdout",
188
+ "output_type": "stream",
189
+ "text": [
190
+ "Answer: \n",
191
+ "Omdena Top Talent projects are yet another frontier for people to collaborate on\n",
192
+ "solving AI-related problems. Here Omdena selects a smaller team (2-5 people)\n",
193
+ "from its top talents to work on projects. All members of top talent projects are\n",
194
+ "paid as per the market rates.\n",
195
+ "\n",
196
+ "\n",
197
+ "Source documents:\n",
198
+ " - Omdena Top Talent Projects Guidebook.docx\n"
199
+ ]
200
+ }
201
+ ],
202
+ "source": [
203
+ "# Test the model\n",
204
+ "query = \"What is a top talent project?\"\n",
205
+ "response = alaroye.ask(query, verbose=True)"
206
+ ]
207
+ },
208
+ {
209
+ "cell_type": "code",
210
+ "execution_count": 7,
211
+ "metadata": {},
212
+ "outputs": [
213
+ {
214
+ "name": "stdout",
215
+ "output_type": "stream",
216
+ "text": [
217
+ "Answer: \n",
218
+ "To apply at Omdena School, you can visit the website www.omdena.com/school and\n",
219
+ "fill out the application form.\n",
220
+ "\n",
221
+ "\n",
222
+ "Source documents:\n",
223
+ " - Omdena Local Chapter Collaborator Onboarding.docx\n",
224
+ " - Omdena Terms and Conditions.docx\n"
225
+ ]
226
+ }
227
+ ],
228
+ "source": [
229
+ "# Test the model\n",
230
+ "query = \"how to apply at Omdenaschool\"\n",
231
+ "response = alaroye.ask(query, verbose=True)"
232
+ ]
233
+ },
234
+ {
235
+ "attachments": {},
236
+ "cell_type": "markdown",
237
+ "metadata": {},
238
+ "source": [
239
+ "## Load Osanyin"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "markdown",
244
+ "metadata": {},
245
+ "source": [
246
+ "### Import libraries\n"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "code",
251
+ "execution_count": 1,
252
+ "metadata": {},
253
+ "outputs": [
254
+ {
255
+ "name": "stderr",
256
+ "output_type": "stream",
257
+ "text": [
258
+ "/Users/abdulazeezoj/.local/share/virtualenvs/omdenabot-m2zzZ4nN/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
259
+ " from .autonotebook import tqdm as notebook_tqdm\n"
260
+ ]
261
+ }
262
+ ],
263
+ "source": [
264
+ "import pathlib\n",
265
+ "import sys\n",
266
+ "\n",
267
+ "# Add the parent directory to the path\n",
268
+ "sys.path.insert(0, str(pathlib.Path(__file__).parent.parent))\n",
269
+ "\n",
270
+ "from osanyin.osanyin import Osanyin"
271
+ ]
272
+ },
273
+ {
274
+ "attachments": {},
275
+ "cell_type": "markdown",
276
+ "metadata": {},
277
+ "source": [
278
+ "### Load Osanyin"
279
+ ]
280
+ },
281
+ {
282
+ "cell_type": "code",
283
+ "execution_count": 2,
284
+ "metadata": {},
285
+ "outputs": [],
286
+ "source": [
287
+ "# Load Osanyin\n",
288
+ "osanyin = Osanyin(version=\"v0.0.0\")\n",
289
+ "\n",
290
+ "# Load the Osanyin\n",
291
+ "osanyin.load()"
292
+ ]
293
+ },
294
+ {
295
+ "attachments": {},
296
+ "cell_type": "markdown",
297
+ "metadata": {},
298
+ "source": [
299
+ "### Test Osanyin"
300
+ ]
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": 3,
305
+ "metadata": {},
306
+ "outputs": [
307
+ {
308
+ "name": "stdout",
309
+ "output_type": "stream",
310
+ "text": [
311
+ "Answer: \n",
312
+ "The publishing process for Omdena writers involves submitting an article idea\n",
313
+ "via a form shared in Slack, getting approval to publish the article from the\n",
314
+ "project, submitting a draft of the article if the quality is good enough\n",
315
+ "according to the guidelines, having the Omdena team edit and suggest\n",
316
+ "improvements, and then submitting the manuscript according to the Omdena JAII\n",
317
+ "Template via the journal online submission link at https://omdena.com/JAII/.\n",
318
+ "\n",
319
+ "\n",
320
+ "Source documents:\n",
321
+ " - Omdena Local Chapter Lead Manual.docx\n",
322
+ " - Omdena Writers Guidelines.docx\n",
323
+ " - Omdena AI Researcher Guidebook.docx\n"
324
+ ]
325
+ }
326
+ ],
327
+ "source": [
328
+ "# Test the model\n",
329
+ "query = \"What is the publishing process for omdena writers?\"\n",
330
+ "response = osanyin.ask(query, verbose=True)"
331
+ ]
332
+ },
333
+ {
334
+ "cell_type": "code",
335
+ "execution_count": 4,
336
+ "metadata": {},
337
+ "outputs": [
338
+ {
339
+ "name": "stdout",
340
+ "output_type": "stream",
341
+ "text": [
342
+ "Answer: \n",
343
+ "Rudradeb Mitra is the Founder & CEO of Omdena.\n",
344
+ "\n",
345
+ "\n",
346
+ "Source documents:\n",
347
+ " - Omdena Local Chapter Collaborator Onboarding.docx\n",
348
+ " - Omdena Product Manager QA.docx\n"
349
+ ]
350
+ }
351
+ ],
352
+ "source": [
353
+ "# Test the model\n",
354
+ "query = \"Who is the CEO of omdena?\"\n",
355
+ "response = osanyin.ask(query, verbose=True)"
356
+ ]
357
+ },
358
+ {
359
+ "cell_type": "code",
360
+ "execution_count": 5,
361
+ "metadata": {},
362
+ "outputs": [
363
+ {
364
+ "name": "stdout",
365
+ "output_type": "stream",
366
+ "text": [
367
+ "Answer: \n",
368
+ "Omdena's most unique feature is its focus on collaboration, compassion,\n",
369
+ "curiosity, and consciousness. This approach creates empowerment, builds trust,\n",
370
+ "gives access to data, generates diverse opinions, and spurs innovation, while\n",
371
+ "also making the solutions more ethical.\n",
372
+ "\n",
373
+ "\n",
374
+ "Source documents:\n",
375
+ " - Omdena Local Chapter Collaborator Onboarding.docx\n",
376
+ " - Omdena Local Chapter Lead Manual.docx\n"
377
+ ]
378
+ }
379
+ ],
380
+ "source": [
381
+ "# Test the model\n",
382
+ "query = \"What do you think is MOST unique about Omdena compared to other platforms which build AI (or other software) solutions? Maximum 3 points.\"\n",
383
+ "response = osanyin.ask(query, verbose=True)"
384
+ ]
385
+ },
386
+ {
387
+ "cell_type": "code",
388
+ "execution_count": 6,
389
+ "metadata": {},
390
+ "outputs": [
391
+ {
392
+ "name": "stdout",
393
+ "output_type": "stream",
394
+ "text": [
395
+ "Answer: \n",
396
+ "Omdena is a global community of collaborators working together to solve social\n",
397
+ "and environmental issues through technology. We provide a platform for people\n",
398
+ "with the right skills and motivation to come together, learn, share and build\n",
399
+ "solutions. Our approach is to create a community-first innovation model where\n",
400
+ "members feel like a family.\n",
401
+ "\n",
402
+ "\n",
403
+ "Source documents:\n",
404
+ " - Omdena Local Chapter Collaborator Onboarding.docx\n",
405
+ " - Omdena Product Manager QA.docx\n"
406
+ ]
407
+ }
408
+ ],
409
+ "source": [
410
+ "# Test the model\n",
411
+ "query = \"Please write in maximum 3 sentences how you will explain Omdena to someone who has no idea about Omdena.\"\n",
412
+ "response = osanyin.ask(query, verbose=True)"
413
+ ]
414
+ },
415
+ {
416
+ "cell_type": "code",
417
+ "execution_count": 7,
418
+ "metadata": {},
419
+ "outputs": [
420
+ {
421
+ "name": "stdout",
422
+ "output_type": "stream",
423
+ "text": [
424
+ "Answer: \n",
425
+ "I don't know, rephrase the question or contact Omdena support on slack or email\n",
426
+ "([email protected]).\n",
427
+ "\n",
428
+ "\n",
429
+ "Source documents:\n",
430
+ " - Omdena Product Manager Handbook.docx\n"
431
+ ]
432
+ }
433
+ ],
434
+ "source": [
435
+ "# Test the model\n",
436
+ "query = \"What is google?\"\n",
437
+ "response = osanyin.ask(query, verbose=True)"
438
+ ]
439
+ },
440
+ {
441
+ "cell_type": "code",
442
+ "execution_count": 8,
443
+ "metadata": {},
444
+ "outputs": [
445
+ {
446
+ "name": "stdout",
447
+ "output_type": "stream",
448
+ "text": [
449
+ "Answer: \n",
450
+ "Machine learning is a subset of artificial intelligence (AI) that enables\n",
451
+ "computers to learn from data and experiences without being explicitly\n",
452
+ "programmed. It uses algorithms to find patterns in data and make decisions with\n",
453
+ "minimal human intervention.\n",
454
+ "\n",
455
+ "\n",
456
+ "Source documents:\n",
457
+ " - Omdena Local Chapter Lead Manual.docx\n",
458
+ " - Omdena AI Researcher Guidebook.docx\n"
459
+ ]
460
+ }
461
+ ],
462
+ "source": [
463
+ "# Test the model\n",
464
+ "query = \"What is machine learning?\"\n",
465
+ "response = osanyin.ask(query, verbose=True)"
466
+ ]
467
+ },
468
+ {
469
+ "cell_type": "code",
470
+ "execution_count": null,
471
+ "metadata": {},
472
+ "outputs": [],
473
+ "source": []
474
+ }
475
+ ],
476
+ "metadata": {
477
+ "kernelspec": {
478
+ "display_name": "omdenabot-m2zzZ4nN",
479
+ "language": "python",
480
+ "name": "python3"
481
+ },
482
+ "language_info": {
483
+ "codemirror_mode": {
484
+ "name": "ipython",
485
+ "version": 3
486
+ },
487
+ "file_extension": ".py",
488
+ "mimetype": "text/x-python",
489
+ "name": "python",
490
+ "nbconvert_exporter": "python",
491
+ "pygments_lexer": "ipython3",
492
+ "version": "3.10.10"
493
+ },
494
+ "orig_nbformat": 4
495
+ },
496
+ "nbformat": 4,
497
+ "nbformat_minor": 2
498
+ }
src/cli.py ADDED
@@ -0,0 +1,9 @@
+ import os
+ import sys
+ import textwrap
+ import pathlib
+ import typer
+
+
+ # Add Alaroye to the path
+ sys.path.insert(0, str(pathlib.Path(__file__).parent.parent))
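The CLI above is only a stub that wires up the import path. A hypothetical continuation (not in the commit) could expose `Alaroye.ask` as a single Typer command; the `app` object, the `ask` command, and its options below are assumptions for illustration.

```python
# Hypothetical continuation of src/cli.py: wrap Alaroye.ask in a Typer command,
# assuming the persisted Chroma store already exists (i.e. train() was run once).
import pathlib
import sys

import typer

sys.path.insert(0, str(pathlib.Path(__file__).parent.parent))

from alaroye.alaroye import Alaroye  # noqa: E402

app = typer.Typer()


@app.command()
def ask(question: str, version: str = "v0.0.0", verbose: bool = False) -> None:
    """Ask Alaroye a question from the command line."""
    bot = Alaroye(version=version)
    bot.load()  # reuse the persisted vectorstore
    result = bot.ask(question, verbose=verbose)
    if not verbose:
        typer.echo(result["answer"])


if __name__ == "__main__":
    app()
```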
src/web.py ADDED
@@ -0,0 +1,68 @@
+ import os
+ import sys
+ import pathlib
+ import gradio as gr
+
+ # Add Alaroye to the path
+ sys.path.insert(0, str(pathlib.Path(__file__).parent.parent))
+
+ from alaroye.alaroye import Alaroye
+
+
+ # Initialize the Alaroye bot
+ alaroye = Alaroye(version="v0.0.0")
+
+ # Load the vectorstore
+ alaroye.load()
+
+
+ def add_text(history, text):
+     history = history + [(text, None)]
+     return history, gr.update(value="", interactive=False)
+
+
+ def bot(history):
+     # Generate a response
+     response = alaroye.ask(history[-1][0])
+
+     # Get the answer
+     answer = response.get(
+         "answer",
+         "Sorry, I don't know that.",
+     )
+
+     # Check if the answer is out of context
+
+     # Update the history
+     history[-1][1] = answer
+
+     return history
+
+
+ with gr.Blocks(title="OmdenaBot") as demo:
+     chatbot = gr.Chatbot([], elem_id="chatbot", label="Osanyin").style(height=750)
+
+     with gr.Row():
+         with gr.Column(scale=0.85):
+             txt = gr.Textbox(
+                 show_label=False,
+                 placeholder="Enter text and press enter, or upload an image",
+             ).style(container=False)
+         with gr.Column(scale=0.15, min_width=0):
+             btn = gr.Button(value="Send")
+
+     # Button click
+     btn_msg = btn.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
+         bot, chatbot, chatbot
+     )
+     btn_msg.then(lambda: gr.update(interactive=True), None, [txt], queue=False)
+
+     # Textbox enter
+     txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
+         bot, chatbot, chatbot
+     )
+     txt_msg.then(lambda: gr.update(interactive=True), None, [txt], queue=False)
+
+
+ if __name__ == "__main__":
+     demo.launch()
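As a rough smoke test of the two callbacks, one could exercise `add_text` and `bot` directly before launching the UI. The snippet below is a sketch, not part of the commit, and assumes it runs inside `src/web.py` after `alaroye.load()` (for example under the `__main__` guard), with a valid `OPENAI_API_KEY`.

```python
# Hypothetical quick check: drive the chat callbacks without the Gradio UI.
history, _update = add_text([], "What is a top talent project?")
history = [list(turn) for turn in history]  # bot() assigns into the last turn
history = bot(history)
print(history[-1][1])  # the generated answer
```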