{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "faec3801",
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports live in this first cell so the notebook survives\n",
    "# Restart Kernel -> Run All: `openai` has to be imported before the\n",
    "# next cell assigns `openai.api_key`.\n",
    "import os\n",
    "\n",
    "import gradio as gr\n",
    "import openai\n",
    "from langchain.document_loaders import TextLoader\n",
    "from langchain.embeddings import OpenAIEmbeddings\n",
    "from langchain.llms import OpenAI\n",
    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
    "from langchain.vectorstores import Chroma"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a1e3860e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Take the API key from the environment rather than hard-coding it.\n",
    "# A missing OPENAI_API_KEY variable raises KeyError here.\n",
    "openai.api_key = os.environ[\"OPENAI_API_KEY\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "2131f179",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the source text file; the result is a list (its length is checked below).\n",
    "loader = TextLoader(\"Machine Learning Introduction A Com.txt\", encoding=\"utf-8\")\n",
    "document = loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "02ec4e57",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of documents returned by the loader.\n",
    "len(document)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "89310192",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12177"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Character count of the loaded text.\n",
    "len(document[0].page_content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8e4c3e72",
   "metadata": {},
   "outputs": [],
   "source": [
    "# chunk_size and chunk_overlap are measured with length_function,\n",
    "# which is len() here, i.e. in characters.\n",
    "text_splitter = RecursiveCharacterTextSplitter(\n",
    "    chunk_size=100,\n",
    "    chunk_overlap=20,\n",
    "    length_function=len,\n",
    "    add_start_index=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c552df57",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the loaded document into chunks for embedding.\n",
    "texts = text_splitter.split_documents(document)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "1e109910",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "181"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of chunks produced by the splitter.\n",
    "len(texts)"
   ]
},
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "4f84f608",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "52"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Character count of the first chunk.\n",
    "len(texts[0].page_content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c550bb5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the Chroma vector store from the chunks using OpenAI embeddings.\n",
    "db = Chroma.from_documents(texts, OpenAIEmbeddings())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "461925e5",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Try a retrieval directly against the store before wiring it into the UI.\n",
    "query = \"recommend machine learning courses\"\n",
    "docs = db.similarity_search(query)\n",
    "\n",
    "print(f\"Query: {query}\\n\")\n",
    "for index, answer in enumerate(docs, start=1):\n",
    "    print(f\"{index}: {answer.page_content}\")\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8fb01e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def answer_question(fn_query):\n",
    "    \"\"\"Return the stored text chunks most similar to ``fn_query``.\n",
    "\n",
    "    Args:\n",
    "        fn_query: Free-text query; this function is used as the Gradio\n",
    "            callback, so it receives the textbox contents.\n",
    "\n",
    "    Returns:\n",
    "        The ``page_content`` of every match returned by\n",
    "        ``db.similarity_search``, joined with newlines. A blank query\n",
    "        returns an empty string without querying the store.\n",
    "    \"\"\"\n",
    "    # A blank query has nothing to embed, so skip the search call entirely.\n",
    "    if not fn_query or not fn_query.strip():\n",
    "        return \"\"\n",
    "\n",
    "    docs = db.similarity_search(fn_query)\n",
    "    return \"\\n\".join(doc.page_content for doc in docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb3b7131",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Expose answer_question through a text-in / text-out Gradio interface.\n",
    "interface = gr.Interface(\n",
    "    fn=answer_question,\n",
    "    inputs=\"text\",\n",
    "    outputs=\"text\",\n",
    ")\n",
    "\n",
    "interface.launch()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}