{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# !pip install -qU langchain_milvus python-dotenv langchain-openai langchain_ollama langchain_community GitPython"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import shutil\n",
"import time\n",
"import logging\n",
"from dotenv import load_dotenv\n",
"from git import Repo\n",
"from langchain_milvus import Milvus\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_community.document_loaders import GitLoader\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"class GitHubGPT:\n",
" def __init__(self):\n",
" self.OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
" self.embeddings = self.__initialize_embeddings()\n",
" self.vector_db = self.__initialize_vector_db()\n",
" self.client = OpenAI(api_key=self.OPENAI_API_KEY)\n",
" self.system_prompt = self.__initialize_system_prompt()\n",
" self.thread_id = None\n",
" self.assistant_id = self.__create_assistant(name='Github GPT', instructions='Please address the user as Github GPT')\n",
" self.thread_messages = [] # Store the conversation history\n",
"\n",
" def __initialize_embeddings(self):\n",
" return OpenAIEmbeddings(\n",
" model=\"text-embedding-3-small\",\n",
" openai_api_key=self.OPENAI_API_KEY\n",
" )\n",
"\n",
" def __initialize_vector_db(self):\n",
" if not os.path.exists(\"./vector_db\"):\n",
" os.makedirs(\"./vector_db\", mode=0o777)\n",
" \n",
" return Milvus(\n",
" embedding_function=self.embeddings,\n",
" connection_args={\"uri\": \"./vector_db/milvus_example.db\"},\n",
" auto_id=True,\n",
" collection_name=\"github_gpt\",\n",
" )\n",
" \n",
" def __initialize_system_prompt(self):\n",
" return '''\n",
" What are you? A well-informed, intelligent chatbot that can interact with a codebase.\n",
" What do you do? You are always provided with some file content from a codebase and a question/prompt. Your job is to generate a response.\n",
" What should be the tone of your output? It should be friendly, helpful, confident, and narrative.\n",
" What outputs can we expect from you? You can be asked to generate documentations, code, or anything else only relevant to the given codebase content.\n",
" '''\n",
" \n",
" @staticmethod\n",
" def __clean_repo_name(name):\n",
" return name.replace('-', '_')\n",
" \n",
" @staticmethod\n",
" def __declean_repo_name(name):\n",
" return name.replace('_', '-')\n",
" \n",
" def __add_repo_data_to_db(self):\n",
" data = self.loader.load()\n",
" print(f'Length of Data to Add: {len(data)}')\n",
" print(f'Adding Data to Milvus Vector DB')\n",
" text_splitter = RecursiveCharacterTextSplitter(\n",
" chunk_size=1000,\n",
" chunk_overlap=200,\n",
" length_function=len\n",
" )\n",
" data = text_splitter.split_documents(data)\n",
" self.vector_db.add_documents(documents=data)\n",
" print(f'Done Adding Data to Milvus Vector DB')\n",
" \n",
" def add_repo(self, repo_url):\n",
" repo_name = repo_url.split('/')[-1]\n",
" repo_save_path = f\"./Data/Repos\"\n",
" if not os.path.exists(repo_save_path):\n",
" os.makedirs(repo_save_path)\n",
" else:\n",
" shutil.rmtree(repo_save_path)\n",
" os.makedirs(repo_save_path)\n",
" repo_save_path = repo_save_path + \"/\" + self.__clean_repo_name(repo_name)\n",
" \n",
" print(f'Cloning the repo from: {repo_url}')\n",
" repo = Repo.clone_from(\n",
" repo_url, \n",
" to_path=repo_save_path,\n",
" branch=\"master\"\n",
" )\n",
" print(f'Repo Cloned to: {repo_save_path}')\n",
" self.repo_save_path = repo_save_path\n",
" self.branch = repo.head.reference\n",
" self.loader = GitLoader(repo_path=repo_save_path, branch=self.branch)\n",
" self.__add_repo_data_to_db()\n",
"\n",
" def load_repo(self):\n",
" repo_save_path = \"./Data/Repos\"\n",
" repo_name = os.listdir(repo_save_path)[0]\n",
" self.repo_save_path = repo_save_path + \"/\" + repo_name\n",
" self.branch = \"master\"\n",
" print(f'Loading repo: {repo_name}')\n",
" print(f'Branch: {self.branch}')\n",
" print(f'Repo path: {self.repo_save_path}')\n",
" self.loader = GitLoader(repo_path=self.repo_save_path, branch=self.branch)\n",
" self.__add_repo_data_to_db()\n",
"\n",
" def __create_assistant(self, name, instructions, model=\"gpt-3.5-turbo-16k\"):\n",
" assistant = self.client.beta.assistants.create(\n",
" name=name,\n",
" instructions=instructions,\n",
" model=model,\n",
" )\n",
" print(f'Assistant created with ID: {assistant.id}')\n",
" return assistant.id\n",
"\n",
" def __retrieve_documents(self, prompt, k=3):\n",
" retrieved_documents = self.vector_db.similarity_search(\n",
" prompt,\n",
" k=k\n",
" )\n",
" return retrieved_documents\n",
" \n",
" @staticmethod\n",
" def __concatenate_documents(documents):\n",
" print(f'Length of docs to concatenate: {len(documents)}')\n",
" all_content = ''\n",
" for idx, doc in enumerate(documents):\n",
" print(f\"Retrieved Document: {idx} --- [{doc.metadata}]\")\n",
" all_content += \"Chunk:\" + str(idx) + \":\\n\" + doc.page_content + \"\\n\\n\"\n",
" print(\"\\n\\n\")\n",
" return all_content\n",
"\n",
" def query(self, prompt, instructions=\"Please address the user as Github User\"):\n",
" # Step 1: Retrieve relevant documents based on the user's query\n",
" retrieved_documents = self.__retrieve_documents(prompt)\n",
" context = self.__concatenate_documents(retrieved_documents)\n",
"\n",
" # Step 2: Add the new user prompt and context to the conversation history\n",
" user_query = f\"Context from codebase: {context}\\nUser query: {prompt}\\n\"\n",
" self.thread_messages.append({\n",
" \"role\": \"user\",\n",
" \"content\": user_query,\n",
" })\n",
"\n",
" # Step 3: If there's no existing thread, create a new one; otherwise, append to the existing thread\n",
" if not self.thread_id:\n",
" thread = self.client.beta.threads.create(\n",
" messages=self.thread_messages\n",
" )\n",
" self.thread_id = thread.id\n",
" print(f'Thread created with ID: {self.thread_id}')\n",
" else:\n",
" print(f'Using the existing thread ID: {self.thread_id}')\n",
" # Add the new message to the existing thread\n",
" self.client.beta.threads.messages.create(\n",
" thread_id=self.thread_id,\n",
" role=\"user\",\n",
" content=user_query\n",
" )\n",
"\n",
" Messages = self.client.beta.threads.messages.list(thread_id=self.thread_id)\n",
" print(f'Count of messages(input prompt + generated response) in the thread:', len(Messages.data))\n",
"\n",
" # Step 4: Run the assistant on the created or updated thread\n",
" run = self.client.beta.threads.runs.create(\n",
" thread_id=self.thread_id,\n",
" assistant_id=self.assistant_id,\n",
" instructions=instructions,\n",
" stream=True,\n",
" )\n",
" \n",
" text = ''\n",
" for event in run:\n",
" try:\n",
" text = event.data.delta.content[0].text.value\n",
" yield text\n",
" except:\n",
" continue"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Assistant created with ID: asst_gS7ryLEGyZigEyy8Z1fHNiFZ\n"
]
}
],
"source": [
"obj = GitHubGPT()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Below functions / statements are responsile to \n",
"- clone + load the data into the vectro db\n",
"- load the already cloned data into the vector db\n",
"Hence only uncomment one which you want to use, else the data will be already in the local vector db."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading repo: creatify_app\n",
"Branch: master\n",
"Repo path: ./Data/Repos/creatify_app\n",
"Length of Data to Add: 985\n",
"Adding Data to Milvus Vector DB\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"E20240914 03:07:47.524345 12158 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:48.334723 12158 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:49.062145 12158 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:49.823671 12158 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:50.552961 12146 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:51.415422 12146 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:52.223866 12158 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:53.068990 12158 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:53.934353 12146 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:54.669559 12158 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:55.506398 12146 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:56.319356 12158 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:57.120278 12146 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:57.835793 12158 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:58.513509 12146 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:59.266099 12146 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n",
"E20240914 03:07:59.844120 12146 collection_data.cpp:84] [SERVER][Insert][grpcpp_sync_ser] Insert data failed, errs: attempt to write a readonly database\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done Adding Data to Milvus Vector DB\n"
]
}
],
"source": [
"# obj.add_repo(\"https://github.com/SaschaNe/creatify-app\")\n",
"obj.load_repo()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Length of docs to concatenate: 3\n",
"Retrieved Document: 0 --- [{'file_name': 'CrispService.php', 'file_path': 'app/Services/CrispService.php', 'file_type': '.php', 'pk': 452530082577652067, 'source': 'app/Services/CrispService.php'}]\n",
"Retrieved Document: 1 --- [{'file_name': 'CrispImport.php', 'file_path': 'app/Console/Commands/CrispImport.php', 'file_type': '.php', 'pk': 452530082770854166, 'source': 'app/Console/Commands/CrispImport.php'}]\n",
"Retrieved Document: 2 --- [{'file_name': 'CrispImport.php', 'file_path': 'app/Console/Commands/CrispImport.php', 'file_type': '.php', 'pk': 452530082770854167, 'source': 'app/Console/Commands/CrispImport.php'}]\n",
"\n",
"\n",
"\n",
"Thread created with ID: thread_2WiH9knOg3dTUQ9Vinbx5sdX\n",
"Count of messages(input prompt + generated response) in the thread: 1\n",
"The `processPersonProfile` function in the `CrispService` class is responsible for processing a person's profile data. Here is an explanation of its implementation:\n",
"\n",
"1. It takes a `$person` parameter which is expected to be an array containing the person's data.\n",
"\n",
"2. First, it checks if the `$person` parameter is an array and if it contains the key `'people_id'`. If both conditions are met, it assigns the value of `'people_id'` to the `$peopleId` variable. Otherwise, it sets the `$peopleId` variable to `null` and logs an error message specifying that the `$person` data is invalid.\n",
"\n",
"3. It then retrieves the person's email from the `$person` array and assigns it to the `$email` variable.\n",
"\n",
"4. The person's profile is converted to JSON format using `json_encode` and assigned to the `$profile` variable.\n",
"\n",
"5. The function then checks if the `$peopleId` variable is not empty. If it is not empty, it looks for a record in the `CrispPeople` table where the `people_id` column matches the value of `$peopleId`. The first matching record is assigned to the `$crispPerson` variable.\n",
"\n",
"6. If the `$crispPerson` variable is set (meaning a record in `CrispPeople` exists with the same `people_id`), it updates the email address of the `$crispPerson` with the email address received in the `$webhook` data, if it exists. If not, it assigns `null` to the email address. Then, it saves the changes to the database.\n",
"\n",
"7. Next, it checks if the `$crispSession` variable is set and if the `people_id` of the `$crispSession` is empty. If both conditions are met, it assigns the value of `$crispPeople->people_id` to `$crispSession->people_id` and saves the changes to the database.\n",
"\n",
"8. If the `$crispSession` is set and the `people_id` of the `$crispSession` is not equal to the `people_id` of the `$crispPeople`, it sets the `$crispSession->p_id_changed` property to `true` and saves the changes to the database.\n",
"\n",
"9. If the `$peopleId` is empty, it means that a record in `CrispPeople` doesn't exist for the given `people_id`. In this case, it creates a new record in the `CrispSession` table with the `session_id` value received in the `$webhook` data and the current timestamp as the `last_activity` value. It assigns the `people_id` of the `$crispPeople` to the created `CrispSession` record, sets the `user_id` to `null`, and saves the changes to the database.\n",
"\n",
"In summary, the `processPersonProfile` function retrieves a person's data, checks if a corresponding record exists in the `CrispPeople` table, updates the email address if necessary, synchronizes the `people_id` between `CrispPeople` and `CrispSession`, and creates a new `CrispSession` record if no existing `CrispPeople` record is found."
]
}
],
"source": [
"res = obj.query(\"Explain the implementation of the processPersonProfile function in the CrispService class.\")\n",
"for chunk in res:\n",
" print(chunk, end='', flush=True)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Length of docs to concatenate: 3\n",
"Retrieved Document: 0 --- [{'file_name': 'Kernel.php', 'file_path': 'app/Http/Kernel.php', 'file_type': '.php', 'pk': 452165072252045298, 'source': 'app/Http/Kernel.php'}]\n",
"Retrieved Document: 1 --- [{'file_name': 'Kernel.php', 'file_path': 'app/Http/Kernel.php', 'file_type': '.php', 'pk': 452529771710513216, 'source': 'app/Http/Kernel.php'}]\n",
"Retrieved Document: 2 --- [{'file_name': 'composer.json', 'file_path': 'composer.json', 'file_type': '.json', 'pk': 452165072252045242, 'source': 'composer.json'}]\n",
"\n",
"\n",
"\n",
"Using the existing thread ID: thread_uzmT0vkjxsJMEKLqWXNZU6Qr\n",
"Count of messages(input prompt + generated response) in the thread: 3\n",
"To identify the middlewares that are not included in the standard Laravel 10 application and are custom developed, we can compare the middleware aliases defined in the Kernel class with the standard Laravel 10 middleware aliases.\n",
"\n",
"Based on the provided code snippets, here are the middleware aliases that are not included in the standard Laravel 10 application and are custom developed:\n",
"\n",
"- CheckUserRole\n",
"- CheckUserStatus\n",
"- SetLocale\n",
"- CheckUserAsaas\n",
"- CheckOrderCobrancaStatus\n",
"- CheckOnboarding\n",
"\n",
"These middlewares are not part of the default Laravel 10 middleware set and have been custom-developed for the application.\n",
"\n",
"Please note that this list assumes the standard Laravel 10 middleware aliases don't include any additional user-defined or third-party middlewares."
]
}
],
"source": [
"res = obj.query(\"List all the middlewares which are not included in the standard Laravel 10 application and custom developed.\")\n",
"for chunk in res:\n",
" print(chunk, end='', flush=True)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}