Germano Cavalcante committed
Commit: 9a6a74b
Parent(s): ed15883
API changes
Files changed:
- routers/tool_bpy_doc.py (+3 -2)
- routers/tool_find_related.py (+202 -202)
- routers/tool_gpu_checker.py (+4 -2)
- routers/tool_wiki_search.py (+238 -231)
routers/tool_bpy_doc.py
CHANGED
@@ -2,6 +2,7 @@
 
 import pickle
 from fastapi import APIRouter
+from fastapi.responses import PlainTextResponse
 
 
 router = APIRouter()
@@ -53,10 +54,10 @@ def bpy_doc_get_documentation(api):
     return documentation
 
 
-@router.get("/bpy_doc")
+@router.get("/bpy_doc", response_class=PlainTextResponse)
 def bpy_doc(api: str = ""):
     message = bpy_doc_get_documentation(api)
-    return
+    return message
 
 
 if __name__ == "__main__":
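Two user-visible fixes here: the handler previously ended with a bare return (so every request got a JSON null body), and without a response class FastAPI would serialize the returned string as JSON. A minimal sketch of exercising the fixed endpoint — the app wiring below is hypothetical, not part of the commit:

# Sketch only: mounts the router on a throwaway app and checks that
# the endpoint now answers with raw text rather than a JSON string.
from fastapi import FastAPI
from fastapi.testclient import TestClient

from routers.tool_bpy_doc import router

app = FastAPI()
app.include_router(router)
client = TestClient(app)

response = client.get("/bpy_doc", params={"api": "bpy.context"})
assert response.headers["content-type"].startswith("text/plain")
print(response.text)  # the documentation string, unquoted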
routers/tool_find_related.py
CHANGED
@@ -10,6 +10,7 @@ from datetime import datetime, timedelta
 from enum import Enum
 from sentence_transformers import util
 from fastapi import APIRouter
+from fastapi.responses import PlainTextResponse
 
 try:
     from .embedding import EMBEDDING_CTX
@@ -24,9 +25,6 @@ router = APIRouter()
 issue_attr_filter = {'number', 'title', 'body',
                      'state', 'updated_at', 'created_at'}
 
-G_cache_path = "routers/embedding/embeddings_issues.pkl"
-G_data = {}
-
 
 class State(str, Enum):
     opened = "opened"
@@ -34,263 +32,265 @@ class State(str, Enum):
     all = "all"
 
 
-            pass
-
-    arrays_size_new = ARRAY_CHUNK_SIZE * (int(size_new / ARRAY_CHUNK_SIZE) + 1)
-
-    data_new = {
-        'updated_at': updated_at_old,
-        'arrays_size': arrays_size_new,
-        'titles': titles_old + [None] * (arrays_size_new - arrays_size_old),
-        'embeddings': torch.empty((arrays_size_new, *EMBEDDING_CTX.embedding_shape),
-                                  dtype=EMBEDDING_CTX.embedding_dtype,
-                                  device=EMBEDDING_CTX.embedding_device),
-        'opened': torch.zeros(arrays_size_new, dtype=torch.bool),
-        'closed': torch.zeros(arrays_size_new, dtype=torch.bool),
-    }
-
-    try:
-        data_new['embeddings'][:arrays_size_old] = G_data[repo]['embeddings']
-        data_new['opened'][:arrays_size_old] = G_data[repo]['opened']
-        data_new['closed'][:arrays_size_old] = G_data[repo]['closed']
-    except:
-        pass
-
-    G_data[repo] = data_new
-
-
-def _embeddings_generate(repo):
-    global G_data
-
-    if os.path.exists(G_cache_path):
-        with open(G_cache_path, 'rb') as file:
-            G_data = pickle.load(file)
-        if repo in G_data:
-            return
-
-        embeddings_new = G_data[repo]['embeddings']
-        opened = G_data[repo]['opened']
-        closed = G_data[repo]['closed']
-
-            embeddings_new[number] = embeddings[i]
-            if issue['state'] == 'open':
-                opened[number] = True
-            if issue['state'] == 'closed':
-                closed[number] = True
-
-            'blender', repo, since=date_old, issue_attr_filter=issue_attr_filter)
-
-        return data_repo
-
-    # Consider that if the time hasn't changed, it's the same issue.
-    issues = [issue for issue in issues if issue['updated_at'] != date_old]
-
-    issues_to_embed = []
-
-        if issue['state'] == 'open':
-            data_repo['opened'][number] = True
-        if issue['state'] == 'closed':
-            data_repo['closed'][number] = True
-
-    duplicates = []
-    embeddings = data['embeddings']
-    mask_opened = data["opened"]
-
-    else:
-        mask = data[state.value]
-
-        corpus_id = score['corpus_id']
-        number = true_indices[corpus_id].item()
-        closed_char = "" if mask_opened[number] else "~~"
-        text = f"{closed_char}#{number}{closed_char}: {data['titles'][number]}"
-        duplicates.append(text)
-
-    data = _embeddings_updated_get(repo)
-
-        new_embedding = data['embeddings'][number]
-    else:
-        gitea_issue = gitea_json_issue_get('blender', repo, number)
-        text_to_embed = _create_issue_string(
-            gitea_issue['title'], gitea_issue['body'])
-
-        return ''
-
-    if match := re.search(r'(~~)?#(\d+)(~~)?:', duplicates[0]):
-        number_cached = int(match.group(2))
-        if number_cached == number:
-            return '\n'.join(duplicates[1:])
+class _Data(dict):
+    cache_path = "routers/embedding/embeddings_issues.pkl"
+
+    @staticmethod
+    def _create_issue_string(title, body):
+        cleaned_body = body.replace('\r', '')
+        cleaned_body = cleaned_body.replace('**System Information**\n', '')
+        cleaned_body = cleaned_body.replace('**Blender Version**\n', '')
+        cleaned_body = cleaned_body.replace(
+            'Worked: (newest version of Blender that worked as expected)\n', '')
+        cleaned_body = cleaned_body.replace(
+            '**Short description of error**\n', '')
+        cleaned_body = cleaned_body.replace('**Addon Information**\n', '')
+        cleaned_body = cleaned_body.replace(
+            '**Exact steps for others to reproduce the error**\n', '')
+        cleaned_body = cleaned_body.replace(
+            '[Please describe the exact steps needed to reproduce the issue]\n', '')
+        cleaned_body = cleaned_body.replace(
+            '[Please fill out a short description of the error here]\n', '')
+        cleaned_body = cleaned_body.replace(
+            '[Based on the default startup or an attached .blend file (as simple as possible)]\n', '')
+        cleaned_body = re.sub(
+            r', branch: .+?, commit date: \d{4}-\d{2}-\d{2} \d{2}:\d{2}, hash: `.+?`', '', cleaned_body)
+        cleaned_body = re.sub(
+            r'\/?attachments\/[a-zA-Z0-9\-]+', 'attachment', cleaned_body)
+        cleaned_body = re.sub(
+            r'https?:\/\/[^\s/]+(?:\/[^\s/]+)*\/([^\s/]+)', lambda match: match.group(1), cleaned_body)
+
+        return title + '\n' + cleaned_body
+
+    @staticmethod
+    def _find_latest_date(issues, default_str=None):
+        # Handle the case where 'issues' is empty
+        if not issues:
+            return default_str
+
+        return max((issue['updated_at'] for issue in issues), default=default_str)
+
+    @classmethod
+    def _create_strings_to_embbed(cls, issues):
+        texts_to_embed = [cls._create_issue_string(
+            issue['title'], issue['body']) for issue in issues]
+
+        return texts_to_embed
+
+    def _data_ensure_size(self, repo, size_new):
+        ARRAY_CHUNK_SIZE = 4096
+
+        updated_at_old = None
+        arrays_size_old = 0
+        titles_old = []
+        try:
+            arrays_size_old = self[repo]['arrays_size']
+            if size_new <= arrays_size_old:
+                return
+        except:
+            pass
+
+        arrays_size_new = ARRAY_CHUNK_SIZE * \
+            (int(size_new / ARRAY_CHUNK_SIZE) + 1)
+
+        data_new = {
+            'updated_at': updated_at_old,
+            'arrays_size': arrays_size_new,
+            'titles': titles_old + [None] * (arrays_size_new - arrays_size_old),
+            'embeddings': torch.empty((arrays_size_new, *EMBEDDING_CTX.embedding_shape),
+                                      dtype=EMBEDDING_CTX.embedding_dtype,
+                                      device=EMBEDDING_CTX.embedding_device),
+            'opened': torch.zeros(arrays_size_new, dtype=torch.bool),
+            'closed': torch.zeros(arrays_size_new, dtype=torch.bool),
+        }
+
+        try:
+            data_new['embeddings'][:arrays_size_old] = self[repo]['embeddings']
+            data_new['opened'][:arrays_size_old] = self[repo]['opened']
+            data_new['closed'][:arrays_size_old] = self[repo]['closed']
+        except:
+            pass
+
+        self[repo] = data_new
+
+    def _embeddings_generate(self, repo):
+        if os.path.exists(self.cache_path):
+            with open(self.cache_path, 'rb') as file:
+                data = pickle.load(file)
+                self.update(data)
+            if repo in self:
+                return
+
+        issues = gitea_fetch_issues('blender', repo, state='all', since=None,
+                                    issue_attr_filter=issue_attr_filter)
+
+        # issues = sorted(issues, key=lambda issue: int(issue['number']))
+
+        print("Embedding Issues...")
+        texts_to_embed = self._create_strings_to_embbed(issues)
+        embeddings = EMBEDDING_CTX.encode(texts_to_embed)
+
+        self._data_ensure_size(repo, int(issues[0]['number']))
+        self[repo]['updated_at'] = self._find_latest_date(issues)
+
+        titles = self[repo]['titles']
+        embeddings_new = self[repo]['embeddings']
+        opened = self[repo]['opened']
+        closed = self[repo]['closed']
+
+        for i, issue in enumerate(issues):
+            number = int(issue['number'])
+            titles[number] = issue['title']
+            embeddings_new[number] = embeddings[i]
+            if issue['state'] == 'open':
+                opened[number] = True
+            if issue['state'] == 'closed':
+                closed[number] = True
+
+    def _embeddings_updated_get(self, repo):
+        with EMBEDDING_CTX.lock:
+            try:
+                data_repo = self[repo]
+            except:
+                self._embeddings_generate(repo)
+                data_repo = self[repo]
+
+            date_old = data_repo['updated_at']
+
+            issues = gitea_fetch_issues(
+                'blender', repo, since=date_old, issue_attr_filter=issue_attr_filter)
+
+            # Get the most recent date
+            date_new = self._find_latest_date(issues, date_old)
+
+            if date_new == date_old:
+                # Nothing changed
+                return data_repo
+
+            data_repo['updated_at'] = date_new
+
+            # autopep8: off
+            # Consider that if the time hasn't changed, it's the same issue.
+            issues = [issue for issue in issues if issue['updated_at'] != date_old]
+
+            self._data_ensure_size(repo, int(issues[0]['number']))
+
+            updated_at = gitea_issues_body_updated_at_get(issues)
+            issues_to_embed = []
+
+            for i, issue in enumerate(issues):
+                number = int(issue['number'])
+                if issue['state'] == 'open':
+                    data_repo['opened'][number] = True
+                if issue['state'] == 'closed':
+                    data_repo['closed'][number] = True
+
+                title_old = data_repo['titles'][number]
+                if title_old != issue['title']:
+                    data_repo['titles'][number] = issue['title']
+                    issues_to_embed.append(issue)
+                elif updated_at[i] >= date_old:
+                    issues_to_embed.append(issue)
+
+            if issues_to_embed:
+                print(f"Embedding {len(issues_to_embed)} issue{'s' if len(issues_to_embed) > 1 else ''}")
+                texts_to_embed = self._create_strings_to_embbed(issues_to_embed)
+                embeddings = EMBEDDING_CTX.encode(texts_to_embed)
+
+                for i, issue in enumerate(issues_to_embed):
+                    number = int(issue['number'])
+                    data_repo['embeddings'][number] = embeddings[i]
+
+        # autopep8: on
+        return data_repo
+
+    def _sort_similarity(self,
+                         repo: str,
+                         query_emb: List[torch.Tensor],
+                         limit: int,
+                         state: State = State.opened) -> list:
+        duplicates = []
+
+        data = self[repo]
+        embeddings = data['embeddings']
+        mask_opened = data["opened"]
+
+        if state == State.all:
+            mask = mask_opened | data["closed"]
+        else:
+            mask = data[state.value]
+
+        embeddings = embeddings[mask]
+        true_indices = mask.nonzero(as_tuple=True)[0]
+
+        ret = util.semantic_search(
+            query_emb, embeddings, top_k=limit, score_function=util.dot_score)
+
+        for score in ret[0]:
+            corpus_id = score['corpus_id']
+            number = true_indices[corpus_id].item()
+            closed_char = "" if mask_opened[number] else "~~"
+            text = f"{closed_char}#{number}{closed_char}: {data['titles'][number]}"
+            duplicates.append(text)
+
+        return duplicates
+
+    def find_relatedness(self, repo: str, number: int, limit: int = 20, state: State = State.opened):
+        data = self._embeddings_updated_get(repo)
+
+        # Check if the embedding already exists.
+        if data['titles'][number] is not None:
+            new_embedding = data['embeddings'][number]
+        else:
+            gitea_issue = gitea_json_issue_get('blender', repo, number)
+            text_to_embed = self._create_issue_string(
+                gitea_issue['title'], gitea_issue['body'])
+
+            new_embedding = EMBEDDING_CTX.encode([text_to_embed])
+
+        duplicates = self._sort_similarity(
+            repo, new_embedding, limit=limit, state=state)
+
+        if not duplicates:
+            return ''
+
+        if match := re.search(r'(~~)?#(\d+)(~~)?:', duplicates[0]):
+            number_cached = int(match.group(2))
+            if number_cached == number:
+                return '\n'.join(duplicates[1:])
+
+        return '\n'.join(duplicates)
+
+
+G_data = _Data()
 
 
-@router.get("/find_related/{repo}/{number}")
+@router.get("/find_related/{repo}/{number}", response_class=PlainTextResponse)
 def find_related(repo: str = 'blender', number: int = 104399, limit: int = 15, state: State = State.opened) -> str:
-    related = find_relatedness(repo, number, limit=limit, state=state)
+    related = G_data.find_relatedness(repo, number, limit=limit, state=state)
     return related
 
 
 if __name__ == "__main__":
     update_cache = True
     if update_cache:
-        _embeddings_updated_get('blender')
-        _embeddings_updated_get('blender-addons')
-        with open(G_cache_path, "wb") as file:
+        G_data._embeddings_updated_get('blender')
+        G_data._embeddings_updated_get('blender-addons')
+        with open(G_data.cache_path, "wb") as file:
             # Converting the embeddings to be CPU compatible, as the virtual machine in use currently only supports the CPU.
             for val in G_data.values():
                 val['embeddings'] = val['embeddings'].to(torch.device('cpu'))
 
-            pickle.dump(G_data, file, protocol=pickle.HIGHEST_PROTOCOL)
+            pickle.dump(dict(G_data), file, protocol=pickle.HIGHEST_PROTOCOL)
 
     # Converting the embeddings to be GPU.
     for val in G_data.values():
         val['embeddings'] = val['embeddings'].to(torch.device('cuda'))
 
     # 'blender/blender/111434' must print #96153, #83604 and #79762
-    related1 = find_relatedness(
+    related1 = G_data.find_relatedness(
         'blender', 111434, limit=20, state=State.all)
-    related2 = find_relatedness('blender-addons', 104399, limit=20)
+    related2 = G_data.find_relatedness('blender-addons', 104399, limit=20)
 
     print("These are the 20 most related issues:")
     print(related1)
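Besides the PlainTextResponse change, this file folds the old module-level globals into a _Data dict subclass, but the retrieval trick in _sort_similarity is unchanged: filter the embedding matrix with a boolean mask (opened, closed, or both), search only the filtered rows, then map hits back to issue numbers through the mask's nonzero indices. A self-contained toy sketch of that mask-and-remap step, with random vectors standing in for real issue embeddings:

# Toy sketch (not part of the commit): six fake issue embeddings,
# where the mask plays the role of data['opened'].
import torch
from sentence_transformers import util

embeddings = torch.nn.functional.normalize(torch.randn(6, 8), dim=1)
mask = torch.tensor([True, False, True, True, False, True])

filtered = embeddings[mask]                    # restricted search space
true_indices = mask.nonzero(as_tuple=True)[0]  # filtered row -> issue number

query_emb = embeddings[3:4]  # pretend issue #3 is the query
hits = util.semantic_search(query_emb, filtered, top_k=3,
                            score_function=util.dot_score)
for hit in hits[0]:
    number = true_indices[hit['corpus_id']].item()
    print(f"#{number}: score={hit['score']:.3f}")

Because corpus ids returned by util.semantic_search index the filtered matrix, the nonzero lookup is what keeps the reported issue numbers stable regardless of which state filter is active.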
routers/tool_gpu_checker.py
CHANGED
@@ -2,6 +2,7 @@
 
 import re
 from fastapi import APIRouter
+from fastapi.responses import PlainTextResponse
 
 
 router = APIRouter()
@@ -45,6 +46,7 @@ def _check_amd(graphics_card_info):
         r"(Radeon\s*)?RX\s*560\b": "it has Baffin XT chip that belongs to GCN 4th gen architecture",
         r"(Radeon\s*)?5(40X|50X)\b": "it has Polaris 23 XT chip that belongs to GCN 4th gen architecture",
         r"(Radeon\s*)?RX\s*5(40|50)\b": "it has Lexa Pro chip that belongs to GCN 4th gen architecture",
+        r"(Radeon\s*)?RX\s*480\b": "it has Arctic Islands chip that belongs to GCN 4th gen architecture",
         r"(Radeon\s*)?(\(TM\)\s*)?RX\s*4[6-8]0(\b|D)": "it has Ellesmere chip that belongs to GCN 4st gen architecture",
         r"(Radeon\s*)?5(30X|35)\b": "it has Polaris 24 XT chip that belongs to GCN 3rd gen architecture",
         r"(Radeon\s*)?530\b": "it has Weston chip that belongs to GCN 3rd gen architecture",
@@ -192,10 +194,10 @@ If that doesn't help, you can use Blender 2.79: https://www.blender.org/download
 
 
 @router.get("/gpu_checker")
-def gpu_checker(gpu_info: str = ""):
+def gpu_checker(gpu_info: str = "", response_class=PlainTextResponse):
     message = gpu_checker_get_message(gpu_info)
 
-    return
+    return message
 
 
 if __name__ == "__main__":
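One detail worth flagging in this hunk: unlike the other endpoints in this commit, response_class=PlainTextResponse is added here as a keyword parameter of gpu_checker itself rather than as an argument to @router.get. In FastAPI the response class is configured on the route decorator; a default-valued function parameter is instead treated as part of the endpoint's signature. A sketch of the presumably intended form, matching the /bpy_doc and /find_related routes above:

# Sketch of the decorator-level form (assumed intent, not what was committed):
@router.get("/gpu_checker", response_class=PlainTextResponse)
def gpu_checker(gpu_info: str = ""):
    message = gpu_checker_get_message(gpu_info)
    return message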
routers/tool_wiki_search.py
CHANGED
@@ -3,9 +3,11 @@
 import os
 import pickle
 import re
+import torch
 from typing import Dict, List
 from sentence_transformers import util
 from fastapi import APIRouter
+from fastapi.responses import PlainTextResponse
 
 try:
     from .embedding import EMBEDDING_CTX
@@ -16,267 +18,272 @@ router = APIRouter()
 
 MANUAL_DIR = "D:/BlenderDev/blender-manual/manual/"
 BASE_URL = "https://docs.blender.org/manual/en/dev"
-G_cache_path = "routers/embedding/embeddings_manual.pkl"
 G_data = None
 
 
-    G_data['toctree']["copyright"] = parse_file_recursive(
-        MANUAL_DIR, 'copyright.rst')
-
-    G_data['texts'] = texts
-    G_data['embeddings'] = EMBEDDING_CTX.encode(texts)
-
-    """
-    Splits a text into sections based on titles and subtitles, and organizes them into a dictionary.
-
-        prefix (str): prefix to titles and subtitles
-
-        The Blender Community
-        *********************
-
-        Being freely available from the start.
-
-        Independent Sites
-        =================
-
-        There are `several independent websites.
-
-        Getting Support
-        ===============
-
-        Blender's community is one of its greatest features.
-        '''
-
-        result = split_in_topics(text)
-        # result will be:
-        # {
-        #     "# The Blender Community": [
-        #         "Being freely available from the start."
-        #     ],
-        #     "# The Blender Community | Independent Sites": [
-        #         "There are `several independent websites."
-        #     ],
-        #     "# The Blender Community | Getting Support": [
-        #         "Blender's community is one of its greatest features."
-        #     ]
-        # }
-    """
-
-    # Remove patterns ".. word::" and ":word:"
-    text = re.sub(r'\.\. [^\n]+\n+(?: {3,}[^\n]*\n)*|:\w+:', '', text)
-
-    # Regular expression to find titles and subtitles
-    pattern = r'([\*|#|%]{3,}\n[^\n]+\n[\*|#|%]{3,}|(?:={3,}\n)?[^\n]+\n={3,}\n)'
-
-    # Split text by found patterns
-    sections = re.split(pattern, text)
-
-    # Remove possible white spaces at the beginning and end of each section
-    sections = [section for section in sections if section.strip()]
-
-    # Separate sections into a dictionary
-    topics = {}
-    current_title = ''
-    current_topic = prefix
-
-    for section in sections:
-        if match := re.match(r'[\*|#|%]{3,}\n([^\n]+)\n[\*|#|%]{3,}', section):
-            current_topic = current_title = f'{prefix}# {match.group(1)}'
-            topics[current_topic] = []
-        elif match := re.match(r'(?:={3,}\n)?([^\n]+)\n={3,}\n', section):
-            current_topic = current_title + ' | ' + match.group(1)
-            topics[current_topic] = []
-        else:
-            if current_topic == prefix:
-                raise
-            topics[current_topic].append(section)
-
-    return topics
-
-
-# Function to split the text into chunks of a maximum number of tokens
-def split_into_many(page_body, prefix=''):
-    tokenizer = EMBEDDING_CTX.model.tokenizer
-    max_tokens = EMBEDDING_CTX.model.max_seq_length
-    topics = split_into_topics(page_body, prefix)
-
-    for topic, content_list in topics.items():
-        title = topic + ':\n'
-        title_tokens_len = len(tokenizer.tokenize(title))
-        content_list_new = []
-        for content in content_list:
-            content_reduced = reduce_text(content)
-            content_tokens_len = len(tokenizer.tokenize(content_reduced))
-            if title_tokens_len + content_tokens_len <= max_tokens:
-                content_list_new.append(content_reduced)
-                continue
-
-            # Split the text into sentences
-            paragraphs = content_reduced.split('.\n')
-            sentences = ''
-            tokens_so_far = title_tokens_len
-
-            # Loop through the sentences and tokens joined together in a tuple
-            for sentence in paragraphs:
-                sentence += '.\n'
-
-                # Get the number of tokens for each sentence
-                n_tokens = len(tokenizer.tokenize(sentence))
-
-                # If the number of tokens so far plus the number of tokens in the current sentence is greater
-                # than the max number of tokens, then add the chunk to the list of chunks and reset
-                # the chunk and tokens so far
-                if tokens_so_far + n_tokens > max_tokens:
-                    content_list_new.append(sentences)
-                    sentences = ''
-                    tokens_so_far = title_tokens_len
-
-                tokens_so_far += n_tokens
-
-        #
-        content_list.clear()
-        content_list.extend(content_list_new)
-
-            result.append(topic + ':\n' + content)
-
-        for key in page['toctree'].keys():
-            page_child = page['toctree'][key]
-            result.extend(get_texts_recursive(page_child, f'{path}/{key}'))
-    except KeyError:
-        pass
-
-    ret = util.semantic_search(
-        query_emb, data['embeddings'], top_k=limit, score_function=util.dot_score)
-
-    texts = data['texts']
-    for score in ret[0]:
-        corpus_id = score['corpus_id']
-        text = texts[corpus_id]
-        results.append(text)
-
+class _Data(dict):
+    cache_path = "routers/embedding/embeddings_manual.pkl"
+
+    @staticmethod
+    def reduce_text(text):
+        # Remove repeated characters
+        text = re.sub(r'%{2,}', '', text)  # Title
+        text = re.sub(r'#{2,}', '', text)  # Title
+        text = re.sub(r'\*{3,}', '', text)  # Title
+        text = re.sub(r'={3,}', '', text)  # Topic
+        text = re.sub(r'\^{3,}', '', text)
+        text = re.sub(r'-{3,}', '', text)
+
+        text = re.sub(r'(\s*\n\s*)+', '\n', text)
+        return text
+
+    @classmethod
+    def parse_file_recursive(cls, filedir, filename):
+        with open(os.path.join(filedir, filename), 'r', encoding='utf-8') as file:
+            content = file.read()
+
+        parsed_data = {}
+
+        if not filename.endswith('index.rst'):
+            body = content.strip()
+        else:
+            parts = content.split(".. toctree::")
+            body = parts[0].strip()
+
+            if len(parts) > 1:
+                parsed_data["toctree"] = {}
+                for part in parts[1:]:
+                    toctree_entries = part.split('\n')
+                    line = toctree_entries[0]
+                    for entry in toctree_entries[1:]:
+                        entry = entry.strip()
+                        if not entry:
+                            continue
+
+                        if entry.startswith('/'):
+                            # relative path.
+                            continue
+
+                        if not entry.endswith('.rst'):
+                            continue
+
+                        if entry.endswith('/index.rst'):
+                            entry_name = entry[:-10]
+                            filedir_ = os.path.join(filedir, entry_name)
+                            filename_ = 'index.rst'
+                        else:
+                            entry_name = entry[:-4]
+                            filedir_ = filedir
+                            filename_ = entry
+
+                        parsed_data['toctree'][entry_name] = cls.parse_file_recursive(
+                            filedir_, filename_)
+
+        # The '\n' at the end of the file resolves regex patterns
+        parsed_data['body'] = body + '\n'
+
+        return parsed_data
+
+    @staticmethod
+    def split_into_topics(text: str, prefix: str = '') -> Dict[str, List[str]]:
+        """
+        Splits a text into sections based on titles and subtitles, and organizes them into a dictionary.
+
+        Args:
+            text (str): The input text to be split. The text should contain titles marked by asterisks (***)
+                        or subtitles marked by equal signs (===).
+            prefix (str): prefix to titles and subtitles
+
+        Returns:
+            Dict[str, List[str]]: A dictionary where keys are section titles or subtitles, and values are lists of
+                                  strings corresponding to the content under each title or subtitle.
+
+        Example:
+            text = '''
+            *********************
+            The Blender Community
+            *********************
+
+            Being freely available from the start.
+
+            Independent Sites
+            =================
+
+            There are `several independent websites.
+
+            Getting Support
+            ===============
+
+            Blender's community is one of its greatest features.
+            '''
+
+            result = split_in_topics(text)
+            # result will be:
+            # {
+            #     "# The Blender Community": [
+            #         "Being freely available from the start."
+            #     ],
+            #     "# The Blender Community | Independent Sites": [
+            #         "There are `several independent websites."
+            #     ],
+            #     "# The Blender Community | Getting Support": [
+            #         "Blender's community is one of its greatest features."
+            #     ]
+            # }
+        """
+
+        # Remove patterns ".. word::" and ":word:"
+        text = re.sub(r'\.\. [^\n]+\n+(?: {3,}[^\n]*\n)*|:\w+:', '', text)
+
+        # Regular expression to find titles and subtitles
+        pattern = r'([\*|#|%]{3,}\n[^\n]+\n[\*|#|%]{3,}|(?:={3,}\n)?[^\n]+\n={3,}\n)'
+
+        # Split text by found patterns
+        sections = re.split(pattern, text)
+
+        # Remove possible white spaces at the beginning and end of each section
+        sections = [section for section in sections if section.strip()]
+
+        # Separate sections into a dictionary
+        topics = {}
+        current_title = ''
+        current_topic = prefix
+
+        for section in sections:
+            if match := re.match(r'[\*|#|%]{3,}\n([^\n]+)\n[\*|#|%]{3,}', section):
+                current_topic = current_title = f'{prefix}# {match.group(1)}'
+                topics[current_topic] = []
+            elif match := re.match(r'(?:={3,}\n)?([^\n]+)\n={3,}\n', section):
+                current_topic = current_title + ' | ' + match.group(1)
+                topics[current_topic] = []
+            else:
+                if current_topic == prefix:
+                    raise
+                topics[current_topic].append(section)
+
+        return topics
+
+    @classmethod
+    def split_into_many(cls, page_body, prefix=''):
+        """
+        # Function to split the text into chunks of a maximum number of tokens
+        """
+        tokenizer = EMBEDDING_CTX.model.tokenizer
+        max_tokens = EMBEDDING_CTX.model.max_seq_length
+        topics = cls.split_into_topics(page_body, prefix)
+
+        for topic, content_list in topics.items():
+            title = topic + ':\n'
+            title_tokens_len = len(tokenizer.tokenize(title))
+            content_list_new = []
+            for content in content_list:
+                content_reduced = cls.reduce_text(content)
+                content_tokens_len = len(tokenizer.tokenize(content_reduced))
+                if title_tokens_len + content_tokens_len <= max_tokens:
+                    content_list_new.append(content_reduced)
+                    continue
+
+                # Split the text into sentences
+                paragraphs = content_reduced.split('.\n')
+                sentences = ''
+                tokens_so_far = title_tokens_len
+
+                # Loop through the sentences and tokens joined together in a tuple
+                for sentence in paragraphs:
+                    sentence += '.\n'
+
+                    # Get the number of tokens for each sentence
+                    n_tokens = len(tokenizer.tokenize(sentence))
+
+                    # If the number of tokens so far plus the number of tokens in the current sentence is greater
+                    # than the max number of tokens, then add the chunk to the list of chunks and reset
+                    # the chunk and tokens so far
+                    if tokens_so_far + n_tokens > max_tokens:
+                        content_list_new.append(sentences)
+                        sentences = ''
+                        tokens_so_far = title_tokens_len
+
+                    sentences += sentence
+                    tokens_so_far += n_tokens
+
+                if sentences:
+                    content_list_new.append(sentences)
+
+            # Replace content_list
+            content_list.clear()
+            content_list.extend(content_list_new)
+
+        result = []
+        for topic, content_list in topics.items():
+            for content in content_list:
+                result.append(topic + ':\n' + content)
+
+        return result
+
+    @classmethod
+    def get_texts_recursive(cls, page, path=''):
+        result = cls.split_into_many(page['body'], path)
+
+        try:
+            for key in page['toctree'].keys():
+                page_child = page['toctree'][key]
+                result.extend(cls.get_texts_recursive(
+                    page_child, f'{path}/{key}'))
+        except KeyError:
+            pass
+
+        return result
+
+    def _embeddings_generate(self):
+        if os.path.exists(self.cache_path):
+            with open(self.cache_path, 'rb') as file:
+                data = pickle.load(file)
+                self.update(data)
+            return self
+
+        # Generate
+
+        manual = self.parse_file_recursive(MANUAL_DIR, 'index.rst')
+        manual['toctree']["copyright"] = self.parse_file_recursive(
+            MANUAL_DIR, 'copyright.rst')
+
+        # Create a list to store the text files
+        texts = self.get_texts_recursive(manual)
+
+        print("Embedding Texts...")
+        self['texts'] = texts
+        self['embeddings'] = EMBEDDING_CTX.encode(texts)
+
+        with open(self.cache_path, "wb") as file:
+            # Converting the embeddings to be CPU compatible, as the virtual machine in use currently only supports the CPU.
+            self['embeddings'] = self['embeddings'].to(torch.device('cpu'))
+            pickle.dump(dict(self), file, protocol=pickle.HIGHEST_PROTOCOL)
+
+        return G_data
+
+    def _sort_similarity(self, text_to_search, limit):
+        results = []
+
+        query_emb = EMBEDDING_CTX.encode([text_to_search])
+        ret = util.semantic_search(
+            query_emb, self['embeddings'], top_k=limit, score_function=util.dot_score)
+
+        texts = self['texts']
+        for score in ret[0]:
+            corpus_id = score['corpus_id']
+            text = texts[corpus_id]
+            results.append(text)
+
+        return results
+
+
+G_data = _Data()
 
 
-@router.get("/wiki_search")
+@router.get("/wiki_search", response_class=PlainTextResponse)
 def wiki_search(query: str = "") -> str:
-    data = _embeddings_generate()
-    texts = _sort_similarity(
+    data = G_data._embeddings_generate()
+    texts = G_data._sort_similarity(query, 5)
 
     result = f'BASE_URL: {BASE_URL}\n'
     for text in texts:
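The load-bearing piece of this file is the chunking in split_into_many: each topic's text must fit the embedding model's sequence limit, and the topic title's token cost is re-counted against every chunk's budget so the title can later be prepended to each chunk. A stripped-down sketch of the same token-budget loop, with a hypothetical tokenize callable standing in for EMBEDDING_CTX.model.tokenizer.tokenize:

# Sketch only: same flush-and-restart budget logic as split_into_many,
# minus the manual-specific parsing.
def chunk_by_tokens(title, paragraphs, tokenize, max_tokens):
    title_len = len(tokenize(title))
    chunks, current, used = [], '', title_len
    for paragraph in paragraphs:
        n = len(tokenize(paragraph))
        # Flush when this paragraph would exceed the budget, then
        # restart the count from the title's token cost.
        if used + n > max_tokens and current:
            chunks.append(current)
            current, used = '', title_len
        current += paragraph
        used += n
    if current:
        chunks.append(current)
    return [title + chunk for chunk in chunks]

# Example with a whitespace "tokenizer":
print(chunk_by_tokens('T:\n', ['a b.\n', 'c d e.\n', 'f.\n'],
                      str.split, max_tokens=5))

Note that the commit also fixes a real bug in this loop: the old version incremented tokens_so_far without ever doing sentences += sentence, so oversized topics produced empty chunks; the new method accumulates the sentence and flushes the remainder after the loop.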