Spaces:
Sleeping
Sleeping
Amy Roberts
committed on
Commit
•
7d5704e
1
Parent(s):
12ae336
Move to utils
Browse files- app.py +9 -10
- retrieval.py +0 -80
- utils/__init__.py +0 -0
- build_embeddings.py → utils/build_embeddings.py +0 -0
- build_issue_dict.py → utils/build_issue_dict.py +0 -0
- defaults.py → utils/defaults.py +0 -0
- fetch.py → utils/fetch.py +1 -1
- find_similar_issues.py → utils/find_similar_issues.py +0 -0
- update_embeddings.py → utils/update_embeddings.py +0 -0
- update_stored_issues.py → utils/update_stored_issues.py +1 -1
app.py
CHANGED
@@ -1,17 +1,16 @@
|
|
1 |
import datetime
|
2 |
-
import gradio as gr
|
3 |
import os
|
4 |
-
from find_similar_issues import get_similar_issues
|
5 |
-
import requests
|
6 |
-
|
7 |
-
from defaults import OWNER, REPO
|
8 |
-
|
9 |
-
import build_issue_dict
|
10 |
-
import build_embeddings
|
11 |
import shutil
|
12 |
-
from fetch import get_issues
|
13 |
-
from update_stored_issues import update_issues
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
|
17 |
def get_query_issue_information(issue_no, token):
|
|
|
1 |
import datetime
|
|
|
2 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import shutil
|
|
|
|
|
4 |
|
5 |
+
import gradio as gr
|
6 |
+
import requests
|
7 |
+
|
8 |
+
from utils import build_issue_dict
|
9 |
+
from utils import build_embeddings
|
10 |
+
from utils.defaults import OWNER, REPO
|
11 |
+
from utils.fetch import get_issues
|
12 |
+
from utils.find_similar_issues import get_similar_issues
|
13 |
+
from utils.update_stored_issues import update_issues
|
14 |
|
15 |
|
16 |
def get_query_issue_information(issue_no, token):
|
retrieval.py
DELETED
@@ -1,80 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
Module which contains functionality to retrieve the most similar issues for a given query
|
3 |
-
"""
|
4 |
-
|
5 |
-
|
6 |
-
import argparse
|
7 |
-
import json
|
8 |
-
|
9 |
-
import numpy as np
|
10 |
-
from sentence_transformers import SentenceTransformer
|
11 |
-
|
12 |
-
def cosine_similarity(a, b):
|
13 |
-
if a.ndim == 1:
|
14 |
-
a = a.reshape(1, -1)
|
15 |
-
|
16 |
-
if b.ndim == 1:
|
17 |
-
b = b.reshape(1, -1)
|
18 |
-
|
19 |
-
return np.dot(a, b.T) / (np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1))
|
20 |
-
|
21 |
-
|
22 |
-
def retrieve_issue_rankings(
|
23 |
-
query: str,
|
24 |
-
model_id: str,
|
25 |
-
input_embedding_filename: str,
|
26 |
-
):
|
27 |
-
"""
|
28 |
-
Given a query returns the list of issues sorted by similarity to the query
|
29 |
-
according to their embedding index
|
30 |
-
"""
|
31 |
-
model = SentenceTransformer(model_id)
|
32 |
-
|
33 |
-
embeddings = np.load(input_embedding_filename)
|
34 |
-
|
35 |
-
query_embedding = model.encode(query)
|
36 |
-
|
37 |
-
# Calculate the cosine similarity between the query and all the issues
|
38 |
-
cosine_similarities = cosine_similarity(query_embedding, embeddings)
|
39 |
-
|
40 |
-
# Get the index of the most similar issue
|
41 |
-
most_similar_indices = np.argsort(cosine_similarities)
|
42 |
-
most_similar_indices = most_similar_indices[0][::-1]
|
43 |
-
return most_similar_indices
|
44 |
-
|
45 |
-
|
46 |
-
def print_issue(issues, issue_id):
|
47 |
-
# Get the issue id of the most similar issue
|
48 |
-
issue_info = issues[issue_id]
|
49 |
-
|
50 |
-
print(f"#{issue_id}", issue_info["title"])
|
51 |
-
print(issue_info["body"])
|
52 |
-
|
53 |
-
|
54 |
-
if __name__ == "__main__":
|
55 |
-
parser = argparse.ArgumentParser()
|
56 |
-
parser.add_argument("query", type=str)
|
57 |
-
parser.add_argument("--model_id", type=str, default="all-mpnet-base-v2")
|
58 |
-
parser.add_argument("--input_embedding_filename", type=str, default="issue_embeddings.npy")
|
59 |
-
parser.add_argument("--input_index_filename", type=str, default="embedding_index_to_issue.json")
|
60 |
-
|
61 |
-
args = parser.parse_args()
|
62 |
-
|
63 |
-
issue_rankings = retrieve_issue_rankings(
|
64 |
-
query=args.query,
|
65 |
-
model_id=args.model_id,
|
66 |
-
input_embedding_filename=args.input_embedding_filename,
|
67 |
-
)
|
68 |
-
|
69 |
-
with open("issues_dict.json", "r") as f:
|
70 |
-
issues = json.load(f)
|
71 |
-
|
72 |
-
with open(args.input_index_filename, "r") as f:
|
73 |
-
embedding_index_to_issue = json.load(f)
|
74 |
-
|
75 |
-
issue_ids = [embedding_index_to_issue[str(i)] for i in issue_rankings]
|
76 |
-
|
77 |
-
for issue_id in issue_ids[:3]:
|
78 |
-
print(issue_id)
|
79 |
-
print_issue(issues, issue_id)
|
80 |
-
print("\n\n\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/__init__.py
ADDED
File without changes
|
build_embeddings.py → utils/build_embeddings.py
RENAMED
File without changes
|
build_issue_dict.py → utils/build_issue_dict.py
RENAMED
File without changes
|
defaults.py → utils/defaults.py
RENAMED
File without changes
|
fetch.py → utils/fetch.py
RENAMED
@@ -22,7 +22,7 @@ import os
|
|
22 |
import requests
|
23 |
import numpy as np
|
24 |
|
25 |
-
from defaults import OWNER, REPO, GITHUB_API_VERSION, TOKEN, ISSUE_JSON_FILE
|
26 |
|
27 |
logging.basicConfig(level=logging.INFO)
|
28 |
logger = logging.getLogger(__name__)
|
|
|
22 |
import requests
|
23 |
import numpy as np
|
24 |
|
25 |
+
from .defaults import OWNER, REPO, GITHUB_API_VERSION, TOKEN, ISSUE_JSON_FILE
|
26 |
|
27 |
logging.basicConfig(level=logging.INFO)
|
28 |
logger = logging.getLogger(__name__)
|
find_similar_issues.py → utils/find_similar_issues.py
RENAMED
File without changes
|
update_embeddings.py → utils/update_embeddings.py
RENAMED
File without changes
|
update_stored_issues.py → utils/update_stored_issues.py
RENAMED
@@ -18,7 +18,7 @@ import os
|
|
18 |
import numpy as np
|
19 |
import requests
|
20 |
|
21 |
-
from defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION, ISSUE_JSON_FILE
|
22 |
|
23 |
logging.basicConfig(level=logging.INFO)
|
24 |
logger = logging.getLogger(__name__)
|
|
|
18 |
import numpy as np
|
19 |
import requests
|
20 |
|
21 |
+
from .defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION, ISSUE_JSON_FILE
|
22 |
|
23 |
logging.basicConfig(level=logging.INFO)
|
24 |
logger = logging.getLogger(__name__)
|