taishi-i committed on
Commit
18c08a2
1 Parent(s): 2976c15

update app.py

Browse files
app.py CHANGED
@@ -1,9 +1,17 @@
 
1
  import json
2
 
 
3
  import streamlit as st
4
  from pyserini.search.lucene import LuceneSearcher
5
 
6
 
 
 
 
 
 
 
7
  class SearchApplication:
8
  def __init__(self):
9
  self.title = "Awesome ChatGPT repositories search"
@@ -26,6 +34,10 @@ class SearchApplication:
26
  )
27
  st.write("#")
28
 
 
 
 
 
29
  self.show_popular_words()
30
  self.show_search_results()
31
 
@@ -37,13 +49,13 @@ class SearchApplication:
37
  )
38
 
39
  def set_searcher(self):
40
- searcher = LuceneSearcher("indexes/docs")
41
  return searcher
42
 
43
  def show_popular_words(self):
44
  st.caption("Popular words")
45
 
46
- word1, word2, word3, word4, _ = st.columns(5)
47
  with word1:
48
  button1 = st.button("Prompt")
49
  if button1:
@@ -64,25 +76,78 @@ class SearchApplication:
64
  if button4:
65
  self.query = "extension"
66
 
 
 
 
 
 
 
 
 
 
 
67
  def show_search_results(self):
68
  if self.query or self.search_button:
69
  st.write("#")
70
 
71
  search_results = self.searcher.search(self.query, k=500)
72
  num_search_results = len(search_results)
73
- st.write(f"{num_search_results} results")
74
-
75
- for result in search_results:
76
- data_json = json.loads(result.raw)
77
- description = data_json["description"]
78
- url = data_json["url"]
79
- project_name = data_json["project_name"]
80
- main_topic = data_json["main_topic"]
81
-
82
- st.subheader(f"[{project_name}]({url})")
83
- st.markdown(description)
84
- st.caption(f"{main_topic}")
85
- st.write("#")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
 
88
  def main():
 
1
+ import difflib
2
  import json
3
 
4
+ import numpy as np
5
  import streamlit as st
6
  from pyserini.search.lucene import LuceneSearcher
7
 
8
 
9
def read_json(file_name):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The decoded JSON value, exactly as produced by ``json.load``.
    """
    # Decode as UTF-8 explicitly instead of relying on the platform's locale
    # encoding; the candidate word list contains non-ASCII text.
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)
13
+
14
+
15
  class SearchApplication:
16
  def __init__(self):
17
  self.title = "Awesome ChatGPT repositories search"
 
34
  )
35
  st.write("#")
36
 
37
+ candidate_words_file = "candidate_words.json"
38
+ candidate_words_json = read_json(candidate_words_file)
39
+ self.candidate_words = candidate_words_json["candidate_words"]
40
+
41
  self.show_popular_words()
42
  self.show_search_results()
43
 
 
49
  )
50
 
51
  def set_searcher(self):
52
+ searcher = LuceneSearcher("indexes/temp")
53
  return searcher
54
 
55
  def show_popular_words(self):
56
  st.caption("Popular words")
57
 
58
+ word1, word2, word3, word4, word5, word6 = st.columns(6)
59
  with word1:
60
  button1 = st.button("Prompt")
61
  if button1:
 
76
  if button4:
77
  self.query = "extension"
78
 
79
+ with word5:
80
+ button5 = st.button("LLMs")
81
+ if button5:
82
+ self.query = "llms"
83
+
84
+ with word6:
85
+ button6 = st.button("API")
86
+ if button6:
87
+ self.query = "api"
88
+
89
  def show_search_results(self):
90
  if self.query or self.search_button:
91
  st.write("#")
92
 
93
  search_results = self.searcher.search(self.query, k=500)
94
  num_search_results = len(search_results)
95
+ st.write(f"A total of {num_search_results} repositories found.")
96
+
97
+ if num_search_results > 0:
98
+
99
+ json_search_results = []
100
+ for result in search_results:
101
+ json_data = json.loads(result.raw)
102
+ json_search_results.append(json_data)
103
+
104
+ for json_data in sorted(
105
+ json_search_results, key=lambda x: x["freq"], reverse=True
106
+ ):
107
+
108
+ description = json_data["description"]
109
+ url = json_data["url"]
110
+ project_name = json_data["project_name"]
111
+
112
+ st.write("---")
113
+ st.subheader(f"[{project_name}]({url})")
114
+ st.write(description)
115
+
116
+ info = []
117
+ language = json_data["language"]
118
+ if language is not None and len(language) > 0:
119
+ info.append(language)
120
+ else:
121
+ info.append("Laugage: Unkwown")
122
+
123
+ license = json_data["license"]
124
+ if license is not None:
125
+ info.append(license["name"])
126
+ else:
127
+ info.append("License: Unkwown")
128
+
129
+ st.caption(" / ".join(info))
130
+
131
+ else:
132
+
133
+ if len(self.query) > 0:
134
+ scores = []
135
+ for candidate_word in self.candidate_words:
136
+ score = difflib.SequenceMatcher(
137
+ None, self.query, candidate_word
138
+ ).ratio()
139
+ scores.append(score)
140
+
141
+ num_candidate_words = 6
142
+
143
+ indexes = np.argsort(scores)[::-1][:num_candidate_words]
144
+ suggestions = [self.candidate_words[i] for i in indexes]
145
+ suggestions = sorted(
146
+ set(suggestions), key=suggestions.index
147
+ )
148
+ st.caption("Suggestions")
149
+ for i, word in enumerate(suggestions, start=1):
150
+ st.write(f"{i}: {word}")
151
 
152
 
153
  def main():
candidate_words.json ADDED
@@ -0,0 +1,597 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "candidate_words": [
3
+ "curated",
4
+ "allow",
5
+ "enabling",
6
+ "includes",
7
+ "docs",
8
+ "curated",
9
+ "apis",
10
+ "consider",
11
+ "friendly",
12
+ "slack",
13
+ "read",
14
+ "webpage",
15
+ "communicate",
16
+ "tried",
17
+ "self",
18
+ "structured",
19
+ "discord",
20
+ "generate",
21
+ "ask",
22
+ "general",
23
+ "papers",
24
+ "lecture",
25
+ "notebooks",
26
+ "engineering",
27
+ "promptr",
28
+ "dynamically",
29
+ "parse",
30
+ "apply",
31
+ "included",
32
+ "purposes",
33
+ "reverse",
34
+ "adventure",
35
+ "image",
36
+ "efficiency",
37
+ "javascript",
38
+ "prompts",
39
+ "create",
40
+ "continuous",
41
+ "creating",
42
+ "created",
43
+ "want",
44
+ "23",
45
+ "oriented",
46
+ "branch",
47
+ "apple",
48
+ "taking",
49
+ "software",
50
+ "notes",
51
+ "speed",
52
+ "product",
53
+ "set",
54
+ "sentence",
55
+ "stories",
56
+ "systems",
57
+ "domain",
58
+ "kubernetes",
59
+ "issues",
60
+ "having",
61
+ "telegram",
62
+ "la",
63
+ "moderation",
64
+ "custom",
65
+ "summarizer",
66
+ "wechat",
67
+ "integrate",
68
+ "databricks",
69
+ "platform",
70
+ "repl",
71
+ "basic",
72
+ "continuously",
73
+ "bots",
74
+ "functions",
75
+ "teach",
76
+ "corpus",
77
+ "talk",
78
+ "reflection",
79
+ "art",
80
+ "got",
81
+ "website",
82
+ "tweets",
83
+ "twitter",
84
+ "post",
85
+ "fixed",
86
+ "comment",
87
+ "gpt4",
88
+ "state",
89
+ "exploring",
90
+ "macos",
91
+ "studio",
92
+ "sample",
93
+ "agents",
94
+ "typescript",
95
+ "projects",
96
+ "voice",
97
+ "gptchat",
98
+ "aims",
99
+ "provide",
100
+ "stream",
101
+ "3d",
102
+ "text",
103
+ "plugin",
104
+ "find",
105
+ "ida",
106
+ "@ykdojo",
107
+ "decompilation",
108
+ "coming",
109
+ "soon",
110
+ "snippets",
111
+ "session",
112
+ "notebook",
113
+ "generates",
114
+ "link",
115
+ "box",
116
+ "minimal",
117
+ "google",
118
+ "editor",
119
+ "manifest",
120
+ "v3",
121
+ "video",
122
+ "vscode",
123
+ "browser",
124
+ "cross",
125
+ "translation",
126
+ "obsidian",
127
+ "enhance",
128
+ "engines",
129
+ "response",
130
+ "next.js",
131
+ "react",
132
+ "friends",
133
+ "vim",
134
+ "neovim",
135
+ "plugins",
136
+ "ts",
137
+ "experiment",
138
+ "accomplish",
139
+ "brings",
140
+ "powered",
141
+ "perform",
142
+ "vulnerabilities",
143
+ "list",
144
+ "giving",
145
+ "nmap",
146
+ "modules",
147
+ "great",
148
+ "module",
149
+ "xml",
150
+ "shows",
151
+ "main",
152
+ "impressive",
153
+ "native",
154
+ "prepare",
155
+ "msg",
156
+ "hook",
157
+ "client",
158
+ "feature",
159
+ "weight",
160
+ "superpowers",
161
+ "database",
162
+ "reliable",
163
+ "feedback",
164
+ "palm",
165
+ "architecture",
166
+ "generative",
167
+ "transformer",
168
+ "transformers",
169
+ "neural",
170
+ "nanogpt",
171
+ "gptq",
172
+ "quantization",
173
+ "implementations",
174
+ "distilbert",
175
+ "answering",
176
+ "30",
177
+ "backend",
178
+ "train",
179
+ "performance",
180
+ "generating",
181
+ "gpt2",
182
+ "integrated",
183
+ "bilingual",
184
+ "implemented",
185
+ "game",
186
+ "seconds",
187
+ "citations",
188
+ "quantized",
189
+ "fine",
190
+ "emails",
191
+ "scale",
192
+ "problem",
193
+ "blog",
194
+ "reading",
195
+ "scale",
196
+ "languages",
197
+ "vision",
198
+ "transform",
199
+ "json",
200
+ "translate",
201
+ "locally",
202
+ "instruction",
203
+ "tuned",
204
+ "style",
205
+ "native",
206
+ "write",
207
+ "flask",
208
+ "siri",
209
+ "question",
210
+ "long",
211
+ "galactica",
212
+ "llamaindex",
213
+ "external",
214
+ "inference",
215
+ "specific",
216
+ "names",
217
+ "control",
218
+ "llama",
219
+ "react",
220
+ "supported",
221
+ "port",
222
+ "mozilla",
223
+ "directory",
224
+ "pure",
225
+ "benchmarks",
226
+ "unofficial",
227
+ "wolverine",
228
+ "runtime",
229
+ "menubar",
230
+ "power",
231
+ "app",
232
+ "dall·e",
233
+ "ruby",
234
+ ".net",
235
+ "access",
236
+ "fastest",
237
+ "medium",
238
+ "sized",
239
+ "gpts",
240
+ "networks",
241
+ "detection",
242
+ "abuseipdb",
243
+ "report",
244
+ "fast",
245
+ "flexible",
246
+ "cloud",
247
+ "web",
248
+ "program",
249
+ "bypass",
250
+ "effective",
251
+ "dl",
252
+ "deep",
253
+ "pytorch",
254
+ "alternative",
255
+ "javascript",
256
+ "high",
257
+ "cpu",
258
+ "server",
259
+ "flutter",
260
+ "room",
261
+ "profile",
262
+ "automatically",
263
+ "notification",
264
+ "powerful",
265
+ "security",
266
+ "quality",
267
+ "gt",
268
+ "effects",
269
+ "vosviewer",
270
+ "chinese",
271
+ "configure",
272
+ "demos",
273
+ "format",
274
+ "built",
275
+ "summarize",
276
+ "youtube",
277
+ "blender",
278
+ "engineering",
279
+ "latest",
280
+ "2023",
281
+ "foundation",
282
+ "makes",
283
+ "learners",
284
+ "chatbot",
285
+ "technology",
286
+ "capabilities",
287
+ "responses",
288
+ "files",
289
+ "engineered",
290
+ "message",
291
+ "starting",
292
+ "productivity",
293
+ "building",
294
+ "fine",
295
+ "useful",
296
+ "light",
297
+ "released",
298
+ "correct",
299
+ "play",
300
+ "cases",
301
+ "biomed",
302
+ "purpose",
303
+ "tuned",
304
+ "discord",
305
+ "machine",
306
+ "similar",
307
+ "support",
308
+ "generating",
309
+ "proof",
310
+ "concept",
311
+ "gpt-3.5",
312
+ "let",
313
+ "hours",
314
+ "human",
315
+ "cloudflare",
316
+ "time",
317
+ "comments",
318
+ "designed",
319
+ "history",
320
+ "experience",
321
+ "edits",
322
+ "error",
323
+ "langchain",
324
+ "sdk",
325
+ "minimal",
326
+ "ios",
327
+ "free",
328
+ "lets",
329
+ "easily",
330
+ "google",
331
+ "decompiler",
332
+ "plugins",
333
+ "deploy",
334
+ "running",
335
+ "local",
336
+ "directly",
337
+ "development",
338
+ "openai",
339
+ "extension",
340
+ "platform",
341
+ "generator",
342
+ "helps",
343
+ "agents",
344
+ "js",
345
+ "experimental",
346
+ "emacs",
347
+ "ask",
348
+ "programming",
349
+ "scan",
350
+ "reverse",
351
+ "faster",
352
+ "auto",
353
+ "shell",
354
+ "english",
355
+ "integrate",
356
+ "translate",
357
+ "writes",
358
+ "php",
359
+ "multiple",
360
+ "quick",
361
+ "sources",
362
+ "implementation",
363
+ "rlhf",
364
+ "reinforcement",
365
+ "chain",
366
+ "reasoning",
367
+ "gpt-2",
368
+ "deepspeed",
369
+ "pre",
370
+ "attention",
371
+ "tuning",
372
+ "c++",
373
+ "bert",
374
+ "deep",
375
+ "stanford",
376
+ "distributed",
377
+ "transformer",
378
+ "instruction",
379
+ "finetuning",
380
+ "dataset",
381
+ "favorite",
382
+ "quickly",
383
+ "gpt-3",
384
+ "amp",
385
+ "llama.cpp",
386
+ "index",
387
+ "examples",
388
+ "bindings",
389
+ "simplest",
390
+ "machine",
391
+ "experimental",
392
+ "macos",
393
+ "memory",
394
+ "efficient",
395
+ "check",
396
+ "backend",
397
+ "list",
398
+ "users",
399
+ "free",
400
+ "better",
401
+ "awesome",
402
+ "community",
403
+ "chatgpt",
404
+ "resources",
405
+ "robot",
406
+ "prompt",
407
+ "generation",
408
+ "knowledge",
409
+ "codebase",
410
+ "templates",
411
+ "official",
412
+ "game",
413
+ "images",
414
+ "page",
415
+ "share",
416
+ "version",
417
+ "build",
418
+ "gpt4all",
419
+ "trained",
420
+ "interact",
421
+ "attempt",
422
+ "style",
423
+ "swift",
424
+ "enables",
425
+ "generated",
426
+ "agent",
427
+ "need",
428
+ "langchain",
429
+ "pdf",
430
+ "dall",
431
+ "documentation",
432
+ "android",
433
+ "application",
434
+ "framework",
435
+ "automatically",
436
+ "results",
437
+ "content",
438
+ "interacting",
439
+ "nlp",
440
+ "analysis",
441
+ "file",
442
+ "proxy",
443
+ "dns",
444
+ "node.js",
445
+ "library",
446
+ "official",
447
+ "ml",
448
+ "para",
449
+ "alpaca",
450
+ "lines",
451
+ "contains",
452
+ "python",
453
+ "github",
454
+ "way",
455
+ "desktop",
456
+ "codes",
457
+ "applications",
458
+ "documents",
459
+ "research",
460
+ "bing",
461
+ "desktop",
462
+ "mac",
463
+ "conversations",
464
+ "key",
465
+ "collection",
466
+ "application",
467
+ "whisper",
468
+ "conversation",
469
+ "telegram",
470
+ "script",
471
+ "gpu",
472
+ "test",
473
+ "easy",
474
+ "review",
475
+ "access",
476
+ "visual",
477
+ "run",
478
+ "git",
479
+ "real",
480
+ "queries",
481
+ "chrome",
482
+ "transformers",
483
+ "building",
484
+ "unofficial",
485
+ "shell",
486
+ "simple",
487
+ "scripts",
488
+ "inference",
489
+ "rust",
490
+ "package",
491
+ "fully",
492
+ "optimization",
493
+ "apps",
494
+ "tools",
495
+ "including",
496
+ "project",
497
+ "gpt3",
498
+ "uses",
499
+ "supports",
500
+ "linux",
501
+ "turbo",
502
+ "work",
503
+ "100",
504
+ "information",
505
+ "fast",
506
+ "api",
507
+ "learning",
508
+ "answers",
509
+ "natural",
510
+ "autonomous",
511
+ "provides",
512
+ "wrapper",
513
+ "de",
514
+ "unity",
515
+ "voice",
516
+ "build",
517
+ "learning",
518
+ "assistant",
519
+ "large",
520
+ "model",
521
+ "messages",
522
+ "paper",
523
+ "questions",
524
+ "integration",
525
+ "windows",
526
+ "gpt-3.5",
527
+ "tasks",
528
+ "written",
529
+ "large",
530
+ "single",
531
+ "run",
532
+ "gpt",
533
+ "commands",
534
+ "implementation",
535
+ "create",
536
+ "ui",
537
+ "gpt-4",
538
+ "interface",
539
+ "allows",
540
+ "chat",
541
+ "llama",
542
+ "new",
543
+ "text",
544
+ "use",
545
+ "models",
546
+ "open",
547
+ "chatbot",
548
+ "code",
549
+ "commit",
550
+ "training",
551
+ "repo",
552
+ "repository",
553
+ "llm",
554
+ "prompts",
555
+ "help",
556
+ "cli",
557
+ "line",
558
+ "browser",
559
+ "terminal",
560
+ "tool",
561
+ "search",
562
+ "like",
563
+ "simple",
564
+ "generate",
565
+ "prompt",
566
+ "command",
567
+ "source",
568
+ "language",
569
+ "plugin",
570
+ "client",
571
+ "use",
572
+ "open",
573
+ "llms",
574
+ "gpt-4",
575
+ "app",
576
+ "web",
577
+ "extension",
578
+ "library",
579
+ "data",
580
+ "bot",
581
+ "chat",
582
+ "powered",
583
+ "python",
584
+ "gpt-3",
585
+ "based",
586
+ "chatgpt",
587
+ "models",
588
+ "model",
589
+ "code",
590
+ "gpt",
591
+ "language",
592
+ "ai",
593
+ "openai",
594
+ "api",
595
+ "chatgpt"
596
+ ]
597
+ }
indexes/docs/_2.fdm ADDED
Binary file (157 Bytes). View file
 
indexes/docs/_2.fdt ADDED
Binary file (85.8 kB). View file
 
indexes/docs/_2.fdx ADDED
Binary file (79 Bytes). View file
 
indexes/docs/_2.fnm ADDED
Binary file (343 Bytes). View file
 
indexes/docs/_2.nvd ADDED
Binary file (617 Bytes). View file
 
indexes/docs/_2.nvm ADDED
Binary file (103 Bytes). View file
 
indexes/docs/_2.si ADDED
Binary file (522 Bytes). View file
 
indexes/docs/_2.tvd ADDED
Binary file (38.2 kB). View file
 
indexes/docs/_2.tvm ADDED
Binary file (162 Bytes). View file
 
indexes/docs/_2.tvx ADDED
Binary file (93 Bytes). View file
 
indexes/docs/_2_Lucene90_0.doc ADDED
Binary file (7.41 kB). View file
 
indexes/docs/_2_Lucene90_0.dvd ADDED
Binary file (2.2 kB). View file
 
indexes/docs/_2_Lucene90_0.dvm ADDED
Binary file (171 Bytes). View file
 
indexes/docs/_2_Lucene90_0.pos ADDED
Binary file (7.42 kB). View file
 
indexes/docs/_2_Lucene90_0.tim ADDED
Binary file (25 kB). View file
 
indexes/docs/_2_Lucene90_0.tip ADDED
Binary file (772 Bytes). View file
 
indexes/docs/_2_Lucene90_0.tmd ADDED
Binary file (283 Bytes). View file
 
indexes/docs/segments_3 ADDED
Binary file (154 Bytes). View file
 
indexes/docs/write.lock ADDED
File without changes