Update README.md
Browse files
README.md
CHANGED
@@ -7,7 +7,7 @@ language:
|
|
7 |
- pt
|
8 |
---
|
9 |
|
10 |
-
A really tiny sentence reranker (models use only
|
11 |
|
12 |
Trained using a mix of embeddings from WordLlama and HashingVectorizer, on a dataset based on MS MARCO.
|
13 |
|
@@ -24,19 +24,18 @@ documents = [
|
|
24 |
"Christopher Nolan is a British-American filmmaker known for his cerebral and nonlinear storytelling in movies like 'Memento', 'The Dark Knight', and 'Inception'.",
|
25 |
"Martin Scorsese directed the crime drama 'Goodfellas', which is considered a masterpiece in the gangster film genre."
|
26 |
]
|
27 |
-
|
28 |
rank(query, documents)
|
29 |
# Output:
|
30 |
# [("'Inception' is a 2010 science fiction film directed by Christopher Nolan. It explores the concept of dream invasion and manipulation.",
|
31 |
-
# 0.
|
|
|
|
|
32 |
# ("'Titanic', directed by James Cameron, was released in 1997 and became one of the highest-grossing films of all time.",
|
33 |
-
# 0.
|
34 |
-
# ("Steven Spielberg is one of the most well-known directors of all time, famous for films like 'E.T.', 'Jaws', and 'Jurassic Park'.",
|
35 |
-
# 0.19266481513294043),
|
36 |
# ("Martin Scorsese directed the crime drama 'Goodfellas', which is considered a masterpiece in the gangster film genre.",
|
37 |
-
# 0.
|
38 |
-
# ("
|
39 |
-
# 0.
|
40 |
|
41 |
query = "What is the speed of light?"
|
42 |
documents = [
|
@@ -46,17 +45,18 @@ documents = [
|
|
46 |
"The Earth orbits the Sun at an average distance of about 93 million miles, taking roughly 365.25 days to complete one revolution.",
|
47 |
"Light can be described as both a wave and a particle, a concept known as wave-particle duality."
|
48 |
]
|
|
|
49 |
# Output:
|
50 |
-
# [(
|
51 |
-
# 0.
|
52 |
-
# ('The speed of light in a vacuum is approximately 299,792 kilometers per second (km/s), or about 186,282 miles per second.',
|
53 |
-
# 0.22389275760593016),
|
54 |
# ('Light can be described as both a wave and a particle, a concept known as wave-particle duality.',
|
55 |
-
# 0.
|
56 |
# ('The Earth orbits the Sun at an average distance of about 93 million miles, taking roughly 365.25 days to complete one revolution.',
|
57 |
-
# 0.
|
|
|
|
|
58 |
# ('The theory of relativity, proposed by Albert Einstein, has revolutionized our understanding of space, time, and gravity.',
|
59 |
-
# 0.
|
60 |
|
61 |
query = "Who wrote 'Pride and Prejudice'?"
|
62 |
documents = [
|
@@ -66,15 +66,37 @@ documents = [
|
|
66 |
"Pride and Prejudice explores themes of love, social status, and individual growth, set in the British Regency era.",
|
67 |
"Jane Austen, an English novelist, is renowned for her works that critique the British landed gentry of the 18th century."
|
68 |
]
|
|
|
69 |
# Output:
|
70 |
-
# [('Pride and Prejudice
|
71 |
-
# 0.
|
72 |
-
# ('
|
73 |
-
# 0.
|
74 |
# ('Jane Austen, an English novelist, is renowned for her works that critique the British landed gentry of the 18th century.',
|
75 |
-
# 0.
|
76 |
-
# ('
|
77 |
-
# 0.
|
78 |
# ('Charlotte Brontë, known for her novel Jane Eyre, was a 19th-century English novelist.',
|
79 |
-
# 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
```
|
|
|
7 |
- pt
|
8 |
---
|
9 |
|
10 |
+
A really tiny sentence reranker (the models use only 120 MB) that runs almost instantly on CPU.
|
11 |
|
12 |
Trained using a mix of embeddings from WordLlama and HashingVectorizer, on a dataset based on MS MARCO.
|
13 |
|
|
|
24 |
"Christopher Nolan is a British-American filmmaker known for his cerebral and nonlinear storytelling in movies like 'Memento', 'The Dark Knight', and 'Inception'.",
|
25 |
"Martin Scorsese directed the crime drama 'Goodfellas', which is considered a masterpiece in the gangster film genre."
|
26 |
]
|
|
|
27 |
rank(query, documents)
|
28 |
# Output:
|
29 |
# [("'Inception' is a 2010 science fiction film directed by Christopher Nolan. It explores the concept of dream invasion and manipulation.",
|
30 |
+
# 0.3140751875733907),
|
31 |
+
# ("Christopher Nolan is a British-American filmmaker known for his cerebral and nonlinear storytelling in movies like 'Memento', 'The Dark Knight', and 'Inception'.",
|
32 |
+
# 0.2776111024668988),
|
33 |
# ("'Titanic', directed by James Cameron, was released in 1997 and became one of the highest-grossing films of all time.",
|
34 |
+
# 0.23555834379639545),
|
|
|
|
|
35 |
# ("Martin Scorsese directed the crime drama 'Goodfellas', which is considered a masterpiece in the gangster film genre.",
|
36 |
+
# 0.13833970116570868),
|
37 |
+
# ("Steven Spielberg is one of the most well-known directors of all time, famous for films like 'E.T.', 'Jaws', and 'Jurassic Park'.",
|
38 |
+
# 0.03441566499760637)]
|
39 |
|
40 |
query = "What is the speed of light?"
|
41 |
documents = [
|
|
|
45 |
"The Earth orbits the Sun at an average distance of about 93 million miles, taking roughly 365.25 days to complete one revolution.",
|
46 |
"Light can be described as both a wave and a particle, a concept known as wave-particle duality."
|
47 |
]
|
48 |
+
rank(query, documents)
|
49 |
# Output:
|
50 |
+
# [('The speed of light in a vacuum is approximately 299,792 kilometers per second (km/s), or about 186,282 miles per second.',
|
51 |
+
# 0.30580432492241644),
|
|
|
|
|
52 |
# ('Light can be described as both a wave and a particle, a concept known as wave-particle duality.',
|
53 |
+
# 0.28595544542990103),
|
54 |
# ('The Earth orbits the Sun at an average distance of about 93 million miles, taking roughly 365.25 days to complete one revolution.',
|
55 |
+
# 0.26599248433979883),
|
56 |
+
# ("Isaac Newton's laws of motion and gravity laid the groundwork for classical mechanics.",
|
57 |
+
# 0.07144421109976119),
|
58 |
# ('The theory of relativity, proposed by Albert Einstein, has revolutionized our understanding of space, time, and gravity.',
|
59 |
+
# 0.07080353420812247)]
|
60 |
|
61 |
query = "Who wrote 'Pride and Prejudice'?"
|
62 |
documents = [
|
|
|
66 |
"Pride and Prejudice explores themes of love, social status, and individual growth, set in the British Regency era.",
|
67 |
"Jane Austen, an English novelist, is renowned for her works that critique the British landed gentry of the 18th century."
|
68 |
]
|
69 |
+
rank(query, documents)
|
70 |
# Output:
|
71 |
+
# [('Pride and Prejudice is a novel written by Jane Austen, first published in 1813. It is a classic of English literature.',
|
72 |
+
# 0.3413168531560433),
|
73 |
+
# ('Pride and Prejudice explores themes of love, social status, and individual growth, set in the British Regency era.',
|
74 |
+
# 0.31806861613354287),
|
75 |
# ('Jane Austen, an English novelist, is renowned for her works that critique the British landed gentry of the 18th century.',
|
76 |
+
# 0.19975825998268765),
|
77 |
+
# ('William Shakespeare is often considered the greatest playwright in the English language, famous for works such as Hamlet, Romeo and Juliet, and Macbeth.',
|
78 |
+
# 0.07942214548601552),
|
79 |
# ('Charlotte Brontë, known for her novel Jane Eyre, was a 19th-century English novelist.',
|
80 |
+
# 0.06143412524171063)]
|
81 |
+
|
82 |
+
query = "Quem escreveu 'Dom Casmurro'?"
|
83 |
+
documents = [
|
84 |
+
"'Dom Casmurro' é um romance escrito por Machado de Assis, publicado pela primeira vez em 1899. É considerado uma das obras-primas da literatura brasileira.",
|
85 |
+
"Machado de Assis, um dos maiores escritores da literatura brasileira, é autor de obras como 'Dom Casmurro', 'Memórias Póstumas de Brás Cubas' e 'Quincas Borba'.",
|
86 |
+
"'O Guarani', um romance escrito por José de Alencar, é um marco do romantismo no Brasil e foi publicado em 1857.",
|
87 |
+
"Clarice Lispector foi uma importante escritora brasileira, conhecida por obras como 'A Hora da Estrela' e 'Perto do Coração Selvagem'.",
|
88 |
+
"'Dom Casmurro' narra a vida de Bento Santiago, conhecido como Bentinho, e seus complexos sentimentos de amor e ciúmes em relação a Capitu."
|
89 |
+
]
|
90 |
+
rank(query, documents)
|
91 |
+
# Output:
|
92 |
+
# [("'Dom Casmurro' é um romance escrito por Machado de Assis, publicado pela primeira vez em 1899. É considerado uma das obras-primas da literatura brasileira.",
|
93 |
+
# 0.3487686026605069),
|
94 |
+
# ("'Dom Casmurro' narra a vida de Bento Santiago, conhecido como Bentinho, e seus complexos sentimentos de amor e ciúmes em relação a Capitu.",
|
95 |
+
# 0.24520132359838245),
|
96 |
+
# ("Machado de Assis, um dos maiores escritores da literatura brasileira, é autor de obras como 'Dom Casmurro', 'Memórias Póstumas de Brás Cubas' e 'Quincas Borba'.",
|
97 |
+
# 0.2189232997713971),
|
98 |
+
# ("'O Guarani', um romance escrito por José de Alencar, é um marco do romantismo no Brasil e foi publicado em 1857.",
|
99 |
+
# 0.13183125469333265),
|
100 |
+
# ("Clarice Lispector foi uma importante escritora brasileira, conhecida por obras como 'A Hora da Estrela' e 'Perto do Coração Selvagem'.",
|
101 |
+
# 0.05527551927638088)]
|
102 |
```
|