Spaces:

Canstralian
/

sentence-transformers-all-MiniLM-L6-v2

Build error

App Files Files Community

Canstralian committed on 13 days ago

Commit

b2d9c06

verified ·

1 Parent(s): 193caaa

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -13

app.py CHANGED Viewed

@@ -1,26 +1,50 @@
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
-import numpy as np
-# Load the model
 model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-# Define your sentences
 sentences = [
-    "That is a happy person",
-    "That is a happy dog",
-    "That is a very happy person",
-    "Today is a sunny day"
 ]
-# Encode the sentences to get their embeddings
 embeddings = model.encode(sentences)
-# Compute the cosine similarity matrix
 similarities = cosine_similarity(embeddings)
-# Print the shape of the similarity matrix
-print(similarities.shape)  # Output: (4, 4)
-# Optionally, print the similarity matrix
 print(similarities)

+from datasets import load_dataset
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+# Load datasets
+# NOTE(review): nothing below reads `datasets`, and every entry is fetched
+# eagerly with default arguments. Some of these repositories may be very
+# large or may need an explicit config name -- confirm this step is wanted
+# at application start-up.
+dataset_names = [
+    "b-mc2/sql-create-context",
+    "TuneIt/o1-python",
+    "HuggingFaceFW/fineweb-2",
+    "sentence-transformers/embedding-training-data",
+    "prithivMLmods/Deepthink-Reasoning",
+    "O1-OPEN/OpenO1-SFT",
+    "Clinton/Text-to-sql-v1",
+    "RUC-NLPIR/FlashRAG_datasets"
+]
+# Load each dataset once, keyed by its repository name
+datasets = {name: load_dataset(name) for name in dataset_names}
+# Load SentenceTransformer model
 model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+# Define sentences: four security-themed, then four Python-themed.
+# Every element needs its own trailing comma; adjacent string literals
+# without one are silently concatenated into a single list element.
 sentences = [
+    "The firewall successfully blocked unauthorized access attempts.",
+    "The system detected a potential phishing attack targeting users.",
+    "Regular software updates are essential to patch known vulnerabilities.",
+    "Implementing multi-factor authentication enhances account security.",
+    "The function returns the sum of two numbers.",
+    "A list comprehension provides a concise way to create lists.",
+    "The 'try' block is used to handle exceptions in Python.",
+    "Using 'lambda' allows for the creation of anonymous functions."
 ]
+# Compute sentence embeddings
 embeddings = model.encode(sentences)
+# Calculate cosine similarity between sentence embeddings
 similarities = cosine_similarity(embeddings)
+# Print similarity matrix shape and values
+print(similarities.shape)  # Expected output: (8, 8)
 print(similarities)
+# Load transformer model for Seq2Seq tasks
+# NOTE: the assignment below rebinds `model`; the SentenceTransformer
+# instance created earlier is no longer reachable through that name.
+tokenizer = AutoTokenizer.from_pretrained("cssupport/t5-small-awesome-text-to-sql")
+model = AutoModelForSeq2SeqLM.from_pretrained("cssupport/t5-small-awesome-text-to-sql")