File size: 2,349 Bytes
9543aec
 
54c0de7
9543aec
 
 
 
 
 
 
6f6865f
 
 
 
 
 
 
9543aec
 
 
 
 
 
cc063db
9543aec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
06b480a
9543aec
 
 
 
 
 
 
5c4bbd0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import gradio as gr
import json
from sentence_transformers import SentenceTransformer, InputExample, util
import pandas as pd

def Main(Modelo, Texto1, Texto2):
  """Compare two texts with the selected fine-tuned model.

  Args:
    Modelo: Base model name; it is inserted into the checkpoint id
      'jfarray/Model_<Modelo>_50_Epochs'.
    Texto1: First text to compare.
    Texto2: Second text to compare.

  Returns:
    A two-item list ``[error, modelResult]``. ``error`` is always a string:
    empty on success, otherwise the exception message (or the exception
    class name when the message is empty). ``modelResult`` is the DataFrame
    produced by ``TestModel`` on success, or an empty DataFrame with the
    "Similitud Semántica" column on failure.
  """
  try:
    # guid and label are dummy values: only the two texts feed the score.
    data_test = [InputExample(guid="", texts=[Texto1, Texto2], label=0)]
    modelResult = TestModel('jfarray/Model_' + Modelo + '_50_Epochs', data_test)
  except Exception as e:
    # Report the failure as plain text and keep the result slot a DataFrame,
    # so both outputs stay renderable instead of failing a second time on
    # an exception object / empty string.
    message = str(e) or type(e).__name__
    return [message, pd.DataFrame(columns=["Similitud Semántica"])]

  return ["", modelResult]
    
def TestModel(checkpoint, data):
  """Score the semantic similarity of each text pair in ``data``.

  Args:
    checkpoint: Model name or path handed to ``SentenceTransformer``.
    data: Sequence of examples whose ``texts`` attribute holds the two texts
      to compare (``texts[0]`` and ``texts[1]``).

  Returns:
    A DataFrame with a single "Similitud Semántica" column holding, for each
    example, the cosine similarity of its two texts rounded to 3 decimals.
    Empty input yields an empty DataFrame with that column, and the model is
    not loaded.
  """
  column = "Similitud Semántica"

  if not data:
    # Nothing to encode, so skip the model load entirely.
    return pd.DataFrame(columns=[column])

  model = SentenceTransformer(checkpoint)

  sentences1 = [example.texts[0] for example in data]
  sentences2 = [example.texts[1] for example in data]

  # Compute embeddings for both lists
  embeddings1 = model.encode(sentences1, convert_to_tensor=True)
  embeddings2 = model.encode(sentences2, convert_to_tensor=True)

  # cos_sim scores every text of the first list against every text of the
  # second; only the diagonal pairs text i with its own counterpart.
  cosine_scores = util.cos_sim(embeddings1, embeddings2)
  scores = [round(score, 3) for score in cosine_scores.diagonal().tolist()]

  return pd.DataFrame({column: scores})

# --- Input components -------------------------------------------------------
# Base-model names offered in the dropdown; Main builds the checkpoint id
# from the selected value.
Modelos = gr.inputs.Dropdown(
    [
        "dccuchile_bert-base-spanish-wwm-uncased",
        "bert-base-multilingual-uncased",
        "all-distilroberta-v1",
        "paraphrase-multilingual-mpnet-base-v2",
        "paraphrase-multilingual-MiniLM-L12-v2",
        "distiluse-base-multilingual-cased-v1",
    ]
)
# NOTE: Opciones is created here but is not passed to the Interface below.
Opciones = gr.inputs.Radio(["Comparar Textos", "Procesar Fichero"])
Text1Input = gr.inputs.Textbox(
    lines=10,
    placeholder="Escriba el texto aqui ...",
)
Text2Input = gr.inputs.Textbox(
    lines=10,
    placeholder="Escriba el otro texto aqui ...",
)

# --- Output components ------------------------------------------------------
# First output receives Main's error slot, second one its result slot.
LabelOutput = gr.outputs.Label(num_top_classes=None, type="auto", label="")
DataFrameOutput = gr.outputs.Dataframe(
    headers=["Similitud Semántica"],
    max_rows=20,
    max_cols=None,
    overflow_row_behaviour="paginate",
    type="pandas",
    label="Resultado",
)

# --- Interface wiring and start-up ------------------------------------------
iface = gr.Interface(
    fn=Main,
    inputs=[Modelos, Text1Input, Text2Input],
    outputs=[LabelOutput, DataFrameOutput],
    title="Similitud Semántica de textos en Español de tamaño medio (200-250 palabras)",
)

iface.launch(share=False, enable_queue=True, show_error=True)