RandomNameAnd6 committed on
Commit
7e56099
·
verified ·
1 Parent(s): 8463291

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -2
app.py CHANGED
@@ -19,11 +19,70 @@ def generate_text(prompt):
19
  with open('dhar_mann_titles.txt', 'r') as file:
20
  dhar_mann_titles = file.readlines()
21
 
22
- # Function to generate an AI title (dummy implementation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def generate_ai_title():
24
  inputs = tokenizer(["<|startoftext|>"]*1, return_tensors = "pt")
25
  outputs = model.generate(**inputs, max_new_tokens=50, use_cache=True, temperature=0.85, do_sample=True)
26
- return (tokenizer.batch_decode(outputs)[0])[15:-13]
 
 
 
 
 
27
 
28
  # Function to check user's answer and update score
29
  def check_answer(user_choice, real_index, score):
 
19
  with open('dhar_mann_titles.txt', 'r') as file:
20
  dhar_mann_titles = file.readlines()
21
 
22
def levenshtein_distance(s1, s2):
    """
    Return the edit distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn one string into the other.
    Only two rows of the dynamic-programming table are held at a time.

    Parameters:
    - s1 (str): The first string.
    - s2 (str): The second string.

    Returns:
    - int: The Levenshtein distance between the two strings.
    """
    # The longer string drives the outer loop so each row is as short as possible.
    if len(s1) >= len(s2):
        longer, shorter = s1, s2
    else:
        longer, shorter = s2, s1

    # Turning a string into the empty string costs one deletion per character.
    if len(shorter) == 0:
        return len(longer)

    row_above = list(range(len(shorter) + 1))
    for row_number, outer_char in enumerate(longer, start=1):
        row_here = [row_number]
        for col, inner_char in enumerate(shorter):
            via_insert = row_above[col + 1] + 1
            via_delete = row_here[col] + 1
            # A bool adds 0 when the characters match and 1 when they differ.
            via_substitute = row_above[col] + (outer_char != inner_char)
            row_here.append(min(via_insert, via_delete, via_substitute))
        row_above = row_here

    return row_above[-1]
50
+
51
def string_similarity_index(original_text, comparison_text, threshold=0.6):
    """
    Decide whether two strings are similar, based on a normalized
    Levenshtein distance.

    The similarity score is ``1 - distance / max(len(original_text),
    len(comparison_text))``, so identical strings score 1.0 and strings with
    nothing in common score 0.0.

    Parameters:
    - original_text (str): The original text.
    - comparison_text (str): The text to compare for similarity.
    - threshold (float): The non-original threshold score (0 to 1).

    Returns:
    - bool: True if the similarity score is greater than or equal to the
      threshold, False otherwise.
    """
    # The longest input bounds the distance and is used to normalize it.
    max_distance = max(len(original_text), len(comparison_text))

    # Two empty strings are identical; without this guard the normalization
    # below would divide by zero.
    if max_distance == 0:
        return 1.0 >= threshold

    # Calculate the Levenshtein distance
    distance = levenshtein_distance(original_text, comparison_text)

    # Calculate the similarity score
    similarity_score = 1 - distance / max_distance

    # Compare the similarity score to the threshold
    return similarity_score >= threshold
75
+
76
# Function to generate an AI title
def generate_ai_title(max_attempts=20):
    """
    Generate a title with the model, retrying when the result is too close
    to one of the titles loaded from ``dhar_mann_titles``.

    Each candidate is compared against every known title with
    ``string_similarity_index``; a candidate that matches any of them is
    discarded and a new one is sampled.

    Parameters:
    - max_attempts (int): Maximum number of candidates to sample. Values
      below 1 are treated as 1.

    Returns:
    - str: The first candidate that is not similar to any known title. If
      every attempt produced a similar title, the last candidate is returned
      as a best effort rather than retrying forever.
    """
    # Remove any extra whitespace characters like newlines, once per call
    # instead of once per candidate.
    known_titles = [title.strip() for title in dhar_mann_titles]

    # The prompt never changes, so tokenize it a single time.
    inputs = tokenizer(["<|startoftext|>"] * 1, return_tensors="pt")

    generated_title = ""
    for _ in range(max(1, max_attempts)):
        outputs = model.generate(**inputs, max_new_tokens=50, use_cache=True, temperature=0.85, do_sample=True)
        # [15:...] drops the 15-character "<|startoftext|>" prompt. The trailing
        # 13 characters are presumably an end-of-text marker -- TODO confirm,
        # since output cut off by max_new_tokens would lose real text here.
        generated_title = (tokenizer.batch_decode(outputs)[0])[15:-13]

        # Compare the generated title itself (not an unrelated name) against
        # the known titles.
        too_similar = any(
            string_similarity_index(generated_title, title)
            for title in known_titles
        )
        if not too_similar:
            return generated_title

    return generated_title
86
 
87
  # Function to check user's answer and update score
88
  def check_answer(user_choice, real_index, score):