aiisc-watermarking-modelv3

Sleeping

jgyasu committed on Jul 9, 2024

Commit

d814758

verified ·

1 Parent(s): 6b8fb92

Update lcs.py

Files changed (1) hide show

lcs.py CHANGED Viewed

@@ -23,15 +23,18 @@ def find_common_subsequences(sentence, str_list):
     words = sentence.split()
     common_grams = []
     added_phrases = set()
-    index = 1
     for n in range(5, 0, -1):
         for i in range(len(words) - n + 1):
             subseq = " ".join(words[i:i+n])
             if is_present(subseq, str_list) and not any(subseq in phrase for phrase in added_phrases):
-                common_grams.append((index, subseq))
                 added_phrases.add(subseq)
-                index += 1
-    return common_grams

     words = sentence.split()
     common_grams = []
     added_phrases = set()
     for n in range(5, 0, -1):
         for i in range(len(words) - n + 1):
             subseq = " ".join(words[i:i+n])
             if is_present(subseq, str_list) and not any(subseq in phrase for phrase in added_phrases):
+                common_grams.append((i, subseq))
                 added_phrases.add(subseq)
+    # Sort by the first appearance in the original sentence
+    common_grams.sort(key=lambda x: x[0])
+    # Assign indices based on the sorted order
+    indexed_common_grams = [(index + 1, subseq) for index, (_, subseq) in enumerate(common_grams)]
+    return indexed_common_grams