Spaces:
Runtime error
Runtime error
ashokpoudel
commited on
Commit
·
d54ec92
1
Parent(s):
9247a10
First Commit
Browse files- README.md +59 -0
- SynonymEditor.py +78 -0
- app.py +21 -0
- requirements.txt +3 -0
- terminal.py +20 -0
README.md
CHANGED
@@ -11,3 +11,62 @@ license: other
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
14 |
+
|
15 |
+
# GPT-3 based Zero-shot Synonym Editor
|
16 |
+
|
17 |
+
Replace words with synonyms in a given text, while maintaining the original sentence structure and meaning, using OpenAI's GPT-3.
|
18 |
+
|
19 |
+
## Overview
|
20 |
+
|
21 |
+
The Synonym Editor is a Python tool that uses OpenAI's GPT-3 language model to replace words in a text with synonyms while maintaining the original sentence structure. The tool can be used to edit text files, such as articles, essays, or reports, to improve their readability, diversity of language, and clarity.
|
22 |
+
|
23 |
+
## Features
|
24 |
+
|
25 |
+
- Replace exactly one word per sentence with a synonym while preserving quotation marks and symbols.
|
26 |
+
- Maintain the original sentence structure and meaning of the text.
|
27 |
+
- Use OpenAI's GPT-3 language model to ensure reliable and consistent word replacement.
|
28 |
+
- Edit multiple paragraphs and sentences at once.
|
29 |
+
- Save the edited text to a file.
|
30 |
+
|
31 |
+
## Usage
|
32 |
+
|
33 |
+
1. Clone the repository to your local machine:
|
34 |
+
|
35 |
+
```
|
36 |
+
git clone https://github.com/ashokgit/SynonymEditor
|
37 |
+
|
38 |
+
```
|
39 |
+
|
40 |
+
2. Install the required packages:
|
41 |
+
|
42 |
+
```
|
43 |
+
pip install -r requirements.txt
|
44 |
+
|
45 |
+
```
|
46 |
+
|
47 |
+
3. Set up your OpenAI API key. You can create an API key by signing up for OpenAI at https://platform.openai.com/signup. Once you have your API key, export it as an environment variable:
|
48 |
+
|
49 |
+
```
|
50 |
+
export OPENAI_API_KEY=your_api_key
|
51 |
+
|
52 |
+
```
|
53 |
+
|
54 |
+
4. Edit the input text file. The input file should be a plain text file with one or more paragraphs of text. You can edit the file in any text editor, such as Notepad, Sublime Text, or Vim.
|
55 |
+
5. Run the Synonym Editor with the input and output file paths:
|
56 |
+
|
57 |
+
```
|
58 |
+
python terminal.py input.txt output.txt
|
59 |
+
|
60 |
+
```
|
61 |
+
|
62 |
+
Replace `input.txt` with the path to your input file and `output.txt` with the path to your output file. The Synonym Editor will read the input file, replace words with synonyms using OpenAI's GPT-3 model, and save the edited text to the output file.
|
63 |
+
|
64 |
+
6. Check the output file for any errors or issues. You can open the file in any text editor to view the edited text.
|
65 |
+
|
66 |
+
## License
|
67 |
+
|
68 |
+
The Synonym Editor is licensed under the MIT License. See the `LICENSE` file for more information.
|
69 |
+
|
70 |
+
## Acknowledgments
|
71 |
+
|
72 |
+
- This project uses the [OpenAI API](https://beta.openai.com/docs/api-reference/introduction) to perform language tasks.
|
SynonymEditor.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import openai
|
2 |
+
from nltk import sent_tokenize
|
3 |
+
import re
|
4 |
+
|
5 |
+
|
6 |
+
class SynonymEditor:
    """Swap one word per sentence for a synonym via an OpenAI completion model.

    Text is split into paragraphs on blank lines and into sentences with
    NLTK's ``sent_tokenize``. Each sentence is sent to the model on its own
    and the answers are joined back together: sentences with a single space,
    paragraphs with a blank line.
    """

    # Stand-in for the double-quote character while a sentence is tokenized
    # and sent to the model; swapped back in ``_post_process_sentence``.
    _QUOTE_TOKEN = "__QUOTE__"

    def __init__(self, api_key, model_engine, max_tokens):
        """Remember the model settings and register the API key.

        Args:
            api_key: OpenAI API key; assigned to the module-level
                ``openai.api_key``.
            model_engine: Model name passed as ``model`` to the Completion API.
            max_tokens: ``max_tokens`` value used for every completion request.
        """
        openai.api_key = api_key
        self.model_engine = model_engine
        self.max_tokens = max_tokens

    # Play with the prompts here to see the effect of the wording on the
    # output quality. Note that the longer the prompt, the more tokens are
    # used and hence the higher the billing.
    def _get_prompt(self, sentence):
        """Return the instruction prompt for a single sentence.

        A sentence containing the quote placeholder gets an instruction that
        asks the model to keep that placeholder intact.
        """
        if self._QUOTE_TOKEN in sentence:
            instruction = ("Replace exactly one word with a synonym while "
                           "preserving __QUOTE__ in the following sentence:")
        else:
            instruction = ("Replace exactly one word with a synonym in the "
                           "following sentence:")
        return instruction + "\n" + sentence + "\n"

    # Call the OpenAI API here
    def __call_ai(self, sentence):
        """Send one sentence to the Completion API and return the rewrite.

        Returns:
            The stripped text of the first choice with quote placeholders
            restored. If the model returns an empty completion the original
            sentence is returned instead, so no text is lost from the output.
        """
        response = openai.Completion.create(
            model=self.model_engine,
            prompt=self._get_prompt(sentence),
            temperature=0.6,
            max_tokens=self.max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        rewritten = response.choices[0].text.strip()
        # Fall back to the input rather than silently dropping the sentence.
        return self._post_process_sentence(rewritten or sentence)

    # Split the paragraph to preserve quotation marks
    def _split_into_sentences(self, text):
        """Normalize a paragraph and split it into sentences.

        Double quotes are replaced by the placeholder, runs of whitespace are
        collapsed to one space and the ends are trimmed before tokenizing.

        Returns:
            The list of sentences produced by ``sent_tokenize``.
        """
        text = text.replace('"', self._QUOTE_TOKEN)
        text = re.sub(r'\s+', ' ', text).strip()
        try:
            return sent_tokenize(text)
        except LookupError:
            # The "punkt" tokenizer data is not bundled with the nltk package;
            # fetch it once on a fresh environment and retry.
            import nltk
            nltk.download("punkt", quiet=True)
            return sent_tokenize(text)

    def _post_process_sentence(self, text):
        """Turn every quote placeholder in *text* back into a double quote."""
        return text.replace(self._QUOTE_TOKEN, '"')

    # Preprocess the text, perform edit task and join back to get the original format
    def _edit_text(self, text):
        """Rewrite *text* sentence by sentence and return the edited text.

        Paragraphs are the pieces between ``"\\n\\n"`` separators. A blank
        paragraph is kept as an empty string without touching the tokenizer
        or the API, so the paragraph count of the input is preserved.
        """
        edited_paragraphs = []
        for paragraph in text.split("\n\n"):
            if not paragraph.strip():
                edited_paragraphs.append("")
                continue
            edited_sentences = [
                self.__call_ai(sentence)
                for sentence in self._split_into_sentences(paragraph)
            ]
            # join edited sentences to form an edited paragraph
            edited_paragraphs.append(' '.join(edited_sentences))

        # join edited paragraphs to form edited text
        return '\n\n'.join(edited_paragraphs)

    # File Read Write operation
    def edit_file(self, input_file, output_file):
        """Read *input_file*, rewrite its text and save it to *output_file*.

        The input is decoded as UTF-8 with undecodable bytes ignored. The
        output is written as UTF-8 as well, so non-ASCII characters survive
        regardless of the platform's default encoding.
        """
        print("Opening File")
        with open(input_file, "r", encoding="utf8", errors="ignore") as f:
            text = f.read()
        print("Editing")
        edited_text = self._edit_text(text)
        print("Finishing up")
        with open(output_file, "w", encoding="utf8") as f:
            f.write(edited_text)
        print("Done!")
|
app.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from SynonymEditor import SynonymEditor
|
3 |
+
|
4 |
+
|
5 |
+
def replace_synonyms(api_key, text):
    """Gradio callback: return *text* with one word per sentence replaced.

    A fresh ``SynonymEditor`` is built on every call with the supplied API
    key, the ``text-davinci-003`` model and a 500-token completion limit.
    """
    return SynonymEditor(api_key, "text-davinci-003", 500)._edit_text(text)
|
10 |
+
|
11 |
+
|
12 |
+
# Input widgets. The API key box is masked so the secret is not displayed
# in clear text while it is typed or pasted.
api_key = gr.Textbox(label="API Key", lines=1, value="", type="password")

input_text = gr.Textbox(
    label="Input Text", lines=10, value="Enter your text here.")

output_text = gr.Textbox(label="Output Text")

# NOTE: `server_port` is an option of `launch()`, not of `gr.Interface`; when
# given to the constructor it is ignored with a warning, so it is left out
# here and the default port is used exactly as before. To pin a port, call
# `io.launch(server_port=...)`.
io = gr.Interface(fn=replace_synonyms, inputs=[api_key, input_text], outputs=output_text, title="Synonym Replacer",
                  description="Replace words in a text with their synonyms.")
io.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
nltk==3.8.1
|
2 |
+
openai==0.26.5
|
3 |
+
gradio==3.19.1
|
terminal.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Command-line entry point: rewrite a text file with one synonym per sentence.

Usage: python3 terminal.py input.txt output.txt
"""
import os
import sys

from SynonymEditor import SynonymEditor

# Use the key exported as OPENAI_API_KEY when it is set; the literal
# placeholder remains the fallback for users who edit this file instead.
api_key = os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY_HERE")
model_engine = "text-davinci-003"
max_tokens = 500

# Check if command line arguments are provided
if len(sys.argv) < 3:
    print("Usage: python3 terminal.py input.txt output.txt")
    # Non-zero status so shells and scripts can detect the usage error.
    sys.exit(1)

# Get command line arguments
input_file = sys.argv[1]
output_file = sys.argv[2]

# Create synonym editor
editor = SynonymEditor(api_key, model_engine, max_tokens)

editor.edit_file(input_file, output_file)
|