import re

import nltk
import openai
from nltk import sent_tokenize

nltk.download('punkt')


class SynonymEditor:
    def __init__(self, api_key, model_engine, max_tokens, temperature, language):
        openai.api_key = api_key
        self.model_engine = model_engine
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.language = language

    # Experiment with the prompt variants below to see their effect on output quality.
    # Note that the longer the prompt, the more tokens are used and hence the higher the billing.
    def _get_prompt(self, sentence, few_shots):
        # few_shots is either False or a string of example input/output pairs.
        if few_shots:
            if self.language == "de":
                # German: "Modernize the text. Keep the contents of the quotation marks "" unchanged."
                prompt = ('Modernisiere den Text. Behalte die Inhalte von Klammern "" bei.\n'
                          + few_shots + "\nEingang:" + sentence + " Ausgang:")
            else:
                prompt = ("Replace exactly one word with a synonym while preserving "
                          "the overall sentence structure and meaning.\n"
                          + few_shots + "\nInput:" + sentence + " Output:")
        elif "__QUOTE__" in sentence:
            if self.language == "de":
                prompt = 'Modernisiere den Text. Behalte die Inhalte von Klammern "" bei.\n' + sentence + '\n'
            else:
                prompt = ("Replace exactly one word with a synonym while preserving "
                          "__QUOTE__ in the following sentence:\n" + sentence + "\n")
        else:
            if self.language == "de":
                prompt = 'Modernisiere den Text. Behalte die Inhalte von Klammern "" bei.\n' + sentence + '\n'
            else:
                prompt = "Replace exactly one word with a synonym in the following sentence:\n" + sentence + "\n"
        return prompt

    # Call the OpenAI completions API (legacy openai<1.0 interface) for a single sentence.
    def _call_ai(self, sentence, few_shots):
        prompt = self._get_prompt(sentence, few_shots)
        print(prompt)  # Debug output: the prompt sent to the API.
        response = openai.Completion.create(
            model=self.model_engine,
            prompt=prompt,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )
        return self._post_process_sentence(response.choices[0].text.strip())

    # Split the paragraph into sentences, masking quotation marks so they survive editing.
    def _split_into_sentences(self, text, few_shots):
        if not few_shots:
            text = text.replace('"', '__QUOTE__')
        text = re.sub(r'\s+', ' ', text)
        text = text.strip()
        # Use the German Punkt model for German input so abbreviations split correctly.
        tokenize_language = 'german' if self.language == "de" else 'english'
        return sent_tokenize(text, language=tokenize_language)

    # Restore the quotation marks that were masked before tokenization.
    def _post_process_sentence(self, text):
        print(text)  # Debug output: the raw model response.
        print("==============")
        return text.replace('__QUOTE__', '"')

    # Preprocess the text, edit it sentence by sentence, and join it back into the original format.
    def _edit_text(self, text, few_shots=False):
        paragraphs = text.split("\n\n")
        edited_paragraphs = []
        for paragraph in paragraphs:
            sentences = self._split_into_sentences(paragraph, few_shots)
            edited_sentences = []
            for sentence in sentences:
                new_sentence = self._call_ai(sentence, few_shots)
                edited_sentences.append(new_sentence)
            # Join the edited sentences to form an edited paragraph.
            edited_paragraphs.append(' '.join(edited_sentences))
        # Join the edited paragraphs to form the edited text.
        return '\n\n'.join(edited_paragraphs)

    # Read the input file, edit its contents, and write the result to the output file.
    def edit_file(self, input_file, output_file):
        print("Opening File")
        with open(input_file, "r", encoding="utf8", errors="ignore") as f:
            text = f.read()
        print("Editing")
        edited_text = self._edit_text(text)
        print("Finishing up")
        with open(output_file, "w", encoding="utf8") as f:
            f.write(edited_text)
        print("Done!")
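
# A minimal usage sketch. Everything below is an assumption, not part of the
# original: the API key placeholder, the "text-davinci-003" model engine, the
# parameter values, and the file names are all hypothetical; adjust them to
# your own setup.
if __name__ == "__main__":
    editor = SynonymEditor(
        api_key="YOUR_OPENAI_API_KEY",    # hypothetical placeholder
        model_engine="text-davinci-003",  # assumed completions-capable model
        max_tokens=256,
        temperature=0.7,
        language="en",  # use "de" for the German modernization prompts
    )
    # Rewrites input.txt sentence by sentence and writes the result to output.txt.
    editor.edit_file("input.txt", "output.txt")
    # _edit_text also accepts a few-shot example string (format inferred from
    # _get_prompt, e.g. "Input: <sentence> Output: <edited sentence>"), but
    # edit_file currently always calls it with few_shots=False.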