Spaces:

hudsonhayes
/

finance_tool

Sleeping

File size: 16,417 Bytes

import openai
import os
import pdfplumber
from langchain.chains.mapreduce import MapReduceChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.prompts import PromptTemplate
import logging
import json
from typing import List
import mimetypes
import validators
import requests
import tempfile
from langchain.chains import create_extraction_chain
from GoogleNews import GoogleNews
import pandas as pd
import requests
import gradio as gr
import re
from langchain.document_loaders import WebBaseLoader
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from transformers import pipeline
import plotly.express as px
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.chains.llm import LLMChain
import yfinance as yf
import pandas as pd
import nltk
from nltk.tokenize import sent_tokenize
from openai import OpenAI


class KeyValueExtractor:

    def __init__(self) -> None:

        """
        Initialize the KeyValueExtractor object.

        Takes no parameters. Stores the name of the model used by the
        zero-shot sentiment classifier and creates the OpenAI client used by
        the summary and key-value extraction methods.
        """
        # Model name handed to the transformers zero-shot-classification pipeline
        self.model = "facebook/bart-large-mnli"
        # Client is built with default arguments; no credentials are passed explicitly here
        self.client = OpenAI()
        
        
    def get_url(self,keyword):
        """
        Build the Yahoo Finance quote page URL for a ticker symbol.

        The keyword is stripped of surrounding whitespace and percent-encoded
        before being placed in the URL, so values such as " AAPL" (as produced
        by splitting "Apple Inc. - AAPL" on the dash) still yield a valid URL.

        Parameters:
            keyword (str): Ticker symbol, e.g. "AAPL".

        Returns:
            str: The quote page URL for the symbol.
        """
        # Local import keeps this fix self-contained (standard library only)
        from urllib.parse import quote

        symbol = quote(str(keyword).strip(), safe="")
        return f"https://finance.yahoo.com/quote/{symbol}?p={symbol}"

    def get_each_link_summary(self,url):
        """
        Summarize the stock details found on a web page.

        Loads the page at ``url``, splits it into token-based chunks and runs
        a LangChain "refine" summarization chain over the chunks.

        Parameters:
            url (str): Address of the page to load.

        Returns:
            The ``output_text`` entry of the chain result. The same value is
            also printed to stdout.
        """
        # Fetch the page and cut it into chunks of 3000 tokens with 200 overlap
        pages = WebBaseLoader(url).load()
        chunks = CharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=3000, chunk_overlap=200
        ).split_documents(pages)

        # Prompt used for the first chunk
        initial_prompt = PromptTemplate.from_template(
            """The give text is Finance Stock Details for one company i want to get values for
      Previous Close : [value]
      Open : [value]
      Bid : [value]
      Ask : [value]
      Day's Range : [value]
      52 Week Range : [value]
      Volume : [value]
      Avg. Volume : [value]
      Market Cap : [value]
      Beta (5Y Monthly) : [value]
      PE Ratio (TTM) : [value]
      EPS (TTM) : [value]
      Earnings Date : [value]
      Forward Dividend & Yield : [value]
      Ex-Dividend Date : [value]
      1y Target Est : [value]
      these details form that and Write a abractive summary about those details:
      Given Text: {text}
      CONCISE SUMMARY:"""
        )

        # Prompt used for every following chunk to refine the running summary
        followup_prompt = PromptTemplate.from_template(
            "".join(
                [
                    "Your job is to produce a final summary\n",
                    "We have provided an existing summary up to a certain point: {existing_answer}\n",
                    "We have the opportunity to refine the existing summary",
                    "(only if needed) with some more context below.\n",
                    "------------\n",
                    "{text}\n",
                    "------------\n",
                    "Given the new context, refine the original summary",
                    "If the context isn't useful, return the original summary.",
                ]
            )
        )

        # Refine-style summarization chain backed by ChatOpenAI
        summarizer = load_summarize_chain(
            llm=ChatOpenAI(temperature=0),
            chain_type="refine",
            question_prompt=initial_prompt,
            refine_prompt=followup_prompt,
            return_intermediate_steps=True,
            input_key="input_documents",
            output_key="output_text",
        )

        outputs = summarizer({"input_documents": chunks}, return_only_outputs=True)
        summary = outputs["output_text"]
        print(summary)

        return summary

    def one_day_summary(self,content) -> str:
        """
        Ask the OpenAI chat model for a detailed summary of finance details.

        Parameters:
            content (str): Finance details to summarize; embedded in the user
                message between triple backticks.

        Returns:
            str: The message content of the first choice in the completion.
        """
        conversation = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"i want detailed Summary from given finance details. i want information like what happen today comparing last day good or bad Bullish or Bearish like these details i want summary. content in backticks.```{content}```."}
        ]

        # temperature=0 is passed so the reply is as deterministic as the API allows
        chat_completion = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=conversation,
            max_tokens=1000,
            temperature=0,
        )

        return chat_completion.choices[0].message.content

    def extract_key_value_pair(self,content) -> "str | None":
        """
        Extract key-value pairs from the given text with the OpenAI chat model.

        Parameters:
            content (str): Text to analyse; embedded in the user message
                between triple backticks.

        Returns:
            The message content of the first choice in the completion, or
            None when the request fails (the error is logged and printed).
        """
        conversation = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Get maximum count meaningfull key value pairs. content in backticks.```{content}```."}
        ]

        try:
            chat_completion = self.client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=conversation,
                max_tokens=1000,
                temperature=0,
            )
            return chat_completion.choices[0].message.content
        except Exception as e:
            # Best-effort by design: a failed extraction must not take down
            # the caller, so the error is reported and None is returned.
            logging.error(f"Error while extracting key-value pairs: {e}")
            print("Error:", e)
            return None

    def analyze_sentiment_for_graph(self, text):
        """
        Score a text against the labels Positive, Negative and Neutral.

        The zero-shot classification pipeline is created on first use and
        then kept on the instance, so the model is not reloaded on every
        call.

        Parameters:
            text (str): Text to classify.

        Returns:
            dict: Maps each label to its score, in the order the classifier
            returned them.
        """
        classifier = getattr(self, "_sentiment_pipeline", None)
        if classifier is None:
            classifier = pipeline("zero-shot-classification", model=self.model)
            self._sentiment_pipeline = classifier

        result = classifier(text, ["Positive", "Negative", "Neutral"])
        # Pair labels with scores positionally, as the classifier returned them
        return dict(zip(result['labels'], result['scores']))

    def display_graph(self,text):
        """
        Build a horizontal bar chart of the sentiment scores for a text.

        Parameters:
            text (str): Text passed to ``analyze_sentiment_for_graph``.

        Returns:
            The plotly figure with one bar per sentiment label.
        """
        sentiment_scores = self.analyze_sentiment_for_graph(text)
        # Materialize the dict views so plotly receives plain sequences
        labels = list(sentiment_scores)
        scores = list(sentiment_scores.values())

        fig = px.bar(
            x=scores,
            y=labels,
            orientation='h',
            color=labels,
            color_discrete_map={"Negative": "red", "Positive": "green", "Neutral": "gray"},
        )
        # Scores are expected to be fractions in 0..1 (zero-shot classifier
        # output). The d3 '%' format multiplies by 100 before appending the
        # sign, so 0.85 is shown as "85.00%" instead of "0.85%".
        fig.update_traces(texttemplate='%{x:.2%}', textposition='outside')
        fig.update_layout(title="Sentiment Analysis",width=800)

        return fig

    def get_finance_data(self, symbol, start_date='2022-08-19', end_date='2023-08-19', csv_path="ohlc_data.csv"):
        """
        Download historical OHLC data for a symbol and save it as CSV.

        Parameters:
            symbol (str): Ticker symbol passed to ``yf.download``.
            start_date (str): Start of the date range. Defaults to the
                previously hard-coded '2022-08-19'.
            end_date (str): End of the date range. Defaults to the
                previously hard-coded '2023-08-19'.
            csv_path (str): Where the CSV file is written. Defaults to
                "ohlc_data.csv".

        Returns:
            str: The path of the written CSV file (``csv_path``).
        """
        # Fetch historical data using yfinance
        data = yf.download(symbol, start=start_date, end=end_date)

        # Keep only the OHLC columns and persist them
        ohlc_data = data[['Open', 'High', 'Low', 'Close']]
        ohlc_data.to_csv(csv_path)

        return csv_path

    def csv_to_dataframe(self,csv_path):
        """
        Read a CSV file into a pandas DataFrame.

        Parameters:
            csv_path: Location of the CSV file, passed straight to
                ``pd.read_csv``.

        Returns:
            The DataFrame produced by ``pd.read_csv``.
        """
        return pd.read_csv(csv_path)

    def save_dataframe_in_text_file(self,df):
        """
        Write a DataFrame to 'output.txt' as tab-separated text.

        The index is not written. The file is created relative to the
        current working directory.

        Parameters:
            df: DataFrame to write.

        Returns:
            str: The path of the written file, 'output.txt'.
        """
        target = 'output.txt'
        df.to_csv(target, index=False, sep='\t')
        return target

    def csv_loader(self,output_file_path):
        """
        Load a file into LangChain documents.

        Parameters:
            output_file_path (str): Path of the file to load.

        Returns:
            The documents returned by ``UnstructuredFileLoader.load`` using
            the "fast" strategy.
        """
        return UnstructuredFileLoader(output_file_path, strategy="fast").load()

    def document_text_spilliter(self,docs):
        """
        Split documents into smaller chunks for processing.

        Uses a tiktoken-based ``CharacterTextSplitter`` with a chunk size of
        1000 and an overlap of 200.

        Parameters:
            docs: Documents to split.

        Returns:
            The chunks returned by ``split_documents``.
        """
        splitter = CharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=1000,
            chunk_overlap=200,
        )
        return splitter.split_documents(docs)

    def change_bullet_points(self,text):
        """
        Turn a passage into a bullet list with one bullet per sentence.

        Parameters:
            text (str): Passage to split into sentences.

        Returns:
            str: Each sentence on its own line, prefixed with "* " and
            terminated by a newline. Empty when no sentences are found.
        """
        # Only contact the NLTK download server when the sentence tokenizer
        # data is not installed yet, instead of on every call.
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt')

        return "".join(f"* {sentence}\n" for sentence in sent_tokenize(text))

    def one_year_summary(self,keyword):
        """
        Produce a bullet-point summary of a symbol's historical performance.

        Downloads the OHLC data for ``keyword``, round-trips it through a CSV
        and a tab-separated text file, loads and chunks that text, and runs a
        LangChain "refine" summarization chain over the chunks.

        Parameters:
            keyword (str): Ticker symbol passed to ``get_finance_data``.

        Returns:
            str: The chain's ``output_text`` reformatted by
            ``change_bullet_points``.
        """
        # Data preparation: download -> DataFrame -> text file -> chunks
        table_path = self.save_dataframe_in_text_file(
            self.csv_to_dataframe(self.get_finance_data(keyword))
        )
        chunks = self.document_text_spilliter(self.csv_loader(table_path))

        # Prompt used for the first chunk
        initial_prompt = PromptTemplate.from_template(
            """Analyze the Financial Details and Write a abractive quick short summary how the company perform up and down,Bullish/Bearish of the following:
                {text}
                CONCISE SUMMARY:"""
        )

        # Prompt used for every following chunk to refine the running summary
        followup_prompt = PromptTemplate.from_template(
            "".join(
                [
                    "Your job is to produce a final summary\n",
                    "We have provided an existing summary up to a certain point: {existing_answer}\n",
                    "We have the opportunity to refine the existing summary",
                    "(only if needed) with some more context below.\n",
                    "------------\n",
                    "{text}\n",
                    "------------\n",
                    "Given the new context, refine the original summary",
                    "If the context isn't useful, return the original summary.",
                    "10 line summary is enough",
                ]
            )
        )

        # Refine-style summarization chain backed by ChatOpenAI
        summarizer = load_summarize_chain(
            llm=ChatOpenAI(temperature=0),
            chain_type="refine",
            question_prompt=initial_prompt,
            refine_prompt=followup_prompt,
            return_intermediate_steps=True,
            input_key="input_documents",
            output_key="output_text",
        )

        outputs = summarizer({"input_documents": chunks}, return_only_outputs=True)
        return self.change_bullet_points(outputs["output_text"])

    def main(self,keyword):
        """
        Run the last-day analysis for a symbol.

        Builds the quote URL, summarizes the page behind it, condenses that
        into a day summary and extracts key-value pairs from the day summary.

        Parameters:
            keyword (str): Ticker symbol passed to ``get_url``.

        Returns:
            tuple: ``(day summary, key-value pairs)``.
        """
        quote_url = self.get_url(keyword)
        day_summary = self.one_day_summary(self.get_each_link_summary(quote_url))
        return day_summary, self.extract_key_value_pair(day_summary)

    def company_names(self,input_text):
        """
        Extract the ticker symbol from a "Company Name - TICKER" label.

        The text after the last " - " separator is used, so hyphenated
        company names ("Coca-Cola Company - KO") and hyphenated tickers
        ("Berkshire Hathaway Inc. - BRK-B") are handled. Without that
        separator the text after the last "-" is used, and if there is no
        dash at all the input itself is returned. Surrounding whitespace is
        removed in every case.

        Parameters:
            input_text (str): Label such as "Apple Inc. - AAPL".

        Returns:
            str: The ticker part of the label, stripped.
        """
        _, separator, ticker = input_text.rpartition(" - ")
        if not separator:
            # No spaced separator: fall back to the last bare dash. With no
            # dash at all, rpartition leaves the whole input in `ticker`.
            _, _, ticker = input_text.rpartition("-")
        return ticker.strip()

    def gradio_interface(self):
        """
        Build the Gradio Blocks UI and launch it.

        The page contains a company input with a collapsible list of
        examples, text boxes for the last-day summary, the extracted
        key-value pairs and the one-year summary, two sentiment plots, and
        four buttons wired to ``main``, ``display_graph`` and
        ``one_year_summary``. The app is launched with ``debug=True``.
        """

        with gr.Blocks(css="style.css",theme= 'karthikeyan-adople/hudsonhayes-gray') as app:
              # Page header with logo and title
              gr.HTML("""<center class="darkblue" style='background-color:rgb(0,1,36); text-align:center;padding:25px;'><center><h1 class ="center">
                <img src="file=logo.png" height="110px" width="280px"></h1></center>
                  <br><h1 style="color:#fff"> Finance Tool for Investors </h1></center>""")
              # Free-text input; its value is passed to main() and one_year_summary()
              with gr.Row(elem_id="col-container"):
                with gr.Column(scale=1.0, min_width=150, ):
                  input_news = gr.Textbox(label="Company Name")
              # Example labels; company_names() is run on them and its result
              # is written back into the same text box
              with gr.Accordion("List_of_Companies", open = False):
                  with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=1.0, min_width=150 ):
                          gr.Examples(
                            [["Apple Inc. - AAPL"], ["Microsoft Corporation - MSFT"],["Amazon.com Inc. - AMZN"],["Facebook Inc. - FB"],["Tesla Inc. - TSLA"]],
                            [input_news],
                            input_news,
                            fn=self.company_names,
                            cache_examples=True,
                        )

              with gr.Row(elem_id="col-container"):
                with gr.Column(scale=1.0, min_width=150):
                  analyse = gr.Button("Analyse")
              # Outputs of the last-day analysis (both filled by main())
              with gr.Row(elem_id="col-container"):
                with gr.Column(scale=0.50, min_width=150):
                  result_summary = gr.Textbox(label="Summary For Last Day Perfomance", lines = 12)
                with gr.Column(scale=0.50, min_width=150):
                  key_value_pair_result = gr.Textbox(label="Discussed Topics", lines = 12)
              # NOTE(review): `size` does not appear among gr.Plot's documented
              # parameters — confirm the pinned Gradio version accepts it
              with gr.Row(elem_id="col-container"):
                  with gr.Column(scale=1.0, min_width=0):
                      plot_for_day =gr.Plot(label="Sentiment for Last Day", size=(500, 600))
              with gr.Row(elem_id="col-container"):
                with gr.Column(scale=1.0, min_width=150):
                  analyse_sentiment = gr.Button("Analyse Sentiment For Last Day")
              # Output of the one-year analysis
              with gr.Row(elem_id="col-container"):
                with gr.Column(scale=1.0, min_width=150, ):
                  one_year_summary = gr.Textbox(label="Summary For One Year Perfomance",lines = 12)
              with gr.Row(elem_id="col-container"):
                with gr.Column(scale=1.0, min_width=150):
                  one_year = gr.Button("Analyse One Year Summary")
              with gr.Row(elem_id="col-container"):
                  with gr.Column(scale=1.0, min_width=0):
                      plot_for_year =gr.Plot(label="Sentiment for One Year", size=(500, 600))
              with gr.Row(elem_id="col-container"):
                with gr.Column(scale=1.0, min_width=150):
                  analyse_sentiment_for_year = gr.Button("Analyse Sentiment For One Year")

              # Event wiring: each sentiment plot is computed from the text of
              # the corresponding summary box, so the summary must be produced first
              analyse.click(self.main, input_news, [result_summary,key_value_pair_result])
              analyse_sentiment.click(self.display_graph,result_summary,[plot_for_day])
              one_year.click(self.one_year_summary,input_news,one_year_summary)
              analyse_sentiment_for_year.click(self.display_graph,one_year_summary,[plot_for_year])

        app.launch(debug=True)

if __name__ == "__main__":

  # When run as a script: create the extractor and start the Gradio app
  text_process = KeyValueExtractor()
  text_process.gradio_interface()