Spaces:

GIZ
/

SDSN-demo

Runtime error

App Files Files Community

prashant committed on Nov 1, 2022

Commit

2caced7

1 Parent(s): 3f0df44

moving old SDGandPreProc files

Browse files

Files changed (8) hide show

appStore/sdg_analysis.py +1 -48
udfPreprocess/sdg_classifier.py +8 -2
udfPreprocess/uploadAndExample.py +16 -28
{udfPreprocess → ver0.1 scripts}/cleaning.py +0 -0
{udfPreprocess → ver0.1 scripts}/docPreprocessing.py +0 -0
{udfPreprocess → ver0.1 scripts}/sdg.py +0 -0
ver0.1 scripts/sdg_analysis.py +160 -0
ver0.1 scripts/uploadAndExample.py +52 -0

appStore/sdg_analysis.py CHANGED Viewed

@@ -3,18 +3,13 @@ import glob, os, sys;
 sys.path.append('../udfPreprocess')
 #import helper
-import udfPreprocess.docPreprocessing as pre
-import udfPreprocess.cleaning as clean
 #import needed libraries
 import seaborn as sns
-from pandas import DataFrame
-from keybert import KeyBERT
-from transformers import pipeline
 import matplotlib.pyplot as plt
 import numpy as np
 import streamlit as st
-import pandas as pd
 import docx
 from docx.shared import Inches
 from docx.shared import Pt
@@ -29,17 +24,6 @@ logger = logging.getLogger(__name__)
-# @st.cache(allow_output_mutation=True)
-# def load_keyBert():
-#     return KeyBERT()
-# @st.cache(allow_output_mutation=True)
-# def load_sdgClassifier():
-#     classifier = pipeline("text-classification", model= "jonas/sdg_classifier_osdg")
-#     return classifier
 def app():
     with st.container():
@@ -66,19 +50,6 @@ def app():
                 df, x = sdg_classification(paraList)
-                # classifier = load_sdgClassifier()
-                # labels = classifier(par_list)
-                # labels_= [(l['label'],l['score']) for l in labels]
-                # df2 = DataFrame(labels_, columns=["SDG", "Relevancy"])
-                # df2['text'] = par_list
-                # df2 = df2.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
-                # df2.index += 1
-                # df2 =df2[df2['Relevancy']>.85]
-                # x = df2['SDG'].value_counts()
-                # df3 = df2.copy()
                 plt.rcParams['font.size'] = 25
                 colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))
                 # plot
@@ -88,26 +59,8 @@ def app():
                 # fig.savefig('temp.png', bbox_inches='tight',dpi= 100)
                 st.markdown("#### Anything related to SDGs? ####")
-                # st.markdown("#### 🎈 Anything related to SDGs? ####")
                 c4, c5, c6 = st.columns([2, 2, 2])
-                # Add styling
-                cmGreen = sns.light_palette("green", as_cmap=True)
-                cmRed = sns.light_palette("red", as_cmap=True)
-                # df2 = df2.style.background_gradient(
-                #     cmap=cmGreen,
-                #     subset=[
-                #         "Relevancy",
-                #     ],
-                # )
-                # format_dictionary = {
-                #     "Relevancy": "{:.1%}",
-                # }
-                # df2 = df2.format(format_dictionary)
                 with c5:
                     st.pyplot(fig)

 sys.path.append('../udfPreprocess')
 #import helper
 #import needed libraries
 import seaborn as sns
 import matplotlib.pyplot as plt
 import numpy as np
 import streamlit as st
 import docx
 from docx.shared import Inches
 from docx.shared import Pt
 def app():
     with st.container():
                 df, x = sdg_classification(paraList)
                 plt.rcParams['font.size'] = 25
                 colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))
                 # plot
                 # fig.savefig('temp.png', bbox_inches='tight',dpi= 100)
                 st.markdown("#### Anything related to SDGs? ####")
                 c4, c5, c6 = st.columns([2, 2, 2])
                 with c5:
                     st.pyplot(fig)

udfPreprocess/sdg_classifier.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from tkinter import Text
 from haystack.nodes import TransformersDocumentClassifier
 from haystack.schema import Document
 from typing import List, Tuple
@@ -71,11 +70,18 @@ def sdg_classification(haystackdoc:List[Document])->Tuple[DataFrame,Series]:
     return df, x
-def runSDGPreprocessingPipeline()->List[Text]:
     """
     creates the pipeline and runs the preprocessing pipeline,
     the params for pipeline are fetched from paramconfig
     """
     file_path = st.session_state['filepath']
     file_name = st.session_state['filename']

 from haystack.nodes import TransformersDocumentClassifier
 from haystack.schema import Document
 from typing import List, Tuple
     return df, x
+def runSDGPreprocessingPipeline()->List[Document]:
     """
     creates the pipeline and runs the preprocessing pipeline,
     the params for pipeline are fetched from paramconfig
+    Return
+    --------------
+    List[Document]: When preprocessing pipeline is run, the output dictionary
+    has four objects. For the Haystack implementation of SDG classification, we
+    need to use the List of Haystack Document, which can be fetched by
+    key = 'documents' on output.
     """
     file_path = st.session_state['filepath']
     file_name = st.session_state['filename']

udfPreprocess/uploadAndExample.py CHANGED Viewed

@@ -1,52 +1,40 @@
 import streamlit as st
 import tempfile
-import udfPreprocess.docPreprocessing as pre
-import udfPreprocess.cleaning as clean
 def add_upload(choice):
     if choice == 'Upload Document':
-          uploaded_file = st.sidebar.file_uploader('Upload the File', type=['pdf', 'docx', 'txt'])
-          if uploaded_file is not None:
             with tempfile.NamedTemporaryFile(mode="wb") as temp:
                 bytes_data = uploaded_file.getvalue()
                 temp.write(bytes_data)
                 st.session_state['filename'] = uploaded_file.name
-                # st.write("Uploaded Filename: ", uploaded_file.name)
                 file_name =  uploaded_file.name
                 file_path = temp.name
-                # docs = pre.load_document(file_path, file_name)
-                # haystackDoc, dataframeDoc, textData, paraList = clean.preprocessing(docs)
                 st.session_state['filename'] = file_name
-                # st.session_state['paraList'] = paraList
                 st.session_state['filepath'] = file_path
     else:
-          # listing the options
-          option = st.sidebar.selectbox('Select the example document',
-                                ('South Africa:Low Emission strategy',
-                                'Ethiopia: 10 Year Development Plan'))
-          if option is 'South Africa:Low Emission strategy':
             file_name = file_path  = 'sample/South Africa_s Low Emission Development Strategy.txt'
             st.session_state['filename'] = file_name
             st.sesion_state['filepath'] = file_path
-            # st.write("Selected document:", file_name.split('/')[1])
-            # with open('sample/South Africa_s Low Emission Development Strategy.txt') as dfile:
-            # file = open('sample/South Africa_s Low Emission Development Strategy.txt', 'wb')
-          else:
-            # with open('sample/Ethiopia_s_2021_10 Year Development Plan.txt') as dfile:
             file_name = file_path =  'sample/Ethiopia_s_2021_10 Year Development Plan.txt'
             st.session_state['filename'] = file_name
-            st.session_state['filepath'] = file_path
-            # st.write("Selected document:", file_name.split('/')[1])
-          # if option is not None:
-          #   docs = pre.load_document(file_path,file_name)
-          #   haystackDoc, dataframeDoc, textData, paraList = clean.preprocessing(docs)
-          #   st.session_state['docs'] = docs
-          #   st.session_state['paraList'] = paraList

 import streamlit as st
 import tempfile
 def add_upload(choice):
+    """
+    Provdies the user with choice to either 'Upload Document' or 'Try Example'.
+    Based on user choice runs streamlit processes and save the path and name of
+    the 'file' to streamlit session_state which then can be fetched later.
+    """
     if choice == 'Upload Document':
+        uploaded_file = st.sidebar.file_uploader('Upload the File',
+                            type=['pdf', 'docx', 'txt'])
+        if uploaded_file is not None:
             with tempfile.NamedTemporaryFile(mode="wb") as temp:
                 bytes_data = uploaded_file.getvalue()
                 temp.write(bytes_data)
                 st.session_state['filename'] = uploaded_file.name
                 file_name =  uploaded_file.name
                 file_path = temp.name
                 st.session_state['filename'] = file_name
                 st.session_state['filepath'] = file_path
     else:
+        # listing the options
+        option = st.sidebar.selectbox('Select the example document',
+                              ('South Africa:Low Emission strategy',
+                              'Ethiopia: 10 Year Development Plan'))
+        if option is 'South Africa:Low Emission strategy':
             file_name = file_path  = 'sample/South Africa_s Low Emission Development Strategy.txt'
             st.session_state['filename'] = file_name
             st.sesion_state['filepath'] = file_path
+        else:
             file_name = file_path =  'sample/Ethiopia_s_2021_10 Year Development Plan.txt'
             st.session_state['filename'] = file_name
+            st.session_state['filepath'] = file_path

{udfPreprocess → ver0.1 scripts}/cleaning.py RENAMED Viewed

File without changes

{udfPreprocess → ver0.1 scripts}/docPreprocessing.py RENAMED Viewed

File without changes

{udfPreprocess → ver0.1 scripts}/sdg.py RENAMED Viewed

File without changes

ver0.1 scripts/sdg_analysis.py ADDED Viewed

	@@ -0,0 +1,160 @@

+# set path
+import glob, os, sys;
+sys.path.append('../udfPreprocess')
+#import helper
+#import needed libraries
+import seaborn as sns
+import matplotlib.pyplot as plt
+import numpy as np
+import streamlit as st
+import docx
+from docx.shared import Inches
+from docx.shared import Pt
+from docx.enum.style import WD_STYLE_TYPE
+from udfPreprocess.sdg_classifier import sdg_classification
+from udfPreprocess.sdg_classifier import runSDGPreprocessingPipeline
+import configparser
+import tempfile
+import sqlite3
+import logging
+logger = logging.getLogger(__name__)
+def app():
+    with st.container():
+        st.markdown("<h1 style='text-align: center; color: black;'> SDSN x GIZ Policy Action Tracking v0.1</h1>", unsafe_allow_html=True)
+        st.write(' ')
+        st.write(' ')
+    with st.expander("ℹ️ - About this app", expanded=False):
+        st.write(
+            """
+            The *Analyse Policy Document* app is an easy-to-use interface built in Streamlit for analyzing policy documents with respect to SDG Classification for the paragraphs/texts in the document - developed by GIZ Data and the Sustainable Development Solution Network. \n
+            """)
+        st.markdown("")
+    with st.container():
+        if 'filepath' in st.session_state:
+            paraList = runSDGPreprocessingPipeline()
+            with st.spinner("Running SDG"):
+                df, x = sdg_classification(paraList)
+                # classifier = load_sdgClassifier()
+                # labels = classifier(par_list)
+                # labels_= [(l['label'],l['score']) for l in labels]
+                # df2 = DataFrame(labels_, columns=["SDG", "Relevancy"])
+                # df2['text'] = par_list
+                # df2 = df2.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
+                # df2.index += 1
+                # df2 =df2[df2['Relevancy']>.85]
+                # x = df2['SDG'].value_counts()
+                # df3 = df2.copy()
+                plt.rcParams['font.size'] = 25
+                colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))
+                # plot
+                fig, ax = plt.subplots()
+                ax.pie(x, colors=colors, radius=2, center=(4, 4),
+                    wedgeprops={"linewidth": 1, "edgecolor": "white"}, frame=False,labels =list(x.index))
+                # fig.savefig('temp.png', bbox_inches='tight',dpi= 100)
+                st.markdown("#### Anything related to SDGs? ####")
+                # st.markdown("#### 🎈 Anything related to SDGs? ####")
+                c4, c5, c6 = st.columns([2, 2, 2])
+                # Add styling
+                cmGreen = sns.light_palette("green", as_cmap=True)
+                cmRed = sns.light_palette("red", as_cmap=True)
+                # df2 = df2.style.background_gradient(
+                #     cmap=cmGreen,
+                #     subset=[
+                #         "Relevancy",
+                #     ],
+                # )
+                # format_dictionary = {
+                #     "Relevancy": "{:.1%}",
+                # }
+                # df2 = df2.format(format_dictionary)
+                with c5:
+                    st.pyplot(fig)
+                c7, c8, c9 = st.columns([1, 10, 1])
+                with c8:
+                    st.table(df)
+#     1. Keyword heatmap \n
+ #               2. SDG Classification for the paragraphs/texts in the document
+ #
+    # with st.container():
+    #     if 'docs' in st.session_state:
+    #         docs = st.session_state['docs']
+    #         docs_processed, df, all_text, par_list = clean.preprocessingForSDG(docs)
+    #         # paraList = st.session_state['paraList']
+    #         logging.info("keybert")
+    #         with st.spinner("Running Key bert"):
+    #             kw_model = load_keyBert()
+    #             keywords = kw_model.extract_keywords(
+    #             all_text,
+    #             keyphrase_ngram_range=(1, 3),
+    #             use_mmr=True,
+    #             stop_words="english",
+    #             top_n=10,
+    #             diversity=0.7,
+    #             )
+    #             st.markdown("## 🎈 What is my document about?")
+    #             df = (
+    #                 DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
+    #                 .sort_values(by="Relevancy", ascending=False)
+    #                 .reset_index(drop=True)
+    #             )
+    #             df1 = (
+    #                 DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
+    #                 .sort_values(by="Relevancy", ascending=False)
+    #                 .reset_index(drop=True)
+    #             )
+    #             df.index += 1
+    #             # Add styling
+    #             cmGreen = sns.light_palette("green", as_cmap=True)
+    #             cmRed = sns.light_palette("red", as_cmap=True)
+    #             df = df.style.background_gradient(
+    #                 cmap=cmGreen,
+    #                 subset=[
+    #                     "Relevancy",
+    #                 ],
+    #             )
+    #             c1, c2, c3 = st.columns([1, 3, 1])
+    #             format_dictionary = {
+    #                 "Relevancy": "{:.1%}",
+    #             }
+    #             df = df.format(format_dictionary)
+    #             with c2:
+    #
+    #               st.table(df)

ver0.1 scripts/uploadAndExample.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import streamlit as st
+import tempfile
+import udfPreprocess.docPreprocessing as pre
+import udfPreprocess.cleaning as clean
+def add_upload(choice):
+    if choice == 'Upload Document':
+          uploaded_file = st.sidebar.file_uploader('Upload the File', type=['pdf', 'docx', 'txt'])
+          if uploaded_file is not None:
+            with tempfile.NamedTemporaryFile(mode="wb") as temp:
+                bytes_data = uploaded_file.getvalue()
+                temp.write(bytes_data)
+                st.session_state['filename'] = uploaded_file.name
+                # st.write("Uploaded Filename: ", uploaded_file.name)
+                file_name =  uploaded_file.name
+                file_path = temp.name
+                # docs = pre.load_document(file_path, file_name)
+                # haystackDoc, dataframeDoc, textData, paraList = clean.preprocessing(docs)
+                st.session_state['filename'] = file_name
+                # st.session_state['paraList'] = paraList
+                st.session_state['filepath'] = file_path
+    else:
+          # listing the options
+          option = st.sidebar.selectbox('Select the example document',
+                                ('South Africa:Low Emission strategy',
+                                'Ethiopia: 10 Year Development Plan'))
+          if option is 'South Africa:Low Emission strategy':
+            file_name = file_path  = 'sample/South Africa_s Low Emission Development Strategy.txt'
+            st.session_state['filename'] = file_name
+            st.sesion_state['filepath'] = file_path
+            # st.write("Selected document:", file_name.split('/')[1])
+            # with open('sample/South Africa_s Low Emission Development Strategy.txt') as dfile:
+            # file = open('sample/South Africa_s Low Emission Development Strategy.txt', 'wb')
+          else:
+            # with open('sample/Ethiopia_s_2021_10 Year Development Plan.txt') as dfile:
+            file_name = file_path =  'sample/Ethiopia_s_2021_10 Year Development Plan.txt'
+            st.session_state['filename'] = file_name
+            st.session_state['filepath'] = file_path
+            # st.write("Selected document:", file_name.split('/')[1])
+          # if option is not None:
+          #   docs = pre.load_document(file_path,file_name)
+          #   haystackDoc, dataframeDoc, textData, paraList = clean.preprocessing(docs)
+          #   st.session_state['docs'] = docs
+          #   st.session_state['paraList'] = paraList