arousrihab committed on
Commit
8f3023d
1 Parent(s): 616a701

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import streamlit as st
3
+ from extractive import preprocess_text, get_sentence_embeddings, build_semantic_graph, apply_textrank, generate_summary
4
+ from abstractive import abstractive_summary
5
+ from utils import extract_named_entities
6
+ from transformers import AutoTokenizer, AutoModel
7
+
8
# Load the pre-trained BioBERT encoder and its tokenizer.
model_name = "dmis-lab/biobert-base-cased-v1.2"


@st.cache_resource
def _load_encoder(name):
    """Return the ``(tokenizer, model)`` pair for the checkpoint *name*.

    Streamlit re-executes this script from the top on every widget
    interaction. Caching the loaded objects as a shared resource means the
    tokenizer and model are built once and reused on later reruns, instead
    of being loaded again each time.

    Args:
        name: Hugging Face model identifier passed to ``from_pretrained``.

    Returns:
        A tuple ``(tokenizer, model)``.
    """
    return AutoTokenizer.from_pretrained(name), AutoModel.from_pretrained(name)


# Keep the module-level names the rest of the script relies on.
tokenizer, model = _load_encoder(model_name)
12
+
13
# Page header: app title followed by a short usage hint covering both modes.
st.title("Hybrid Summarization App")
st.write(
    "Upload text files for multi-document summarization "
    "or enter text manually for single-document summarization."
)
16
+
17
# Multi-document summarization
st.header("Multi-Document Summarization")
uploaded_files = st.file_uploader("Upload text files", type="txt", accept_multiple_files=True)

if uploaded_files:
    # getvalue() returns the whole upload buffer regardless of its current
    # read position, so the decoded text never depends on earlier reads.
    texts = [uploaded.getvalue().decode("utf-8") for uploaded in uploaded_files]

    # Perform extractive summarization for each document
    extractive_summaries = []
    for text in texts:
        sentences = preprocess_text(text)
        embeddings = get_sentence_embeddings(sentences, model, tokenizer)
        graph = build_semantic_graph(embeddings)
        ranked_sentences = apply_textrank(graph, sentences)
        ext_summary = generate_summary(ranked_sentences, sentences, max_length_ratio=0.5)
        extractive_summaries.append(ext_summary)

    # Combine extractive summaries for multi-document summarization
    combined_extractive_summary = " ".join(extractive_summaries)
    st.write("Combined Extractive Summary:", combined_extractive_summary)

    # Extract named entities from the combined summary
    entities = extract_named_entities(combined_extractive_summary)
    st.write("Named Entities:", entities)

    # Choose summary length ratio for abstractive summarization.
    # The explicit key gives this widget its own identity: the script builds
    # another selectbox with the same label and options, and Streamlit raises
    # a duplicate-widget error when identical widgets are created without
    # distinct keys in the same run.
    abs_ratio_option = st.selectbox(
        "Choose abstractive summary length ratio",
        ("1/2", "1/3", "1/4"),
        key="multi_doc_abs_ratio",
    )
    abs_ratio = {"1/2": 0.5, "1/3": 0.33, "1/4": 0.25}[abs_ratio_option]

    # Perform abstractive summarization on the combined extractive summary,
    # with the first element of every extracted entity appended to the input.
    combined_input = combined_extractive_summary + " " + " ".join(ent[0] for ent in entities)
    abs_summary = abstractive_summary(
        combined_input,
        max_length_ratio=abs_ratio,
        min_length_ratio=abs_ratio / 2,
    )
    st.write("Abstractive Summary:", abs_summary)
50
+
51
# Single-document summarization
st.header("Single-Document Summarization")
text_input = st.text_area("Enter text here")

if text_input:
    # Extract named entities
    entities = extract_named_entities(text_input)
    st.write("Named Entities:", entities)

    # Perform extractive summarization
    sentences = preprocess_text(text_input)
    embeddings = get_sentence_embeddings(sentences, model, tokenizer)
    graph = build_semantic_graph(embeddings)
    ranked_sentences = apply_textrank(graph, sentences)
    ext_summary = generate_summary(ranked_sentences, sentences, max_length_ratio=0.5)
    st.write("Extractive Summary:", ext_summary)

    # Choose summary length ratio for abstractive summarization.
    # The explicit key gives this widget its own identity: the script builds
    # another selectbox with the same label and options, and Streamlit raises
    # a duplicate-widget error when identical widgets are created without
    # distinct keys in the same run.
    abs_ratio_option = st.selectbox(
        "Choose abstractive summary length ratio",
        ("1/2", "1/3", "1/4"),
        key="single_doc_abs_ratio",
    )
    abs_ratio = {"1/2": 0.5, "1/3": 0.33, "1/4": 0.25}[abs_ratio_option]

    # Perform abstractive summarization on the extractive summary, with the
    # first element of every extracted entity appended to the input.
    combined_input = ext_summary + " " + " ".join(ent[0] for ent in entities)
    abs_summary = abstractive_summary(
        combined_input,
        max_length_ratio=abs_ratio,
        min_length_ratio=abs_ratio / 2,
    )
    st.write("Abstractive Summary:", abs_summary)