SitwalaM committed on
Commit
f6b9e7f
·
1 Parent(s): 6ca6f28

first commit

Browse files
Files changed (1) hide show
  1. app.py +66 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that splits text into paragraphs and sentences.

The text comes either from an uploaded ``.txt`` file or, when no file is
uploaded, from a text area. The split results are shown side by side and can
be downloaded as CSV files.
"""

import streamlit as st
import pandas as pd
from dolma.core.utils import split_paragraphs, split_sentences


def _decode_upload(raw_bytes: bytes) -> str:
    """Decode the bytes of an uploaded file into text.

    ``utf-8-sig`` is used instead of plain ``utf-8`` so that a leading
    byte-order mark is dropped rather than ending up as a stray U+FEFF
    character at the start of the first paragraph.

    Args:
        raw_bytes: Raw content of the uploaded file.

    Returns:
        The decoded text. If the content is not valid UTF-8, undecodable
        bytes are replaced with U+FFFD and a warning is shown in the app
        instead of aborting with ``UnicodeDecodeError``.
    """
    try:
        return raw_bytes.decode("utf-8-sig")
    except UnicodeDecodeError:
        st.warning(
            "The uploaded file is not valid UTF-8; "
            "undecodable bytes were replaced."
        )
        return raw_bytes.decode("utf-8-sig", errors="replace")


# Title of the Streamlit app
st.title('Text Splitter: Paragraphs and Sentences')

# File uploader for text document
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])

if uploaded_file is not None:
    # getvalue() returns the whole buffer regardless of the current stream
    # position, so the result does not depend on earlier reads.
    sample_text = _decode_upload(uploaded_file.getvalue())
else:
    # Text input from user (only offered when no file has been uploaded)
    sample_text = st.text_area("Or paste your text below", height=300)

if sample_text:
    # Split the text into paragraphs and sentences
    paragraphs = split_paragraphs(sample_text)
    sentences = split_sentences(sample_text)

    # Extract the plain text of each span once; it is reused for both the
    # on-screen display and the CSV export below.
    paragraph_texts = [paragraph.text for paragraph in paragraphs]
    sentence_texts = [sentence.text for sentence in sentences]

    # Show number of paragraphs and sentences
    st.write(f"Number of paragraphs: {len(paragraphs)}")
    st.write(f"Number of sentences: {len(sentences)}")

    # Create two columns for separate views
    col1, col2 = st.columns(2)

    # Display paragraphs in the left column
    with col1:
        st.header("Paragraphs")
        for i, text in enumerate(paragraph_texts):
            st.subheader(f"Paragraph {i + 1}")
            st.write(text)

    # Display sentences in the right column
    with col2:
        st.header("Sentences")
        for i, text in enumerate(sentence_texts):
            st.subheader(f"Sentence {i + 1}")
            st.write(text)

    # Convert paragraphs and sentences to pandas DataFrames
    paragraphs_df = pd.DataFrame(paragraph_texts, columns=["Paragraph"])
    sentences_df = pd.DataFrame(sentence_texts, columns=["Sentence"])

    # Option to download the paragraphs and sentences as CSV files
    st.download_button(
        label="Download Paragraphs as CSV",
        data=paragraphs_df.to_csv(index=False).encode('utf-8'),
        file_name="paragraphs.csv",
        mime="text/csv"
    )

    st.download_button(
        label="Download Sentences as CSV",
        data=sentences_df.to_csv(index=False).encode('utf-8'),
        file_name="sentences.csv",
        mime="text/csv"
    )

else:
    st.write("Please upload a text file or paste your text to split it into paragraphs and sentences.")