wgcv committed on
Commit
00eb2b3
1 Parent(s): 9b6c0d7

Add some urls, modify documentation and the structure

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +47 -30
.gitignore CHANGED
@@ -1,5 +1,6 @@
1
  *.env
2
  .env
 
3
  __pycache__
4
  .DS_Store
5
  *.DS_Store
 
1
  *.env
2
  .env
3
+ venv
4
  __pycache__
5
  .DS_Store
6
  *.DS_Store
app.py CHANGED
@@ -21,7 +21,6 @@ st.sidebar.caption("Tidy Tabs - Title")
21
  user_input_url = st.sidebar.text_input('Enter your url:')
22
 
23
  error_message_url = None
24
-
25
  def load_tab():
26
  if(user_input_url):
27
  # Error message state
@@ -35,40 +34,25 @@ def load_tab():
35
  else:
36
 
37
  with st.spinner('Wait for it...'):
38
- st.sidebar.write(f'**<title>: **{title}')
39
  time.sleep(1)
40
  with st.spinner('Wait for it...'):
41
- st.sidebar.write(f'**T5-small: **{predict_model_t5(text)}')
42
  with st.spinner('Wait for it...'):
43
- st.sidebar.write(f'**Pegasus xsum: **{predict_model_pegasus(text)}')
44
  with st.spinner('Wait for it...'):
45
- st.sidebar.write(f'**Bart-Large-Cnn: **{predict_model_bart(text)}')
46
  else:
47
  error_message = st.sidebar.error(f'Is not a valid URL. Please enter a valid URL.')
48
 
49
- button_clicked = st.sidebar.button("Rename the tab", on_click=load_tab())
50
 
 
51
  st.sidebar.divider()
52
  ###
53
  # Content
54
  ###
55
  st.image('./assets/banner_tabs.png', width=350, caption='Navigate Through Powerful Features with Intuitive Tabs')
56
 
57
- with st.status("Loading models...", expanded=True, state="complete") as models:
58
- st.write("Loading https://huggingface.co/wgcv/tidy-tab-model-t5-small")
59
- get_tidy_tab_t5()
60
- st.write("Loaded T5-Small...")
61
-
62
- st.write("Loading from https://huggingface.co/wgcv/tidy-tab-model-pegasus-xsum")
63
- get_tidy_tab_pegasus()
64
- st.write("Loaded Pegasus xsum...")
65
-
66
- st.write("Loading from https://huggingface.co/wgcv/tidy-tab-model-bart-large-cnn")
67
- load_model_bart()
68
- st.write("Loaded Pegasus Bart-Large...")
69
-
70
- models.update(label="All models loaded!", state="complete", expanded=False)
71
-
72
 
73
  st.info("All three models are deployed in a single Hugging Face Space using the free tier. Specifications: CPU-based (no GPU), 2 vCPU cores, 16 GB RAM, and 50 GB storage.",icon="ℹ️")
74
  ###
@@ -82,14 +66,27 @@ Here are some examples you can try that aren't included in the training or test
82
  # Examples
83
  ```
84
  Urls:
85
- https://www.nytimes.com/2007/01/10/technology/10apple.html
86
- https://www.nytimes.com/2021/04/15/arts/design/Met-museum-roof-garden-da-corte.html
87
- https://www.forbes.com/sites/davidphelan/2024/07/09/apple-iphone-16-pro-major-design-upgrade-coming-new-report-claims/
88
- https://www.crn.com/news/channel-programs/18828789/microsoft-to-release-windows-xp-service-pack-1
89
- https://github.com/torvalds
90
- https://www.rickbayless.com/recipe/pastor-style-tacos/
91
-
92
- Some websites, like x.com, are not accessible because they use JavaScript engines to load content, which is beyond the scope of this project.
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  ```
95
 
@@ -166,12 +163,17 @@ Given the substantial volume of data, training a model from scratch was deemed i
166
  - 406M params
167
 
168
 
169
-
170
  ### Potential avenues for performance enhancement include:
171
  - Data preprocessing optimization
172
  - Dataset expansion
173
  - Comprehensive hyperparameter tuning
174
  - These strategies could significantly improve model efficacy.
 
 
 
 
 
175
 
176
  ## co2_eq_emissions
177
  - emissions: 0.16 grams of CO2
@@ -186,3 +188,18 @@ Given the substantial volume of data, training a model from scratch was deemed i
186
 
187
  """, unsafe_allow_html=False, help=None)
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  user_input_url = st.sidebar.text_input('Enter your url:')
22
 
23
  error_message_url = None
 
24
  def load_tab():
25
  if(user_input_url):
26
  # Error message state
 
34
  else:
35
 
36
  with st.spinner('Wait for it...'):
37
+ st.sidebar.write(f'**<title>: ** {title}')
38
  time.sleep(1)
39
  with st.spinner('Wait for it...'):
40
+ st.sidebar.write(f'**T5-small: ** {predict_model_t5(text)}')
41
  with st.spinner('Wait for it...'):
42
+ st.sidebar.write(f'**Pegasus xsum: ** {predict_model_pegasus(text)}')
43
  with st.spinner('Wait for it...'):
44
+ st.sidebar.write(f'**Bart-Large-Cnn: ** {predict_model_bart(text)}')
45
  else:
46
  error_message = st.sidebar.error(f'Is not a valid URL. Please enter a valid URL.')
47
 
 
48
 
49
+ button_clicked = st.sidebar.button("Rename the tab", on_click=load_tab())
50
  st.sidebar.divider()
51
  ###
52
  # Content
53
  ###
54
  st.image('./assets/banner_tabs.png', width=350, caption='Navigate Through Powerful Features with Intuitive Tabs')
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  st.info("All three models are deployed in a single Hugging Face Space using the free tier. Specifications: CPU-based (no GPU), 2 vCPU cores, 16 GB RAM, and 50 GB storage.",icon="ℹ️")
58
  ###
 
66
  # Examples
67
  ```
68
  Urls:
69
+ High Similarity to Training Data:
70
+ https://www.nytimes.com/2007/01/10/technology/10apple.html
71
+ https://www.nytimes.com/2021/04/15/arts/design/Met-museum-roof-garden-da-corte.html
72
+ https://github.com/torvalds
73
+
74
+ Less than 2% Overlap with Training Data:
75
+ https://substack.com/browse/staff-picks/post/145699191
76
+ https://brentcates.substack.com/p/julian-assange-is-now-free-to-collapse
77
+
78
+ Moderate Similarity to Training Data:
79
+ https://techcrunch.com/2024/07/05/openai-breach-is-a-reminder-that-ai-companies-are-treasure-troves-for-hackers/
80
+ https://www.forbes.com/sites/davidphelan/2024/07/09/apple-iphone-16-pro-major-design-upgrade-coming-new-report-claims/
81
+ https://www.crn.com/news/channel-programs/18828789/microsoft-to-release-windows-xp-service-pack-1
82
+ https://www.rickbayless.com/recipe/pastor-style-tacos/
83
+
84
+ No Similarity to Training Data:
85
+ https://www.notioneverything.com/blog/notion-note-taking-templates
86
+ https://www.eluniverso.com/noticias/ecuador/quito-prohibido-circular-dos-personas-moto-seguridad-nota/
87
+ https://www.swift.org/blog/swift-on-windows/
88
+
89
+ Some websites, like x.com or instagram.com, are not accessible because they load their content dynamically with JavaScript, which is beyond the scope of this project.
90
 
91
  ```
92
 
 
163
  - 406M params
164
 
165
 
166
+
167
  ### Potential avenues for performance enhancement include:
168
  - Data preprocessing optimization
169
  - Dataset expansion
170
  - Comprehensive hyperparameter tuning
171
  - These strategies could significantly improve model efficacy.
172
+ - Add more languages to the dataset
173
+ ### Access to the Models
174
+ `https://huggingface.co/wgcv/tidy-tab-model-t5-small`
175
+ `https://huggingface.co/wgcv/tidy-tab-model-pegasus-xsum`
176
+ `https://huggingface.co/wgcv/tidy-tab-model-bart-large-cnn`
177
 
178
  ## co2_eq_emissions
179
  - emissions: 0.16 grams of CO2
 
188
 
189
  """, unsafe_allow_html=False, help=None)
190
 
191
+
192
+ with st.sidebar.status("Loading models...", expanded=True, state="complete") as models:
193
+ st.write("Loading 1/3 (https://huggingface.co/wgcv/tidy-tab-model-t5-small)")
194
+ get_tidy_tab_t5()
195
+ st.write("Loaded T5-Small...")
196
+
197
+ st.write("Loading 2/3 (https://huggingface.co/wgcv/tidy-tab-model-pegasus-xsum)")
198
+ get_tidy_tab_pegasus()
199
+ st.write("Loaded Pegasus xsum...")
200
+
201
+ st.write("Loading 3/3 (https://huggingface.co/wgcv/tidy-tab-model-bart-large-cnn)")
202
+ load_model_bart()
203
+ st.write("Loaded Pegasus Bart-Large...")
204
+
205
+ models.update(label="All models loaded!", state="complete", expanded=False)