Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,14 @@ import zipfile
|
|
12 |
from PIL import Image
|
13 |
|
14 |
EXCLUDED_FILES = ['app.py', 'requirements.txt', 'pre-requirements.txt', 'packages.txt', 'README.md','.gitattributes', "backup.py","Dockerfile"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
URLS = {
|
16 |
"Lumiere": "https://lumiere-video.github.io/",
|
17 |
"National Library of Medicine": "https://www.nlm.nih.gov/",
|
@@ -83,14 +91,18 @@ def download_html_and_files(url, subdir):
|
|
83 |
html_content = requests.get(url).text
|
84 |
soup = BeautifulSoup(html_content, 'html.parser')
|
85 |
base_url = urllib.parse.urlunparse(urllib.parse.urlparse(url)._replace(path='', params='', query='', fragment=''))
|
|
|
86 |
for link in soup.find_all('a'):
|
87 |
file_url = urllib.parse.urljoin(base_url, link.get('href'))
|
88 |
local_filename = os.path.join(subdir, urllib.parse.urlparse(file_url).path.split('/')[-1])
|
89 |
if not local_filename.endswith('/') and local_filename != subdir:
|
90 |
link['href'] = local_filename
|
91 |
-
download_file(file_url, local_filename)
|
|
|
92 |
with open(os.path.join(subdir, "index.html"), "w") as file:
|
93 |
file.write(str(soup))
|
|
|
|
|
94 |
|
95 |
def list_files(directory_path='.'):
|
96 |
files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
|
@@ -135,25 +147,21 @@ def show_file_operations(file_path, sequence_number):
|
|
135 |
|
136 |
file_sequence_numbers = {}
|
137 |
|
138 |
-
def show_file_content(file_path):
|
139 |
_, file_extension = os.path.splitext(file_path)
|
|
|
140 |
try:
|
|
|
141 |
if file_extension in ['.png', '.jpg', '.jpeg']:
|
142 |
image_url = file_path.replace('File:','').replace('/','')
|
143 |
-
|
144 |
-
markdown_link = f"[![Image]({image_url})]({image_url})" #file_path
|
145 |
st.markdown(markdown_link, unsafe_allow_html=True)
|
146 |
-
elif file_extension in ['.
|
147 |
-
with open(file_path, "r") as file:
|
148 |
-
content = file.read()
|
149 |
-
edited_content = st.text_area(f"Edit {os.path.basename(file_path)}", value=content, height=250)
|
150 |
-
if st.button(f"Save {os.path.basename(file_path)}"):
|
151 |
-
with open(file_path, "w") as file:
|
152 |
-
file.write(edited_content)
|
153 |
-
st.success(f"Saved {os.path.basename(file_path)}!")
|
154 |
-
elif file_extension in ['.html', '.txt']:
|
155 |
with open(file_path, "r") as file:
|
|
|
156 |
st.markdown(file.read(), unsafe_allow_html=True)
|
|
|
|
|
157 |
except Exception as e:
|
158 |
st.error(f"Error reading file {file_path}: {e}")
|
159 |
|
@@ -231,14 +239,16 @@ def main():
|
|
231 |
json.dump(history, f)
|
232 |
|
233 |
if st.sidebar.button('π₯ Get All the Content', help="Download content from the selected URL"):
|
234 |
-
download_html_and_files(url, history[url])
|
235 |
-
|
236 |
-
|
|
|
|
|
|
|
237 |
if st.sidebar.button('π Show Download Links', help="Show all available download links"):
|
238 |
for subdir in history.values():
|
239 |
show_download_links(subdir)
|
240 |
|
241 |
-
|
242 |
if st.sidebar.button("π Delete All", help="Delete all downloaded content"):
|
243 |
# Clear history file
|
244 |
with open("history.json", "w") as f:
|
|
|
12 |
from PIL import Image
|
13 |
|
14 |
EXCLUDED_FILES = ['app.py', 'requirements.txt', 'pre-requirements.txt', 'packages.txt', 'README.md','.gitattributes', "backup.py","Dockerfile"]
|
15 |
+
# Emoji mapping for different file types
|
16 |
+
FILE_EMOJIS = {
|
17 |
+
".jpg": "πΌοΈ", ".jpeg": "πΌοΈ", ".png": "πΌοΈ",
|
18 |
+
".html": "π", ".htm": "π", ".txt": "π",
|
19 |
+
".pdf": "π", ".doc": "π", ".docx": "π",
|
20 |
+
".xls": "π", ".xlsx": "π", ".ppt": "π", ".pptx": "π",
|
21 |
+
# Add more mappings as needed
|
22 |
+
}
|
23 |
URLS = {
|
24 |
"Lumiere": "https://lumiere-video.github.io/",
|
25 |
"National Library of Medicine": "https://www.nlm.nih.gov/",
|
|
|
91 |
html_content = requests.get(url).text
|
92 |
soup = BeautifulSoup(html_content, 'html.parser')
|
93 |
base_url = urllib.parse.urlunparse(urllib.parse.urlparse(url)._replace(path='', params='', query='', fragment=''))
|
94 |
+
file_urls = {} # To store original URLs of files
|
95 |
for link in soup.find_all('a'):
|
96 |
file_url = urllib.parse.urljoin(base_url, link.get('href'))
|
97 |
local_filename = os.path.join(subdir, urllib.parse.urlparse(file_url).path.split('/')[-1])
|
98 |
if not local_filename.endswith('/') and local_filename != subdir:
|
99 |
link['href'] = local_filename
|
100 |
+
if download_file(file_url, local_filename):
|
101 |
+
file_urls[local_filename] = file_url # Store original URL
|
102 |
with open(os.path.join(subdir, "index.html"), "w") as file:
|
103 |
file.write(str(soup))
|
104 |
+
return file_urls
|
105 |
+
|
106 |
|
107 |
def list_files(directory_path='.'):
|
108 |
files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
|
|
|
147 |
|
148 |
file_sequence_numbers = {}
|
149 |
|
150 |
def show_file_content(file_path, original_url):
    """Render *file_path* in the Streamlit UI, tagged with a file-type emoji
    and linked back to *original_url* (the URL it was downloaded from).

    Images are embedded as clickable markdown, HTML/text files are rendered
    inline, and any other type gets just a labelled link. Read failures are
    reported via ``st.error`` instead of crashing the app.
    """
    _, file_extension = os.path.splitext(file_path)
    # Default emoji for unknown file types.
    # NOTE(review): "π" is a mojibake leftover of the original emoji —
    # restore from the upstream file.
    emoji = FILE_EMOJIS.get(file_extension, "π")
    try:
        if file_extension in ['.png', '.jpg', '.jpeg']:
            # NOTE(review): strips 'File:' and every '/' — presumably to turn
            # the path into a bare relative name; confirm it matches where
            # download_html_and_files actually saves images.
            image_url = file_path.replace('File:', '').replace('/', '')
            markdown_link = f"{emoji} [![Image]({image_url})]({original_url})"
            st.markdown(markdown_link, unsafe_allow_html=True)
        elif file_extension in ['.html', '.htm', '.txt']:
            # Explicit encoding: downloaded pages are typically UTF-8 and the
            # platform default codec may fail on them.
            with open(file_path, "r", encoding="utf-8") as file:
                st.markdown(f"{emoji} [{os.path.basename(file_path)}]({original_url})",
                            unsafe_allow_html=True)
                st.markdown(file.read(), unsafe_allow_html=True)
        else:
            # Unknown type: just show an emoji-tagged link to the source.
            st.markdown(f"{emoji} [{os.path.basename(file_path)}]({original_url})",
                        unsafe_allow_html=True)
    except Exception as e:
        st.error(f"Error reading file {file_path}: {e}")
|
167 |
|
|
|
239 |
json.dump(history, f)
|
240 |
|
241 |
if st.sidebar.button('π₯ Get All the Content', help="Download content from the selected URL"):
|
242 |
+
file_urls = download_html_and_files(url, history[url])
|
243 |
+
for file in list_files(history[url]):
|
244 |
+
file_path = os.path.join(history[url], file)
|
245 |
+
original_url = file_urls.get(file_path, "#")
|
246 |
+
show_file_content(file_path, original_url)
|
247 |
+
|
248 |
if st.sidebar.button('π Show Download Links', help="Show all available download links"):
|
249 |
for subdir in history.values():
|
250 |
show_download_links(subdir)
|
251 |
|
|
|
252 |
if st.sidebar.button("π Delete All", help="Delete all downloaded content"):
|
253 |
# Clear history file
|
254 |
with open("history.json", "w") as f:
|