GianJSX committed on
Commit
3d86e8b
·
1 Parent(s): 31ff83b
Files changed (1) hide show
  1. app.py +32 -30
app.py CHANGED
@@ -15,26 +15,32 @@ os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
15
  os.environ["LANGCHAIN_API_KEY"]=st.secrets["LANGCHAIN_API_KEY"]
16
  os.environ["LANGCHAIN_PROJECT"]=st.secrets["LANGCHAIN_PROJECT"]
17
 
 
 
18
  @traceable(run_type="tool")
19
- def start(run):
20
- st.write("This app helps you to extract data from HTML code using web scraping. It uses GPT-3.5-turbo to generate the code for you. \n *Contribute to this project on [GitHub](https://github.com/CognitiveLabs/GPT-auto-webscraping)*")
21
-
22
- with st.expander(label="Check out the video demo"):
23
- yt_video = st.video("https://www.youtube.com/watch?v=_zeCun4OlCc")
24
-
25
- info_text = """
26
- **Quick start** \n
27
- Fill the input with <HTML code>.
28
- * Choose a repeating element on the page, like a product on a list.
29
- * Inspect the HTML code and copy the element.
30
-
31
- After generating the "output format" and the code, paste the complete HTML code of the page in the last input to test it
32
- """
33
- st.write(info_text)
34
- st.image("https://j.gifs.com/gpqvPl.gif")
35
- return run
36
- # use time library
37
- start(run='start')
 
 
 
 
38
 
39
  if assistant_api_key == '':
40
  assistant_api_key = st.secrets["API_KEY"]
@@ -43,20 +49,16 @@ if assistant_api_key == '':
43
  else:
44
  gpt_assistant = GPTAssistant(assistant_api_key)
45
 
46
-
47
- html_content = None
48
  # check if html_content is an url, and show error if it is
 
 
 
 
49
 
50
- def html_content_input():
51
- html_content = st.text_input("Paste the HTML tags of the item you want to extract:", max_chars=10000, help="example: <li>Product 1 </li>, watch the video above")
52
- if html_content:
53
- if html_content.startswith("http"):
54
- st.write("Please paste the HTML piece code, not the URL")
55
- html_content = None
56
-
57
- return st.button("Generate output format & code")
58
 
59
- extract_button = html_content_input()
60
 
61
  if html_content and extract_button:
62
  try:
 
15
  os.environ["LANGCHAIN_API_KEY"]=st.secrets["LANGCHAIN_API_KEY"]
16
  os.environ["LANGCHAIN_PROJECT"]=st.secrets["LANGCHAIN_PROJECT"]
17
 
18
+ st.session_state['session_started'] = False
19
+
20
  @traceable(run_type="tool")
21
+ def start_session(session_started):
22
+ return session_started
23
+
24
+ # change session_started to True
25
+ if not st.session_state['session_started']:
26
+ st.session_state['session_started'] = start_session(True)
27
+
28
+ st.write("This app helps you to extract data from HTML code using web scraping. It uses GPT-3.5-turbo to generate the code for you. \n *Contribute to this project on [GitHub](https://github.com/CognitiveLabs/GPT-auto-webscraping)*")
29
+
30
+ with st.expander(label="Check out the video demo"):
31
+ yt_video = st.video("https://www.youtube.com/watch?v=_zeCun4OlCc")
32
+
33
+ info_text = """
34
+ **Quick start** \n
35
+ Fill the input with <HTML code>.
36
+ -Choose a repeating element on the page, like a product on a list.
37
+ -Inspect the HTML code and copy the element.
38
+ -After generating the "output format" and the code, paste the complete HTML code of the page in the last input to test it
39
+ """
40
+ st.write(info_text)
41
+ st.image("https://j.gifs.com/gpqvPl.gif")
42
+
43
+
44
 
45
  if assistant_api_key == '':
46
  assistant_api_key = st.secrets["API_KEY"]
 
49
  else:
50
  gpt_assistant = GPTAssistant(assistant_api_key)
51
 
52
+ # get the html content
53
+ html_content = st.text_input("Paste the HTML tags of the item you want to extract:", max_chars=10000, help="example: <li>Product 1 </li>, watch the video above")
54
  # check if html_content is an url, and show error if it is
55
+ if html_content:
56
+ if html_content.startswith("http"):
57
+ st.write("Please paste the HTML piece code, not the URL")
58
+ html_content = None
59
 
60
+ extract_button = st.button("Generate output format & code")
 
 
 
 
 
 
 
61
 
 
62
 
63
  if html_content and extract_button:
64
  try: