update app
Files changed:

- app.py (+11, -21)
- utils/languages.json (+3, -3)
- utils/table_contents.md (+1, -1)
app.py CHANGED
````diff
@@ -39,7 +39,7 @@ def load_model(values, language):
     model = values["model"]
     if not model:
         text = f"""No model is available for {language.capitalize()}. If you trained a model on this language, let us know in\
-        in the [Community tab](https://huggingface.co/spaces/loubnabnl/the-stack-bot/discussions) to feature your model!\n\
+        in the [Community tab](https://huggingface.co/spaces/loubnabnl/the-stack-bot/discussions) to feature your model!\n\n\
         You can also train your own model on The Stack using the instructions below π"""
         st.write(text)
     if st.button("Fine-tune your own model", key=4):
@@ -50,8 +50,8 @@ def load_model(values, language):
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-tokenizer = AutoTokenizer.from_pretrained({model})
-model = AutoModelForCausalLM.from_pretrained({model}, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained("{model}")
+model = AutoModelForCausalLM.from_pretrained("{model}", trust_remote_code=True)
 
 inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt")
 outputs = model.generate(inputs)
````
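The quoting change matters because these lines sit inside an f-string that renders a copy-pastable snippet for users: with `{model}` unquoted, the snippet would read `AutoTokenizer.from_pretrained(bigcode/santacoder)`, which is not valid Python. A sketch of what the fixed snippet renders to, assuming the `bigcode/santacoder` checkpoint listed in utils/languages.json (it ships custom modeling code, hence `trust_remote_code=True`):

```python
# What the fixed f-string renders for model = "bigcode/santacoder" (a sketch).
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "bigcode/santacoder"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# trust_remote_code=True lets transformers run the custom model code
# shipped with this checkpoint.
model = AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True)

inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
```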
````diff
@@ -60,7 +60,6 @@ def load_model(values, language):
     """
     st.markdown(text)
     st.markdown(code)
-    st.write(f"The scores of this model are the following: {values['scores']}")
 
 def generate_code(
     demo, gen_prompt, max_new_tokens=40, temperature=0.2, seed=0
@@ -78,31 +77,24 @@ def generate_code(
         generated_text = ""
     return generated_text
 
-def init_nested_buttons():
-    if "Models trained on dataset" not in st.session_state:
-        st.session_state["Models trained on dataset"] = False
-
-    if "Generate code" not in st.session_state:
-        st.session_state["Generate code"] = False
-
-    if st.button("Models trained on dataset"):
-        st.session_state["Models trained on dataset"] = not st.session_state["Models trained on dataset"]
-
-
 languages = load_languages()
 
+st.header("Languages of The Stack π")
+st.markdown("The Stack contains over 6TB of permissively-licensed source code files covering 358 programming languages. Select one to get started:")
 col1, col2 = st.columns([1, 1.5])
 with col1:
-    selected_language = st.selectbox("
+    selected_language = st.selectbox("Programming Language", list(languages.keys()), label_visibility="collapsed", key=1)
 
 st.write(f"Here's how you can load the {selected_language.capitalize()} subset of The Stack:")
 code = how_to_load(selected_language)
-
+
+with st.expander("More info about the dataset"):
     st.write(f"The dataset contains {languages[selected_language]['num_examples']} examples.")
 # we can add some stats about files
 
-
-
+st.header("Models trained on The Stack π€")
+st.write("Here we show models trained on the language you select as part of BigCode project.")
+with st.expander(f"Models trained on {selected_language.capitalize()}"):
     load_model(languages[selected_language], selected_language)
 
 if languages[selected_language]["model"] and languages[selected_language]["gradio_demo"]:
````
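For context, the new layout puts each section under an `st.header` and tucks details into `st.expander` blocks, and the selectbox now gets a real label hidden via `label_visibility="collapsed"` (recent Streamlit versions warn on empty widget labels). A minimal sketch of the pattern, with placeholder strings rather than the app's actual copy:

```python
# Sketch of the new layout pattern: the header carries the visible title,
# so the widget label can be collapsed while staying non-empty.
import streamlit as st

st.header("Languages")
col1, col2 = st.columns([1, 1.5])
with col1:
    language = st.selectbox(
        "Programming Language",            # label is required, but hidden below
        ["python", "java", "javascript"],
        label_visibility="collapsed",
    )

with st.expander("More info about the dataset"):
    st.write(f"Details for {language} go here.")
```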
````diff
@@ -114,8 +106,6 @@ if st.session_state["Models trained on dataset"]:
 ).strip()
 
 if st.button("Generate code"):
-    st.session_state["Generate code"] = not st.session_state["Generate code"]
-if st.session_state["Generate code"]:
     with st.spinner("Generating code..."):
         generated_text = generate_code(
             demo=languages[selected_language]["gradio_demo"],
````
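Dropping `init_nested_buttons` and the session-state toggles is safe here because `st.button` returns `True` exactly once, on the rerun triggered by its click; the nested-button workaround is only needed when one button's result must survive reruns caused by other widgets. Since generation now runs inline under the button, no flag is required. A sketch of the simplified control flow (the placeholder string stands in for the app's `generate_code(...)` call):

```python
# st.button returns True only on the script rerun triggered by the click,
# so the work can happen immediately under the button, with no
# session_state bookkeeping.
import streamlit as st

gen_prompt = st.text_area("Prompt", "def print_hello_world():")
if st.button("Generate code"):
    with st.spinner("Generating code..."):
        generated_text = gen_prompt + "\n    print('Hello World!')"  # placeholder
    st.code(generated_text)
```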
utils/languages.json CHANGED
````diff
@@ -1,6 +1,6 @@
-{"python": {"num_examples": 10, "model": "bigcode/santacoder", "
-"java": {"num_examples": 10, "model": "bigcode/santacoder", "
-"javascript": {"num_examples": 10, "model": "bigcode/santacoder", "
+{"python": {"num_examples": 10, "model": "bigcode/santacoder", "gradio_demo": "https://loubnabnl-santa-demo.hf.space"},
+"java": {"num_examples": 10, "model": "bigcode/santacoder", "gradio_demo": "https://loubnabnl-santa-demo.hf.space"},
+"javascript": {"num_examples": 10, "model": "bigcode/santacoder", "gradio_demo": "https://loubnabnl-santa-demo.hf.space"},
 "typescript": {"num_examples": 10, "model": ""},
 "go": {"num_examples": 10, "model": ""},
 "php": {"num_examples": 10, "model": ""},
````
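The three updated entries now carry a `gradio_demo` URL alongside `model`, while languages without a trained model keep `"model": ""` and no `gradio_demo` key at all. That is why app.py's check `languages[selected_language]["model"] and languages[selected_language]["gradio_demo"]` works: `and` short-circuits on the empty model string before the missing key is touched. A sketch of how the file might be consumed (the app's real `load_languages` lives in app.py and is not part of this diff):

```python
# Hypothetical reader for utils/languages.json, mirroring the app's checks.
import json

def load_languages(path="utils/languages.json"):
    with open(path, encoding="utf-8") as f:
        return json.load(f)

languages = load_languages()
entry = languages["python"]
print(entry["model"])        # -> "bigcode/santacoder"
print(entry["gradio_demo"])  # -> "https://loubnabnl-santa-demo.hf.space"

# "typescript" has model == "" and no "gradio_demo" key; the short-circuiting
# `and` below never evaluates the missing key, so there is no KeyError.
lang = "typescript"
if languages[lang]["model"] and languages[lang]["gradio_demo"]:
    print("demo available")
```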
utils/table_contents.md CHANGED
````diff
@@ -6,4 +6,4 @@
 
 3 - Demos for code generation
 
-If you trained a model on The Stack, let us know so we can feature it! π
+If you trained a model on The Stack, let us know in the [Community tab](https://huggingface.co/spaces/loubnabnl/the-stack-bot/discussions) so we can feature it! π
````