loubnabnl HF staff committed on
Commit
fa5e188
•
1 Parent(s): 0c4db1f

add link to github-code data

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -6,6 +6,9 @@ import json
6
  import pandas as pd
7
  import requests
8
 
 
 
 
9
  @st.cache(allow_output_mutation=True)
10
  def load_tokenizer(model_ckpt):
11
  return AutoTokenizer.from_pretrained(model_ckpt)
@@ -40,7 +43,7 @@ if selected_task == " ":
40
 
41
  elif selected_task == "Pretraining datasets":
42
  st.title("Pretraining datasets 📚")
43
- st.markdown("Preview of some code files from Github repositories")
44
  df = pd.read_csv("utils/data_preview.csv")
45
  st.dataframe(df)
46
  for model in selected_models:
@@ -57,7 +60,7 @@ elif selected_task == "Model architecture":
57
  st.markdown(f"## {model}")
58
  st.markdown(text)
59
  if model == "InCoder":
60
- st.image("https://huggingface.co/datasets/loubnabnl/repo-images/raw/main/incoder.png", caption="Figure 1: InCoder training", width=700)
61
 
62
  elif selected_task == "Model evaluation":
63
  st.title("Code models evaluation 📊")
 
6
  import pandas as pd
7
  import requests
8
 
9
+ GITHUB_CODE = "https://huggingface.co/datasets/lvwerra/github-code"
10
+ INCODER_IMG = "https://huggingface.co/datasets/loubnabnl/repo-images/raw/main/incoder.png"
11
+
12
  @st.cache(allow_output_mutation=True)
13
  def load_tokenizer(model_ckpt):
14
  return AutoTokenizer.from_pretrained(model_ckpt)
 
43
 
44
  elif selected_task == "Pretraining datasets":
45
  st.title("Pretraining datasets 📚")
46
+ st.markdown(f"Preview of some code files from Github repositories in [Github-code dataset]({GITHUB_CODE}):")
47
  df = pd.read_csv("utils/data_preview.csv")
48
  st.dataframe(df)
49
  for model in selected_models:
 
60
  st.markdown(f"## {model}")
61
  st.markdown(text)
62
  if model == "InCoder":
63
+ st.image(INCODER_IMG, caption="Figure 1: InCoder training", width=700)
64
 
65
  elif selected_task == "Model evaluation":
66
  st.title("Code models evaluation 📊")