iofu728 committed on
Commit
770d29d
1 Parent(s): 27e09a4

Feature(MInference): update information

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -14,12 +14,15 @@ HF_TOKEN = os.environ.get("HF_TOKEN", None)
14
 
15
 
16
  DESCRIPTION = """
17
- # MInference 1.0: Accelerating Pre-filling for Long-Context LLMs via Dynamic Sparse Attention (Under Review) [[paper](https://arxiv.org/abs/2406.05736)]
18
  _Huiqiang Jiang†, Yucheng Li†, Chengruidong Zhang†, Qianhui Wu, Xufang Luo, Surin Ahn, Zhenhua Han, Amir H. Abdi, Dongsheng Li, Chin-Yew Lin, Yuqing Yang and Lili Qiu_
19
 
20
  <h2 style="text-align: center;"><a href="https://github.com/microsoft/MInference" target="blank"> [Code]</a>
21
- <a href="https://hqjiang.com/minference.html" target="blank"> [Project Page]</a>
22
- <a href="https://arxiv.org/abs/2406.05736" target="blank"> [Paper]</a></h2>
 
 
 
23
 
24
  <font color="brown"><b>This is only a deployment demo. Due to limited GPU resources, we do not provide an online demo. You will need to follow the code below to try MInference locally.</b></font>
25
 
@@ -55,7 +58,7 @@ h1 {
55
  """
56
 
57
  # Load the tokenizer and model
58
- model_name = "gradientai/Llama-3-8B-Instruct-Gradient-1048k"
59
  tokenizer = AutoTokenizer.from_pretrained(model_name)
60
  model = AutoModelForCausalLM.from_pretrained(
61
  model_name, torch_dtype="auto", device_map="auto"
 
14
 
15
 
16
  DESCRIPTION = """
17
+ # [MInference 1.0: Accelerating Pre-filling for Long-Context LLMs via Dynamic Sparse Attention](https://aka.ms/MInference) (Under Review, ES-FoMo @ ICML'24)
18
  _Huiqiang Jiang†, Yucheng Li†, Chengruidong Zhang†, Qianhui Wu, Xufang Luo, Surin Ahn, Zhenhua Han, Amir H. Abdi, Dongsheng Li, Chin-Yew Lin, Yuqing Yang and Lili Qiu_
19
 
20
  <h2 style="text-align: center;"><a href="https://github.com/microsoft/MInference" target="blank"> [Code]</a>
21
+ <a href="https://aka.ms/MInference" target="blank"> [Project Page]</a>
22
+ <a href="https://arxiv.org/abs/2407.02490" target="blank"> [Paper]</a></h2>
23
+
24
+ ## News
25
+ - 🧩 We will present **MInference 1.0** at the _**Microsoft Booth**_ and _**ES-FoMo**_ at ICML'24. See you in Vienna!
26
 
27
  <font color="brown"><b>This is only a deployment demo. Due to limited GPU resources, we do not provide an online demo. You will need to follow the code below to try MInference locally.</b></font>
28
 
 
58
  """
59
 
60
  # Load the tokenizer and model
61
+ model_name = "gradientai/Llama-3-8B-Instruct-Gradient-1048k" if torch.cuda.is_available() else "Qwen/Qwen2-0.5B"
62
  tokenizer = AutoTokenizer.from_pretrained(model_name)
63
  model = AutoModelForCausalLM.from_pretrained(
64
  model_name, torch_dtype="auto", device_map="auto"