<!DOCTYPE html> |
<html lang="en"> |
<head> |
<meta charset="UTF-8" /> |
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
<script> |
/**
 * Parse a string of HTML markup into a document.
 *
 * @param {string} str - Raw HTML text.
 * @returns {Document} The document produced by `DOMParser` for the
 *   "text/html" MIME type.
 */
function strToHtml(str) {
  return new DOMParser().parseFromString(str, "text/html");
}
/**
 * Convert an HTML table into an array of plain objects.
 *
 * The first row supplies the property names; every following row becomes one
 * object mapping those names to the trimmed text of the matching cell.
 *
 * @param {?{rows: ArrayLike<{cells: ArrayLike<{textContent?: string, innerText?: string}>}>}} table
 *   A table element (or table-like object). May be null/undefined, e.g. when
 *   a `querySelector('table')` lookup found nothing.
 * @returns {Array<Object<string, string>>} One object per data row; an empty
 *   array when the table is missing or has no rows.
 */
function tableToObj(table) {
  const rows = Array.from(table?.rows ?? []);
  if (rows.length === 0) {
    return [];
  }

  // A missing cell (short row, e.g. a colspan "no results" row) reads as "".
  const cellText = (cell) =>
    (cell?.textContent || cell?.innerText || "").trim();

  const [headerRow, ...dataRows] = rows;
  const propNames = Array.from(headerRow.cells, cellText);

  return dataRows.map((row) => {
    const obj = {};
    propNames.forEach((name, index) => {
      obj[name] = cellText(row.cells[index]);
    });
    return obj;
  });
}
/**
 * Build display labels of the form "<Product Name> - <memory>" for GPU rows.
 *
 * Only the part of the "Memory" field before the first comma is kept.
 *
 * @param {Array<Object<string, string>>} gpus - Rows with "Product Name" and
 *   "Memory" keys.
 * @returns {string[]} One label per input row, in the same order.
 */
function formatGpu(gpus) {
  return gpus.map(function toLabel(gpu) {
    const productName = gpu["Product Name"];
    const [memory] = gpu["Memory"].split(",");
    return `${productName} - ${memory}`;
  });
}
// Lookup table from GGUF quantization name to the bits-per-weight value that
// calculateSizes feeds into modelSize for the selected quant.
const gguf_quants = {
  IQ1_S: 1.56,
  IQ2_XXS: 2.06,
  IQ2_XS: 2.31,
  IQ2_S: 2.5,
  IQ2_M: 2.7,
  IQ3_XXS: 3.06,
  IQ3_XS: 3.3,
  Q2_K: 3.35,
  Q3_K_S: 3.5,
  IQ3_S: 3.5,
  IQ3_M: 3.7,
  Q3_K_M: 3.91,
  Q3_K_L: 4.27,
  IQ4_XS: 4.25,
  IQ4_NL: 4.5,
  Q4_0: 4.55,
  Q4_K_S: 4.58,
  Q4_K_M: 4.85,
  Q5_0: 5.54,
  Q5_K_S: 5.54,
  Q5_K_M: 5.69,
  Q6_K: 6.59,
  Q8_0: 8.5,
};
/**
 * Fetch a model's config.json from the Hugging Face Hub and attach its
 * parameter count as `config.parameters`.
 *
 * The parameter count is taken from the first source that yields a positive
 * finite number:
 *   1. model.safetensors.index.json  (metadata.total_size / bytes per param)
 *   2. pytorch_model.bin.index.json  (metadata.total_size / bytes per param)
 *   3. the Hub model API             (safetensors.total)
 * Bytes per parameter are derived from the bit width found in
 * `config.torch_dtype` (e.g. "bfloat16" -> 16 bits -> 2 bytes).
 *
 * @param {string} hf_model - Repository id, e.g. "org/model".
 * @param {string} hf_token - Access token; an empty value sends no
 *   Authorization header.
 * @returns {Promise<Object>} The parsed config with an added `parameters`
 *   field.
 * @throws {Error} On 401/403 responses, when config.json cannot be fetched,
 *   or when no source provides a usable size.
 */
async function modelConfig(hf_model, hf_token) {
  // Declared locally: the bare assignment used before created a global and
  // throws a ReferenceError in strict/module code.
  const auth = hf_token
    ? { headers: { 'Authorization': `Bearer ${hf_token}` } }
    : {};
  const repo_url = `https://huggingface.co./${hf_model}`;

  const config_res = await fetch(`${repo_url}/raw/main/config.json`, auth);
  if (config_res.status === 401) {
    throw new Error("Model is either private or gated, you must provide an access token");
  }
  if (config_res.status === 403) {
    throw new Error("Model is either private or gated and provided access token does not have access to the repo");
  }
  if (!config_res.ok) {
    // Without this, e.g. a 404 body surfaces as an opaque JSON parse error.
    throw new Error(`Couldn't fetch config.json for "${hf_model}" (HTTP ${config_res.status})`);
  }
  const config = await config_res.json();

  // Use the first run of digits as the bit width so that names such as
  // "float8_e4m3fn" read as 8 bits. A missing dtype yields NaN, which makes
  // the two index-based sources fall through to the API source.
  const dtype_digits = /\d+/.exec(String(config["torch_dtype"] ?? ""));
  const bytes_per_param = Number.parseInt(dtype_digits?.[0] ?? "", 10) / 8;

  // Every lookup carries the auth options, including the model API fallback.
  const fetchJson = (url) => fetch(url, auth).then((r) => r.json());
  const size_sources = [
    async () =>
      (await fetchJson(`${repo_url}/resolve/main/model.safetensors.index.json`))["metadata"]["total_size"] / bytes_per_param,
    async () =>
      (await fetchJson(`${repo_url}/resolve/main/pytorch_model.bin.index.json`))["metadata"]["total_size"] / bytes_per_param,
    async () =>
      (await fetchJson(`https://huggingface.co./api/models/${hf_model}`))["safetensors"]["total"],
  ];

  let model_size = NaN;
  for (const source of size_sources) {
    try {
      const candidate = Number(await source());
      if (Number.isFinite(candidate) && candidate > 0) {
        model_size = candidate;
        break;
      }
    } catch {
      // Best effort: a missing file or field just means "try the next source".
    }
  }
  if (!Number.isFinite(model_size)) {
    throw new Error("Couldn't determine model size from safetensor/pytorch index metadata nor from the model card. If the model is an unsharded pytorch model, it is not supported by this calculator.");
  }

  config.parameters = model_size;
  return config;
}
/**
 * Sum the element counts of the per-batch input tensors.
 *
 * @param {number} [context=8192] - Context length in tokens.
 * @param {Object} model_config - Model config; reads `hidden_size`.
 * @param {number} [bsz=512] - Batch size.
 * @returns {number} tokens + embeddings + positions + KQ mask + K shift + sum.
 */
function inputBuffer(context=8192, model_config, bsz=512) {
  const parts = [
    bsz,                                // token ids
    model_config["hidden_size"] * bsz,  // input embeddings
    bsz,                                // positions
    context * bsz,                      // KQ mask
    context,                            // K shift
    bsz,                                // sum
  ];
  let total = parts[0];
  for (let i = 1; i < parts.length; i++) {
    total = total + parts[i];
  }
  return total;
}
/**
 * Estimate the compute buffer size.
 *
 * The formula is only defined for a batch size of 512: any other `bsz`
 * triggers an alert and the result is still computed as if it were 512.
 *
 * @param {number} [context=8192] - Context length in tokens.
 * @param {Object} model_config - Model config; reads `num_attention_heads`.
 * @param {number} [bsz=512] - Requested batch size (only used for the warning).
 * @returns {number} (context / 1024 * 2 + 0.75) * num_attention_heads * 1024 * 1024.
 */
function computeBuffer(context=8192, model_config, bsz=512) {
  if (bsz != 512) {
    // Message text cleaned up: the previous wording had a duplicated word
    // and misspellings.
    alert("batch size other than 512 is currently not supported for the compute buffer, using batch size 512 for compute buffer calculation, end result will be an overestimation");
  }
  return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024;
}
/**
 * Estimate the key/value cache size in bytes.
 *
 * @param {number} [context=8192] - Context length in tokens.
 * @param {Object} model_config - Model config; reads `num_attention_heads`,
 *   `num_key_value_heads`, `hidden_size` and `num_hidden_layers`.
 * @param {number} [cache_bit=16] - Bits per cached element.
 * @returns {number} 2 (keys and values) * elements * (cache_bit / 8).
 */
function kvCache(context=8192, model_config, cache_bit=16) {
  const n_head = model_config["num_attention_heads"];
  // Configs for plain multi-head attention omit num_key_value_heads; treat
  // that as "one KV head per attention head" instead of producing NaN.
  const n_head_kv = model_config["num_key_value_heads"] ?? n_head;
  const n_gqa = n_head / n_head_kv;
  const n_embd_gqa = model_config["hidden_size"] / n_gqa;
  const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context);
  const size = 2 * n_elements;
  return size * (cache_bit / 8);
}
/**
 * Total context-dependent memory: input buffer + KV cache + compute buffer,
 * rounded to two decimal places.
 *
 * @param {number} [context=8192] - Context length in tokens.
 * @param {Object} model_config - Model config forwarded to the three estimators.
 * @param {number} [bsz=512] - Batch size.
 * @param {number} [cache_bit=16] - Bits per KV cache element.
 * @returns {number} The rounded sum of the three estimates.
 */
function contextSize(context=8192, model_config, bsz=512, cache_bit=16) {
  const input_part = inputBuffer(context, model_config, bsz);
  const cache_part = kvCache(context, model_config, cache_bit);
  const compute_part = computeBuffer(context, model_config, bsz);
  const rounded = (input_part + cache_part + compute_part).toFixed(2);
  return Number.parseFloat(rounded);
}
/**
 * Size of the quantized weights in bytes, rounded to two decimal places.
 *
 * @param {Object} model_config - Model config; reads `parameters`.
 * @param {number} [bpw=4.5] - Bits per weight.
 * @returns {number} parameters * bpw / 8.
 */
function modelSize(model_config, bpw=4.5) {
  const bytes = model_config["parameters"] * bpw / 8;
  const rounded = bytes.toFixed(2);
  return Number.parseFloat(rounded);
}
/**
 * Read the form, estimate model / context / total memory and write the
 * results (in GiB, two decimals) into the result fields.
 *
 * When a GPU label is given, the total field is coloured by headroom:
 * green (> 0.5 GB free), yellow (fits with <= 0.5 GB free), red (does not
 * fit). The colour is cleared when no GPU is given or its memory cannot be
 * read from the label.
 *
 * Any error is reported to the user through `alert`.
 *
 * @param {string} format - "gguf" or "exl2"; other values use 0 bits per weight.
 * @returns {Promise<void>}
 */
async function calculateSizes(format) {
  const byId = (id) => document.getElementById(id);

  // Extract the memory size in GB from a label such as
  // "GeForce RTX 3090 - 24 GB". Only the text after the LAST "-" is used so
  // that hyphenated product names ("Max-Q") do not break the parse, and
  // MB/TB units are converted. Returns NaN when no number is found.
  const parseVramGb = (label) => {
    const tail = label.slice(label.lastIndexOf("-") + 1);
    const match = /^\s*(\d+(?:\.\d+)?)\s*(MB|GB|TB)?/i.exec(tail);
    if (match === null) {
      return NaN;
    }
    const amount = Number.parseFloat(match[1]);
    const unit = (match[2] ?? "GB").toUpperCase();
    if (unit === "MB") {
      return amount / 1024;
    }
    if (unit === "TB") {
      return amount * 1024;
    }
    return amount;
  };

  try {
    // Accept a bare repo id or a pasted Hub URL (with or without the
    // trailing dot on the host name).
    const hf_model = byId("modelsearch").value
      .trim()
      .replace(/^https?:\/\/huggingface\.co\.?\//, "");
    const model_config = await modelConfig(hf_model, byId("hf_token").value.trim());
    const context = Number.parseInt(byId("contextsize").value, 10);

    let bsz = 512;
    let cache_bit = 16;
    let bpw = 0;
    if (format === "gguf") {
      bsz = Number.parseInt(byId("batchsize").value, 10);
      // The button also contains an icon; trim so the lookup key is exact.
      bpw = gguf_quants[byId("quantsize").innerText.trim()];
    } else if (format === "exl2") {
      cache_bit = Number.parseInt(byId("kvCache").value, 10);
      bpw = Number.parseFloat(byId("bpw").value);
    }

    const model_size = modelSize(model_config, bpw);
    const context_size = contextSize(context, model_config, bsz, cache_bit);
    const total_size = (model_size + context_size) / 2**30;

    byId("resultmodel").innerText = (model_size / 2**30).toFixed(2);
    byId("resultcontext").innerText = (context_size / 2**30).toFixed(2);
    const result_total_el = byId("resulttotal");
    result_total_el.innerText = total_size.toFixed(2);

    const gpu = byId("gpusearch").value.trim();
    const vram = gpu === "" ? NaN : parseVramGb(gpu);
    if (Number.isNaN(vram)) {
      // No (readable) GPU: drop any colour left over from an earlier run.
      result_total_el.style.backgroundColor = "";
    } else if (vram - total_size > 0.5) {
      result_total_el.style.backgroundColor = "#bef264";
    } else if (vram - total_size > 0) {
      result_total_el.style.backgroundColor = "#facc15";
    } else {
      result_total_el.style.backgroundColor = "#ef4444";
    }
  } catch(e) {
    alert(e);
  }
}
</script> |
<link href="./styles.css" rel="stylesheet"> |
<title>Can I run it? - LLM VRAM Calculator</title> |
</head> |
<body class="p-8"> |
<div x-data="{ format: 'gguf' }" class="flex flex-col max-h-screen items-center mt-16 gap-10"> |
<h1 class="text-xl font-semibold leading-6 text-gray-900"> |
LLM Model, Can I run it? |
</h1> |
<p> |
To support gated or private repos, you need to <a href="https://huggingface.co./settings/tokens" style="color: #4444ff"><b>create an authentication token</b></a>, check the box <span style="color: #6e1818"><b>"Read access to contents of all public gated repos you can access"</b></span>, and then enter the token in the field below.
</p> |
<div class="flex flex-col gap-10"> |
<div class="w-auto flex flex-col gap-4"> |
<div |
class="relative" |
x-data="{ |
results: null, |
query: null |
}" |
> |
<label |
for="hf_token"
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
>Huggingface Token (optional)</label |
> |
<input |
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
id="hf_token" |
/> |
</div> |
<div |
class="relative" |
x-data="{ |
results: null, |
query: null |
}" |
> |
<label |
for="gpusearch" |
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
>GPU (optional)</label |
> |
<input |
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
placeholder="GeForce RTX 3090 - 24 GB" |
id="gpusearch" |
name="gpusearch" |
list="gpulist" |
x-model="query" |
@keypress.debounce.150ms="results = query === '' ? [] : formatGpu(tableToObj(strToHtml(await fetch('https://corsproxy.io/?https://www.techpowerup.com/gpu-specs/?ajaxsrch=' + query).then(r => r.text())).querySelector('table')))" |
/> |
<datalist id="gpulist"> |
<template x-for="item in results"> |
<option :value="item" x-text="item"></option> |
</template> |
</datalist> |
</div> |
<div class="flex flex-row gap-4 relative"> |
<label |
for="modelsearch"
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
> |
Model (unquantized) |
</label> |
<div |
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
x-data="{ |
open: false, |
value: 'Nexusflow/Starling-LM-7B-beta', |
results: null, |
toggle() { |
if (this.open) { |
return this.close() |
} |
this.$refs.input.focus() |
this.open = true |
}, |
close(focusAfter) { |
if (! this.open) return |
this.open = false |
focusAfter && focusAfter.focus() |
} |
}" |
x-on:keydown.escape.prevent.stop="close($refs.input)" |
x-id="['model-typeahead']" |
class="relative" |
> |
<input |
id="modelsearch" |
x-ref="input" |
x-on:click="toggle()" |
@keypress.debounce.150ms="results = (await |
fetch('https://huggingface.co./api/quicksearch?type=model&q=' + |
encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));" |
:aria-expanded="open" |
:aria-controls="$id('model-typeahead')" |
x-model="value" |
class="flex justify-between items-center gap-2 w-full" |
/> |
<div |
x-ref="panel" |
x-show="open" |
x-transition.origin.top.left |
x-on:click.outside="close($refs.input)" |
:id="$id('model-typeahead')" |
style="display: none" |
class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10" |
> |
<template x-for="result in results"> |
<a |
@click="value = result.id; close($refs.input)" |
x-text="result.id" |
class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500" |
></a> |
</template> |
</div> |
</div> |
</div> |
<div class="relative"> |
<label |
for="contextsize" |
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
> |
Context Size |
</label> |
<input |
value="8192" |
type="number" |
name="contextsize" |
id="contextsize" |
step="1024" |
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
/> |
</div> |
<div class="relative"> |
<label |
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
>Quant Format</label |
> |
<fieldset |
x-model="format" |
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
> |
<legend class="sr-only">Quant format</legend> |
<div |
class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0" |
> |
<div class="flex items-center"> |
<input |
id="gguf-format" |
name="quant-format" |
type="radio" |
value="gguf" |
checked |
class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" |
/> |
<label |
for="gguf-format" |
class="ml-3 block text-sm font-medium leading-6 text-gray-900" |
>GGUF</label |
> |
</div> |
<div class="flex items-center"> |
<input |
id="exl2-format" |
name="quant-format" |
type="radio" |
value="exl2" |
class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" |
/> |
<label |
for="exl2-format" |
class="ml-3 block text-sm font-medium leading-6 text-gray-900" |
>EXL2</label |
> |
</div> |
<div class="flex items-center"> |
<input |
id="gptq-format" |
name="quant-format" |
type="radio" |
disabled |
value="gptq" |
class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" |
/> |
<label |
for="gptq-format" |
class="ml-3 block text-sm font-medium leading-6 text-gray-900" |
>GPTQ (coming soon)</label |
> |
</div> |
</div> |
</fieldset> |
</div> |
<div x-show="format === 'exl2'" class="flex flex-row gap-4"> |
<div class="relative flex-grow"> |
<label |
for="bpw" |
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
>
Bits per weight (BPW)
</label>
<input |
value="4.5" |
type="number" |
step="0.01" |
id="bpw" |
name="bpw" |
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
/> |
</div> |
<div |
class="flex-shrink relative rounded-md" |
> |
<div |
class="w-fit p-3 h-full flex items-center gap-2 justify-center rounded-md border-0 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
> |
<label |
for="kvCache" |
class="inline-block bg-white text-xs font-medium text-gray-900" |
> |
KV Cache |
</label> |
<select id="kvCache" name="kvCache"> |
<option value="16">16 bit</option> |
<option value="8">8 bit</option> |
<option value="4">4 bit</option> |
</select> |
</div> |
</div> |
</div> |
<div x-show="format === 'gguf'" class="relative"> |
<div class="flex flex-row gap-4"> |
<label |
for="quantsize"
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
> |
Quantization Size |
</label> |
<div |
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
x-data="{ |
open: false, |
value: '', |
toggle() { |
if (this.open) { |
return this.close() |
} |
this.$refs.button.focus() |
this.open = true |
}, |
close(focusAfter) { |
if (! this.open) return |
this.open = false |
focusAfter && focusAfter.focus() |
} |
}" |
x-on:keydown.escape.prevent.stop="close($refs.button)" |
x-id="['dropdown-button']" |
class="relative" |
> |
<button |
x-ref="button" |
x-on:click="toggle()" |
:aria-expanded="open" |
:aria-controls="$id('dropdown-button')" |
type="button" |
id="quantsize" |
x-text="value.length === 0 ? 'Q4_K_S' : value" |
class="flex justify-between items-center gap-2 w-full" |
> |
Q4_K_S |
<svg |
xmlns="http://www.w3.org/2000/svg" |
class="h-5 w-5 text-gray-400" |
viewBox="0 0 20 20" |
fill="currentColor" |
> |
<path |
fill-rule="evenodd" |
d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" |
clip-rule="evenodd" |
/> |
</svg> |
</button> |
<div |
x-data="{ quants: [ |
'IQ1_S', |
'IQ2_XXS', |
'IQ2_XS', |
'IQ2_S', |
'IQ2_M', |
'IQ3_XXS', |
'IQ3_XS', |
'Q2_K', |
'Q3_K_S', |
'IQ3_S', |
'IQ3_M', |
'Q3_K_M', |
'Q3_K_L', |
'IQ4_XS', |
'IQ4_NL', |
'Q4_0', |
'Q4_K_S', |
'Q4_K_M', |
'Q5_0', |
'Q5_K_S', |
'Q5_K_M', |
'Q6_K', |
'Q8_0' |
]}" |
x-ref="panel" |
x-show="open" |
x-transition.origin.top.left |
x-on:click.outside="close($refs.button)" |
:id="$id('dropdown-button')" |
style="display: none" |
class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10" |
> |
<template x-for="quant in quants"> |
<a |
@click="value = quant; close($refs.button)" |
x-text="quant" |
class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500" |
></a> |
</template> |
</div> |
</div> |
<div class="relative"> |
<label |
for="batchsize" |
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
> |
Batch Size |
</label> |
<input |
value="512" |
type="number" |
step="128" |
id="batchsize" |
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
/> |
</div> |
</div> |
</div> |
<button |
type="button" |
class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600" |
@click="calculateSizes(format)" |
> |
Submit |
</button> |
</div> |
<div class="w-auto flex flex-col gap-4"> |
<div class="relative"> |
<label |
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
> |
Model Size (GB) |
</label> |
<div |
id="resultmodel" |
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
>4.20</div> |
</div> |
<div class="relative"> |
<label |
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
> |
Context Size (GB) |
</label> |
<div |
id="resultcontext" |
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
>6.90</div> |
</div> |
<div class="relative"> |
<label |
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
> |
Total Size (GB) |
</label> |
<div |
id="resulttotal" |
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
>420.69</div> |
</div> |
</div> |
</div> |
</div> |
<script |
src="https://cdn.jsdelivr.net/npm/[email protected]/dist/cdn.min.js" |
></script> |
<script defer> |
// Run one calculation on page load so the result fields reflect the form's
// initial values for the GGUF format.
calculateSizes("gguf");
</script> |
</body> |
</html> |