/**
 * Language-family tree viewer.
 *
 * Once the DOM is ready this handler loads `data/<family>.json`, annotates the
 * tree with ids and leaf counts, builds the tokenizer/script filter checkboxes
 * and renders the currently visible part of the tree with D3 (zoomable, with a
 * hover box and click-to-expand nodes).
 */
document.addEventListener("DOMContentLoaded", function() {
    const select = document.getElementById('language-family-select');
    const hoverBox = document.getElementById('hover-box');
    const searchBar = document.getElementById('search-bar');
    const depthSelector = document.getElementById('depth-number');
    const tokenizerFilterDiv = document.getElementById('tokenizer-filter');
    const scriptFiltersDiv = document.getElementById('script-filter');
    const showNonGlotlidCheckbox = document.getElementById('show-non-glotlid');
    const clearTokenizersFilter = document.getElementById('clear-tokenizers');
    const clearScriptsFilter = document.getElementById('clear-scripts');

    // Placeholder name seeded into both filter lists; nodes without any
    // tokenizer / script are matched against it in drawVisibleNodes().
    const PLACEHOLDER_NAME = 'x';

    // Id of the node the next render should center on, or null for none.
    let nodeToCenter = null;
    // Annotated tree of the currently loaded family (null until loaded).
    let currentTreeData = null;
    // Ids of the nodes whose children are explicitly expanded.
    let expandedNodes = new Set();
    // Zoom/pan transform preserved across re-renders.
    let currentTransform = d3.zoomIdentity;
    // Unique tokenizer names, script names and the tokenizer color scale.
    let tokenizerNames;
    let scriptNames;
    let color;

    /**
     * Marks a node as expanded or collapsed.
     * @param {number} id - Node id assigned by addNodeIds().
     * @param {boolean} [state=true] - true to expand, false to collapse.
     */
    function setExpanded(id, state = true) {
        if (state) {
            expandedNodes.add(id);
        } else {
            // Set.prototype.delete is a no-op for absent ids.
            expandedNodes.delete(id);
        }
    }

    /**
     * Returns an annotated copy of the tree. Every node gets:
     *  - `id`: pre-order index (the root is 0),
     *  - `subtreeSize`: number of leaves below it (a leaf counts as 1),
     *  - `children`, `scripts`, `tokenizers`, `native_tokenizers`: defaulted
     *    to empty containers when missing so later code can rely on them.
     * The input object is not modified.
     * @param {object} tree - Root node as parsed from the JSON file.
     * @returns {object} The annotated root.
     */
    function addNodeIds(tree) {
        function addIdToSubtree(subtree, nextId = 0) {
            const id = nextId++;
            const sourceChildren = subtree.children || [];
            let leafCount = sourceChildren.length === 0 ? 1 : 0;
            const children = sourceChildren.map(child => {
                const [annotated, idAfter, childLeaves] = addIdToSubtree(child, nextId);
                nextId = idAfter;
                leafCount += childLeaves;
                return annotated;
            });
            const node = {
                ...subtree,
                id,
                children,
                scripts: subtree.scripts || [],
                tokenizers: subtree.tokenizers || {},
                native_tokenizers: subtree.native_tokenizers || [],
                subtreeSize: leafCount,
            };
            return [node, nextId, leafCount];
        }

        const [annotatedTree] = addIdToSubtree(tree);
        return annotatedTree;
    }

    /**
     * Fetches the JSON of the selected family, rebuilds the filters and
     * renders the tree re-centered with only the root expanded.
     */
    function loadLanguageFamily() {
        const family = select.value;
        fetch(`data/${family}.json`)
            .then(response => {
                if (!response.ok) {
                    throw new Error(`Failed to load data/${family}.json (HTTP ${response.status})`);
                }
                return response.json();
            })
            .then(data => {
                currentTreeData = addNodeIds(data);
                expandedNodes = new Set([0]);
                updateTokenizerFilter(currentTreeData);
                updateScriptFilter(currentTreeData);
                drawVisibleNodes(true);
            })
            .catch(error => {
                // Keep whatever tree is on screen; just report the failure.
                console.error(error);
            });
    }

    /**
     * Collects every script name used in the tree.
     * @param {object} node - Subtree root.
     * @param {Set<string>} [namesSet] - Accumulator, seeded with the placeholder.
     * @returns {Set<string>} The accumulator.
     */
    function getScriptNames(node, namesSet = new Set([PLACEHOLDER_NAME])) {
        if (!node) return namesSet;
        for (const script of node.scripts || []) {
            namesSet.add(script);
        }
        (node.children || []).forEach(child => getScriptNames(child, namesSet));
        return namesSet;
    }

    /**
     * Collects the `original_lang_name` of every tokenizer used in the tree.
     * @param {object} node - Subtree root.
     * @param {Set<string>} [namesSet] - Accumulator, seeded with the placeholder.
     * @returns {Set<string>} The accumulator.
     */
    function getTokenizerNames(node, namesSet = new Set([PLACEHOLDER_NAME])) {
        if (!node) return namesSet;
        for (const tokenizer of Object.values(node.tokenizers || {})) {
            namesSet.add(tokenizer.original_lang_name);
        }
        (node.children || []).forEach(child => getTokenizerNames(child, namesSet));
        return namesSet;
    }

    /** Returns the values of all checked inputs inside `container`. */
    function getCheckedValues(container) {
        return Array.from(container.querySelectorAll('input:checked'))
            .map(input => input.value);
    }

    /** Unchecks every checked input inside `container` and redraws. */
    function clearFilter(container) {
        container.querySelectorAll('input:checked').forEach(input => {
            input.checked = false;
        });
        drawVisibleNodes();
    }

    /**
     * Creates a pre-checked filter checkbox that redraws the tree on change.
     * @param {string} name - Value of the checkbox.
     * @returns {HTMLInputElement}
     */
    function createFilterCheckbox(name) {
        const checkbox = document.createElement('input');
        checkbox.type = 'checkbox';
        checkbox.value = name;
        checkbox.checked = true;
        checkbox.addEventListener('change', () => drawVisibleNodes());
        return checkbox;
    }

    /** Rebuilds the script filter: one checked checkbox per script name. */
    function updateScriptFilter(data) {
        scriptNames = Array.from(getScriptNames(data));
        scriptFiltersDiv.innerHTML = '';
        scriptNames.forEach(name => {
            const label = document.createElement('label');
            label.appendChild(createFilterCheckbox(name));
            label.appendChild(document.createTextNode(name));
            scriptFiltersDiv.appendChild(label);
            scriptFiltersDiv.appendChild(document.createElement('br'));
        });
    }

    /**
     * Rebuilds the tokenizer filter (one checked checkbox per tokenizer name,
     * preceded by its color swatch) and the color scale used by the tree.
     */
    function updateTokenizerFilter(data) {
        tokenizerNames = Array.from(getTokenizerNames(data));
        // Color scale keyed by the unique tokenizer names.
        color = d3.scaleOrdinal(tokenizerNames, d3.schemeCategory10);
        tokenizerFilterDiv.innerHTML = '';
        tokenizerNames.forEach(name => {
            const label = document.createElement('label');

            // Colored swatch, placed before the checkbox.
            const icon = document.createElement('span');
            icon.style.display = 'inline-block';
            icon.style.width = '10px';
            icon.style.height = '10px';
            icon.style.backgroundColor = color(name);
            icon.style.marginRight = '5px';
            label.appendChild(icon);

            label.appendChild(createFilterCheckbox(name));
            label.appendChild(document.createTextNode(name));
            tokenizerFilterDiv.appendChild(label);
            tokenizerFilterDiv.appendChild(document.createElement('br'));
        });
    }

    /**
     * Fills the hover box with the details of one node.
     *
     * Built from DOM nodes with text content (not an HTML string) so that
     * names coming from the JSON data can never be interpreted as markup.
     * NOTE(review): the line breaks and the bulleted tokenizer list are a
     * reconstruction of the intended layout — confirm against the design.
     * @param {object} data - Annotated node (see addNodeIds()).
     */
    function renderHoverBox(data) {
        hoverBox.textContent = '';
        const addLine = text => {
            hoverBox.appendChild(document.createTextNode(text));
            hoverBox.appendChild(document.createElement('br'));
        };

        addLine(`Name: ${data.name}`);
        if (data.iso_1_code) addLine(`ISO 1 Code: ${data.iso_1_code}`);
        if (data.iso_3_code) addLine(`ISO 3 Code: ${data.iso_3_code}`);
        if (data.scripts.length > 0) addLine(`Scripts: ${data.scripts.join(', ')}`);
        if (data.iso_3_code) addLine(`In GlotLID: ${data.scripts.length > 0 ? 'YES' : 'NO'}`);

        hoverBox.appendChild(document.createTextNode('Tokenizers:'));
        const list = document.createElement('ul');
        for (const [script, tokenizer] of Object.entries(data.tokenizers)) {
            const origin = data.native_tokenizers.includes(script) ? '(👤)' : '(🤖)';
            const item = document.createElement('li');
            item.textContent = `${script}: ${tokenizer.class_name}-${tokenizer.original_lang_name}${origin}`;
            list.appendChild(item);
        }
        hoverBox.appendChild(list);

        hoverBox.appendChild(document.createTextNode(`Subtree size: ${data.subtreeSize}`));
    }

    /** Positions the hover box at the pointer location of `event`. */
    function moveHoverBox(event) {
        hoverBox.style.left = (event.pageX) + "px";
        hoverBox.style.top = (event.pageY) + "px";
    }

    /**
     * Renders `data` as a top-down D3 tree inside #tree-container, replacing
     * any previous SVG.
     * @param {object} data - Filtered, annotated tree to draw.
     * @param {boolean} [recenter=false] - Reset the view to the root instead
     *     of keeping the current zoom/pan.
     */
    function createTree(data, recenter = false) {
        // Clear any existing tree
        d3.select("#tree-container svg").remove();

        // Set the SVG dimensions to fill the entire window
        const width = window.innerWidth;
        const height = window.innerHeight;

        const svg = d3.select("#tree-container")
            .append("svg")
            .attr("width", width)
            .attr("height", height);
        const g = svg.append("g");

        // Zoom behavior without restricting translation extents
        const zoom = d3.zoom()
            .scaleExtent([0.1, 5])
            .on("zoom", function(event) {
                currentTransform = event.transform; // Remember for the next render
                g.attr("transform", event.transform);
            });

        // Lay the hierarchy out with a fixed per-node footprint
        const root = d3.hierarchy(data);
        const treeLayout = d3.tree().nodeSize([200, 100]);
        treeLayout(root);

        // Links between parents and children (vertical link generator)
        g.selectAll('.link')
            .data(root.links())
            .enter()
            .append('path')
            .attr('class', 'link')
            .attr('d', d3.linkVertical()
                .x(d => d.x)
                .y(d => d.y))
            .attr('stroke', '#ccc')
            .attr('fill', 'none');

        // One <g> per node
        const node = g.selectAll('.node')
            .data(root.descendants())
            .enter()
            .append('g')
            .attr('class', 'node')
            .attr('transform', d => `translate(${d.x},${d.y})`);

        // Circle radius grows with the number of leaves below the node
        const sizeScale = d3.scaleSqrt()
            .domain([1, root.data.subtreeSize])
            .range([5, 20]);

        const selectedTokenizers = getCheckedValues(tokenizerFilterDiv);
        const selectedScripts = getCheckedValues(scriptFiltersDiv);

        // Name of the first tokenizer of the node that passes both filters,
        // or 'unknown' when there is none.
        function getColorTokenizer(nodeData) {
            const match = Object.entries(nodeData.tokenizers).find(([script, tokenizer]) =>
                selectedScripts.includes(script) &&
                selectedTokenizers.includes(tokenizer.original_lang_name));
            return match ? match[1].original_lang_name : 'unknown';
        }
        const fillForNode = d => color(getColorTokenizer(d.data));

        // Nodes without an ISO 3 code: circle sized by leaf count
        node.filter(d => !d.data.iso_3_code)
            .append('circle')
            .attr('r', d => sizeScale(d.data.subtreeSize))
            .attr('fill', fillForNode);

        // Nodes with an ISO 3 code and no native tokenizer: square
        node.filter(d => d.data.iso_3_code && d.data.native_tokenizers.length === 0)
            .append('rect')
            .attr('width', 10)
            .attr('height', 10)
            .attr('x', -5)
            .attr('y', -5)
            .attr('fill', fillForNode);

        // Nodes with at least one native ("own") tokenizer: triangle
        node.filter(d => d.data.native_tokenizers.length !== 0)
            .append('path')
            .attr('d', d3.symbol().type(d3.symbolTriangle).size(100))
            .attr('fill', fillForNode);

        // Text labels: "<name> - <tokenizer>" plus the leaf count for
        // nodes without an ISO 3 code
        node.append('text')
            .attr('dy', 4)
            .attr('x', d => sizeScale(d.data.subtreeSize) + 4)
            .attr('text-anchor', 'start')
            .text(d => {
                const tokenizerName = getColorTokenizer(d.data);
                return `${d.data.name} - ${tokenizerName || 'x'}${d.data.iso_3_code ? '' : ' (' + d.data.subtreeSize + ')'}`;
            });

        if (!currentTransform || recenter) {
            currentTransform = d3.zoomIdentity.translate(width / 2, height / 2);
        }

        // Explicit null check: the root has id 0, which is falsy.
        if (nodeToCenter !== null) {
            const target = root.descendants().find(d => d.data.id === nodeToCenter);
            if (target) {
                const scale = 1.0; // Adjust scale as needed
                currentTransform = d3.zoomIdentity
                    .translate(width / 2 - target.x, height / 2 - target.y)
                    .scale(scale);
            }
            nodeToCenter = null;
        }

        // Attach the zoom behavior and apply the stored transform; this fires
        // the "zoom" handler above, which positions <g>.
        svg.call(zoom).call(zoom.transform, currentTransform);

        // Hover box follows the pointer while over a node
        node.on("mouseover", function(event, d) {
            hoverBox.style.display = "block";
            moveHoverBox(event);
            renderHoverBox(d.data);
        }).on("mousemove", function(event) {
            moveHoverBox(event);
        }).on("mouseout", function() {
            hoverBox.style.display = "none";
        });

        // Clicking a node toggles its expansion
        node.on('click', function(event, d) {
            setExpanded(d.data.id, !expandedNodes.has(d.data.id));
            hoverBox.style.display = "none";
            drawVisibleNodes();
        });
    }

    /**
     * Filters the loaded tree by the current expansion state, depth limit and
     * tokenizer/script/GlotLID filters, then renders the result.
     * @param {boolean} [recenter=false] - Forwarded to createTree().
     */
    function drawVisibleNodes(recenter = false) {
        if (!currentTreeData) return;

        const selectedTokenizers = getCheckedValues(tokenizerFilterDiv);
        const selectedScripts = getCheckedValues(scriptFiltersDiv);
        const showNonGlotlid = showNonGlotlidCheckbox.checked;
        // 0 (or an empty field) means "no depth limit".
        const maxDepth = Number(depthSelector.value);

        // Returns a pruned copy of `node`, or null when it must be hidden.
        function filterHierarchy(node, parentExpanded = true, depth = 0) {
            const hiddenNonGlotlid = node.iso_3_code && node.scripts.length === 0 && !showNonGlotlid;
            const withinDepth = maxDepth === 0 || depth <= maxDepth;
            if (hiddenNonGlotlid || (!parentExpanded && !withinDepth)) {
                return null;
            }

            const nodeExpanded = expandedNodes.has(node.id);
            const filteredChildren = node.children
                .map(child => filterHierarchy(child, nodeExpanded, depth + 1))
                .filter(child => child !== null);

            // Nodes without tokenizers / scripts are matched via the placeholder.
            const tokenizerValues = Object.values(node.tokenizers);
            const nodeTokenizerNames = tokenizerValues.length > 0
                ? tokenizerValues.map(tokenizer => tokenizer.original_lang_name)
                : [PLACEHOLDER_NAME];
            const nodeScriptNames = node.scripts.length > 0 ? node.scripts : [PLACEHOLDER_NAME];
            const shouldBeShown =
                selectedTokenizers.some(name => nodeTokenizerNames.includes(name)) &&
                (node.scripts.length > 0 || showNonGlotlid) &&
                selectedScripts.some(name => nodeScriptNames.includes(name));

            // Below a collapsed parent, keep a node only if it still has
            // visible children, is a matching leaf, or is the root.
            const isMatchingLeaf = node.children.length === 0 && shouldBeShown;
            if (!parentExpanded && filteredChildren.length === 0 && !isMatchingLeaf && node.id !== 0) {
                return null;
            }
            return { ...node, children: filteredChildren };
        }

        const filteredData = filterHierarchy(currentTreeData);
        createTree(filteredData, recenter);
    }

    /**
     * Finds the first node (pre-order) whose lower-cased name, ISO 3 code or
     * ISO 1 code equals the lower-cased query, expands all its ancestors and
     * re-renders centered on it. Queries shorter than 2 characters are ignored.
     * @param {string} name - Search text.
     */
    function searchNode(name) {
        name = name.toLowerCase();
        if (!currentTreeData || name.length < 2) return;

        // Depth-first search; expands ancestors while unwinding.
        function expandNode(node) {
            if (node.name.toLowerCase() === name || node.iso_3_code === name || node.iso_1_code === name) {
                return node;
            }
            for (const child of (node.children || [])) {
                const found = expandNode(child);
                if (found) {
                    setExpanded(node.id);
                    return found;
                }
            }
            return null;
        }

        const expandedNode = expandNode(currentTreeData);
        if (expandedNode) {
            nodeToCenter = expandedNode.id;
            drawVisibleNodes();
        }
    }

    // Wire up the controls and load the initially selected family.
    select.addEventListener('change', loadLanguageFamily);
    searchBar.addEventListener('input', () => searchNode(searchBar.value));
    depthSelector.addEventListener('change', () => drawVisibleNodes());
    showNonGlotlidCheckbox.addEventListener('click', () => drawVisibleNodes());
    clearTokenizersFilter.addEventListener('click', () => clearFilter(tokenizerFilterDiv));
    clearScriptsFilter.addEventListener('click', () => clearFilter(scriptFiltersDiv));
    loadLanguageFamily();
});