Spaces:
Sleeping
Sleeping
File size: 3,837 Bytes
0396aac adfc9b0 3f5d923 0396aac 3f5d923 0396aac 3f5d923 0396aac 3f5d923 0396aac 3f5d923 adfc9b0 0396aac 3f5d923 adfc9b0 3f5d923 0396aac 3f5d923 0396aac 3f5d923 0396aac 3f5d923 0396aac adfc9b0 0396aac 3f5d923 adfc9b0 0396aac 3f5d923 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import pandas as pd
class PaperList:
    """Searchable, filterable HTML table of papers loaded from a CSV file.

    The CSV must provide the columns read below: ``title``, ``url``,
    ``authors``, ``pdf``, ``category``, ``arxiv``, ``github``, ``hf_space``,
    ``hf_model`` and ``hf_dataset``.  Each row is rendered to an HTML ``<tr>``
    once at construction time; :meth:`render` then only filters rows and joins
    the pre-rendered fragments.
    """

    # (width, label) of every header cell, in display order.
    _HEADER_COLUMNS = (
        ("50%", "Paper"),
        ("22%", "Authors"),
        ("4%", "pdf"),
        ("4%", "category"),
        ("4%", "arXiv"),
        ("4%", "GitHub"),
        ("4%", "HF Spaces"),
        ("4%", "HF Models"),
        ("4%", "HF Datasets"),
    )

    def __init__(self, csv_path: str = "papers.csv") -> None:
        """Load the paper table and pre-render its HTML rows.

        Args:
            csv_path: Location of the CSV file to load.  Defaults to
                ``"papers.csv"``, the path that used to be hard-coded.
        """
        self.table = pd.read_csv(csv_path)
        self._preprcess_table()
        header_cells = "\n".join(
            f'<td width="{width}">{label}</td>' for width, label in self._HEADER_COLUMNS
        )
        self.table_header = f"\n<tr>\n{header_cells}\n</tr>"

    @staticmethod
    def _link(url: object, label: object, fallback: object = "") -> object:
        """Return an ``<a>`` tag pointing at *url*, or *fallback* if *url* is not a string.

        Only real strings are treated as links; anything else (for example the
        NaN that pandas stores for an empty CSV cell) yields *fallback*.
        """
        if isinstance(url, str):
            return f'<a href="{url}" target="_blank">{label}</a>'
        return fallback

    def _preprcess_table(self) -> None:
        """Add the ``title_lowercase`` and ``html_table_content`` columns to ``self.table``.

        NOTE: the method name is misspelled ("preprcess"); it is kept unchanged
        so that existing callers and overrides keep working.
        """
        # Lower-cased copy of the title, used by case-insensitive search.
        self.table["title_lowercase"] = self.table["title"].str.lower()

        link = self._link
        rows = []
        for row in self.table.itertuples():
            cells = (
                # The title links to the paper page when a URL is available,
                # otherwise it is shown as plain text.
                link(row.url, row.title, fallback=row.title),
                row.authors,
                link(row.pdf, "pdf"),
                row.category,
                link(row.arxiv, "arXiv"),
                link(row.github, "GitHub"),
                link(row.hf_space, "Space"),
                link(row.hf_model, "Model"),
                link(row.hf_dataset, "Dataset"),
            )
            body = "\n".join(f"<td>{cell}</td>" for cell in cells)
            rows.append(f"\n<tr>\n{body}\n</tr>")
        self.table["html_table_content"] = rows

    def render(
        self, search_query: str, case_sensitive: bool, filter_names: list[str], paper_categories: list[str]
    ) -> tuple[int, str]:
        """Filter the papers and return the match count with the rendered table.

        Args:
            search_query: Text that must occur in the title.  It is matched
                literally (not as a regular expression), so characters such as
                ``+``, ``(`` or ``.`` are safe.  An empty query disables the
                title filter.
            case_sensitive: Whether the title match is case-sensitive.
            filter_names: Names of the resource filters to apply; recognised
                values are ``"arXiv"``, ``"GitHub"``, ``"HF Space"``,
                ``"HF Model"`` and ``"HF Dataset"``.
            paper_categories: Categories to keep; rows whose category is not
                listed are dropped.

        Returns:
            A ``(number_of_rows, html_table)`` tuple.
        """
        df = self.table
        if search_query:
            if case_sensitive:
                haystack, needle = df["title"], search_query
            else:
                haystack, needle = df["title_lowercase"], search_query.lower()
            # regex=False: user input is plain text, not a pattern (a query
            # like "c++" would otherwise raise re.error).
            # na=False: rows without a title never match instead of producing
            # a NaN mask that cannot be used for indexing.
            df = df[haystack.str.contains(needle, regex=False, na=False)]

        df = self.filter_table(
            df,
            "arXiv" in filter_names,
            "GitHub" in filter_names,
            "HF Space" in filter_names,
            "HF Model" in filter_names,
            "HF Dataset" in filter_names,
            paper_categories,
        )
        return len(df), self.to_html(df, self.table_header)

    @staticmethod
    def filter_table(
        df: pd.DataFrame,
        has_arxiv: bool,
        has_github: bool,
        has_hf_space: bool,
        has_hf_model: bool,
        has_hf_dataset: bool,
        paper_categories: list[str],
    ) -> pd.DataFrame:
        """Keep only rows that have every requested resource and a selected category.

        Args:
            df: Table to filter.
            has_arxiv: Require a non-missing ``arxiv`` value.
            has_github: Require a non-missing ``github`` value.
            has_hf_space: Require a non-missing ``hf_space`` value.
            has_hf_model: Require a non-missing ``hf_model`` value.
            has_hf_dataset: Require a non-missing ``hf_dataset`` value.
            paper_categories: Categories to keep.

        Returns:
            The filtered table.
        """
        required_columns = (
            ("arxiv", has_arxiv),
            ("github", has_github),
            ("hf_space", has_hf_space),
            ("hf_model", has_hf_model),
            ("hf_dataset", has_hf_dataset),
        )
        for column, required in required_columns:
            if required:
                df = df[df[column].notna()]
        return df[df["category"].isin(set(paper_categories))]

    @staticmethod
    def to_html(df: pd.DataFrame, table_header: str) -> str:
        """Wrap *table_header* and the pre-rendered rows of *df* in a ``<table>`` element."""
        table_data = "".join(df["html_table_content"])
        return f"\n<table>\n{table_header}\n{table_data}\n</table>"
|