File size: 5,874 Bytes
c3358e9
 
94e1f4a
 
c8c0cec
 
94e1f4a
c8c0cec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04e75a3
c8c0cec
 
 
 
 
 
 
 
 
04e75a3
c8c0cec
 
 
 
 
 
 
 
 
 
d237742
 
 
 
 
c8c0cec
 
 
 
 
 
 
 
 
 
d237742
c8c0cec
 
 
 
 
28deedd
c8c0cec
 
 
 
28deedd
c8c0cec
 
 
28deedd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8c0cec
 
 
 
c3358e9
 
 
 
 
4f3db48
 
6d612fd
 
 
 
 
 
 
 
 
28deedd
 
 
6d612fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d237742
 
f591286
 
d237742
6d612fd
 
 
 
 
 
 
 
f591286
ea776c6
d237742
6d612fd
 
c3358e9
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import gradio as gr
from huggingface_hub import InferenceClient
import re
import datetime
from urllib import request
from lxml import etree


# Site root prepended to the relative paper links scraped from a venue's
# listing page, keyed by lower-case venue name.
url_prefix_mapping = {
    'acl': 'https://aclanthology.org',
    'emnlp': 'https://aclanthology.org',
    'naacl': 'https://aclanthology.org',
    'tacl': 'https://aclanthology.org',
    'nips': 'https://papers.nips.cc',
    # ICML proceedings are hosted on PMLR, not on the NeurIPS site.
    'icml': 'https://proceedings.mlr.press',
    'iclr': 'https://iclr.cc',
}

# PMLR volume identifier of the ICML proceedings for each (venue, year).
mlr_mapping = {
    ('icml', 2020): 'v119',
    ('icml', 2021): 'v139',
    ('icml', 2022): 'v162',
    ('icml', 2023): 'v202',
    # Previously 'v139', which duplicated the 2021 volume.
    ('icml', 2024): 'v235',
}

def get_paper_home(venue, year):
    """Return the URL of the page that lists the papers of a venue and year.

    Args:
        venue: Lower-case venue key ('acl', 'emnlp', 'naacl', 'nips', 'icml'
            or 'iclr').
        year: Publication year; coerced to ``int`` so that a float coming
            from a UI slider does not end up as ``2020.0`` in the URL.

    Returns:
        The listing-page URL as a string, or ``None`` when no listing page
        is known: any other venue (e.g. 'tacl'), or an ICML year that has
        no entry in ``mlr_mapping``.
    """
    year = int(year)

    if venue in ('acl', 'emnlp', 'naacl'):
        return f'https://aclanthology.org/events/{venue}-{year}'

    if venue == 'nips':
        return f'https://papers.{venue}.cc/paper_files/paper/{year}'

    if venue == 'icml':
        # Years without a registered volume used to raise KeyError here.
        volume = mlr_mapping.get((venue, year))
        if volume is None:
            return None
        return f'https://proceedings.mlr.press/{volume}'

    if venue == 'iclr':
        return f'https://iclr.cc/Downloads/{year}'

    return None


def check_keywords(ele, keywords):
    """Tell whether the text of ``ele`` matches at least one keyword.

    Args:
        ele: Element exposing ``itertext()``; all of its text nodes are
            concatenated before matching.
        keywords: Iterable of regular-expression pattern strings.

    Returns:
        True if any pattern is found in the text, otherwise False.
    """
    text = ''.join(ele.itertext())
    # Match case-insensitively through the regex flag. The previous code
    # lower-cased only the text, so a keyword such as "BERT" could never
    # match; lower-casing the pattern instead would corrupt escapes like \S.
    return any(re.search(pattern, text, re.IGNORECASE) for pattern in keywords)

def check_keywords_icml(ele, keywords):
    """Tell whether the title inside a paper entry matches any keyword.

    Args:
        ele: Element whose first descendant ``<p class="title">`` holds the
            paper title.
        keywords: Iterable of regular-expression pattern strings.

    Returns:
        True if any pattern is found in the title text; False if none
        matches or the entry has no title paragraph.
    """
    title = ele.find('.//p[@class="title"]')
    if title is None:
        # Entry without a title node: nothing to match (this used to raise
        # AttributeError).
        return False

    text = ''.join(title.itertext())
    # Case-insensitive via the regex flag; lower-casing only the text made
    # keywords containing upper-case letters unmatchable.
    return any(re.search(pattern, text, re.IGNORECASE) for pattern in keywords)


def search(keywords, venues, min_year, max_year):
    """Scrape venue listing pages and collect links to matching papers.

    Args:
        keywords: Comma-separated keywords. Each one is used as a regular
            expression; a keyword that is not a valid regular expression is
            matched literally instead. Blank entries are ignored.
        venues: Selected venue-group labels ("NeurIPS/ICLR/ICML", "*ACL",
            "CVPR/ECCV/ICCV").
        min_year: First year to search (inclusive).
        max_year: Last year to search (inclusive).

    Returns:
        A list of single-element rows ``[paper_url]``, one per matching
        paper. Empty when no usable keyword or venue is given. Listing
        pages that cannot be fetched or parsed are skipped.
    """
    patterns = []
    for raw_keyword in (keywords or '').split(','):
        keyword = raw_keyword.strip()
        if not keyword:
            # An empty pattern matches every title (e.g. after a trailing comma).
            continue
        try:
            re.compile(keyword)
        except re.error:
            # Not a valid regex (e.g. "c++"): fall back to a literal match.
            keyword = re.escape(keyword)
        patterns.append(keyword)
    if not patterns:
        return []

    # The max year is inclusive; range() alone would drop it. int() guards
    # against slider values delivered as floats.
    year_range = range(int(min_year), int(max_year) + 1)

    venues = venues or []
    search_venues = []
    if "NeurIPS/ICLR/ICML" in venues:
        search_venues.extend(['nips', 'iclr', 'icml'])
    if "*ACL" in venues:
        search_venues.extend(['acl', 'emnlp', 'naacl', 'tacl'])
    # "CVPR/ECCV/ICCV" is accepted but no scraper exists for those venues.
    # It used to re-add nips/iclr/icml, which duplicated or mislabeled results.

    # XPath of the element that represents one paper on each listing page.
    selectors = {
        'iclr': ".//a[@class='Poster']",
        'nips': ".//a[@title='paper title']",
        'icml': ".//div[@class='paper']",
    }
    acl_selector = ".//a[@class='align-middle']"

    results = []
    for venue in search_venues:
        for year in year_range:
            print(venue, year)
            try:
                paper_home = get_paper_home(venue, year)
            except KeyError:
                # No proceedings volume registered for this venue/year.
                paper_home = None
            if paper_home is None:
                continue

            try:
                with request.urlopen(paper_home, timeout=30) as response:
                    html = response.read().decode(errors='replace')
                tree = etree.fromstring(html, etree.HTMLParser())
            except Exception as exc:
                # Best effort: a missing or malformed page must not abort
                # the whole search, but unlike a bare ``except`` this lets
                # KeyboardInterrupt/SystemExit through.
                print(f'skipping {paper_home}: {exc}')
                continue
            if tree is None:
                continue

            if 'acl' in venue:
                paper_tag_on_html = acl_selector
            else:
                paper_tag_on_html = selectors[venue]

            elements = tree.findall(paper_tag_on_html)
            for element in elements:
                if venue == 'icml':
                    if not check_keywords_icml(element, patterns):
                        continue
                    links = element.find('.//p[@class="links"]')
                    anchor = links.find('a') if links is not None else None
                    # These links are used as-is, without a site prefix.
                    paper_url = anchor.get('href') if anchor is not None else None
                else:
                    if not check_keywords(element, patterns):
                        continue
                    href = element.get('href')
                    paper_url = url_prefix_mapping[venue] + href if href else None

                if paper_url:
                    results.append([paper_url])

            print(len(elements))
            print()

    return results


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Calendar year at start-up; used as the upper bound of the year sliders.
current_year = datetime.date.today().year

# demo = gr.Interface(
#     search,
#     inputs=[
#         gr.Textbox(lines=2, placeholder="Keywords of the paper title. Supports ReGex."),
#         gr.CheckboxGroup(["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"], label="Choose Venues to Search", value=["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"]),
#         gr.Slider(minimum=2020, maximum=current_year, value=[2020, current_year], label="Year Range", step=1)
#     ],
#     outputs=gr.DataFrame(headers=["Paper Link", "Title", "Authors"])
# )
def test_search(keywords, venues, min_year, max_year):
    """Return two fixed placeholder rows, ignoring every argument.

    Takes the same parameters as ``search`` and returns rows of the same
    shape (one URL per row). It is not referenced anywhere in the visible
    part of this file.
    """
    return [["https://example.com"], ["https://anotherexample.com"]]
    
# Build the UI: an input column (keywords, venue groups, year bounds, search
# button) next to a wider results column.
with gr.Blocks() as demo:
    with gr.Row():  # Organize inputs and outputs in a row (side by side)
        with gr.Column(scale=1):  # Input section (narrower)
            # Free-text box for the comma-separated keywords
            textbox = gr.Textbox(
                label="Enter comma-separated keywords",
                placeholder="Enter keywords, separated by commas...",
                lines=2
            )
            # Venue groups to search; all three are selected by default
            checkbox = gr.CheckboxGroup(
                ["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"],
                label="Choose Venues to Search",
                value=["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"],
                type="value"
            )
            # Two separate sliders for the year bounds, each spanning
            # 2015..current_year in steps of one year
            min_year_slider = gr.Slider(minimum=2015, maximum=current_year, value=2020, label="Select Min Year", step=1)
            max_year_slider = gr.Slider(minimum=2015, maximum=current_year, value=current_year, label="Select Max Year", step=1)

            submit_button = gr.Button("Search")
            
        with gr.Column(scale=3):  # Output section (wider)
            # Results table with a single "Paper Link" column
            output_table = gr.DataFrame(
                headers=["Paper Link",],# "Title", "Authors" are not populated
                label="Results"
            )

    # Clicking the button calls search(keywords, venues, min_year, max_year)
    # with the current input values and shows its return value in the table
    submit_button.click(
        search,
        inputs=[textbox, checkbox, min_year_slider, max_year_slider],
        outputs=output_table
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()