diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..461219f1ffbd91aaf2e9fb307532631521bd710d Binary files /dev/null and b/.DS_Store differ diff --git a/README.md b/README.md index 87198b415806a683909bd94878c339cf518c2569..cbab0bd8fe885762ef779c4153a8720d3cd0dccd 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,16 @@ ---- -title: MatchPrePrintArticles -emoji: ๐ -colorFrom: green -colorTo: pink -sdk: gradio -sdk_version: 5.8.0 -app_file: app.py -pinned: false -license: mit -short_description: Dataset Creator for Matching PrePrint and Articles ---- - -Check out the configuration reference at https://huggingface.co./docs/hub/spaces-config-reference +# MatchingPubs + +## Dataset + +The `dataset/` directory contains the following main classes: + +- `DatasetLoader`: Responsible for loading the dataset from various sources. +- `DatasetProcessor`: Handles preprocessing and cleaning of the dataset. +- `DatasetAnalyzer`: Provides methods for analyzing and summarizing the dataset. 
+ +## Getting the Dataset + +To get the dataset, run the following command: + +```bash +PYTHONPATH=$(pwd) python src/dataset/get_dataset.py \ No newline at end of file diff --git a/__pycache__/run_augmenter.cpython-313.pyc b/__pycache__/run_augmenter.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a36f28f04872c18b7f4a905b059446b097dcae21 Binary files /dev/null and b/__pycache__/run_augmenter.cpython-313.pyc differ diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..7642dd5f336762866380029d295890baec28bb2b --- /dev/null +++ b/app.py @@ -0,0 +1,245 @@ +import gradio as gr +import pandas as pd +import pandas as pd +from src.utils.io_utils import PROJECT_ROOT +from run_augmenter import negative_sampler , positive_sampler +from pathlib import Path + +def augment_interface(factor, type_or_difficulty, use_default, csv_file=None): + """Negative Tool Sampler: Wrapper to handle negative dataset augmentation.""" + try: + if use_default: + input_csv_path = f"{PROJECT_ROOT}/data/crossref-preprint-article-relationships-Aug-2023.csv" + if not Path(input_csv_path).exists(): + return "Error: Default CSV file not found!", None, gr.update(visible=False) + elif csv_file is not None: + input_csv_path = csv_file.name + else: + return "Error: Please select default or upload a CSV file.", None, gr.update(visible=False) + + augmented_df = negative_sampler(input_csv_path, factor, type_or_difficulty) + output_csv_path = "augmented_dataset.csv" + augmented_df.to_csv(output_csv_path, index=False) + + return output_csv_path, augmented_df.head(), gr.update(visible=True) + + except Exception as e: + return f"Error during processing: {str(e)}", None, gr.update(visible=False) + + +def positive_sampler_interface(use_default, csv_file=None, size=10, random=True, seed=42, full=False): + """Positive Tool Sampler: Wrapper to handle positive dataset augmentation with additional arguments.""" + try: + if use_default: + 
input_csv_path = f"{PROJECT_ROOT}/data/crossref-preprint-article-relationships-Aug-2023.csv" + if not Path(input_csv_path).exists(): + return "Error: Default CSV file not found!", None, gr.update(visible=False) + elif csv_file is not None: + input_csv_path = csv_file.name + else: + return "Error: Please select default or upload a CSV file.", None, gr.update(visible=False) + + # Call the positive sampler function with additional arguments + augmented_df = positive_sampler( + optional_path=input_csv_path, + size=size, + random=random, + seed=seed, + full=full + ) + output_csv_path = "positive_augmented_dataset.csv" + augmented_df.to_csv(output_csv_path, index=False) + + return output_csv_path, augmented_df.head(), gr.update(visible=True) + + except Exception as e: + return f"Error during processing: {str(e)}", None, gr.update(visible=False) + + +def reset_output(): + """Resets the output fields by returning None and hiding the DataFrame.""" + return None, None, gr.update(visible=False) + +with gr.Blocks(css=f""" + .gradio-container {{ + font-family: Arial, sans-serif; + max-width: 900px; + margin: auto; + }} + h1 {{ + text-align: center; + color: white; + font-size: 60px; + margin-bottom: 0px; + }} + h2 {{ + text-align: center; + color: #ff0000; + font-size: 16px; + font-weight: normal; + margin-top: 0px; + }} + .title {{ + text-align: center; + font-size: 40px; + margin-top: 30px; + margin-bottom: 20px; + }} + .title .positive {{ + color: #ff0000; + }} + .title .negative {{ + color: #ff0000; + }} + .title .tool {{ + color: white; + }} + .title .sampler {{ + color: #ff0000; + }} + .description {{ + text-align: center; + margin-bottom: 20px; + }} + #submit-button {{ + background-color: #ff0000; + color: white; + font-size: 16px; + border: none; + border-radius: 5px; + padding: 10px 20px; + }} + #reset-button {{ + background-color: #d3d3d3; + color: black; + font-size: 16px; + border: none; + border-radius: 5px; + padding: 10px 20px; + }} +""") as app: + # Main Title 
Section + gr.Markdown(""" +
+ This tool takes a list of DOIs and augments them using the OpenAlex API. + It is designed to complement the Negative Tool Sampler, enabling the creation of complete datasets. +
+ """) + + with gr.Group(): + with gr.Row(): + pos_use_default_checkbox = gr.Checkbox(label="Use Default Dataset", value=True) + pos_csv_file_input = gr.File(label="Upload CSV (optional)", file_types=[".csv"], visible=False) + + with gr.Row(): + size_input = gr.Number(label="Number of Samples", value=10, info="Specify the number of samples to generate.") + random_input = gr.Checkbox(label="Sample Randomly", value=True, info="Whether to sample randomly.") + seed_input = gr.Number(label="Random Seed", value=42, info="Random seed for reproducibility.") + full_input = gr.Checkbox(label="Full Dataset Mode", value=False, info="Indicate whether to use the full dataset.") + + with gr.Group(): + pos_output_file = gr.File(label="Download Augmented Dataset") + pos_dataset_preview = gr.DataFrame(label="Dataset Preview", interactive=False, visible=False) + with gr.Row(): + pos_submit_button = gr.Button("Submit ๐", elem_id="submit-button") + pos_reset_button = gr.Button("Reset ๐", elem_id="reset-button") + + # Button Actions + pos_submit_button.click( + positive_sampler_interface, + inputs=[pos_use_default_checkbox, pos_csv_file_input, size_input, random_input, seed_input, full_input], + outputs=[pos_output_file, pos_dataset_preview, pos_dataset_preview] + ) + + pos_reset_button.click( + reset_output, + inputs=[], + outputs=[pos_output_file, pos_dataset_preview, pos_dataset_preview] + ) + + # Toggle File Input + def toggle_pos_csv_input(use_default): + return gr.update(visible=not use_default) + + pos_use_default_checkbox.change( + toggle_pos_csv_input, + inputs=[pos_use_default_checkbox], + outputs=[pos_csv_file_input] + ) + + # Negative Tool Sampler Section + gr.Markdown(""" ++ This tool generates datasets by creating negative samples from positive matches between preprints and articles. + Customize the difficulty and the augmentation factor to meet your needs. +
+ """) + + with gr.Group(): + with gr.Row(): + factor_input = gr.Number( + label="Factor (int)", value=1, info="Specify the number of negative samples per positive sample." + ) + type_dropdown = gr.Dropdown( + ["random", "similar topics", "overlapping authors", "random authors", "fuzzed title"], + label="Select Difficulty or Augmentation Type" + ) + with gr.Row(): + use_default_checkbox = gr.Checkbox(label="Use Default Dataset", value=True) + csv_file_input = gr.File(label="Upload CSV (optional)", file_types=[".csv"], visible=False) + + with gr.Group(): + output_file = gr.File(label="Download Augmented Dataset") + dataset_preview = gr.DataFrame(label="Dataset Preview", interactive=False, visible=False) + with gr.Row(): + submit_button = gr.Button("Submit ๐", elem_id="submit-button") + reset_button = gr.Button("Reset ๐", elem_id="reset-button") + + # Button Actions + submit_button.click( + augment_interface, + inputs=[factor_input, type_dropdown, use_default_checkbox, csv_file_input], + outputs=[output_file, dataset_preview, dataset_preview] + ) + + reset_button.click( + reset_output, + inputs=[], + outputs=[output_file, dataset_preview, dataset_preview] + ) + + # Toggle File Input + def toggle_csv_input(use_default): + return gr.update(visible=not use_default) + + use_default_checkbox.change( + toggle_csv_input, + inputs=[use_default_checkbox], + outputs=[csv_file_input] + ) + +# Launch the app +if __name__ == "__main__": + app.launch(share=True) diff --git a/app/.DS_Store b/app/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..8a784558b10624a4f3f9bfc5695a8352a42200c4 Binary files /dev/null and b/app/.DS_Store differ diff --git a/app/.gradio/certificate.pem b/app/.gradio/certificate.pem new file mode 100644 index 0000000000000000000000000000000000000000..b85c8037f6b60976b2546fdbae88312c5246d9a3 --- /dev/null +++ b/app/.gradio/certificate.pem @@ -0,0 +1,31 @@ +-----BEGIN CERTIFICATE----- 
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw +TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh +cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4 +WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu +ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY +MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc +h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+ +0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U +A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW +T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH +B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC +B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv +KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn +OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn +jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw +qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI +rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV +HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq +hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL +ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ +3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK +NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5 +ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur +TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC +jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc +oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq +4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA +mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d +emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc= +-----END CERTIFICATE----- diff --git a/app/.gradio/flagged/dataset1.csv 
b/app/.gradio/flagged/dataset1.csv new file mode 100644 index 0000000000000000000000000000000000000000..5c5833b1a834add3c7b8ac35b5cdfab7d87bc5df --- /dev/null +++ b/app/.gradio/flagged/dataset1.csv @@ -0,0 +1,3 @@ +Factor (int),Select Augmentation Type or Difficulty,Use Default Dataset,Upload CSV (optional),Download Augmented Dataset,timestamp +,,false,,,2024-12-10 22:00:22.460971 +1,easy,true,,,2024-12-10 22:00:36.882145 diff --git a/app/__pycache__/run_augmenter.cpython-313.pyc b/app/__pycache__/run_augmenter.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b99bbab4c4d936595f389c29efa526c0970d0f4 Binary files /dev/null and b/app/__pycache__/run_augmenter.cpython-313.pyc differ diff --git a/app/app.ipynb b/app/app.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..209f0c7a388eb8c6ed4a723d0327884a0b459e94 --- /dev/null +++ b/app/app.ipynb @@ -0,0 +1,335 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/giorgosnikolaou/Library/Python/3.9/lib/python/site-packages/urllib3/__init__.py:35: NotOpenSSLWarning: urllib3 v2 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. 
See: https://github.com/urllib3/urllib3/issues/3020\n", + " warnings.warn(\n", + "[nltk_data] Downloading package words to\n", + "[nltk_data] /Users/giorgosnikolaou/nltk_data...\n", + "[nltk_data] Package words is already up-to-date!\n" + ] + } + ], + "source": [ + "import gradio as gr\n", + "import pandas as pd\n", + "import pandas as pd\n", + "from src.utils.io_utils import PROJECT_ROOT\n", + "from run_augmenter import negative_sampler , positive_sampler\n", + "from pathlib import Path\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running on local URL: http://127.0.0.1:7860\n", + "Running on public URL: https://85b886469a8c17104c.gradio.live\n", + "\n", + "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co./spaces)\n" + ] + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "\n", + " This tool takes a list of DOIs and augments them using the OpenAlex API.\n", + " It is designed to complement the Negative Tool Sampler, enabling the creation of complete datasets.\n", + "
\n", + " \"\"\")\n", + "\n", + " with gr.Group():\n", + " with gr.Row():\n", + " pos_use_default_checkbox = gr.Checkbox(label=\"Use Default Dataset\", value=True)\n", + " pos_csv_file_input = gr.File(label=\"Upload CSV (optional)\", file_types=[\".csv\"], visible=False)\n", + "\n", + " with gr.Row():\n", + " size_input = gr.Number(label=\"Number of Samples\", value=10, info=\"Specify the number of samples to generate.\")\n", + " random_input = gr.Checkbox(label=\"Sample Randomly\", value=True, info=\"Whether to sample randomly.\")\n", + " seed_input = gr.Number(label=\"Random Seed\", value=42, info=\"Random seed for reproducibility.\")\n", + " full_input = gr.Checkbox(label=\"Full Dataset Mode\", value=False, info=\"Indicate whether to use the full dataset.\")\n", + "\n", + " with gr.Group():\n", + " pos_output_file = gr.File(label=\"Download Augmented Dataset\")\n", + " pos_dataset_preview = gr.DataFrame(label=\"Dataset Preview\", interactive=False, visible=False)\n", + " with gr.Row():\n", + " pos_submit_button = gr.Button(\"Submit ๐\", elem_id=\"submit-button\")\n", + " pos_reset_button = gr.Button(\"Reset ๐\", elem_id=\"reset-button\")\n", + "\n", + " # Button Actions\n", + " pos_submit_button.click(\n", + " positive_sampler_interface,\n", + " inputs=[pos_use_default_checkbox, pos_csv_file_input, size_input, random_input, seed_input, full_input],\n", + " outputs=[pos_output_file, pos_dataset_preview, pos_dataset_preview]\n", + " )\n", + "\n", + " pos_reset_button.click(\n", + " reset_output,\n", + " inputs=[],\n", + " outputs=[pos_output_file, pos_dataset_preview, pos_dataset_preview]\n", + " )\n", + "\n", + " # Toggle File Input\n", + " def toggle_pos_csv_input(use_default):\n", + " return gr.update(visible=not use_default)\n", + "\n", + " pos_use_default_checkbox.change(\n", + " toggle_pos_csv_input,\n", + " inputs=[pos_use_default_checkbox],\n", + " outputs=[pos_csv_file_input]\n", + " )\n", + "\n", + " # Negative Tool Sampler Section\n", + " 
gr.Markdown(\"\"\"\n", + "\n", + " This tool generates datasets by creating negative samples from positive matches between preprints and articles.\n", + " Customize the difficulty and the augmentation factor to meet your needs.\n", + "
\n", + " \"\"\")\n", + "\n", + " with gr.Group():\n", + " with gr.Row():\n", + " factor_input = gr.Number(\n", + " label=\"Factor (int)\", value=1, info=\"Specify the number of negative samples per positive sample.\"\n", + " )\n", + " type_dropdown = gr.Dropdown(\n", + " [\"random\", \"similar topics\", \"overlapping authors\", \"random authors\", \"fuzzed title\"],\n", + " label=\"Select Difficulty or Augmentation Type\"\n", + " )\n", + " with gr.Row():\n", + " use_default_checkbox = gr.Checkbox(label=\"Use Default Dataset\", value=True)\n", + " csv_file_input = gr.File(label=\"Upload CSV (optional)\", file_types=[\".csv\"], visible=False)\n", + "\n", + " with gr.Group():\n", + " output_file = gr.File(label=\"Download Augmented Dataset\")\n", + " dataset_preview = gr.DataFrame(label=\"Dataset Preview\", interactive=False, visible=False)\n", + " with gr.Row():\n", + " submit_button = gr.Button(\"Submit ๐\", elem_id=\"submit-button\")\n", + " reset_button = gr.Button(\"Reset ๐\", elem_id=\"reset-button\")\n", + "\n", + " # Button Actions\n", + " submit_button.click(\n", + " augment_interface,\n", + " inputs=[factor_input, type_dropdown, use_default_checkbox, csv_file_input],\n", + " outputs=[output_file, dataset_preview, dataset_preview]\n", + " )\n", + "\n", + " reset_button.click(\n", + " reset_output,\n", + " inputs=[],\n", + " outputs=[output_file, dataset_preview, dataset_preview]\n", + " )\n", + "\n", + " # Toggle File Input\n", + " def toggle_csv_input(use_default):\n", + " return gr.update(visible=not use_default)\n", + "\n", + " use_default_checkbox.change(\n", + " toggle_csv_input,\n", + " inputs=[use_default_checkbox],\n", + " outputs=[csv_file_input]\n", + " )\n", + "\n", + "# Launch the app\n", + "if __name__ == \"__main__\":\n", + " app.launch(share=True)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "marple", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/create_negative_samples.py b/create_negative_samples.py new file mode 100644 index 0000000000000000000000000000000000000000..67cc9e2cc95b7d5d1200ebc431b43c6e3975d13f --- /dev/null +++ b/create_negative_samples.py @@ -0,0 +1,54 @@ +from src.dataset.GoodDataset import * +from src.dataset.NegativeSampler import * +import argparse +import os + +def main(config): + """ + Main function to process the dataset and save it as a CSV file. + Args: + config: Namespace object containing the script arguments. + """ + dataset = AugmentedDataset() + dataset.load(config.input) + + sampler = NegativeSampler(dataset) + sampler.create_negative_samples(config) + + + print(custom_struct_to_df(dataset.negative_samples).head()) + custom_struct_to_df(dataset.positive_samples).to_csv('./data/pos.csv', index=False) + custom_struct_to_df(dataset.negative_samples).to_csv('./data/neg.csv', index=False) + print(len(dataset.positive_samples)) + print(len(dataset.negative_samples)) + + +if __name__ == "__main__": + # Parse command-line arguments + from src.utils.io_utils import PROJECT_ROOT + parser = argparse.ArgumentParser(description="Generate and save a dataset based on the given configuration.") + + + parser.add_argument("-i", "--input", type=str, default=os.path.join(PROJECT_ROOT, "data/positive_samples.pkl"), help="Input file path to load the positive samples.") + parser.add_argument("-o", "--output", type=str, default=os.path.join(PROJECT_ROOT, "data/negative_samples.pkl"), help="Output file path to save the negative samples.") + + parser.add_argument("-s", "--seed", type=int, default=42, help="Random seed for reproducibility.") + + parser.add_argument("-r", "--random", action='store_true', help="Utilization of `sample_random`") + 
parser.add_argument("-f", "--fuzz_title", action='store_true', help="Utilization of `fuzz_title`") + parser.add_argument("-ra", "--replace_auth", action='store_true', help="Utilization of `sample_authors_overlap_random`") + parser.add_argument("-oa", "--overlap_auth", action='store_true', help="Utilization of `sample_authors_overlap`") + parser.add_argument("-ot", "--overlap_topic", action='store_true', help="Utilization of `sample_similar_topic`") + + parser.add_argument("--factor_max", type=int, default=4, help="Maximum number of negative samples to generate per positive sample.") + parser.add_argument("--authors_to_consider", type=int, default=1, help="Number of authors to consider when overlapping authors.") + parser.add_argument("--overlapping_authors", type=int, default=1, help="Minimum number of overlapping authors required.") + parser.add_argument("--fuzz_count", type=int, default=-1, help="Number of words to replace when fuzzing titles.") + + # Parse the arguments and pass to the main function + config = parser.parse_args() + if config.overlap_auth and config.overlap_topic: + parser.error("Only one of --overlap_auth and --overlap_topic can be used.") + if not (config.overlap_auth or config.overlap_topic or config.random): + parser.error("At least one of --overlap_auth, --overlap_topic, or --random must be specified.") + main(config) diff --git a/data/crossref-preprint-article-relationships-Aug-2023.csv b/data/crossref-preprint-article-relationships-Aug-2023.csv new file mode 100644 index 0000000000000000000000000000000000000000..30a55f1c612cc313f0c232812490e9f148d9fd28 --- /dev/null +++ b/data/crossref-preprint-article-relationships-Aug-2023.csv @@ -0,0 +1,1001 @@ +preprint_doi,article_doi,deposited_by_article_publisher,deposited_by_preprint_publisher,matching_confidence_score +10.5194/wcd-2021-52,10.5194/wcd-2-1245-2021,True,True,0.9919484702093396 +10.5194/tc-2020-96,10.5194/tc-15-1277-2021,True,True,1.0 
+10.1101/001586,10.1016/j.bica.2014.02.003,False,True,1.0 +10.2196/preprints.23492,10.2196/23492,False,True,1.0 +10.5194/acpd-9-11659-2009,10.5194/acp-9-9349-2009,True,True,1.0 +10.31235/osf.io/nj43g,10.31014/aior.1991.04.01.262,False,True,0.9456161616161616 +10.2196/preprints.16461,10.2196/16461,False,True,1.0 +10.5194/acpd-10-523-2010,10.5194/acp-10-4699-2010,True,True,1.0 +10.20944/preprints201910.0177.v1,10.3390/ma12223739,False,True, +10.20944/preprints201903.0234.v1,10.35513/21658005.2019.1.5,False,True,1.0 +10.31234/osf.io/6ythf,10.1017/s0140525x19002206,False,True,1.0 +10.2196/preprints.11905,10.2196/11905,False,True,1.0 +10.1101/665364,10.1016/j.dyepig.2019.107863,False,True,0.936810016689432 +10.1101/2020.06.10.20067116,10.1098/rspa.2019.0790,False,True,1.0 +10.2196/preprints.19048,10.2196/19048,False,True,1.0 +10.2196/preprints.29042,10.2196/29042,False,True,1.0 +10.21203/rs.2.14007/v2,10.1186/s12885-019-6361-2,False,True,1.0 +10.21203/rs.2.14007/v3,10.1186/s12885-019-6361-2,False,True,1.0 +10.21203/rs.2.14007/v1,10.1186/s12885-019-6361-2,False,True, +10.2196/preprints.27257,10.2196/27257,False,True,1.0 +10.5194/hessd-7-7121-2010,10.5194/hess-15-197-2011,True,True,1.0 +10.20944/preprints201801.0038.v1,10.3390/e20030160,False,True,1.0 +10.20944/preprints201810.0314.v1,10.3390/v10110603,False,True,0.9962962962962963 +10.1101/2020.03.21.001347,10.21914/anziamj.v61i0.15040,False,True,0.9722222222222224 +10.1101/853283,10.1016/j.brainres.2019.146627,False,True,1.0 +10.2196/preprints.19021,10.2196/19021,False,True,1.0 +10.5194/gmd-2019-113,10.5194/gmd-13-4845-2020,True,True,1.0 +10.20944/preprints201711.0016.v1,10.3390/sym9120292,False,True,0.9916666666666668 +10.5194/osd-12-1567-2015,10.5194/os-12-39-2016,True,True,1.0 +10.20944/preprints201807.0566.v1,10.3390/sym10100451,False,True, +10.5194/hessd-5-1371-2008,10.5194/hess-14-2243-2010,True,True,0.8916666666666666 +10.31219/osf.io/s93tx,10.1080/19419899.2021.1875595,False,True,1.0 
+10.31730/osf.io/cxety,10.35409/ijbmer.2019.2421,False,True,0.9907407407407408 +10.1101/675090,10.1371/journal.pone.0219107,False,True,1.0 +10.20944/preprints202101.0270.v1,10.3390/s21051871,False,True,0.9498525073746312 +10.5194/acp-2020-1010,10.5194/acp-21-9585-2021,True,True,0.998531571218796 +10.31219/osf.io/j28d4,10.31014/aior.1992.03.04.310,False,True, +10.31222/osf.io/c5xu8,10.31014/aior.1992.03.04.310,False,True,0.8857142857142857 +10.5194/os-2016-41,10.5194/os-12-1279-2016,True,True,1.0 +10.1101/790352,10.1016/j.neuroscience.2020.02.016,False,True,0.9743589743589745 +10.2196/preprints.10722,10.2196/10722,False,True,1.0 +10.20944/preprints201710.0189.v1,10.3390/soc8010005,False,True,1.0 +10.2196/preprints.17323,10.2196/17323,False,True,1.0 +10.20944/preprints202005.0002.v1,10.3390/app10113953,False,True,1.0 +10.2196/preprints.6578,10.2196/pediatrics.6578,False,True,1.0 +10.21034/dp.44,10.1016/s0927-5398(01)00040-8,False,True, +10.20944/preprints201608.0072.v1,10.3390/fermentation2030016,False,True,1.0 +10.5194/esurf-2021-105,10.5194/esurf-10-875-2022,True,True,0.9392265193370166 +10.20944/preprints201809.0407.v1,10.3390/sym10100514,False,True,1.0 +10.20944/preprints201807.0609.v1,10.3390/en11082171,False,True,0.9858156028368796 +10.20944/preprints201806.0018.v1,10.3390/su10072312,False,True,0.978494623655914 +10.5194/bg-2021-170,10.5194/bg-18-6167-2021,True,True,0.9986824769433466 +10.21203/rs.2.454/v1,10.1186/s13063-019-3712-x,False,True,0.9910714285714284 +10.21203/rs.2.454/v2,10.1186/s13063-019-3712-x,False,True,0.9910714285714284 +10.1101/2020.02.04.934588,10.1016/j.cnsns.2020.105373,False,True,0.9941520467836256 +10.31235/osf.io/9uw6j,10.1017/aap.2019.4,False,True,1.0 +10.20944/preprints201802.0123.v1,10.3390/su10040947,False,True,0.9989350372736956 +10.31219/osf.io/nvz85,10.13189/ujer.2020.082273,False,True,0.975438596491228 +10.21203/rs.3.rs-68784/v2,10.1186/s13018-020-02188-2,False,True,1.0 
+10.21203/rs.3.rs-68784/v1,10.1186/s13018-020-02188-2,False,True,1.0 +10.5194/angeo-2021-38,10.5194/angeo-39-1005-2021,True,True,0.996376811594203 +10.1101/213397,10.1093/molbev/msy059,False,True,0.9847494553376906 +10.21203/rs.3.rs-122948/v1,10.1186/s12879-021-05787-4,False,True,1.0 +10.1101/468959,10.1080/1062936x.2012.742136,False,True, +10.2196/preprints.10213,10.2196/10213,False,True,0.9743589743589745 +10.20944/preprints201810.0228.v1,10.3390/children5110151,False,True,1.0 +10.21203/rs.3.rs-47855/v2,10.1186/s12879-021-05889-z,False,True,1.0 +10.21203/rs.3.rs-47855/v1,10.1186/s12879-021-05889-z,False,True,0.9910824834496332 +10.1002/essoar.10511860.1,10.1007/s11356-022-22561-4,False,True,1.0 +10.1101/139642,10.1371/journal.pone.0192081,False,True,0.9770609318996416 +10.2196/preprints.39166,10.2196/39166,False,True,1.0 +10.20944/preprints202212.0232.v1,10.3390/v15020406,False,True,1.0 +10.5194/amt-2022-196,10.5194/amt-16-707-2023,True,True,1.0 +10.2196/preprints.19397,10.2196/19397,False,True,1.0 +10.1101/496752,10.1016/j.optom.2019.10.001,False,True,0.9983660130718954 +10.1101/261370,10.1167/18.6.9,False,True,1.0 +10.5194/amt-2017-92,10.5194/amt-11-17-2018,True,True,1.0 +10.20944/preprints202102.0387.v1,10.3390/foods10030678,False,True,1.0 +10.20944/preprints201910.0017.v1,10.3390/sym11111390,False,True,1.0 +10.20944/preprints201707.0061.v1,10.3390/rel8080155,False,True,0.9885057471264368 +10.5194/wes-2018-35,10.5194/wes-3-615-2018,True,True,0.9987029831387808 +10.31235/osf.io/t25hr,10.18408/ahuri-7115001,False,True,1.0 +10.5194/se-2019-99,10.5194/se-11-241-2020,True,True,0.9732770745428972 +10.5194/amt-2021-113,10.5194/amt-14-6379-2021,True,True,0.9866666666666668 +10.2196/preprints.17782,10.2196/17782,False,True,1.0 +10.21203/rs.2.12126/v3,10.1186/s12898-019-0270-8,False,True,0.9933333333333332 +10.21203/rs.2.12126/v1,10.1186/s12898-019-0270-8,False,True,0.9909178743961352 +10.21203/rs.2.12126/v2,10.1186/s12898-019-0270-8,False,True,0.9909178743961352 
+10.31235/osf.io/8k7sp,10.4303/jdar/235992,False,True,1.0 +10.2196/preprints.23357,10.2196/23357,False,True,1.0 +10.5194/os-2022-15,10.5194/os-18-1163-2022,True,True,0.999250936329588 +10.5194/npgd-1-1133-2014,10.5194/npg-22-53-2015,True,False,1.0 +10.2196/preprints.46339,10.2196/46339,False,True,1.0 +10.2196/preprints.38176,10.2196/38176,False,True,1.0 +10.2196/preprints.44602,10.2196/44602,False,True,1.0 +10.2196/preprints.20571,10.2196/20571,False,True,1.0 +10.2196/preprints.12603,10.2196/12603,False,True,1.0 +10.20944/preprints201809.0144.v1,10.3390/resources7040076,False,True,0.9727626459143968 +10.5194/cp-2017-68,10.5194/cp-15-377-2019,True,True,1.0 +10.1101/402750,10.1098/rsif.2018.0792,False,True,1.0 +10.31234/osf.io/5bm8r,10.1163/22105832-00902006,False,True,1.0 +10.21203/rs.3.rs-2156656/v1,10.1038/s41388-023-02692-9,False,True,1.0 +10.5194/os-2020-51,10.5194/os-17-131-2021,True,True,0.9956140350877192 +10.20944/preprints202102.0336.v1,10.3390/app11062801,False,True,0.9324894514767932 +10.2196/preprints.16294,10.2196/16294,False,True,0.9784172661870504 +10.20944/preprints201809.0477.v1,10.3390/medicina54060099,False,True,0.9989417989417988 +10.2196/preprints.23400,10.2196/23400,False,True,1.0 +10.5194/hgss-2023-1,10.5194/hgss-14-61-2023,True,True,1.0 +10.26434/chemrxiv-2022-k7k0h-v6,10.1021/acs.jpcb.2c03638,False,True,0.9807692307692308 +10.5194/amt-2022-263,10.5194/amt-16-355-2023,True,True,0.9877675840978594 +10.1101/446310,10.1371/journal.pntd.0006927,False,True,0.9273689273689274 +10.1101/537035,10.1021/acs.jctc.0c00476,False,True,1.0 +10.21203/rs.3.rs-39716/v1,10.1186/s12913-021-06123-x,False,True,0.9987029831387808 +10.21203/rs.3.rs-39716/v2,10.1186/s12913-021-06123-x,False,True,1.0 +10.5194/cp-2021-15,10.5194/cp-17-2327-2021,True,True,0.8992248062015503 +10.21203/rs.3.rs-114221/v1,10.1186/s12889-021-10594-2,False,True,1.0 +10.21203/rs.3.rs-2014302/v1,10.1038/s41372-023-01642-3,False,True,1.0 
+10.1101/2021.10.21.465319,10.1002/oby.23441,False,True,0.959660297239915 +10.5194/acpd-14-14637-2014,10.5194/acp-14-12683-2014,True,True,1.0 +10.32942/osf.io/mxg6q,10.1111/1440-1703.12294,False,True,1.0 +10.5194/acpd-11-813-2011,10.5194/acp-11-8017-2011,True,True,0.938818565400844 +10.5194/acp-2016-365,10.5194/acp-17-575-2017,True,True,0.9300395256916996 +10.5194/amtd-8-8385-2015,10.5194/amt-9-359-2016,True,True,0.9983660130718954 +10.5194/acpd-8-15101-2008,10.5194/acp-9-1639-2009,True,True,1.0 +10.5194/acp-2018-739,10.5194/acp-18-17225-2018,True,True,0.9977324263038548 +10.5194/amt-2016-398,10.5194/amt-10-1911-2017,True,True,1.0 +10.5194/cpd-9-1703-2013,10.5194/cp-9-1749-2013,True,True,1.0 +10.5194/bg-2017-74,10.5194/bg-14-3883-2017,True,True,1.0 +10.5194/acp-2016-178,10.5194/acp-16-11617-2016,True,True,0.9722222222222224 +10.5194/bgd-12-3211-2015,10.5194/bg-12-3225-2015,True,True,1.0 +10.5194/tc-2016-111,10.5194/tc-10-2317-2016,True,True,1.0 +10.5194/acpd-10-21931-2010,10.5194/acp-11-5603-2011,True,True,1.0 +10.1101/2020.02.29.970913,10.3389/fmicb.2020.01037,False,True,1.0 +10.5194/tc-2017-2,10.5194/tc-11-2265-2017,True,True,0.943502824858757 +10.21203/rs.3.rs-35627/v2,10.1186/s12889-020-09979-6,False,True,0.9965277777777776 +10.21203/rs.3.rs-35627/v3,10.1186/s12889-020-09979-6,False,True,1.0 +10.21203/rs.3.rs-35627/v4,10.1186/s12889-020-09979-6,False,True,1.0 +10.21203/rs.3.rs-35627/v5,10.1186/s12889-020-09979-6,False,True,0.9965277777777776 +10.21203/rs.3.rs-35627/v1,10.1186/s12889-020-09979-6,False,True,0.9910873440285204 +10.5194/acpd-15-27501-2015,10.5194/acp-16-2477-2016,True,True,0.9437328918048964 +10.5194/acpd-11-11809-2011,10.5194/acp-12-3627-2012,True,False,1.0 +10.5194/acpd-14-9801-2014,10.5194/acp-14-9917-2014,True,True,0.9572649572649572 +10.5194/bgd-11-14269-2014,10.5194/bg-12-1131-2015,True,False,0.9866666666666668 +10.5194/amt-2016-87,10.5194/amt-9-3769-2016,True,True,0.9743589743589745 
+10.5194/acpd-15-34361-2015,10.5194/acp-16-10501-2016,True,True,0.954940867939686 +10.5194/acpd-13-2125-2013,10.5194/acp-13-11089-2013,True,True,1.0 +10.5194/acp-2016-806,10.5194/acp-18-2243-2018,True,True,0.9950980392156864 +10.5194/cp-2017-57,10.5194/cp-13-1539-2017,True,True,1.0 +10.5194/tc-2016-29,10.5194/tc-10-2241-2016,True,True,0.9985693848354792 +10.5194/acpd-9-15747-2009,10.5194/acp-9-8857-2009,True,True,1.0 +10.26434/chemrxiv.7464803.v1,10.1021/acs.langmuir.9b02574,False,True,0.954861111111111 +10.5194/nhessd-2-4685-2014,10.5194/nhess-15-109-2015,True,True,0.9206349206349206 +10.31234/osf.io/5v4wt,10.1016/j.beproc.2017.04.017,False,True, +10.5194/hessd-12-9003-2015,10.5194/hess-20-605-2016,True,False,0.9679291983488598 +10.5194/acp-2016-692,10.5194/acp-17-3279-2017,True,True,0.9863013698630138 +10.5194/acpd-10-7469-2010,10.5194/acp-10-9017-2010,True,True,1.0 +10.1101/115253,10.1093/cercor/bhx259,False,True,1.0 +10.5194/acpd-11-25709-2011,10.5194/acp-11-12959-2011,True,True,1.0 +10.5194/acpd-12-28765-2012,10.5194/acp-13-2857-2013,True,True,1.0 +10.5194/gmdd-8-5315-2015,10.5194/gmd-9-17-2016,True,True,1.0 +10.5194/gmd-2016-63,10.5194/gmd-9-3199-2016,True,True,1.0 +10.5194/acp-2017-319,10.5194/acp-17-13699-2017,True,True,0.8735930735930736 +10.5194/bgd-10-2415-2013,10.5194/bg-10-5171-2013,True,True,0.9637681159420288 +10.5194/bgd-10-9315-2013,10.5194/bg-10-7347-2013,True,True,0.9975669099756692 +10.5194/soild-2-29-2015,10.5194/soil-1-475-2015,True,True,0.9957446808510638 +10.5194/se-2017-18,10.5194/se-8-955-2017,True,True,1.0 +10.1101/012195,10.1007/s10827-015-0574-4,False,True,0.988095238095238 +10.5194/gmdd-6-2491-2013,10.5194/gmd-7-1183-2014,True,True,0.993127147766323 +10.5194/gmd-2017-293,10.5194/gmd-11-1971-2018,True,True,0.998272884283247 +10.5194/gmdd-5-1381-2012,10.5194/gmd-6-57-2013,True,True,1.0 +10.1101/2021.04.28.441869,10.1016/j.ymthe.2022.01.030,False,True,1.0 +10.1101/568790,10.3389/fimmu.2019.01066,False,True,0.9709639953542392 
+10.1101/560144,10.26508/lsa.201900358,False,True,1.0 +10.1101/2020.06.22.164814,10.1152/jn.00110.2021,False,True,1.0 +10.5194/amtd-7-11345-2014,10.5194/amt-8-2491-2015,True,True,1.0 +10.20944/preprints201803.0185.v1,10.3390/ijms19051364,False,True,0.9803921568627452 +10.5194/acpd-8-3895-2008,10.5194/acp-8-4655-2008,True,True,1.0 +10.5194/hess-2016-400,10.5194/hess-20-5049-2016,True,True,1.0 +10.5194/soild-2-647-2015,10.5194/soil-2-1-2016,True,True,1.0 +10.5194/nhess-2016-210,10.5194/nhess-16-2347-2016,True,True,0.9388560157790928 +10.5194/gmdd-6-1085-2013,10.5194/gmd-6-1641-2013,True,True,1.0 +10.5194/gmd-2016-114,10.5194/gmd-9-3605-2016,True,True,0.933719101675758 +10.5194/acpd-14-19837-2014,10.5194/acp-15-913-2015,True,True,0.9904761904761904 +10.5194/amtd-8-10755-2015,10.5194/amt-9-1613-2016,True,True,0.9866666666666668 +10.5194/acpd-15-931-2015,10.5194/acp-15-6535-2015,True,True,0.9878542510121456 +10.5194/acpd-14-2277-2014,10.5194/acp-14-6557-2014,True,True,0.998003992015968 +10.5194/acpd-12-16647-2012,10.5194/acp-12-11795-2012,True,True,1.0 +10.20944/preprints201805.0070.v1,10.3390/diagnostics8020041,False,True,0.9658260233918128 +10.20944/preprints202105.0096.v1,10.3390/nu13061875,False,True,0.993992722253592 +10.5194/acp-2018-37,10.5194/acp-18-6761-2018,True,True,1.0 +10.5194/amtd-5-2111-2012,10.5194/amt-5-1719-2012,True,True,1.0 +10.26434/chemrxiv.8220599.v1,10.1021/acs.chemrestox.9b00255,False,True,0.996031746031746 +10.5194/osd-11-1719-2014,10.5194/os-11-269-2015,True,True,1.0 +10.5194/osd-11-693-2014,10.5194/os-10-587-2014,True,True,1.0 +10.5194/cp-2017-49,10.5194/cp-14-157-2018,True,True,0.987468671679198 +10.5194/acpd-11-4533-2011,10.5194/acp-11-6721-2011,True,False,1.0 +10.5194/sed-6-2567-2014,10.5194/se-5-1169-2014,True,True,0.9886264216972878 +10.5194/cpd-8-1523-2012,10.5194/cp-8-1801-2012,True,True,0.9982547993019196 +10.5194/gmd-2016-37,10.5194/gmd-9-3111-2016,True,True,0.9759036144578314 
+10.5194/soil-2016-63,10.5194/soil-3-67-2017,True,True,0.9228395061728396 +10.5194/acp-2016-7,10.5194/acp-16-7653-2016,True,True,0.978593272171254 +10.1101/165357,10.1371/journal.pcbi.1005868,False,True,1.0 +10.5194/nhess-2016-66,10.5194/nhess-16-2247-2016,True,True,1.0 +10.2196/preprints.9154,10.2196/resprot.9154,False,True,1.0 +10.1101/568212,10.1098/rspb.2019.1818,False,True,1.0 +10.5194/gmdd-7-931-2014,10.5194/gmd-7-2411-2014,True,True,0.9975669099756692 +10.1101/2021.06.19.449118,10.1158/2767-9764.crc-22-0003,False,True, +10.2196/preprints.20509,10.2196/20509,False,True,1.0 +10.5194/osd-10-691-2013,10.5194/os-9-885-2013,True,True,0.9607843137254902 +10.20944/preprints202007.0409.v1,10.3390/en13174331,False,True,1.0 +10.5194/se-2016-55,10.5194/se-7-1085-2016,True,True,0.9526748971193416 +10.5194/nhessd-1-3891-2013,10.5194/nhess-14-1257-2014,True,True,0.930117899249732 +10.5194/acpd-13-32291-2013,10.5194/acp-14-7485-2014,True,True,1.0 +10.5194/acpd-6-3099-2006,10.5194/acp-6-3243-2006,True,True,0.9629629629629628 +10.26434/chemrxiv-2022-dnl9p,10.1021/acs.inorgchem.2c01171,False,True,1.0 +10.5194/bgd-10-19311-2013,10.5194/bg-11-4015-2014,True,True,1.0 +10.5194/bgd-10-7013-2013,10.5194/bg-10-6807-2013,True,True, +10.5194/bgd-10-2305-2013,10.5194/bg-10-7263-2013,True,True,1.0 +10.1101/2022.01.11.475674,10.1016/j.nbd.2022.105879,False,True,1.0 +10.1101/517243,10.1016/j.neuroimage.2019.116175,False,True,1.0 +10.20944/preprints201808.0196.v1,10.3390/e20110840,False,True,0.949874686716792 +10.5194/acpd-14-24573-2014,10.5194/acp-15-6047-2015,True,True,0.9977324263038548 +10.5194/hessd-9-2717-2012,10.5194/hess-16-3817-2012,True,True,0.8919753086419754 +10.2196/preprints.42403,10.2196/42403,False,True,1.0 +10.2196/preprints.15105,10.2196/15105,False,True,0.9085858585858584 +10.5194/acpd-12-14115-2012,10.5194/acp-12-11037-2012,True,True,1.0 +10.5194/acpd-12-6593-2012,10.5194/acp-12-10331-2012,True,True,0.9982547993019196 
+10.5194/acpd-12-20007-2012,10.5194/acp-13-3849-2013,True,True,1.0 +10.2196/preprints.9966,10.2196/jmir.9966,False,True,1.0 +10.5194/osd-3-939-2006,10.5194/os-3-129-2007,True,True,1.0 +10.1101/2020.02.16.951954,10.15252/embj.2020104708,False,True,0.9681704260651628 +10.5194/bgd-8-941-2011,10.5194/bg-8-2523-2011,True,True,0.9915764139590856 +10.5194/bgd-8-7165-2011,10.5194/bg-8-3609-2011,True,True,1.0 +10.1101/791319,10.1523/jneurosci.2416-19.2020,False,True,0.990138067061144 +10.5194/tcd-8-5361-2014,10.5194/tc-9-103-2015,True,True,1.0 +10.5194/tcd-6-5119-2012,10.5194/tc-7-1139-2013,True,True,0.9551724137931036 +10.1101/2021.10.23.465582,10.1523/jneurosci.2145-21.2022,False,True,1.0 +10.5194/tcd-6-2265-2012,10.5194/tc-7-1-2013,True,True,0.989384288747346 +10.2196/preprints.13662,10.2196/13662,False,True,1.0 +10.31231/osf.io/3pxzd,10.1037/ccp0000092,False,True,0.9423740510697032 +10.5194/hessd-7-9173-2010,10.5194/hess-15-1339-2011,True,True,1.0 +10.5194/bgd-9-14291-2012,10.5194/bg-10-5079-2013,True,True,1.0 +10.5194/acpd-11-4631-2011,10.5194/acp-11-7629-2011,True,True,1.0 +10.2196/preprints.17542,10.2196/17542,False,True,1.0 +10.5194/hessd-7-621-2010,10.5194/hess-14-719-2010,True,True,1.0 +10.5194/acpd-12-1451-2012,10.5194/acp-12-5755-2012,True,True,0.998015873015873 +10.5194/gmdd-6-3655-2013,10.5194/gmd-6-2153-2013,True,True,0.9890611279972982 +10.1101/2021.07.14.452404,10.1111/2041-210x.13821,False,True,0.9930555555555556 +10.5194/bgd-10-17071-2013,10.5194/bg-11-3131-2014,True,True,0.99457111834962 +10.1101/600312,10.1111/tpj.14670,False,True,0.9085648148148148 +10.5194/cpd-7-775-2011,10.5194/cp-7-917-2011,True,True,1.0 +10.5194/acpd-13-27779-2013,10.5194/acp-14-2383-2014,True,True,0.9473684210526316 +10.5194/esdd-5-779-2014,10.5194/esd-5-423-2014,True,False,1.0 +10.21203/rs.2.11587/v2,10.1186/s12884-019-2590-2,False,True,1.0 +10.21203/rs.2.11587/v1,10.1186/s12884-019-2590-2,False,True,1.0 +10.21203/rs.2.11587/v4,10.1186/s12884-019-2590-2,False,True,1.0 
+10.21203/rs.2.11587/v3,10.1186/s12884-019-2590-2,False,True,1.0 +10.21203/rs.2.11587/v5,10.1186/s12884-019-2590-2,False,True,1.0 +10.2196/preprints.12347,10.2196/12347,False,True,1.0 +10.5194/se-2016-11,10.5194/se-7-599-2016,True,True,0.9022946859903382 +10.5194/bg-2015-647,10.5194/bg-13-5511-2016,True,True,1.0 +10.1101/087577,10.1371/journal.pgen.1006793,False,True,0.912630579297246 +10.5194/acpd-9-13327-2009,10.5194/acp-9-8651-2009,True,True,1.0 +10.5194/hessd-6-4307-2009,10.5194/hess-13-2151-2009,True,True,1.0 +10.5194/acpd-11-4807-2011,10.5194/acp-11-6297-2011,True,True,0.9947916666666666 +10.5194/acpd-11-8337-2011,10.5194/acp-11-8415-2011,True,True,1.0 +10.1101/233924,10.1002/ece3.3872,False,True,0.9987129987129988 +10.21203/rs.3.rs-997649/v1,10.1007/s10637-022-01218-6,False,True,0.9977477477477475 +10.1101/359018,10.1152/jn.00601.2018,False,True,1.0 +10.1101/247189,10.1111/evo.13573,False,True,1.0 +10.5194/bgd-10-14093-2013,10.5194/bg-10-8223-2013,True,True,0.9901960784313726 +10.20944/preprints202009.0582.v1,10.3390/jmse8100756,False,True,1.0 +10.1101/370155,10.1371/journal.pone.0201329,False,True,0.9853249475890984 +10.2196/preprints.18338,10.2196/18338,False,True,1.0 +10.5194/acpd-11-13867-2011,10.5194/acp-11-10911-2011,True,True,1.0 +10.21203/rs.3.rs-61529/v2,10.1186/s13049-020-00818-6,False,True,0.98989898989899 +10.21203/rs.3.rs-61529/v1,10.1186/s13049-020-00818-6,False,True,1.0 +10.1101/868307,10.1093/cercor/bhaa146,False,True,1.0 +10.5194/acpd-11-9887-2011,10.5194/acp-11-11867-2011,True,True,0.9621952608794714 +10.5194/acpd-11-11649-2011,10.5194/acp-11-12751-2011,True,True,1.0 +10.5194/bgd-12-15495-2015,10.5194/bg-13-3619-2016,True,True,0.903925364758698 +10.5194/bgd-12-7705-2015,10.5194/bg-12-5277-2015,True,True,1.0 +10.1101/2020.04.14.041145,10.1158/1078-0432.ccr-20-1762,False,True,0.9487179487179488 +10.5194/acpd-10-24245-2010,10.5194/acp-11-767-2011,True,True,0.9993238674780256 +10.31234/osf.io/k4anx,10.1186/s12887-019-1818-7,False,True,1.0 
+10.5194/bgd-10-19005-2013,10.5194/bg-11-2793-2014,True,True,1.0 +10.20944/preprints202004.0309.v1,10.3390/rs12111748,False,True,1.0 +10.5194/acpd-9-16715-2009,10.5194/acp-10-1269-2010,True,True,1.0 +10.5194/hessd-12-12615-2015,10.5194/hess-20-2691-2016,True,True, +10.5194/amtd-7-1917-2014,10.5194/amt-7-2097-2014,True,True,1.0 +10.5194/amtd-6-1771-2013,10.5194/amt-6-1903-2013,True,True,1.0 +10.5194/hessd-10-2373-2013,10.5194/hess-18-595-2014,True,True, +10.5194/acpd-13-2913-2013,10.5194/acp-13-6473-2013,True,True,0.9876543209876544 +10.5194/bgd-10-17043-2013,10.5194/bg-11-2519-2014,True,True, +10.5194/acpd-15-6125-2015,10.5194/acp-15-9003-2015,True,True,1.0 +10.5194/acpd-9-6397-2009,10.5194/acp-9-5093-2009,True,True,0.9914529914529916 +10.5194/acpd-10-23657-2010,10.5194/acp-11-1621-2011,True,True,1.0 +10.5194/bgd-11-10917-2014,10.5194/bg-11-7025-2014,True,True,0.9966329966329966 +10.5194/acpd-13-18951-2013,10.5194/acp-13-11169-2013,True,True,1.0 +10.5194/hessd-4-3087-2007,10.5194/hess-12-405-2008,True,True,1.0 +10.5194/bgd-9-19121-2012,10.5194/bg-10-2315-2013,True,False,0.9976359338061466 +10.2196/preprints.11364,10.2196/11364,False,True,1.0 +10.5194/acpd-11-19011-2011,10.5194/acp-12-11085-2012,True,True, +10.5194/sed-5-257-2013,10.5194/se-4-255-2013,True,True,1.0 +10.31234/osf.io/84uqz,10.1111/psyp.14242,False,True,0.9692307692307692 +10.5194/esurfd-1-745-2013,10.5194/esurf-2-363-2014,True,True,1.0 +10.21203/rs.3.rs-474980/v1,10.1038/s42003-021-02885-6,False,True,0.998148148148148 +10.1101/2021.10.15.464543,10.1021/acs.jcim.1c01269,False,True,1.0 +10.5194/hessd-10-9847-2013,10.5194/hess-17-5213-2013,True,True,1.0 +10.5194/acpd-11-30757-2011,10.5194/acp-12-4885-2012,True,True,1.0 +10.5194/tcd-8-3367-2014,10.5194/tc-9-65-2015,True,True,1.0 +10.5194/acpd-14-19515-2014,10.5194/acp-15-99-2015,True,False,1.0 +10.5194/bgd-8-5849-2011,10.5194/bg-9-593-2012,True,True,1.0 +10.5194/bgd-10-17549-2013,10.5194/bg-11-4459-2014,True,True,1.0 
+10.5194/acpd-14-4189-2014,10.5194/acp-14-7075-2014,True,True,1.0 +10.5194/hessd-9-5531-2012,10.5194/hess-16-3749-2012,True,False,0.9987325728770596 +10.5194/bgd-9-2153-2012,10.5194/bg-9-2301-2012,True,True,0.9831649831649832 +10.5194/acpd-9-5809-2009,10.5194/acp-9-6479-2009,True,False,1.0 +10.5194/cpd-9-1735-2013,10.5194/cp-9-1773-2013,True,True,0.9965277777777776 +10.5194/osd-7-995-2010,10.5194/os-7-175-2011,True,True,0.9966666666666668 +10.1101/116426,10.1088/1478-3975/aa6b67,False,True,1.0 +10.5194/tc-2016-199,10.5194/tc-11-47-2017,True,True,1.0 +10.5194/tcd-8-4823-2014,10.5194/tc-9-53-2015,True,True,1.0 +10.5194/cp-2016-131,10.5194/cp-13-1153-2017,True,True,1.0 +10.26434/chemrxiv.11514189.v1,10.1002/anie.201915493,False,True,0.9611046776853706 +10.26434/chemrxiv.11514189.v2,10.1002/anie.201915493,False,True,0.9611046776853706 +10.1101/2020.12.18.423427,10.1038/s41422-021-00495-9,False,True,0.9445194182036288 +10.5194/cpd-11-3277-2015,10.5194/cp-12-455-2016,True,True, +10.21203/rs.2.14334/v2,10.1186/s12909-019-1876-4,False,True,1.0 +10.21203/rs.2.14334/v3,10.1186/s12909-019-1876-4,False,True,1.0 +10.21203/rs.2.14334/v1,10.1186/s12909-019-1876-4,False,True,1.0 +10.1101/2021.04.26.441285,10.1021/acsnano.1c06488,False,True,1.0 +10.5194/esd-2020-80,10.5194/esd-12-367-2021,True,True,0.9983249581239532 +10.20944/preprints201810.0612.v1,10.3390/electronics7120347,False,True,0.925925925925926 +10.1101/209718,10.1038/s41592-018-0002-6,False,True,0.9055876685934487 +10.5194/cpd-11-3143-2015,10.5194/cp-12-819-2016,True,True,0.9324444444444444 +10.5194/cpd-9-5837-2013,10.5194/cp-10-759-2014,True,True,0.9959349593495936 +10.2196/preprints.9498,10.2196/jmir.9498,False,True,1.0 +10.2196/preprints.9498.a,10.2196/jmir.9498,False,True,1.0 +10.5194/cp-2018-60,10.5194/cp-15-1063-2019,True,True,0.8722741433021807 +10.5194/acp-2022-387,10.5194/acp-22-13897-2022,True,True,0.9963099630996308 +10.5194/acp-2021-870,10.5194/acp-22-12961-2022,True,True,0.9591397849462364 
+10.5194/bg-2019-145,10.5194/bg-16-3377-2019,True,True,1.0 +10.5194/tc-2018-131,10.5194/tc-13-219-2019,True,True,0.9820193637621024 +10.5194/gmdd-8-7063-2015,10.5194/gmd-9-1293-2016,True,True,0.91999806765503 +10.1101/223248,10.1093/jnci/djy081,False,True,0.9974747474747474 +10.5194/hess-2021-2,10.5194/hess-25-5749-2021,True,True,0.9780786589297228 +10.5194/hessd-10-14705-2013,10.5194/hess-19-389-2015,True,True,0.9164912280701756 +10.5194/hessd-9-10563-2012,10.5194/hess-17-817-2013,True,False,1.0 +10.5194/hessd-11-9183-2014,10.5194/hess-19-1247-2015,True,False,1.0 +10.1101/542282,10.1093/jxb/erz182,False,True,0.971326164874552 +10.5194/essd-2022-239,10.5194/essd-15-1675-2023,True,True,0.9861239592969472 +10.1101/2020.08.04.237156,10.1002/jev2.12079,False,True,0.9973544973544972 +10.1101/198671,10.1038/s41593-019-0359-6,False,True,1.0 +10.1101/2020.07.13.200360,10.1371/journal.pone.0236612,False,True,0.9743589743589745 +10.1101/462861,10.1371/journal.pone.0207555,False,True,0.9920496894409938 +10.20944/preprints201608.0123.v1,10.3390/s16081290,False,True,0.9979423868312756 +10.5194/acpd-15-14889-2015,10.5194/acp-15-11165-2015,True,True,1.0 +10.26434/chemrxiv-2021-70pvw,10.1021/acs.chemmater.1c04167,False,True,1.0 +10.5194/hessd-8-5319-2011,10.5194/hess-15-2839-2011,True,True,1.0 +10.5194/hessd-12-1809-2015,10.5194/hess-20-3873-2016,True,True,0.9321789321789322 +10.5194/hess-2022-60,10.5194/hess-26-6399-2022,True,True,0.9314420803782508 +10.5194/hessd-2-2427-2005,10.5194/hess-10-535-2006,True,True,1.0 +10.5194/hessd-11-6881-2014,10.5194/hess-19-1225-2015,True,False,1.0 +10.5194/hess-2019-461,10.5194/hess-24-3015-2020,True,True,0.9784172661870504 +10.1101/2021.06.16.448617,10.1084/jem.20211112,False,True,0.96 +10.1101/2020.02.16.942904,10.3390/cancers12051171,False,True,0.9784172661870504 +10.5194/gmd-2022-173,10.5194/gmd-16-1617-2023,True,True,0.9973958333333334 +10.5194/hessd-8-9961-2011,10.5194/hess-16-1445-2012,True,False,1.0 
+10.21034/sr.410,10.1086/666589,False,True,1.0 +10.5194/acpd-11-32601-2011,10.5194/acp-12-3273-2012,True,True,1.0 +10.5194/hess-2018-334,10.5194/hess-22-5987-2018,True,True,0.9670781893004116 +10.1101/2022.07.01.498411,10.7554/elife.81184,True,False,0.8990378213475783 +10.1101/374660,10.1016/j.celrep.2018.10.079,False,True,0.9166666666666666 +10.21203/rs.3.rs-87483/v1,10.1186/s40658-020-00350-7,False,True,1.0 +10.1101/2020.08.27.269647,10.1186/s40478-020-01068-4,False,True,0.9691358024691358 +10.1101/028886,10.1038/nature17661,False,True,0.992156862745098 +10.1101/2021.08.11.455980,10.7554/elife.83652,True,False,0.9791666666666666 +10.1101/596569,10.1016/j.celrep.2019.10.056,False,True,1.0 +10.5194/mr-2020-13,10.5194/mr-1-209-2020,True,True,0.9714285714285714 +10.26434/chemrxiv-2021-t1b6t,10.1021/jacs.2c03024,False,True, +10.5194/mr-2021-9,10.5194/mr-2-375-2021,True,True,1.0 +10.5194/mr-2020-5,10.5194/mr-1-59-2020,True,True,1.0 +10.5194/acpd-15-10899-2015,10.5194/acp-15-8751-2015,True,True,1.0 +10.1101/529156,10.1371/journal.pgen.1008458,False,True,0.9487922705314008 +10.1101/2021.09.14.460327,10.1523/eneuro.0373-21.2022,False,True,0.875 +10.5194/bg-2018-512,10.5194/bg-16-2635-2019,True,True,1.0 +10.5194/acp-2020-17,10.5194/acp-20-9281-2020,True,True,0.9971014492753624 +10.1101/2021.11.03.467174,10.7554/elife.75272,True,True,1.0 +10.26434/chemrxiv.7990910.v2,10.1021/acs.jcim.9b00325,False,True,0.98635477582846 +10.26434/chemrxiv.7990910.v1,10.1021/acs.jcim.9b00325,False,True,0.98635477582846 +10.21203/rs.3.rs-677091/v1,10.1038/s41467-021-26199-7,False,True,0.993103448275862 +10.1101/2020.04.02.022541,10.1021/acschembio.0c00348,False,True,1.0 +10.5194/wes-2021-156,10.5194/wes-7-2307-2022,True,True,1.0 +10.1101/050237,10.1371/journal.pone.0170622,False,True,0.9890453834115808 +10.5194/osd-12-135-2015,10.5194/os-11-629-2015,True,True,1.0 +10.1101/072470,10.1038/s41586-018-0124-0,False,True, +10.31234/osf.io/dbkj6,10.1111/bjso.12399,False,True,0.9777777777777776 
+10.21203/rs.3.rs-136113/v1,10.1186/s13011-021-00358-x,False,True,1.0 +10.26434/chemrxiv.9756785.v1,10.1021/acsmedchemlett.9b00399,False,True,0.9969135802469136 +10.1101/2021.04.23.441115,10.7554/elife.69223,True,True,0.9528769841269842 +10.1101/328211,10.1007/s00339-019-2480-5,False,True,0.9629629629629628 +10.1101/2022.10.06.511106,10.7554/elife.83761,True,False,1.0 +10.1101/146852,10.1038/s41589-018-0013-8,False,True,0.9523809523809524 +10.31234/osf.io/vxa86,10.1037/met0000179,False,True,0.9985569985569984 +10.21203/rs.2.20459/v1,10.1186/s12870-020-2311-z,False,True,0.9845288326300984 +10.21203/rs.2.20459/v2,10.1186/s12870-020-2311-z,False,True,1.0 +10.21203/rs.2.20459/v3,10.1186/s12870-020-2311-z,False,True,1.0 +10.1101/122044,10.1371/journal.pcbi.1005890,False,True,1.0 +10.2196/preprints.14675,10.2196/14675,False,True,1.0 +10.32942/osf.io/s5dnr,10.1111/jeb.13728,False,True,0.942857142857143 +10.1101/2020.10.28.358846,10.1071/fp21337,False,True,0.93 +10.1101/2021.06.26.449853,10.1002/glia.24190,False,True,1.0 +10.1101/2021.04.13.439588,10.7554/elife.69377,True,True,0.9671445639187576 +10.5194/acp-2020-1041,10.5194/acp-21-9909-2021,True,True,0.9969418960244648 +10.5194/sed-4-1069-2012,10.5194/se-3-355-2012,True,True,1.0 +10.5194/hess-2021-41,10.5194/hess-25-4917-2021,True,True,0.9851387437594336 +10.21203/rs.3.rs-35889/v1,10.1186/s12883-020-01958-z,False,True,0.9662234998203376 +10.21203/rs.3.rs-35889/v2,10.1186/s12883-020-01958-z,False,True,1.0 +10.1101/2022.05.10.491316,10.7554/elife.78810,True,False,1.0 +10.2196/preprints.17997,10.2196/17997,False,True,1.0 +10.1101/317552,10.1038/s41396-018-0240-8,False,True, +10.1101/719922,10.7554/elife.88350,True,False, +10.2196/preprints.12957,10.2196/12957,False,True,1.0 +10.1101/322388,10.1016/j.celrep.2019.05.006,False,True,0.9878183831672204 +10.1101/431718,10.1016/j.neuroimage.2019.03.019,False,True,1.0 +10.21203/rs.3.rs-97961/v2,10.1186/s12933-021-01222-9,False,True,1.0 
+10.21203/rs.3.rs-97961/v1,10.1186/s12933-021-01222-9,False,True,1.0 +10.5194/gmd-2019-295,10.5194/gmd-13-873-2020,True,True,0.988835725677831 +10.1101/294587,10.1016/j.eclinm.2019.06.003,False,True,0.9437857708706062 +10.1101/426957,10.1186/s12864-018-5299-0,False,True,1.0 +10.21203/rs.2.16448/v1,10.1186/s12864-020-6471-x,False,True,1.0 +10.21203/rs.2.16448/v2,10.1186/s12864-020-6471-x,False,True,1.0 +10.1101/2021.02.09.430442,10.1523/jneurosci.0556-21.2021,False,True,0.9092592592592592 +10.5194/essd-2020-16,10.5194/essd-12-1789-2020,True,True,0.9985693848354792 +10.21203/rs.3.rs-65516/v1,10.1186/s12864-021-07431-6,False,True,1.0 +10.21203/rs.3.rs-65516/v2,10.1186/s12864-021-07431-6,False,True,1.0 +10.1101/370874,10.1186/s41073-019-0069-3,False,True, +10.20944/preprints201908.0008.v1,10.3390/ma12182960,False,True,0.9965635738831616 +10.1101/2020.04.24.059840,10.15252/embj.2019104136,False,True,0.9988344988344988 +10.5194/hessd-5-2791-2008,10.5194/hess-13-467-2009,True,True,1.0 +10.21034/wp.274,10.2307/1391384,False,True,0.9977324263038548 +10.21034/sr.498,10.1086/707735,False,True, +10.5194/hessd-12-8091-2015,10.5194/hess-20-175-2016,True,True,0.9803921568627452 +10.1101/2020.03.02.972521,10.1016/j.foreco.2020.118344,False,True,1.0 +10.5194/acp-2015-1028,10.5194/acp-16-6041-2016,True,True,0.9331369079944484 +10.20944/preprints202011.0348.v1,10.3390/en14030635,False,True,1.0 +10.20944/preprints201609.0106.v2,10.3390/mca22010017,False,True,0.9957446808510638 +10.20944/preprints201609.0106.v1,10.3390/mca22010017,False,True,0.9957446808510638 +10.21203/rs.3.rs-38976/v2,10.1186/s12960-020-00532-5,False,True,0.9913644214162348 +10.21203/rs.3.rs-38976/v1,10.1186/s12960-020-00532-5,False,True,0.9913644214162348 +10.21203/rs.3.rs-38976/v3,10.1186/s12960-020-00532-5,False,True,1.0 +10.5194/acpd-4-399-2004,10.5194/acp-4-801-2004,True,True,0.9197530864197532 +10.5194/acp-2021-58,10.5194/acp-21-13483-2021,True,True,1.0 
+10.5194/cpd-7-4173-2011,10.5194/cp-8-855-2012,True,True,1.0 +10.5194/hess-2022-117,10.5194/hess-26-4953-2022,True,True,1.0 +10.1101/059329,10.1093/nar/gkw627,False,True,1.0 +10.1101/054247,10.1016/j.neuron.2016.08.007,False,True,1.0 +10.5194/bg-2017-53,10.5194/bg-15-13-2018,True,True,1.0 +10.5194/acpd-2-1735-2002,10.5194/acp-3-303-2003,True,True,1.0 +10.1101/2021.06.09.447533,10.7554/elife.71569,True,False,0.979381443298969 +10.26434/chemrxiv.14541432.v1,10.1021/acscentsci.1c00592,False,True,1.0 +10.5194/esurfd-1-1-2013,10.5194/esurf-1-1-2013,True,True,1.0 +10.21203/rs.3.rs-995821/v1,10.1007/s10533-022-00915-x,False,True,1.0 +10.26434/chemrxiv.11985357.v1,10.1021/acschemneuro.0c00479,False,True,0.9789397240377632 +10.26434/chemrxiv.11985357,10.1021/acschemneuro.0c00479,False,True,0.9984567901234568 +10.1101/185520,10.1038/npp.2017.250,False,True,1.0 +10.21203/rs.3.rs-39782/v1,10.1186/s12885-021-07994-3,False,True, +10.21203/rs.3.rs-39782/v2,10.1186/s12885-021-07994-3,False,True,1.0 +10.21034/sr.516,10.1257/aer.20151260,False,True,1.0 +10.2196/preprints.16665,10.2196/16665,False,True,1.0 +10.20944/preprints201912.0205.v1,10.3390/ijerph17020616,False,True,1.0 +10.20944/preprints201912.0205.v2,10.3390/ijerph17020616,False,True,1.0 +10.1101/465096,10.1371/journal.ppat.1007460,False,True,0.9405399726862064 +10.1101/031260,10.1103/physrevlett.116.248101,False,True,1.0 +10.31234/osf.io/p5gns,10.1017/s0033291721001306,False,True,0.9601748959617086 +10.1101/2020.07.16.207662,10.1002/advs.202001572,False,True,1.0 +10.1101/135814,10.1093/nar/gkx607,False,True,0.9864208543958768 +10.1101/2021.05.03.442388,10.1523/jneurosci.0933-21.2021,False,True,1.0 +10.1101/542381,10.15252/msb.20209880,False,True,0.9792843691148776 +10.1101/2020.09.28.316653,10.1016/j.molcel.2020.10.031,False,True,1.0 +10.1101/370775,10.1136/bmjopen-2018-026211,False,True, +10.20944/preprints202009.0192.v1,10.3390/cancers12102798,False,True,0.9914529914529916 
+10.1101/430124,10.26508/lsa.201800162,False,True,1.0 +10.1101/2022.09.05.506603,10.7554/elife.83153,True,False,0.9729729729729728 +10.5194/acp-2020-543,10.5194/acp-21-3395-2021,True,True,0.963226571767497 +10.5194/essd-2019-118,10.5194/essd-12-789-2020,True,True,1.0 +10.1101/2020.05.04.077040,10.1016/j.nicl.2020.102353,False,True,1.0 +10.5194/acp-2020-909,10.5194/acp-21-8915-2021,True,True,1.0 +10.1101/2020.05.06.081356,10.1002/bit.27473,False,True,0.9810874704491724 +10.1101/2020.10.29.355859,10.1038/s41388-021-01876-5,False,True,0.9841269841269842 +10.5194/acp-2020-674,10.5194/acp-21-2305-2021,True,True,0.9862258953168044 +10.26434/chemrxiv.9684470.v1,10.1021/acsmacrolett.9b00717,False,True,0.9807852965747702 +10.5194/acp-2019-580,10.5194/acp-20-753-2020,True,True,0.9407407407407408 +10.5194/tc-2022-217,10.5194/tc-17-3593-2023,True,True,1.0 +10.5194/amt-2018-397,10.5194/amt-12-2819-2019,True,True,0.9985569985569984 +10.5194/acpd-11-17879-2011,10.5194/acp-11-9237-2011,True,True,1.0 +10.1101/2020.03.13.990887,10.1038/s41594-020-0465-x,False,True,0.9803921568627452 +10.5194/acpd-14-7141-2014,10.5194/acp-14-10411-2014,True,True,0.948073701842546 +10.26434/chemrxiv.12743720,10.1021/acssensors.0c02264,False,True,0.9817042606516292 +10.26434/chemrxiv.12743720.v1,10.1021/acssensors.0c02264,False,True,0.986466165413534 +10.21203/rs.3.rs-2145653/v1,10.1038/s41388-022-02585-3,False,True,0.914092014536055 +10.1101/2020.05.20.106575,10.15252/embj.2020106230,False,True,0.9282787454386976 +10.31234/osf.io/j2bzc,10.1016/j.jad.2022.12.162,False,True,1.0 +10.2196/preprints.20457,10.2196/20457,False,True,1.0 +10.5194/acp-2018-761,10.5194/acp-19-233-2019,True,True,0.980213089802131 +10.5194/amt-2018-258,10.5194/amt-12-955-2019,True,True,0.99860529986053 +10.20944/preprints202010.0084.v2,10.3390/cancers12113327,False,True,1.0 +10.20944/preprints202010.0084.v1,10.3390/cancers12113327,False,True,1.0 +10.21203/rs.3.rs-32295/v1,10.1186/s13287-020-02000-2,False,True,1.0 
+10.21203/rs.3.rs-32295/v2,10.1186/s13287-020-02000-2,False,True,1.0 +10.1101/760777,10.1007/s13205-020-2084-y,False,True,0.9902370990237098 +10.1101/591065,10.1371/journal.pgen.1008501,False,True,1.0 +10.2196/preprints.25469,10.2196/25469,False,True,1.0 +10.1101/276618,10.1186/s12885-018-4757-z,False,True,1.0 +10.1101/055863,10.1038/nmeth.4108,False,True,1.0 +10.1101/2022.02.11.479825,10.1172/jci159402,False,True,0.9957805907172996 +10.1101/549873,10.1111/oik.07213,False,True,0.9989615784008308 +10.5194/tc-2019-30,10.5194/tc-13-1709-2019,True,True,1.0 +10.20944/preprints202103.0467.v1,10.3390/rs13081581,False,True,1.0 +10.5194/amtd-7-5491-2014,10.5194/amt-8-1701-2015,True,True,0.9948717948717948 +10.1101/2020.04.28.066605,10.3389/fcell.2020.00617,False,True,1.0 +10.1101/2021.12.22.473713,10.1002/advs.202200315,False,True,0.9963369963369964 +10.21203/rs.3.rs-2240657/v1,10.1038/s41467-023-35915-4,False,True,1.0 +10.5194/acp-2021-182,10.5194/acp-21-9329-2021,True,True,1.0 +10.1101/416305,10.1021/jacs.8b10840,False,True,1.0 +10.1101/127761,10.1186/s13059-017-1218-y,False,True,1.0 +10.5194/bg-2022-101,10.5194/bg-19-4655-2022,True,True,0.996376811594203 +10.1101/2020.01.13.905471,10.1523/jneurosci.2809-19.2020,False,True,0.9659090909090908 +10.1101/857987,10.1523/jneurosci.1468-19.2020,False,True,0.89788748538998 +10.21203/rs.3.rs-38299/v1,10.1186/s13018-020-02039-0,False,True, +10.21203/rs.3.rs-38299/v2,10.1186/s13018-020-02039-0,False,True,1.0 +10.20944/preprints202007.0501.v1,10.3390/en13174422,False,True,1.0 +10.21203/rs.3.rs-1523403/v1,10.1038/s41591-022-02202-6,False,True,0.9626833586851126 +10.26434/chemrxiv.9994940.v1,10.1021/acs.jpclett.0c00121,False,True,0.992248062015504 +10.21203/rs.3.rs-32573/v1,10.1186/s12985-020-01417-8,False,True,1.0 +10.21203/rs.3.rs-32573/v2,10.1186/s12985-020-01417-8,False,True,1.0 +10.21203/rs.3.rs-17623/v1,10.1007/s40145-020-0410-9,False,True,0.9696969696969696 
+10.21203/rs.3.rs-17623/v2,10.1007/s40145-020-0410-9,False,True,0.9696969696969696 +10.5194/tcd-9-2597-2015,10.5194/tc-9-2201-2015,True,True,1.0 +10.5194/hessd-10-15771-2013,10.5194/hess-18-2287-2014,True,True,0.9458128078817736 +10.21203/rs.2.16987/v2,10.1186/s12881-020-01156-1,False,True,0.9550997150997153 +10.21203/rs.2.16987/v1,10.1186/s12881-020-01156-1,False,True,0.9550997150997153 +10.21203/rs.2.16987/v4,10.1186/s12881-020-01156-1,False,True,0.9550997150997153 +10.21203/rs.2.16987/v3,10.1186/s12881-020-01156-1,False,True,0.9743589743589745 +10.5194/acp-2021-173,10.5194/acp-21-15023-2021,True,True,0.9869061137513844 +10.20944/preprints201808.0402.v1,10.3390/s18113670,False,True,1.0 +10.26434/chemrxiv.13055873.v2,10.1021/acsomega.0c04691,False,True,1.0 +10.26434/chemrxiv.13055873,10.1021/acsomega.0c04691,False,True,1.0 +10.26434/chemrxiv.13055873.v1,10.1021/acsomega.0c04691,False,True,1.0 +10.5194/acpd-11-163-2011,10.5194/acp-11-9683-2011,True,True,0.994017094017094 +10.5194/acpd-13-2795-2013,10.5194/acp-13-8607-2013,True,False,1.0 +10.2196/preprints.16513,10.2196/16513,False,True,1.0 +10.5194/bgd-6-11035-2009,10.5194/bg-7-1443-2010,True,True,0.9955357142857144 +10.1101/2021.03.16.435577,10.1002/glia.24106,False,True,0.9912609238451936 +10.21203/rs.3.rs-65568/v2,10.1186/s13643-021-01612-w,False,True,1.0 +10.21203/rs.3.rs-65568/v1,10.1186/s13643-021-01612-w,False,True,1.0 +10.21034/wp.742,10.1257/mac.20170367,False,True,1.0 +10.26434/chemrxiv-2021-k4v9r,10.1021/jacs.1c09321,False,True,1.0 +10.1101/2021.06.10.447962,10.1002/smll.202103552,False,True,0.971118761485915 +10.21203/rs.2.13144/v4,10.1186/s12879-019-4618-7,False,True,1.0 +10.21203/rs.2.13144/v3,10.1186/s12879-019-4618-7,False,True,1.0 +10.21203/rs.2.13144/v2,10.1186/s12879-019-4618-7,False,True,1.0 +10.21203/rs.2.13144/v1,10.1186/s12879-019-4618-7,False,True,1.0 +10.26434/chemrxiv.12369758.v1,10.1021/acschembio.0c00426,False,True,0.903858024691358 
+10.26434/chemrxiv.12369758,10.1021/acschembio.0c00426,False,True,0.903858024691358 +10.5194/bgd-5-3157-2008,10.5194/bg-6-405-2009,True,True,1.0 +10.1101/2020.05.03.20089383,10.1016/j.bbi.2020.08.021,False,True,1.0 +10.21203/rs.3.rs-1293101/v1,10.1038/s41562-023-01540-w,False,True,0.8857142857142857 +10.5194/gmd-2018-20,10.5194/gmd-11-2813-2018,True,True,1.0 +10.1101/2020.08.12.248005,10.7554/elife.57436,True,True,0.9803921568627452 +10.1101/173146,10.1099/mgen.0.000166,False,True,1.0 +10.2196/preprints.23254,10.2196/23254,False,True,1.0 +10.5194/gmd-2017-263,10.5194/gmd-11-3187-2018,True,True,1.0 +10.2196/preprints.24006,10.2196/24006,False,True,1.0 +10.1101/2021.02.08.21251234,10.1093/ajcn/nqab276,False,True,1.0 +10.5194/acp-2016-770,10.5194/acp-17-7067-2017,True,True,1.0 +10.5194/cpd-11-3187-2015,10.5194/cp-12-91-2016,True,True,1.0 +10.2196/preprints.14369,10.2196/14369,False,True,1.0 +10.5194/mr-2023-2,10.5194/mr-4-153-2023,True,True,0.9080459770114944 +10.2196/preprints.19018,10.2196/19018,False,True,1.0 +10.5194/gmd-2017-206,10.5194/gmd-11-2975-2018,True,True,0.9471620227038184 +10.1101/481507,10.1096/fj.201902811rr,False,True,0.9894179894179894 +10.21203/rs.3.rs-57499/v1,10.1186/s10020-020-00230-x,False,True,0.9711286089238844 +10.21203/rs.3.rs-57499/v2,10.1186/s10020-020-00230-x,False,True,1.0 +10.1101/151522,10.1016/j.jneumeth.2017.08.033,False,True,1.0 +10.5194/acpd-7-6767-2007,10.5194/acp-7-4553-2007,True,True,1.0 +10.26434/chemrxiv.7322330.v1,10.1021/jacs.8b13127,False,True,0.92018779342723 +10.1101/2020.02.19.955609,10.1177/2331216520964068,False,True,1.0 +10.5194/amt-2018-209,10.5194/amt-12-977-2019,True,True,1.0 +10.1101/2020.03.09.20033423,10.1002/jia2.25546,False,True,0.9957624290957624 +10.1101/342592,10.1186/s12864-018-5032-z,False,True,0.985685071574642 +10.31234/osf.io/qp4ev,10.1016/j.beth.2019.09.005,False,True,1.0 +10.21203/rs.2.12994/v1,10.1186/s12879-020-4873-7,False,True,0.9984567901234568 
+10.21203/rs.2.12994/v2,10.1186/s12879-020-4873-7,False,True,0.9984567901234568 +10.21203/rs.2.12994/v3,10.1186/s12879-020-4873-7,False,True,0.9984567901234568 +10.21203/rs.2.12994/v4,10.1186/s12879-020-4873-7,False,True,0.9984567901234568 +10.20944/preprints202011.0543.v1,10.3390/pathogens9121037,False,True,0.9727626459143968 +10.21203/rs.3.rs-76084/v1,10.1186/s12960-021-00558-3,False,True,1.0 +10.1101/776237,10.7554/elife.70469,True,True,1.0 +10.1101/2021.10.03.462935,10.7554/elife.74183,True,True,0.9227481919789612 +10.5194/tc-2016-250,10.5194/tc-11-949-2017,True,True,1.0 +10.1101/192245,10.1162/jocn_a_01200,False,True,1.0 +10.2196/preprints.15960,10.2196/15960,False,True,1.0 +10.2196/preprints.22795,10.2196/22795,False,True,1.0 +10.5194/hessd-8-8291-2011,10.5194/hess-16-167-2012,True,True,1.0 +10.5194/gmd-2016-87,10.5194/gmd-9-3655-2016,True,True,0.9993412384716732 +10.1101/2020.02.17.952895,10.3389/fcimb.2020.00405,False,True, +10.1101/2021.02.15.21251449,10.1093/ajcn/nqab279,False,True,1.0 +10.5194/essd-2020-280,10.5194/essd-13-2995-2021,True,True,0.98989898989899 +10.21203/rs.3.rs-23615/v3,10.1186/s12985-020-01451-6,False,True,1.0 +10.21203/rs.3.rs-23615/v2,10.1186/s12985-020-01451-6,False,True,1.0 +10.21203/rs.3.rs-23615/v1,10.1186/s12985-020-01451-6,False,True,1.0 +10.5194/gmd-2018-123,10.5194/gmd-11-4843-2018,True,True,0.9679545950665794 +10.2196/preprints.43101,10.2196/43101,False,True,1.0 +10.5194/angeo-2019-65,10.5194/angeo-37-689-2019,True,True,1.0 +10.5194/os-2020-66,10.5194/os-17-59-2021,True,True,1.0 +10.5194/acp-2022-410,10.5194/acp-23-1963-2023,True,True,0.9946524064171124 +10.21203/rs.3.rs-42553/v2,10.1186/s13756-020-00864-w,False,True,1.0 +10.21203/rs.3.rs-42553/v1,10.1186/s13756-020-00864-w,False,True,0.9852941176470588 +10.20944/preprints201905.0040.v1,10.3390/ijms20112780,False,True,1.0 +10.1101/2020.03.17.20037515,10.1093/cid/ciaa443,False,True,0.9732868757259 
+10.20944/preprints202002.0288.v1,10.3390/brainsci10030143,False,True,0.9984848484848484 +10.20944/preprints202007.0130.v1,10.3390/biomedicines8080275,False,True,1.0 +10.1101/128645,10.1002/hbm.23843,False,True,0.987962962962963 +10.1101/419994,10.1111/jfb.13989,False,True, +10.1101/2020.03.20.000000,10.1096/fj.202001281rr,False,True,0.9926091269841272 +10.26434/chemrxiv-2022-c1ctc-v2,10.1039/d2sc05997e,False,True,1.0 +10.2196/preprints.12664,10.2196/12664,False,True,1.0 +10.5194/bg-2018-477,10.5194/bg-16-2147-2019,True,True,1.0 +10.21034/wp.75,10.2307/1991332,False,True,1.0 +10.1101/2021.12.13.472383,10.7554/elife.78092,True,True,1.0 +10.1101/105874,10.1016/j.neuroimage.2017.04.063,False,True,1.0 +10.1101/2022.10.25.513707,10.7554/elife.83908,True,False,0.9799631120053656 +10.26434/chemrxiv.12587537,10.1021/acs.jctc.0c00715,False,True,1.0 +10.26434/chemrxiv.12587537.v1,10.1021/acs.jctc.0c00715,False,True,1.0 +10.1101/378497,10.1093/nar/gkz169,False,True,1.0 +10.31223/osf.io/3mjc2,10.1016/j.precamres.2020.105849,False,True,0.9765684051398336 +10.1101/439687,10.1002/2211-5463.12744,False,True,0.987987987987988 +10.26434/chemrxiv-2023-6tgkh,10.1021/acs.jcim.3c00732,False,True,1.0 +10.5194/gmd-2020-179,10.5194/gmd-13-6077-2020,True,True,0.9718076285240466 +10.2196/preprints.22564,10.2196/22564,False,True,1.0 +10.1101/617019,10.3390/genes10060468,False,True,0.8909691867586605 +10.1101/068346,10.1371/journal.pcbi.1005260,False,True,0.8888888888888888 +10.1101/2020.08.15.252494,10.1161/atvbaha.120.315556,False,True,0.915073340051506 +10.1101/612010,10.3390/su11102787,False,True,0.9824561403508772 +10.1101/2020.03.28.013672,10.1016/j.devcel.2020.05.012,False,True,0.9743589743589745 +10.1101/122945,10.1162/netn_a_00031,False,True,1.0 +10.1101/2020.12.16.423042,10.1007/s10334-022-01033-3,False,True,1.0 +10.1101/2021.07.20.453033,10.7554/elife.73348,True,True,1.0 +10.1101/2021.03.22.21254119,10.1021/acs.jproteome.1c00326,False,True,1.0 
+10.1101/208223,10.1038/s41380-018-0023-7,False,True,1.0 +10.1101/354829,10.1002/hbm.24788,False,True,0.9956140350877192 +10.1101/501221,10.1523/jneurosci.0601-21.2021,False,True,1.0 +10.31219/osf.io/y6mkh,10.1111/nous.12265,False,True, +10.5194/bgd-11-7615-2014,10.5194/bg-11-6323-2014,True,True,1.0 +10.1101/641159,10.1038/s41593-021-00821-9,False,True,1.0 +10.5194/hessd-11-1343-2014,10.5194/hess-18-3259-2014,True,True,0.9955555555555556 +10.26434/chemrxiv-2021-rg4wj-v2,10.1016/j.jcis.2022.07.164,False,True,0.983974358974359 +10.21203/rs.3.rs-515297/v1,10.1016/j.bbamem.2021.183794,False,True,0.9827586206896552 +10.1101/2020.05.06.081562,10.1021/acs.jpcb.0c04139,False,True,1.0 +10.1101/2020.02.17.952457,10.1371/journal.pbio.3000687,False,True,0.9978213507625272 +10.21203/rs.3.rs-31943/v4,10.1186/s12876-020-01553-z,False,True,1.0 +10.21203/rs.3.rs-31943/v3,10.1186/s12876-020-01553-z,False,True,1.0 +10.21203/rs.3.rs-31943/v2,10.1186/s12876-020-01553-z,False,True,1.0 +10.21203/rs.3.rs-31943/v1,10.1186/s12876-020-01553-z,False,True,0.9862258953168044 +10.1101/067876,10.15252/embj.201696038,False,True,0.8719135802469135 +10.1101/2020.06.24.169334,10.1021/acscentsci.1c01293,False,True,0.9753872555660932 +10.1101/138834,10.3389/fncel.2017.00214,False,True,1.0 +10.21203/rs.3.rs-70874/v1,10.1016/j.wasman.2022.01.022,False,True,0.9464007899260638 +10.5194/essd-2017-134,10.5194/essd-10-985-2018,True,True,1.0 +10.5194/hess-2021-68,10.5194/hess-25-6495-2021,True,True,0.9129097148266476 +10.20944/preprints201612.0138.v1,10.3390/ma10030297,False,True,1.0 +10.1101/505032,10.1038/s41592-020-01023-0,False,True,0.996078431372549 +10.1101/671230,10.1038/s41556-020-0485-0,False,True,1.0 +10.21203/rs.3.rs-66113/v1,10.1186/s13287-020-02056-0,False,True,0.9678362573099416 +10.21203/rs.3.rs-66113/v2,10.1186/s13287-020-02056-0,False,True,1.0 +10.21203/rs.3.rs-66113/v3,10.1186/s13287-020-02056-0,False,True,1.0 +10.1101/038117,10.1038/nmeth.3991,False,True,0.9095238095238096 
+10.20944/preprints201808.0322.v1,10.3390/molecules23102549,False,True,1.0 +10.1101/2020.07.30.228924,10.1038/s41592-021-01136-0,False,True,1.0 +10.21203/rs.3.rs-904665/v1,10.1038/s41556-022-00953-5,False,True,0.9195027195027196 +10.2196/preprints.22488,10.2196/22488,False,True,1.0 +10.1101/2022.03.04.483005,10.7554/elife.78385,True,False,0.9462465245597776 +10.1101/848846,10.1182/blood.2020004801,False,True,0.9846216768916156 +10.2196/preprints.10755,10.2196/10755,False,True,1.0 +10.1101/057976,10.1093/bioinformatics/btw390,False,True,0.9118723052546582 +10.5194/tc-2018-175,10.5194/tc-13-895-2019,True,True,0.9646464646464646 +10.5194/esurf-2020-59,10.5194/esurf-9-1153-2021,True,True,0.9936507936507936 +10.5194/cp-2017-151,10.5194/cp-14-1079-2018,True,True,1.0 +10.1101/2020.04.07.029140,10.1371/journal.ppat.1008530,False,True,1.0 +10.2196/preprints.26309,10.2196/26309,False,True,1.0 +10.1101/2021.10.14.464354,10.7554/elife.74565,True,True,1.0 +10.1101/860874,10.1088/1741-2552/ab9dba,False,True,1.0 +10.1101/207076,10.1167/18.6.10,False,True,1.0 +10.1101/376863,10.1152/jn.00680.2018,False,True,0.9885129490392648 +10.5194/bg-2019-482,10.5194/bg-17-4247-2020,True,True,0.9792843691148776 +10.2196/preprints.13802,10.2196/13802,False,True,1.0 +10.2196/preprints.17740,10.2196/17740,False,True,1.0 +10.2196/preprints.11334,10.2196/11334,False,True,1.0 +10.31235/osf.io/hfr96,10.1038/nclimate3271,False,True,0.9826839826839828 +10.1101/2021.04.30.442171,10.1523/jneurosci.1575-21.2022,False,True,1.0 +10.1101/228668,10.1099/mgen.0.000165,False,True,0.9875222816399286 +10.5194/nhess-2017-152,10.5194/nhess-17-2199-2017,True,True,0.9588652482269504 +10.5194/amt-2019-282,10.5194/amt-13-323-2020,True,True,0.9693251533742332 +10.5194/acpd-12-24847-2012,10.5194/acp-13-3345-2013,True,True,1.0 +10.5194/hessd-9-9809-2012,10.5194/hess-17-3127-2013,True,True,0.9061032863849764 +10.5194/bgd-11-14699-2014,10.5194/bg-12-863-2015,True,True,0.9968253968253968 
+10.2196/preprints.24851,10.2196/24851,False,True,1.0 +10.1101/263939,10.1038/nbt.4266,False,True, +10.1101/2020.05.05.078196,10.1002/brb3.1786,False,True,0.9953703703703703 +10.1101/191809,10.1002/pld3.47,False,True,0.96640826873385 +10.26434/chemrxiv.10003412.v1,10.1021/acs.jctc.9b01066,False,True,1.0 +10.20944/preprints201706.0002.v1,10.3390/e19060286,False,True,1.0 +10.31235/osf.io/fw4er,10.1093/aje/kwy218,False,True,1.0 +10.1101/191494,10.1111/ejn.13816,False,True,1.0 +10.5194/acp-2021-784,10.5194/acp-22-1951-2022,True,True,0.9819277108433736 +10.1101/2020.06.29.20143180,10.1371/journal.pone.0242758,False,True,1.0 +10.2196/preprints.25456,10.2196/25456,False,True,1.0 +10.5194/cpd-9-3239-2013,10.5194/cp-10-487-2014,True,True,0.9954954954954954 +10.1002/essoar.10510350.1,10.1029/2022gl098158,False,True,0.9988505747126436 +10.1101/844712,10.1371/journal.pone.0237189,False,True,1.0 +10.5194/se-2017-35,10.5194/se-8-789-2017,True,True,0.9942726231386024 +10.1101/111070,10.1037/xlm0000518,False,True,0.9186480453521352 +10.5194/se-2021-6,10.5194/se-12-2523-2021,True,True,0.9875311720698252 +10.1101/158113,10.1371/journal.pone.0214311,False,True,1.0 +10.1101/2021.05.06.21256789,10.1213/ane.0000000000005730,False,True,0.9975490196078431 +10.1101/867168,10.1016/j.jneumeth.2020.108756,False,True,0.875 +10.1101/661207,10.3389/fmicb.2019.02558,False,True,1.0 +10.1101/391243,10.1534/g3.118.200662,False,True,0.9444444444444444 +10.2196/preprints.44548,10.2196/44548,False,True,1.0 +10.1101/101535,10.1098/rsos.171308,False,True,0.9969418960244648 +10.5194/bg-2018-430,10.5194/bg-16-1225-2019,True,True,1.0 +10.1101/534206,10.1093/nar/gkz306,False,True,0.9427618157089428 +10.2196/preprints.18662,10.2196/18662,False,True,1.0 +10.5194/nhess-2021-31,10.5194/nhess-21-1759-2021,True,True,1.0 +10.21203/rs.3.rs-60829/v1,10.1186/s13046-020-01796-4,False,True,0.9891156462585036 +10.21203/rs.3.rs-60829/v2,10.1186/s13046-020-01796-4,False,True,0.9986772486772488 
+10.1101/2020.06.12.20127944,10.1001/jama.2020.15580,False,True,0.978494623655914 +10.20944/preprints201801.0107.v1,10.3390/nu10020238,False,True,1.0 +10.5194/hess-2016-351,10.5194/hess-21-1741-2017,True,True,0.9983579638752051 +10.5194/essd-2020-303,10.5194/essd-13-3337-2021,True,True,1.0 +10.1101/2021.10.07.463355,10.1021/acssensors.1c02201,False,True,1.0 +10.5194/hess-2021-506,10.5194/hess-26-2899-2022,True,True,0.912 +10.1101/219113,10.1016/j.sbi.2018.01.009,False,True,1.0 +10.1101/632810,10.3390/cancers12061568,False,True,0.9866666666666668 +10.2196/preprints.41446,10.2196/41446,False,True,1.0 +10.21203/rs.3.rs-2209582/v1,10.1007/s13146-023-00880-y,False,True,1.0 +10.21203/rs.3.rs-2440941/v1,10.1007/s13146-023-00882-w,False,True,1.0 +10.21203/rs.3.rs-2597108/v1,10.1007/s10238-023-01049-6,False,True,0.9743589743589745 +10.1101/2020.05.27.119438,10.1371/journal.pcbi.1008625,False,True,0.9843400447427294 +10.20944/preprints201902.0019.v1,10.3390/a12030060,False,True,1.0 +10.1101/816694,10.1182/bloodadvances.2019001393,False,True,0.9716981132075472 +10.5194/acp-2016-430,10.5194/acp-17-11041-2017,True,True,0.9962546816479402 +10.31234/osf.io/hv28a,10.1037/pspa0000098,False,True,1.0 +10.20944/preprints201612.0042.v1,10.3390/ijms18020347,False,True,0.9297052154195012 +10.1101/2019.12.15.876847,10.1093/sleep/zsaa111,False,True,1.0 +10.5194/acpd-14-25533-2014,10.5194/acp-15-4179-2015,True,True,1.0 +10.5194/acp-2016-308,10.5194/acp-16-12397-2016,True,True,1.0 +10.1101/235176,10.1038/s41592-018-0171-3,False,True,0.9691282491742363 +10.21034/wp.730,10.1257/aer.20121524,False,True,1.0 +10.1101/636803,10.1002/ece3.6313,False,True,0.9895833333333334 +10.5194/cpd-5-1367-2009,10.5194/cp-5-585-2009,True,True,0.989010989010989 +10.36227/techrxiv.21758660,10.1109/tim.2023.3256468,False,True,1.0 +10.36227/techrxiv.21758660.v1,10.1109/tim.2023.3256468,False,True,1.0 +10.31234/osf.io/y27vc,10.1080/13548506.2017.1385818,False,True,1.0 
+10.5194/cp-2017-26,10.5194/cp-13-1007-2017,True,True,1.0 +10.5194/acpd-15-12007-2015,10.5194/acp-15-11861-2015,True,True,1.0 +10.5194/wes-2018-49,10.5194/wes-3-845-2018,True,True,1.0 +10.31219/osf.io/cv2bn,10.3758/s13428-018-1035-6,False,True,1.0 +10.1101/803346,10.1016/j.ajhg.2020.06.010,False,True,0.9377207977207976 +10.36227/techrxiv.21674759.v1,10.1109/ojcoms.2023.3282814,False,True,1.0 +10.36227/techrxiv.21674759,10.1109/ojcoms.2023.3282814,False,True,1.0 +10.31219/osf.io/bwm4k,10.3390/ma14051106,False,True,1.0 +10.1101/084418,10.15252/msb.20188497,False,True,1.0 +10.5194/egusphere-2022-180,10.5194/se-13-1755-2022,True,True,0.9826224328593997 +10.1101/537001,10.1016/j.cell.2019.07.038,False,True, +10.21203/rs.3.rs-136528/v1,10.1186/s13287-021-02223-x,False,True,0.9743589743589745 +10.1101/640557,10.1111/oik.06957,False,True,0.9883190883190884 +10.21034/sr.361,10.1257/mac.1.1.146,False,True, +10.26434/chemrxiv.8289812.v1,10.1021/acsnano.9b06019,False,True,0.9314194577352471 +10.5194/amtd-2-489-2009,10.5194/amt-2-379-2009,True,True,1.0 +10.1101/2020.08.27.267880,10.1186/s13229-022-00511-8,False,True,0.9561904761904764 +10.5194/acp-2019-1026,10.5194/acp-20-8727-2020,True,True,0.983606557377049 +10.5194/se-2019-49,10.5194/se-10-987-2019,True,True,1.0 +10.5194/acp-2020-263,10.5194/acp-21-1697-2021,True,True,0.9936073059360732 +10.21034/sr.186,10.1007/bf01213946,False,True,0.974910394265233 +10.5194/acpd-8-8009-2008,10.5194/acp-8-6169-2008,True,True,1.0 +10.5194/acpd-7-10799-2007,10.5194/acp-8-901-2008,True,True,1.0 +10.5194/acpd-11-8665-2011,10.5194/acp-11-6207-2011,True,True,0.9209742194584792 +10.5194/acpd-6-9003-2006,10.5194/acp-7-685-2007,True,True,1.0 +10.5194/acpd-14-19791-2014,10.5194/acp-15-253-2015,True,True,1.0 +10.5194/acpd-4-4545-2004,10.5194/acp-4-2227-2004,True,True,1.0 +10.1101/863621,10.21105/joss.01994,False,True,0.927811176648518 +10.5194/acpd-4-2569-2004,10.5194/acp-4-1895-2004,True,True,1.0 
+10.2196/preprints.9633,10.2196/resprot.9633,False,True,1.0 +10.2196/preprints.19601,10.2196/19601,False,True,1.0 +10.5194/bg-2019-237,10.5194/bg-17-215-2020,True,True,0.9548387096774192 +10.20944/preprints202103.0379.v1,10.3390/genes12040544,False,True,1.0 +10.21203/rs.3.rs-72276/v1,10.1186/s13643-021-01652-2,False,True,0.978593272171254 +10.5194/acpd-13-20677-2013,10.5194/acp-14-1423-2014,True,True,1.0 +10.5194/acpd-10-10219-2010,10.5194/acp-10-7169-2010,True,True,1.0 +10.1101/2021.09.06.21263001,10.1111/nmo.14331,False,True,0.942927545452176 +10.1101/230938,10.1016/j.yjmcc.2018.06.007,False,True,0.9944444444444444 +10.5194/gchron-2020-11,10.5194/gchron-3-181-2021,True,True,1.0 +10.5194/hessd-8-4459-2011,10.5194/hess-15-2581-2011,True,True,1.0 +10.1101/2020.04.05.026005,10.1016/j.molliq.2020.113612,False,True,0.9987029831387808 +10.1101/2022.12.07.519455,10.7554/elife.85069,True,False, +10.20944/preprints202003.0433.v1,10.1016/j.micpath.2020.104236,False,True,1.0 +10.5194/acp-2016-332,10.5194/acp-16-13185-2016,True,True,1.0 +10.5194/amt-2020-257,10.5194/amt-14-945-2021,True,True,0.9913644214162348 +10.20944/preprints202010.0453.v1,10.3390/ani10122196,False,True,0.9775910364145658 +10.20944/preprints202010.0453.v2,10.3390/ani10122196,False,True,0.9716981132075472 +10.21034/wp.741,10.1257/aer.20181499,False,True,1.0 +10.1101/2020.04.25.20079996,10.3389/fpsyg.2020.551004,False,True,0.9986504723346828 +10.1101/443127,10.1007/s00415-019-09340-x,False,True,1.0 +10.5194/essd-2022-16,10.5194/essd-14-3743-2022,True,True,1.0 +10.5194/soil-2017-28,10.5194/soil-4-37-2018,True,True,0.9715242881072026 +10.5194/hess-2016-323,10.5194/hess-21-1149-2017,True,True,0.9643605870020964 +10.1101/511683,10.1186/s40168-019-0665-y,False,True,0.9893444246385422 +10.1101/088666,10.7717/peerj.3889,False,True, +10.1101/2021.03.28.21254404,10.1021/acs.estlett.1c00375,False,True,0.959078814570144 +10.5194/gmd-2017-103,10.5194/gmd-11-257-2018,True,True,0.9954415954415956 
+10.5194/acpd-8-21229-2008,10.5194/acp-9-5905-2009,True,True,0.9964912280701754 +10.5194/acpd-4-3699-2004,10.5194/acp-4-2337-2004,True,True,0.9767441860465116 +10.20944/preprints202010.0447.v1,10.3390/cancers12123524,False,True,1.0 +10.5194/bgd-10-19509-2013,10.5194/bg-11-2069-2014,True,True,0.9797979797979798 +10.5194/acpd-13-18345-2013,10.5194/acp-13-12271-2013,True,True,0.9987515605493132 +10.1101/296061,10.1016/j.dcn.2018.09.003,False,True,1.0 +10.21034/sr.249,10.1080/07474939908800428,False,True,0.988155668358714 +10.21203/rs.3.rs-93388/v1,10.1186/s12872-020-01827-0,False,True,1.0 +10.2196/preprints.8954,10.2196/jmir.8954,False,True,1.0 +10.5194/bgd-7-3335-2010,10.5194/bg-7-2613-2010,True,True,0.9688888888888888 +10.1002/essoar.10506462.1,10.1029/2021jc017734,False,True,0.9967320261437908 +10.5194/acpd-2-2209-2002,10.5194/acp-3-417-2003,True,True,0.9904761904761904 +10.5194/acpd-8-18727-2008,10.5194/acp-9-5489-2009,True,True,1.0 +10.5194/tc-2020-164,10.5194/tc-15-1097-2021,True,True,1.0 +10.5194/acpd-13-10621-2013,10.5194/acp-14-765-2014,True,True,0.9565217391304348 +10.21203/rs.3.rs-507826/v1,10.1007/s10924-021-02297-x,False,True,0.9696969696969696 +10.5194/acp-2018-209,10.5194/acp-18-12207-2018,True,True,0.9986504723346828 +10.1101/2021.04.19.440546,10.1038/s42003-021-02874-9,False,True,1.0 +10.5194/acpd-5-509-2005,10.5194/acp-5-1557-2005,True,True,1.0 +10.26434/chemrxiv.13513731.v2,10.1021/acs.jpca.1c02872,False,True, +10.5194/acpd-6-3135-2006,10.5194/acp-6-3377-2006,True,True,1.0 +10.5194/bgd-11-7991-2014,10.5194/bg-11-6173-2014,True,True,0.9916161616161616 +10.5194/gchron-2019-3,10.5194/gchron-1-17-2019,True,True,0.9760765550239232 +10.1101/430447,10.15252/embj.2019103667,False,True, +10.5194/acpd-9-16549-2009,10.5194/acp-10-431-2010,True,True,1.0 +10.5194/bg-2016-101,10.5194/bg-13-4491-2016,True,True,0.9313034188034188 +10.1101/2020.05.17.100255,10.1016/j.cortex.2020.09.004,False,True,1.0 +10.5194/acpd-3-5139-2003,10.5194/acp-4-391-2004,True,True,1.0 
+10.5194/bg-2016-357,10.5194/bg-14-2781-2017,True,True,1.0 +10.5194/acp-2020-91,10.5194/acp-20-8641-2020,True,True,0.9826224328593997 +10.5194/hess-2019-600,10.5194/hess-24-4413-2020,True,True, +10.1101/476960,10.15252/embj.2018101153,False,True,0.973765903307888 +10.5194/cpd-10-3327-2014,10.5194/cp-11-327-2015,True,True,1.0 +10.5194/acpd-10-12713-2010,10.5194/acp-10-9039-2010,True,True,0.9947916666666666 +10.5194/acpd-9-24587-2009,10.5194/acp-10-5573-2010,True,True,0.9382716049382716 +10.5194/tc-2016-161,10.5194/tc-10-2981-2016,True,True,1.0 +10.26434/chemrxiv-2022-kgxfk-v2,10.1016/j.eml.2022.101929,False,True,1.0 +10.26434/chemrxiv.8061650.v1,10.1021/acs.chemmater.9b03267,False,True,0.9822281959378736 +10.5194/tc-2019-293,10.5194/tc-14-2775-2020,True,True,1.0 +10.5194/tcd-2-111-2008,10.5194/tc-2-95-2008,True,True,0.9919678714859438 +10.26434/chemrxiv.7322183.v1,10.1021/acs.jctc.8b01041,False,True,1.0 +10.21203/rs.3.rs-2241246/v1,10.1016/j.resconrec.2023.106873,False,True,1.0 +10.26434/chemrxiv.7851587.v1,10.1021/acs.jpcc.8b11092,False,True,1.0 +10.20944/preprints201609.0095.v1,10.1007/s11356-016-8321-6,False,True,0.9662618083670717 +10.5194/bg-2016-172,10.5194/bg-14-597-2017,True,True,1.0 +10.20944/preprints202006.0275.v1,10.3855/jidc.13692,False,True,0.9977324263038548 +10.5194/tc-2021-382,10.5194/tc-16-3313-2022,True,True,1.0 +10.5194/sed-5-789-2013,10.5194/se-4-373-2013,True,True,0.9851380042462846 +10.2196/preprints.19159,10.2196/19159,False,True,1.0 +10.5194/amt-2021-90,10.5194/amt-14-5625-2021,True,True,1.0 +10.21203/rs.3.rs-206773/v1,10.1186/s43058-021-00128-7,False,True,1.0 +10.2196/preprints.10078,10.2196/10078,False,True,1.0 +10.1101/2020.07.06.190314,10.3390/metabo10120488,False,True,1.0 +10.5194/essd-2018-3,10.5194/essd-10-1427-2018,True,True,1.0 +10.7287/peerj.preprints.2795v1,10.7717/peerj.3500,False,True,1.0 +10.31227/osf.io/kxdf6,10.22216/jen.v2i3.2357,False,True,1.0 +10.2196/preprints.40038,10.2196/40038,False,True,1.0 
+10.1101/392761,10.1371/journal.pone.0223183,False,True,1.0 +10.2196/preprints.39264,10.2196/39264,False,True,1.0 +10.2196/preprints.33793,10.2196/33793,False,True,1.0 +10.21203/rs.2.12491/v2,10.1186/s13063-019-3833-2,False,True,0.9719974309569684 +10.21203/rs.2.12491/v1,10.1186/s13063-019-3833-2,False,True,0.9645951035781544 +10.21203/rs.3.rs-61509/v1,10.1186/s12944-020-01428-y,False,True,1.0 +10.21203/rs.3.rs-61509/v4,10.1186/s12944-020-01428-y,False,True,1.0 +10.21203/rs.3.rs-61509/v5,10.1186/s12944-020-01428-y,False,True,1.0 +10.21203/rs.3.rs-61509/v2,10.1186/s12944-020-01428-y,False,True,1.0 +10.21203/rs.3.rs-61509/v3,10.1186/s12944-020-01428-y,False,True,1.0 +10.5194/acpd-11-24813-2011,10.5194/acp-12-5429-2012,True,True,0.9028871391076116 +10.5194/amtd-4-3055-2011,10.5194/amt-4-1593-2011,True,True,1.0 +10.21203/rs.3.rs-58058/v2,10.1186/s12909-021-02570-6,False,True,0.9968253968253968 +10.21203/rs.3.rs-58058/v1,10.1186/s12909-021-02570-6,False,True,1.0 +10.20944/preprints201802.0069.v1,10.3390/f9030100,False,True,0.9824561403508772 +10.21203/rs.2.15987/v1,10.1186/s12887-019-1863-2,False,True,1.0 +10.21203/rs.2.15987/v2,10.1186/s12887-019-1863-2,False,True,1.0 +10.21203/rs.2.15987/v3,10.1186/s12887-019-1863-2,False,True,1.0 +10.2196/preprints.12797,10.2196/12797,False,True,1.0 +10.20944/preprints201907.0118.v1,10.3390/ijerph16162815,False,True,1.0 +10.2196/preprints.11824,10.2196/11824,False,True,1.0 +10.5194/hess-2020-46,10.5194/hess-24-5015-2020,True,True,1.0 +10.31219/osf.io/w9unj,10.32520/jtp.v8i2.941,False,True, +10.20944/preprints201908.0123.v1,10.15517/rbt.v68i1.38555,False,True,0.925004016451385 +10.5194/amtd-5-8579-2012,10.5194/amt-6-1359-2013,True,True,0.9941520467836256 +10.5194/acpd-15-19045-2015,10.5194/acp-16-7681-2016,True,True,0.9204142368936375 +10.20944/preprints201804.0244.v1,10.3390/min8050192,False,True,0.9482758620689654 +10.31235/osf.io/7t6w3,10.18523/kmlpj153255.2018-4.99-118,False,True,1.0 
+10.5194/essd-2021-239,10.5194/essd-14-3915-2022,True,True,0.996078431372549 +10.5194/acp-2019-639,10.5194/acp-20-4445-2020,True,True,1.0 +10.2196/preprints.14501,10.2196/14501,False,True,1.0 +10.21203/rs.3.rs-132353/v1,10.1186/s13018-020-02191-7,False,True,0.992248062015504 +10.2196/preprints.10665,10.2196/10665,False,True,1.0 +10.5194/gmd-2021-395,10.5194/gmd-15-7557-2022,True,True,0.9958333333333332 +10.5194/acpd-10-10969-2010,10.5194/acp-10-8669-2010,True,False,1.0 +10.5194/amt-2019-481,10.5194/amt-13-3661-2020,True,True,1.0 +10.5194/wes-2020-51,10.5194/wes-5-855-2020,True,True,1.0 +10.5194/npg-2020-4,10.5194/npg-27-391-2020,True,True,1.0 +10.1101/2020.01.09.900050,10.1371/journal.pone.0228121,False,True,1.0 +10.5194/nhess-2016-46,10.5194/nhess-16-1807-2016,True,True,1.0 +10.31219/osf.io/j67kq,10.31014/aior.1991.03.04.241,False,True, +10.31235/osf.io/wsh64,10.31014/aior.1991.03.04.241,False,True, +10.5194/acp-2021-207,10.5194/acp-21-13119-2021,True,True,1.0 +10.21034/wp.415,10.1080/07350015.1990.10509768,False,True, +10.1101/670257,10.1016/j.nlm.2020.107225,False,True,1.0 +10.2196/preprints.18258,10.2196/18258,False,True,1.0 +10.5194/amtd-6-3545-2013,10.5194/amt-6-1981-2013,True,True,1.0 +10.31220/osf.io/pg3v9,10.29255/aksara.v31i2.364.251-268,False,True,1.0 +10.33767/osf.io/y4s3w,10.7560/vlt8102,False,True, +10.20944/preprints202010.0346.v1,10.3390/biom10111564,False,True,1.0 +10.1101/056044,10.1186/s12918-016-0380-2,False,True,1.0 +10.5194/angeo-2019-119,10.5194/angeo-38-467-2020,True,True,1.0 +10.31219/osf.io/vmu6q,10.24269/ars.v6i1.780,False,True,0.8888888888888888 +10.20944/preprints201805.0072.v1,10.3390/electronics7060079,False,True,1.0 +10.31224/osf.io/8s59e,10.1504/ijvp.2017.081276,False,True,0.9213085764809904 +10.20944/preprints201807.0061.v1,10.3390/mti2030044,False,True,1.0 +10.5194/gmd-2016-315,10.5194/gmd-10-1927-2017,True,True,1.0 +10.5194/amt-2019-252,10.5194/amt-13-1735-2020,True,True,1.0 
+10.21203/rs.3.rs-41396/v2,10.1186/s12882-020-02158-0,False,True,1.0 +10.21203/rs.3.rs-41396/v1,10.1186/s12882-020-02158-0,False,True,1.0 +10.21203/rs.3.rs-41396/v3,10.1186/s12882-020-02158-0,False,True,1.0 +10.5194/gc-2021-26,10.5194/gc-5-101-2022,True,True,0.9716312056737588 +10.21203/rs.3.rs-104730/v1,10.1186/s12957-021-02152-2,False,True,0.9976359338061466 +10.21203/rs.3.rs-104730/v2,10.1186/s12957-021-02152-2,False,True,0.9976359338061466 +10.1101/634006,10.1093/nargab/lqaa022,False,True,1.0 +10.1101/481952,10.2174/1568026619666181220111059,False,True,0.9923664122137404 +10.21203/rs.3.rs-127854/v1,10.1186/s13019-021-01444-8,False,True,1.0 +10.2196/preprints.17064,10.2196/17064,False,True,1.0 +10.20944/preprints202102.0539.v1,10.3390/molecules26061667,False,True,0.9855072463768116 +10.20944/preprints201701.0068.v1,10.3390/su9010122,False,True,1.0 +10.31219/osf.io/byjhc,10.25046/aj0505120,False,True, +10.20944/preprints202005.0163.v1,10.1186/s41205-020-00086-1,False,True,0.9867724867724867 +10.2196/preprints.12968,10.2196/12968,False,True,1.0 +10.1101/2020.06.26.169458,10.3390/ijms21217980,False,True,0.9946236559139784 +10.2196/preprints.11219,10.2196/11219,False,True,1.0 +10.5194/egusphere-2022-682,10.5194/bg-19-5617-2022,True,True,1.0 +10.21203/rs.2.11941/v2,10.1186/s13104-019-4593-5,False,True,1.0 +10.21203/rs.2.11941/v1,10.1186/s13104-019-4593-5,False,True,1.0 +10.21203/rs.2.11941/v3,10.1186/s13104-019-4593-5,False,True,1.0 +10.1101/460337,10.1093/cercor/bhaa081,False,True,0.9984917043740572 +10.1101/199687,10.1016/j.neuropsychologia.2018.06.010,False,True,1.0 +10.1101/19009589,10.1371/journal.pone.0230274,False,True,1.0 +10.5194/acp-2017-666,10.5194/acp-18-3779-2018,True,True,0.9942857142857144 +10.1101/149716,10.1016/j.ymben.2017.11.011,False,True,0.977777777777778 +10.1101/2020.04.16.044842,10.1523/jneurosci.0875-20.2020,False,True,1.0 +10.5194/acp-2016-998,10.5194/acp-17-4419-2017,True,True,1.0 
+10.5194/bg-2019-165,10.5194/bg-16-4097-2019,True,True,0.9865591397849464 +10.5194/bg-2017-173,10.5194/bg-15-953-2018,True,True,1.0 +10.5194/acp-2020-875,10.5194/acp-21-10337-2021,True,True,1.0 +10.31224/osf.io/5atbz,10.1016/j.flowmeasinst.2018.07.003,False,True, +10.1101/251843,10.1523/eneuro.0381-18.2018,False,True,0.9583333333333334 +10.21203/rs.3.rs-253126/v1,10.1002/adpr.202100285,False,True,0.989010989010989 +10.20944/preprints201710.0032.v1,10.3390/environments4040088,False,True,0.9069781480140046 +10.20944/preprints201704.0135.v1,10.3390/ijms18050923,False,True,1.0 +10.1101/2022.01.14.476419,10.1111/mec.16469,False,True,1.0 +10.5194/bgd-4-3343-2007,10.5194/bg-5-371-2008,True,True,0.9743589743589745 +10.5194/amt-2017-408,10.5194/amt-11-3251-2018,True,True,1.0 +10.5194/osd-11-1543-2014,10.5194/os-11-187-2015,True,True,1.0 +10.20944/preprints201906.0228.v1,10.3390/cancers11070942,False,True,0.9832134292565948 +10.5194/cp-2016-46,10.5194/cp-12-1829-2016,True,True,1.0 +10.5194/amt-2017-287,10.5194/amt-11-4465-2018,True,True,1.0 +10.1101/2021.01.28.428594,10.1172/jci.insight.147700,False,True,0.9786096256684492 +10.1101/852434,10.1093/nar/gkaa032,False,True,0.9975308641975308 +10.1101/2021.06.21.449154,10.7554/elife.73153,True,True,1.0 +10.5194/angeo-2018-21,10.5194/angeo-36-891-2018,True,True,0.9968847352024922 +10.31230/osf.io/3b2c9,10.3354/meps12774,False,True, +10.1101/2022.05.17.492323,10.7554/elife.78877,True,False,1.0 +10.5194/osd-11-1213-2014,10.5194/os-10-881-2014,True,True,0.9857295482295484 +10.5194/acp-2020-1095,10.5194/acp-21-5289-2021,True,True,1.0 +10.5194/egusphere-2022-481,10.5194/os-18-1665-2022,True,True,1.0 +10.5194/amt-2020-348,10.5194/amt-14-5349-2021,True,True,0.9696969696969696 +10.31223/osf.io/5wakg,10.1111/j.1365-246x.2006.03017.x,False,True, +10.5194/amt-2020-28,10.5194/amt-13-6559-2020,True,True,1.0 +10.5194/tc-2019-28,10.5194/tc-13-3337-2019,True,True,1.0 +10.1101/2020.12.17.423361,10.7554/elife.66194,True,True,1.0 
+10.20944/preprints201808.0242.v1,10.3390/nano8090725,False,True,0.9592592592592591 +10.1101/2021.06.09.21258556,10.1016/s2213-2600(21)00409-4,False,True, +10.5194/hess-2016-505,10.5194/hess-21-765-2017,True,True,1.0 +10.5194/amtd-7-4481-2014,10.5194/amt-7-3549-2014,True,True,1.0 +10.21203/rs.3.rs-2267501/v1,10.1007/s11356-023-27197-6,False,True,0.9909297052154196 diff --git a/fetch_positive_samples.py b/fetch_positive_samples.py new file mode 100644 index 0000000000000000000000000000000000000000..172103fcf27492c9eac29d7364c2bd647cbc847b --- /dev/null +++ b/fetch_positive_samples.py @@ -0,0 +1,37 @@ +from src.dataset.GoodDataset import * + +import argparse + +def main(config): + """ + Main function to process the dataset and save it as a CSV file. + Args: + config: Namespace object containing the script arguments. + """ + # Initialize the dataset + dataset = AugmentedDataset() + + positive_samples = dataset.fetch_positive_samples_parallel( + num_samples=config.size, + random=config.random, + seed=config.seed, + full=config.full + ) + + dataset.save(config.output) + + +if __name__ == "__main__": + # Parse command-line arguments + from src.utils.io_utils import PROJECT_ROOT + parser = argparse.ArgumentParser(description="Generate and save a dataset based on the given configuration.") + + parser.add_argument("-s", "--size", type=int, default=10, help="Number of samples to generate.") + parser.add_argument("-r", "--random", type=bool, default=True, help="Whether to sample randomly.") + parser.add_argument("--seed", type=int, default=42, help="Random seed for reproducibility.") + parser.add_argument("--full", action="store_true", help="Boolean flag to indicate full dataset mode.") + parser.add_argument("-o", "--output", type=str, default=os.path.join(PROJECT_ROOT, "data/dataset.pkl"), help="Output file path to save the dataset as a CSV.") + + # Parse the arguments and pass to the main function + config = parser.parse_args() + main(config) diff --git a/logo.png 
b/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..80ac9df99ad087ad97a6ea81c5acf9edb0775c0a Binary files /dev/null and b/logo.png differ diff --git a/notebooks/.DS_Store b/notebooks/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/notebooks/.DS_Store differ diff --git a/notebooks/1-0-dataset_development.ipynb b/notebooks/1-0-dataset_development.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..f3920020187300295a4967584a4b6f80580929a5 --- /dev/null +++ b/notebooks/1-0-dataset_development.ipynb @@ -0,0 +1,1121 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "from enum import Enum\n", + "from typing import List, Dict, Any\n", + "from dataclasses import dataclass\n", + "from tqdm import tqdm\n", + "\n", + "import os\n", + "import yaml\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "import pyalex\n", + "from pyalex import Works\n", + "from src.utils.io_utils import PROJECT_ROOT\n", + "from src.dataset.Dataset import *" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Configurations" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "@dataclass\n", + "class ConfigAugmentation:\n", + " \"\"\"Configuration for OpenAlex features\"\"\"\n", + " basic: Dict[str, bool] = None # id, doi, title, etc\n", + " source: Dict[str, bool] = None # journal info\n", + " authors: Dict[str, bool] = None # author details\n", + " metrics: Dict[str, bool] = None # citations, fwci, etc\n", + " classification: Dict[str, bool] = None # topics, concepts\n", + " access: Dict[str, bool] = None # OA status\n", + " related_works: Dict[str, bool] = None # references\n", + " abstract: bool = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 
Dataset Loading " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# load the dataset \n", + "\n", + "class DatasetType(Enum):\n", + " FULL_RAW = \"full_raw\"\n", + " PARTIAL_RAW = \"partial_raw\"\n", + " FULL_AUGMENTED = \"full_augmented\"\n", + " PARTIAL_AUGMENTED = \"partial_augmented\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "@dataclass\n", + "class Field:\n", + " \"\"\"Field configuration for data extraction\"\"\"\n", + " name: str\n", + " path: List[str]\n", + " default: Any = None\n", + "\n", + "class AlexFields:\n", + " \"\"\"OpenAlex field definitions\"\"\"\n", + " \n", + " BASIC = [\n", + " Field(\"id\", [\"id\"]),\n", + " Field(\"doi\", [\"doi\"]),\n", + " Field(\"title\", [\"title\"]),\n", + " Field(\"display_name\", [\"display_name\"]),\n", + " Field(\"publication_year\", [\"publication_year\"]),\n", + " Field(\"publication_date\", [\"publication_date\"]),\n", + " Field(\"language\", [\"language\"]),\n", + " Field(\"type\", [\"type\"]),\n", + " Field(\"type_crossref\", [\"type_crossref\"])\n", + " ]\n", + " \n", + " SOURCE = [\n", + " Field(\"journal_name\", [\"primary_location\", \"source\", \"display_name\"]),\n", + " Field(\"issn\", [\"primary_location\", \"source\", \"issn\"]),\n", + " Field(\"issn_l\", [\"primary_location\", \"source\", \"issn_l\"]),\n", + " Field(\"publisher\", [\"primary_location\", \"source\", \"host_organization_name\"]),\n", + " Field(\"type\", [\"primary_location\", \"source\", \"type\"])\n", + " ]\n", + "\n", + " METRICS = [\n", + " Field(\"cited_by_count\", [\"cited_by_count\"]),\n", + " Field(\"cited_by_percentile\", [\"citation_normalized_percentile\"]),\n", + " Field(\"is_retracted\", [\"is_retracted\"]),\n", + " Field(\"fwci\", [\"fwci\"]),\n", + " Field(\"referenced_works_count\", [\"referenced_works_count\"])\n", + " ]\n", + "\n", + " ACCESS = [\n", + " Field(\"is_oa\", 
[\"open_access\", \"is_oa\"]),\n", + " Field(\"oa_status\", [\"open_access\", \"oa_status\"]),\n", + " Field(\"oa_url\", [\"open_access\", \"oa_url\"]),\n", + " Field(\"pdf_url\", [\"primary_location\", \"pdf_url\"]),\n", + " Field(\"license\", [\"primary_location\", \"license\"]) \n", + " ]\n", + "\n", + "def get_nested_value(data: Dict, path: List[str], default: Any = None) -> Any:\n", + " \"\"\"Extract nested value from dictionary using path\"\"\"\n", + " value = data\n", + " for key in path:\n", + " try:\n", + " value = value[key]\n", + " except (KeyError, TypeError):\n", + " return default\n", + " return value\n", + "\n", + "class DataAugmenter:\n", + " \"\"\"Class for augmenting data with OpenAlex features\"\"\"\n", + "\n", + " def __init__(self):\n", + " \"\"\"Initialize augmenter with API credentials\"\"\"\n", + " self.profile = self._load_profile()\n", + " self.email = self.profile[\"email\"]\n", + " self.filters = ConfigAugmentation(\n", + " basic={\n", + " \"id\": True,\n", + " \"doi\": True,\n", + " \"title\": True,\n", + " \"display_name\": True,\n", + " \"publication_year\": True,\n", + " \"publication_date\": True,\n", + " \"language\": True,\n", + " \"type\": True,\n", + " \"type_crossref\": True\n", + " },\n", + " source={\n", + " \"journal_name\": True,\n", + " \"issn\": True,\n", + " \"issn_l\": True,\n", + " \"publisher\": True,\n", + " \"type\": True\n", + " },\n", + " authors={\n", + " \"position\": True,\n", + " \"name\": True,\n", + " \"id\": True,\n", + " \"orcid\": True,\n", + " \"is_corresponding\": True,\n", + " \"affiliations\": False\n", + " },\n", + " metrics={\n", + " \"cited_by_count\": True,\n", + " \"cited_by_percentile\": False,\n", + " \"is_retracted\": True,\n", + " \"fwci\": True,\n", + " \"referenced_works_count\": True\n", + " },\n", + " classification={\n", + " \"primary_topic\": True,\n", + " \"topics\": False,\n", + " \"concepts\": False,\n", + " },\n", + " access={\n", + " \"is_oa\": True,\n", + " \"oa_status\": 
True,\n", + " \"oa_url\": True,\n", + " \"pdf_url\": True,\n", + " \"license\": True\n", + " },\n", + " related_works={\n", + " \"references\": True,\n", + " \"referenced_by_count\": True,\n", + " \"related\": True\n", + " },\n", + " abstract=True\n", + " )\n", + " \n", + " pyalex.config.email = self.email\n", + " \n", + " def _load_profile(self) -> Dict[str, str]:\n", + " \"\"\"Load API credentials from profile\"\"\"\n", + " profile_path = f\"{PROJECT_ROOT}/user_information/profile.yaml\"\n", + " \n", + " assert str(PROJECT_ROOT).split(\"/\")[-1] == \"MatchingPubs\", \"Please run this script in the github repo folder \"\n", + " assert os.path.exists(profile_path), \"create a profile.yaml with your email (email:) and your api key (api_key:). Go here to get one https://dev.elsevier.com/\"\n", + "\n", + " \n", + " with open(profile_path, \"r\") as f:\n", + " profile = yaml.safe_load(f)\n", + " \n", + " return {\n", + " \"email\": profile[\"email\"]\n", + " }\n", + "\n", + " def get_alex_features(self, doi: str) -> Dict:\n", + " \"\"\"Extract all OpenAlex features for a DOI\"\"\"\n", + " try:\n", + " work = Works()[f\"https://doi.org/{doi}\"]\n", + " result = {}\n", + "\n", + " # Basic metadata\n", + " result[\"basic\"] = {\n", + " field.name: get_nested_value(work, field.path, None)\n", + " for field in AlexFields.BASIC\n", + " }\n", + " \n", + " # Source/journal info\n", + " result[\"source\"] = {\n", + " field.name: get_nested_value(work, field.path, None)\n", + " for field in AlexFields.SOURCE\n", + " }\n", + " \n", + " # Authors with affiliations\n", + " try:\n", + " result[\"authors\"] = [\n", + " {\n", + " \"position\": auth.get(\"author_position\", None),\n", + " \"name\": auth.get(\"author\", {}).get(\"display_name\", None),\n", + " \"id\": auth.get(\"author\", {}).get(\"id\", None),\n", + " \"orcid\": auth.get(\"author\", {}).get(\"orcid\", None),\n", + " \"is_corresponding\": auth.get(\"is_corresponding\", None),\n", + " \"affiliations\": [\n", + " {\n", + 
" \"name\": inst.get(\"display_name\", None),\n", + " \"id\": inst.get(\"id\", None),\n", + " \"country\": inst.get(\"country_code\", None),\n", + " \"type\": inst.get(\"type\", None),\n", + " \"ror\": inst.get(\"ror\", None)\n", + " }\n", + " for inst in auth.get(\"institutions\", [])\n", + " ]\n", + " }\n", + " for auth in work.get(\"authorships\", [])\n", + " ]\n", + " except:\n", + " result[\"authors\"] = None\n", + "\n", + " # Topics and classifications \n", + " try:\n", + " result[\"classification\"] = {\n", + " \"primary_topic\": {\n", + " \"name\": work.get(\"primary_topic\", {}).get(\"display_name\", None),\n", + " \"score\": work.get(\"primary_topic\", {}).get(\"score\", None),\n", + " \"field\": work.get(\"primary_topic\", {}).get(\"field\", {}).get(\"display_name\", None),\n", + " \"subfield\": work.get(\"primary_topic\", {}).get(\"subfield\", {}).get(\"display_name\", None)\n", + " },\n", + " \"topics\": [\n", + " {\n", + " \"name\": topic.get(\"display_name\", None),\n", + " \"score\": topic.get(\"score\", None),\n", + " \"field\": topic.get(\"field\", {}).get(\"display_name\", None)\n", + " }\n", + " for topic in work.get(\"topics\", [])\n", + " ],\n", + " \"concepts\": [\n", + " {\n", + " \"name\": concept.get(\"display_name\", None),\n", + " \"level\": concept.get(\"level\", None),\n", + " \"score\": concept.get(\"score\", None),\n", + " \"wikidata\": concept.get(\"wikidata\", None)\n", + " }\n", + " for concept in work.get(\"concepts\", [])\n", + " ]\n", + " }\n", + " except:\n", + " result[\"classification\"] = None\n", + "\n", + " # Metrics\n", + " result[\"metrics\"] = {\n", + " field.name: get_nested_value(work, field.path, None)\n", + " for field in AlexFields.METRICS\n", + " }\n", + "\n", + " # Access info\n", + " result[\"access\"] = {\n", + " field.name: get_nested_value(work, field.path, None)\n", + " for field in AlexFields.ACCESS\n", + " }\n", + "\n", + " # Abstract\n", + " try:\n", + " if \"abstract_inverted_index\" in work:\n", + " 
abstract_dict = work[\"abstract_inverted_index\"]\n", + " if abstract_dict:\n", + " max_pos = max(max(positions) for positions in abstract_dict.values())\n", + " words = [\"\"] * (max_pos + 1)\n", + " for word, positions in abstract_dict.items():\n", + " for pos in positions:\n", + " words[pos] = word\n", + " result[\"abstract\"] = \" \".join(words)\n", + " else:\n", + " result[\"abstract\"] = None\n", + " else:\n", + " result[\"abstract\"] = None\n", + " except:\n", + " result[\"abstract\"] = None\n", + "\n", + " return result\n", + "\n", + " except Exception as e:\n", + " print(f\"OpenAlex error for DOI {doi}: {e}\")\n", + " return {}\n", + " \n", + " def filter_augmented_data(self, data: Dict[str, Any], config: ConfigAugmentation = None) -> Dict[str, Any]:\n", + " \"\"\"Filter data based on configuration\n", + " \n", + " Args:\n", + " data: Dictionary containing raw data\n", + " config: Configuration specifying which features to include\n", + " \n", + " Returns:\n", + " Filtered dictionary containing only the configured features\n", + " \"\"\"\n", + " config = config or self.filters\n", + " \n", + " def filter_section(section_data: Dict[str, Any], section_config: Dict[str, bool]) -> Dict[str, Any]:\n", + " \"\"\"Filter a section of the data based on the section configuration\"\"\"\n", + " return {k: v for k, v in section_data.items() if k in section_config and section_config[k]}\n", + " \n", + " filtered_data = {}\n", + " \n", + " # Filter OpenAlex data\n", + " alex_filtered = {}\n", + " \n", + " # Basic metadata\n", + " if config.basic:\n", + " alex_filtered[\"basic\"] = filter_section(data.get(\"basic\", {}), config.basic)\n", + " \n", + " # Source/journal info\n", + " if config.source:\n", + " alex_filtered[\"source\"] = filter_section(data.get(\"source\", {}), config.source)\n", + " \n", + " # Authors\n", + " if config.authors:\n", + " authors_data = data.get(\"authors\", [])\n", + " filtered_authors = []\n", + " for author in authors_data:\n", + " 
def filter_augmented_data(self, data: Dict[str, Any], config: ConfigAugmentation = None) -> Dict[str, Any]:
    """Filter data based on configuration.

    Args:
        data: Dictionary containing raw data, as returned by
            ``get_alex_features``. Sections that are missing or ``None``
            are treated as empty.
        config: Configuration specifying which features to include.
            Falls back to ``self.filters`` when omitted.

    Returns:
        Filtered dictionary containing only the configured features.
    """
    config = config or self.filters

    def filter_section(section_data: Dict[str, Any], section_config: Dict[str, bool]) -> Dict[str, Any]:
        """Filter a section of the data based on the section configuration."""
        # ``or {}`` because a section may be None when its extraction failed.
        return {
            k: v
            for k, v in (section_data or {}).items()
            if k in section_config and section_config[k]
        }

    alex_filtered = {}

    # Basic metadata
    if config.basic:
        alex_filtered["basic"] = filter_section(data.get("basic"), config.basic)

    # Source/journal info
    if config.source:
        alex_filtered["source"] = filter_section(data.get("source"), config.source)

    # Authors
    if config.authors:
        # The "affiliations" entry doubles as the per-affiliation filter.
        affiliation_config = config.authors.get("affiliations", False)
        filtered_authors = []
        for author in data.get("authors") or []:
            filtered_author = filter_section(author, config.authors)
            if affiliation_config:
                filtered_author["affiliations"] = [
                    filter_section(aff, affiliation_config)
                    for aff in author.get("affiliations") or []
                ]
            filtered_authors.append(filtered_author)
        alex_filtered["authors"] = filtered_authors

    # Metrics
    if config.metrics:
        alex_filtered["metrics"] = filter_section(data.get("metrics"), config.metrics)

    # Classification
    if config.classification:
        alex_filtered["classification"] = filter_section(
            data.get("classification"), config.classification
        )

    # Access info
    if config.access:
        alex_filtered["access"] = filter_section(data.get("access"), config.access)

    # Related works
    if config.related_works:
        alex_filtered["related_works"] = filter_section(data.get("related_works"), config.related_works)

    # Abstract
    if config.abstract and "abstract" in data:
        alex_filtered["abstract"] = data["abstract"]

    return alex_filtered
"filtered_info = a.filter_augmented_data(info)\n", + "filtered_info[\"classification\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "class FullAugmentedDataset: \n", + "\n", + " def __init__(self):\n", + " self.augmenter = DataAugmenter()\n", + " self.full_raw_dataset = self._load_the_dataset()\n", + "\n", + " def _load_the_dataset(self, type: DatasetType = DatasetType.FULL_RAW) -> pd.DataFrame:\n", + " \"\"\"Load as csv file one of the datasets for training.\"\"\"\n", + " assert str(PROJECT_ROOT).split(\"/\")[-1] == \"MatchingPubs\", \"Please run this script in the github repo folder \"\n", + " \n", + " if type == DatasetType.FULL_RAW:\n", + " return pd.read_csv(f\"{PROJECT_ROOT}/data/crossref-preprint-article-relationships-Aug-2023.csv\")\n", + "\n", + " def retrieve_dois_couple(self, len: int = 1, random: bool = False, seed: bool = None, full: bool = False):\n", + " \"\"\"Retrieve two DOIs from the dataset\"\"\"\n", + " if random:\n", + " dois = self.full_raw_dataset.sample(n=len, random_state=seed)[[\"preprint_doi\", \"article_doi\"]]\n", + " else:\n", + " dois = self.full_raw_dataset.head(len)[[\"preprint_doi\", \"article_doi\"]]\n", + " if full:\n", + " dois = self.full_raw_dataset[[\"preprint_doi\", \"article_doi\"]]\n", + " return dois.to_numpy()\n", + " \n", + " @staticmethod\n", + " def _flatten_list(lst):\n", + " \"\"\"\n", + " Flattens a nested list into a single list. 
If the input is not nested, it returns the original list.\n", + " Handles cases where some elements are lists and others are not.\n", + " \"\"\"\n", + " if not isinstance(lst, list): # Ensure the input is a list\n", + " raise ValueError(\"Input must be a list\")\n", + "\n", + " def _flatten(sublist):\n", + " for item in sublist:\n", + " if isinstance(item, list): # Check if the item is a list\n", + " yield from _flatten(item) # Recursively flatten the list\n", + " else:\n", + " yield item # Yield the non-list item\n", + "\n", + " return list(_flatten(lst))\n", + " \n", + " def _augmented_data_to_row(self, filtered_data: Dict[str, Any], preprint: bool = True) -> pd.Series:\n", + " \"\"\"Transform filtered augmented data into a pandas Series\n", + " \n", + " Args:\n", + " filtered_data: Dictionary containing filtered OpenAlex and Elsevier data\n", + " preprint: If True, use prpnt_ prefix, else use article_ prefix\n", + " \n", + " Returns:\n", + " pd.Series: Flattened data as a single row\n", + " \"\"\"\n", + "\n", + " additional_part = FullAugmentedDataset.filter_author(filtered_data.get(\"authors\",{}))\n", + " # modify the key of additional part by adding authors_ at the beginning\n", + " additional_part = {f\"authors_{k}\": v for k, v in additional_part.items()} \n", + " # remove authos key from filtreed_info\n", + " filtered_data.pop(\"authors\")\n", + " # append the additional part to the filtered_info\n", + " filtered_data.update(additional_part)\n", + " final_dictionary = FullAugmentedDataset.flatten_dict(filtered_data, preprint=preprint)\n", + "\n", + " for k, v in final_dictionary.items():\n", + " final_dictionary[k] = \"$@$\".join(map(str, FullAugmentedDataset._flatten_list(v))) if isinstance(v, list) else [v]\n", + "\n", + " return pd.DataFrame(final_dictionary)\n", + "\n", + " @staticmethod\n", + " def filter_author(authors_info : list) -> dict:\n", + "\n", + " try:\n", + " relevant_keys = authors_info[0].keys()\n", + " new_dict = {}\n", + " for key in 
relevant_keys:\n", + " new_dict[key] = [author[key] for author in authors_info]\n", + " return new_dict\n", + " except:\n", + " return {}\n", + " \n", + " @staticmethod\n", + " def flatten_dict(d: dict, parent_key: str = '', sep: str = '_', preprint = True) -> dict:\n", + " \"\"\"Flatten a nested dictionary.\n", + " \n", + " Args:\n", + " d (dict): The dictionary to flatten.\n", + " parent_key (str): The base key string to use for the flattened keys.\n", + " sep (str): The separator to use between parent and child keys.\n", + " \n", + " Returns:\n", + " dict: The flattened dictionary.\n", + " \"\"\"\n", + " addition = \"prpnt_\" if preprint else \"article_\"\n", + " def _flatten_dict(d: dict, parent_key: str = '', sep: str = '_') -> dict:\n", + " items = []\n", + " for k, v in d.items():\n", + " new_key = f\"{parent_key}{sep}{k}\" if parent_key else k\n", + " if isinstance(v, dict):\n", + " items.extend(_flatten_dict(v, new_key, sep=sep).items())\n", + " else:\n", + " items.append((new_key, v))\n", + " return dict(items)\n", + " return {f\"{addition}{k}\": v for k, v in _flatten_dict(d, parent_key, sep).items()}\n", + "\n", + " def process_pair(self, dois) -> pd.DataFrame:\n", + " \"\"\"Process a pair of DOIs and return combined rows as a DataFrame\"\"\"\n", + " assert len(dois) > 0\n", + " rows = []\n", + " for preprint_doi, article_doi in tqdm(dois):\n", + " # Get preprint features\n", + " preprint_features = self.augmenter.get_alex_features(preprint_doi) # augment with all the features\n", + " preprint_filtered = self.augmenter.filter_augmented_data(preprint_features) # filter the relevant features\n", + " preprint_row = self._augmented_data_to_row(preprint_filtered, True)\n", + "\n", + " # Get article features\n", + " article_features = self.augmenter.get_alex_features(article_doi) # augment with all the features\n", + " article_filtered = self.augmenter.filter_augmented_data(article_features)\n", + " article_row = self._augmented_data_to_row(article_filtered, 
False)\n", + "\n", + " # print(article_row.columns)\n", + " # print(len(preprint_row.columns))\n", + "\n", + " # combined_row = pd.concat([preprint_row, article_row], axis=1)\n", + " # rows.append(combined_row)\n", + " rows.append([preprint_row, article_row])\n", + "\n", + " return rows\n", + "\n", + " @staticmethod\n", + " def transform_array(input_array, factor):\n", + " output_list = []\n", + " \n", + " for i, row in enumerate(input_array):\n", + " other_indices = np.array([j for j in range(len(input_array)) if j != i])\n", + " sampled_indices = np.random.choice(other_indices, size=factor, replace=False)\n", + " sampled_rows = [input_array[j] for j in sampled_indices]\n", + "\n", + " output_list.append(pd.concat([row[0], row[1], pd.DataFrame(data=[1], columns=['label'])], axis=1))\n", + " for B in sampled_rows:\n", + " output_list.append(pd.concat([row[0], B[1], pd.DataFrame(data=[0], columns=['label'])], axis=1))\n", + "\n", + " return pd.concat(output_list).reset_index(drop=True)\n", + "\n", + " def get_full_dataset(self, len: int = 1, random: bool = True, seed: int = 42, full: bool = True) -> pd.DataFrame:\n", + " \"\"\"Process all DOI pairs and return full dataset\"\"\"\n", + " dois = self.retrieve_dois_couple(len, random, seed, full)\n", + " self.augmented_df = FullAugmentedDataset.transform_array(self.process_pair(dois), factor=3)\n", + " return self.augmented_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TRYING STUFF" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Create dataset with new configs\n", + "dataset = FullAugmentedDataset()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5, 2)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dois = dataset.retrieve_dois_couple(5, random = True, seed = 42)\n", + 
"dois.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "info = dataset.augmenter.get_alex_features(dois[0][0]) # augment with all the features" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'basic': {'id': 'https://openalex.org/W4213260597',\n", + " 'doi': 'https://doi.org/10.31234/osf.io/6fps2',\n", + " 'title': 'Distance perception in virtual reality: A meta-analysis of the effect of head-mounted display characteristics',\n", + " 'display_name': 'Distance perception in virtual reality: A meta-analysis of the effect of head-mounted display characteristics',\n", + " 'publication_year': 2022,\n", + " 'publication_date': '2022-02-12',\n", + " 'language': 'en',\n", + " 'type': 'preprint',\n", + " 'type_crossref': 'posted-content'},\n", + " 'source': {'journal_name': None,\n", + " 'issn': None,\n", + " 'issn_l': None,\n", + " 'publisher': None,\n", + " 'type': None},\n", + " 'authors': [{'position': 'first',\n", + " 'name': 'Jonathan W. 
Kelly',\n", + " 'id': 'https://openalex.org/A5011931977',\n", + " 'orcid': 'https://orcid.org/0000-0002-4317-273X',\n", + " 'is_corresponding': True,\n", + " 'affiliations': [{'name': 'Iowa State University',\n", + " 'id': 'https://openalex.org/I173911158',\n", + " 'country': 'US',\n", + " 'type': 'education',\n", + " 'ror': 'https://ror.org/04rswrd78'}]}],\n", + " 'classification': {'primary_topic': {'name': 'Virtual Presence and Embodiment in VR Research',\n", + " 'score': 0.9982,\n", + " 'field': 'Computer Science',\n", + " 'subfield': 'Human-Computer Interaction'},\n", + " 'topics': [{'name': 'Virtual Presence and Embodiment in VR Research',\n", + " 'score': 0.9982,\n", + " 'field': 'Computer Science'},\n", + " {'name': 'Neural Mechanisms of Visual Perception and Processing',\n", + " 'score': 0.9906,\n", + " 'field': 'Neuroscience'},\n", + " {'name': 'Spatial Ability for STEM Domains',\n", + " 'score': 0.9727,\n", + " 'field': 'Engineering'}],\n", + " 'concepts': [{'name': 'Virtual reality',\n", + " 'level': 2,\n", + " 'score': 0.74525213,\n", + " 'wikidata': 'https://www.wikidata.org/wiki/Q170519'},\n", + " {'name': 'Perception',\n", + " 'level': 2,\n", + " 'score': 0.69497585,\n", + " 'wikidata': 'https://www.wikidata.org/wiki/Q160402'},\n", + " {'name': 'Optical head-mounted display',\n", + " 'level': 2,\n", + " 'score': 0.64143133,\n", + " 'wikidata': 'https://www.wikidata.org/wiki/Q17105103'},\n", + " {'name': 'Computer science',\n", + " 'level': 0,\n", + " 'score': 0.4773505,\n", + " 'wikidata': 'https://www.wikidata.org/wiki/Q21198'},\n", + " {'name': 'Psychology',\n", + " 'level': 0,\n", + " 'score': 0.3757282,\n", + " 'wikidata': 'https://www.wikidata.org/wiki/Q9418'},\n", + " {'name': 'Computer vision',\n", + " 'level': 1,\n", + " 'score': 0.3722988,\n", + " 'wikidata': 'https://www.wikidata.org/wiki/Q844240'},\n", + " {'name': 'Artificial intelligence',\n", + " 'level': 1,\n", + " 'score': 0.35102686,\n", + " 'wikidata': 
'https://www.wikidata.org/wiki/Q11660'},\n", + " {'name': 'Neuroscience',\n", + " 'level': 1,\n", + " 'score': 0.0,\n", + " 'wikidata': 'https://www.wikidata.org/wiki/Q207011'}]},\n", + " 'metrics': {'cited_by_count': 6,\n", + " 'cited_by_percentile': {'value': 0.997093,\n", + " 'is_in_top_1_percent': True,\n", + " 'is_in_top_10_percent': True},\n", + " 'is_retracted': False,\n", + " 'fwci': None,\n", + " 'referenced_works_count': 89},\n", + " 'access': {'is_oa': True,\n", + " 'oa_status': 'green',\n", + " 'oa_url': 'https://psyarxiv.com/6fps2/download',\n", + " 'pdf_url': 'https://psyarxiv.com/6fps2/download',\n", + " 'license': None},\n", + " 'abstract': 'Distances are commonly underperceived in virtual reality (VR), and this finding has been documented repeatedly over more than two decades of research. Yet, there is evidence that perceived distance is more accurate in modern compared to older head-mounted displays (HMDs). This meta-analysis of 131 studies describes egocentric distance perception across 20 HMDs, and also examines the relationship between perceived distance and technical HMD characteristics. Judged distance was positively associated with HMD field of view (FOV), positively associated with HMD resolution, and negatively associated with HMD weight. The effects of FOV and resolution were more pronounced among heavier HMDs. 
These findings suggest that future improvements in these technical characteristics may be central to resolving the problem of distance underperception in VR.'}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "info" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "filtered_info = dataset.augmenter.filter_augmented_data(info)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " | prpnt_basic_id | \n", + "prpnt_basic_doi | \n", + "prpnt_basic_title | \n", + "prpnt_basic_display_name | \n", + "prpnt_basic_publication_year | \n", + "prpnt_basic_publication_date | \n", + "prpnt_basic_language | \n", + "prpnt_basic_type | \n", + "prpnt_basic_type_crossref | \n", + "prpnt_source_journal_name | \n", + "... | \n", + "prpnt_access_oa_status | \n", + "prpnt_access_oa_url | \n", + "prpnt_access_pdf_url | \n", + "prpnt_access_license | \n", + "prpnt_abstract | \n", + "prpnt_authors_position | \n", + "prpnt_authors_name | \n", + "prpnt_authors_id | \n", + "prpnt_authors_orcid | \n", + "prpnt_authors_is_corresponding | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "https://openalex.org/W4213260597 | \n", + "https://doi.org/10.31234/osf.io/6fps2 | \n", + "Distance perception in virtual reality: A meta... | \n", + "Distance perception in virtual reality: A meta... | \n", + "2022 | \n", + "2022-02-12 | \n", + "en | \n", + "preprint | \n", + "posted-content | \n", + "None | \n", + "... | \n", + "green | \n", + "https://psyarxiv.com/6fps2/download | \n", + "https://psyarxiv.com/6fps2/download | \n", + "None | \n", + "Distances are commonly underperceived in virtu... | \n", + "first | \n", + "Jonathan W. Kelly | \n", + "https://openalex.org/A5011931977 | \n", + "https://orcid.org/0000-0002-4317-273X | \n", + "True | \n", + "
1 rows × 33 columns
\n", + "\n", + " | prpnt_basic_doi | \n", + "article_basic_doi | \n", + "label | \n", + "
---|---|---|---|
0 | \n", + "https://doi.org/10.31234/osf.io/6fps2 | \n", + "https://doi.org/10.1109/tvcg.2022.3196606 | \n", + "1 | \n", + "
1 | \n", + "https://doi.org/10.31234/osf.io/6fps2 | \n", + "https://doi.org/10.1007/s10499-023-01047-1 | \n", + "0 | \n", + "
2 | \n", + "https://doi.org/10.31234/osf.io/6fps2 | \n", + "https://doi.org/10.5194/acp-12-3837-2012 | \n", + "0 | \n", + "
3 | \n", + "https://doi.org/10.31234/osf.io/6fps2 | \n", + "https://doi.org/10.1016/j.vetpar.2021.109373 | \n", + "0 | \n", + "
4 | \n", + "https://doi.org/10.5194/acpd-11-3071-2011 | \n", + "https://doi.org/10.5194/acp-12-3837-2012 | \n", + "1 | \n", + "
5 | \n", + "https://doi.org/10.5194/acpd-11-3071-2011 | \n", + "https://doi.org/10.1016/j.vetpar.2021.109373 | \n", + "0 | \n", + "
6 | \n", + "https://doi.org/10.5194/acpd-11-3071-2011 | \n", + "https://doi.org/10.1109/tvcg.2022.3196606 | \n", + "0 | \n", + "
7 | \n", + "https://doi.org/10.5194/acpd-11-3071-2011 | \n", + "https://doi.org/10.1007/s10499-023-01047-1 | \n", + "0 | \n", + "
8 | \n", + "https://doi.org/10.1101/2020.08.07.241687 | \n", + "https://doi.org/10.1021/acscentsci.1c00703 | \n", + "1 | \n", + "
9 | \n", + "https://doi.org/10.1101/2020.08.07.241687 | \n", + "https://doi.org/10.1007/s10499-023-01047-1 | \n", + "0 | \n", + "
10 | \n", + "https://doi.org/10.1101/2020.08.07.241687 | \n", + "https://doi.org/10.1109/tvcg.2022.3196606 | \n", + "0 | \n", + "
11 | \n", + "https://doi.org/10.1101/2020.08.07.241687 | \n", + "https://doi.org/10.1016/j.vetpar.2021.109373 | \n", + "0 | \n", + "
12 | \n", + "https://doi.org/10.21203/rs.3.rs-62250/v1 | \n", + "https://doi.org/10.1016/j.vetpar.2021.109373 | \n", + "1 | \n", + "
13 | \n", + "https://doi.org/10.21203/rs.3.rs-62250/v1 | \n", + "https://doi.org/10.5194/acp-12-3837-2012 | \n", + "0 | \n", + "
14 | \n", + "https://doi.org/10.21203/rs.3.rs-62250/v1 | \n", + "https://doi.org/10.1109/tvcg.2022.3196606 | \n", + "0 | \n", + "
15 | \n", + "https://doi.org/10.21203/rs.3.rs-62250/v1 | \n", + "https://doi.org/10.1021/acscentsci.1c00703 | \n", + "0 | \n", + "
16 | \n", + "https://doi.org/10.21203/rs.3.rs-2640242/v1 | \n", + "https://doi.org/10.1007/s10499-023-01047-1 | \n", + "1 | \n", + "
17 | \n", + "https://doi.org/10.21203/rs.3.rs-2640242/v1 | \n", + "https://doi.org/10.5194/acp-12-3837-2012 | \n", + "0 | \n", + "
18 | \n", + "https://doi.org/10.21203/rs.3.rs-2640242/v1 | \n", + "https://doi.org/10.1016/j.vetpar.2021.109373 | \n", + "0 | \n", + "
19 | \n", + "https://doi.org/10.21203/rs.3.rs-2640242/v1 | \n", + "https://doi.org/10.1021/acscentsci.1c00703 | \n", + "0 | \n", + "