bagbreizh
committed on
Commit · 5c5cb78
1 Parent(s): 9f74345
First commit
app.py
ADDED
@@ -0,0 +1,92 @@
import os
import requests
import streamlit as st
from huggingface_hub import InferenceClient
from prompt import default_prompt, prompt_enhanced
from transformers import GPT2Tokenizer
# Initialize a GPT-2 tokenizer (used here to estimate token counts)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Function to load the README.md directly from the Hugging Face API
def load_readme(dataset_name: str):
    api_url = f"https://huggingface.co/datasets/{dataset_name}/raw/main/README.md"

    try:
        response = requests.get(api_url)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        st.error(f"Error loading the README.md: {e}")
        return None

    return response.text

# Function to check if the README content exceeds the token limit
def check_token_limit(content: str, max_tokens: int = 8192):
    tokens = tokenizer.encode(content)
    if len(tokens) > max_tokens:
        truncated_content = tokenizer.decode(tokens[:max_tokens])
        st.warning("Warning: The README.md exceeds 8192 tokens. It has been truncated for evaluation. This may affect the quality of the evaluation results.")
        return truncated_content
    return content

# Function to evaluate the quality of the dataset card
def evaluate_readme(readme_content: str, user_prompt: str):
    # Retrieve the inference token from environment variables
    hf_token = os.getenv('HF_TOKEN_INFERENCE')

    # Ensure the token is available
    if not hf_token:
        st.error("The Hugging Face inference token is not configured. Please ensure HF_TOKEN_INFERENCE is set.")
        return None

    # Initialize the inference client with the specified model
    client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct", token=hf_token)

    # User-customizable prompt
    prompt = user_prompt.format(readme_content=readme_content)

    messages = [
        {'role': 'system', 'content': "You are an expert in the legal field, especially in Artificial Intelligence and data privacy."},
        {'role': 'user', 'content': prompt}
    ]

    # Call the model to get an evaluation
    response = client.chat_completion(
        model="meta-llama/Meta-Llama-3-70B-Instruct",
        messages=messages,
        tool_choice="auto",
        max_tokens=500,
    )

    return response['choices'][0]['message']['content']

# Streamlit Interface
def main():

    from dotenv import load_dotenv
    load_dotenv()
    st.title("Dataset Card Evaluator")

    # Dataset name input
    dataset_name = st.text_input("Path to HF Dataset (e.g., amirveyseh/acronym_identification)")

    if dataset_name:
        # Load and display the dataset's README.md
        readme = load_readme(dataset_name)

        if readme:
            # Check for token limit and truncate if necessary
            readme = check_token_limit(readme)

            st.subheader("README.md content:")
            st.text_area("README.md", readme, height=200)

            # Button to evaluate the documentation
            if st.button("Evaluate dataset documentation"):
                with st.spinner("Audit in progress..."):
                    evaluation_result = evaluate_readme(readme, prompt_enhanced)
                    if evaluation_result:
                        st.subheader("Evaluation Result:")
                        st.write(evaluation_result)

if __name__ == "__main__":
    main()
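The pieces above can also be exercised without the Streamlit front end. A minimal sketch, assuming HF_TOKEN_INFERENCE is set in the environment and both files are importable; the dataset path is just the example from the input placeholder:

# Sketch (not part of the commit): run the evaluation outside the Streamlit UI.
from app import load_readme, check_token_limit, evaluate_readme
from prompt import prompt_enhanced

readme = load_readme("amirveyseh/acronym_identification")
if readme:
    readme = check_token_limit(readme)  # truncate to 8192 tokens if needed
    result = evaluate_readme(readme, prompt_enhanced)
    print(result)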
prompt.py
ADDED
@@ -0,0 +1,59 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 11 18:53:59 2024

@author: legalchain
"""
default_prompt = """
Evaluate the given dataset card based on these mandatory criteria:
1. Purpose and Applications: Clearly articulates the dataset's purpose and potential real-world applications.
2. Unique Value: Explicitly states what makes this dataset unique or valuable compared to others.
3. Content Description: Provides detailed information about the actual data content, not just structure.
4. Completeness: Covers dataset size, data distribution, collection methodology, and any preprocessing steps.
5. Limitations and Biases: Discusses any known limitations, biases, or ethical considerations.
6. Human Curation: Shows evidence of human-added insights, context, or valuable information beyond auto-generated content.

Rate the card from 1 to 5, with half-points allowed (e.g., 2.5).
Dataset Card to Evaluate: {readme_content}
"""

prompt_enhanced = """Evaluate the given dataset card based on these mandatory criteria:

# Steps

1. **Data Origin**: Evaluate the description of where the data originates from. Check whether it includes details such as the data source, collection methods, and any preprocessing steps.

2. **Usage**: Assess how the model usage is outlined in the card. Consider the intended applications, recommended scenarios, and potential misuse cases.

3. **Biases**: Review the section addressing biases. Determine whether it identifies possible biases in the data or the model's output and how these are mitigated or acknowledged.

4. **Performance**: Check for clear information on the model's performance, including metrics, benchmarks, and testing conditions.

5. **Limitations**: Look for a discussion of the model's limitations, noting any restrictions on the types of data or contexts it should be used in.

6. **Ethics and Safety**: Evaluate any ethical considerations and safety precautions mentioned, focusing on steps taken to ensure responsible use of the model.

7. **Transparency and Explainability**: Determine whether the card includes details that enhance transparency, such as architecture, algorithms used, and explainability measures.

# Output Format

Provide a detailed paragraph for each aspect (data origin, usage, biases, etc.), specifying the strengths and weaknesses observed in the model card. Summarize with a concluding statement about the overall quality and completeness of the model card.

# Examples

- **Data Origin**: The model card should comprehensively detail the origin of the data, including the specific datasets used, any licensing or ethical considerations in data collection, and a summary of preprocessing steps (e.g., "The data originates from public health records collected between 2010-2020, after anonymization and normalization processes were applied").

- **Biases**: An exemplary model card would identify potential biases by describing the demographic distribution of the training data (e.g., "The dataset has a skew towards urban populations, which may affect the model's accuracy in rural settings").

# Notes
- Rate the card from 1 to 5, with half-points allowed (e.g., 2.5).
- Create a Markdown table with item names and scores at the beginning of your response.
- Pay special attention to how each section contributes to understanding the model's behavior and reliability.
- Consider the completeness and clarity of the information presented.
- Document any missing elements or recommendations for improvement.

Dataset Card to Evaluate: {readme_content}


"""
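Both templates end with a single {readme_content} placeholder that app.py fills via str.format. A minimal illustration of that step; the card text below is purely hypothetical:

# Sketch (not part of the commit): fill the template with a dataset card.
from prompt import prompt_enhanced

card_text = "# acronym_identification\nSentences annotated with acronym spans..."
filled_prompt = prompt_enhanced.format(readme_content=card_text)
print(filled_prompt)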