bagbreizh
committed on
Commit · 5c5cb78
1 Parent(s): 9f74345
First commit
app.py
ADDED
@@ -0,0 +1,92 @@
import os
import requests
import streamlit as st
from huggingface_hub import InferenceClient
from prompt import default_prompt, prompt_enhanced
from transformers import GPT2Tokenizer
# Initialize a GPT-2 tokenizer (used here to estimate token counts)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Function to load the README.md directly from the Hugging Face API
def load_readme(dataset_name: str):
    api_url = f"https://huggingface.co/datasets/{dataset_name}/raw/main/README.md"

    try:
        response = requests.get(api_url)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        st.error(f"Error loading the README.md: {e}")
        return None

    return response.text

# Function to check if the README content exceeds the token limit
def check_token_limit(content: str, max_tokens: int = 8192):
    tokens = tokenizer.encode(content)
    if len(tokens) > max_tokens:
        truncated_content = tokenizer.decode(tokens[:max_tokens])
        st.warning("Warning: The README.md exceeds 8192 tokens. It has been truncated for evaluation. This may affect the quality of the evaluation results.")
        return truncated_content
    return content

# Function to evaluate the quality of the dataset card
def evaluate_readme(readme_content: str, user_prompt: str):
    # Retrieve the inference token from environment variables
    hf_token = os.getenv('HF_TOKEN_INFERENCE')

    # Ensure the token is available
    if not hf_token:
        st.error("The Hugging Face inference token is not configured. Please ensure HF_TOKEN_INFERENCE is set.")
        return None

    # Initialize the inference client with the specified model
    client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct", token=hf_token)

    # User-customizable prompt
    prompt = user_prompt.format(readme_content=readme_content)

    messages = [
        {'role': 'system', 'content': "You are an expert in the legal field, especially in Artificial Intelligence and data privacy."},
        {'role': 'user', 'content': prompt}
    ]

    # Call the model to get an evaluation
    response = client.chat_completion(
        model="meta-llama/Meta-Llama-3-70B-Instruct",
        messages=messages,
        tool_choice="auto",
        max_tokens=500,
    )

    return response['choices'][0]['message']['content']

# Streamlit Interface
def main():

    from dotenv import load_dotenv
    load_dotenv()
    st.title("Dataset Card Evaluator")

    # Dataset name input
    dataset_name = st.text_input("Path to HF Dataset (e.g., amirveyseh/acronym_identification)")

    if dataset_name:
        # Load and display the dataset's README.md
        readme = load_readme(dataset_name)

        if readme:
            # Check for token limit and truncate if necessary
            readme = check_token_limit(readme)

            st.subheader("README.md content:")
            st.text_area("README.md", readme, height=200)

            # Button to evaluate the documentation
            if st.button("Evaluate dataset documentation"):
                with st.spinner("Audit in progress..."):
                    evaluation_result = evaluate_readme(readme, prompt_enhanced)
                    if evaluation_result:
                        st.subheader("Evaluation Result:")
                        st.write(evaluation_result)

if __name__ == "__main__":
    main()
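The pieces above can also be exercised without the Streamlit front end. A minimal sketch, assuming HF_TOKEN_INFERENCE is set in the environment and both files are importable; the dataset path is just the example from the input placeholder:

# Sketch (not part of the commit): run the evaluation outside the Streamlit UI.
from app import load_readme, check_token_limit, evaluate_readme
from prompt import prompt_enhanced

readme = load_readme("amirveyseh/acronym_identification")
if readme:
    readme = check_token_limit(readme)  # truncate to 8192 tokens if needed
    result = evaluate_readme(readme, prompt_enhanced)
    print(result)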
prompt.py
ADDED
@@ -0,0 +1,59 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 11 18:53:59 2024

@author: legalchain
"""
default_prompt = """
Evaluate the given dataset card based on these mandatory criteria:
1. Purpose and Applications: Clearly articulates the dataset's purpose and potential real-world applications.
2. Unique Value: Explicitly states what makes this dataset unique or valuable compared to others.
3. Content Description: Provides detailed information about the actual data content, not just structure.
4. Completeness: Covers dataset size, data distribution, collection methodology, and any preprocessing steps.
5. Limitations and Biases: Discusses any known limitations, biases, or ethical considerations.
6. Human Curation: Shows evidence of human-added insights, context, or valuable information beyond auto-generated content.

Rate the card from 1 to 5, with half-points allowed (e.g., 2.5).
Dataset Card to Evaluate: {readme_content}
"""

prompt_enhanced = """Evaluate the given dataset card based on these mandatory criteria:

# Steps

1. **Data Origin**: Evaluate the description of where the data originates from. Check whether it includes details such as the data source, collection methods, and any preprocessing steps.

2. **Usage**: Assess how the model usage is outlined in the card. Consider the intended applications, recommended scenarios, and potential misuse cases.

3. **Biases**: Review the section addressing biases. Determine whether it identifies possible biases in the data or the model's output and how these are mitigated or acknowledged.

4. **Performance**: Check for clear information on the model's performance, including metrics, benchmarks, and testing conditions.

5. **Limitations**: Look for a discussion of the model's limitations, noting any restrictions on the types of data or contexts it should be used in.

6. **Ethics and Safety**: Evaluate any ethical considerations and safety precautions mentioned, focusing on steps taken to ensure responsible use of the model.

7. **Transparency and Explainability**: Determine whether the card includes details that enhance transparency, such as architecture, algorithms used, and explainability measures.

# Output Format

Provide a detailed paragraph for each aspect (data origin, usage, biases, etc.), specifying the strengths and weaknesses observed in the model card. Summarize with a concluding statement about the overall quality and completeness of the model card.

# Examples

- **Data Origin**: The model card should comprehensively detail the origin of the data, including the specific datasets used, any licensing or ethical considerations in data collection, and a summary of preprocessing steps (e.g., "The data originates from public health records collected between 2010-2020, after anonymization and normalization processes were applied").

- **Biases**: An exemplary model card would identify potential biases by describing the demographic distribution of the training data (e.g., "The dataset has a skew towards urban populations, which may affect the model's accuracy in rural settings").

# Notes
- Rate the card from 1 to 5, with half-points allowed (e.g., 2.5).
- Create a Markdown table with item names and scores at the beginning of your response.
- Pay special attention to how each section contributes to understanding the model's behavior and reliability.
- Consider the completeness and clarity of the information presented.
- Document any missing elements or recommendations for improvement.

Dataset Card to Evaluate: {readme_content}


"""
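Both templates end with a single {readme_content} placeholder that app.py fills via str.format. A minimal illustration of that step; the card text below is purely hypothetical:

# Sketch (not part of the commit): fill the template with a dataset card.
from prompt import prompt_enhanced

card_text = "# acronym_identification\nSentences annotated with acronym spans..."
filled_prompt = prompt_enhanced.format(readme_content=card_text)
print(filled_prompt)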