bagbreizh committed
Commit 5c5cb78 · 1 Parent(s): 9f74345

First commit

Files changed (2)
  1. app.py +92 -0
  2. prompt.py +59 -0
app.py ADDED
@@ -0,0 +1,92 @@
+import os
+import requests
+import streamlit as st
+from huggingface_hub import InferenceClient
+from prompt import default_prompt, prompt_enhanced
+from transformers import GPT2Tokenizer
+# Initialize a GPT-2 tokenizer, used here as a rough proxy for counting tokens
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+# Function to load the README.md directly from the Hugging Face API
+def load_readme(dataset_name: str):
+    api_url = f"https://huggingface.co/datasets/{dataset_name}/raw/main/README.md"
+
+    try:
+        response = requests.get(api_url)
+        response.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        st.error(f"Error loading the README.md: {e}")
+        return None
+
+    return response.text
+
+# Function to check whether the README content exceeds the token limit
+def check_token_limit(content: str, max_tokens: int = 8192):
+    tokens = tokenizer.encode(content)
+    if len(tokens) > max_tokens:
+        truncated_content = tokenizer.decode(tokens[:max_tokens])
+        st.warning("Warning: The README.md exceeds 8192 tokens and has been truncated for evaluation. This may affect the quality of the results.")
+        return truncated_content
+    return content
+
+# Function to evaluate the quality of the dataset card
+def evaluate_readme(readme_content: str, user_prompt: str):
+    # Retrieve the inference token from environment variables
+    hf_token = os.getenv('HF_TOKEN_INFERENCE')
+
+    # Ensure the token is available
+    if not hf_token:
+        st.error("The Hugging Face inference token is not configured. Please ensure HF_TOKEN_INFERENCE is set.")
+        return None
+
+    # Initialize the inference client with the specified model
+    client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct", token=hf_token)
+
+    # User-customizable prompt
+    prompt = user_prompt.format(readme_content=readme_content)
+
+    messages = [
+        {'role': 'system', 'content': "You are an expert in the legal field, especially in Artificial Intelligence and data privacy."},
+        {'role': 'user', 'content': prompt}
+    ]
+
+    # Call the model to get an evaluation
+    response = client.chat_completion(
+        model="meta-llama/Meta-Llama-3-70B-Instruct",
+        messages=messages,
+        tool_choice="auto",
+        max_tokens=500,
+    )
+
+    return response.choices[0].message.content
+
+# Streamlit interface
+def main():
+
+    from dotenv import load_dotenv
+    load_dotenv()
+    st.title("Dataset Card Evaluator")
+
+    # Dataset name input
+    dataset_name = st.text_input("Path to HF Dataset (e.g., amirveyseh/acronym_identification)")
+
+    if dataset_name:
+        # Load and display the dataset's README.md
+        readme = load_readme(dataset_name)
+
+        if readme:
+            # Check the token limit and truncate if necessary
+            readme = check_token_limit(readme)
+
+            st.subheader("README.md content:")
+            st.text_area("README.md", readme, height=200)
+
+            # Button to evaluate the documentation
+            if st.button("Evaluate dataset documentation"):
+                with st.spinner("Audit in progress..."):
+                    evaluation_result = evaluate_readme(readme, prompt_enhanced)
+                    if evaluation_result:
+                        st.subheader("Evaluation Result:")
+                        st.write(evaluation_result)
+
+if __name__ == "__main__":
+    main()
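To smoke-test the new helpers without the UI (the app itself is launched with `streamlit run app.py`, with HF_TOKEN_INFERENCE set in the environment or a .env file), something like the sketch below works. This is a hypothetical snippet, not part of the commit; it assumes app.py is importable, requests and transformers are installed, and that Streamlit calls made outside `streamlit run` only log warnings.

# sanity_check.py — hypothetical helper, not included in this commit
from app import load_readme, check_token_limit

# Fetch the card of a public dataset and apply the same 8192-token
# truncation the app performs before evaluation.
readme = load_readme("amirveyseh/acronym_identification")
if readme is not None:
    readme = check_token_limit(readme)
    print(readme[:300])  # preview of the (possibly truncated) card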
prompt.py ADDED
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Oct 11 18:53:59 2024
+
+@author: legalchain
+"""
+default_prompt = """
+Evaluate the given dataset card based on these mandatory criteria:
+1. Purpose and Applications: Clearly articulates the dataset's purpose and potential real-world applications.
+2. Unique Value: Explicitly states what makes this dataset unique or valuable compared to others.
+3. Content Description: Provides detailed information about the actual data content, not just its structure.
+4. Completeness: Covers dataset size, data distribution, collection methodology, and any preprocessing steps.
+5. Limitations and Biases: Discusses any known limitations, biases, or ethical considerations.
+6. Human Curation: Shows evidence of human-added insights, context, or valuable information beyond auto-generated content.
+
+Rate the card from 1 to 5, with half-points allowed (e.g., 2.5).
+Dataset Card to Evaluate: {readme_content}
+"""
+
+prompt_enhanced = """Evaluate the given dataset card based on these mandatory criteria:
+
+# Steps
+
+1. **Data Origin**: Evaluate the description of where the data originates. Check whether it includes details such as the data source, collection methods, and any preprocessing steps.
+
+2. **Usage**: Assess how the intended usage is outlined in the card. Consider the intended applications, recommended scenarios, and potential misuse cases.
+
+3. **Biases**: Review the section addressing biases. Determine whether it identifies possible biases in the data and how these are mitigated or acknowledged.
+
+4. **Performance**: Check for clear information on reported performance, including metrics, benchmarks, and testing conditions.
+
+5. **Limitations**: Look for a discussion of the dataset's limitations, noting any restrictions on the types of data or contexts it should be used in.
+
+6. **Ethics and Safety**: Evaluate any ethical considerations and safety precautions mentioned, focusing on steps taken to ensure responsible use of the data.
+
+7. **Transparency and Explainability**: Determine whether the card includes details that enhance transparency, such as the data pipeline, algorithms used, and explainability measures.
+
+# Output Format
+
+Provide a detailed paragraph for each aspect (data origin, usage, biases, etc.) specifying the strengths and weaknesses observed in the dataset card. Summarize with a concluding statement about the overall quality and completeness of the dataset card.
+
+# Examples
+
+- **Data Origin**: The dataset card should comprehensively detail the origin of the data, including the specific sources used, any licensing or ethical considerations in data collection, and a summary of preprocessing steps (e.g., "The data originates from public health records collected between 2010-2020, after anonymization and normalization processes were applied").
+
+- **Biases**: An exemplary dataset card would identify potential biases by describing the demographic distribution of the training data (e.g., "The dataset is skewed towards urban populations, which may affect model accuracy in rural settings").
+
+# Notes
+- Rate the card from 1 to 5, with half-points allowed (e.g., 2.5).
+- Begin your response with a markdown table listing each item's name and score.
+- Pay special attention to how each section contributes to understanding the dataset's quality and reliability.
+- Consider the completeness and clarity of the information presented.
+- Document any missing elements or recommendations for improvement.
+
+Dataset Card to Evaluate: {readme_content}
+
+
+"""