Ozgur Unlu committed on
Commit
0418a32
·
1 Parent(s): 53feca5

Move compliance rules into separate files. Small Gradio UI changes

Browse files
app.py CHANGED
@@ -5,45 +5,13 @@ from transformers import pipeline, DistilBertTokenizer, DistilBertForSequenceCla
5
  import numpy as np
6
  from PIL import Image
7
  import json
 
8
 
9
  # Initialize OCR reader
10
  reader = easyocr.Reader(['en'])
11
 
12
- # Define compliance rules
13
- COMPLIANCE_RULES = {
14
- "US_SEC": {
15
- "required_disclaimers": [
16
- "past performance",
17
- "investment risks",
18
- "regulatory statement"
19
- ],
20
- "prohibited_terms": [
21
- "guaranteed returns",
22
- "risk-free",
23
- "sure thing"
24
- ]
25
- },
26
- "UK_FCA": {
27
- "required_disclaimers": [
28
- "capital at risk",
29
- "regulated by FCA"
30
- ],
31
- "prohibited_terms": [
32
- "guaranteed profit",
33
- "no risk"
34
- ]
35
- },
36
- "EU": {
37
- "required_disclaimers": [
38
- "risk warning",
39
- "regulatory information"
40
- ],
41
- "prohibited_terms": [
42
- "assured returns",
43
- "no losses"
44
- ]
45
- }
46
- }
47
 
48
  def extract_text_from_image(image):
49
  """Extract text from image using EasyOCR"""
@@ -52,35 +20,62 @@ def extract_text_from_image(image):
52
 
53
  def check_compliance(text):
54
  """Check text for compliance across all regions"""
 
55
  report = {
56
  "compliant": True,
57
  "violations": [],
58
  "warnings": [],
59
  "channel_risks": {
60
- "email": 0,
61
- "social": 0,
62
- "print": 0
63
  }
64
  }
65
 
66
- # Check each region's rules
67
- for region, rules in COMPLIANCE_RULES.items():
68
  # Check prohibited terms
69
- for term in rules["prohibited_terms"]:
70
- if term.lower() in text.lower():
 
71
  report["compliant"] = False
72
- report["violations"].append(f"{region}: Prohibited term '{term}' found")
73
- report["channel_risks"]["email"] += 2
74
- report["channel_risks"]["social"] += 2
75
- report["channel_risks"]["print"] += 1
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  # Check required disclaimers
78
- for disclaimer in rules["required_disclaimers"]:
79
- if disclaimer.lower() not in text.lower():
80
- report["warnings"].append(f"{region}: Missing disclaimer about {disclaimer}")
81
- report["channel_risks"]["email"] += 1
82
- report["channel_risks"]["social"] += 1
83
- report["channel_risks"]["print"] += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  return report
86
 
@@ -99,31 +94,47 @@ def analyze_ad_copy(image):
99
  if compliance_report["violations"]:
100
  report_text += "Violations Found:\n"
101
  for violation in compliance_report["violations"]:
102
- report_text += f"• {violation}\n"
103
  report_text += "\n"
104
 
105
  if compliance_report["warnings"]:
106
  report_text += "Warnings:\n"
107
  for warning in compliance_report["warnings"]:
108
- report_text += f"• {warning}\n"
109
  report_text += "\n"
110
 
111
  report_text += "Channel Risk Assessment:\n"
112
- for channel, risk in compliance_report["channel_risks"].items():
113
- risk_level = "Low" if risk < 2 else "Medium" if risk < 4 else "High"
114
- report_text += f" {channel.capitalize()}: {risk_level} Risk\n"
 
 
 
 
115
 
116
  return report_text
117
 
118
  # Create Gradio interface
119
  iface = gr.Interface(
120
  fn=analyze_ad_copy,
121
- inputs=gr.Image(type="pil"),
 
 
 
 
 
 
 
 
 
 
 
122
  outputs=gr.Textbox(label="Compliance Report", lines=10),
123
  title="Marketing Campaign Compliance Checker",
124
  description="Upload marketing material to check compliance with US (SEC), UK (FCA), and EU financial regulations.",
125
  examples=[],
126
- theme=gr.themes.Base()
 
127
  )
128
 
129
  # Launch the app
 
5
  import numpy as np
6
  from PIL import Image
7
  import json
8
+ from compliance_rules import ComplianceRules
9
 
10
  # Initialize OCR reader
11
  reader = easyocr.Reader(['en'])
12
 
13
+ # Initialize compliance rules
14
+ compliance_rules = ComplianceRules()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def extract_text_from_image(image):
17
  """Extract text from image using EasyOCR"""
 
20
 
21
def check_compliance(text):
    """Check text for compliance across all regions.

    Every region returned by ``compliance_rules.get_all_rules()`` is scanned
    case-insensitively (plain substring match) for its prohibited terms, their
    listed variations, and its required disclaimers.

    Args:
        text: Text to scan. ``None`` is treated as an empty string.

    Returns:
        dict with the keys:
            "compliant": False once any prohibited term (or variation) is
                found; missing disclaimers do not change it.
            "violations": one dict per matched prohibited term
                (region, type, term, severity).
            "warnings": one dict per missing disclaimer
                (region, type, disclaimer_type, severity).
            "channel_risks": per channel ("email", "social", "print") a dict
                with an accumulated "score" and a list of "details" strings.
    """
    rules = compliance_rules.get_all_rules()
    # Lower-case once instead of once per term/variation/disclaimer.
    haystack = (text or "").lower()

    report = {
        "compliant": True,
        "violations": [],
        "warnings": [],
        "channel_risks": {
            "email": {"score": 0, "details": []},
            "social": {"score": 0, "details": []},
            "print": {"score": 0, "details": []}
        }
    }
    channel_risks = report["channel_risks"]

    for region, region_rules in rules.items():
        # Check prohibited terms
        for term_info in region_rules["prohibited_terms"]:
            term = term_info["term"].lower()
            spellings = [term] + [var.lower() for var in term_info["variations"]]
            if not any(spelling in haystack for spelling in spellings):
                continue

            report["compliant"] = False
            # The canonical term is reported even when a variation matched.
            violation = f"{region}: Prohibited term '{term}' found"
            report["violations"].append({
                "region": region,
                "type": "prohibited_term",
                "term": term,
                "severity": term_info["severity"]
            })

            # Update channel risks; the score does not depend on the channel,
            # so it is computed once per finding.
            risk_score = compliance_rules.calculate_risk_score([violation], [], region)
            for channel, channel_info in channel_risks.items():
                channel_info["score"] += risk_score
                channel_info["details"].append(
                    f"Prohibited term '{term}' increases {channel} risk"
                )

        # Check required disclaimers
        for disclaimer in region_rules["required_disclaimers"]:
            # Any one of the accepted wordings satisfies the requirement.
            if any(disc_text.lower() in haystack for disc_text in disclaimer["text"]):
                continue

            warning = f"{region}: Missing {disclaimer['type']} disclaimer"
            report["warnings"].append({
                "region": region,
                "type": "missing_disclaimer",
                "disclaimer_type": disclaimer["type"],
                "severity": disclaimer["severity"]
            })

            # Update channel risks.
            # NOTE(review): the finding is passed via the `warnings` argument;
            # confirm calculate_risk_score actually scores that argument.
            risk_score = compliance_rules.calculate_risk_score([], [warning], region)
            for channel, channel_info in channel_risks.items():
                channel_info["score"] += risk_score
                channel_info["details"].append(
                    f"Missing {disclaimer['type']} disclaimer affects {channel} risk"
                )

    return report
81
 
 
94
  if compliance_report["violations"]:
95
  report_text += "Violations Found:\n"
96
  for violation in compliance_report["violations"]:
97
+ report_text += f"• {violation['region']}: {violation['type']} - '{violation['term']}' (Severity: {violation['severity']})\n"
98
  report_text += "\n"
99
 
100
  if compliance_report["warnings"]:
101
  report_text += "Warnings:\n"
102
  for warning in compliance_report["warnings"]:
103
+ report_text += f"• {warning['region']}: {warning['disclaimer_type']} (Severity: {warning['severity']})\n"
104
  report_text += "\n"
105
 
106
  report_text += "Channel Risk Assessment:\n"
107
+ for channel, risk_info in compliance_report["channel_risks"].items():
108
+ score = risk_info["score"]
109
+ risk_level = "Low" if score < 3 else "Medium" if score < 6 else "High"
110
+ report_text += f"• {channel.capitalize()}: {risk_level} Risk (Score: {score})\n"
111
+ if risk_info["details"]:
112
+ for detail in risk_info["details"]:
113
+ report_text += f" - {detail}\n"
114
 
115
  return report_text
116
 
117
  # Create Gradio interface
118
  iface = gr.Interface(
119
  fn=analyze_ad_copy,
120
+ inputs=[
121
+ gr.Image(
122
+ type="pil",
123
+ label="Upload Marketing Material",
124
+ height=300, # Fixed height
125
+ width=400, # Fixed width
126
+ image_mode="RGB",
127
+ scale=1, # NOTE(review): in Gradio, scale is the relative size among adjacent components; it presumably does not disable auto-scaling - confirm intent
128
+ source="upload", # NOTE(review): newer Gradio releases appear to use sources=["upload"] instead - confirm against the installed version
129
+ tool="select" # NOTE(review): this keyword appears to be gone from newer Gradio Image components - confirm against the installed version
130
+ )
131
+ ],
132
  outputs=gr.Textbox(label="Compliance Report", lines=10),
133
  title="Marketing Campaign Compliance Checker",
134
  description="Upload marketing material to check compliance with US (SEC), UK (FCA), and EU financial regulations.",
135
  examples=[],
136
+ theme=gr.themes.Base(),
137
+ allow_flagging="never" # NOTE(review): newer Gradio releases appear to rename this to flagging_mode - confirm against the installed version
138
  )
139
 
140
  # Launch the app
compliance_rules/__init__.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Loader for compliance rules from all regulatory bodies"""
2
+
3
+ from .sec_rules import SEC_RULES
4
+ from .fca_rules import FCA_RULES
5
+ from .eu_rules import EU_RULES
6
+
7
class ComplianceRules:
    """Registry of marketing compliance rules keyed by region.

    The region keys are "US_SEC", "UK_FCA" and "EU"; each value is the rule
    dictionary imported from the corresponding ``*_rules`` module.
    """

    def __init__(self):
        self.rules = {
            "US_SEC": SEC_RULES,
            "UK_FCA": FCA_RULES,
            "EU": EU_RULES
        }

    def get_all_rules(self):
        """Return all compliance rules as a region -> rules mapping."""
        return self.rules

    def get_rules_by_region(self, region):
        """Return the rules for ``region``, or an empty dict if it is unknown."""
        return self.rules.get(region, {})

    def get_combined_prohibited_terms(self):
        """Return every prohibited term and variation across all regions.

        Duplicates shared between regions are collapsed. The result is sorted
        so that the order is deterministic between runs.
        """
        prohibited_terms = set()
        for region_rules in self.rules.values():
            for term_dict in region_rules["prohibited_terms"]:
                prohibited_terms.add(term_dict["term"])
                prohibited_terms.update(term_dict["variations"])
        return sorted(prohibited_terms)

    def get_channel_requirements(self, channel):
        """Return ``channel``'s requirements for each region that defines them.

        Regions without a "channel_specific_rules" entry for ``channel`` are
        omitted from the returned region -> requirements mapping.
        """
        requirements = {}
        for region, rules in self.rules.items():
            channel_rules = rules.get("channel_specific_rules", {})
            if channel in channel_rules:
                requirements[region] = channel_rules[channel]
        return requirements

    def calculate_risk_score(self, violations, warnings, region):
        """Calculate a risk score from violation and warning messages.

        Each message is classified by keyword: one containing "disclaimer"
        scores as ``missing_disclaimer``, one containing "prohibited" as
        ``prohibited_term``, anything else as ``misleading_statement``. The
        weights come from the region's "risk_scoring" table.

        Args:
            violations: Iterable of violation message strings (may be None).
            warnings: Iterable of warning message strings (may be None).
                These are scored with the same keyword rules as violations.
            region: Region key whose "risk_scoring" weights apply.

        Returns:
            The summed score, or 0 when ``region`` is unknown.
        """
        if region not in self.rules:
            return 0

        risk_scoring = self.rules[region]["risk_scoring"]
        score = 0

        # Warnings must be scored as well; otherwise a missing disclaimer
        # reported as a warning would never contribute to the risk score.
        findings = list(violations or []) + list(warnings or [])
        for finding in findings:
            message = finding.lower()
            if "disclaimer" in message:
                score += risk_scoring["missing_disclaimer"]
            elif "prohibited" in message:
                score += risk_scoring["prohibited_term"]
            else:
                score += risk_scoring["misleading_statement"]

        return score
compliance_rules/eu_rules.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """EU compliance rules for marketing materials"""
2
+
3
# EU rule set. Disclaimer "text" entries are alternative wordings (any one
# satisfies the requirement); prohibited "variations" are alternative
# spellings of "term".
# NOTE(review): "context_check", "channel_specific_rules" and
# "mifid_requirements" have no visible consumer in the checker - confirm usage.
EU_RULES = {
    "required_disclaimers": [
        {
            "type": "risk_warning",
            "text": ["investment involves risk",
                     "you could lose your invested capital",
                     "past performance is not a reliable guide to future performance"],
            "severity": "high",
            "languages": ["en"]  # expand for other EU languages
        },
        {
            "type": "regulatory_information",
            "text": ["regulated by", "authorized under EU regulations"],
            "severity": "high",
            "languages": ["en"]
        },
        {
            "type": "costs_disclosure",
            "text": ["fees and charges apply",
                     "view our fee schedule"],
            "severity": "medium",
            "languages": ["en"]
        }
    ],
    "prohibited_terms": [
        {
            "term": "assured returns",
            "variations": ["guaranteed returns", "secure profit", "guaranteed profit"],
            "severity": "high",
            "context_check": True
        },
        {
            "term": "no losses",
            # "risk-free" added: matching is by substring, so the hyphenated
            # spelling is not caught by "risk free".
            "variations": ["cannot lose", "risk free", "safe investment", "risk-free"],
            "severity": "high",
            "context_check": True
        },
        {
            "term": "guaranteed performance",
            "variations": ["assured performance", "secured returns"],
            "severity": "high",
            "context_check": True
        }
    ],
    "channel_specific_rules": {
        "email": {
            "required": ["unsubscribe option", "company information"],
            "character_limit": None,
            "gdpr_compliance": True
        },
        "social": {
            "required": ["#ad", "#promotion"],
            "risk_warning_placement": "visible without clicking"
        },
        "print": {
            "required": ["full risk warning", "company details"],
            "font_size_minimum": "9pt",
            "prominence": "clearly legible"
        }
    },
    # Per-finding weights used when scoring risk for this region.
    "risk_scoring": {
        "missing_disclaimer": 4,
        "prohibited_term": 5,
        "misleading_statement": 4,
        "risk_thresholds": {
            "low": 3,
            "medium": 6,
            "high": 9
        }
    },
    "mifid_requirements": {
        "fair_presentation": {
            "required": ["balanced view", "prominent risk warnings"],
            "prohibited": ["emphasize benefits without risks"]
        },
        "target_market": {
            "required": ["clear target market identification"],
            "prohibited": ["mass marketing of professional products"]
        }
    }
}
compliance_rules/fca_rules.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """UK FCA compliance rules for marketing materials"""
2
+
3
# UK FCA rule set. Disclaimer "text" entries are alternative wordings (any one
# satisfies the requirement); prohibited "variations" are alternative
# spellings of "term".
# NOTE(review): "context_check", "channel_specific_rules" and
# "specific_requirements" have no visible consumer in the checker - confirm usage.
FCA_RULES = {
    "required_disclaimers": [
        {
            "type": "capital_risk",
            "text": ["capital at risk",
                     "you may lose some or all of your investment",
                     "your capital is at risk"],
            "severity": "high"
        },
        {
            "type": "regulatory_status",
            "text": ["regulated by the Financial Conduct Authority",
                     "authorised and regulated by the FCA",
                     "FCA regulated"],
            "severity": "high"
        },
        {
            "type": "past_performance",
            "text": ["past performance is not a reliable indicator of future results",
                     "past performance does not guarantee future returns"],
            "severity": "high"
        }
    ],
    "prohibited_terms": [
        {
            "term": "guaranteed profit",
            "variations": ["secure profit", "assured gains", "guaranteed returns"],
            "severity": "high",
            "context_check": True
        },
        {
            "term": "no risk",
            # "risk-free" added: matching is by substring, so the hyphenated
            # spelling is not caught by "risk free".
            "variations": ["risk free", "zero risk", "safe investment", "risk-free"],
            "severity": "high",
            "context_check": True
        },
        {
            "term": "secured returns",
            "variations": ["protected returns", "guaranteed income"],
            "severity": "high",
            "context_check": True
        }
    ],
    "channel_specific_rules": {
        "email": {
            "required": ["opt-out mechanism", "firm details"],
            "character_limit": None
        },
        "social": {
            "required": ["#ad", "#financial promotion"],
            "risk_warning_placement": "prominent"
        },
        "print": {
            "required": ["risk warning", "firm details"],
            "font_size_minimum": "10pt",
            "risk_warning_prominence": "no less prominent than main message"
        }
    },
    # Per-finding weights used when scoring risk for this region.
    "risk_scoring": {
        "missing_disclaimer": 4,
        "prohibited_term": 5,
        "misleading_statement": 4,
        "risk_thresholds": {
            "low": 3,
            "medium": 6,
            "high": 9
        }
    },
    "specific_requirements": {
        "retail_investment": {
            "required_elements": ["past performance warning", "balanced message"],
            "restricted_terms": ["tax-free", "guaranteed"]
        },
        "pension_products": {
            "required_elements": ["tax treatment warning", "age restrictions"],
            "restricted_terms": ["pension liberation", "pension loan"]
        }
    }
}
compliance_rules/sec_rules.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # US SEC compliance rules for marketing materials
2
+
3
# US SEC rule set. Disclaimer "text" entries are alternative wordings (any one
# satisfies the requirement); prohibited "variations" are alternative
# spellings of "term".
# NOTE(review): "context_check" and "channel_specific_rules" have no visible
# consumer in the checker - confirm usage.
SEC_RULES = {
    "required_disclaimers": [
        {
            "type": "past_performance",
            "text": ["past performance is not indicative of future results",
                     "past returns do not guarantee future performance"],
            "severity": "high"
        },
        {
            "type": "investment_risks",
            "text": ["investment involves risk",
                     "you may lose your principal",
                     "investments may lose value"],
            "severity": "high"
        },
        {
            "type": "regulatory_statement",
            "text": ["registered with the Securities and Exchange Commission",
                     "SEC regulated"],
            "severity": "medium"
        }
    ],
    "prohibited_terms": [
        {
            "term": "guaranteed returns",
            "variations": ["guarantee profits", "assured returns", "guaranteed investment"],
            "severity": "high",
            "context_check": True
        },
        {
            "term": "risk-free",
            # "risk free" added: matching is by substring, so the spaced
            # spelling is not caught by the hyphenated term.
            "variations": ["no risk", "zero risk", "riskless", "risk free"],
            "severity": "high",
            "context_check": True
        },
        {
            "term": "sure thing",
            # "can't lose" added: the apostrophe-less "cant lose" does not
            # match the normally punctuated phrase.
            "variations": ["cant lose", "never lose", "always profits", "can't lose"],
            "severity": "high",
            "context_check": False
        }
    ],
    "channel_specific_rules": {
        "email": {
            "required": ["unsubscribe option", "physical address"],
            "character_limit": None
        },
        "social": {
            "required": ["#ad", "disclosure"],
            "character_limit": {
                "twitter": 280,
                "instagram": 2200,
                "linkedin": 3000
            }
        },
        "print": {
            "required": ["full disclaimer", "company details"],
            "font_size_minimum": "8pt"
        }
    },
    # Per-finding weights used when scoring risk for this region.
    "risk_scoring": {
        "missing_disclaimer": 3,
        "prohibited_term": 5,
        "misleading_statement": 4,
        "risk_thresholds": {
            "low": 2,
            "medium": 5,
            "high": 8
        }
    }
}