cyberandy committed on
Commit
5ee5132
·
verified ·
1 Parent(s): e46988f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -86
app.py CHANGED
@@ -1,98 +1,75 @@
1
- import gradio as gr
2
  import requests
3
- from concurrent.futures import ThreadPoolExecutor
4
- from typing import Dict, List, Tuple
 
5
 
6
# Example brand taglines offered as one-click inputs in the Gradio UI.
BRAND_EXAMPLES = [
    "Nike - Just Do It. The power of determination and action.",
    "Apple - Think Different. Innovation through unique perspectives.",
    "McDonald's - I'm Lovin' It. Creating moments of joy and satisfaction.",
    "L'Oréal - Because You're Worth It. Embracing self-worth and beauty.",
    "BMW - The Ultimate Driving Machine. Engineering excellence in motion.",
    "Mastercard - There are some things money can't buy. For everything else, there's MasterCard.",
    "Google - Don't be evil. Building technology for a better world.",
    "Amazon - Work Hard. Have Fun. Make History. Revolutionizing how the world shops.",
]
16
-
17
def get_feature_activation(text: str, feature_id: int) -> Dict:
    """Query Neuronpedia for one SAE feature's activation on *text*.

    Args:
        text: Text to run through the model.
        feature_id: Index of the feature in the 16k GemmaScope MLP SAE.

    Returns:
        Dict with the feature ``id``, its peak ``activation`` over the
        tokens (0 when the API returned no values), and the ``token`` at
        which that peak occurred (None when unavailable).
    """
    response = requests.post(
        "https://www.neuronpedia.org/api/activation/new",
        headers={"Content-Type": "application/json"},
        json={
            "feature": {
                "modelId": "gemma-2-2b",
                "layer": "0-gemmascope-mlp-16k",
                "index": str(feature_id)
            },
            "customText": text
        },
        timeout=30,  # fail fast instead of hanging on a stalled connection
    )
    result = response.json()
    # Original used `result.get('values', [0])`, which made the truthiness
    # guard below always pass and risked a KeyError on 'tokens' /
    # 'maxValueTokenIndex' for malformed responses.
    values = result.get('values') or []
    tokens = result.get('tokens')
    max_idx = result.get('maxValueTokenIndex')
    has_peak = bool(values) and tokens is not None and max_idx is not None
    return {
        'id': feature_id,
        'activation': max(values) if values else 0,
        'token': tokens[max_idx] if has_peak else None
    }
37
 
38
def analyze_text(text: str, batch_size: int = 50) -> List[Dict]:
    """Scan all 16,384 SAE features against *text* and keep the strongest.

    Args:
        text: Text to analyze.
        batch_size: Kept for backward compatibility; the old batched loop
            submitted every feature up front anyway, so batching is gone.

    Returns:
        Up to 10 feature dicts with peak activation above 1.0, sorted by
        activation, strongest first.
    """
    with ThreadPoolExecutor(max_workers=10) as executor:
        # executor.map preserves order and resolves each future for us —
        # clearer than the old submit-then-map(lambda f: f.result()) dance.
        results = executor.map(
            lambda idx: get_feature_activation(text, idx),
            range(16384),
        )
        features = [r for r in results if r['activation'] > 1.0]

    return sorted(features, key=lambda x: x['activation'], reverse=True)[:10]
53
 
54
def format_output(features: List[Dict]) -> Tuple[str, str, str]:
    """Render analysis results for the UI.

    Args:
        features: Feature dicts with 'id', 'activation', 'token' keys,
            strongest first; may be empty.

    Returns:
        Tuple of (markdown report, dashboard iframe HTML, dashboard label).
    """
    if not features:
        return "No significant neural activations detected", "", ""

    # Build the markdown report as a list of parts, joined once.
    parts = ["# Neural Analysis Results\n\n"]
    for feature in features:
        parts.append(
            f"### Feature {feature['id']}\n"
            f"- Activation: {feature['activation']:.2f}\n"
            f"- Peak Token: {feature['token']}\n\n"
        )
    report = "".join(parts)

    # Embed a Neuronpedia dashboard for the single strongest feature.
    top_id = features[0]['id']
    dashboard_url = (
        "https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/"
        f"{top_id}?embed=true&embedexplanation=true&embedplots=true&embedtest=true&height=300"
    )
    iframe = f'<iframe src="{dashboard_url}" width="100%" height="600px" frameborder="0"></iframe>'

    return report, iframe, f"Feature {top_id} Dashboard"
68
-
69
def create_interface():
    """Build the Gradio Blocks app for brand-message neural analysis."""
    with gr.Blocks() as demo:
        gr.Markdown("# Brand Message Neural Analyzer")
        gr.Markdown("Analyze brand messages and taglines using Gemma's neural features")

        with gr.Row():
            # Left column: input, trigger button, and canned examples.
            with gr.Column():
                message_box = gr.Textbox(
                    lines=5,
                    placeholder="Enter brand message or tagline to analyze...",
                    label="Brand Message"
                )
                run_button = gr.Button("Analyze Neural Patterns")
                gr.Examples(BRAND_EXAMPLES, inputs=message_box)

            # Right column: markdown report, feature label, dashboard embed.
            with gr.Column():
                report_md = gr.Markdown()
                label_text = gr.Text(show_label=False)
                dashboard_html = gr.HTML()

        def _run(text):
            # Pipeline: scan features, then format the results for display.
            return format_output(analyze_text(text))

        run_button.click(
            fn=_run,
            inputs=message_box,
            outputs=[report_md, dashboard_html, label_text]
        )

    return demo
 
 
 
 
 
 
 
 
 
96
 
97
if __name__ == "__main__":
    # Launch the Gradio app only when run as a script, not on import.
    app = create_interface()
    app.launch()
 
 
 
 
 
 
 
1
  import requests
2
+ import json
3
+ from typing import Dict, List
4
+ import numpy as np
5
 
6
def get_activation_values(text: str, feature_id: int) -> Dict:
    """Get activation values for a specific feature.

    Args:
        text: Text to run through the model.
        feature_id: Index of the feature in the 16k GemmaScope MLP SAE.

    Returns:
        The parsed JSON response from Neuronpedia (presumably containing
        'tokens' and 'values' keys, as the caller expects — verify
        against the API docs).

    Raises:
        requests.HTTPError: If the API responds with an error status.
        requests.Timeout: If the request exceeds the 30-second timeout.
    """
    url = "https://www.neuronpedia.org/api/activation/new"
    data = {
        "feature": {
            "modelId": "gemma-2-2b",
            "layer": "0-gemmascope-mlp-16k",
            "index": str(feature_id)
        },
        "customText": text
    }

    response = requests.post(
        url,
        headers={"Content-Type": "application/json"},
        json=data,
        timeout=30,  # fail fast instead of hanging on a stalled connection
    )
    # Surface HTTP errors instead of silently JSON-decoding an error body.
    response.raise_for_status()
    return response.json()
 
 
 
 
 
 
24
 
25
def calculate_density(values: List[float], threshold: float = 0.5) -> float:
    """Calculate activation density (% of tokens with activation > threshold).

    Args:
        values: Per-token activation values.
        threshold: Minimum activation for a token to count as "active"
            (strictly greater-than).

    Returns:
        Fraction of tokens above *threshold*, or 0.0 for an empty list
        (the original raised ZeroDivisionError here, and the caller does
        pass ``result.get('values', [])``).
    """
    if not values:
        return 0.0
    return sum(1 for v in values if v > threshold) / len(values)
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
def find_top_features_per_token(text: str, num_features: int = 5,
                                max_density: float = 0.01, batch_size: int = 100) -> Dict:
    """Find top features for each token with density filtering.

    Args:
        text: Text to analyze.
        num_features: Maximum number of features kept per token.
        max_density: Features active on more than this fraction of tokens
            are discarded as too unspecific.
        batch_size: Kept for backward compatibility; the original nested
            batch loop visited every feature id sequentially anyway.

    Returns:
        Mapping of token string -> list of {'feature_id', 'activation',
        'density'} dicts, sorted by activation, strongest first.

    NOTE(review): keying by token *string* collapses repeated tokens — a
    later occurrence's values land on the same dict entry; confirm this
    is acceptable. Also note this issues one HTTP request per feature
    (16,384 total), so it is very slow.
    """
    # Throwaway feature request just to learn the tokenization of *text*.
    sample_activation = get_activation_values(text, 0)
    tokens = sample_activation['tokens']
    token_features = {token: [] for token in tokens}

    for feature_id in range(16384):
        result = get_activation_values(text, feature_id)
        values = result.get('values', [])
        if not values:
            # Original crashed here: calculate_density divided by len([]).
            continue

        # Skip features that fire on too many tokens (unspecific).
        density = calculate_density(values)
        if density > max_density:
            continue

        # Record the feature on every token where it clearly activates.
        for token, value in zip(tokens, values):
            if value > 0.5:  # Activation threshold
                token_features[token].append({
                    'feature_id': feature_id,
                    'activation': value,
                    'density': density
                })

    # Sort features for each token and keep the top N.
    for token in token_features:
        token_features[token].sort(key=lambda x: x['activation'], reverse=True)
        token_features[token] = token_features[token][:num_features]

    return token_features
64
+
65
def _print_token_features(text: str) -> None:
    """Run the per-token feature scan on *text* and print a short report."""
    token_features = find_top_features_per_token(text)

    print(f"Text: {text}\n")
    for token, features in token_features.items():
        if features:  # Only show tokens with active features
            print(f"\nToken: {token}")
            for feat in features:
                print(f" Feature {feat['feature_id']}: activation={feat['activation']:.3f}, density={feat['density']:.3%}")


if __name__ == "__main__":
    # Guarded so merely importing this module doesn't trigger ~16k API calls
    # (the original ran this scan at module import time).
    _print_token_features("Nike - Just Do It")