Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,98 +1,75 @@
|
|
1 |
-
import gradio as gr
|
2 |
import requests
|
3 |
-
|
4 |
-
from typing import Dict, List
|
|
|
5 |
|
6 |
-
|
7 |
-
"
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
response = requests.post(
|
19 |
-
|
20 |
headers={"Content-Type": "application/json"},
|
21 |
-
json=
|
22 |
-
"feature": {
|
23 |
-
"modelId": "gemma-2-2b",
|
24 |
-
"layer": "0-gemmascope-mlp-16k",
|
25 |
-
"index": str(feature_id)
|
26 |
-
},
|
27 |
-
"customText": text
|
28 |
-
}
|
29 |
)
|
30 |
-
|
31 |
-
values = result.get('values', [0])
|
32 |
-
return {
|
33 |
-
'id': feature_id,
|
34 |
-
'activation': max(values),
|
35 |
-
'token': result['tokens'][result['maxValueTokenIndex']] if values else None
|
36 |
-
}
|
37 |
|
38 |
-
def
|
39 |
-
|
40 |
-
|
41 |
-
futures = []
|
42 |
-
for i in range(0, 16384, batch_size):
|
43 |
-
futures.extend([
|
44 |
-
executor.submit(get_feature_activation, text, idx)
|
45 |
-
for idx in range(i, min(i + batch_size, 16384))
|
46 |
-
])
|
47 |
-
|
48 |
-
for future in executor.map(lambda f: f.result(), futures):
|
49 |
-
if future['activation'] > 1.0:
|
50 |
-
features.append(future)
|
51 |
-
|
52 |
-
return sorted(features, key=lambda x: x['activation'], reverse=True)[:10]
|
53 |
|
54 |
-
def
|
55 |
-
|
56 |
-
|
57 |
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
output += f"- Peak Token: {f['token']}\n\n"
|
63 |
-
|
64 |
-
dashboard_url = f"https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/{features[0]['id']}?embed=true&embedexplanation=true&embedplots=true&embedtest=true&height=300"
|
65 |
-
iframe = f'<iframe src="{dashboard_url}" width="100%" height="600px" frameborder="0"></iframe>'
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
lines=5,
|
78 |
-
placeholder="Enter brand message or tagline to analyze...",
|
79 |
-
label="Brand Message"
|
80 |
-
)
|
81 |
-
analyze_btn = gr.Button("Analyze Neural Patterns")
|
82 |
-
gr.Examples(BRAND_EXAMPLES, inputs=input_text)
|
83 |
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
outputs=[output_text, dashboard, feature_label]
|
93 |
-
)
|
94 |
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
-
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import requests
|
2 |
+
import json
|
3 |
+
from typing import Dict, List
|
4 |
+
import numpy as np
|
5 |
|
6 |
+
def get_activation_values(text: str, feature_id: int, timeout: float = 30.0) -> Dict:
    """Get activation values for a specific feature from the Neuronpedia API.

    Args:
        text: The text to analyze.
        feature_id: Index of the SAE feature to query (stringified for the API).
        timeout: Seconds to wait for the HTTP response. New keyword with a
            default, so existing callers are unaffected; previously the
            request had no timeout and could block forever.

    Returns:
        The parsed JSON response. Callers read 'tokens' and 'values' from it —
        schema assumed from call sites; confirm against the Neuronpedia API docs.
    """
    url = "https://www.neuronpedia.org/api/activation/new"
    data = {
        "feature": {
            "modelId": "gemma-2-2b",
            "layer": "0-gemmascope-mlp-16k",
            "index": str(feature_id)
        },
        "customText": text
    }
    # Without a timeout, requests.post blocks indefinitely on a stalled
    # connection — fatal for the 16k-feature batch scan that calls this.
    response = requests.post(
        url,
        headers={"Content-Type": "application/json"},
        json=data,
        timeout=timeout,
    )
    return response.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
+
def calculate_density(values: List[float], threshold: float = 0.5) -> float:
    """Calculate activation density (% of tokens with activation > threshold).

    Args:
        values: Per-token activation values.
        threshold: A value must be strictly greater than this to count.

    Returns:
        Fraction of ``values`` strictly above ``threshold``; 0.0 for an empty
        list. (Previously an empty list raised ZeroDivisionError, and the
        caller feeds it ``result.get('values', [])``, which can be empty.)
    """
    if not values:
        return 0.0
    return sum(1 for v in values if v > threshold) / len(values)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
+
def find_top_features_per_token(text: str, num_features: int = 5,
                                max_density: float = 0.01, batch_size: int = 100) -> Dict:
    """Find top features for each token with density filtering.

    Args:
        text: Text to analyze.
        num_features: Maximum number of features kept per token.
        max_density: Features active on more than this fraction of tokens
            are skipped as too generic.
        batch_size: Chunk size for the feature-id scan. Purely organizational:
            every id in [0, 16384) is still queried exactly once.

    Returns:
        Mapping of token string -> list of up to ``num_features`` dicts
        ('feature_id', 'activation', 'density'), sorted by activation
        descending.

    NOTE(review): keys are raw token strings, so repeated tokens in ``text``
    collapse into one entry whose feature list mixes both positions —
    confirm that is intended before relying on per-position results.
    """
    # First get initial feature activations to get tokens
    sample_activation = get_activation_values(text, 0)
    tokens = sample_activation['tokens']
    token_features = {token: [] for token in tokens}

    # Process features in batches
    for start_idx in range(0, 16384, batch_size):
        for feature_id in range(start_idx, min(start_idx + batch_size, 16384)):
            result = get_activation_values(text, feature_id)
            values = result.get('values', [])
            if not values:
                # Guard: an empty/missing 'values' payload previously hit a
                # ZeroDivisionError inside calculate_density.
                continue

            # Calculate density and skip if too high
            density = calculate_density(values)
            if density > max_density:
                continue

            # Add feature to each token's list if activated
            # (enumerate index was unused in the original — dropped).
            for token, value in zip(tokens, values):
                if value > 0.5:  # Activation threshold
                    token_features[token].append({
                        'feature_id': feature_id,
                        'activation': value,
                        'density': density
                    })

    # Sort features for each token and keep top N
    for token in token_features:
        token_features[token].sort(key=lambda x: x['activation'], reverse=True)
        token_features[token] = token_features[token][:num_features]

    return token_features
|
64 |
+
|
65 |
+
# Smoke test: analyze one short tagline and report its active features.
text = "Nike - Just Do It"
token_features = find_top_features_per_token(text)

# Report: one section per token that retained at least one feature.
print(f"Text: {text}\n")
for tok, feats in token_features.items():
    if not feats:
        continue  # tokens with no active features are omitted
    print(f"\nToken: {tok}")
    for entry in feats:
        print(f"  Feature {entry['feature_id']}: activation={entry['activation']:.3f}, density={entry['density']:.3%}")
|