Source code for signxai.torch_signxai.methods_impl.smoothgrad

"""PyTorch implementation of SmoothGrad."""
import torch
import torch.nn as nn
import numpy as np
from typing import Tuple, List, Union, Optional


class SmoothGrad:
    """SmoothGrad attribution method.

    Implements SmoothGrad as described in the original paper:
    "SmoothGrad: removing noise by adding noise"
    https://arxiv.org/abs/1706.03825
    """

    def __init__(self, model, num_samples=16, noise_scale=1.0):
        """Initialize SmoothGrad.

        Args:
            model: PyTorch model
            num_samples: Number of noisy samples to use (matches TF default 16)
            noise_scale: Standard deviation of the added noise (matches TF behavior, default 1.0)
        """
        self.model = model
        self.num_samples = num_samples
        self.noise_scale = noise_scale

    def attribute(self, inputs, target=None, num_samples=None, noise_scale=None):
        """Calculate SmoothGrad attribution.

        Args:
            inputs: Input tensor
            target: Target class index (None for argmax)
            num_samples: Override the number of samples (optional)
            noise_scale: Override the noise scale (optional)

        Returns:
            Attribution tensor of the same shape as inputs
        """
        # Use instance defaults if no overrides are provided
        num_samples = num_samples if num_samples is not None else self.num_samples
        noise_scale = noise_scale if noise_scale is not None else self.noise_scale

        # Ensure input is a tensor, and detach it so the original is not modified
        if not isinstance(inputs, torch.Tensor):
            inputs = torch.tensor(inputs, dtype=torch.float32)
        inputs = inputs.clone().detach()

        # Fixed noise standard deviation (matches the TF implementation,
        # which uses np.random.normal with an absolute scale)
        noise_std = noise_scale

        # Switch to eval mode, remembering the original mode
        original_mode = self.model.training
        self.model.eval()

        # Accumulate gradients over the noisy samples
        accumulated_gradients = torch.zeros_like(inputs)

        for _ in range(num_samples):
            # Generate a noisy copy of the input
            noise = torch.normal(0, noise_std, size=inputs.shape, device=inputs.device)
            noisy_input = inputs + noise
            noisy_input.requires_grad_(True)

            # Forward pass
            self.model.zero_grad()
            output = self.model(noisy_input)

            # Determine target classes
            if target is None:
                target_indices = output.argmax(dim=1)
            elif isinstance(target, int):
                target_indices = torch.full(
                    (inputs.shape[0],), target, dtype=torch.long, device=inputs.device
                )
            elif isinstance(target, torch.Tensor):
                if target.numel() == 1:
                    # Single class for all examples
                    target_indices = torch.full(
                        (inputs.shape[0],), target.item(), dtype=torch.long, device=inputs.device
                    )
                else:
                    # Different target for each example (moved to the input's device)
                    target_indices = target.to(device=inputs.device, dtype=torch.long)
            else:
                raise ValueError(f"Unsupported target type: {type(target)}")

            # One-hot encoding for the target classes
            one_hot = torch.zeros_like(output)
            one_hot.scatter_(1, target_indices.view(-1, 1), 1.0)

            # Backward pass
            output.backward(gradient=one_hot)

            # Accumulate gradients
            if noisy_input.grad is not None:
                accumulated_gradients += noisy_input.grad

        # Restore the original model mode
        self.model.train(original_mode)

        # Average the gradients
        smoothgrad_attribution = accumulated_gradients / num_samples

        # Zero out tiny values for numerical stability
        smoothgrad_attribution[torch.abs(smoothgrad_attribution) < 1e-10] = 0.0

        return smoothgrad_attribution
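
# Usage sketch (illustrative, not part of the original module; the toy model
# below is an assumption, and any nn.Module whose forward returns
# (batch, num_classes) logits works the same way):
#
#   toy_model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 8 * 8, 10))
#   explainer = SmoothGrad(toy_model, num_samples=16, noise_scale=0.1)
#   x = torch.randn(2, 3, 8, 8)
#   attr = explainer.attribute(x, target=3)  # same shape as x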

class SmoothGradXInput(SmoothGrad):
    """SmoothGrad × Input attribution method.

    Implements SmoothGrad multiplied element-wise by the input, which can
    produce more visually appealing attributions by focusing on the
    important input features.
    """

    def attribute(self, inputs, target=None, num_samples=None, noise_scale=None):
        """Calculate SmoothGrad × Input attribution.

        Args:
            inputs: Input tensor
            target: Target class index (None for argmax)
            num_samples: Override the number of samples (optional)
            noise_scale: Override the noise scale (optional)

        Returns:
            Attribution tensor of the same shape as inputs
        """
        # Get the smoothed gradients
        smooth_gradients = super().attribute(inputs, target, num_samples, noise_scale)

        # Ensure input is a tensor
        if not isinstance(inputs, torch.Tensor):
            inputs = torch.tensor(inputs, dtype=torch.float32)

        # Multiply element-wise by the original input
        return smooth_gradients * inputs.clone().detach()
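
# Usage sketch (illustrative; toy_model and x as in the SmoothGrad sketch
# above). The × Input variant simply reweights the smoothed gradients:
#
#   explainer = SmoothGradXInput(toy_model, num_samples=16, noise_scale=0.1)
#   attr = explainer.attribute(x, target=3)
#   # equivalent to: SmoothGrad(toy_model, 16, 0.1).attribute(x, target=3) * x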

class SmoothGradXSign(SmoothGrad):
    """SmoothGrad × Sign attribution method.

    Implements SmoothGrad multiplied by the sign of (input - threshold),
    which can emphasize both positive and negative contributions.
    """

    def __init__(self, model, num_samples=16, noise_scale=1.0, mu=0.0):
        """Initialize SmoothGradXSign.

        Args:
            model: PyTorch model
            num_samples: Number of noisy samples to use (matches TF default 16)
            noise_scale: Standard deviation of the added noise (matches TF behavior, default 1.0)
            mu: Threshold value for the sign function
        """
        super().__init__(model, num_samples, noise_scale)
        self.mu = mu

    def attribute(self, inputs, target=None, num_samples=None, noise_scale=None, mu=None):
        """Calculate SmoothGrad × Sign attribution.

        Args:
            inputs: Input tensor
            target: Target class index (None for argmax)
            num_samples: Override the number of samples (optional)
            noise_scale: Override the noise scale (optional)
            mu: Override the threshold value (optional)

        Returns:
            Attribution tensor of the same shape as inputs
        """
        # Get the smoothed gradients
        smooth_gradients = super().attribute(inputs, target, num_samples, noise_scale)

        # Ensure input is a tensor
        if not isinstance(inputs, torch.Tensor):
            inputs = torch.tensor(inputs, dtype=torch.float32)

        # Use the instance default if no threshold override is provided
        mu_value = mu if mu is not None else self.mu

        # Multiply element-wise by the sign of (input - threshold)
        input_sign = torch.sign(inputs.clone().detach() - mu_value)
        return smooth_gradients * input_sign
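
# Usage sketch (illustrative; toy_model and x as in the sketches above).
# With mu=0.0 the attribution keeps the smoothed gradient magnitudes but
# takes the sign of the input:
#
#   explainer = SmoothGradXSign(toy_model, num_samples=16, noise_scale=0.1, mu=0.0)
#   attr = explainer.attribute(x, target=3)
#   # equivalent to: SmoothGrad(toy_model, 16, 0.1).attribute(x, target=3) * torch.sign(x)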

def smoothgrad(model, inputs, target=None, num_samples=16, noise_scale=1.0):
    """Calculate SmoothGrad attribution (functional API).

    Args:
        model: PyTorch model
        inputs: Input tensor
        target: Target class index (None for argmax)
        num_samples: Number of noisy samples to use
        noise_scale: Standard deviation of the added noise

    Returns:
        Attribution tensor of the same shape as inputs
    """
    return SmoothGrad(model, num_samples, noise_scale).attribute(inputs, target)

def smoothgrad_x_input(model, inputs, target=None, num_samples=16, noise_scale=1.0):
    """Calculate SmoothGrad × Input attribution (functional API).

    Args:
        model: PyTorch model
        inputs: Input tensor
        target: Target class index (None for argmax)
        num_samples: Number of noisy samples to use
        noise_scale: Standard deviation of the added noise

    Returns:
        Attribution tensor of the same shape as inputs
    """
    return SmoothGradXInput(model, num_samples, noise_scale).attribute(inputs, target)

def smoothgrad_x_sign(model, inputs, target=None, num_samples=16, noise_scale=1.0, mu=0.0):
    """Calculate SmoothGrad × Sign attribution (functional API).

    Args:
        model: PyTorch model
        inputs: Input tensor
        target: Target class index (None for argmax)
        num_samples: Number of noisy samples to use
        noise_scale: Standard deviation of the added noise
        mu: Threshold value for the sign function

    Returns:
        Attribution tensor of the same shape as inputs
    """
    return SmoothGradXSign(model, num_samples, noise_scale, mu).attribute(inputs, target)
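
# Minimal smoke-test sketch for the functional API (illustrative; the toy CNN
# below is an assumption, not part of the SignXAI test suite). Any classifier
# whose forward returns (batch, num_classes) logits can be substituted.
if __name__ == "__main__":
    toy_model = nn.Sequential(
        nn.Conv2d(3, 4, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear(4 * 8 * 8, 10),
    )
    x = torch.randn(2, 3, 8, 8)

    # All three variants return an attribution with the input's shape
    for fn in (smoothgrad, smoothgrad_x_input, smoothgrad_x_sign):
        attr = fn(toy_model, x, target=1, num_samples=8, noise_scale=0.1)
        assert attr.shape == x.shape
        print(f"{fn.__name__}: min={attr.min().item():.4f}, max={attr.max().item():.4f}")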