# Source code for signxai.tf_signxai.methods_impl.grad_cam

"""
Title: Grad-CAM class activation visualization
Author: [fchollet](https://twitter.com/fchollet)
Date created: 2020/04/26
Last modified: 2021/03/07
Description: How to obtain a class activation heatmap for an image classification model.

Adapted from Deep Learning with Python (2017).
"""

import logging

import numpy as np
import tensorflow as tf
from scipy.interpolate.interpolate import interp1d
from tensorflow import keras
from tensorflow.python.keras import Model


class GradCAM:
    """Grad-CAM implementation for TensorFlow models.

    Grad-CAM uses the gradients of a target concept flowing into the final
    convolutional layer to produce a coarse localization map highlighting
    important regions in the input for the prediction.
    """

    def __init__(self, model, last_conv_layer_name):
        """Initialize GradCAM.

        Args:
            model: TensorFlow/Keras model to explain.
            last_conv_layer_name: Name of the last convolutional layer.
        """
        self.model = model
        self.last_conv_layer_name = last_conv_layer_name

        # Auxiliary model mapping the input to both the activations of the
        # chosen conv layer and the model output.
        self.grad_model = tf.keras.models.Model(
            [model.inputs],
            [model.get_layer(last_conv_layer_name).output, model.output],
        )

    def _expected_input_rank(self):
        """Return the rank (batch axis included) of the model's first input.

        Returns None when the model does not expose a usable static input
        shape, so callers can fall back to a rank-based heuristic.
        """
        try:
            return len(self.model.inputs[0].shape)
        except (AttributeError, IndexError, TypeError, ValueError):
            return None

    def compute_heatmap(self, x, target_class=None, resize=True):
        """Compute the Grad-CAM heatmap for the first sample in ``x``.

        Args:
            x: Input tensor or array. A missing batch axis is added
                automatically when the input rank is one less than the rank
                the model declares for its first input.
            target_class: Target class index (None selects the argmax of the
                first sample's prediction).
            resize: Whether to resize the heatmap to the input size.

        Returns:
            Grad-CAM heatmap as a numpy array, rectified and scaled by its
            maximum. Gradients are averaged over the whole batch, but only the
            first sample's feature maps are weighted.
        """
        # An unbatched image (H, W, C) and a batched time series (B, T, C)
        # both have rank 3, so the decision is made against the rank the
        # model expects rather than against a fixed number.
        expected_rank = self._expected_input_rank()
        input_rank = len(x.shape)
        if expected_rank is not None:
            needs_batch_axis = input_rank == expected_rank - 1
        else:
            # Unknown model input shape: keep the historical behaviour.
            needs_batch_axis = input_rank == 3
        if needs_batch_axis:
            x = np.expand_dims(x, axis=0)

        if isinstance(x, np.ndarray):
            x = tf.convert_to_tensor(x)

        # Gradient of the target class score w.r.t. the conv layer output.
        with tf.GradientTape() as tape:
            last_conv_layer_output, preds = self.grad_model(x)
            if target_class is None:
                target_class = tf.argmax(preds[0])
            class_channel = preds[:, target_class]

        grads = tape.gradient(class_channel, last_conv_layer_output)

        # One importance weight per feature map: average over every axis
        # except the trailing channel axis ((0, 1, 2) for images, (0, 1) for
        # time series).
        pooled_grads = tf.reduce_mean(
            grads, axis=tuple(range(len(grads.shape) - 1))
        )

        # Weight the first sample's feature maps channel-wise. pooled_grads
        # has shape (C,), which broadcasts along the trailing channel axis.
        last_conv_output = last_conv_layer_output[0]
        heatmap = tf.reduce_sum(last_conv_output * pooled_grads, axis=-1)

        # Rectify first, then normalise by the rectified maximum so that a
        # map without positive evidence becomes all zeros.
        heatmap = tf.maximum(heatmap, 0)
        heatmap = heatmap / (tf.reduce_max(heatmap) + tf.keras.backend.epsilon())

        heatmap = heatmap.numpy()

        if not resize:
            return heatmap

        if len(x.shape) == 4:
            # Image input: cv2 expects the target size as (width, height).
            import cv2

            heatmap = cv2.resize(heatmap, (x.shape[2], x.shape[1]))
        elif len(x.shape) == 3:
            # Time-series input: interpolate along the temporal axis.
            f = interp1d(
                x=np.arange(0, len(heatmap)),
                y=heatmap,
                bounds_error=False,
                fill_value="extrapolate",
            )
            heatmap = f(np.linspace(0, len(heatmap) - 1, num=x.shape[1]))

            # Repeat the temporal relevance for every input channel.
            if x.shape[2] > 1:
                heatmap = np.expand_dims(heatmap, axis=1)
                heatmap = np.tile(heatmap, (1, x.shape[2]))

        return heatmap
def calculate_grad_cam_relevancemap_timeseries(x, model, last_conv_layer_name, neuron_selection=None, resize=True):
    """Calculate a Grad-CAM relevance map adapted for time series data.

    Args:
        x: Input data, expected shape (batch_size, time_steps, channels). A
            2-D input (time_steps, channels) gets a batch axis prepended.
            Array-likes such as nested lists are accepted.
        model: Model to analyze.
        last_conv_layer_name: Name of the last convolutional layer.
        neuron_selection: Index of the output neuron to analyze (None selects
            the argmax of the first sample's prediction).
        resize: Whether to resize the heatmap to the input size.

    Returns:
        Numpy relevance map for the first sample. With ``resize`` truthy it
        has one entry per input time step and is repeated across channels
        when the input has more than one channel; otherwise it has the
        temporal length of the conv layer output. If resizing fails, the
        unresized map is returned.
    """
    logger = logging.getLogger(__name__)

    # Coerce to an array before touching `.shape`, so array-likes work too.
    if not isinstance(x, np.ndarray):
        x = np.array(x)
    logger.debug("GradCAM-Timeseries input shape: %s", x.shape)

    if x.ndim == 2:
        # (time_steps, channels) -> (1, time_steps, channels)
        x = np.expand_dims(x, axis=0)
        logger.debug("Added batch dimension, new shape: %s", x.shape)

    x = tf.convert_to_tensor(x, dtype=tf.float32)

    # Auxiliary model mapping the input to both the activations of the chosen
    # conv layer and the model output. Built with the public tf.keras API,
    # like the other Grad-CAM helpers in this module.
    grad_model = tf.keras.models.Model(
        [model.inputs],
        [model.get_layer(last_conv_layer_name).output, model.output],
    )

    with tf.GradientTape() as tape:
        tape.watch(x)
        last_conv_layer_output, preds = grad_model(x)
        if neuron_selection is None:
            neuron_selection = tf.argmax(preds[0])
        class_channel = preds[:, neuron_selection]

    logger.debug("Conv layer output shape: %s", last_conv_layer_output.shape)
    logger.debug("Model prediction shape: %s", preds.shape)
    logger.debug("Selected neuron %s", neuron_selection)

    # Gradient of the selected output w.r.t. the feature maps.
    grads = tape.gradient(class_channel, last_conv_layer_output)
    logger.debug("Gradients shape: %s", grads.shape)

    # One importance weight per feature map: mean over batch and time.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1))

    # Weighted sum of the first sample's feature maps over channels.
    feature_maps = last_conv_layer_output[0]
    heatmap = tf.einsum('tc,c->t', feature_maps, pooled_grads)

    # ReLU, then scale to [0, 1] unless there is no positive evidence.
    heatmap = tf.maximum(heatmap, 0)
    max_val = tf.reduce_max(heatmap)
    if max_val > 0:
        heatmap = heatmap / max_val
    else:
        logger.warning("Max value is zero, heatmap will be all zeros")

    heatmap_np = heatmap.numpy()

    if not resize:
        return heatmap_np

    try:
        input_time_steps = x.shape[1]
        input_channels = x.shape[2]

        resized = heatmap_np
        if len(heatmap_np) != input_time_steps:
            logger.debug(
                "Resizing heatmap from %d to %d time steps",
                len(heatmap_np), input_time_steps,
            )
            f = interp1d(
                x=np.arange(len(heatmap_np)),
                y=heatmap_np,
                bounds_error=False,
                fill_value="extrapolate",
            )
            resized = f(np.linspace(0, len(heatmap_np) - 1, num=input_time_steps))

        # Repeat the temporal relevance for every input channel.
        if input_channels > 1 and resized.ndim == 1:
            resized = np.tile(np.expand_dims(resized, axis=1), (1, input_channels))
            logger.debug(
                "Expanded heatmap to match %d channels, shape: %s",
                input_channels, resized.shape,
            )
        return resized
    except Exception as exc:
        # Deliberate best effort: resizing is a convenience, so fall back to
        # the unresized heatmap instead of failing the whole explanation.
        logger.warning("Resizing Grad-CAM heatmap failed: %s", exc)
        return heatmap_np
def calculate_grad_cam_relevancemap(x, model, last_conv_layer_name, neuron_selection=None, resize=False, **kwargs):
    """Calculate a Grad-CAM relevance map for image or 1D time-series models.

    Args:
        x: Batched model input; ``x.shape`` is read when ``resize`` is truthy,
            with (B, H, W, C) assumed for the target size.
        model: Model to analyze.
        last_conv_layer_name: Name of the last convolutional layer.
        neuron_selection: Index of the output neuron to analyze (None selects
            the argmax of the first sample's prediction).
        resize: Whether to resize the map to the input spatial size with
            OpenCV.
        **kwargs: Accepted and ignored.

    Returns:
        Numpy relevance map for the first sample, rectified and scaled to
        [0, 1]. A map without any positive value is returned as all zeros
        rather than NaN.
    """
    # Auxiliary model mapping the input to both the activations of the last
    # conv layer and the output predictions.
    grad_model = tf.keras.models.Model(
        [model.inputs],
        [model.get_layer(last_conv_layer_name).output, model.output],
    )

    # Gradient of the selected (or top predicted) output neuron with respect
    # to the activations of the last conv layer.
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(x)
        if neuron_selection is None:
            neuron_selection = tf.argmax(preds[0])
        class_channel = preds[:, neuron_selection]

    grads = tape.gradient(class_channel, last_conv_layer_output)

    # Mean gradient intensity per feature-map channel.
    # 1D time series: grads is [batch, time, channels] -> reduce over (0, 1).
    # 2D images: grads is [batch, height, width, channels] -> (0, 1, 2).
    if len(grads.shape) == 3:
        pooled_grads = tf.reduce_mean(grads, axis=(0, 1))
    else:
        pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight every channel of the first sample's feature maps by its
    # importance and sum over channels.
    feature_maps = last_conv_layer_output[0]
    if len(feature_maps.shape) == 2:
        # [time, channels] * [channels] -> [time]
        relevancemap = tf.reduce_sum(feature_maps * pooled_grads, axis=1)
    else:
        relevancemap = feature_maps @ pooled_grads[..., tf.newaxis]
        # Drop only the trailing matmul axis so that spatial dimensions of
        # size 1 are preserved.
        relevancemap = tf.squeeze(relevancemap, axis=-1)

    # ReLU (keep positive values only).
    relevancemap = tf.maximum(relevancemap, 0)

    # Normalize to [0, 1] for visualization; skip the division when the
    # maximum is zero to avoid producing NaN.
    max_val = tf.math.reduce_max(relevancemap)
    if max_val > 0:
        relevancemap = relevancemap / max_val

    heatmap = relevancemap.numpy()

    if resize:
        import cv2

        # cv2 expects the target size as (width, height).
        return cv2.resize(heatmap, (x.shape[2], x.shape[1]))

    return heatmap