
Softmax

$$ \text{Softmax}(x_i) = \frac{e^{x_i}}{\sum_{j=1}^{n} e^{x_j}} $$

Softmax converts a vector of $n$ real numbers into a probability distribution over $n$ possible outcomes. In neural networks, it is typically applied to the raw outputs (logits) of the final layer to turn them into class probabilities.
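Before using PyTorch's built-in module, here is a minimal NumPy sketch of the definition itself. Subtracting the maximum before exponentiating is a standard numerical-stability trick; softmax is shift-invariant, so the result is unchanged.

In [ ]:
import numpy as np

def softmax(x):
    # Shift by the max for numerical stability: exp(x - c) / sum(exp(x - c))
    # gives the same result for any constant c
    shifted = x - x.max()
    exps = np.exp(shifted)
    return exps / exps.sum()

x = np.array([1.0, 2.0, 3.0])
probs = softmax(x)
print(probs)        # [0.09003057 0.24472847 0.66524096]
print(probs.sum())  # ~1.0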

In [15]:
import torch
import torch.nn as nn

# Softmax over dim=1, i.e. across the 20 scores of each sample
m = nn.Softmax(dim=1)
input = torch.randn(1, 20)  # one sample with 20 random raw scores (logits)
output = m(input)
print(input)
print(output)

input_np = input.numpy()[0]
output_np = output.numpy()[0]

# The raw scores sum to an arbitrary value; the softmax output sums to 1
print(f"Sum of input: {input_np.sum()}")
print(f"Sum of output: {output_np.sum()}")
tensor([[ 1.6410e+00, -6.8308e-01, -1.4463e+00,  1.6618e+00, -3.1804e-02,
          9.0053e-02, -2.2035e-01, -2.9736e-01,  1.2417e+00,  8.7962e-01,
          1.0464e+00,  2.6645e+00,  3.1628e-01,  6.3423e-01, -3.6201e-01,
         -4.3475e-01, -2.1171e-03, -5.1607e-01, -1.1722e+00, -8.7281e-01]])
tensor([[0.1152, 0.0113, 0.0053, 0.1177, 0.0216, 0.0244, 0.0179, 0.0166, 0.0773,
         0.0538, 0.0636, 0.3207, 0.0306, 0.0421, 0.0155, 0.0145, 0.0223, 0.0133,
         0.0069, 0.0093]])
Sum of input: 4.136675834655762
Sum of output: 0.9999999403953552
In [13]:
import matplotlib.pyplot as plt
import numpy as np

# Create bar plot comparing input and softmax output
plt.figure(figsize=(10, 5))

# Convert tensors to numpy arrays for plotting
input_np = input.numpy()[0]
output_np = output.numpy()[0]

x = np.arange(len(input_np))
width = 0.35

plt.bar(x - width/2, input_np, width, label='Input Values', color='skyblue')
plt.bar(x + width/2, output_np, width, label='Softmax Output', color='lightcoral')

plt.xlabel('Index')
plt.ylabel('Value')
plt.title('Input Values vs Softmax Probabilities')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(x)

# Add text to show that softmax outputs sum to 1
plt.text(0.02, 0.98, f'Sum of probabilities: {output_np.sum():.3f}', 
         transform=plt.gca().transAxes, 
         verticalalignment='top')

plt.show()
Figure: grouped bar chart "Input Values vs Softmax Probabilities" with the annotation "Sum of probabilities: 1.000".
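The dim argument selects the axis that is normalized. As a minimal sketch (a new toy batch, not the tensors above): with dim=1, each row of a (batch, classes) tensor becomes its own probability distribution, whereas dim=0 would normalize down the columns instead.

In [ ]:
import torch
import torch.nn as nn

batch = torch.tensor([[1.0, 2.0, 3.0],
                      [1.0, 1.0, 1.0]])

row_probs = nn.Softmax(dim=1)(batch)  # normalize each row independently
print(row_probs)
print(row_probs.sum(dim=1))  # tensor([1., 1.]): every sample is a distribution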