SoftMax
$$ \text{SoftMax}(x_i) = \frac{e^{x_i}}{\sum_{j=1}^{n} e^{x_j}} $$
Converts a vector of n real numbers into a probability distribution over n possible outcomes. In neural networks, it is typically applied to the raw outputs (logits) of the final layer to turn them into a probability distribution over classes.
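To see the formula in action, here is a minimal sketch that computes SoftMax directly from the definition. The helper name softmax_manual is ours, not a PyTorch API; subtracting the maximum before exponentiating is the standard stability trick and does not change the result, since the constant cancels in the ratio.
In [ ]:
import torch

def softmax_manual(x, dim=-1):
    # exp(x - c) / sum(exp(x - c)) equals exp(x) / sum(exp(x)) for any
    # constant c; using c = max(x) keeps the exponentials from overflowing.
    shifted = x - x.max(dim=dim, keepdim=True).values
    exps = torch.exp(shifted)
    return exps / exps.sum(dim=dim, keepdim=True)

x = torch.randn(1, 5)
print(softmax_manual(x, dim=1))
print(torch.softmax(x, dim=1))  # should agree with the manual version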
In [15]:
import torch
import torch.nn as nn
m = nn.Softmax(dim=1)  # normalize along dim 1, i.e. across the 20 values in each row
input = torch.randn(1, 20)  # a single row of 20 random logits
output = m(input)
print(input)
print(output)
input_np = input.numpy()[0]
output_np = output.numpy()[0]
print(f"Sum of input: {input_np.sum()}")
print(f"Sum of output: {output_np.sum()}")
tensor([[ 1.6410e+00, -6.8308e-01, -1.4463e+00,  1.6618e+00, -3.1804e-02,
          9.0053e-02, -2.2035e-01, -2.9736e-01,  1.2417e+00,  8.7962e-01,
          1.0464e+00,  2.6645e+00,  3.1628e-01,  6.3423e-01, -3.6201e-01,
         -4.3475e-01, -2.1171e-03, -5.1607e-01, -1.1722e+00, -8.7281e-01]])
tensor([[0.1152, 0.0113, 0.0053, 0.1177, 0.0216, 0.0244, 0.0179, 0.0166,
         0.0773, 0.0538, 0.0636, 0.3207, 0.0306, 0.0421, 0.0155, 0.0145,
         0.0223, 0.0133, 0.0069, 0.0093]])
Sum of input: 4.136675834655762
Sum of output: 0.9999999403953552
In [13]:
import matplotlib.pyplot as plt
import numpy as np
# Create bar plot comparing input and softmax output
plt.figure(figsize=(10, 5))
# Convert tensors to numpy arrays for plotting
input_np = input.numpy()[0]
output_np = output.numpy()[0]
x = np.arange(len(input_np))
width = 0.35
plt.bar(x - width/2, input_np, width, label='Input Values', color='skyblue')
plt.bar(x + width/2, output_np, width, label='Softmax Output', color='lightcoral')
plt.xlabel('Index')
plt.ylabel('Value')
plt.title('Input Values vs Softmax Probabilities')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(x)
# Add text to show that softmax outputs sum to 1
plt.text(0.02, 0.98, f'Sum of probabilities: {output_np.sum():.3f}',
transform=plt.gca().transAxes,
verticalalignment='top')
plt.show()