Softmax Regression
From Ufldl
(→Parameterization) |
(→Optimizing Softmax Regression) |
||
Line 56: | Line 56: | ||
&= \ln \prod_{i=1}^{m}{ P(y^{(i)} | x^{(i)}) } \\ | &= \ln \prod_{i=1}^{m}{ P(y^{(i)} | x^{(i)}) } \\ | ||
&= \sum_{i=1}^{m}{ \ln \frac{ e^{ \theta^T_{y^{(i)}} x^{(i)} } }{ \sum_{j=1}^{n}{e^{ \theta_j^T x^{(i)} }} } } \\ | &= \sum_{i=1}^{m}{ \ln \frac{ e^{ \theta^T_{y^{(i)}} x^{(i)} } }{ \sum_{j=1}^{n}{e^{ \theta_j^T x^{(i)} }} } } \\ | ||
- | &= \theta^T_{y^{(i)}} x^{(i)} - \ln \sum_{j=1}^{n}{e^{ \theta_j^T x^{(i)} }} | + | &= \sum_{i=1}^{m}{ \left[ \theta^T_{y^{(i)}} x^{(i)} - \ln \sum_{j=1}^{n}{e^{ \theta_j^T x^{(i)} }} \right] } |
\end{align} | \end{align} | ||
</math> | </math> | ||
Line 64: | Line 64: | ||
<math> | <math> | ||
\begin{align} | \begin{align} | ||
- | \frac{\partial \ell(\theta)}{\partial \theta_k} &= \frac{\partial}{\partial \theta_k} \theta^T_{y^{(i)}} x^{(i)} - \ln \sum_{j=1}^{n}{e^{ \theta_j^T x^{(i)} }} \\ | + | \frac{\partial \ell(\theta)}{\partial \theta_k} &= \sum_{i=1}^{m}{ \frac{\partial}{\partial \theta_k} \left[ \theta^T_{y^{(i)}} x^{(i)} - \ln \sum_{j=1}^{n}{e^{ \theta_j^T x^{(i)} }} \right] } \\ |
- | &= I_{ \{ y^{(i)} = k\} } x^{(i)} - \frac{1}{ \sum_{j=1}^{n}{e^{ \theta_j^T x^{(i)} }} } | + | &= \sum_{i=1}^{m}{ \left[ I_{ \{ y^{(i)} = k\} } x^{(i)} - \frac{1}{ \sum_{j=1}^{n}{e^{ \theta_j^T x^{(i)} }} } |
\cdot | \cdot | ||
e^{ \theta_k^T x^{(i)} } | e^{ \theta_k^T x^{(i)} } | ||
\cdot | \cdot | ||
- | x^{(i)} | + | x^{(i)} \right]} |
- | \qquad \text{(where } I_{ \{ y^{(i)} = k\} } \text{is 1 when } y^{(i)} = k \text{ and 0 otherwise) } \\ | + | \qquad \text{(where } I_{ \{ y^{(i)} = k\} } \text{ is 1 when } y^{(i)} = k \text{ and 0 otherwise) } \\ |
- | &= x^{(i)} ( I_{ \{ y^{(i)} = k\} } - P(y^{(i)} = k | x^{(i)}) ) | + | &= \sum_{i=1}^{m}{ \left[ x^{(i)} ( I_{ \{ y^{(i)} = k\} } - P(y^{(i)} = k | x^{(i)}) ) \right] } |
\end{align} | \end{align} | ||
</math> | </math> |