cytoのメモ帳

気づいたこと,感じたことを徒然と

行列,ベクトルの微分公式

機械学習の分野などでちょいちょい出てくる行列,ベクトルの微分の計算に関して公式をまとめてみました.

 \frac{\partial}{\partial w}w^\mathrm{T}X = X


\begin{aligned}
& w^\mathrm{T} =
\begin{pmatrix}
w_{1} & w_{2} & \cdots & w_{D}\\
\end{pmatrix}
\\
& X=
\begin{pmatrix}
x_{11} & x_{12} & x_{13} & \cdots & x_{1N} \\
x_{21} & x_{22} & x_{23} & \cdots & x_{2N} \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
x_{D1} & x_{D2} & x_{D3} & \cdots & x_{DN} \\
\end{pmatrix}
\end{aligned}

とおくと,


\begin{aligned}
w^\mathrm{T}X
&=
\begin{pmatrix}
w_{1} & w_{2} & \cdots & w_{D}\\
\end{pmatrix}
\begin{pmatrix}
x_{11} & x_{12} & x_{13} & \cdots & x_{1N} \\
x_{21} & x_{22} & x_{23} & \cdots & x_{2N} \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
x_{D1} & x_{D2} & x_{D3} & \cdots & x_{DN} \\
\end{pmatrix}\\
& =
\begin{pmatrix}
\sum_{n=1}^{D}w_{n}x_{n1} & \sum_{n=1}^{D}w_{n} x_{n2}& \cdots & \sum_{n=1}^{D}w_{n} x_{nN}\\
\end{pmatrix}
\end{aligned}

となる.

 
\begin{aligned}
\sum_{n=1}^{D}w_{n}x_{n1}
= w_{1}x_{11} + w_{2}x_{21} + w_{3}x_{31} + \cdots + w_{D}x_{D1}
\end{aligned}

であり,$w_{1}$は$w_{1}x_{11}$にしか出現しない.
よって,$w_{1}$で微分したときは$w_{1}x_{11}$は$x_{11}$に,$w_{1}x_{11}$以外の項はゼロとなる.
他の要素の場合も同様に考えると,

 
\begin{aligned}
\frac{\partial}{\partial w_1}w^\mathrm{T}X
&=
\begin{pmatrix}
x_{11} & x_{12} & \cdots & x_{1N}\\
\end{pmatrix}\\
\frac{\partial}{\partial w_2}w^\mathrm{T}X
&=
\begin{pmatrix}
x_{21} & x_{22} & \cdots & x_{2N}\\
\end{pmatrix}\\
& \vdots\\
\frac{\partial}{\partial w_D}w^\mathrm{T}X
&=
\begin{pmatrix}
x_{D1} & x_{D2} & \cdots & x_{DN}\\
\end{pmatrix}\\
\end{aligned}

となり,


\begin{aligned}
\frac{\partial}{\partial w}w^\mathrm{T}X
=
\begin{pmatrix}
x_{11} & x_{12} & x_{13} & \cdots & x_{1N} \\
x_{21} & x_{22} & x_{23} & \cdots & x_{2N} \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
x_{D1} & x_{D2} & x_{D3} & \cdots & x_{DN} \\
\end{pmatrix}
= X
\end{aligned}

となる.
以上より,


\begin{aligned}
\frac{\partial}{\partial w}w^\mathrm{T}X = X
\end{aligned}

が成立する.

 \frac{\partial}{\partial w}w^\mathrm{T}Xw = (X + X^\mathrm{T})w


\begin{aligned}
& w^\mathrm{T}  =
\begin{pmatrix}
w_{1} & w_{2} & \cdots & w_{D}\\
\end{pmatrix}
\\
& X =
\begin{pmatrix}
x_{11} & x_{12} & \cdots & x_{1D} \\
x_{21} & x_{22} & \cdots & x_{2D} \\
\vdots & \vdots & \ddots & \vdots \\
x_{D1} & x_{D2} & \cdots & x_{DD} \\
\end{pmatrix}
\end{aligned}

とおくと,


\begin{aligned}
w^\mathrm{T}Xw
&=
w^\mathrm{T}
\begin{pmatrix}
x_{11} & x_{12} & \cdots & x_{1D} \\
x_{21} & x_{22} & \cdots & x_{2D} \\
\vdots & \vdots & \ddots & \vdots \\
x_{D1} & x_{D2} & \cdots & x_{DD} \\
\end{pmatrix}
\begin{pmatrix}
w_{1} \\
w_{2} \\
\vdots \\
w_{D}\\
\end{pmatrix} \\
& =
w^\mathrm{T}
\begin{pmatrix}
\sum_{n=1}^{D}w_{n}x_{1n} \\
\sum_{n=1}^{D}w_{n} x_{2n} \\
\vdots \\
\sum_{n=1}^{D}w_{n} x_{Dn}\\
\end{pmatrix}
=
\begin{pmatrix}
w_{1} & w_{2} & \cdots & w_{D}\\
\end{pmatrix}
\begin{pmatrix}
\sum_{n=1}^{D}w_{n}x_{1n} \\
\sum_{n=1}^{D}w_{n} x_{2n} \\
\vdots \\
\sum_{n=1}^{D}w_{n} x_{Dn}\\
\end{pmatrix} \\
& =
w_{1}\sum_{n=1}^{D}w_{n}x_{1n} + w_{2}\sum_{n=1}^{D}w_{n}x_{2n} + \cdots + w_{D}\sum_{n=1}^{D}w_{n}x_{Dn} \\
& =
\sum_{m=1}^{D}\sum_{n=1}^{D}w_{m}w_{n}x _{mn}
\end{aligned}

となる.


\begin{aligned}
\frac{\partial}{\partial w_1}w^\mathrm{T}Xw
& =
\frac{\partial}{\partial w_1}\{w_{1}\sum_{n=1}^{D}w_{n}x_{1n} + w_{2}\sum_{n=1}^{D}w_{n}x_{2n} + \cdots + w_{D}\sum_{n=1}^{D}w_{n}x_{Dn}\} \\
& =
\frac{\partial}{\partial w_1}\{w_{1}\sum_{n=1}^{D}w_{n}x_{1n}\} + \frac{\partial}{\partial w_1}\{w_{2}\sum_{n=1}^{D}w_{n}x_{2n} + \cdots + w_{D}\sum_{n=1}^{D}w_{n}x_{Dn}\} \\
& =
\{\sum_{n=1}^{D}w_{n}x_{1n} + w_{1}x_{11}\} + \{w_{2}x_{21} + w_{3}x_{31} + \cdots + w_{D}x_{D1}\}\\
& =
\sum_{n=1}^{D}w_{n}x_{1n} + \sum_{n=1}^{D}w_{n}x_{n1} \\
& =
\begin{pmatrix}
x_{11} & x_{12} & \cdots & x_{1D}
\end{pmatrix}
\begin{pmatrix}
w_{1} \\
w_{2} \\
\vdots \\
w_{D}\\
\end{pmatrix} 
+
\begin{pmatrix}
x_{11} & x_{21} & \cdots & x_{D1}
\end{pmatrix}
\begin{pmatrix}
w_{1} \\
w_{2} \\
\vdots \\
w_{D}\\
\end{pmatrix} \\
& =
\begin{pmatrix}
x_{11} & x_{12} & \cdots & x_{1D}
\end{pmatrix}
w
+
\begin{pmatrix}
x_{11} & x_{21} & \cdots & x_{D1}
\end{pmatrix}
w
\end{aligned}

であるから,他の要素の場合も同様に考えると,


\begin{aligned}
\frac{\partial}{\partial w_i}w^\mathrm{T}Xw
& = 
\begin{pmatrix}
x_{i1} & x_{i2} & \cdots & x_{iD}
\end{pmatrix}
w
+
\begin{pmatrix}
x_{1i} & x_{2i} & \cdots & x_{Di}
\end{pmatrix}
w
\end{aligned}

がわかる.
よって,各要素をまとめると,


\begin{aligned}
\frac{\partial}{\partial w}w^\mathrm{T}Xw
& = 
\begin{pmatrix}
\begin{pmatrix}x_{11} & x_{12} & \cdots & x_{1D}\end{pmatrix}w + \begin{pmatrix}x_{11} & x_{21} & \cdots & x_{D1}\end{pmatrix}w\\
\begin{pmatrix}x_{21} & x_{22} & \cdots & x_{2D}\end{pmatrix}w + \begin{pmatrix}x_{12} & x_{22} & \cdots & x_{D2}\end{pmatrix}w\\
\vdots \\
\begin{pmatrix}x_{D1} & x_{D2} & \cdots & x_{DD}\end{pmatrix}w + \begin{pmatrix}x_{1D} & x_{2D} & \cdots & x_{DD}\end{pmatrix}w\\
\end{pmatrix} \\
& =
\begin{pmatrix}
x_{11} & x_{12} & \cdots & x_{1D} \\
x_{21} & x_{22} & \cdots & x_{2D} \\
\vdots & \vdots & \ddots & \vdots \\
x_{D1} & x_{D2} & \cdots & x_{DD} \\
\end{pmatrix}
w
+
\begin{pmatrix}
x_{11} & x_{21} & \cdots & x_{D1} \\
x_{12} & x_{22} & \cdots & x_{D2} \\
\vdots & \vdots & \ddots & \vdots \\
x_{1D} & x_{2D} & \cdots & x_{DD} \\
\end{pmatrix}
w \\
& =
Xw + X^\mathrm{T}w = (X +  X^\mathrm{T})w
\end{aligned}

となる.
以上より,


\begin{aligned}
\frac{\partial}{\partial w}w^\mathrm{T}Xw = (X + X^\mathrm{T})w
\end{aligned}

が成立する.

 \frac{\partial}{\partial w}w^\mathrm{T}w = 2w


\begin{aligned}
\frac{\partial}{\partial w}w^\mathrm{T}Xw = (X + X^\mathrm{T})w
\end{aligned}

の$X$を単位行列$E$に置き換えればよい.


\begin{aligned}
\frac{\partial}{\partial w}w^\mathrm{T}w 
& = \frac{\partial}{\partial w}w^\mathrm{T}Ew \\
& = (E + E^\mathrm{T})w \\
& = 2Ew = 2w
\end{aligned}

であり,成立する.