# HG changeset patch
# User Jeff Hammel
# Date 1504562798 25200
# Node ID 857a606783e1bc89c7071e931f479105d5be1e81
# Parent  2f0caec46e26f1e97f30821b2bfe8a472fdc51c5
[documentation] notes + stubs on gradient descent

diff -r 2f0caec46e26 -r 857a606783e1 docs/matrix.txt
--- a/docs/matrix.txt	Mon Sep 04 14:53:32 2017 -0700
+++ b/docs/matrix.txt	Mon Sep 04 15:06:38 2017 -0700
@@ -14,3 +14,21 @@
 `W1x1` gives some column vector,
 where `x1` is the first training
 example.
+
+Y = [ y1 y2 ... ym ]
+
+For a two-layer network:
+
+dZ2 = A2 - Y
+
+dW2 = (1/m) dZ2 A1'
+
+db2 = (1/m) np.sum(dZ2, axis=1, keepdims=True)
+
+dZ1 = W2' dZ2 * g1'(Z1)
+  : W2' dZ2 : an (n1, m) matrix
+  : *       : element-wise product
+
+dW1 = (1/m) dZ1 X'
+
+db1 = (1/m) np.sum(dZ1, axis=1, keepdims=True)
diff -r 2f0caec46e26 -r 857a606783e1 tvii/activation.py
--- a/tvii/activation.py	Mon Sep 04 14:53:32 2017 -0700
+++ b/tvii/activation.py	Mon Sep 04 15:06:38 2017 -0700
@@ -13,4 +13,3 @@
 
 def ReLUprime(z):
     return 1. if z > 0 else 0.
-
diff -r 2f0caec46e26 -r 857a606783e1 tvii/gradient_descent.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tvii/gradient_descent.py	Mon Sep 04 15:06:38 2017 -0700
@@ -0,0 +1,9 @@
+"""
+gradient descent for neural networks
+"""
+
+# Each loop:
+# - compute predictions
+# - compute derivatives: dw, db
+# - update: w = w - alpha * dw; b = b - alpha * db
+
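The two-layer equations added to docs/matrix.txt translate almost directly into numpy. A minimal sketch of that backward pass follows; the function name `backprop_two_layer`, its argument list, and the choice of tanh for the hidden activation `g1` are illustrative assumptions, not part of this changeset.

import numpy as np

def backprop_two_layer(X, Y, Z1, A1, A2, W2):
    # Shapes: X (n0, m), Y (1, m), Z1/A1 (n1, m), A2 (1, m), W2 (1, n1).
    m = X.shape[1]  # number of training examples (columns)

    # output layer
    dZ2 = A2 - Y                                        # (1, m)
    dW2 = (1. / m) * np.dot(dZ2, A1.T)                  # (1, n1)
    db2 = (1. / m) * np.sum(dZ2, axis=1, keepdims=True)

    # hidden layer; assumes g1 = tanh, so g1'(Z1) = 1 - tanh(Z1)**2
    dZ1 = np.dot(W2.T, dZ2) * (1. - np.tanh(Z1) ** 2)   # (n1, m)
    dW1 = (1. / m) * np.dot(dZ1, X.T)                   # (n1, n0)
    db1 = (1. / m) * np.sum(dZ1, axis=1, keepdims=True)

    return dW1, db1, dW2, db2

Here `keepdims=True` keeps `db1` and `db2` as column vectors, matching the column-vector convention in the notes above.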
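As written, `ReLUprime` in tvii/activation.py handles only scalar `z`: the expression `1. if z > 0 else 0.` is ambiguous for a numpy array, while the matrix notes above work columnwise over all m examples. A sketch of an array-friendly variant, with `relu_prime` being an assumed name rather than the module's API:

import numpy as np

def relu_prime(z):
    # elementwise derivative of ReLU: 1 where z > 0, else 0
    return (np.asarray(z) > 0).astype(float)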
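The tvii/gradient_descent.py stub only names the loop steps in comments. One way those steps could be fleshed out, using single-layer logistic regression for concreteness; `gradient_descent`, its signature, and the sigmoid prediction step are assumptions here, not the module's eventual implementation:

import numpy as np

def gradient_descent(w, b, X, Y, alpha=0.01, iterations=1000):
    # w: (n, 1) weights; b: scalar bias; X: (n, m) inputs; Y: (1, m) labels
    m = X.shape[1]
    for _ in range(iterations):
        # compute predictions (sigmoid activation)
        A = 1. / (1. + np.exp(-(np.dot(w.T, X) + b)))   # (1, m)
        # compute derivatives: dw, db
        dZ = A - Y
        dw = (1. / m) * np.dot(X, dZ.T)                 # (n, 1)
        db = (1. / m) * np.sum(dZ)
        # update: w = w - alpha * dw; b = b - alpha * db
        w = w - alpha * dw
        b = b - alpha * db
    return w, b

Each pass through the loop performs one full-batch update, with `alpha` as the learning rate.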