changeset 44:857a606783e1

[documentation] notes + stubs on gradient descent
author Jeff Hammel <k0scist@gmail.com>
date Mon, 04 Sep 2017 15:06:38 -0700
parents 2f0caec46e26
children 4d173452377e
files docs/matrix.txt tvii/activation.py tvii/gradient_descent.py
diffstat 3 files changed, 27 insertions(+), 1 deletions(-)
--- a/docs/matrix.txt	Mon Sep 04 14:53:32 2017 -0700
+++ b/docs/matrix.txt	Mon Sep 04 15:06:38 2017 -0700
@@ -14,3 +14,21 @@
 
 `W1x1` gives some column vector, where `x1`
 is the first training example.
+
+Y = [ y1 y2 ... ym]
+
+For a two-layer network:
+
+dZ2 = A2 - Y
+
+dW2 = (1/m) dZ2 A1'
+
+db2 = (1/m) np.sum(dZ2, axis=1, keepdims=True)
+
+dZ1 = W2' dZ2 * g1'(Z1)
+ : W2' dZ2 : an (n1, m) matrix
+ : * g1'(Z1) : element-wise product with the derivative of the first-layer activation
+
+dW1 = (1/m) dZ1 X'
+
+db1 = (1/m) np.sum(dZ1, axis=1, keepdims=True)
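
As a worked illustration of the equations above, a minimal NumPy sketch,
assuming column-stacked data of shapes X (n0, m), A1 (n1, m), A2 and Y
(n2, m), W2 (n2, n1), and tanh as the first-layer activation so that
g1'(Z1) = 1 - A1**2. The function name and signature are illustrative,
not part of this changeset:

    import numpy as np

    def backprop_two_layer(X, Y, A1, A2, W2):
        m = X.shape[1]                        # training examples as columns
        dZ2 = A2 - Y                          # dZ2 = A2 - Y
        dW2 = (1. / m) * dZ2.dot(A1.T)        # dW2 = (1/m) dZ2 A1'
        db2 = (1. / m) * np.sum(dZ2, axis=1, keepdims=True)
        dZ1 = W2.T.dot(dZ2) * (1 - A1 ** 2)   # W2' dZ2 * g1'(Z1), with g1 = tanh
        dW1 = (1. / m) * dZ1.dot(X.T)         # dW1 = (1/m) dZ1 X'
        db1 = (1. / m) * np.sum(dZ1, axis=1, keepdims=True)
        return dW1, db1, dW2, db2
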
--- a/tvii/activation.py	Mon Sep 04 14:53:32 2017 -0700
+++ b/tvii/activation.py	Mon Sep 04 15:06:38 2017 -0700
@@ -13,4 +13,3 @@
 def ReLUprime(z):
     return 1. if z > 0 else 0.
 
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tvii/gradient_descent.py	Mon Sep 04 15:06:38 2017 -0700
@@ -0,0 +1,9 @@
+"""
+gradient descent for neural networks
+"""
+
+# Each loop iteration:
+# - compute predictions
+# - compute derivatives: dw, db
+# - update parameters: w = w - alpha * dw; b = b - alpha * db
+
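
A minimal sketch of the loop these comments describe, using logistic
regression as the predictor; sigmoid, num_iterations, and the parameter
names here are illustrative assumptions, not part of this changeset:

    import numpy as np

    def sigmoid(z):
        return 1. / (1. + np.exp(-z))

    def gradient_descent(w, b, X, Y, alpha, num_iterations):
        # w: (n, 1) weights; b: scalar bias; X: (n, m) inputs; Y: (1, m) labels
        m = X.shape[1]
        for _ in range(num_iterations):
            A = sigmoid(w.T.dot(X) + b)   # compute predictions
            dZ = A - Y
            dw = (1. / m) * X.dot(dZ.T)   # compute derivatives: dw, db
            db = (1. / m) * np.sum(dZ)
            w = w - alpha * dw            # update: w = w - alpha * dw
            b = b - alpha * db            #         b = b - alpha * db
        return w, b
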