# Sentiments context classifier cassandra -/\-
# also freq prob (shape) thread_test.py and digits plot
# add the same in Python as in Pascal before, mX4

import random
import numpy as np
#import pandas as pd

x=np.array([[0,0],[0,1],[1,0],[1,1]])
y=np.array([[0],[1],[1],[0]])

np.random.seed(0)

# Optional, but a good idea to have +ve and -ve weights
theta1=np.random.rand(2,8)-0.5
theta2=np.random.rand(8,1)-0.5

# Necessary - the bias terms should have same number of dimensions
# as the layer. For some reason you had one bias vector per example.
# (You could still use np.random.rand(8) and np.random.rand(1))
b1=np.zeros(8)
b2=np.zeros(1)
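# (Added note) With these shapes, NumPy broadcasting adds b1 (shape (8,)) to
# every row of x.dot(theta1) (shape (4, 8)), i.e. one shared bias per hidden
# unit rather than one bias vector per example.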

alpha=0.01
# Regularisation is not necessary for XOR, because you have the complete training set.
# You could have lamda=0.0, but I left a value here just to show it works.
lamda=0.001

#Training - Forward propagation

# More iterations than you might think! This is because we have
# so little training data that we need to repeat it a lot.
for i in range(1,40000):
    z1=x.dot(theta1)+b1
    h1=1/(1+np.exp(-z1))
    z2=h1.dot(theta2)+b2
    h2=1/(1+np.exp(-z2))

#Training - Back propagation

    # This dz term assumes binary cross-entropy loss
    dz2 = h2-y 
    # You could also have stuck with squared error loss, the extra h2 terms
    # are the derivative of the sigmoid transfer function. 
    # It converges more slowly, though:
    # dz2 = (h2-y) * h2 * (1-h2)
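    # (Added note) Why dz2 = h2 - y for a sigmoid output with binary
    # cross-entropy: per example, L = -[y*log(h2) + (1-y)*log(1-h2)], so
    #   dL/dh2 = (h2 - y) / (h2 * (1 - h2))   and   dh2/dz2 = h2 * (1 - h2);
    # the chain rule multiplies these, the sigmoid terms cancel, and we are
    # left with dL/dz2 = h2 - y.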

    # This is just the same as you had before, but with fewer temporary variables
    dw2 = np.dot(h1.T, dz2)
    db2 = np.sum(dz2, axis=0)

    # derivative of sigmoid is h1 * (1-h1), NOT dh1*(1-dh1)
    dz1 = np.dot(dz2, theta2.T) * h1 * (1-h1)
    dw1 = np.dot(x.T, dz1)
    db1 = np.sum(dz1, axis=0)

    # L2 regularisation terms ADD to the gradients of the weights
    dw2 += lamda * theta2
    dw1 += lamda * theta1
    theta1 += -alpha * dw1
    theta2 += -alpha * dw2
    b1 += -alpha * db1
    b2 += -alpha * db2
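    # (Added sketch, not in the original script) Optional progress check:
    # print the mean binary cross-entropy every 10000 iterations to watch
    # the loss decrease as training proceeds.
    if i % 10000 == 0:
        loss = -np.mean(y*np.log(h2) + (1-y)*np.log(1-h2))
        print('iteration', i, 'loss', loss)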

#Prediction
#This is where you can kick yourself: you forgot to use the biases!

input1=np.array([[0,0],[0,1],[1,0],[1,1]])
z1=np.dot(input1,theta1)+b1
h1=1/(1+np.exp(-z1))
z2=np.dot(h1,theta2)+b2
h2=1/(1+np.exp(-z2))

print('perceptron predict res: \n\n',h2)
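
# (Added sketch) Quick sanity check: threshold the outputs at 0.5 and compare
# with the XOR targets; after training, all four predictions should match.
pred = (h2 > 0.5).astype(int)
print('rounded predictions:', pred.ravel(), ' targets:', y.ravel())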



"""
Python Machine Learning: Machine Learning and Deep Learning with Python,
scikit-learn, and TensorFlow, 2nd Edition (Paperback),
September 20, 2017
 
The usual way to test for a NaN is to check whether it is unequal to itself
(NaN is the only value that is not equal to itself):

def isNaN(num):
    return num != num

"""