# Sentiments context classifier cassandra -/\-
# also freq prob (shape) thread_test.py and digits plot
# add the same in python as pascal before, mX4
import random
import numpy as np
#import pandas as pd
x=np.array([[0,0],[0,1],[1,0],[1,1]])
y=np.array([[0],[1],[1],[0]])
np.random.seed(0)
# Optional, but a good idea to have +ve and -ve weights
theta1=np.random.rand(2,8)-0.5
theta2=np.random.rand(8,1)-0.5
# Necessary - each bias term should have one entry per unit in its layer.
# For some reason you had one bias vector per example.
# (You could still use np.random.rand(8) and np.random.rand(1))
b1=np.zeros(8)
b2=np.zeros(1)
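# Optional sanity check: broadcasting a (4,8) pre-activation matrix with an
# (8,)-shaped bias adds the bias to every row, i.e. one bias per hidden unit.
assert (x.dot(theta1) + b1).shape == (4, 8)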
alpha=0.01
# Regularisation is not necessary for XOR, because you have a complete training set.
# You could set lamda=0.0, but I left a value here just to show it works.
lamda=0.001
#Training - Forward propagation
# More iterations than you might think! Because we have so little
# training data, we need to repeat it a lot.
for i in range(1,40000):
    z1=x.dot(theta1)+b1
    h1=1/(1+np.exp(-z1))
    z2=h1.dot(theta2)+b2
    h2=1/(1+np.exp(-z2))
    #Training - Back propagation
    # This dz term assumes binary cross-entropy loss
    dz2 = h2-y
    # You could also have stuck with squared error loss, the extra h2 terms
    # are the derivative of the sigmoid transfer function.
    # It converges slower though:
    # dz2 = (h2-y) * h2 * (1-h2)
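    # Derivation sketch, assuming the sigmoid outputs above and binary
    # cross-entropy loss L = -[y*log(h2) + (1-y)*log(1-h2)]:
    #   dL/dh2  = (h2-y) / (h2*(1-h2))
    #   dh2/dz2 = h2*(1-h2)
    #   dL/dz2  = dL/dh2 * dh2/dz2 = h2-y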
    # This is just the same as you had before, but with fewer temporary variables
    dw2 = np.dot(h1.T, dz2)
    db2 = np.sum(dz2, axis=0)
    # derivative of sigmoid is h1 * (1-h1), NOT dh1*(1-dh1)
    dz1 = np.dot(dz2, theta2.T) * h1 * (1-h1)
    dw1 = np.dot(x.T, dz1)
    db1 = np.sum(dz1, axis=0)
    # L2 regularisation terms ADD to the gradients of the weights
    dw2 += lamda * theta2
    dw1 += lamda * theta1
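    # (The penalty being differentiated here is (lamda/2) * sum(theta**2),
    # whose gradient with respect to each weight matrix is lamda * theta.)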
    theta1 += -alpha * dw1
    theta2 += -alpha * dw2
    b1 += -alpha * db1
    b2 += -alpha * db2
#Prediction
#This is where you can kick yourself, you forgot to use the biases!
input1=np.array([[0,0],[0,1],[1,0],[1,1]])
z1=np.dot(input1,theta1)+b1
h1=1/(1+np.exp(-z1))
z2=np.dot(h1,theta2)+b2
h2=1/(1+np.exp(-z2))
print('network predictions:\n\n', h2)
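# Optional: threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions
# and check them against the XOR targets.
pred = (h2 > 0.5).astype(int)
print('thresholded predictions:\n', pred)
print('matches XOR targets:', np.array_equal(pred, y))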
"""
Python Machine Learning: Machine Learning and Deep Learning with Python,
scikit-learn, and TensorFlow, -Edition Paperback
September 20, 2017
The usual way to test for a NaN is to see if it's equal to itself:
def isNaN(num):
return num != num
"""