"""
===============================================
Confusion matrix, Support Vector Machines & PCA
===============================================

Example of confusion matrix usage to evaluate the quality
of the output of a classifier on the iris data set. The
diagonal elements represent the number of points for which
the predicted label is equal to the true label, while
off-diagonal elements are those that are mislabeled by the
classifier. The higher the diagonal values of the confusion
matrix the better, indicating many correct predictions.

The figures show the confusion matrix with and without
normalization by class support size (the number of elements
in each class). This kind of normalization is useful in the
case of class imbalance, giving a clearer picture of which
classes are being misclassified.

Here the results are not as good as they could be because our
choice for the regularization parameter C was not the best. In
real-life applications this parameter is usually chosen using
:ref:`grid_search`.

"""

print(__doc__)
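
# A tiny, self-contained illustration (an added sketch, not part of the
# original example) of how to read a confusion matrix: entry (i, j) counts
# samples whose true class is i and predicted class is j, so a perfect
# classifier yields a purely diagonal matrix. Dividing each row by its sum
# gives the per-class fractions used by the normalized plots below.
import numpy as np
from sklearn.metrics import confusion_matrix
cm_demo = confusion_matrix([0, 0, 1, 1, 2], [0, 0, 1, 2, 2])
print(cm_demo)                                       # [[2 0 0]
                                                     #  [0 1 1]
                                                     #  [0 0 1]]
print(cm_demo / cm_demo.sum(axis=1)[:, np.newaxis])  # row-normalized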

import itertools
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as clrs

from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA


# import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target
class_names = iris.target_names

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Run classifier, using a model that is too regularized (C too low) to see
# the impact on the results
classifier = svm.SVC(kernel='linear', C=0.01)
y_pred = classifier.fit(X_train, y_train).predict(X_test)
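
# A hedged sanity check (added, not in the original example): the mean
# accuracy on the held-out split makes the effect of the overly small C
# directly visible alongside the confusion matrices plotted below.
print('Test accuracy with C=0.01: %.2f' % classifier.score(X_test, y_test))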

#"Now we'll use linear SVC to partition our graph into clusters:"

# C = 1.0
# svc = svm.SVC(kernel='linear', C=C).fit(X, y)

def createClusteredData(N, k):
    """Generate N synthetic (income, age) points around k random centroids."""
    pointsPerCluster = float(N) / k
    X = []
    y = []
    for i in range(k):
        incomeCentroid = np.random.uniform(20000.0, 200000.0)
        ageCentroid = np.random.uniform(20.0, 70.0)
        for j in range(int(pointsPerCluster)):
            X.append([np.random.normal(incomeCentroid, 10000.0),
                      np.random.normal(ageCentroid, 2.0)])
            y.append(i)
    X = np.array(X)
    y = np.array(y)
    print('Generated cluster labels:')
    print(y)
    return X, y
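
# Note (an assumption, not in the original script): seeding NumPy's random
# generator before calling createClusteredData makes the synthetic clusters
# reproducible across runs, e.g.:
# np.random.seed(0)
# X, y = createClusteredData(125, 5)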


def plotPredictions(clf):
    # Build a dense grid over the (income, age) plane
    xx, yy = np.meshgrid(np.arange(0, 250000, 10),
                         np.arange(10, 70, 0.5))
    # Classify every grid point to obtain the decision regions
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
    # Overlay the training points, colored by their true cluster label
    plt.scatter(X[:, 0], X[:, 1], c=y.astype(float))
    plt.show()
   


def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


def plot_decision_regions(X, y, classifier, resolution=0.02):
    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = clrs.ListedColormap(colors[:len(np.unique(y))])
    # plot the decision surface
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    # plot class samples, one scatter call per class
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8,
                    color=cmap(idx), marker=markers[idx], label=cl)
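
# plot_decision_regions is defined above but never invoked in this script;
# a hedged usage sketch on the two-feature synthetic data generated below
# (after svc is fit) would be:
# plot_decision_regions(X, y, classifier=svc)
# plt.legend(loc='upper left')
# plt.show()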


# Compute the confusion matrix for the iris classifier
cnf_matrix = confusion_matrix(y_test, y_pred)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')

plt.show()

# Support Vector Machines on the synthetic income/age clusters
(X, y) = createClusteredData(125, 5)

plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=y.astype(float))
plt.show()

C = 1.0
svc = svm.SVC(kernel='linear', C=C).fit(X, y)

plotPredictions(svc)
# Predict the cluster for a few hand-picked (income, age) points
print(svc.predict([[200000, 70]]))
print(svc.predict([[170000, 60]]))
print(svc.predict([[150000, 50]]))
print(svc.predict([[100000, 40]]))
print(svc.predict([[50000, 30]]))


X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Run classifier, using a model that is too regularized (C too low) to see
# the impact on the results
classifier = svm.SVC(kernel='linear', C=0.01)
y_pred = classifier.fit(X_train, y_train).predict(X_test)
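
# The module docstring notes that in real applications C is chosen via a
# grid search rather than fixed by hand. A minimal sketch with GridSearchCV
# (the candidate values for C below are illustrative assumptions, not tuned
# for this data):
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.01, 0.1, 1.0, 10.0, 100.0]}
grid = GridSearchCV(svm.SVC(kernel='linear'), param_grid, cv=5)
grid.fit(X_train, y_train)
print('Best C found by grid search:', grid.best_params_['C'])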

cnf_matrix = confusion_matrix(y_test, y_pred)


np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
class_names = [0, 1, 2, 3, 4]

plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')

plt.show()

# To get a better understanding of the interaction of the dimensions,
# plot the data along its principal components. The synthetic data has
# only two features (income, age), so at most two PCA directions exist.
fig = plt.figure(1, figsize=(8, 6))
X_reduced = PCA(n_components=2).fit_transform(X)
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y,
            cmap=plt.cm.Set1, edgecolor='k', s=40)
plt.title("First two PCA directions")
plt.xlabel("1st eigenvector")
plt.ylabel("2nd eigenvector")

plt.show()
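
# Hedged follow-up (added, not part of the original script): PCA exposes
# how much of the total variance each component captures via the
# explained_variance_ratio_ attribute.
pca = PCA(n_components=2).fit(X)
print('Explained variance ratio per component:', pca.explained_variance_ratio_)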