Day 05_Multivariate Gaussian_Winery_Classifier_MNIST

2019. 7. 13. 02:49

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# -*- coding: utf-8 -*-
"""Day 09_Multivariable Gaussian Classifier_MNIST.ipynb
Automatically generated by Colaboratory.
Original file is located at
    https://colab.research.google.com/drive/1yQhgjwD07GjN_vtmmM7B4sF-I_92ChOU
"""
 
# Colab-only setup: mount Google Drive at /gdrive so files stored there are
# reachable through the regular filesystem.
from google.colab import drive
drive.mount('/gdrive')
 
# Folder on the mounted drive intended for notebook resources.
# NOTE(review): PATH is not referenced by any code visible in this file;
# the MNIST archives below are read from the current working directory.
PATH = "/gdrive/My Drive/Colab Notebooks/resources/"
 
# %matplotlib inline
import matplotlib.pyplot as plt 
import gzip, os
import numpy as np
from scipy.stats import multivariate_normal
import sys
 
if sys.version_info[0] == 2:
    from urllib import urlretrieve
else:
    from urllib.request import urlretrieve
 
# Download the specified MNIST data file from Yann LeCun's website
def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
    """Fetch one file from ``source`` and store it locally as ``filename``.

    Args:
        filename: Name of the remote file. The same string is used as the
            local destination path, so the file lands relative to the
            current working directory.
        source: Base URL that ``filename`` is appended to.
    """
    # Announce the transfer first so progress is visible even if it fails.
    print("Downloading %s" % filename)
    remote_url = source + filename
    urlretrieve(remote_url, filename)
 
# Invokes download() if necessary, then reads in images
def load_mnist_images(filename):
    """Load a gzip-compressed MNIST image file as a 2-D ``uint8`` array.

    The file is downloaded first when it does not exist locally.

    Args:
        filename: Path of the ``.gz`` archive to read.

    Returns:
        Array of shape ``(n_images, 784)``; each row is one flattened image.
    """
    already_present = os.path.exists(filename)
    if not already_present:
        download(filename)

    with gzip.open(filename, 'rb') as archive:
        raw = archive.read()

    # The first 16 bytes are skipped (the header of the standard MNIST
    # image files); everything after that is treated as pixel bytes.
    pixels = np.frombuffer(raw, dtype=np.uint8, offset=16)
    return pixels.reshape(-1, 784)
 
def load_mnist_labels(filename):
    """Load a gzip-compressed MNIST label file as a 1-D ``uint8`` array.

    The file is downloaded first when it does not exist locally.

    Args:
        filename: Path of the ``.gz`` archive to read.

    Returns:
        1-D array holding one label byte per example.
    """
    already_present = os.path.exists(filename)
    if not already_present:
        download(filename)

    with gzip.open(filename, 'rb') as archive:
        raw = archive.read()

    # The first 8 bytes are skipped (the header of the standard MNIST
    # label files); each remaining byte is one label.
    return np.frombuffer(raw, dtype=np.uint8, offset=8)
 
def displaychar(image):
    """Show a flattened 784-value image as a 28x28 grayscale picture.

    Args:
        image: Sequence or array with 784 values; it is reshaped to
            ``(28, 28)`` before plotting.
    """
    grid = np.reshape(image, (28, 28))
    plt.imshow(grid, cmap=plt.cm.gray)
    # Hide ticks and frame so only the digit itself is visible.
    plt.axis('off')
    plt.show()
 
## Load the training set
# Each loader downloads its archive first when the file is not on disk.
train_data = load_mnist_images('train-images-idx3-ubyte.gz')
train_labels = load_mnist_labels('train-labels-idx1-ubyte.gz')
 
## Load the testing set
test_data = load_mnist_images('t10k-images-idx3-ubyte.gz')
test_labels = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
 
# Bare expression left over from the notebook: in a notebook cell it shows
# both shapes as output; when run as a plain script it has no effect.
train_data.shape , train_labels.shape
 
# Render the first training image as a quick sanity check.
displaychar(train_data[0])
 
def fit_generative_model(x, y, k=10):
    """Fit one multivariate Gaussian per class and estimate class priors.

    Args:
        x: 2-D array-like of shape ``(n_samples, d)``; one sample per row
            (for MNIST, ``d`` is 784 pixels).
        y: 1-D array-like of ``n_samples`` integer labels. Only labels in
            ``0 .. k-1`` are used.
        k: Number of target classes. Defaults to 10 (the ten digits).

    Returns:
        A tuple ``(mu, cov_mat, pi)`` where

        * ``mu`` has shape ``(k, d)`` and holds the per-class feature means,
        * ``cov_mat`` has shape ``(k, d, d)`` and holds the per-class
          covariance matrices, normalised by the class size (``bias=True``),
        * ``pi`` has shape ``(k,)`` and holds the fraction of samples that
          belong to each class, later used as the class prior/weight.

        A class without any samples keeps an all-zero mean and covariance
        and a prior of 0 instead of being filled with NaN.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Number of features per sample.
    d = x.shape[1]
    n_samples = len(y)

    mu = np.zeros((k, d))
    cov_mat = np.zeros((k, d, d))
    pi = np.zeros(k)

    for label in range(k):
        # Boolean mask selecting the rows that belong to this class.
        indices = (y == label)

        # Counted in NumPy; the builtin sum() would walk the mask
        # element by element in Python.
        count = int(np.count_nonzero(indices))
        if count == 0:
            # Mean/covariance of an empty selection would be NaN.
            continue

        members = x[indices, :]
        mu[label] = np.mean(members, axis=0)

        # Pass variables as rows (transposed) so the layout stays
        # unambiguous even when the class contains a single sample.
        cov_mat[label] = np.cov(members.T, bias=True)

        pi[label] = count / float(n_samples)
    return mu, cov_mat, pi
 
# Estimate per-class means, covariances and priors from the training set.
mu, cov_mat, pi = fit_generative_model(train_data, train_labels)

## regularize

## c is a hand-picked constant added to every diagonal entry of each
## class covariance matrix
c = 4

# Each element of cov_mat is a view, so the in-place add updates cov_mat.
for class_cov in cov_mat[:10]:
    class_cov += np.eye(*class_cov.shape) * c

# Show the mean image of every class.
for class_mean in mu[:10]:
    displaychar(class_mean)
 
# Each image shown above is the pixel-wise mean of one digit class, i.e. the average shape of that digit.
 
# Evaluation cell kept as a bare string literal, so it is NOT executed when
# this file runs (presumably exported this way because of the %%time cell
# magic -- confirm). The text inside scores every test image against each
# class as log(pi[label]) + logpdf under that class's Gaussian, predicts the
# highest-scoring class, and counts mismatches against test_labels.
# NOTE(review): the final print divides the error count by 100000, which does
# not match its own message ("errors out of 10000"); printing `errors`
# directly (or errors / len(test_labels) as a rate) looks like the intent.
"""%%time
# 결과 테스트
k = 10
score = np.zeros(( len(test_labels), k))
for label in range( 0, k ):
    rv = multivariate_normal(mean=mu[label], cov=cov_mat[label])
    for i in range( 0, len( test_labels )):
        score[i, label] = np.log( pi[label] ) + rv.logpdf(test_data[i , :])
predictions = np.argmax(score, axis = 1)        
errors = np.sum( predictions != test_labels )
print("model makes " +str(errors/100000)  + " errors out of 10000")
"""
 
Colored by Color Scripter
cs

저작자표시

'Python Library > Machine Learning' 카테고리의 다른 글

Day 07_ridge-regression_gradient_descent (0)	2019.07.17
Day 06.logistic_regression_Sentiment_Analysis (0)	2019.07.14
Day 05_Multivariate Gaussian_Winery_Classifier (0)	2019.07.13
Day 03.Probability and statistics Review (0)	2019.07.08
Day 02. KNN Practice with Spine Dataset (0)	2019.07.07

Software knowledge worth spreading

Day 05_Multivariate Gaussian_Winery_Classifier_MNIST

'Python Library > Machine Learning' 카테고리의 다른 글

+ Recent posts

티스토리툴바