Deep Learning from Scratch1 - Ch4 신경망 학습 구현

2 minute read

시험데이터로 평가하기

2층 신경망 구현

import sys, os
sys.path.append(os.pardir)
from common.functions import *
from common.gradient import numerical_gradient

class TwoLayerNet:
    #초기화 메서드
    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):        
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size) # 0으로 가득찬 hidden_size 크기의 1차원 배열을 형성
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    #예측함수
    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        
        return y

    # 손실함수 값 계산. x: 입력데이터, t: 정답레이블
    def loss(self, x, t):
        y = self.predict(x)

        return cross_entropy_error(y, t)

    # 정확도 계산
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1) # 가로축(각 이미지)에서 최대값의 인덱스를 추출
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y==t) / float(x.shape[0]) # 행의 개수(이미지의 개수)로 나눈다.
        return accuracy

    # 수치미분
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        
        return grads

    # 오차역전파법. 기울기 계산을 고속으로 수행하는 방법
    def gradient(self, x, t):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        
        batch_num = x.shape[0]
        
        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        
        # backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)
        
        da1 = np.dot(dy, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)

        return grads

net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
print("가중치와 매개변수의 형상")
print(net.params['W1'].shape)
print(net.params['b1'].shape)
print(net.params['W2'].shape)
print(net.params['b2'].shape)
print()

x = np.random.rand(100, 784) # 100장 분량의 더미 입력데이터
t = np.random.rand(100, 10) # 100장 분량의 더미 정답레이블
y = net.predict(x)

신경망 학습을 위한 2층 신경망 구현
학습시간을 줄이기 위해 기울기를 수치미분방식이 아닌 오차역전파법을 이용하여 구함(이에 대해서는 Ch5에서 기술할 예정)

학습구현

import sys,os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from TwoLayerNet import TwoLayerNet

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

#하이퍼파라미터
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

'''
에폭(epoch)
- 학습에서 훈련데이터를 모두 소진했을 때의 횟수에 해당하는 단위
- ex) 10,000개의 훈련데이터를 100개의 미니배치로 학습하는 경우 확률적 경사 하강법을 100회 반복하면 모든 훈련데이터를 소진한다.
- 이때, 1에폭은 100회가 된다.
'''

# 1에폭당 반복 수
iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    #미니배치 획득
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    #기울기 계산
    #grad = network.numerical_gradient(x_batch, t_batch)
    grad = network.gradient(x_batch, t_batch)

    #매개변수 갱신
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate*grad[key]

    #학습 경과 기록
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    #1 에폭당 정확도 계산
    if i%iter_per_epoch == 0 :
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc : " + str(train_acc) + ", " + str(test_acc))

# 그래프 그리기
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()

에폭(epoch)이란 학습에서 훈련데이터를 모두 소진했을 때의 횟수에 해당하는 단위이다.
ex) 10,000개의 훈련데이터를 100개의 미니배치로 학습하는 경우 확률적 경사 하강법을 100회 반복하면 모든 훈련데이터를 소진한다. 이때, 1에폭은 100회가 된다.
정확도를 1에폭마다 계산하는 이유는 그래프의 큰 추이를 살필 때 지나치게 자주 기록할 필요성이 떨어지기 때문

그래프

에폭이 진행될수록(학습이 진행될수록) 훈련데이터와 시험데이터에 대한 정확도가 점점 좋아지고 있음
이번 학습에서는 오버피팅이 일어나지 않았음을 알 수 있다.

[참고자료]

밑바닥부터 시작하는 딥러닝

Share on

Twitter Facebook LinkedIn

Bokyoung

Deep Learning from Scratch1 - Ch4 신경망 학습 구현

시험데이터로 평가하기

2층 신경망 구현

학습구현

그래프

[참고자료]

Share on

Leave a comment

You may also enjoy

SW Expert Academy - 2383. 점심 식사시간

SW Expert Academy - 5650. 핀볼 게임

백준 15683 - 감시

백준 14891 - 톱니바퀴