Assignment 2 | Stanford CS231n: Deep Learning for Computer Vision

These notes follow the Python programming assignments of Stanford's CS231n course as their main thread, working through the core material of the course along with some of the mathematical derivations. This is the second article in the series.

About CS231n

The full name of the course is CS231n: Convolutional Neural Networks for Visual Recognition. It is offered by the Stanford Vision Lab. Note that when people refer to CS231n today, they usually mean the most recent offering, the Winter 2016 term (January through March).

Course description: Computer vision has become ubiquitous in our society, with applications in search and retrieval, image understanding, mobile apps, mapping and navigation, medicine and drug discovery, drones, and self-driving cars. Core to many of these applications are visual recognition tasks such as image classification, localization, and detection. Recent advances in neural network ("deep learning") approaches have greatly improved the performance of these state-of-the-art visual recognition systems. This course is a deep dive into the details of deep learning architectures, with a focus on end-to-end models for visual recognition tasks, particularly image classification. During the 10-week course, students will learn to implement, train, and debug their own neural networks and gain a detailed understanding of cutting-edge research in computer vision. The final assignment involves training a convolutional neural network with several million parameters and applying it to the largest image classification dataset (ImageNet). We focus on how to set up the image recognition problem, the learning algorithms (e.g., backpropagation), practical engineering tricks for training and fine-tuning the networks, and guide students through hands-on assignments and a final course project.

Course videos

Assignment 2


Python programming assignment (linear classifiers)

· The IDE I use is PyCharm.
· For the linear classifier part of Assignment 1, we need to complete linear_svm.py, softmax.py, and linear_classifier.py. Once they are done, you can use the code in svm.ipynb and softmax.ipynb to debug your model (a minimal gradient-check sketch is shown right after this list), pick the best model, and then measure its classification accuracy on the test set.
· Assignment 1 uses the CIFAR-10 image dataset, which you can also download from here.
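Before moving on to hyperparameter tuning, it helps to verify the analytic gradient against a numeric estimate, which is essentially what the checks in svm.ipynb do. Below is a minimal sketch of such a check on a tiny random problem; the toy sizes and the helper numeric_gradient are my own illustration rather than part of the assignment code, and it assumes the finished linear_svm.py (listed next) is importable.

import numpy as np
from linear_svm import svm_loss_naive

def numeric_gradient(f, W, h=1e-5):
    # Centered finite differences over every entry of W.
    grad = np.zeros_like(W)
    it = np.nditer(W, flags=['multi_index'])
    while not it.finished:
        ix = it.multi_index
        old = W[ix]
        W[ix] = old + h
        fxph = f(W)          # f(W + h)
        W[ix] = old - h
        fxmh = f(W)          # f(W - h)
        W[ix] = old
        grad[ix] = (fxph - fxmh) / (2.0 * h)
        it.iternext()
    return grad

# Tiny random problem: 5 examples, 10 features, 3 classes (hypothetical sizes).
np.random.seed(0)
X = np.random.randn(5, 10)
y = np.random.randint(3, size=5)
W = 0.001 * np.random.randn(10, 3)

loss, grad_analytic = svm_loss_naive(W, X, y, reg=0.0)
grad_numeric = numeric_gradient(lambda w: svm_loss_naive(w, X, y, 0.0)[0], W)
rel_error = np.max(np.abs(grad_analytic - grad_numeric) /
                   np.maximum(1e-8, np.abs(grad_analytic) + np.abs(grad_numeric)))
print('max relative error: %e' % rel_error)    # should be around 1e-7 or smaller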

The code for linear_svm.py is as follows:

__coauthor__ = 'Deeplayer'
# 5.19.2016

import numpy as np

def svm_loss_naive(W, X, y, reg):
    """
    Structured SVM loss function, naive implementation (with loops).

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means 
         that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    dW = np.zeros(W.shape)   # initialize the gradient as zero
    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    for i in xrange(num_train):    
        scores = X[i].dot(W)    
        correct_class_score = scores[y[i]]
        for j in xrange(num_classes):
            if j == y[i]:    
                continue
            margin = scores[j] - correct_class_score + 1   # note delta = 1
            if margin > 0:
                loss += margin
                dW[:, y[i]] += -X[i, :]     # compute the correct_class gradients
                dW[:, j] += X[i, :]         # compute the wrong_class gradients
    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss /= num_train
    dW /= num_train
    # Add regularization to the loss.
    loss += 0.5 * reg * np.sum(W * W)
    dW += reg * W
    return loss, dW

def svm_loss_vectorized(W, X, y, reg):
    """
    Structured SVM loss function, vectorized implementation. Inputs and outputs
    are the same as svm_loss_naive.
    """
    loss = 0.0
    dW = np.zeros(W.shape)   # initialize the gradient as zero
    scores = X.dot(W)        # N by C
    num_train = X.shape[0]
    num_classes = W.shape[1]
    scores_correct = scores[np.arange(num_train), y]   # shape (N,)
    scores_correct = np.reshape(scores_correct, (num_train, 1))  # N by 1
    margins = scores - scores_correct + 1.0     # N by C
    margins[np.arange(num_train), y] = 0.0
    margins[margins <= 0] = 0.0
    loss += np.sum(margins) / num_train
    loss += 0.5 * reg * np.sum(W * W)
    # compute the gradient
    margins[margins > 0] = 1.0
    row_sum = np.sum(margins, axis=1)                  # shape (N,)
    margins[np.arange(num_train), y] = -row_sum        
    dW += np.dot(X.T, margins)/num_train + reg * W     # D by C
  
    return loss, dW
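For reference, the loss that both functions compute is the multiclass SVM (hinge) loss with Δ = 1, plus the 0.5 · reg · ‖W‖² regularization term used above:

$$
L = \frac{1}{N}\sum_{i=1}^{N}\sum_{j\neq y_i}\max\bigl(0,\; s_{ij}-s_{iy_i}+\Delta\bigr) + \frac{\lambda}{2}\sum_k W_k^2,
\qquad s_i = x_i W .
$$

Its gradient with respect to the columns of W is

$$
\frac{\partial L_i}{\partial w_j} = \mathbb{1}\bigl[s_{ij}-s_{iy_i}+\Delta>0\bigr]\,x_i \quad (j\neq y_i),
\qquad
\frac{\partial L_i}{\partial w_{y_i}} = -\Bigl(\sum_{j\neq y_i}\mathbb{1}\bigl[s_{ij}-s_{iy_i}+\Delta>0\bigr]\Bigr)\,x_i ,
$$

which is exactly the margins > 0 mask and the row_sum trick in the vectorized code; the regularization term contributes an extra λW (reg * W in the code).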

The code for softmax.py is as follows:

__coauthor__ = 'Deeplayer'
# 5.19.2016

import numpy as np

def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops).
    Inputs and outputs are the same as svm_loss_naive.
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)    # D by C
    dW_each = np.zeros_like(W)
    num_train, dim = X.shape
    num_class = W.shape[1]
    f = X.dot(W)    # N by C
    # Considering the Numeric Stability
    f_max = np.reshape(np.max(f, axis=1), (num_train, 1))   # N by 1
    prob = np.exp(f - f_max) / np.sum(np.exp(f - f_max), axis=1, keepdims=True) # N by C
    y_trueClass = np.zeros_like(prob)
    y_trueClass[np.arange(num_train), y] = 1.0
    for i in xrange(num_train):
        for j in xrange(num_class):    
            loss += -(y_trueClass[i, j] * np.log(prob[i, j]))    
            dW_each[:, j] = -(y_trueClass[i, j] - prob[i, j]) * X[i, :]
        dW += dW_each
    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)
    dW /= num_train
    dW += reg * W

    return loss, dW

def softmax_loss_vectorized(W, X, y, reg):    
    """    
    Softmax loss function, vectorized version.    

    Inputs and outputs are the same as softmax_loss_naive.    
    """    
    # Initialize the loss and gradient to zero.    
    loss = 0.0    
    dW = np.zeros_like(W)    # D by C    
    num_train, dim = X.shape

    f = X.dot(W)    # N by C
    # Considering the Numeric Stability
    f_max = np.reshape(np.max(f, axis=1), (num_train, 1))   # N by 1
    prob = np.exp(f - f_max) / np.sum(np.exp(f - f_max), axis=1, keepdims=True)
    y_trueClass = np.zeros_like(prob)
    y_trueClass[range(num_train), y] = 1.0    # N by C
    loss += -np.sum(y_trueClass * np.log(prob)) / num_train + 0.5 * reg * np.sum(W * W)
    dW += -np.dot(X.T, y_trueClass - prob) / num_train + reg * W

    return loss, dW
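For reference, the loss these two functions compute is the averaged cross-entropy of the softmax probabilities plus the same regularization term, and its gradient has a compact matrix form:

$$
L = -\frac{1}{N}\sum_{i=1}^{N}\log\frac{e^{f_{iy_i}}}{\sum_j e^{f_{ij}}} + \frac{\lambda}{2}\sum_k W_k^2,
\qquad
\frac{\partial L}{\partial W} = \frac{1}{N}\,X^{\top}(P - Y) + \lambda W,
$$

where P holds the probabilities prob and Y is the one-hot matrix y_trueClass. The max subtraction marked "Considering the Numeric Stability" does not change P, because

$$
\frac{e^{f_{ij}-\max_k f_{ik}}}{\sum_j e^{f_{ij}-\max_k f_{ik}}}
= \frac{e^{f_{ij}}}{\sum_j e^{f_{ij}}},
$$

it only keeps the exponentials from overflowing for large scores.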

The code for linear_classifier.py is as follows:

__coauthor__ = 'Deeplayer'
# 5.19.2016

from linear_svm import *
from softmax import *

class LinearClassifier(object):

    def __init__(self):
        self.W = None

    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
              batch_size=200, verbose=True):
        """
        Train this linear classifier using stochastic gradient descent.

        Inputs:
        - X: A numpy array of shape (N, D) containing training data; there are N
             training samples each of dimension D.
        - y: A numpy array of shape (N,) containing training labels; y[i] = c
             means that X[i] has label 0 <= c < C for C classes.
        - learning_rate: (float) learning rate for optimization.
        - reg: (float) regularization strength.
        - num_iters: (integer) number of steps to take when optimizing
        - batch_size: (integer) number of training examples to use at each step.
        - verbose: (boolean) If true, print progress during optimization.

        Outputs:
        A list containing the value of the loss function at each training iteration.
        """
        num_train, dim = X.shape
        # assume y takes values 0...K-1 where K is number of classes
        num_classes = np.max(y) + 1
        if self.W is None:
            # lazily initialize W
            self.W = 0.001 * np.random.randn(dim, num_classes)   # D by C

        # Run stochastic gradient descent (mini-batch) to optimize W
        loss_history = []
        for it in xrange(num_iters):
            X_batch = None
            y_batch = None
            # Sampling with replacement is faster than sampling without replacement.
            sample_index = np.random.choice(num_train, batch_size, replace=False)
            X_batch = X[sample_index, :]   # batch_size by D
            y_batch = y[sample_index]      # 1 by batch_size
            # evaluate loss and gradient
            loss, grad = self.loss(X_batch, y_batch, reg)
            loss_history.append(loss)
            # perform parameter update
            self.W += -learning_rate * grad
            if verbose and it % 100 == 0:
                print 'Iteration %d / %d: loss %f' % (it, num_iters, loss)

        return loss_history

    def predict(self, X):
        """
        Use the trained weights of this linear classifier to predict labels for
        data points.

        Inputs:
        - X: D x N array of training data. Each column is a D-dimensional point.

        Returns:
        - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
          array of length N, and each element is an integer giving the
          predicted class.
        """
        y_pred = np.zeros(X.shape[1])    # 1 by N
        y_pred = np.argmax(np.dot(self.W.T, X), axis=0)
        return y_pred

    def loss(self, X_batch, y_batch, reg):
        """
        Compute the loss function and its derivative.
        Subclasses will override this.

        Inputs:
        - X_batch: A numpy array of shape (N, D) containing a minibatch of N
          data points; each point has dimension D.
        - y_batch: A numpy array of shape (N,) containing labels for the minibatch.
        - reg: (float) regularization strength.

        Returns: A tuple containing:
        - loss as a single float
        - gradient with respect to self.W; an array of the same shape as W
        """
        pass

class LinearSVM(LinearClassifier):
    """
    A subclass that uses the Multiclass SVM loss function
    """
    def loss(self, X_batch, y_batch, reg):
        return svm_loss_vectorized(self.W, X_batch, y_batch, reg)

class Softmax(LinearClassifier):
    """
    A subclass that uses the Softmax + Cross-entropy loss function
    """
    def loss(self, X_batch, y_batch, reg):
        return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
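As a quick sanity check of the classes above, here is a minimal sketch that trains a LinearSVM on random data and calls predict; the toy sizes and the seed are my own illustration, not part of the assignment.

import numpy as np
from linear_classifier import LinearSVM

# Hypothetical toy problem: 500 examples, 64 features, 10 classes.
np.random.seed(0)
X = np.random.randn(500, 64)
y = np.random.randint(10, size=500)

svm = LinearSVM()
loss_history = svm.train(X, y, learning_rate=1e-3, reg=1e-5,
                         num_iters=200, batch_size=100, verbose=False)
print('final loss: %f' % loss_history[-1])

# predict() expects a D x N array (one column per example), so pass the transpose.
y_pred = svm.predict(X.T)
print('training accuracy: %f' % np.mean(y_pred == y))

The same train/predict pair is what the tuning scripts below rely on, just with the preprocessed CIFAR-10 data instead of random numbers.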


Below I paste the code for tuning the hyperparameters to obtain the best model, along with some of the results and figures:

1. LinearClassifier_svm_start.py

__coauthor__ = 'Deeplayer'
# 5.20.2016

import numpy as np
import matplotlib.pyplot as plt
import math
from linear_classifier import *
from data_utils import load_CIFAR10

# Load the raw CIFAR-10 data.
cifar10_dir = 'E:/PycharmProjects/ML/CS231n/cifar-10-batches-py'   # you should change this
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# As a sanity check, we print out the size of the training and test data.
print 'Training data shape: ', X_train.shape      # (50000, 32, 32, 3)
print 'Training labels shape: ', y_train.shape    # (50000,)
print 'Test data shape: ', X_test.shape           # (10000, 32, 32, 3)
print 'Test labels shape: ', y_test.shape         # (10000,)
print

# Visualize some examples from the dataset.
# We show a few examples of training images from each class.
classes = ['plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7
for y, cls in enumerate(classes):
    idxs = np.flatnonzero(y_train == y)
    idxs = np.random.choice(idxs, samples_per_class, replace=False)
    for i, idx in enumerate(idxs):
        plt_idx = i * num_classes + y + 1
        plt.subplot(samples_per_class, num_classes, plt_idx)
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            plt.title(cls)
plt.show()

# Split the data into train, val, and test sets.
num_training = 49000
num_validation = 1000
num_test = 1000
mask = range(num_training, num_training + num_validation)
X_val = X_train[mask]        # (1000, 32, 32, 3)
y_val = y_train[mask]        # (1000,)
mask = range(num_training)
X_train = X_train[mask]      # (49000, 32, 32, 3)
y_train = y_train[mask]      # (49000,)
mask = range(num_test)
X_test = X_test[mask]        # (1000, 32, 32, 3)
y_test = y_test[mask]        # (1000,)

# Preprocessing1: reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))   # (49000, 3072)
X_val = np.reshape(X_val, (X_val.shape[0], -1))         # (1000, 3072)
X_test = np.reshape(X_test, (X_test.shape[0], -1))      # (1000, 3072)

# Preprocessing2: subtract the mean image
mean_image = np.mean(X_train, axis=0)    # (3072,)
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image

# Visualize the mean image
plt.figure(figsize=(4, 4))
plt.imshow(mean_image.reshape((32, 32, 3)).astype('uint8'))
plt.show()

# Bias trick, extending the data
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])   # (49000, 3073)
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])         # (1000, 3073)
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])      # (1000, 3073)

# Use the validation set to tune hyperparameters (regularization strength
# and learning rate).
learning_rates = [1e-7, 5e-5]
regularization_strengths = [5e4, 1e5]
results = {}
best_val = -1      # The highest validation accuracy that we have seen so far.
best_svm = None    # The LinearSVM object that achieved the highest validation rate.
iters = 1500
for lr in learning_rates:
    for rs in regularization_strengths:
        svm = LinearSVM()
        svm.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)
        Tr_pred = svm.predict(X_train.T)
        acc_train = np.mean(y_train == Tr_pred)
        Val_pred = svm.predict(X_val.T)
        acc_val = np.mean(y_val == Val_pred)
        results[(lr, rs)] = (acc_train, acc_val)
        if best_val < acc_val:
            best_val = acc_val
            best_svm = svm

# Print results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print 'lr %e reg %e train accuracy: %f val accuracy: %f' % \
                                    (lr, reg, train_accuracy, val_accuracy)
print 'Best validation accuracy achieved during validation: %f' % best_val    # around 38.2%

# Visualize the learned weights for each class
w = best_svm.W[:-1, :]       # strip out the bias
w = w.reshape(32, 32, 3, 10)
w_min, w_max = np.min(w), np.max(w)
classes = ['plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
for i in xrange(10):
    plt.subplot(2, 5, i + 1)
    # Rescale the weights to be between 0 and 255
    wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
    plt.imshow(wimg.astype('uint8'))
    plt.axis('off')
    plt.title(classes[i])
plt.show()

# Evaluate the best svm on test set
Ts_pred = best_svm.predict(X_test.T)
test_accuracy = np.mean(y_test == Ts_pred)    # around 37.1%
print 'LinearSVM on raw pixels of CIFAR-10 final test set accuracy: %f' % test_accuracy


Below are visualizations of some of the raw images, the mean image, and the learned weights:

figure_1.png — sample training images from each class

figure_2.png — the mean image

figure_3.png — the learned weights for each class


2. LinearClassifier_softmax_start.py

__coauthor__ = 'Deeplayer'
# 5.20.2016

import numpy as np
from data_utils import load_CIFAR10
from linear_classifier import *

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):  
    """ 
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare 
    it for the linear classifier. These are the same steps as we used for the SVM, 
    but condensed to a single function.  
    """  
    # Load the raw CIFAR-10 data 
    cifar10_dir = 'E:/PycharmProjects/ML/CS231n/cifar-10-batches-py'   # make a change
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)  
    # subsample the data  
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]  
    y_val = y_train[mask]  
    mask = range(num_training)  
    X_train = X_train[mask]  
    y_train = y_train[mask]  
    mask = range(num_test)  
    X_test = X_test[mask]  
    y_test = y_test[mask]  
    # Preprocessing: reshape the image data into rows  
    X_train = np.reshape(X_train, (X_train.shape[0], -1))  
    X_val = np.reshape(X_val, (X_val.shape[0], -1)) 
    X_test = np.reshape(X_test, (X_test.shape[0], -1))  
    # subtract the mean image  
    mean_image = np.mean(X_train, axis=0)  
    X_train -= mean_image  
    X_val -= mean_image  
    X_test -= mean_image  
    # add bias dimension and transform into columns  
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])  
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])  
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])  

    return X_train, y_train, X_val, y_val, X_test, y_test

# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()

# Use the validation set to tune hyperparameters (regularization strength 
# and learning rate).
results = {}
best_val = -1
best_softmax = None
learning_rates = [1e-7, 5e-7]
regularization_strengths = [5e4, 1e4]
iters = 1500
for lr in learning_rates:    
    for rs in regularization_strengths:        
        softmax = Softmax()       
        softmax.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)        
        Tr_pred = softmax.predict(X_train.T)       
        acc_train = np.mean(y_train == Tr_pred)       
        Val_pred = softmax.predict(X_val.T)        
        acc_val = np.mean(y_val == Val_pred)       
        results[(lr, rs)] = (acc_train, acc_val)       
        if best_val < acc_val:           
            best_val = acc_val            
            best_softmax = softmax

# Print out results.
for lr, reg in sorted(results):    
    train_accuracy, val_accuracy = results[(lr, reg)]    
    print 'lr %e reg %e train accuracy: %f val accuracy: %f' % \
                                    (lr, reg, train_accuracy, val_accuracy)
print 'best validation accuracy achieved during cross-validation: %f' % best_val    # around 38.9%

# Evaluate the best softmax on test set.
Ts_pred = best_softmax.predict(X_test.T)
test_accuracy = np.mean(y_test == Ts_pred)       # around 37.4%
print 'Softmax on raw pixels of CIFAR-10 final test set accuracy: %f' % test_accuracy

Finally, using the SVM as an example, let's compare the running speed of the vectorized and non-vectorized implementations:

--> naive_vs_vectorized.py

__coauthor__ = 'Deeplayer'
# 5.20.2016

import time
from linear_svm import *
from data_utils import load_CIFAR10

def get_CIFAR10_data(num_training=49000, num_dev=500):  

    # Load the raw CIFAR-10 data  
    cifar10_dir = 'E:/PycharmProjects/ML/CS231n/cifar-10-batches-py'   # make a change
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)  
    mask = range(num_training)  
    X_train = X_train[mask]  
    mask = np.random.choice(num_training, num_dev, replace=False)    
    X_dev = X_train[mask]  
    y_dev = y_train[mask]  

    X_train = np.reshape(X_train, (X_train.shape[0], -1))  
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))    

    mean_image = np.mean(X_train, axis=0)  
    X_dev -= mean_image  
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])  

    return X_dev, y_dev

X_dev, y_dev = get_CIFAR10_data()
# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.00001)
toc = time.time()
print 'Naive loss and gradient: computed in %fs' % (toc - tic)    # around 0.198s

tic = time.time()
loss_vectorized, grad_vectorized = svm_loss_vectorized(W, X_dev, y_dev, 0.00001)
toc = time.time()
print 'Vectorized loss and gradient: computed in %fs' % (toc - tic)    # around 0.005s

Originally published on the WeChat official account 人工智能LeadAI (atleadai).

Original publication date: 2017-09-05


