# 如何用 Caffe 生成对抗样本？这篇文章告诉你一个更高效的算法

## 用Caffe生成对抗样本

FGS 法非常简单，用任何框架都很容易实现。Ian Goodfellow 提供了一个基于 TensorFlow 的官方实现（cleverhans 完整工具包），详细链接：

http://t.cn/RKAXoUz

http://t.cn/RKAXWrl

http://t.cn/RKAXRQ7

http://t.cn/RKAX3RZ

force_backward: true

# Model to attack: SqueezeNet v1.0 deployed through Caffe.
model_definition = '/path/to/deploy.prototxt'
model_weights = '/path/to/squeezenet_v1.0.caffemodel'
channel_means = numpy.array([104., 117., 123.])

# Initialize the network in test mode and force a single-image batch.
net = caffe.Net(model_definition, model_weights, caffe.TEST)
n_channels, height, width = net.blobs['data'].shape[-3:]
net.blobs['data'].reshape(1, n_channels, height, width)

# Preprocessing transformer:
# HWC -> CHW, subtract per-channel means, scale to [0, 255], RGB -> BGR.
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))
transformer.set_mean('data', channel_means)
transformer.set_raw_scale('data', 255)
transformer.set_channel_swap('data', (2, 1, 0))

# Load the image and run a forward pass to get the clean prediction.
img = caffe.io.load_image('little_white_dog.jpg')
transformed_img = transformer.preprocess('data', img)
net.blobs['data'].data[0] = transformed_img
net.forward()

# Index of the predicted label.
pred = numpy.argmax(net.blobs['prob'].data.flatten())

# Seed the gradient so the backward pass reduces the current prediction.
net.blobs['prob'].diff[0][pred] = -1.

# Fast gradient sign method: noise = epsilon * sign(gradient), epsilon = 1.0.
diffs = net.backward()
diff_sign_mat = numpy.sign(diffs['data'])
adversarial_noise = 1.0 * diff_sign_mat

# Targeted variant: set the output gradient at label_index to +1 so the
# backward pass pushes the input toward that class (contrast with the -1.
# used above to push away from the current prediction).
net.blobs[prob_blob].diff[0][label_index]=1.

# Deprocess back to HWC image space ([0, 1] range), clip values pushed
# out of range by the noise, then re-preprocess into a blob-shaped input.
attack_hwc = transformer.deprocess(
    data_blob, transformed_img + adversarial_noise[0])
attack_hwc[attack_hwc > 1] = 1.
attack_hwc[attack_hwc < 0] = 0.
attack_img = transformer.preprocess(data_blob, attack_hwc)

attack_img 就是与 Caffe 的 blob 形状一致的对抗样本，attack_hwc 则是按（图片高度、图片宽度、图片通道）顺序排列的版本，可以用 matplotlib 直接可视化。

## 可视化和简单分析

def make_n_test_adversarial_example(
        img, net, transformer, epsilon,
        data_blob='data', prob_blob='prob',
        label_index=None, top_k=5):
    """Run one FGS step on *img* and evaluate the result.

    If *label_index* is a valid class index the attack is targeted toward
    that class; otherwise it reduces the probability of the current
    prediction. *epsilon* scales the sign of the gradient.

    Returns (attack_hwc, top_preds, top_attacked_preds): the attacked
    image in HWC format plus the top-k (index, prob) pairs before and
    after the attack.
    """
    # Preprocess and forward the clean image.
    transformed_img = transformer.preprocess(data_blob, img)
    net.blobs[data_blob].data[0] = transformed_img
    net.forward()

    probs = list(enumerate(net.blobs[prob_blob].data.flatten()))
    num_classes = len(probs)
    sorted_probs = sorted(probs, key=itemgetter(1), reverse=True)
    top_preds = sorted_probs[:top_k]
    pred = sorted_probs[0][0]

    # Seed the gradient at the softmax output: +1 toward a target class
    # when label_index is given, otherwise -1 against the prediction.
    net.blobs[prob_blob].diff[...] = 0
    if type(label_index) is int and 0 <= label_index < num_classes:
        net.blobs[prob_blob].diff[0][label_index] = 1.
    else:
        net.blobs[prob_blob].diff[0][pred] = -1.

    # Fast gradient sign method: noise = epsilon * sign(gradient).
    diffs = net.backward()
    diff_sign_mat = numpy.sign(diffs[data_blob])
    adversarial_noise = epsilon * diff_sign_mat

    # Clip to the valid [0, 1] image range and rebuild the blob input.
    attack_hwc = transformer.deprocess(
        data_blob, transformed_img + adversarial_noise[0])
    attack_hwc[attack_hwc > 1] = 1.
    attack_hwc[attack_hwc < 0] = 0.
    attack_img = transformer.preprocess(data_blob, attack_hwc)

    # Forward the attacked image to measure the new top-k predictions.
    net.blobs[data_blob].data[0] = attack_img
    net.forward()
    probs = list(enumerate(net.blobs[prob_blob].data.flatten()))
    sorted_probs = sorted(probs, key=itemgetter(1), reverse=True)
    top_attacked_preds = sorted_probs[:top_k]

    return attack_hwc, top_preds, top_attacked_preds

def visualize_attack(title, original_img, attack_img,
                     original_preds, attacked_preds, labels):
    """Plot the original and attacked images side by side, each with a
    horizontal bar chart of its top-k class probabilities, and show the
    adversarial noise (scaled for visibility) in the middle panel."""
    pred = original_preds[0][0]
    attacked_pred = attacked_preds[0][0]
    k = len(original_preds)
    fig_name = '{}: {} to {}'.format(title, labels[pred], labels[attacked_pred])
    pyplot.figure(fig_name)

    for img, plt0, plt1, preds in [
            (original_img, 231, 234, original_preds),
            (attack_img, 233, 236, attacked_preds)]:
        # Image panel.
        pyplot.subplot(plt0)
        pyplot.axis('off')
        pyplot.imshow(img)
        # Probability bars panel, top prediction first (highest row).
        ax = pyplot.subplot(plt1)
        pyplot.axis('off')
        ax.set_xlim([0, 2])
        bars = ax.barh(range(k - 1, -1, -1), [x[1] for x in preds])
        for i, bar in enumerate(bars):
            x_loc = bar.get_x() + bar.get_width()
            y_loc = k - i - 1
            label = labels[preds[i][0]]
            ax.text(x_loc, y_loc,
                    '{}: {:.2f}%'.format(label, preds[i][1] * 100))

    # Middle panel: the noise, multiplied by 255 so it is visible.
    pyplot.subplot(232)
    pyplot.axis('off')
    noise = attack_img - original_img
    pyplot.imshow(255 * noise)

# Untargeted attack with epsilon = 1.0: push away from the predicted class.
attack_img, original_preds, attacked_preds = \
    make_n_test_adversarial_example(img, net, transformer, 1.0)
visualize_attack('example0', img, attack_img,
                 original_preds, attacked_preds, labels)

# Targeted attack with epsilon = 1.0 toward class index 296.
attack_img, original_preds, attacked_preds = \
    make_n_test_adversarial_example(img, net, transformer, 1.0,
                                    label_index=296)
visualize_attack('example1', img, attack_img,
                 original_preds, attacked_preds, labels)

http://t.cn/RLVzahm

## 利用迭代更好地生成对抗样本

https://racket-lang.org/

# Iterated targeted FGS: take a small step (epsilon = 0.1) toward class 752
# ("racket"), feeding each attacked image back in — ten steps in total.
attack_img, original_preds, attacked_preds = \
    make_n_test_adversarial_example(img, net, transformer, 0.1,
                                    label_index=752)
for i in range(9):
    attack_img, _, attacked_preds = \
        make_n_test_adversarial_example(attack_img, net, transformer, 0.1,
                                        label_index=752)
# BUG FIX: the original title was 'racket_try1'.format(i) — the string had
# no '{}' placeholder, so .format(i) was a no-op and the iteration count
# never appeared in the figure title. Use a real replacement field.
visualize_attack('racket_try{}'.format(i), img, attack_img,
                 original_preds, attacked_preds, labels)

http://t.cn/RKAYOdE

1372 篇文章118 人订阅

0 条评论

## 相关文章

1422

### 【专知-PyTorch手把手深度学习教程02】CNN快速理解与PyTorch实现: 图文+代码

【导读】主题链路知识是我们专知的核心功能之一，为用户提供AI领域系统性的知识学习服务，一站式学习人工智能的知识，包含人工智能（ 机器学习、自然语言处理、计算机视...

4K11

### 深度学习目标检测指南：如何过滤不感兴趣的分类及添加新分类？

AI 科技大本营按：本文编译自 Adrian Rosebrock 发表在 PyImageSearch 上的一篇博文。该博文缘起于一位网友向原作者请教的两个关于目...

1162

### 机器学习逻辑回归：算法兑现为python代码

0 回顾 昨天推送了逻辑回归的基本原理：从逻辑回归的目标任务，到二分类模型的构建，再到如何用梯度下降求出二分类模型的权重参数。今天，我们将对这个算法兑现为代码...

3505

35612

49911

### 深度学习的目标检测技术演进：R-CNN、Fast R-CNN、Faster R-CNN

object detection我的理解，就是在给定的图片中精确找到物体所在位置，并标注出物体的类别。object detection要解决的问题就是物体在哪里...

4625

1623

7626

### K近邻法(KNN)原理小结

K近邻法(k-nearest neighbors,KNN)是一种很基本的机器学习方法了，在我们平常的生活中也会不自主的应用。比如，我们判断一个人的人品，...

1205