# 程序猿python学习AlphaZero，TensorFlow强化学习AI游戏，100行代码运行看看！

Deep Q Network是DeepMind在2013年提出来的网络，是第一个成功地将深度学习和强化学习结合起来的模型，也是打败世界围棋冠军柯洁的AlphaGo Zero的核心原理，启发了后续一系列的工作。这些后续工作中比较有名的有Double DQN、Prioritized Replay 和 Dueling Network。

pip install pygame

pip install numpy

def convolutional_neural_network(input_image):
    """Build the Q-network: 3 conv layers + 1 fully-connected layer.

    Args:
        input_image: 4-D tensor of stacked game frames.
            NOTE(review): downstream code feeds (80, 100, 4) frame stacks —
            confirm against the caller.

    Returns:
        A [batch, output] tensor of per-action Q-values (no activation on
        the output layer, as usual for Q-value regression).

    NOTE(review): `output` (number of actions) is a module-level constant
    defined outside this listing. Initializing all weights with tf.zeros
    prevents symmetry breaking; tf.truncated_normal would be the usual
    choice — kept as in the original article.
    """
    weights = {
        'w_conv1': tf.Variable(tf.zeros([8, 8, 4, 32])),
        'w_conv2': tf.Variable(tf.zeros([4, 4, 32, 64])),
        'w_conv3': tf.Variable(tf.zeros([3, 3, 64, 64])),
        'w_fc4': tf.Variable(tf.zeros([3456, 784])),
        'w_out': tf.Variable(tf.zeros([784, output])),
    }

    biases = {
        'b_conv1': tf.Variable(tf.zeros([32])),
        'b_conv2': tf.Variable(tf.zeros([64])),
        'b_conv3': tf.Variable(tf.zeros([64])),
        'b_fc4': tf.Variable(tf.zeros([784])),
        'b_out': tf.Variable(tf.zeros([output])),
    }

    # NOTE(review): the three conv layers and fc4 were dropped by the page
    # extraction; reconstructed from the kernel shapes above (8x8 stride 4,
    # 4x4 stride 2, 3x3 stride 1 — the standard DQN stack). Verify strides
    # against the original article.
    conv1 = tf.nn.relu(
        tf.nn.conv2d(input_image, weights['w_conv1'],
                     strides=[1, 4, 4, 1], padding="VALID")
        + biases['b_conv1'])
    conv2 = tf.nn.relu(
        tf.nn.conv2d(conv1, weights['w_conv2'],
                     strides=[1, 2, 2, 1], padding="VALID")
        + biases['b_conv2'])
    conv3 = tf.nn.relu(
        tf.nn.conv2d(conv2, weights['w_conv3'],
                     strides=[1, 1, 1, 1], padding="VALID")
        + biases['b_conv3'])

    # Flatten to the fc4 input width declared in w_fc4 ([3456, 784]).
    conv3_flat = tf.reshape(conv3, [-1, 3456])
    fc4 = tf.nn.relu(tf.matmul(conv3_flat, weights['w_fc4']) + biases['b_fc4'])

    # Linear output layer: one Q-value per action.
    output_layer = tf.matmul(fc4, weights['w_out']) + biases['b_out']
    return output_layer

def train_neural_network(input_image):
    """Train the DQN with epsilon-greedy exploration and experience replay.

    Args:
        input_image: the tf.placeholder fed with stacked game frames
            (also used below as the feed_dict key).

    Side effects: runs the game loop forever, trains the network, and
    checkpoints the session to 'game.cpk' every 10000 steps.

    NOTE(review): MOVE_STAY, INITIAL_EPSILON, FINAL_EPSILON, EXPLORE,
    REPLAY_MEMORY, OBSERVE, BATCH, LEARNING_RATE and Game come from parts
    of the file outside this listing. The feed_dict contents, the optimizer
    line and the epsilon comparison were stripped by the page extraction
    and are reconstructed here per the standard DQN training loop — verify
    against the original article.
    """
    predict_action = convolutional_neural_network(input_image)

    # Placeholders: one-hot chosen action, and the TD target ("ground truth").
    argmax = tf.placeholder("float", [None, output])
    gt = tf.placeholder("float", [None])

    # Q-value of the taken action; squared TD error as the loss.
    action = tf.reduce_sum(tf.mul(predict_action, argmax), reduction_indices=1)
    cost = tf.reduce_mean(tf.square(action - gt))
    # NOTE(review): optimizer line missing from the listing; Adam with a
    # small learning rate is the conventional choice here.
    optimizer = tf.train.AdamOptimizer(1e-6).minimize(cost)

    game = Game()
    D = deque()  # replay memory

    _, image = game.step(MOVE_STAY)
    # Convert to grayscale.
    image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
    # Convert to a binary image.
    ret, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
    # Initial state: the same frame stacked 4 times along the channel axis.
    input_image_data = np.stack((image, image, image, image), axis=2)

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())

        saver = tf.train.Saver()

        n = 0
        epsilon = INITIAL_EPSILON
        while True:
            # Q-values for the current state.
            action_t = predict_action.eval(
                feed_dict={input_image: [input_image_data]})[0]

            argmax_t = np.zeros([output], dtype=np.int)
            # Epsilon-greedy: explore with probability epsilon.
            if random.random() <= epsilon:
                maxIndex = random.randrange(output)
            else:
                maxIndex = np.argmax(action_t)
            argmax_t[maxIndex] = 1
            # Anneal epsilon from INITIAL_EPSILON down to FINAL_EPSILON.
            if epsilon > FINAL_EPSILON:
                epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

            # for event in pygame.event.get():
            #     if event.type == QUIT:
            #         pygame.quit()
            #         sys.exit()
            reward, image = game.step(list(argmax_t))

            # Preprocess the new frame the same way as the initial one,
            # then push it onto the 4-frame state (drop the oldest frame).
            image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
            ret, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
            image = np.reshape(image, (80, 100, 1))
            input_image_data1 = np.append(image, input_image_data[:, :, 0:3], axis=2)

            # Store the transition (s, a, r, s') in replay memory.
            D.append((input_image_data, argmax_t, reward, input_image_data1))

            if len(D) > REPLAY_MEMORY:
                D.popleft()

            # Start training only after the observation phase.
            if n > OBSERVE:
                minibatch = random.sample(D, BATCH)
                input_image_data_batch = [d[0] for d in minibatch]
                argmax_batch = [d[1] for d in minibatch]
                reward_batch = [d[2] for d in minibatch]
                input_image_data1_batch = [d[3] for d in minibatch]

                gt_batch = []

                # Q-values of the successor states s'.
                out_batch = predict_action.eval(
                    feed_dict={input_image: input_image_data1_batch})

                # TD target: r + gamma * max_a' Q(s', a').
                # NOTE(review): the article reuses LEARNING_RATE here as the
                # discount factor — kept as-is.
                for i in range(0, len(minibatch)):
                    gt_batch.append(
                        reward_batch[i] + LEARNING_RATE * np.max(out_batch[i]))

                optimizer.run(feed_dict={
                    gt: gt_batch,
                    argmax: argmax_batch,
                    input_image: input_image_data_batch,
                })

            input_image_data = input_image_data1
            n = n + 1

            if n % 10000 == 0:
                saver.save(sess, 'game.cpk', global_step=n)  # save the model

            print(n, "epsilon:", epsilon, " ", "action:", maxIndex, " ", "reward:", reward)


train_neural_network(input_image)

AI傻乎乎的自动尝试玩这款游戏，不断试错，玩的不亦乐乎。

774 篇文章96 人订阅

0 条评论

## 相关文章

3505

29010

### A Gentle Introduction to Autocorrelation and Partial Autocorrelation (译文)

A Gentle Introduction to Autocorrelation and Partial Autocorrelation 自相关和偏自相关的简单...

3026

3669

### Github 项目推荐 | YOLOv3 的最小化 PyTorch 实现

https://github.com/eriklindernoren/PyTorch-YOLOv3

3962

2341

2307

4419

8037

### 人脸检测（一）——基于单文档的应用台程序

Opencv自带训练好的人脸模型（人脸的人眼、口等器官类似），此文基于vs2013建立应用台单文档程序，具体建立过程不予详细叙述，主要记录利用的Opencv自带...

4055