Google Earth Engine（Tensorflow深度学习）

气象学家

发布于 2020-06-17 16:03:40

3K0

发布于 2020-06-17 16:03:40

文章被收录于专栏：气象学家

本次我们讲一下如何利用colab训练深度学习（Tensorflow）模型，并上传到Google云平台上面。然后我们再通过GEE进行调用，这样我们就可以在GEE上面运行自己的深度学习模型了。

不仅可以使用卷积神经网络，还可以用深度学习做一些回归的训练。

我们本次需要使用到的除了GEE的在线平台，还有colab（之前讲过如何使用），还要在谷歌云平台建立自己的工程（project）及工程下面的存储空间（storage bucket）。

本期的内容会比较长，代码也会稍微困难一些。官方文档对其中一些代码解释得也不是很清楚，但是我们主要是通过这些代码理解整个模型训练的流程，那些模型处理的细节我们也不用太关心。

colab训练模型

首先还是授权：

# Bring in the Colab auth helper and the Earth Engine client library.
from google.colab import auth
import ee

# Authorize this Colab session with the user's Google account.
auth.authenticate_user()

# Run the Earth Engine authorization flow, then initialize the client.
ee.Authenticate()
ee.Initialize()

定义我们常用的变量：

# Import the TensorFlow package.
import tensorflow as tf

# --- Settings defined up front ---
# Name of your Google Cloud project.
PROJECT = 'boyguo'
# Cloud Storage bucket that will hold the exported training and testing data.
OUTPUT_BUCKET = 'xiaoguo1'

# Landsat 8 surface reflectance collection used as the training imagery.
L8SR = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')
# Only this subset of bands is used.
BANDS = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7']

# Labelled features covering three land-cover types.
LABEL_DATA = ee.FeatureCollection('projects/google/demo_landcover_labels')
# Property that the model predicts.
LABEL = 'landcover'
# Three classes in total.
N_CLASSES = 3

# Column names of the exported table: every band followed by the label.
FEATURE_NAMES = [*BANDS, LABEL]

# File name prefixes and full gs:// paths of the exported training and
# testing files.
TRAIN_FILE_PREFIX = 'Training_demo'
TEST_FILE_PREFIX = 'Testing_demo'
file_extension = '.tfrecord.gz'
TRAIN_FILE_PATH = f'gs://{OUTPUT_BUCKET}/{TRAIN_FILE_PREFIX}{file_extension}'
TEST_FILE_PATH = f'gs://{OUTPUT_BUCKET}/{TEST_FILE_PREFIX}{file_extension}'

获取Landsat-8数据

# Cloud-masking function.
def maskL8sr(image):
  """Mask cloud and cloud-shadow pixels and rescale the selected bands.

  Args:
    image: an image that carries a 'pixel_qa' band and the bands in BANDS.

  Returns:
    The image with pixels masked wherever bit 3 or bit 5 of 'pixel_qa' is
    set, reduced to BANDS and divided by 10000.
  """
  pixel_qa = image.select('pixel_qa')
  # Bit 3 flags cloud shadow and bit 5 flags cloud.
  shadow_bit = ee.Number(2).pow(3).int()
  cloud_bit = ee.Number(2).pow(5).int()
  # A pixel is kept only when both flags are zero.
  shadow_free = pixel_qa.bitwiseAnd(shadow_bit).eq(0)
  cloud_free = pixel_qa.bitwiseAnd(cloud_bit).eq(0)
  clear = shadow_free.And(cloud_free)
  masked = image.updateMask(clear)
  return masked.select(BANDS).divide(10000)
# Build a median composite from the cloud-masked scenes selected by the
# 2018-01-01 .. 2018-12-31 date filter.
scenes_2018 = L8SR.filterDate('2018-01-01', '2018-12-31')
image = scenes_2018.map(maskL8sr).median()

提取训练数据和测试数据

# Sample the composite at the labelled features (30 m scale), keeping the
# label property, then add the 'random' column used for splitting.
labelled_samples = image.sampleRegions(
  collection=LABEL_DATA,
  properties=[LABEL],
  scale=30,
)
sample = labelled_samples.randomColumn()

# Rows with random < 0.7 become training data (about 70%); the remaining
# rows become testing data (about 30%).
is_training_row = ee.Filter.lt('random', 0.7)
is_testing_row = ee.Filter.gte('random', 0.7)
training = sample.filter(is_training_row)
testing = sample.filter(is_testing_row)

把数据传到云平台上

# Settings shared by both Cloud Storage exports.
export_options = dict(
  bucket=OUTPUT_BUCKET,
  fileFormat='TFRecord',
  selectors=FEATURE_NAMES,
)

# One export task per split; each writes a TFRecord file to the bucket.
training_task = ee.batch.Export.table.toCloudStorage(
  collection=training,
  description='Training Export',
  fileNamePrefix=TRAIN_FILE_PREFIX,
  **export_options)

testing_task = ee.batch.Export.table.toCloudStorage(
  collection=testing,
  description='Testing Export',
  fileNamePrefix=TEST_FILE_PREFIX,
  **export_options)

# Start both tasks.
training_task.start()
testing_task.start()

# Export tasks run asynchronously after start(), so wait until both tasks are
# no longer active before looking for their output. Checking immediately
# would normally report the files as missing even though the export is fine.
import time

for export_task in (training_task, testing_task):
  while export_task.active():
    print('Polling for task (id: {}).'.format(export_task.id))
    time.sleep(30)
  # Show the final state so a failed export is visible right away.
  print('Task {} finished with state {}.'.format(
      export_task.id, export_task.status()['state']))

# Check that the exported files now exist in the bucket.
print('Found training file.' if tf.io.gfile.exists(TRAIN_FILE_PATH)
    else 'No training file found.')
print('Found testing file.' if tf.io.gfile.exists(TEST_FILE_PATH)
    else 'No testing file found.')

解析数据

# Read the gzip-compressed TFRecord files.
# NOTE(review): both the training and the testing file are listed here, so the
# testing rows are part of the data the model is fitted on - confirm that this
# is intended.
train_dataset = tf.data.TFRecordDataset(
    filenames=[TRAIN_FILE_PATH, TEST_FILE_PATH],
    compression_type='GZIP',
)

# One parsing spec per exported column: a single float32 value each.
columns = [
  tf.io.FixedLenFeature(shape=[1], dtype=tf.float32) for _ in FEATURE_NAMES
]

# Dictionary with column names as keys and parsing specs as values.
features_dict = {name: spec for name, spec in zip(FEATURE_NAMES, columns)}


# Function that parses the exported records.
def parse_tfrecord(example_proto):
  """Parse one serialized Example into predictors and an integer label.

  The record is decoded into the structure defined by the module-level
  `features_dict`.

  Args:
    example_proto: a serialized Example.

  Returns:
    A tuple of the predictors dictionary (every parsed feature except LABEL)
    and the LABEL value cast to `int32`.
  """
  parsed = tf.io.parse_single_example(example_proto, features_dict)
  # Remove the label from the predictors before returning them.
  label = tf.cast(parsed.pop(LABEL), tf.int32)
  return parsed, label


# Apply the parsing function to every record, with 4 parallel calls.
parsed_dataset = train_dataset.map(
    map_func=parse_tfrecord,
    num_parallel_calls=4,
)


# Convert a parsed record into an (inputs, labels) tuple.
def to_tuple(inputs, label):
  """Reshape one parsed record into the tensors the model is trained on.

  Makes the predictors 1x1xP (P = number of predictor values) and the labels
  1x1xN_CLASSES.

  Args:
    inputs: dictionary of predictor tensors.
    label: integer class label tensor.

  Returns:
    A tuple of the reshaped predictors and the reshaped one-hot label.
  """
  band_values = list(inputs.values())
  predictors = tf.expand_dims(tf.transpose(band_values), 1)
  one_hot_label = tf.one_hot(indices=label, depth=N_CLASSES)
  return predictors, tf.expand_dims(one_hot_label, 1)


# Convert every record to an (inputs, labels) tuple, shuffle with a
# 128-element buffer and group the result into batches of 8.
input_dataset = parsed_dataset.map(to_tuple).shuffle(128).batch(8)

运行模型：

from tensorflow import keras
# Model architecture: two 1x1 convolutions with dropout in between; the last
# layer applies softmax over N_CLASSES outputs.
model = tf.keras.models.Sequential([
  tf.keras.layers.Input(shape=(None, None, len(BANDS))),
  tf.keras.layers.Conv2D(
      filters=64, kernel_size=(1, 1), activation=tf.nn.relu),
  tf.keras.layers.Dropout(rate=0.1),
  tf.keras.layers.Conv2D(
      filters=N_CLASSES, kernel_size=(1, 1), activation=tf.nn.softmax),
])

# Compile with the Adam optimizer, categorical cross-entropy loss and an
# accuracy metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 7 epochs.
model.fit(x=input_dataset, epochs=7)

# Save the trained model to the bucket in the 'tf' format.
MODEL_DIR = f'gs://{OUTPUT_BUCKET}/xiaoguo_model'
model.save(MODEL_DIR, save_format='tf')

对模型进行处理，使它可以上传到AI Platform上面

from tensorflow.python.tools import saved_model_utils


import json

# Read the 'serving_default' signature of the saved model ('serve' tag).
meta_graph_def = saved_model_utils.get_meta_graph_def(MODEL_DIR, 'serve')
serving_signature = meta_graph_def.signature_def['serving_default']
inputs = serving_signature.inputs
outputs = serving_signature.outputs

# Just take the first entry of each mapping, i.e. this model only has a
# single input and a single output. The name stays None if a mapping is empty.
input_name = next((tensor.name for tensor in inputs.values()), None)
output_name = next((tensor.name for tensor in outputs.values()), None)

# Make the dictionaries that map Earth Engine outputs and inputs to
# AI Platform inputs and outputs, respectively. Each JSON string is wrapped
# in single quotes because it is passed on a shell command line.
input_mapping = json.dumps({input_name: "array"})
output_mapping = json.dumps({output_name: "output"})
input_dict = f"'{input_mapping}'"
output_dict = f"'{output_mapping}'"

# Put the EEified model next to the trained model directory.
EEIFIED_DIR = f'gs://{OUTPUT_BUCKET}/eeified_xiaoguo_model'


# You need to set the project before using the model prepare command.
!earthengine set_project {PROJECT}
# Run the model prepare command with the trained model (MODEL_DIR) as source
# and EEIFIED_DIR as destination, passing the input/output name mappings.
!earthengine model prepare --source_dir {MODEL_DIR} --dest_dir {EEIFIED_DIR} --input {input_dict} --output {output_dict}

上传模型到AI Platform上面

# Names of the AI Platform model and of the version created under it.
MODEL_NAME = 'xiaoguo_demo_model_3'
VERSION_NAME = 'v0'


# Create the model in the project, then create a version of it whose origin
# is the EEified model directory.
!gcloud ai-platform models create {MODEL_NAME} --project {PROJECT}
!gcloud ai-platform versions create {VERSION_NAME} \
  --project {PROJECT} \
  --model {MODEL_NAME} \
  --origin {EEIFIED_DIR} \
  --framework "TENSORFLOW" \
  --runtime-version=2.1 \
  --python-version=3.7

利用GEE调用我们训练好的模型

我们的模型已经训练好并且上传到AI Platform上面，接下来我们就可以通过GEE使用我们自定义的模型了。

直接上代码：

// Feature collection used to filter the imagery and to center the map.
var table = ee.FeatureCollection("users/boyxiaozheng/feature/beijing");
// NOTE(review): `a` is not referenced anywhere else in this script.
var a = ee.Geometry.Point(-75.6, 40.02);
// Bands passed to the model.
var BANDS = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7'];

// Mask pixels whose 'pixel_qa' band has bit 3 (cloud shadow) or bit 5 (cloud)
// set, keep only BANDS and divide the values by 10000.
var maks_cloud = function(image) {
  var SHADOW_BIT = 1 << 3;
  var CLOUD_BIT = 1 << 5;
  var pixelQa = image.select('pixel_qa');
  // A pixel is kept only when both flags are zero.
  var shadowFree = pixelQa.bitwiseAnd(SHADOW_BIT).eq(0);
  var cloudFree = pixelQa.bitwiseAnd(CLOUD_BIT).eq(0);
  var clear = shadowFree.and(cloudFree);
  return image.updateMask(clear).select(BANDS).divide(10000);
};

// Scenes selected by the 2018-06-01 .. 2018-10-31 date filter that intersect
// `table`.
var scenes = ee.ImageCollection("LANDSAT/LC08/C01/T1_SR")
    .filterDate('2018-06-01', '2018-10-31')
    .filterBounds(table);
// Cloud-masked median composite, cast to float.
var image = scenes.map(maks_cloud).median().float();
// Output band description. Note the name here needs to match what was
// specified in the output dictionary passed to the EEifier.
var outputBands = {
  'output': {
    'type': ee.PixelType.float(),
    'dimensions': 1
  }
};

// Load the model that was just trained and deployed.
var model = ee.Model.fromAiPlatformPredictor({
  projectName: 'boyguo',
  modelName: 'xiaoguo_demo_model_3',
  version: 'v0',
  // Any tile size can be used here.
  inputTileSize: [8, 8],
  proj: ee.Projection('EPSG:4326').atScale(30),
  fixInputProj: true,
  outputBands: outputBands
});
// Run the model on the composite, passed in as an array image.
var predictions = model.predictImage(image.toArray());
// Each element of the output array holds the probabilities of the three
// land-cover types; flatten it into one band per type.
var probabilities = predictions.arrayFlatten([['bare', 'veg', 'water']]);
// Take the most probable type of each pixel and turn it into a band
// named 'label'.
var mostLikelyClass = predictions.arrayArgmax();
var label = mostLikelyClass.arrayGet([0]).rename('label');

// One colour per class index (0..2).
var label_vis = {
  'palette': ['red', 'green', 'blue'],
  'min': 0,
  'max': 2
};

// Display with a custom model can be slow; just wait patiently.
Map.centerObject(table, 12);
Map.addLayer(label, label_vis);

结果：