TypeError: Expected state_dict to be dict-like, got <class 'function'>.
下面是模型代码(model/AerialNet.py):
from __future__ import print_function, division
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision.models.vgg import model_urls
from torchvision.models.resnet import model_urls as resnet_urls
import pretrainedmodels
class FeatureExtraction(torch.nn.Module):
    """Pretrained CNN backbone truncated to act as a feature extractor.

    Args:
        train_fe: When False, every backbone parameter gets
            ``requires_grad = False`` (the backbone is frozen).
        use_cuda: When True, the truncated backbone is moved to the GPU with
            ``.cuda()``.
        feature_extraction_cnn: Backbone name. One of ``'vgg'``,
            ``'resnet101'``, ``'resnext101'``, ``'se_resnext101'`` or
            ``'densenet169'``.
        last_layer: Name of the last layer to keep. Only honoured for
            ``'vgg'`` (default ``'pool4'``) and ``'resnet101'`` (default
            ``'layer3'``); the other backbones use a fixed cut point.

    Raises:
        ValueError: If ``feature_extraction_cnn`` is not a supported name, or
            if ``last_layer`` is not a known layer of the chosen backbone.
    """

    def __init__(self, train_fe=True, use_cuda=True, feature_extraction_cnn='vgg', last_layer=''):
        super(FeatureExtraction, self).__init__()
        if feature_extraction_cnn == 'vgg':
            # Rewrite the weight URL so the download goes over plain HTTP.
            model_urls['vgg16'] = model_urls['vgg16'].replace('https://', 'http://')
            backbone = models.vgg16(pretrained=True)
            # Keep the feature extraction network up to the indicated layer.
            vgg_feature_layers = ['conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 'conv2_1',
                                  'relu2_1', 'conv2_2', 'relu2_2', 'pool2', 'conv3_1', 'relu3_1',
                                  'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'pool3', 'conv4_1',
                                  'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'pool4',
                                  'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'pool5']
            if last_layer == '':
                last_layer = 'pool4'
            last_layer_idx = vgg_feature_layers.index(last_layer)
            self.model = nn.Sequential(*list(backbone.features.children())[:last_layer_idx + 1])
        elif feature_extraction_cnn == 'resnet101':
            # Rewrite the weight URL so the download goes over plain HTTP.
            resnet_urls['resnet101'] = resnet_urls['resnet101'].replace('https://', 'http://')
            backbone = models.resnet101(pretrained=True)
            resnet_feature_layers = ['conv1',
                                     'bn1',
                                     'relu',
                                     'maxpool',
                                     'layer1',
                                     'layer2',
                                     'layer3',
                                     'layer4']
            if last_layer == '':
                last_layer = 'layer3'
            last_layer_idx = resnet_feature_layers.index(last_layer)
            resnet_module_list = [backbone.conv1,
                                  backbone.bn1,
                                  backbone.relu,
                                  backbone.maxpool,
                                  backbone.layer1,
                                  backbone.layer2,
                                  backbone.layer3,
                                  backbone.layer4]
            self.model = nn.Sequential(*resnet_module_list[:last_layer_idx + 1])
        elif feature_extraction_cnn == 'resnext101':
            backbone = pretrainedmodels.resnext101_32x4d(pretrained='imagenet')
            # The first child is itself sliceable; drop its last sub-module.
            self.model = nn.Sequential(*list(backbone.children())[0][:-1])
        elif feature_extraction_cnn == 'se_resnext101':
            backbone = pretrainedmodels.se_resnext101_32x4d(pretrained='imagenet')
            # Drop the last three top-level children of the network.
            self.model = nn.Sequential(*list(backbone.children())[:-3])
        elif feature_extraction_cnn == 'densenet169':
            backbone = models.densenet169(pretrained=True)
            # Drop the last three modules of the ``features`` stack.
            self.model = nn.Sequential(*list(backbone.features.children())[:-3])
        else:
            # Previously an unknown name left ``self.model`` unset and only
            # failed later with an opaque AttributeError.
            raise ValueError(
                "Unsupported feature_extraction_cnn: {!r}. Expected one of 'vgg', "
                "'resnet101', 'resnext101', 'se_resnext101', 'densenet169'.".format(feature_extraction_cnn))
        if not train_fe:
            # Freeze the backbone parameters.
            for param in self.model.parameters():
                param.requires_grad = False
        # Move to GPU.
        if use_cuda:
            self.model.cuda()

    def forward(self, image_batch):
        """Run ``image_batch`` through the truncated backbone and return its output."""
        return self.model(image_batch)
class FeatureL2Norm(torch.nn.Module):
    """Scale a tensor so that it has (approximately) unit L2 norm along dim 1."""

    def __init__(self):
        super(FeatureL2Norm, self).__init__()

    def forward(self, feature):
        """Return ``feature`` divided by its L2 norm taken over dimension 1.

        A small constant is added under the square root so that all-zero
        vectors do not cause a division by zero.
        """
        epsilon = 1e-6
        squared_sum = feature.pow(2).sum(dim=1, keepdim=True)
        # The kept dimension broadcasts against ``feature`` in the division.
        norm = (squared_sum + epsilon).pow(0.5)
        return feature / norm
class FeatureCorrelation(torch.nn.Module):
    """Dense correlation between two feature maps of identical shape."""

    def __init__(self):
        super(FeatureCorrelation, self).__init__()

    def forward(self, feature_A, feature_B):
        """Correlate every position of ``feature_B`` with every position of ``feature_A``.

        Both inputs are read as ``(b, c, h, w)``. The result has shape
        ``(b, h * w, h, w)``: entry ``[n, k, i, j]`` is the dot product over
        channels of ``feature_B[n, :, i, j]`` and ``feature_A[n, :, y, x]``
        with ``k = x * h + y`` (positions of A are enumerated column-first).
        """
        batch, channels, height, width = feature_A.size()
        num_positions = height * width
        # Flatten A column-first: swap the spatial axes before collapsing them.
        flat_A = feature_A.permute(0, 1, 3, 2).contiguous().view(batch, channels, num_positions)
        # Flatten B row-first and put the channel axis last for the batched matmul.
        flat_B = feature_B.view(batch, channels, num_positions).permute(0, 2, 1)
        # (b, hw, c) x (b, c, hw) -> (b, hw_B, hw_A)
        product = torch.bmm(flat_B, flat_A)
        # Unfold B's positions back to (h, w) and move A's index to the channel axis.
        return product.view(batch, height, width, num_positions).permute(0, 3, 1, 2)
class FeatureRegression(nn.Module):
    """Regress transformation parameters from a correlation tensor.

    Two conv/batch-norm/ReLU stages (7x7 then 5x5 kernels, no padding) are
    followed by a single linear layer producing ``output_dim`` values. The
    first convolution takes ``15 * 15`` input channels and the linear layer
    takes ``64 * 5 * 5`` input features.

    Args:
        output_dim: Number of regressed parameters.
        use_cuda: When True, both sub-modules are moved to the GPU.
    """

    def __init__(self, output_dim=6, use_cuda=True):
        super(FeatureRegression, self).__init__()
        conv_layers = [
            nn.Conv2d(15 * 15, 128, kernel_size=7, padding=0),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 64, kernel_size=5, padding=0),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        ]
        self.conv = nn.Sequential(*conv_layers)
        self.linear = nn.Linear(64 * 5 * 5, output_dim)
        if use_cuda:
            for module in (self.conv, self.linear):
                module.cuda()

    def forward(self, x):
        """Apply the conv stack, flatten per sample, and apply the linear head."""
        features = self.conv(x)
        flattened = features.view(features.size(0), -1)
        return self.linear(flattened)
class net_single_stream(nn.Module):
    """Single-stream matching network: features -> correlation -> regression.

    The same feature extractor and the same regressor are applied in both
    directions (A->B and B->A).

    Args:
        geometric_model: Transformation to regress. Only ``'affine'``
            (6 parameters) is supported.
        normalize_features: L2-normalise the extracted feature maps.
        normalize_matches: Apply ReLU followed by L2 normalisation to the
            correlation tensors.
        batch_normalization: Accepted for interface compatibility; not used
            by this class.
        use_cuda: Passed to the feature extractor and the regressor.
        feature_extraction_cnn: Backbone name passed to ``FeatureExtraction``.
        train_fe: Passed to ``FeatureExtraction``; False freezes the backbone.

    Raises:
        ValueError: If ``geometric_model`` is not ``'affine'``.
    """

    def __init__(self, geometric_model='affine',
                 normalize_features=True,
                 normalize_matches=True, batch_normalization=True,
                 use_cuda=True,
                 feature_extraction_cnn='se_resnext101',
                 train_fe=False):
        super(net_single_stream, self).__init__()
        # Validate first: previously any other value left ``output_dim``
        # unbound and failed with UnboundLocalError only after the (possibly
        # downloaded) backbone had already been built.
        if geometric_model == 'affine':
            output_dim = 6
        else:
            raise ValueError(
                "Unsupported geometric_model: {!r}. Only 'affine' is supported.".format(geometric_model))
        self.use_cuda = use_cuda
        self.normalize_features = normalize_features
        self.normalize_matches = normalize_matches
        self.FeatureExtraction = FeatureExtraction(train_fe=train_fe,
                                                   use_cuda=self.use_cuda,
                                                   feature_extraction_cnn=feature_extraction_cnn)
        self.FeatureL2Norm = FeatureL2Norm()
        # Registered as a sub-module but not used in forward().
        self.LocalPreserve = nn.AvgPool2d(kernel_size=3, stride=1)
        self.FeatureCorrelation = FeatureCorrelation()
        self.FeatureRegression = FeatureRegression(output_dim, use_cuda=self.use_cuda)
        self.ReLU = nn.ReLU(inplace=True)

    def forward(self, tnf_batch):
        """Regress the transformation parameters in both directions.

        Args:
            tnf_batch: Mapping with the keys ``'source_image'`` and
                ``'target_image'``.

        Returns:
            Tuple ``(theta_AB, theta_BA)`` of regressor outputs.
        """
        # Feature extraction.
        feature_A = self.FeatureExtraction(tnf_batch['source_image'])
        feature_B = self.FeatureExtraction(tnf_batch['target_image'])
        # Normalize the feature maps.
        if self.normalize_features:
            feature_A = self.FeatureL2Norm(feature_A)
            feature_B = self.FeatureL2Norm(feature_B)
        # Feature correlation, computed symmetrically.
        correlation_AB = self.FeatureCorrelation(feature_A, feature_B)
        correlation_BA = self.FeatureCorrelation(feature_B, feature_A)
        # Normalize the correlation maps.
        if self.normalize_matches:
            correlation_AB = self.FeatureL2Norm(self.ReLU(correlation_AB))
            correlation_BA = self.FeatureL2Norm(self.ReLU(correlation_BA))
        # Regress the transformation parameters theta.
        theta_AB = self.FeatureRegression(correlation_AB)
        theta_BA = self.FeatureRegression(correlation_BA)
        return theta_AB, theta_BA
class net_two_stream(nn.Module):
    """Two-stream matching network.

    In addition to the source/target pair (A, B) it also matches the source
    against a second target image (C, read from ``'target_image_jit'``). All
    streams share the same feature extractor and the same regressor.

    Args:
        geometric_model: Transformation to regress. Only ``'affine'``
            (6 parameters) is supported.
        normalize_features: L2-normalise the extracted feature maps.
        normalize_matches: Apply ReLU followed by L2 normalisation to the
            correlation tensors.
        batch_normalization: Accepted for interface compatibility; not used
            by this class.
        use_cuda: Passed to the feature extractor and the regressor.
        feature_extraction_cnn: Backbone name passed to ``FeatureExtraction``.
        train_fe: Passed to ``FeatureExtraction``; False freezes the backbone.

    Raises:
        ValueError: If ``geometric_model`` is not ``'affine'``.
    """

    def __init__(self, geometric_model='affine',
                 normalize_features=True,
                 normalize_matches=True,
                 batch_normalization=True,
                 use_cuda=True,
                 feature_extraction_cnn='se_resnext101',
                 train_fe=False):
        super(net_two_stream, self).__init__()
        # Validate first: previously any other value left ``output_dim``
        # unbound and failed with UnboundLocalError only after the (possibly
        # downloaded) backbone had already been built.
        if geometric_model == 'affine':
            output_dim = 6
        else:
            raise ValueError(
                "Unsupported geometric_model: {!r}. Only 'affine' is supported.".format(geometric_model))
        self.use_cuda = use_cuda
        self.normalize_features = normalize_features
        self.normalize_matches = normalize_matches
        self.FeatureExtraction = FeatureExtraction(train_fe=train_fe,
                                                   use_cuda=self.use_cuda,
                                                   feature_extraction_cnn=feature_extraction_cnn)
        self.FeatureL2Norm = FeatureL2Norm()
        # Registered as a sub-module but not used in forward().
        self.LocalPreserve = nn.AvgPool2d(kernel_size=3, stride=1)
        self.FeatureCorrelation = FeatureCorrelation()
        self.FeatureRegression = FeatureRegression(output_dim, use_cuda=self.use_cuda)
        self.ReLU = nn.ReLU(inplace=True)

    def forward(self, tnf_batch):
        """Regress the transformation parameters for the A/B and A/C pairs.

        Args:
            tnf_batch: Mapping with the keys ``'source_image'``,
                ``'target_image'`` and ``'target_image_jit'``.

        Returns:
            Tuple ``(theta_AB, theta_BA, theta_AC, theta_CA)`` of regressor
            outputs.
        """
        # Feature extraction.
        feature_A = self.FeatureExtraction(tnf_batch['source_image'])
        feature_B = self.FeatureExtraction(tnf_batch['target_image'])
        feature_C = self.FeatureExtraction(tnf_batch['target_image_jit'])
        # Normalize the feature maps.
        if self.normalize_features:
            feature_A = self.FeatureL2Norm(feature_A)
            feature_B = self.FeatureL2Norm(feature_B)
            feature_C = self.FeatureL2Norm(feature_C)
        # Feature correlation between A and B, computed symmetrically.
        correlation_AB = self.FeatureCorrelation(feature_A, feature_B)
        correlation_BA = self.FeatureCorrelation(feature_B, feature_A)
        # Feature correlation between A and C, computed symmetrically.
        correlation_AC = self.FeatureCorrelation(feature_A, feature_C)
        correlation_CA = self.FeatureCorrelation(feature_C, feature_A)
        # Normalize the correlation maps.
        if self.normalize_matches:
            correlation_AB = self.FeatureL2Norm(self.ReLU(correlation_AB))
            correlation_BA = self.FeatureL2Norm(self.ReLU(correlation_BA))
            correlation_AC = self.FeatureL2Norm(self.ReLU(correlation_AC))
            correlation_CA = self.FeatureL2Norm(self.ReLU(correlation_CA))
        # Regress the transformation parameters theta.
        theta_AB = self.FeatureRegression(correlation_AB)
        theta_BA = self.FeatureRegression(correlation_BA)
        theta_AC = self.FeatureRegression(correlation_AC)
        theta_CA = self.FeatureRegression(correlation_CA)
        return theta_AB, theta_BA, theta_AC, theta_CA
下面是训练脚本:
from __future__ import print_function, division
import argparse
import os
import ssl
from os.path import exists, join, basename
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
# import network
from model.AerialNet import net_two_stream as net
from model.loss import TransformedGridLoss
from data.download import download_train
from data.synth_dataset import SynthDataset
from geotnf.transformation import SynthPairTnf
from image.normalization import NormalizeImageDict
from util.train_test_fn import train, test
from util.torch_util import save_checkpoint, str_to_bool, print_info
import pickle
from functools import partial
# torch.cuda.set_device(1) # Using second GPU
# Process-wide monkey patches (they affect every module that uses the stdlib
# ``pickle`` / ``ssl`` modules after this point, including torch):
#   * pickle.load / pickle.Unpickler default to encoding="latin1"
#     (presumably so that checkpoints pickled under Python 2 can be read --
#     TODO confirm).
#   * HTTPS connections created through ssl's default context factory skip
#     certificate verification.
# NOTE(review): after this patch ``pickle.Unpickler`` is a functools.partial
# object, not a class, so any library that subclasses
# ``pickle_module.Unpickler`` (newer torch.serialization code appears to do
# this) can no longer do so. If weight loading fails with a TypeError, try
# removing these two pickle lines first -- verify against the installed torch.
pickle.load = partial(pickle.load, encoding="latin1")
pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
# NOTE(review): disabling certificate verification is insecure; confirm it is
# still required for the weight/dataset downloads.
ssl._create_default_https_context = ssl._create_unverified_context
if __name__ == '__main__':
    # Training entry point: parse the options, build the model, loss, data
    # loaders and optimizer, optionally reload a checkpoint, then train for
    # ``--num-epochs`` epochs and save a checkpoint after every epoch.
    print_info('[Deep Aerial Matching] training script',['green','bold'])
    # Argument parsing
    parser = argparse.ArgumentParser(description='Deep Aerial Matching PyTorch Implementation')
    # Paths
    parser.add_argument('--training-dataset', type=str, default='GoogleEarth', help='dataset to use for training')
    parser.add_argument('--training-tnf-csv', type=str, default='', help='path to training transformation csv folder')
    parser.add_argument('--training-image-path', type=str, default='', help='path to folder containing training images')
    parser.add_argument('--trained-models-dir', type=str, default='trained_models', help='path to trained models folder')
    parser.add_argument('--trained-models-fn', type=str, default='checkpoint_adam', help='trained model filename')
    # Optimization parameters
    parser.add_argument('--lr', type=float, default=0.0004, help='learning rate')
    # NOTE: --momentum is parsed but not used by the Adam optimizer below.
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum constant')
    parser.add_argument('--num-epochs', type=int, default=100, help='number of training epochs')
    parser.add_argument('--batch-size', type=int, default=12, help='training batch size')
    parser.add_argument('--weight-decay', type=float, default=0, help='weight decay constant')
    parser.add_argument('--seed', type=int, default=1, help='Pseudo-RNG seed')
    # Model parameters
    parser.add_argument('--geometric-model', type=str, default='affine', help='geometric model to be regressed at output: affine parameters (6 degrees of freedom)')
    parser.add_argument('--use-mse-loss', type=str_to_bool, nargs='?', const=True, default=False, help='Use MSE loss on tnf. parameters')
    parser.add_argument('--feature-extraction-cnn', type=str, default='se_resnext101', help='Feature extraction architecture: resnet101/resnext101/se_resnext101/densenet169')
    parser.add_argument('--train-fe', type=str_to_bool, nargs='?', const=True, default=True, help='True: train feature extraction or False: freeze feature extraction')
    # Synthetic dataset parameters
    parser.add_argument('--random-sample', type=str_to_bool, nargs='?', const=True, default=False, help='sample random transformations')
    # Reload model parameter.
    # ``type=bool`` would turn every non-empty string (including "False")
    # into True, so parse it like the other boolean flags.
    parser.add_argument('--load-model', type=str_to_bool, nargs='?', const=True, default=False, help='loading the trained model checkpoint')
    args = parser.parse_args()

    use_cuda = torch.cuda.is_available()

    # Seed
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    # Download dataset if needed and set paths
    if args.training_dataset == 'GoogleEarth':
        if args.training_image_path == '':
            args.training_image_path = 'datasets/'
            # Download dataset
            # NOTE(review): the original indentation was lost; the download is
            # assumed to happen only when the default image path is used --
            # confirm against the upstream script.
            download_train(args.training_image_path+args.training_dataset)
        if args.training_tnf_csv == '' and args.geometric_model=='affine':
            args.training_tnf_csv = 'training_data/GoogleEarth-aff'

    # CNN model and loss
    print('Creating CNN model...')
    model = net(train_fe=args.train_fe,
                geometric_model=args.geometric_model,
                feature_extraction_cnn=args.feature_extraction_cnn,
                use_cuda=use_cuda)

    if args.use_mse_loss:
        print('Using MSE loss...')
        loss = nn.MSELoss()
    else:
        print('Using grid loss...')
        loss = TransformedGridLoss(use_cuda=use_cuda,geometric_model=args.geometric_model)

    # Dataset and dataloader
    dataset_train = SynthDataset(geometric_model=args.geometric_model,
                                 csv_file=os.path.join(args.training_tnf_csv,'train_pair.csv'),
                                 training_image_path=args.training_image_path,
                                 transform=NormalizeImageDict(['src_image','trg_image','trg_image_jit']),
                                 random_sample=args.random_sample)
    dataloader_train = DataLoader(dataset_train, batch_size=args.batch_size, shuffle=True, num_workers=4)

    dataset_test = SynthDataset(geometric_model=args.geometric_model,
                                csv_file=os.path.join(args.training_tnf_csv,'val_pair.csv'),
                                training_image_path=args.training_image_path,
                                transform=NormalizeImageDict(['src_image','trg_image','trg_image_jit']),
                                random_sample=args.random_sample)
    dataloader_test = DataLoader(dataset_test, batch_size=args.batch_size, shuffle=True, num_workers=4)

    pair_generation_tnf = SynthPairTnf(geometric_model=args.geometric_model,use_cuda=use_cuda)

    # Optimizer. --weight-decay used to be parsed and then silently ignored;
    # its default of 0 matches Adam's own default, so default runs are unchanged.
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # optimizer = optim.Adam([{'params':model.FeatureExtraction.parameters()},{'params':model.FeatureRegression.parameters(),'lr':1e-3}], lr=args.lr)

    # The number of trainable parameters
    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    # Train
    best_test_loss = float("inf")
    print('Starting training...\n')
    print_info("# ===================================== #\n"
               "\t\t\t...Train config...\n"
               "\t\t\t------------------\n"
               "\t\t CNN model: "+args.feature_extraction_cnn+"\n"
               "\t\t Geometric model: "+args.geometric_model+"\n"
               "\t\t Dataset: "+args.training_dataset+"\n"
               "\t\t # of train data: "+str(len(dataset_train))+"\n\n"
               "\t\t Learning rate: "+str(args.lr)+"\n"
               "\t\t Batch size: "+str(args.batch_size)+"\n"
               "\t\t Maximum epoch: "+str(args.num_epochs)+"\n"
               "\t\t Reload checkpoint: "+str(args.load_model)+"\n\n"
               "\t\t # of parameters: "+str(total_params)+"\n"
               "# ===================================== #\n",['yellow','bold'])

    if args.load_model:
        load_dir = 'trained_models/checkpoint_seresnext101.pth.tar'
        # map_location keeps every tensor on the CPU while loading.
        checkpoint = torch.load(load_dir, map_location=lambda storage, loc: storage) # Load trained model
        saved_state = checkpoint['state_dict']
        # state_dict() tensors share storage with the live parameters and
        # buffers, so copy_() writes the checkpoint values into the model.
        # Load parameters of FeatureExtraction
        for name, tensor in model.FeatureExtraction.state_dict().items():
            tensor.copy_(saved_state['FeatureExtraction.' + name])
        # Load parameters of FeatureRegression (Affine)
        for name, tensor in model.FeatureRegression.state_dict().items():
            tensor.copy_(saved_state['FeatureRegression.' + name])
        print("Reloading from--[%s]" % load_dir)

    # Only the loss tag differs between the two checkpoint naming schemes.
    loss_tag = '_mse_loss_' if args.use_mse_loss else '_grid_loss_'
    for epoch in range(1, args.num_epochs+1):
        # Call train, test function
        train_loss = train(epoch,model,loss,optimizer,dataloader_train,pair_generation_tnf,log_interval=100)
        # test_loss = test(model,loss,dataloader_test,pair_generation_tnf)

        checkpoint_name = os.path.join(args.trained_models_dir,args.geometric_model+loss_tag+args.feature_extraction_cnn+'_'+args.training_dataset+'_epoch_'+str(epoch)+'.pth.tar')

        # Save checkpoint
        save_checkpoint({
            'epoch': epoch + 1,
            'args': args,
            'state_dict': model.state_dict(),
            'optimizer' : optimizer.state_dict(),
        },checkpoint_name)
    print('Done!')
目前只知道问题大概出在训练脚本里创建模型的这一行:model = net(train_fe=args.train_fe, geometric_model=args.geometric_model, feature_extraction_cnn=args.feature_extraction_cnn, use_cuda=use_cuda)
报错堆栈的最后一帧落在 PyTorch 底层文件的这段代码上:
if not isinstance(state_dict, Mapping):
raise TypeError("Expected state_dict to be dict-like, got {}.".format(type(state_dict)))
相似问题