# CV 新手避坑指南：计算机视觉常见的8个错误

1.翻转图像和关键点

def flip_img_and_keypoints(img: np.ndarray, kpts: Sequence[Sequence[int]]):
    """Mirror an image left-to-right and remap its keypoints (first attempt).

    This is the "before" version of the example: the x-coordinate is mapped
    to ``w - x``, which is off by one -- column 0 ends up at column ``w``,
    one past the last valid index.  The IndexError reproduced further down
    and the corrected ``w - x - 1`` variant both rely on this version staying
    as it is, so the mistake is kept on purpose.

    Args:
        img: Array with at least two axes; axis 1 is the width.
        kpts: Keypoints given as ``(y, x)`` pairs.

    Returns:
        Tuple ``(flipped_img, remapped_kpts)`` where ``remapped_kpts`` is a
        list of ``(y, x)`` tuples.
    """
    img = np.fliplr(img)
    h, w, *_ = img.shape
    # NOTE: deliberately wrong -- valid columns are 0..w-1, so the mirrored
    # column should be ``w - x - 1``.
    kpts = [(y, w - x) for y, x in kpts]
    return img, kpts

# Visual check: zero out the keypoints on the original image and on the
# flipped image, then show both side by side.
image = np.ones((10, 10), dtype=np.float32)
kpts = [(0, 1), (2, 2)]
image_flipped, kpts_flipped = flip_img_and_keypoints(image, kpts)

img1 = image.copy()
for y, x in kpts:
    img1[y, x] = 0

img2 = image_flipped.copy()
for y, x in kpts_flipped:
    img2[y, x] = 0

_ = plt.imshow(np.hstack((img1, img2)))

# Same check, but with a keypoint in column 0: the first-attempt flip maps
# it to column 10, which does not exist in a 10-pixel-wide image.
image = np.ones((10, 10), dtype=np.float32)
kpts = [(0, 0), (1, 1)]
image_flipped, kpts_flipped = flip_img_and_keypoints(image, kpts)

img1 = image.copy()
for y, x in kpts:
    img1[y, x] = 0

img2 = image_flipped.copy()
for y, x in kpts_flipped:
    img2[y, x] = 0

# ---------------------------------------------------------------------------
# IndexError                                Traceback (most recent call last)
# <ipython-input-5-997162463eae> in <module>
#       8 img2 = image_flipped.copy()
#       9 for y, x in kpts_flipped:
# ---> 10     img2[y, x] = 0
#
# IndexError: index 10 is out of bounds for axis 1 with size 10

def flip_img_and_keypoints(img: np.ndarray, kpts: Sequence[Sequence[int]]):
    """Mirror an image left-to-right and remap its keypoints.

    Args:
        img: Array with at least two axes; axis 1 is the width.
        kpts: Keypoints given as ``(y, x)`` pairs.

    Returns:
        Tuple ``(flipped_img, remapped_kpts)`` where ``remapped_kpts`` is a
        list of ``(y, x)`` tuples.
    """
    img = np.fliplr(img)
    h, w, *_ = img.shape
    # Columns are indexed 0..w-1, so column x mirrors to w - x - 1.
    kpts = [(y, w - x - 1) for y, x in kpts]
    return img, kpts

2.继续谈谈关键点

# Keypoints whose list position carries a left/right meaning.
kpts = [
    (20, 20),   # left pinky
    (20, 200),  # right pinky
    ...
]

• 在应用增强或其他特性之前，了解并考虑数据结构和语义；
• 保持你的实验的独立性：添加一个小的变化（例如，一个新的转换），检查它是如何进行的，如果分数提高了再合并。

3.自定义损失函数

def iou_continuous_loss(y_pred, y_true):
    """Soft IoU between prediction and target (the "before" version).

    Despite the name, the value returned is the IoU itself, so it grows as
    the prediction gets closer to the target.  The session output shown after
    this snippet and the corrected ``1 - iou`` variant depend on that, so the
    behaviour is kept on purpose.

    Args:
        y_pred: Predicted mask; summed over its last two axes.
        y_true: Target mask, same shape as ``y_pred``.

    Returns:
        Mean over the remaining axes of intersection / union.
    """
    eps = 1e-6  # keeps the ratio defined when both masks sum to zero

    def _sum(x):
        return x.sum(-1).sum(-1)

    numerator = (_sum(y_true * y_pred) + eps)
    denominator = (_sum(y_true ** 2) + _sum(y_pred ** 2)
                   - _sum(y_true * y_pred) + eps)
    # NOTE: deliberately wrong direction -- a loss should be 1 minus this.
    return (numerator / denominator).mean()

In [3]: ones = np.ones((1, 3, 10, 10))
   ...: x1 = iou_continuous_loss(ones * 0.01, ones)
   ...: x2 = iou_continuous_loss(ones * 0.99, ones)

In [4]: x1, x2
Out[4]: (0.010099999897990103, 0.9998990001020204)

def iou_continuous(y_pred, y_true):
    """Soft (continuous) IoU between a predicted and a target mask.

    Args:
        y_pred: Predicted mask; summed over its last two axes.
        y_true: Target mask, same shape as ``y_pred``.

    Returns:
        Mean over the remaining axes of intersection / union; higher means
        a closer match.
    """
    eps = 1e-6  # keeps the ratio defined when both masks sum to zero

    def _sum(x):
        return x.sum(-1).sum(-1)

    numerator = (_sum(y_true * y_pred) + eps)
    denominator = (_sum(y_true ** 2) + _sum(y_pred ** 2)
                   - _sum(y_true * y_pred) + eps)
    return (numerator / denominator).mean()


def iou_continuous_loss(y_pred, y_true):
    """Return ``1 - iou_continuous`` so that a closer prediction gives a lower loss."""
    return 1 - iou_continuous(y_pred, y_true)

• 编写一个单元测试来检查损失的方向：形式化地表示一个期望，即更接近实际的东西应该输出更低的损失；
• 做一次健全性检查（sanity check）：尝试让模型在单个 batch 上过拟合。

4.使用 Pytorch

from ceevee.base import AbstractPredictor


class MySuperPredictor(AbstractPredictor):
    """Predictor that loads ``ModelClass`` weights and runs the model without gradients."""

    def __init__(self, weights_path: str, ):
        super().__init__()
        self.model = self._load_model(weights_path=weights_path)

    def process(self, x, *kw):
        """Run the model on ``x`` with gradient tracking disabled."""
        with torch.no_grad():
            res = self.model(x)
        return res

    @staticmethod
    def _load_model(weights_path):
        """Build ``ModelClass`` and load its state dict from ``weights_path`` onto the CPU."""
        model = ModelClass()
        weights = torch.load(weights_path, map_location='cpu')
        model.load_state_dict(weights)
        # NOTE(review): ``model.eval()`` is never called.  ``torch.no_grad()``
        # only disables gradient tracking; it does not switch layers such as
        # Dropout / BatchNorm to inference mode.  Presumably this is the
        # mistake this section illustrates (the traced example that follows
        # does call ``model.eval()``) -- left unchanged for that reason.
        return model

In [4]: model = nn.Sequential(
   ...:     nn.Linear(10, 10),
   ...:     nn.Dropout(.5)
   ...: )
   ...:
   ...: traced_model = torch.jit.trace(model.eval(), torch.rand(10))  # No more warnings!

5.复制粘贴问题

def make_dataloaders(train_cfg, val_cfg, batch_size):
    """Build the training and validation data loaders from their configs.

    Args:
        train_cfg: Config passed to ``Dataset.from_config`` for training data.
        val_cfg: Config passed to ``Dataset.from_config`` for validation data.
        batch_size: Batch size shared by both loaders.

    Returns:
        Tuple ``(train, val)`` of ``DataLoader`` objects.
    """
    train = Dataset.from_config(train_cfg)
    val = Dataset.from_config(val_cfg)
    shared_params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': cpu_count()}
    train = DataLoader(train, **shared_params)
    # NOTE(review): ``val`` is built from ``train`` here, so the validation
    # dataset created above is discarded.  Presumably this is the copy-paste
    # mistake this section illustrates; the intended call is
    # ``DataLoader(val, **shared_params)``.  Left unchanged for that reason.
    val = DataLoader(train, **shared_params)
    return train, val

# https://github.com/albu/albumentations/blob/0.3.0/albumentations/augmentations/transforms.py
def apply_to_keypoint(self, keypoint, crop_height=0, crop_width=0, h_start=0, w_start=0, rows=0, cols=0, **params):
    """Crop a keypoint with the given crop parameters, then rescale it.

    Quoted from the upstream file referenced above.
    """
    keypoint = F.keypoint_random_crop(keypoint, crop_height, crop_width, h_start, w_start, rows, cols)
    # NOTE(review): the x scale divides by ``crop_height``; ``crop_width``
    # looks like what was meant (copy-paste of the line below).  Presumably
    # this is the mistake being quoted, so it is left as in the source.
    scale_x = self.width / crop_height
    scale_y = self.height / crop_height
    keypoint = F.keypoint_scale(keypoint, scale_x, scale_y)
    return keypoint

# Copy-paste form: one near-identical statement pair per dataset.
datasets = []

data_a = get_dataset(MyDataset(config['dataset_a']), config['shared_param'], param_a)
datasets.append(data_a)

data_b = get_dataset(MyDataset(config['dataset_b']), config['shared_param'], param_b)
datasets.append(data_b)

# Loop form: the per-dataset differences (config key and parameter) are the
# only things spelled out; the shared call is written once.
datasets = []
for name, param in zip(('dataset_a', 'dataset_b'),
                       (param_a, param_b),
                       ):
    datasets.append(get_dataset(MyDataset(config[name]), config['shared_param'], param))

6.合适的数据类型

def add_noise(img: np.ndarray) -> np.ndarray:
    """Multiply every pixel by a random factor drawn from [0.5, 1.5).

    Args:
        img: Input image array.

    Returns:
        Array of the same shape cast to ``uint8``.
    """
    mask = np.random.rand(*img.shape) + .5
    img = img.astype('float32') * mask
    # NOTE(review): bright pixels can exceed 255 after scaling (up to
    # 255 * 1.5) and the cast below does not saturate, so those pixels come
    # out corrupted.  Presumably this is the data-type mistake this section
    # illustrates; ``np.clip(img, 0, 255)`` before the cast would prevent it.
    return img.astype('uint8')

7.打字错误

from tqdm import tqdm


class GridPredictor:
    """
    This class can be used to predict a segmentation mask for the big image
    when you have GPU memory limitation

    The image is processed in ``size`` x ``size`` patches taken every
    ``stride`` pixels (``size // 2`` when no stride is given), and the patch
    predictions are accumulated into one full-size mask.

    This is the "before" version of the example; see the note in
    ``__call__``.
    """

    def __init__(self, predictor: AbstractPredictor, size: int, stride: Optional[int] = None):
        self.predictor = predictor
        self.size = size
        self.stride = stride if stride is not None else size // 2

    def __call__(self, x: np.ndarray):
        """Predict a ``(h, w, 1)`` float32 mask for the ``(h, w, c)`` array ``x``."""
        h, w, _ = x.shape
        mask = np.zeros((h, w, 1), dtype='float32')
        weights = mask.copy()

        for i in tqdm(range(0, h - 1, self.stride)):
            for j in range(0, w - 1, self.stride):
                # Patch bounds, clamped to the image borders.
                a, b, c, d = i, min(h, i + self.size), j, min(w, j + self.size)
                patch = x[a:b, c:d, :]
                mask[a:b, c:d, :] += np.expand_dims(self.predictor(patch), -1)
                # NOTE: deliberately wrong -- predictions are summed into
                # ``mask`` but the weight is set to 1 instead of incremented,
                # so overlapping patches are added up rather than averaged.
                # The corrected ``__call__`` shown later uses ``+= 1``.
                weights[a:b, c:d, :] = 1

        return mask / weights

class Model(nn.Module):
    """Stand-in model: the prediction is the mean over the last axis."""

    def forward(self, x):
        return x.mean(axis=-1)


# The grid prediction must agree with running the model on the whole image.
model = Model()
grid_predictor = GridPredictor(model, size=128, stride=64)

simple_pred = np.expand_dims(model(img), -1)
grid_pred = grid_predictor(img)

np.testing.assert_allclose(simple_pred, grid_pred, atol=.001)

def __call__(self, x: np.ndarray):
    """Predict a ``(h, w, 1)`` float32 mask for the ``(h, w, c)`` array ``x``.

    Patch predictions are summed into ``mask`` while ``weights`` counts how
    many patches covered each pixel; the result is their ratio, i.e. the
    average prediction per pixel.
    """
    h, w, _ = x.shape
    mask = np.zeros((h, w, 1), dtype='float32')
    weights = mask.copy()

    for i in tqdm(range(0, h - 1, self.stride)):
        for j in range(0, w - 1, self.stride):
            # Patch bounds, clamped to the image borders.
            a, b, c, d = i, min(h, i + self.size), j, min(w, j + self.size)
            patch = x[a:b, c:d, :]
            mask[a:b, c:d, :] += np.expand_dims(self.predictor(patch), -1)
            weights[a:b, c:d, :] += 1

    return mask / weights

8.ImageNet 规范化

# Train for 20 steps on a single (img, mask) pair and plot the loss curve.
model_a = UNet(3, 1)
optimizer = torch.optim.Adam(model_a.parameters(), lr=1e-3)
losses = []

for t in tqdm(range(20)):
    loss = criterion(model_a(img), mask)
    losses.append(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

_ = plt.plot(losses)

# Same 20-step loop with a fresh model; the only difference is that the
# target mask is divided by 255 before it is passed to the criterion.
model_b = UNet(3, 1)
optimizer = torch.optim.Adam(model_b.parameters(), lr=1e-3)
losses = []

for t in tqdm(range(20)):
    loss = criterion(model_b(img), mask / 255.)
    losses.append(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

_ = plt.plot(losses)

• 测试很重要；
• 运行时断言（runtime assertion）同样可以用在训练流程中；
• 可视化是一种不错的手段；
• 复制粘贴是一种诅咒；
• 没有什么是灵丹妙药，机器学习工程师必须时刻小心。

0 条评论

• ### 用PyTorch做物体检测和追踪

在我之前的工作中，我尝试过用自己的图像在PyTorch中训练一个图像分类器，然后用它来进行图像识别。现在，我将向你们展示如何使用预训练的分类器在一张图像中检测多...

• ### 如何使用注意力模型生成图像描述？

我们的目标是用一句话来描述图片， 比如「一个冲浪者正在冲浪」。 本教程中用到了基于注意力的模型，它使我们很直观地看到当文字生成时模型会关注哪些部分。

• ### 专栏 | 【从零开始学习YOLOv3】7. 教你在YOLOv3模型中添加Attention机制

正如[convolutional],[maxpool],[net],[route]等层在cfg中的定义一样，我们再添加全新的模块的时候，要规定一下cfg的格式。...

• ### 获取素材图无忧，Pixabay图库网Python多线程采集下载

图片素材想必是不少人都在寻找的内容，随着版权意识的加深，可供免费使用的图片素材可不是那么好找的哦，不过还是有不少国外知名素材网站可供我们使用，而且国内访问也是比...

• ### JavaScript设计模式--代理模式

代理模式：为一个对象提供一个代用品或占位符，以便控制对它的访问。 代理分为：保护代理和虚拟代理 保护代理：用于控制不同权限的对象对目标对象的访问，在Java...

• ### 使用腾讯云 GPU 学习深度学习系列之四：深度学习的特征工程

本系列文章主要介绍如何使用 腾讯云GPU服务器 进行深度学习运算，前面主要介绍原理部分，后期则以实践为主。

• ### SDWebImageV3.7.5源码解析

SDWebImage更新到如今这个版本，过程做了许多改进，性能已经非常的好了。以前就粗略的看过SDWebImage的源码，但是未做记录整理。再次阅读还是受益良多...