VSLAM前端：双目极线搜索匹配

猫叔Rex

发布于 2021-04-07 15:35:16

2.6K0

发布于 2021-04-07 15:35:16

文章被收录于专栏：科学计算

VSLAM前端：双目极线搜索匹配

一、极线搜索匹配

1.1 最小化图像块重投影误差步骤：

1. 假设我们知道第

k-1

帧中特征点位置以及它们的深度；

2. 已知

I_{k-1}

帧中的某个特征在图像平面的位置

(u,v)

，以及它的深度

，将该二维特征投影到三维空间

p_{k-1}

，该三维空间的坐标系是定义在

I_{k-1}

摄像机坐标系的。所以，我们要将它投影到当前帧

I_k

中，需要位姿转换

T_{k，k-1}

，得到该点在当前帧坐标系中的三维坐标

p_k

。最后通过摄像机内参数，投影到

I_k

的图像平面

(u',v')

，进行重投影；

3. 对于空间中同一个点，被相邻两帧拍到，亮度值变化很小。但由于位姿是假设的一个值，所以重投影的点不准确，导致投影前后的亮度值是不相等的，不断进行迭代优化。

1.2 极线搜索确定匹配点

假设参考帧

I_r

中确定一个特征点的二维图像坐标，假设它的深度值在

\begin{bmatrix} d_{min},d_{max}\end{bmatrix}

之间，根据这两个端点深度值，能够计算出他们在当前帧

I_r

中的位置，即图中圆圈中的线段。确定了极线位置，则可以进行特征搜索匹配。如果极线段很短，小于两个像素，直接使用上面求位姿时提到的最小化图像块重投影误差方法进行二维特征点位置的确定。如果极线段很长，则分两步，第一步在极线段上间隔采样，对采样的多个特征块一一和参考帧中的特征块匹配，用Zero mean Sum of Squared Differences 方法对各采样特征块评分，得分最高和参考帧中的特征块最匹配。第二步就是在这个得分最高点附近使用特征对齐得到次像素精度的特征点位置。

特征对齐推荐大家阅读论文Lucas-Kanade 20 Years On: A Unifying Framework。

二、svo代码

bool DirectMatcher::findEpipolarMatchDirect(const vio::cameras::CameraBase& cam_ref,
                                            const vio::cameras::CameraBase& cam_cur,
                                            const Vector2f& px_ref,//左相机特征点2d坐标
                                            const Vector3f& f_ref,//左相机特征点的归一话坐标
                                            const Matrix3f& R_cur_ref,//外参
                                            const Vector3f& t_cur_ref,//外参
                                            const int level_ref,//左特征点所在的金字塔层
                                            const float d_estimate,//深度估计的初值(左目的)
                                            const float d_min,//深度值范围最小值
                                            const float d_max,//深度值范围最大值
                                            const std::vector<cv::Mat>& pyrs_ref,
                                            const std::vector<cv::Mat>& pyrs_cur,
                                            const cv::Mat_<uchar>& mask_cur,//右相机掩码
                                            const bool edgelet_feature,
                                            Vector2f* px_cur,//右相机的特征点
                                            float* depth,//左相机特征点深度
                                            int* level_cur,
                                            cv::Mat* dbg_cur) {
  CHECK_NEAR(f_ref[2], 1.f, 1e-6);
  CHECK_EQ(pyrs_ref.size(), pyrs_cur.size());

  // Compute start and end of epipolar line in old_kf for match search, on unit plane!
  // i.e., A & B are the first two elements of unit rays.
  // We will search from far to near
  Vector3f ray_A, ray_B;//极线的起点和终点在相机坐标系中
  Vector2f px_A, px_B;//极线的起点和终点在图像坐标系中
  ray_B = R_cur_ref * (f_ref * d_max) + t_cur_ref;  //最大深度对应的极线端点
  ray_B /= ray_B(2);
  if (vio::cameras::CameraBase::ProjectionStatus::Successful !=
      cam_cur.project(ray_B, &px_B)) {
    return false;
  }

  bool invalid_ray_A = true;
  for (float d = d_min; d < d_estimate; d *= 10) {
    ray_A = R_cur_ref * (f_ref * d) + t_cur_ref;  // near
    ray_A /= ray_A(2);
    if (vio::cameras::CameraBase::ProjectionStatus::Successful ==
        cam_cur.project(ray_A, &px_A)) {// 最小深度向上采样最近的有效深度对应的极线端点
      invalid_ray_A = false;
      break;
    }
  }
  if (invalid_ray_A) {
    return false;
  }

  // Compute warp affine matrix
  // 计算初始的仿射变化矩阵 A_r_l_2×2
  //输入左右相机类,左相机像素坐标,归一化坐标,估计的深度,特征点所在的金字塔层,外参
  //输出仿射矩阵
  if (!warp::getWarpMatrixAffine(cam_ref,
                                 cam_cur,
                                 px_ref,
                                 f_ref,
                                 d_estimate,
                                 R_cur_ref,
                                 t_cur_ref,
                                 level_ref,
                                 &A_cur_ref_)) {
    LOG(WARNING) << "warp::getWarpMatrixAffine fails";
    return false;
  }

  const int max_level = pyrs_ref.size() - 1;
  //找到在右相机上最佳的搜索金字塔层
  const int search_level = warp::getBestSearchLevel(A_cur_ref_, max_level);
  epi_dir_ = ray_A.head<2>() - ray_B.head<2>();  // far to near, B to A极线段向量
  epi_length_ = (px_A - px_B).norm() / (1 << search_level);

//将左相机图像特征点中心的图像块warp到右相机图像坐标系中
//输入之前得到的粗略的仿射矩阵,左相机特征点所在所在金字塔图像,左特征点像素坐标,左特征的金字塔层，右相机需要搜索的金字塔层,
  if (!warp::warpAffine(A_cur_ref_,
                        pyrs_ref[level_ref],
                        px_ref,
                        level_ref,
                        search_level,
                        halfpatch_size_ + 1,
                        patch_with_border_)) {
    return false;
  }
  //patch_with_border 生成 patch
  createPatchFromPatchWithBorder();
  //如果基线足够小,那么不同再基线搜索了,直接图片对齐
  if (epi_length_ < options_.max_epi_length_optim)
  {
    // The epipolar search line is short enough (< 2 pixels)
    // to perform direct alignment
    *px_cur = (px_A + px_B) * 0.5f; // 取平均值
    Vector2f px_scaled(*px_cur / (1 << search_level));//变换到对应的最佳金字塔层
    bool success;
    if (options_.align_1d) {
      Vector2f direction = (px_A - px_B).normalized();
      success = align::align1D(pyrs_cur[search_level],
                               direction,
                               patch_with_border_,
                               patch_,
                               options_.max_iter,
                               &px_scaled,
                               &h_inv_);
    } else {
   //默认是这个
   //输入右相机的图像,从左相机变换到右相机上的patch_border,从左相机变换到右相机上的patch,最大迭代次数
   //输出最终块匹配残差最小的右目中特征点的像素坐标
      success = align::align2D(pyrs_cur[search_level],
                               patch_with_border_,
                               patch_,
                               options_.max_iter,
                               &px_scaled);
    }

    //如果迭代收敛的话
    if (success)
    {
      *px_cur = px_scaled * (1 << search_level);//最终这个特征点在右目中的位置
      Vector3f f_cur;
      if (cam_cur.backProject(*px_cur, &f_cur))
      {
        CHECK_NEAR(f_cur[2], 1.f, 1e-6);
        //输入外参,左相机这个特征点的归一化坐标,右相机归一化坐标
        //构造Ax=b,得到左目坐标系下特征点的深度
        if (!depthFromTriangulation(R_cur_ref,
                                    t_cur_ref,
                                    f_ref,
                                    f_cur,
                                    depth)) {
          LOG(WARNING) << "depthFromTriangulation fails, set depth to d_max";
          *depth = d_max;//求解失败就给最大深度
        }
      }
    }
    }
    return success;
  }

  //极线不够近,所以极线搜索
  // Determine the steps to Search along the epipolar line
  // [NOTE] The epipolar line can be curvy, so we slightly increase it
  //        to roughly have one step per pixel (heuristically).
  size_t n_steps = epi_length_ / 0.7;//搜索步数
  Vector3f step;
  step << epi_dir_ / n_steps, 0;//步长
  if (n_steps > options_.max_epi_search_steps) {//需要的步数太多,那就不搜索了
    LOG(ERROR) << "Skip epipolar search: evaluations = " << n_steps
               << "epi length (px) = " << epi_length_;
    return false;
  }

  // Search along the epipolar line (on unit plane) with patch matching
  // for matching, precompute sum and sum2 of warped reference patch
  // [heuristic] The ssd from patch mean difference can be up to 50% of the resulting ssd.
  //  ssd = zmssd + N * (a_bar - b_bar)^2
  typedef patch_score::ZMSSD<halfpatch_size_> PatchScore;
  PatchScore patch_score(patch_);//左相机warp到右相机以后的patch
  int zmssd_best = PatchScore::threshold();
  int ssd_corr = PatchScore::threshold() * 2;
  Vector3f ray_best;
  Vector3f ray = ray_B;//从极线远段往近端搜索
  Eigen::Vector2i last_checked_pxi(0, 0);
  const int search_img_rows = pyrs_cur[search_level].rows;
  const int search_img_cols = pyrs_cur[search_level].cols;
  ++n_steps;

  for (size_t i = 0; i < n_steps; ++i, ray += step)
  {
    Vector2f px;
    //投影到右目像素坐标系,并且转换到对应金字塔层
    if (vio::cameras::CameraBase::ProjectionStatus::Successful != cam_cur.project(ray, &px)) {
      // We have already checked the valid projection of starting and ending rays.  However,
      // under very rare circumstance, cam_cur.project may still fail:
      // close to zero denominator for radial tangential 8 distortion:
      // 1 + k4 * r^2 + k5 * r^4 + k6 * r^6 < 1e-6
      continue;
    }
    Vector2i pxi(px[0] / (1 << search_level) + 0.5,
                 px[1] / (1 << search_level) + 0.5);  // round to closest int

      // 和上一个相同则下一个
    if (pxi == last_checked_pxi) {
      continue;
    }
    last_checked_pxi = pxi;
// 检查patch是否都在其内
    // check if the patch is full within the new frame
    if (pxi[0] >= halfpatch_size_ && pxi[0] < search_img_cols - halfpatch_size_ &&
        pxi[1] >= halfpatch_size_ && pxi[1] < search_img_rows - halfpatch_size_ &&
        mask_cur(pxi(1), pxi(0)) > 0)
    {
      // TODO(mingyu): Interpolation instead?
      //得到右目patch的头指针
      uint8_t* cur_patch_ptr = pyrs_cur[search_level].data
          + (pxi[1] - halfpatch_size_) * search_img_cols
          + (pxi[0] - halfpatch_size_);
      int ssd, zmssd;
// 计算极线上的patch与ref仿射变换得到的patch_之间的ZMSSD得分
//这个我没具体关注这个评分怎么算的,应该和ZNCC,SSD差不多吧,都是强度相似度
      patch_score.computeScore(cur_patch_ptr, search_img_cols, &zmssd, &ssd);
      if (zmssd < zmssd_best) {
        // We store the best zmssd and its corresponding ssd score.  Usually,
        // zmssd and ssd have good correlation if the *matching* is reasonable.
        //保存下最佳的评分以及对应的点
        zmssd_best = zmssd;
        ssd_corr = ssd;
        ray_best = ray;
      }
      if (dbg_cur != nullptr) {
        dbg_cur->at<cv::Vec3b>(pxi[1], pxi[0]) = cv::Vec3b(255, 0, 0);
      }
    } else {
      // The patch contains out of bound pixels
      continue;
    }
  }
  //如果得分小于阈值,说明可以接受,再进行优化,找到更准的右目中的这个点的像素坐标,同上
  if (zmssd_best < PatchScore::threshold())
  {
    cam_cur.project(ray_best, px_cur);
    if (options_.subpix_refinement) {
      Vector2f px_scaled(*px_cur / (1 << search_level));
      bool success;
      if (options_.align_1d) {
        Vector2f direction = (px_A - px_B).normalized();
        success = align::align1D(pyrs_cur[search_level],
                                 direction,
                                 patch_with_border_,
                                 patch_,
                                 options_.max_iter,
                                 &px_scaled,
                                 &h_inv_);
      } else {
        success = align::align2D(pyrs_cur[search_level],
                                 patch_with_border_,
                                 patch_,
                                 options_.max_iter,
                                 &px_scaled);
      }

      if (success) {
        *px_cur = px_scaled * (1 << search_level);
        Vector3f f_cur;
        if (cam_cur.backProject(*px_cur, &f_cur)) {
          CHECK_NEAR(f_cur[2], 1.f, 1e-6);
          if (!depthFromTriangulation(R_cur_ref,
                                      t_cur_ref,
                                      f_ref,
                                      f_cur,
                                      depth)) {
            LOG(WARNING) << "depthFromTriangulation fails, set depth to d_max";
            *depth = d_max;
          }
        }
      }

      if (dbg_cur != nullptr) {
        if (success) {
          // green: subpix alignment is good
          cv::circle(*dbg_cur, cv::Point2f((*px_cur)(0), (*px_cur)(1)), 2, cv::Scalar(0, 255, 0));
        } else {
          // red: subpix alignment fails
          cv::circle(*dbg_cur, cv::Point2f((*px_cur)(0), (*px_cur)(1)), 2, cv::Scalar(0, 0, 255));
        }
      }
      return success;
    } else {
      // No subpix refinement
      CHECK_NEAR(ray_best[2], 1.f, 1e-6);
      if (!depthFromTriangulation(R_cur_ref,
                                  t_cur_ref,
                                  f_ref,
                                  ray_best,
                                  depth)) {
        LOG(WARNING) << "depthFromTriangulation fails, set depth to d_max";
        *depth = d_max;
      }
      return true;
    }
  }