我有两个形状相同的数组和一个度量(它应当适用于 scipy.spatial.distance.cdist 函数所接受的任何度量):
# Two sample point sets of identical shape; row i of one is paired with
# row i of the other.
vals_1 = np.array([
    [0, 1],
    [1, 1],
    [2, 1],
])
vals_2 = np.array([
    [0, 1],
    [2, 1],
    [4, 1],
])
# Name of the distance metric handed to cdist below.
metric = 'euclidean'
我想计算相应的值对之间的距离。
我可以这样做(用循环):
# One 1x1 cdist call per pair of corresponding rows; .item() unwraps the
# single distance from the resulting 1x1 matrix.
dists = [
    cdist([row_1], [row_2], metric=metric).item()
    for row_1, row_2 in zip(vals_1, vals_2)
]
dists  # [0.0, 1.0, 2.0]
下面是一种将其矢量化的方法:
# Build the full pairwise distance matrix, then keep only its main diagonal,
# i.e. the distances between corresponding rows.
dists = np.diagonal(cdist(vals_1, vals_2, metric=metric))
dists  # array([0., 1., 2.])
但它占用了太多内存,因为它会先计算所有成对的距离:
array([[0., 2., 4.],
[1., 1., 3.],
[2., 0., 2.]])
然后只选择对角线。
是否有一种高效的矢量化方法,可以避免计算这些冗余的距离?
发布于 2022-08-12 10:34:04
要计算对应位置向量之间的距离,逐元素运算和沿轴的归约运算在 numpy 中都能很好地矢量化(参考基本数组操作),因此我们可以轻松地编写以下代码:
def euclidean(XA, XB, *, out=None):
    """Return the Euclidean distance between corresponding rows of XA and XB.

    Parameters
    ----------
    XA, XB : arrays that support elementwise subtraction; the squared
        differences are summed along axis 1.
    out : optional array passed to ``np.sqrt`` to receive the result.

    Returns
    -------
    One distance per row.
    """
    squared_diff = np.square(XA - XB)
    # np.add.reduce is equivalent to np.sum but faster.
    row_totals = np.add.reduce(squared_diff, axis=1)
    return np.sqrt(row_totals, out=out)
def sqeuclidean(XA, XB, *, out=None):
    """Return the squared Euclidean distance between corresponding rows.

    Parameters
    ----------
    XA, XB : arrays that support elementwise subtraction; the squared
        differences are summed along axis 1.
    out : optional array passed to the reduction to receive the result.

    Returns
    -------
    One squared distance per row.
    """
    squared_diff = np.square(XA - XB)
    return np.add.reduce(squared_diff, axis=1, out=out)
def cityblock(XA, XB, *, out=None):
    """Return the city-block (Manhattan) distance between corresponding rows.

    Parameters
    ----------
    XA, XB : arrays that support elementwise subtraction; the absolute
        differences are summed along axis 1.
    out : optional array passed to the reduction to receive the result.

    Returns
    -------
    One distance per row.
    """
    abs_diff = np.absolute(XA - XB)
    return np.add.reduce(abs_diff, axis=1, out=out)
def chebyshev(XA, XB, *, out=None):
    """Return the Chebyshev distance between corresponding rows of XA and XB.

    Parameters
    ----------
    XA, XB : arrays that support elementwise subtraction; the largest
        absolute difference along axis 1 is kept.
    out : optional array passed to the reduction to receive the result.

    Returns
    -------
    One distance per row.
    """
    abs_diff = np.absolute(XA - XB)
    # np.maximum.reduce is equivalent to np.max but faster.
    return np.maximum.reduce(abs_diff, axis=1, out=out)
def hamming(XA, XB, *, out=None):
    """Count the positions at which corresponding rows of XA and XB differ.

    The result is a raw count per row; it is not divided by the row length.

    Parameters
    ----------
    XA, XB : arrays compared elementwise with ``!=``.
    out : optional array passed to the reduction to receive the result.

    Returns
    -------
    One mismatch count per row.
    """
    mismatches = XA != XB
    return np.add.reduce(mismatches, axis=1, out=out)
def mahalanobis(XA, XB, VI, *, out=None):
    """Return the Mahalanobis distance between corresponding rows of XA and XB.

    Parameters
    ----------
    XA, XB : arrays that support elementwise subtraction.
    VI : matrix that right-multiplies the row differences (presumably the
        inverse covariance matrix -- confirm with the caller).
    out : optional array passed to ``np.sqrt`` to receive the result.

    Returns
    -------
    One distance per row.
    """
    diff = XA - XB
    # (diff @ VI) * diff summed along axis 1 yields diff_i . VI . diff_i
    # for every row i.
    projected = diff @ VI
    quad_form = np.add.reduce(projected * diff, axis=1)
    return np.sqrt(quad_form, out=out)
# more functions...
如果我们需要一个用法与 cdist 类似的函数,可以参考它的实现来编写类似的代码,下面是已完成的一些核心部分:
# Registry keyed by metric implementation; each value lists every string
# identifier accepted for that metric.
_METRIC_INFOS = {
    euclidean: ['euclidean', 'euclid', 'eu', 'e'],
    sqeuclidean: ['sqeuclidean', 'sqe', 'sqeuclid'],
    cityblock: ['manhattan', 'cityblock', 'cblock', 'cb', 'c'],
    chebyshev: ['chebychev', 'chebyshev', 'cheby', 'cheb', 'ch'],
    hamming: ['matching', 'hamming', 'hamm', 'ha', 'h'],
    mahalanobis: ['mahalanobis', 'mahal', 'mah'],
}

# Function name -> implementation.
_METRICS = {func.__name__: func for func in _METRIC_INFOS.keys()}

# Every accepted identifier -> implementation.
_METRIC_ALIAS = {
    identifier: func
    for func, identifiers in _METRIC_INFOS.items()
    for identifier in identifiers
}

# Function names, in registration order.
_METRIC_NAMES = list(_METRICS.keys())
def dist(XA, XB, metric='euclidean', *, out=None, **kwargs):
    """Compute the distance between each row of XA and the matching row of XB.

    Parameters
    ----------
    XA, XB : array-like, converted with ``np.asarray``. XA must be
        2-dimensional and XB must have the same shape.
    metric : str or callable. A string is lower-cased and looked up in
        ``_METRIC_ALIAS``; a callable is delegated to ``_dist_callable``.
    out : optional output array forwarded to the metric implementation.
    **kwargs : extra keyword arguments forwarded to the metric.

    Returns
    -------
    Whatever the selected metric implementation returns.

    Raises
    ------
    ValueError
        If XA is not 2-dimensional, the shapes differ, or the metric string
        is not a known identifier.
    TypeError
        If ``metric`` is neither a string nor a callable.
    """
    XA = np.asarray(XA)
    XB = np.asarray(XB)

    # Validate the inputs before dispatching.
    if XA.ndim != 2:
        raise ValueError('XA must be a 2-dimensional array.')
    if XA.shape != XB.shape:
        raise ValueError('XA and XB must have the same shape.')

    # User-supplied functions are applied pair by pair.
    if callable(metric):
        return _dist_callable(XA, XB, out=out, metric=metric, **kwargs)

    if not isinstance(metric, str):
        raise TypeError('2nd argument metric must be a string identifier '
                        'or a function.')

    # Identifiers are matched case-insensitively.
    metric = metric.lower()
    implementation = _METRIC_ALIAS.get(metric, None)
    if implementation is None:
        raise ValueError(f'Unknown Distance Metric: {metric}')
    return implementation(XA, XB, out=out, **kwargs)
def _dist_callable(XA, XB, *, out, metric, **kwargs):
    """Apply a user-supplied metric to each pair of corresponding rows.

    Parameters
    ----------
    XA, XB : arrays whose first axis enumerates the rows to pair up.
    out : array that receives one value per row, or None to allocate a
        new array with ``np.empty``.
    metric : callable invoked as ``metric(XA[i], XB[i], **kwargs)``.
    **kwargs : extra keyword arguments forwarded to ``metric``.

    Returns
    -------
    ``out``, filled with the per-row results.
    """
    # The parameter was previously misspelled ``metirc`` while ``dist`` passes
    # ``metric=...``, so every call failed with a TypeError.
    n_rows = XA.shape[0]
    if out is None:
        out = np.empty(n_rows)
    for i in range(n_rows):
        out[i] = metric(XA[i], XB[i], **kwargs)
    return out
测试:
>>> a = np.arange(20).reshape(-1, 4)
>>> b = a[::-1]
>>> dist(a, b)
array([32., 16., 0., 16., 32.])
>>> dist(a, b, 'cityblock')
array([64, 32, 0, 32, 64])
>>> dist(a, b, 'chebyshev')
array([16, 8, 0, 8, 16])
>>> dist(a, b, 'sqeuclidean')
array([1024, 256, 0, 256, 1024])
>>> dist(a, b, 'hamming')
array([4, 4, 0, 4, 4])
>>> dist(a, b, 'mahalanobis', VI=np.eye(a.shape[1]) * 2)
array([45.254834, 22.627417, 0. , 22.627417, 45.254834])
https://stackoverflow.com/questions/73305987
复制相似问题