我最近用C++编写了一个矩阵模块。
在开发过程中,我引用了一些源代码,发现了一个问题。
例如,矩阵乘法:
这种方法适用于所有N×N矩阵:
void multiplyMatrix(const float32* a, const float32* b, float32* dst,
int32 aColumns, int32 bColumns, int32 dstColumns, int32 dstRows) {
for (int32 i = 0; i < dstRows; i++) {
for (int32 j = 0; j < dstColumns; j++)
dst[i * dstColumns + j] = dotMatrix(a, b, aColumns, bColumns, j, i);
}
}
// Returns the dot product of row `row` of `a` with column `column` of `b`.
// Both matrices are flat row-major arrays: `a` has aColumns elements per row,
// `b` has bColumns elements per row. The sum runs over aColumns terms.
float32 dotMatrix(const float32* a, const float32* b,
                  int32 aColumns, int32 bColumns,
                  int32 column, int32 row) {
    const int32 rowStart = aColumns * row;  // flat index of a[row][0]
    float32 sum = 0.0f;
    for (int32 k = 0; k < aColumns; ++k) {
        // Step along the row of `a` by 1 and down the column of `b` by one full row.
        sum += a[rowStart + k] * b[column + k * bColumns];
    }
    return sum;
}
接下来,我编写了一个3x3矩阵类。
class Matrix3x3
{
public:
float32 m11, m12, m13,
m21, m22, m23,
m31, m32, m33;
float32 element[9];
void multiply(float32 ma11, float32 ma12, float32 ma13,
float32 ma21, float32 ma22, float32 ma23,
float32 ma31, float32 ma32, float32 ma33) {
float32 temp1 = m11 * ma11 + m21 * ma12 + m31 * ma13;
float32 temp2 = m12 * ma11 + m22 * ma12 + m32 * ma13;
m13 = m13 * ma11 + m23 * ma12 + m33 * ma13;
m11 = temp1;
m12 = temp2;
temp1 = m11 * ma21 + m21 * ma22 + m31 * ma23;
temp2 = m12 * ma21 + m22 * ma22 + m32 * ma23;
m23 = m13 * ma21 + m23 * ma22 + m33 * ma23;
m21 = temp1;
m22 = temp2;
temp1 = m11 * ma31 + m21 * ma32 + m31 * ma33;
temp2 = m12 * ma31 + m22 * ma32 + m32 * ma33;
m31 = m13 * ma31 + m23 * ma32 + m33 * ma33;
m32 = temp1;
m33 = temp2;
}
}
显然,第一种方法(通用的 multiplyMatrix)使用起来更方便。
接下来,我测试了计算所需的时间:
// Micro-benchmark: times 100000 calls of the generic flat-array multiply
// against 100000 calls of the hand-unrolled Matrix3x3::multiply, using the
// same 3x3 input values for both.

// Left operand for the flat-array version (row-major 3x3).
float32 e1[9];
e1[0] = 2.1018f;   e1[1] = -1.81754f; e1[2] = 1.2541f;
e1[3] = 0.54194f;  e1[4] = 2.75391f;  e1[5] = -0.1167f;
e1[6] = -5.81652f; e1[7] = -7.9381f;  e1[8] = 4.2816f;

// Right operand for the flat-array version (same values as e1).
float32 e2[9];
e2[0] = 2.1018f;   e2[1] = -1.81754f; e2[2] = 1.2541f;
e2[3] = 0.54194f;  e2[4] = 2.75391f;  e2[5] = -0.1167f;
e2[6] = -5.81652f; e2[7] = -7.9381f;  e2[8] = 4.2816f;

// Output buffer for multiplyMatrix (was used below without being declared).
float32 dst[9] = {};

// Same values again for the class-based version.
Matrix3x3 a;
a.m11 = 2.1018f;   a.m12 = -1.81754f; a.m13 = 1.2541f;
a.m21 = 0.54194f;  a.m22 = 2.75391f;  a.m23 = -0.1167f;
a.m31 = -5.81652f; a.m32 = -7.9381f;  a.m33 = 4.2816f;
Matrix3x3 b = a;

float64 timeSpent = 0;
LARGE_INTEGER nFreq;
LARGE_INTEGER nBeginTime;
LARGE_INTEGER nEndTime;
QueryPerformanceFrequency(&nFreq);  // counter ticks per second

// --- Timing 1: generic flat-array multiply ---
// NOTE(review): dst is never read after the loop in this snippet; an
// optimizing compiler may be able to drop some or all of this work -- confirm
// by inspecting the generated code or by consuming dst afterwards.
QueryPerformanceCounter(&nBeginTime);  // start timer
for (int32 i = 0; i < 100000; i++) {
    multiplyMatrix(e1, e2, dst, 3, 3, 3, 3);
}
QueryPerformanceCounter(&nEndTime);  // end timer
timeSpent = static_cast<float64>(nEndTime.QuadPart - nBeginTime.QuadPart) / (nFreq.QuadPart);
printf("timeSpent1:%f\n", timeSpent);

// --- Timing 2: unrolled Matrix3x3::multiply ---
// Unlike timing 1, b is updated in place on every pass, so its values compound
// across iterations.
// NOTE(review): with these inputs b may overflow to inf/NaN well before
// 100000 iterations, which could skew the comparison -- confirm.
QueryPerformanceCounter(&nBeginTime);
for (int32 i = 0; i < 100000; i++) {
    b.multiply(a.m11, a.m12, a.m13,
               a.m21, a.m22, a.m23,
               a.m31, a.m32, a.m33);
}
QueryPerformanceCounter(&nEndTime);
timeSpent = static_cast<float64>(nEndTime.QuadPart - nBeginTime.QuadPart) / (nFreq.QuadPart);
printf("timeSpent2:%f\n", timeSpent);
输出:
timeSpent1:0.014277
timeSpent2:0.004649
timeSpent1:0.012684
timeSpent2:0.004522
.......
.......
timeSpent1:0.003414
timeSpent2:0.001166
timeSpent1:0.003407
timeSpent2:0.001242
这种效率的差异是显著的还是可以忽略不计的?
发布于 2019-01-17 04:02:52
对于3x3矩阵来说,约4倍的差距仍属于同一数量级,可以接受。
可以编写一个程序来生成 Matrix99x99 的 C++ 文件,并对其进行测试。
我猜差距同样会是4倍左右。如果只有2倍,那就完全没问题了。
在一般的矩阵乘法 A·B 中,A 的维数为 L×M,B 的维数为 M×N,两者必须共享维度 M,结果的维数为 L×N。因此,有这样一个小巧的 C++ 类会很不错。
https://codereview.stackexchange.com/questions/211673
复制