目前,我正试图实现拉普拉斯方程的算法。我看了几个实现,并且我坚持要放置OpenMP的编译指示的最佳位置。
while( var > tol && iter <= maxIter ) {
++iter;
var = 0.0;
for (i=1; i<=n; ++i)
for (j=1; j<=n; ++j) {
Tnew[i*n2+j] = 0.25*( T[(i-1)*n2+j] + T[(i+1)*n2+j]
+ T[i*n2+(j-1)] + T[i*n2+(j+1)] );
var = fmax(var, fabs(Tnew[i*n2+j] - T[i*n2+j]));
}
}
什么是最好的方式去做这件事,为什么?
发布于 2018-06-01 09:00:15
首先,我认为你忘记交换数组,Tnew
并且T
在所有外循环迭代结束时。假设这些是指针,这里是纠正的串行代码:
while(var > tol && iter <= maxIter) {
++iter;
var = 0.0;
for (i=1; i<=n; ++i)
for (j=1; j<=n; ++j) {
Tnew[i*n2+j] = 0.25*(T[(i-1)*n2+j] + T[(i+1)*n2+j]
+ T[i*n2+(j-1)] + T[i*n2+(j+1)]);
var = fmax(var, fabs(Tnew[i*n2+j] - T[i*n2+j]));
}
}
temp = TNew;
TNew = T;
T = temp;
}
while(var > tol && iter <= maxIter) {
++iter;
var = 0.0;
#pragma omp parallel for
for (i=1; i<=n; ++i)
for (j=1; j<=n; ++j) {
Tnew[i*n2+j] = 0.25*(T[(i-1)*n2+j] + T[(i+1)*n2+j]
+ T[i*n2+(j-1)] + T[i*n2+(j+1)]);
#pragma omp critical
{
var = fmax(var, fabs(Tnew[i*n2+j] - T[i*n2+j]));
}
}
}
temp = TNew;
TNew = T;
T = temp;
}
#pragma omp parallel private(iter) shared(var, maxIter)
{
while(var > tol && iter <= maxIter) {
++iter;
#pragma omp barrier
#pragma omp single
{
var = 0.0;
}
#pragma omp for
for (i=1; i<=n; ++i)
for (j=1; j<=n; ++j) {
Tnew[i*n2+j] = 0.25*(T[(i-1)*n2+j] + T[(i+1)*n2+j]
+ T[i*n2+(j-1)] + T[i*n2+(j+1)]);
#pragma omp critical
{
var = fmax(var, fabs(Tnew[i*n2+j] - T[i*n2+j]));
}
}
}
#pragma omp single
{
temp = TNew;
TNew = T;
T = temp;
}
}
}
https://stackoverflow.com/questions/-100004676
复制相似问题