首页
学习
活动
专区
工具
TVP
发布
社区首页 >问答首页 >MPI_ERR_RANK:群集的排名无效

MPI_ERR_RANK:群集的排名无效
EN

Stack Overflow用户
提问于 2018-10-11 05:29:37
回答 1查看 933关注 0票数 -2

我正在为一门课程做一个项目。我使用了从网上找到的顺序(串行)桶排序代码,并试图用 OpenMPI 把它改成并行版本。这段代码将在集群系统上运行。当我测试它时,它给出了以下错误:

"[cluster:5379] *** An error occurred in MPI_Send [cluster:5379] *** on communicator MPI_COMM_WORLD [cluster:5379] *** MPI_ERR_RANK: invalid rank [cluster:5379] *** MPI_ERRORS_ARE_FATAL: your MPI job will now abort"

有没有人能建议我怎么解决这个问题?

P.S. 我的编程水平相当差,所以有些问题我可能回答不上来。

代码语言:c
复制
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"

/* One bucket: a storage buffer and the number of entries currently held. */
struct bucket
{
    int count;   /* number of valid entries in value[] */
    int *value;  /* element storage; allocated and freed by bucketSort() */
};

/*
 * qsort() comparator for int elements.
 * Returns -1 if *first < *second, 0 if they are equal, 1 otherwise.
 */
int compareIntegers(const void* first, const void* second)
{
    const int lhs = *(const int *)first;
    const int rhs = *(const int *)second;

    /* Each comparison yields 0 or 1, so the difference is exactly -1, 0 or 1. */
    return (lhs > rhs) - (lhs < rhs);
}

void bucketSort(int array[], int n)
{
    struct bucket buckets[3];
    int i, j, k;
    for (i = 0; i < 3; i++)
    {
        buckets[i].count = 0;
        buckets[i].value = (int*)malloc(sizeof(int) * n);
    }

    for (i = 0; i < n; i++)
    {
        if (array[i] < 0)
        {
            buckets[0].value[buckets[0].count++] = array[i];
        }
        else if (array[i] > 10)
        {
            buckets[2].value[buckets[2].count++] = array[i];
        }
        else
        {
            buckets[1].value[buckets[1].count++] = array[i];
        }
    }
    for (k = 0, i = 0; i < 3; i++)
    {
        // now using quicksort to sort the elements of buckets
        qsort(buckets[i].value, buckets[i].count, sizeof(int), &compareIntegers);
        for (j = 0; j < buckets[i].count; j++)
        {
            array[k + j] = buckets[i].value[j];
        }
        k += buckets[i].count;
        free(buckets[i].value);
    }

}

/*
 * Entry point of a three-rank MPI pipeline:
 *   rank 0 reads an element count, fills the array with rand() values,
 *          prints them and sends data to rank 1;
 *   rank 1 receives, calls bucketSort() and sends to rank 2;
 *   rank 2 receives and prints.
 *
 * NOTE(review): the parameters are declared as (argv, argc); the standard
 * form is main(int argc, char *argv[]).
 */
int main(char *argv[], int argc)
{
    int array[1000000];
    int i = 0, j, k, n;
    int num;
    //for MPI
    /* NOTE(review): procName, nameLen and chunksize are not referenced again
       in this function; numProc is only written by MPI_Comm_size. */
    int numProc, rank;
    char procName[MPI_MAX_PROCESSOR_NAME];
    int nameLen;
    int chunksize;
    double start, end;
    int msgtag;

    //MPI
    MPI_Status stat;
    /* NOTE(review): MPI_Wtime is called here before MPI_Init. */
    start = MPI_Wtime();    //timer start
    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank); //process rank ,comm_world = communication of the process
    MPI_Comm_size(MPI_COMM_WORLD, &numProc); //number of process
    msgtag = 1234;

    if (rank == 0)
    {
        printf("Enter number of element to be sort: ");
        scanf("%d", &num);

        for (i = 0; i < num; i++) //random num elements
        {
            array[i] = rand();
        }

        /* n is assigned on rank 0 only; it is never transmitted to other ranks. */
        n = i;
        printf("\nBefore Sorting\n");
        for (j = 0; j < i; j++)
        {
            printf("%d ", array[j]);
        }
        /* After the loop j == i, so &array[j] is the slot just past the
           generated values: the send buffer starts after the data. */
        MPI_Send(&array[j], j, MPI_INT, 1, msgtag, MPI_COMM_WORLD);
    }

    if (rank == 1)
    {
        /* j and n have not been assigned on this rank (only rank 0 sets them). */
        MPI_Recv(&array[j], j, MPI_INT, 0, msgtag, MPI_COMM_WORLD, &stat);
        bucketSort(array, n);
        /* Destination rank 2 only exists when at least 3 processes are
           launched; presumably the MPI_ERR_RANK source otherwise -- TODO confirm. */
        MPI_Send(&array, n, MPI_INT, 2, msgtag, MPI_COMM_WORLD);
    }

    if (rank == 2)
    {
        /* n has not been assigned on this rank. */
        MPI_Recv(&array, n, MPI_INT, 1, msgtag, MPI_COMM_WORLD, &stat);
        printf("\nAfter Sorting\n");
        /* i is still its initial 0 on this rank, so this loop body never runs. */
        for (k = 0; k < i; k++)
        {
            printf("%d ", array[k]);
        }
    }
    //MPI END
    MPI_Finalize();
    /* NOTE(review): MPI_Wtime is called here after MPI_Finalize. */
    end = MPI_Wtime();  // timer end   
    double time_spent = end - start;
    /* Executed by every rank, so the line is printed once per process. */
    printf("\ntime used for this program was %f Sec.", time_spent);

    return 0;
}
EN

回答 1

Stack Overflow用户

发布于 2018-10-14 04:37:18

你的代码中有相当多的错误。希望截止日期是星期一……

第一:

代码语言:javascript
复制
int main(int argc, char *argv[])

会比 int main(char *argv[], int argc) 工作得更好

第二:

进程0是指定用于读取要生成的元素数量的进程。

然后它必须将它传递给所有其他进程,否则其他进程将在变量num中有一个未定义的数字,对吗?

因此

代码语言:javascript
复制
if (rank == 0)
{
        printf("Enter number of element to be sort: ");
        fflush(stdout);
        scanf("%d", &num);
        for (i = 0; i < num; i++) //random num elements
    {
        array[i] = rand();
    }
        n = num;
        printf("\nBefore Sorting (%i)\n", n);
    for (j = 0; j < n; j++)
    {
        printf("%d ", array[j]);
    }
        fflush(stdout);
}
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

第三:

避免在循环结束之后继续依赖循环变量的值。我明白,在下面这个循环之后

代码语言:javascript
复制
for (j = 0; j < n; j++)
{
    printf("%d ", array[j]);
}

你有

代码语言:javascript
复制
j=n

但这样写并不清晰……

第四:

MPI_Send 或 MPI_Recv 的第一个参数是要发送(或接收)的第一个元素的地址。因此,如果这样传递

代码语言:javascript
复制
MPI_Send(&array[j], j, MPI_INT, 1, msgtag, MPI_COMM_WORLD);

由于j=n (见上面的注释),我猜你不会得到你想要的东西。

你需要的是

代码语言:javascript
复制
MPI_Send(&array[0], n, MPI_INT, 1, msgtag, MPI_COMM_WORLD);

第五:

MPI_Barrier是你的朋友。输出是一个关键操作,因此在输出操作之前,您可以(可选)确保所有进程都达到了这一点。

代码语言:javascript
复制
if (rank == 2)
{
    MPI_Recv(&array, n, MPI_INT, 1, msgtag, MPI_COMM_WORLD, &stat);
    printf("\nAfter Sorting\n");
    for (k = 0; k < i; k++)
    {
        printf("%d ", array[k]);
    }
}

变成了

代码语言:javascript
复制
if (rank == 2)
{
    MPI_Recv(&(array[0]), n, MPI_INT, 1, msgtag, MPI_COMM_WORLD, &stat);
}
MPI_Barrier(MPI_COMM_WORLD);
if (rank == 2)
{
    printf("\nAfter Sorting\n");
    for (k = 0; k < n; k++)
    {
        printf("%d ", array[k]);
    }
}
    MPI_Barrier(MPI_COMM_WORLD);

结论:

代码语言:c
复制
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"

/* One bucket: a storage buffer and the number of entries currently held. */
struct bucket
{
    int count;   /* number of valid entries in value[] */
    int *value;  /* element storage; allocated and freed by bucketSort() */
};

/*
 * qsort() comparator for int elements.
 * Returns -1 if *first < *second, 0 if they are equal, 1 otherwise.
 */
int compareIntegers(const void* first, const void* second)
{
    const int lhs = *(const int *)first;
    const int rhs = *(const int *)second;

    /* Each comparison yields 0 or 1, so the difference is exactly -1, 0 or 1. */
    return (lhs > rhs) - (lhs < rhs);
}

void bucketSort(int array[], int n)
{
    struct bucket buckets[3];
    int i, j, k;
    for (i = 0; i < 3; i++)
    {
        buckets[i].count = 0;
        buckets[i].value = (int*)malloc(sizeof(int) * n);
    }

    for (i = 0; i < n; i++)
    {
        if (array[i] < 0)
        {
            buckets[0].value[buckets[0].count++] = array[i];
        }
        else if (array[i] > 10)
        {
            buckets[2].value[buckets[2].count++] = array[i];
        }
        else
        {
            buckets[1].value[buckets[1].count++] = array[i];
        }
    }
    for (k = 0, i = 0; i < 3; i++)
    {
        // now using quicksort to sort the elements of buckets
        qsort(buckets[i].value, buckets[i].count, sizeof(int), &compareIntegers);
        for (j = 0; j < buckets[i].count; j++)
        {
            array[k + j] = buckets[i].value[j];
        }
        k += buckets[i].count;
        free(buckets[i].value);
    }

}

/*
 * Entry point of a three-stage MPI pipeline:
 *   rank 0 reads the element count, generates that many rand() values,
 *          prints them and sends them to rank 1;
 *   rank 1 sorts them with bucketSort() and forwards them to rank 2;
 *   rank 2 prints the sorted values.
 *
 * Must be launched with at least 3 processes (e.g. mpirun -np 3); ranks
 * above 2 only take part in the collective calls.
 *
 * Returns 0 on success, EXIT_FAILURE when fewer than 3 processes are
 * available or the element count entered on rank 0 is invalid.
 */
int main(int argc, char *argv[])
{
    enum { MAX_ELEMENTS = 1000000 };
    /* static storage: an automatic array of this size could overflow the stack */
    static int array[MAX_ELEMENTS];
    const int msgtag = 1234;
    int n = 0;
    int numProc, rank;
    int k;
    double start, end;
    MPI_Status stat;

    MPI_Init(&argc, &argv);
    /* Take timestamps only between MPI_Init and MPI_Finalize. */
    start = MPI_Wtime();

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);    /* this process's rank */
    MPI_Comm_size(MPI_COMM_WORLD, &numProc); /* number of processes */

    /* Ranks 1 and 2 are addressed explicitly below; sending to a rank that
       does not exist fails with MPI_ERR_RANK. */
    if (numProc < 3)
    {
        if (rank == 0)
        {
            fprintf(stderr, "This program needs at least 3 MPI processes (got %d).\n", numProc);
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    if (rank == 0)
    {
        printf("Enter number of element to be sort: ");
        fflush(stdout);
        if (scanf("%d", &n) != 1 || n < 1 || n > MAX_ELEMENTS)
        {
            fprintf(stderr, "\nInvalid element count; expected 1..%d.\n", MAX_ELEMENTS);
            n = -1; /* broadcast below so that every rank stops together */
        }
        else
        {
            for (k = 0; k < n; k++)
            {
                array[k] = rand();
            }
            printf("\nBefore Sorting\n");
            for (k = 0; k < n; k++)
            {
                printf("%d ", array[k]);
            }
            fflush(stdout);
        }
    }

    /* Every rank needs the element count before it can size its transfers. */
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if (n < 1)
    {
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    if (rank == 0)
    {
        MPI_Send(&(array[0]), n, MPI_INT, 1, msgtag, MPI_COMM_WORLD);
    }
    if (rank == 1)
    {
        MPI_Recv(&(array[0]), n, MPI_INT, 0, msgtag, MPI_COMM_WORLD, &stat);
        bucketSort(array, n);
        MPI_Send(&(array[0]), n, MPI_INT, 2, msgtag, MPI_COMM_WORLD);
    }
    if (rank == 2)
    {
        MPI_Recv(&(array[0]), n, MPI_INT, 1, msgtag, MPI_COMM_WORLD, &stat);
    }

    /* Wait for all ranks before rank 2 starts printing the result. */
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 2)
    {
        printf("\nAfter Sorting\n");
        for (k = 0; k < n; k++)
        {
            printf("%d ", array[k]);
        }
        fflush(stdout);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    end = MPI_Wtime();
    MPI_Finalize();

    /* Executed by every rank, so the line is printed once per process. */
    printf("\ntime used for this program was %f Sec.", end - start);

    return 0;
}

正在运行

代码语言:javascript
复制
mpirun -np 3 test_mpi.exe

输出

代码语言:javascript
复制
Enter number of element to be sort: 10

Before Sorting
1804289383 846930886 1681692777 1714636915 1957747793 424238335 719885386 1649760492 596516649 1189641421
After Sorting

424238335 596516649 719885386 846930886 1189641421 1649760492 1681692777 1714636915 1804289383 1957747793
time used for this program was 2.271976 Sec.time used for this program was 2.281183 Sec.
time used for this program was 2.277746 Sec.
票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/52749059

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档