首页
学习
活动
专区
圈层
工具
发布
首页
学习
活动
专区
圈层
工具
MCP广场
社区首页 >问答首页 >用Openmp实现AES在C语言中的并行仿真

用Openmp实现AES在C语言中的并行仿真
EN

Stack Overflow用户
提问于 2013-11-18 19:53:31
回答 1查看 2.8K关注 0票数 1

这是我的问题。我想使用Openmp在C中并行AES-128加密。下面的代码使用openmp,我几乎没有得到任何加速。我的机器是四核英特尔i5机。

这是密码。任何关于如何进一步并行化这段代码的建议都是非常感谢的。请看一下代码末尾的主要功能。下面的AES代码由几个函数组成,以实现其功能。请建议如何最好地从中提取并行性。

非常感谢。

代码语言:javascript
运行
复制
/*
******************************************************************
**       Advanced Encryption Standard implementation in C.      **
**       By Niyaz PK                                            **
**       E-mail: niyazpk@gmail.com                              **
**       Downloaded from Website: www.hoozi.com                 **
******************************************************************
This is the source code for encryption using the latest AES algorithm.
******************************************************************
*/

// Include stdio.h for standard input/output.
// Used for giving output to the screen.
#include<omp.h>
#include<stdio.h>
#include<time.h>
#include<stdlib.h>


// The number of columns comprising a state in AES. This is a constant in AES. Value=4
#define Nb 4

// The number of rounds in AES Cipher. It is simply initiated to zero. The actual value is recieved in the program.
int Nr=0;

// The number of 32 bit words in the key. It is simply initiated to zero. The actual value is recieved in the program.
int Nk=0;

// in - it is the array that holds the plain text to be encrypted.
// out - it is the array that holds the output CipherText after encryption.
// state - the array that holds the intermediate results during encryption.
unsigned char in[16], out[16], state[4][4];

// The array that stores the round keys.
unsigned char RoundKey[240];

// The Key input to the AES Program
unsigned char Key[32];



int getSBoxValue(int num)
{
    int sbox[256] =   {
    //0     1    2      3     4    5     6     7      8    9     A      B    C     D     E     F
    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, //0
    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, //1
    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, //2
    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, //3
    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, //4
    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, //5
    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, //6
    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, //7
    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, //8
    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, //9
    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, //A
    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, //B
    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, //C
    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, //D
    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, //E
    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 }; //F
    return sbox[num];
}

// The round constant word array, Rcon[i], contains the values given by 
// x to th e power (i-1) being powers of x (x is denoted as {02}) in the field GF(28)
// Note that i starts at 1, not 0).
int Rcon[255] = {
    0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 
    0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 
    0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 
    0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 
    0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 
    0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 
    0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 
    0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 
    0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 
    0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 
    0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 
    0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 
    0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 
    0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 
    0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 
    0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb  };

// This function produces Nb(Nr+1) round keys. The round keys are used in each round to encrypt the states. 
void KeyExpansion()
{
    int i,j;
    unsigned char temp[4],k;

    // The first round key is the key itself.
    for(i=0;i<Nk;i++)
    {
        RoundKey[i*4]=Key[i*4];
        RoundKey[i*4+1]=Key[i*4+1];
        RoundKey[i*4+2]=Key[i*4+2];
        RoundKey[i*4+3]=Key[i*4+3];
    }

    // All other round keys are found from the previous round keys.
    while (i < (Nb * (Nr+1)))
    {
        for(j=0;j<4;j++)
        {
            temp[j]=RoundKey[(i-1) * 4 + j];
        }
        if (i % Nk == 0)
        {
            // This function rotates the 4 bytes in a word to the left once.
            // [a0,a1,a2,a3] becomes [a1,a2,a3,a0]

            // Function RotWord()
            {
                k = temp[0];
                temp[0] = temp[1];
                temp[1] = temp[2];
                temp[2] = temp[3];
                temp[3] = k;
            }

            // SubWord() is a function that takes a four-byte input word and 
            // applies the S-box to each of the four bytes to produce an output word.

            // Function Subword()
            {
                temp[0]=getSBoxValue(temp[0]);
                temp[1]=getSBoxValue(temp[1]);
                temp[2]=getSBoxValue(temp[2]);
                temp[3]=getSBoxValue(temp[3]);
            }

            temp[0] =  temp[0] ^ Rcon[i/Nk];
        }
        else if (Nk > 6 && i % Nk == 4)
        {
            // Function Subword()
            {
                temp[0]=getSBoxValue(temp[0]);
                temp[1]=getSBoxValue(temp[1]);
                temp[2]=getSBoxValue(temp[2]);
                temp[3]=getSBoxValue(temp[3]);
            }
        }
        RoundKey[i*4+0] = RoundKey[(i-Nk)*4+0] ^ temp[0];
        RoundKey[i*4+1] = RoundKey[(i-Nk)*4+1] ^ temp[1];
        RoundKey[i*4+2] = RoundKey[(i-Nk)*4+2] ^ temp[2];
        RoundKey[i*4+3] = RoundKey[(i-Nk)*4+3] ^ temp[3];
        i++;
    }
}

// This function adds the round key to state.
// The round key is added to the state by an XOR function.
void AddRoundKey(int round) 
{
    int i,j;
    for(i=0;i<4;i++)
    {
        for(j=0;j<4;j++)
        {
            state[j][i] ^= RoundKey[round * Nb * 4 + i * Nb + j];
        }
    }
}

// The SubBytes Function Substitutes the values in the
// state matrix with values in an S-box.
void SubBytes()
{
    int i,j;
    for(i=0;i<4;i++)
    {
        for(j=0;j<4;j++)
        {
            state[i][j] = getSBoxValue(state[i][j]);

        }
    }
}

// The ShiftRows() function shifts the rows in the state to the left.
// Each row is shifted with different offset.
// Offset = Row number. So the first row is not shifted.
void ShiftRows()
{
    unsigned char temp;

    // Rotate first row 1 columns to left    
    temp=state[1][0];
    state[1][0]=state[1][1];
    state[1][1]=state[1][2];
    state[1][2]=state[1][3];
    state[1][3]=temp;

    // Rotate second row 2 columns to left    
    temp=state[2][0];
    state[2][0]=state[2][2];
    state[2][2]=temp;

    temp=state[2][1];
    state[2][1]=state[2][3];
    state[2][3]=temp;

    // Rotate third row 3 columns to left
    temp=state[3][0];
    state[3][0]=state[3][3];
    state[3][3]=state[3][2];
    state[3][2]=state[3][1];
    state[3][1]=temp;
}

// xtime is a macro that finds the product of {02} and the argument to xtime modulo {1b}  
#define xtime(x)   ((x<<1) ^ (((x>>7) & 1) * 0x1b))

// MixColumns function mixes the columns of the state matrix
// The method used may look complicated, but it is easy if you know the underlying theory.
// Refer the documents specified above.
void MixColumns()
{
    int i;
    unsigned char Tmp,Tm,t;
    for(i=0;i<4;i++)
    {    
        t=state[0][i];
        Tmp = state[0][i] ^ state[1][i] ^ state[2][i] ^ state[3][i] ;
        Tm = state[0][i] ^ state[1][i] ; Tm = xtime(Tm); state[0][i] ^= Tm ^ Tmp ;
        Tm = state[1][i] ^ state[2][i] ; Tm = xtime(Tm); state[1][i] ^= Tm ^ Tmp ;
        Tm = state[2][i] ^ state[3][i] ; Tm = xtime(Tm); state[2][i] ^= Tm ^ Tmp ;
        Tm = state[3][i] ^ t ; Tm = xtime(Tm); state[3][i] ^= Tm ^ Tmp ;
    }
}

// Cipher is the main function that encrypts the PlainText.
void Cipher()
{
    int i,j,round=0;

    //Copy the input PlainText to state array.
    for(i=0;i<4;i++)
    {
        for(j=0;j<4;j++)
        {
            state[j][i] = in[i*4 + j];
        }
    }

    // Add the First round key to the state before starting the rounds.
    AddRoundKey(0); 

    // There will be Nr rounds.
    // The first Nr-1 rounds are identical.
    // These Nr-1 rounds are executed in the loop below.
    for(round=1;round<Nr;round++)
    {
        SubBytes();
        ShiftRows();
        MixColumns();
        AddRoundKey(round);
    }

    // The last round is given below.
    // The MixColumns function is not here in the last round.
    SubBytes();
    ShiftRows();
    AddRoundKey(Nr);

    // The encryption process is over.
    // Copy the state array to output array.
    for(i=0;i<4;i++)
    {
        for(j=0;j<4;j++)
        {
            out[i*4+j]=state[j][i];
        }
    }
}

void encrypt(int *K,int *PT,int *CT)
{
    int i;

    //    int ct;

    // Calculate Nk and Nr from the received value.
    Nr = 128;
    Nk = Nr / 32;
    Nr = Nk + 6;


     // Copy the Key and PlainText
    for(i=0;i<Nk*4;i++)
    {
        Key[i]=K[i];
        in[i]=PT[i];
    }

   /* 
   printf("\nKey for encryption:\n");
    for(i=0; i < Nk*4; i++)
      printf("%02x",Key[i]);
    printf("\n");
*/
/*
    printf("\nText before encryption:\n");
    for(i=0; i < Nk*4; i++)
      printf("%02x",in[i]);
    printf("\n");
*/    
    // The KeyExpansion routine must be called before encryption.
    KeyExpansion();

    // The next function call encrypts the PlainText with the Key using AES algorithm.
    Cipher();


    // Output the encrypted text.
    //io_printf("\nText after encryption:\n");
     for(i=0; i < Nk*4; i++)
    {
        CT[i] = out[i];
        printf("%02x",out[i]);
      }
    printf("\n");

    //  ct = out[15];
    // return ct;

}

//main function
int main()
{


  srand(time(NULL));
  unsigned int rnd[4];

  int key[16];
  int pt[16];
  int ct[16];

  unsigned int i,j;

  #pragma omp parallel for num_threads(4) schedule(dynamic)
  for(i=0; i<65000*10; i++)
  {
   rnd[0]=rand();
   rnd[1]=rand();
   rnd[2]=rand();
   rnd[3]=rand();

   for(j=0; j < 4; j++)
   {
    key[4*j]   = (rnd[j] & 0xff);
    pt[4*j]    = key[4*j];
    key[4*j+1] = ((rnd[j] >> 8)  & 0xff) ; 
    pt[4*j+1]  = key[4*j+1];
    key[4*j+2] = ((rnd[j] >> 16) & 0xff) ;
    pt[4*j+2]  = key[4*j+2];
    key[4*j+3] = ((rnd[j] >> 24) & 0xff) ;
    pt[4*j+3]  = key[4*j+3];
   }

   #pragma omp task      
   encrypt(key,pt,ct);

  }

  return 0;

}

我已经按照赫里斯托的建议修改了密码。谢谢你的努力。下面是代码的外观。我不明白如何使encrypt( )函数使用局部变量。你能解释一下吗。请在应该在的地方添加代码。再次感谢你的努力。其次,如果没有printf语句,如何查看输出是否正确。我的意思是,还有其他机制来显示或保存输出。最后,如下所示的代码仍然比串行执行慢(即没有openmp)。在串行版本中也没有printf,以使比较公平。

代码语言:javascript
运行
复制
void encrypt(int *K,int *PT,int *CT)
{
    int i;

    //    int ct;

    // Calculate Nk and Nr from the received value.
    Nr = 128;
    Nk = Nr / 32;
    Nr = Nk + 6;


     // Copy the Key and PlainText
    for(i=0;i<Nk*4;i++)
    {
        Key[i]=K[i];
        in[i]=PT[i];
    }

   /* 
   printf("\nKey for encryption:\n");
    for(i=0; i < Nk*4; i++)
      printf("%02x",Key[i]);
    printf("\n");
*/
/*
    printf("\nText before encryption:\n");
    for(i=0; i < Nk*4; i++)
      printf("%02x",in[i]);
    printf("\n");
*/    
    // The KeyExpansion routine must be called before encryption.
    KeyExpansion();

    // The next function call encrypts the PlainText with the Key using AES algorithm.
    Cipher();


    // Output the encrypted text.
    //io_printf("\nText after encryption:\n");
     for(i=0; i < Nk*4; i++)
    {
        CT[i] = out[i];
//        printf("%02x",out[i]);
      }
//    printf("\n");

    //  ct = out[15];
    // return ct;

}

//main function
int main()
{


  srand(time(NULL));
  unsigned int rnd[4];

//  printf("rand_key = %2x%2x%2x%2x\n",rnd[0],rnd[1],rnd[2],rnd[3]);

  int key[16];
  int pt[16];
  int ct[16];

  unsigned int i,j;
  #pragma omp parallel for private(key,pt,ct) num_threads(2) schedule(static)
  for(i=0; i<65000; i++)
  {
   rnd[0]=rand();
   rnd[1]=rand();
   rnd[2]=rand();
   rnd[3]=rand();

   for(j=0; j < 4; j++)
   {
    key[4*j]   = (rnd[j] & 0xff);
    pt[4*j]    = key[4*j];
    key[4*j+1] = ((rnd[j] >> 8)  & 0xff) ; 
    pt[4*j+1]  = key[4*j+1];
    key[4*j+2] = ((rnd[j] >> 16) & 0xff) ;
    pt[4*j+2]  = key[4*j+2];
    key[4*j+3] = ((rnd[j] >> 24) & 0xff) ;
    pt[4*j+3]  = key[4*j+3];
   }

   encrypt(key,pt,ct);


  }

  return 0;

}
EN

回答 1

Stack Overflow用户

回答已采纳

发布于 2013-11-18 23:11:07

您不需要schedule(dynamic)task构造。就我所知的AES的本质而言,这是一个完全有规律的问题--每一种加密都需要完全相同的周期,因此,无论密钥是什么,都需要相同的时钟时间。这完全排除了使用动态调度和任务的必要性。即使是在出现问题的情况下,简单地添加schedule(dynamic)也是一个非常糟糕的主意。原因是dynamic的默认块大小是1,这意味着每个线程执行一个迭代,然后向OpenMP运行时请求另一个迭代。在您的例子中,开销乘以650000倍。动态调度在实际应用中是非常强大的,但是人们应该谨慎地选择最优的块大小,后者通常需要进行大量的试验,直到找到最优值为止。

除此之外,你还产生了650000项任务。每个任务都有与其创建和工作线程随后的消耗相关联的一定开销。考虑到Pentium (ref:维基百科)上每个字节的AES周期约为18次,对encrypt()的每次调用可能会占用OpenMP运行时所需的大约相同的时间来执行任务(如果不是针对内部的printf()语句的话)。printf()输出到终端或文件流(如果重定向的话)并使用相同的描述符执行I/O操作本质上是串行操作,即它序列化线程。请参阅这个答案以了解printf()对并行性能的影响程度。

但是,代码中最糟糕的问题实际上是大量的数据竞争。encrypt()依赖并更改几个全局变量的值。这不仅导致由于真正的缓存共享而放慢速度,而且很可能导致完全错误的密文。如果有必要保持全局变量,这些全局变量都应该是encrypt()的本地变量或threadprivate。然后,并行循环使用几个共享变量,即keyptct。这些应该是private

摘要:使encrypt()只使用局部变量;使keyptct private;将循环调度更改为static;删除task构造;删除在每次迭代中输出信息的所有printf语句。

额外好处:rand()也将其状态保持在全局变量中。

有这么多的全局变量。把它们变成线-私密。在上一个全局变量的定义之后添加以下OpenMP杂注:

代码语言:javascript
运行
复制
...
// The Key input to the AES Program
unsigned char Key[32];

#pragma omp threadprivate(Nr,Nk,in,out,state,RoundKey,Key)

...

还可以按以下方式更改main()函数:

代码语言:javascript
运行
复制
unsigned int i;
#pragma omp parallel for num_threads(2) schedule(static)
for(i = 0; i < 65000; i++)
{
  unsigned int rnd[4];
  int key[16];
  int pt[16];
  int ct[16];
  unsigned int j;
  // Per-thread PRNG initialisation
  // It could be done better - this is for illustration purposes only
  unsigned int rand_state = time(NULL) + 1337*omp_get_thread_num();

  rnd[0] = rand_r(&rand_state);
  rnd[1] = rand_r(&rand_state);
  rnd[2] = rand_r(&rand_state);
  rnd[3] = rand_r(&rand_state);

  for(j = 0; j < 4; j++)
  {
    key[4*j]   = (rnd[j] & 0xff);
    pt[4*j]    = key[4*j];
    key[4*j+1] = ((rnd[j] >> 8)  & 0xff) ; 
    pt[4*j+1]  = key[4*j+1];
    key[4*j+2] = ((rnd[j] >> 16) & 0xff) ;
    pt[4*j+2]  = key[4*j+2];
    key[4*j+3] = ((rnd[j] >> 24) & 0xff) ;
    pt[4*j+3]  = key[4*j+3];
  }

  encrypt(key, pt, ct);
}

注意- keyptj等变量是在使用它们的作用域中定义的。这使您不必将它们全部放在private子句中,因为这些变量是预先确定为private的。另外,每个线程现在都有自己的PRNG状态。

票数 2
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/20056559

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档