首页
学习
活动
专区
圈层
工具
发布
首页
学习
活动
专区
圈层
工具
MCP广场
社区首页 >问答首页 >集的快速相交: C++对C#

集的快速相交: C++对C#
EN

Stack Overflow用户
提问于 2009-06-29 21:23:01
回答 13查看 15.8K关注 0票数 8

在我的机器上(Quad,8gb内存),运行Vista x64业务,使用Visual 2008 SP1,我正试图非常快地交叉两组数字。

我在C++中实现了两种方法,在C#中实现了一种方法。到目前为止,C#方法速度更快,我希望改进C++方法,使其比C#更快,我希望C++能够做到这一点。

下面是C#输出:(发布版本)

代码语言:javascript
复制
Found the intersection 1000 times, in 4741.407 ms

下面是两种不同方法(发布C++构建)的初始x64输出:

代码语言:javascript
复制
Found the intersection (using unordered_map) 1000 times, in 21580.7ms
Found the intersection (using set_intersection) 1000 times, in 22366.6ms

以下是最新的C++输出,用于三种方法(发布x64构建):

最新基准:

代码语言:javascript
复制
Found the intersection of 504 values (using unordered_map) 1000 times, in 28827.6ms
Found the intersection of 495 values (using set_intersection) 1000 times, in 9817.69ms
Found the intersection of 504 values (using unordered_set) 1000 times, in 24769.1ms

因此,set_intersection方法现在比C#慢约2倍,但比最初的C++方法快2倍。

最新的C++代码:

代码语言:javascript
复制
Code:

// MapPerformance.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>
#include <unordered_set>

#include <boost\unordered\unordered_map.hpp>

#include "timer.h"

using namespace std;
using namespace stdext;
using namespace boost;
using namespace tr1;


int runIntersectionTest2(const vector<int>& set1, const vector<int>& set2)
{
    // hash_map<int,int> theMap;
    // map<int,int> theMap;
    unordered_set<int> theSet;      

     theSet.insert( set1.begin(), set1.end() );

    int intersectionSize = 0;

    vector<int>::const_iterator set2_end = set2.end();

    for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
    {
        if ( theSet.find(*iterator) != theSet.end() )
        {
                intersectionSize++;
        }
    }

    return intersectionSize;
}

int runIntersectionTest(const vector<int>& set1, const vector<int>& set2)
{
    // hash_map<int,int> theMap;
    // map<int,int> theMap;
    unordered_map<int,int> theMap;  

    vector<int>::const_iterator set1_end = set1.end();

    // Now intersect the two sets by populating the map
    for ( vector<int>::const_iterator iterator = set1.begin(); iterator != set1_end; ++iterator )
    {
        int value = *iterator;

        theMap[value] = 1;
    }

    int intersectionSize = 0;

    vector<int>::const_iterator set2_end = set2.end();

    for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
    {
        int value = *iterator;

        unordered_map<int,int>::iterator foundValue = theMap.find(value);

        if ( foundValue != theMap.end() )
        {
            theMap[value] = 2;

            intersectionSize++;
        }
    }

    return intersectionSize;

}

int runSetIntersection(const vector<int>& set1_unsorted, const vector<int>& set2_unsorted)
{   
    // Create two vectors
    std::vector<int> set1(set1_unsorted.size());
    std::vector<int> set2(set2_unsorted.size());

    // Copy the unsorted data into them
    std::copy(set1_unsorted.begin(), set1_unsorted.end(), set1.begin());
    std::copy(set2_unsorted.begin(), set2_unsorted.end(), set2.begin());

    // Sort the data
    sort(set1.begin(),set1.end());
    sort(set2.begin(),set2.end());

    vector<int> intersection;
    intersection.reserve(1000);

    set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), back_inserter(intersection));

    return intersection.size(); 
}

void createSets( vector<int>& set1, vector<int>& set2 )
{
    srand ( time(NULL) );

    set1.reserve(100000);
    set2.reserve(1000);

    // Create 100,000 values for set1
    for ( int i = 0; i < 100000; i++ )
    {
        int value = 1000000000 + i;
        set1.push_back(value);
    }

    // Try to get half of our values intersecting
    float ratio = 200000.0f / RAND_MAX;


    // Create 1,000 values for set2
    for ( int i = 0; i < 1000; i++ )
    {
        int random = rand() * ratio + 1;

        int value = 1000000000 + random;
        set2.push_back(value);
    }

    // Make sure set1 is in random order (not sorted)
    random_shuffle(set1.begin(),set1.end());
}

int _tmain(int argc, _TCHAR* argv[])
{
    int intersectionSize = 0;

    vector<int> set1, set2; 
    createSets( set1, set2 );

    Timer timer;
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runIntersectionTest(set1, set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    timer.Reset();
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runSetIntersection(set1,set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    timer.Reset();
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runIntersectionTest2(set1,set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using unordered_set) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    getchar();

    return 0;
}

C#代码:

代码语言:javascript
复制
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace DictionaryPerformance
{
    class Program
    {
        static void Main(string[] args)
        {
            List<int> set1 = new List<int>(100000);
            List<int> set2 = new List<int>(1000);

            // Create 100,000 values for set1
            for (int i = 0; i < 100000; i++)
            {
                int value = 1000000000 + i;
                set1.Add(value);
            }

            Random random = new Random(DateTime.Now.Millisecond);

            // Create 1,000 values for set2
            for (int i = 0; i < 1000; i++)
            {
                int value = 1000000000 + (random.Next() % 200000 + 1);
                set2.Add(value);
            }

            long start = System.Diagnostics.Stopwatch.GetTimestamp();
            for (int i = 0; i < 1000; i++)
            {
                runIntersectionTest(set1,set2);
            }
            long duration = System.Diagnostics.Stopwatch.GetTimestamp() - start;

            Console.WriteLine(String.Format("Found the intersection 1000 times, in {0} ms", ((float) duration * 1000.0f) / System.Diagnostics.Stopwatch.Frequency));

            Console.ReadKey();
        }

        static int runIntersectionTest(List<int> set1, List<int> set2)
        {

            Dictionary<int,int> theMap = new Dictionary<int,int>(100000);

            // Now intersect the two sets by populating the map
            foreach( int value in set1 )
            {
                theMap[value] = 1;
            }

            int intersectionSize = 0;

            foreach ( int value in set2 )
            {
                int count;
                if ( theMap.TryGetValue(value, out count ) )
                {
                    theMap[value] = 2;
                    intersectionSize++;
                }
            }

            return intersectionSize;
        }
    }
}

C++代码:

代码语言:javascript
复制
// MapPerformance.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>

#include <boost\unordered\unordered_map.hpp>

#include "timer.h"

using namespace std;
using namespace stdext;
using namespace boost;

int runIntersectionTest(vector<int> set1, vector<int> set2)
{
    // hash_map<int,int> theMap;
    // map<int,int> theMap;
    unordered_map<int,int> theMap;

    // Now intersect the two sets by populating the map
    for ( vector<int>::iterator iterator = set1.begin(); iterator != set1.end(); iterator++ )
    {
        int value = *iterator;

        theMap[value] = 1;
    }

    int intersectionSize = 0;

    for ( vector<int>::iterator iterator = set2.begin(); iterator != set2.end(); iterator++ )
    {
        int value = *iterator;

        unordered_map<int,int>::iterator foundValue = theMap.find(value);

        if ( foundValue != theMap.end() )
        {
            theMap[value] = 2;

            intersectionSize++;
        }
    }

    return intersectionSize;

}

int runSetIntersection(set<int> set1, set<int> set2)
{   
    set<int> intersection;

    set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), inserter(intersection, intersection.end()));

    return intersection.size(); 
}



int _tmain(int argc, _TCHAR* argv[])
{
    srand ( time(NULL) );

    vector<int> set1;
    vector<int> set2;

    set1.reserve(10000);
    set2.reserve(1000);

    // Create 100,000 values for set1
    for ( int i = 0; i < 100000; i++ )
    {
        int value = 1000000000 + i;
        set1.push_back(value);
    }

    // Create 1,000 values for set2
    for ( int i = 0; i < 1000; i++ )
    {
        int random = rand() % 200000 + 1;
        random *= 10;

        int value = 1000000000 + random;
        set2.push_back(value);
    }


    Timer timer;
    for ( int i = 0; i < 1000; i++ )
    {
        runIntersectionTest(set1, set2);
    }
    timer.Stop();

    cout << "Found the intersection (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    set<int> set21;
    set<int> set22;

    // Create 100,000 values for set1
    for ( int i = 0; i < 100000; i++ )
    {
        int value = 1000000000 + i;
        set21.insert(value);
    }

    // Create 1,000 values for set2
    for ( int i = 0; i < 1000; i++ )
    {
        int random = rand() % 200000 + 1;
        random *= 10;

        int value = 1000000000 + random;
        set22.insert(value);
    }

    timer.Reset();
    for ( int i = 0; i < 1000; i++ )
    {
        runSetIntersection(set21,set22);
    }
    timer.Stop();

    cout << "Found the intersection (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    getchar();

    return 0;
}

好的,这是最新的,有一些更改:

  • C++集现在已正确设置,因此它们有一个50%的交集(如C#)。
  • Set1被洗牌,所以它没有排序,set2已经没有排序。
  • set_intersection实现现在使用向量,并首先对它们进行排序。

C++ (Release,x64)结果:

代码语言:javascript
复制
Found the intersection of 503 values (using unordered_map) 1000 times, in 35131.1ms
Found the intersection of 494 values (using set_intersection) 1000 times, in 10317ms

所以它比C#慢2倍。@Jalf:你得到了一些相当快的数字,我是不是做错了什么?

C++代码:

代码语言:javascript
复制
// MapPerformance.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>

#include <boost\unordered\unordered_map.hpp>

#include "timer.h"

using namespace std;
using namespace stdext;
using namespace boost;

int runIntersectionTest(const vector<int>& set1, const vector<int>& set2)
{
    // hash_map<int,int> theMap;
    // map<int,int> theMap;
    unordered_map<int,int> theMap;  

    vector<int>::const_iterator set1_end = set1.end();

    // Now intersect the two sets by populating the map
    for ( vector<int>::const_iterator iterator = set1.begin(); iterator != set1_end; ++iterator )
    {
        int value = *iterator;

        theMap[value] = 1;
    }

    int intersectionSize = 0;

    vector<int>::const_iterator set2_end = set2.end();

    for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
    {
        int value = *iterator;

        unordered_map<int,int>::iterator foundValue = theMap.find(value);

        if ( foundValue != theMap.end() )
        {
            theMap[value] = 2;

            intersectionSize++;
        }
    }

    return intersectionSize;

}

int runSetIntersection(const vector<int> set1_unsorted, const vector<int> set2_unsorted)
{   
    // Create two vectors
    std::vector<int> set1(set1_unsorted.size());
    std::vector<int> set2(set2_unsorted.size());

    // Copy the unsorted data into them
    std::copy(set1_unsorted.begin(), set1_unsorted.end(), set1.begin());
    std::copy(set2_unsorted.begin(), set2_unsorted.end(), set2.begin());

    // Sort the data
    sort(set1.begin(),set1.end());
    sort(set2.begin(),set2.end());

    vector<int> intersection;
    intersection.reserve(1000);

    set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), inserter(intersection, intersection.end()));

    return intersection.size(); 
}

void createSets( vector<int>& set1, vector<int>& set2 )
{
    srand ( time(NULL) );

    set1.reserve(100000);
    set2.reserve(1000);

    // Create 100,000 values for set1
    for ( int i = 0; i < 100000; i++ )
    {
        int value = 1000000000 + i;
        set1.push_back(value);
    }

    // Try to get half of our values intersecting
    float ratio = 200000.0f / RAND_MAX;


    // Create 1,000 values for set2
    for ( int i = 0; i < 1000; i++ )
    {
        int random = rand() * ratio + 1;

        int value = 1000000000 + random;
        set2.push_back(value);
    }

    // Make sure set1 is in random order (not sorted)
    random_shuffle(set1.begin(),set1.end());
}

int _tmain(int argc, _TCHAR* argv[])
{
    int intersectionSize = 0;

    vector<int> set1, set2; 
    createSets( set1, set2 );

    Timer timer;
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runIntersectionTest(set1, set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    timer.Reset();
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runSetIntersection(set1,set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    getchar();

    return 0;
}
EN

Stack Overflow用户

发布于 2009-06-29 21:32:30

用这个..。

代码语言:javascript
复制
vector<int> set1(10000);
vector<int> set2(1000);

..。得到非零初始大小的向量。然后不要使用push_back,而是直接更新值。

票数 4
EN
查看全部 13 条回答
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/1060648

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档