前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >多线程处理N维度topk问题demo--[c++]

多线程处理N维度topk问题demo--[c++]

作者头像
Gxjun
发布2018-12-26 17:29:59
7060
发布2018-12-26 17:29:59
举报
文章被收录于专栏:mlml

问题

-对多维度特征进行topk排序,使用c++ 优先队列模拟最大堆.

代码语言:javascript
复制
/*
----------------------------------
Version    : ??
File Name :     dm2.py
Description :   
Author  :       xijun1
Email   :      
Date    :       2018/12/7
-----------------------------------
Change Activity  :   2018/12/7
-----------------------------------
__author__ = 'xijun1'
*/
#include <iostream>
#include <queue>
#include <unordered_map>
#include <future>
using namespace std;

class MinHeapUtil {
private:

    struct HeapGreatcompare
    {
        // min heap
        bool operator()(const pair<unsigned int, int>& node_a, const pair<unsigned int, int>& node_b) {
            return node_a.second > node_b.second;
        }
    };


    std::vector< std::priority_queue< std::pair<unsigned int, int>, std::vector< std::pair<unsigned  int, int> > ,HeapGreatcompare> > pq_mat;
    unsigned  int max_topk;
public:
     bool setShape(const int dim  , const unsigned int max_topk );
     int getdim(){
         return  this->pq_mat.size();
     }
     int getMaxTopK(){
         return this->max_topk;
     }
     size_t getSize( int k ){
         return this->pq_mat[ k ].size();
     }
     int push_back(std::vector<std::pair<unsigned int, int>  >& data , int k );
     int push_back( std::pair<unsigned int, int>   word_prob , int k );
    int push_back( std::pair<unsigned int, int>   &word_prob , int k );
     std::pair<unsigned int , int >get_itop(int k){
         auto word_prob = this->pq_mat[ k ].top();
         this->pq_mat[ k ].pop();
         return word_prob;
     }
};

bool MinHeapUtil::setShape(const int dim, const unsigned  int max_topk) {
    this->pq_mat.resize(dim );
    this->max_topk = max_topk;
    return true;
}
 int  MinHeapUtil::push_back(std::vector< std::pair<unsigned int, int> > &data ,int k) {

    for (auto const& word_prob : data) {
        this->pq_mat[ k ].push(std::make_pair(word_prob.second, word_prob.first));
        if ( this->pq_mat[ k ].size() > this->max_topk)  //维护这个K大小
            this->pq_mat[ k ].pop();
    }
     return  0;
}
int MinHeapUtil::push_back(std::pair<unsigned int, int> word_prob, int k) {

    if( this->pq_mat[k].size() < this->max_topk || this->pq_mat[ k ].top().second < word_prob.second) {
        this->pq_mat[k].push(word_prob);
        if (this->pq_mat[k].size() > this->max_topk)  //维护这个K大小
            this->pq_mat[k].pop();
    }
    return  0;

}


int MinHeapUtil::push_back(std::pair<unsigned int, int> &word_prob, int k) {
    this->pq_mat[k].push(word_prob);
    if (this->pq_mat[k].size() > this->max_topk)  //维护这个K大小
        this->pq_mat[k].pop();
    return 0;
}
std::unordered_map<unsigned int , std::vector< int > > test_data;
int  main() {
    MinHeapUtil top_words ;

    top_words.setShape(5,4);
    int total_size = 5;

//    MinHeapUtil.push_back(std::pair<unsigned int, int>(8,2),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(7,3),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(6,4),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(5,5),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(4,6),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(3,7),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(2,4),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(1,8),0);
    for (int j = 0; j < 1000000; ++j) {
        std::vector< int > tmp(5);
        for (int i = 0; i < 5; ++i) {
            tmp[i] = j*5 + i;
        }
        test_data[j] = std::move(tmp);
    }
    struct save_func
    {
        void operator () (int tn  ,int thread_nums,int total_size , MinHeapUtil *top_words )
        {
            std::cout<<tn <<" : "<< thread_nums<<" : "<<total_size<<std::endl;
            for (int i = tn; i < total_size ; i+=thread_nums) {
                for ( auto word_key : test_data) {
                    //std::cout<<word_key.first;
                    top_words->push_back( std::pair<unsigned int, int>(word_key.first, word_key.second[ i ] ) , i );
                }
            }
            std::cout << "func " << tn << std::endl;
        }
    } func;
    unsigned thread_nums = 8;
    std::vector<std::future< void >> muliti_topword_threads;

    for (unsigned tn = 0; tn < thread_nums; ++tn)
        muliti_topword_threads.emplace_back(
                std::async(std::launch::async, func, (int)tn,(int)thread_nums,total_size,&top_words));

    for (unsigned tn = 0; tn < thread_nums; ++tn)
        muliti_topword_threads[tn].get();

    for (int k = 0; k < total_size; ++k) {
        size_t range = top_words.getSize( k );
        for (int i = 0; i < range; ++i) {
            auto temp = top_words.get_itop( k );
            std::cout <<"k:"<<k<<"  ( " << temp.first << "     " << temp.second << ")\n";
        }
    }
    return 0;
}

--结果

代码语言:javascript
复制
01 :  : 88 :  : 55

2 : 8 : 5
3 : 8 : 5
4 : 8 : 5
5 : 8 : 5
func 5
6 : 8 : 5
func 6
7 : 8 : 5
func 7
func 1
func 4
func 0
func 2
func 3
k:0  ( 999996     4999980)
k:0  ( 999997     4999985)
k:0  ( 999998     4999990)
k:0  ( 999999     4999995)
k:1  ( 999996     4999981)
k:1  ( 999997     4999986)
k:1  ( 999998     4999991)
k:1  ( 999999     4999996)
k:2  ( 999996     4999982)
k:2  ( 999997     4999987)
k:2  ( 999998     4999992)
k:2  ( 999999     4999997)
k:3  ( 999996     4999983)
k:3  ( 999997     4999988)
k:3  ( 999998     4999993)
k:3  ( 999999     4999998)
k:4  ( 999996     4999984)
k:4  ( 999997     4999989)
k:4  ( 999998     4999994)
k:4  ( 999999     4999999)
本文参与 腾讯云自媒体分享计划,分享自作者个人站点/博客。
原始发表:2018-12-12 ,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 问题
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档