我已经将一个复杂的数组处理任务拆分为多个线程,以利用多核处理的优势,并且看到了很大的好处。目前,在任务开始时,我创建线程,然后等待它们在完成工作后终止。我创建的线程数量通常是内核数量的四倍,因为每个线程可能需要不同的时间量,并且拥有额外的线程可以确保所有内核在大部分时间内都处于占用状态。我想知道,在程序启动时创建线程,在需要之前保持它们空闲,并在我开始处理时使用它们,是否会有很大的性能优势。更简单地说,除了线程内的处理之外,启动和结束一个新线程需要多长时间?我正在使用以下命令启动线程:
CWinThread *pMyThread = AfxBeginThread(CMyThreadFunc,&MyData,THREAD_PRIORITY_NORMAL);
通常,我会在64位架构的8个内核上使用32个线程。所讨论的处理过程目前耗时不到1秒,并且每次刷新显示时都会启动。如果启动和结束一个线程的耗时小于1ms,那么由此获得的收益就不值得付出这番努力。我在对此进行性能分析(profiling)时遇到了一些困难。
这里有一个相关问题(related question),它很有帮助,但对于我想了解的内容来说有些含糊。非常感谢任何反馈。
发布于 2019-07-12 07:23:55
我对现代Windows调度程序的表现很好奇,所以又写了一个测试应用程序。我通过可选地启动一个观察线程,尽最大努力测量了线程的停止时间。
// Tested on Windows 10 v1903 with E5-1660 v3 @ 3.00GHz, 8 Core(s), 16 Logical Processor(s)
// Times are (min, average, max) in milliseconds.
threads: 100, iterations: 1, testStop: true
Start(0.1083, 5.3665, 13.7103) - Stop(0.0341, 1.5122, 11.0660)
threads: 32, iterations: 3, testStop: true
Start(0.1349, 1.6423, 3.5561) - Stop(0.0396, 0.2877, 3.5195)
Start(0.1093, 1.4992, 3.3982) - Stop(0.0351, 0.2734, 2.0384)
Start(0.1159, 1.5345, 3.5754) - Stop(0.0378, 0.4938, 3.2216)
threads: 4, iterations: 3, testStop: true
Start(0.2066, 0.3553, 0.4598) - Stop(0.0410, 0.1534, 0.4630)
Start(0.2769, 0.3740, 0.4994) - Stop(0.0414, 0.1028, 0.2581)
Start(0.2342, 0.3602, 0.5650) - Stop(0.0497, 0.2199, 0.3620)
threads: 4, iterations: 3, testStop: false
Start(0.1698, 0.2492, 0.3713)
Start(0.1473, 0.2477, 0.4103)
Start(0.1756, 0.2909, 0.4295)
threads: 1, iterations: 10, testStop: false
Start(0.1910, 0.1910, 0.1910)
Start(0.1685, 0.1685, 0.1685)
Start(0.1564, 0.1564, 0.1564)
Start(0.1504, 0.1504, 0.1504)
Start(0.1389, 0.1389, 0.1389)
Start(0.1234, 0.1234, 0.1234)
Start(0.1550, 0.1550, 0.1550)
Start(0.2800, 0.2800, 0.2800)
Start(0.1587, 0.1587, 0.1587)
Start(0.1877, 0.1877, 0.1877)
源代码:
#include <windows.h>
#include <iostream>
#include <vector>
#include <chrono>
#include <iomanip>
using namespace std::chrono;
// Measurement record for one worker thread.
struct Test
{
    // Handle returned by CreateThread; reset to NULL by TestThreadsEnded once
    // the thread has been observed as finished.
    HANDLE Thread = NULL;
    // Taken immediately before the CreateThread call.
    std::chrono::time_point<std::chrono::steady_clock> Creation;
    // Taken by the worker itself as the first statement of ThreadProc.
    std::chrono::time_point<std::chrono::steady_clock> Started;
    // Taken by TestThreadsEnded when the thread handle becomes signaled.
    std::chrono::time_point<std::chrono::steady_clock> Stopped;
};
// Entry point of a measured worker thread: records the time at which the
// thread actually began executing and then returns immediately.
//
// lpParamater - pointer to the Test record belonging to this thread.
// Returns 0 as the thread exit code.
DWORD __stdcall ThreadProc(void* lpParamater) {
    Test* const record = static_cast<Test*>(lpParamater);
    record->Started = std::chrono::steady_clock::now();
    return 0;
}
// Entry point of the watcher thread: busy-polls every worker handle and
// stamps Test::Stopped the first time a handle is seen signaled.
//
// lpParamater - pointer to the std::vector<Test> being measured.
// Returns 0 once every record in the vector has been marked as finished.
//
// The handle is only cleared to NULL here, it is not closed by this function.
// NOTE(review): Test::Thread is read here without any synchronization while
// the creating thread may still be filling it in - confirm this is acceptable.
DWORD __stdcall TestThreadsEnded(void* lpParamater) {
    std::vector<Test>* const records = static_cast<std::vector<Test>*>(lpParamater);
    std::size_t reaped = 0;
    while (reaped < records->size())
    {
        for (Test& record : *records)
        {
            // Not created yet, or already accounted for.
            if (record.Thread == NULL)
            {
                continue;
            }
            // Zero timeout: just probe whether the thread has terminated.
            if (WaitForSingleObject(record.Thread, 0) != WAIT_OBJECT_0)
            {
                continue;
            }
            record.Stopped = std::chrono::steady_clock::now();
            record.Thread = NULL;
            ++reaped;
        }
    }
    return 0;
}
// Returns the time elapsed from `start` to `stop` as fractional milliseconds.
// The result is negative when `stop` lies before `start`.
std::chrono::duration<double, std::milli> diff(std::chrono::time_point<std::chrono::steady_clock> start, std::chrono::time_point<std::chrono::steady_clock> stop)
{
    const auto elapsed = stop - start;
    return std::chrono::duration<double, std::milli>(elapsed);
}
// Minimum, arithmetic mean and maximum of a set of samples, expressed in the
// same unit as the samples themselves.
struct Stats
{
    double min;
    double average;
    double max;
};

// Computes the minimum, arithmetic mean and maximum of `durations`.
//
// durations - the samples to summarize; may be empty.
// Returns {0, 0, 0} for an empty input instead of dividing by zero.
Stats stats(const std::vector<double>& durations)
{
    if (durations.empty())
    {
        return { 0, 0, 0 };
    }
    // Seed min/max from the first sample rather than from fixed constants, so
    // samples above 1000 or below 0 are still reported correctly.
    Stats result = { durations.front(), 0, durations.front() };
    for (const double sample : durations)
    {
        if (sample < result.min)
        {
            result.min = sample;
        }
        if (sample > result.max)
        {
            result.max = sample;
        }
        result.average += sample;
    }
    result.average /= static_cast<double>(durations.size());
    return result;
}
void TestScheduler(const int threadCount, const int iterations, const bool testStop)
{
std::cout << "\nthreads: " << threadCount << ", iterations: " << iterations << ", testStop: " << (testStop ? "true" : "false") << "\n";
for (auto i = 0; i < iterations; i++)
{
std::vector<Test> tests(threadCount);
HANDLE testThreadsEnded = NULL;
if (testStop)
{
testThreadsEnded = CreateThread(NULL, 0, TestThreadsEnded, (void*)& tests, 0, NULL);
}
for (auto& test : tests)
{
test.Creation = steady_clock::now();
test.Thread = CreateThread(NULL, 0, ThreadProc, (void*)& test, 0, NULL);
}
if (testStop)
{
WaitForSingleObject(testThreadsEnded, INFINITE);
}
else
{
std::vector<HANDLE> threads;
for (auto& test : tests) threads.push_back(test.Thread);
WaitForMultipleObjects((DWORD)threads.size(), threads.data(), TRUE, INFINITE);
}
std::vector<double> startDurations;
std::vector<double> stopDurations;
for (auto& test : tests)
{
startDurations.push_back(diff(test.Creation, test.Started).count());
stopDurations.push_back(diff(test.Started, test.Stopped).count());
}
auto startStats = stats(startDurations);
auto stopStats = stats(stopDurations);
std::cout << std::fixed << std::setprecision(4);
std::cout << "Start(" << startStats.min << ", " << startStats.average << ", " << startStats.max << ")";
if (testStop)
{
std::cout << " - ";
std::cout << "Stop(" << stopStats.min << ", " << stopStats.average << ", " << stopStats.max << ")";
}
std::cout << "\n";
}
}
// Runs the benchmark for a fixed list of configurations, in order.
int main()
{
    // One entry per TestScheduler invocation.
    struct Run
    {
        int threads;
        int iterations;
        bool testStop;
    };
    const Run runs[] = {
        { 100, 1, true },
        { 32, 3, true },
        { 4, 3, true },
        { 4, 3, false },
        { 1, 10, false },
    };
    for (const Run& run : runs)
    {
        TestScheduler(run.threads, run.iterations, run.testStop);
    }
    return 0;
}
https://stackoverflow.com/questions/18274217
复制相似问题