/*
simplistic multithreading example with C++11 standard libraries
===============================================================
The basic idea here is to spread one loop across the available threads by
using a std::atomic<int> as the loop variable, incremented with its fetch_add
member function.
Atomic variables, when accessed through the proper member functions, guarantee
that regardless of thread timing, every value from zero up to the maximum is
handed out to exactly one thread. So there will be no skips and no duplicates.
Furthermore, atomic variables tend to have less overhead than semaphores,
mutexes, and other more explicit forms of synchronization.
ATTENTION: some care has to be taken in choosing which loop to distribute
among processor cores in this manner. You want the work package for each
thread to be somewhere between a million and a hundred million clock cycles.
Much smaller work packages suffer noticeable overhead from thread management.
Much bigger work packages won't balance the load across cores as well.
So you probably don't want to split the innermost loop, or the outermost
loop of a big program. I usually split the computation of a single animation
frame. Then the rest of the program can remain sequential, frame by frame, by
design. And I get natural restart points, with each frame saved to disk, to
protect against a power outage (or some other catastrophe).
Distributing an image row by row isn't ideal. But it's good enough in many
cases.
*/
#include <thread>
#include <atomic>
#include <vector>     // std::vector for the thread handles
#include <functional> // std::cref
#include <cstdio>     // fprintf
#include <new>        // std::nothrow
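// Note: Vista2D, ImgWidth, and ImgHeight belong to the rest of the renderer,
// which isn't shown here. The stubs below are only an assumption-laden sketch
// (made-up dimensions, empty member functions) so that this excerpt compiles
// on its own; substitute the real definitions in the full program.
const int ImgWidth  = 1920;
const int ImgHeight = 1080;
struct Vista2D {
    void init(double centerX, double centerY, double angle, double zoom,
              int maxIterations, double trapX, double trapY,
              double hueShift, double aspectRatio) { /* ... */ }
    void magnify(double factor) { /* ... */ }
};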
// to coordinate worker threads
std::atomic<int> curRow(0);
int numThreads = 1; // to be properly initialized later
// frame buffer storage
float* ImgBuf = nullptr; // to be initialized later
// worker: render rows of ImgBuf until none are left
void renderRow(const Vista2D& vista) {
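    // fetch_add atomically returns the previous value and then increments the
    // counter, so every call hands out a distinct row number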
    int y = curRow.fetch_add(1); // number of the next row that needs to be done
    while (y < ImgHeight) {
        // ... render row y ...
        y = curRow.fetch_add(1);
    }
    return;
}
// parallelize with std::thread
void render(const Vista2D& vista) {
    curRow.store(0); // reset our "work queue"
    std::vector<std::thread> threads;
    // launch render threads
    for (int i = numThreads; i > 0; --i) {
        // std::thread copies its arguments, so pass vista via std::cref
        // to share one read-only Vista2D instead of copying it per thread
        threads.push_back(std::thread(renderRow, std::cref(vista)));
    }
    // wait for render threads
    for (auto& thread : threads) {
        thread.join();
    }
    return;
}
// main renders and writes an image to file
int main(void) {
    // set up number of worker threads
    numThreads = std::thread::hardware_concurrency();
    if (numThreads < 1) {
        numThreads = 1; // hardware_concurrency() may report 0 if unknown
    }
    fprintf(stderr, "will use %d threads\n", numThreads);
    // set up image buffer (nothrow new returns nullptr on failure instead of throwing)
    ImgBuf = new (std::nothrow) float[ImgWidth*ImgHeight];
    if (ImgBuf == nullptr) {
        fprintf(stderr, "error: out of memory for ImgBuf\n");
        return -1;
    }
    Vista2D theVista;
    theVista.init(-0.75, 0.0, // center
                  0.0,        // angle, clockwise
                  0.0,        // zoom
                  1000,       // max iterations
                  0.0, 0.0,   // orbit trap location
                  0.0,        // hue shift
                  1.0);       // aspect ratio
    theVista.magnify(0.6);
    render(theVista);
    // ... write image ...
    delete[] ImgBuf;
    return 0;
}