mirror of
https://projects.blender.org/blender/blender.git
synced 2025-01-22 07:22:12 -05:00
Color management: optimize OCIO predivide processing
cpuProcessorApply_predivide was doing the following for each pixel: (1) un-premultiply the pixel to straight alpha, (2) call the OCIO processor on that one pixel, (3) premultiply the pixel back. This is inefficient because of the per-pixel function call overhead, and it probably prevents whatever batch-processing or SIMD optimizations OCIO might have. Instead, do this: (1) un-premultiply the whole input image, (2) call OCIO once on the whole image, (3) premultiply the whole image back. Timing of cpuProcessorApply_predivide on a 4K resolution, float4 image on a Ryzen 5950X (Win10/VS2022), single thread: 128ms -> 69ms. Pull Request: https://projects.blender.org/blender/blender/pulls/127307
This commit is contained in:
parent
a904db3ee7
commit
d0ea251c33
1 changed files with 26 additions and 12 deletions
|
@ -578,22 +578,36 @@ void OCIOImpl::cpuProcessorApply_predivide(OCIO_ConstCPUProcessorRcPtr *cpu_proc
|
|||
int channels = img->getNumChannels();
|
||||
|
||||
if (channels == 4) {
|
||||
/* Convert from premultiplied alpha to straight alpha. */
|
||||
assert(img->isFloat());
|
||||
float *pixels = (float *)img->getData();
|
||||
|
||||
size_t width = img->getWidth();
|
||||
size_t height = img->getHeight();
|
||||
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
float *pixel = pixels + 4 * (y * width + x);
|
||||
|
||||
cpuProcessorApplyRGBA_predivide(cpu_processor, pixel);
|
||||
float *pixel = (float *)img->getData();
|
||||
size_t pixel_count = img->getWidth() * img->getHeight();
|
||||
for (size_t i = 0; i < pixel_count; i++, pixel += 4) {
|
||||
float alpha = pixel[3];
|
||||
if (alpha != 0.0f && alpha != 1.0f) {
|
||||
float inv_alpha = 1.0f / alpha;
|
||||
pixel[0] *= inv_alpha;
|
||||
pixel[1] *= inv_alpha;
|
||||
pixel[2] *= inv_alpha;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
(*(ConstCPUProcessorRcPtr *)cpu_processor)->apply(*img);
|
||||
|
||||
(*(ConstCPUProcessorRcPtr *)cpu_processor)->apply(*img);
|
||||
|
||||
if (channels == 4) {
|
||||
/* Back to premultiplied alpha. */
|
||||
assert(img->isFloat());
|
||||
float *pixel = (float *)img->getData();
|
||||
size_t pixel_count = img->getWidth() * img->getHeight();
|
||||
for (size_t i = 0; i < pixel_count; i++, pixel += 4) {
|
||||
float alpha = pixel[3];
|
||||
if (alpha != 0.0f && alpha != 1.0f) {
|
||||
pixel[0] *= alpha;
|
||||
pixel[1] *= alpha;
|
||||
pixel[2] *= alpha;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception &exception) {
|
||||
|
|
Loading…
Reference in a new issue