mirror of
https://github.com/withastro/astro.git
synced 2025-01-22 10:31:53 -05:00
Improve Static Asset Generation Performance (#12922)
* fix(perf): do not `await` in `for` loop to enqueue asset transforms. Fixes #12845 * fix: avoid `queue.add()` during async for loop * Update changeset --------- Co-authored-by: Matt Kane <m@mk.gg>
This commit is contained in:
parent
1c36331782
commit
faf74af522
3 changed files with 73 additions and 8 deletions
5
.changeset/new-radios-pay.md
Normal file
5
.changeset/new-radios-pay.md
Normal file
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
'astro': patch
|
||||
---
|
||||
|
||||
Improves performance of static asset generation by fixing a bug that caused image transforms to be performed serially. This fix ensures that processing uses all CPUs when running in a multi-core environment.
|
|
@ -1,7 +1,6 @@
|
|||
import fs, { readFileSync } from 'node:fs';
|
||||
import { basename } from 'node:path/posix';
|
||||
import { dim, green } from 'kleur/colors';
|
||||
import type PQueue from 'p-queue';
|
||||
import { getOutDirWithinCwd } from '../../core/build/common.js';
|
||||
import type { BuildPipeline } from '../../core/build/pipeline.js';
|
||||
import { getTimeStat } from '../../core/build/util.js';
|
||||
|
@ -101,16 +100,11 @@ export async function generateImagesForPath(
|
|||
originalFilePath: string,
|
||||
transformsAndPath: MapValue<AssetsGlobalStaticImagesList>,
|
||||
env: AssetEnv,
|
||||
queue: PQueue,
|
||||
) {
|
||||
let originalImage: ImageData;
|
||||
|
||||
for (const [_, transform] of transformsAndPath.transforms) {
|
||||
await queue
|
||||
.add(async () => generateImage(transform.finalPath, transform.transform))
|
||||
.catch((e) => {
|
||||
throw e;
|
||||
});
|
||||
await generateImage(transform.finalPath, transform.transform);
|
||||
}
|
||||
|
||||
// In SSR, we cannot know if an image is referenced in a server-rendered page, so we can't delete anything
|
||||
|
|
|
@ -125,7 +125,73 @@ export async function generatePages(options: StaticBuildOptions, internals: Buil
|
|||
|
||||
const assetsTimer = performance.now();
|
||||
for (const [originalPath, transforms] of staticImageList) {
|
||||
await generateImagesForPath(originalPath, transforms, assetsCreationPipeline, queue);
|
||||
// Process each source image in parallel based on the queue’s concurrency
|
||||
// (`cpuCount`). Process each transform for a source image sequentially.
|
||||
//
|
||||
// # Design Decision:
|
||||
// We have 3 source images (A.png, B.png, C.png) and 3 transforms for
|
||||
// each:
|
||||
// ```
|
||||
// A1.png A2.png A3.png
|
||||
// B1.png B2.png B3.png
|
||||
// C1.png C2.png C3.png
|
||||
// ```
|
||||
//
|
||||
// ## Option 1
|
||||
// Enqueue all transforms indiscriminantly
|
||||
// ```
|
||||
// |_A1.png |_B2.png |_C1.png
|
||||
// |_B3.png |_A2.png |_C3.png
|
||||
// |_C2.png |_A3.png |_B1.png
|
||||
// ```
|
||||
// * Advantage: Maximum parallelism, saturate CPU
|
||||
// * Disadvantage: Spike in context switching
|
||||
//
|
||||
// ## Option 2
|
||||
// Enqueue all transforms, but constrain processing order by source image
|
||||
// ```
|
||||
// |_A3.png |_B1.png |_C2.png
|
||||
// |_A1.png |_B3.png |_C1.png
|
||||
// |_A2.png |_B2.png |_C3.png
|
||||
// ```
|
||||
// * Advantage: Maximum parallelism, saturate CPU (same as Option 1) in
|
||||
// hope to avoid context switching
|
||||
// * Disadvantage: Context switching still occurs and performance still
|
||||
// suffers
|
||||
//
|
||||
// ## Option 3
|
||||
// Enqueue each source image, but perform the transforms for that source
|
||||
// image sequentially
|
||||
// ```
|
||||
// \_A1.png \_B1.png \_C1.png
|
||||
// \_A2.png \_B2.png \_C2.png
|
||||
// \_A3.png \_B3.png \_C3.png
|
||||
// ```
|
||||
// * Advantage: Less context switching
|
||||
// * Disadvantage: If you have a low number of source images with high
|
||||
// number of transforms then this is suboptimal.
|
||||
//
|
||||
// ## BEST OPTION:
|
||||
// **Option 3**. Most projects will have a higher number of source images
|
||||
// with a few transforms on each. Even though Option 2 should be faster
|
||||
// and _should_ prevent context switching, this was not observed in
|
||||
// nascent tests. Context switching was high and the overall performance
|
||||
// was half of Option 3.
|
||||
//
|
||||
// If looking to optimize further, please consider the following:
|
||||
// * Avoid `queue.add()` in an async for loop. Notice the `await
|
||||
// queue.onIdle();` after this loop. We do not want to create a scenario
|
||||
// where tasks are added to the queue after the queue.onIdle() resolves.
|
||||
// This can break tests and create annoying race conditions.
|
||||
// * Exposing a concurrency property in `astro.config.mjs` to allow users
|
||||
// to override Node’s os.cpus().length default.
|
||||
// * Create a proper performance benchmark for asset transformations of
|
||||
// projects in varying sizes of source images and transforms.
|
||||
queue
|
||||
.add(() => generateImagesForPath(originalPath, transforms, assetsCreationPipeline))
|
||||
.catch((e) => {
|
||||
throw e;
|
||||
});
|
||||
}
|
||||
|
||||
await queue.onIdle();
|
||||
|
|
Loading…
Reference in a new issue