Refactor createShikiHighlighter (#11825)

Co-authored-by: Sarah Rainsberger <sarah@rainsberger.ca>
This commit is contained in:
Bjorn Lu 2024-09-02 21:08:25 +08:00 committed by GitHub
parent edd8ae9084
commit 560ef15ad2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 234 additions and 139 deletions

View file

@ -0,0 +1,5 @@
---
'@astrojs/markdoc': patch
---
Uses the latest version of `@astrojs/markdown-remark` with updated Shiki APIs

View file

@ -0,0 +1,5 @@
---
'@astrojs/markdown-remark': major
---
Updates the return object of `createShikiHighlighter` to expose `codeToHast` and `codeToHtml` methods in place of `highlight`, allowing you to generate either the hast tree or the HTML string directly

View file

@ -0,0 +1,9 @@
---
'astro': major
---
Updates the internal Shiki rehype plugin to highlight code blocks as hast (using Shiki's `codeToHast()` API). This allows more direct Markdown and MDX processing and improves performance when building the project, but may cause issues with existing Shiki transformers.
If you are using Shiki transformers passed to `markdown.shikiConfig.transformers`, you must make sure they do not use the `postprocess` hook as it no longer runs on code blocks in `.md` and `.mdx` files. (See [the Shiki documentation on transformer hooks](https://shiki.style/guide/transformers#transformer-hooks) for more information).
Code blocks in `.mdoc` files and the `<Code />` component do not use the internal Shiki rehype plugin and are unaffected.

View file

@ -111,13 +111,13 @@ const highlighter = await getCachedHighlighter({
],
theme,
themes,
defaultColor,
wrap,
transformers,
});
const html = await highlighter.highlight(code, typeof lang === 'string' ? lang : lang.name, {
const html = await highlighter.codeToHtml(code, typeof lang === 'string' ? lang : lang.name, {
defaultColor,
wrap,
inline,
transformers,
meta,
attributes: rest as any,
});

View file

@ -5,7 +5,11 @@ import { unescapeHTML } from 'astro/runtime/server/index.js';
import type { AstroMarkdocConfig } from '../config.js';
export default async function shiki(config?: ShikiConfig): Promise<AstroMarkdocConfig> {
const highlighter = await createShikiHighlighter(config);
const highlighter = await createShikiHighlighter({
langs: config?.langs,
theme: config?.theme,
themes: config?.themes,
});
return {
nodes: {
@ -16,7 +20,11 @@ export default async function shiki(config?: ShikiConfig): Promise<AstroMarkdocC
// Only the `js` part is parsed as `attributes.language` and the rest is ignored. This means
// some Shiki transformers may not work correctly as it relies on the `meta`.
const lang = typeof attributes.language === 'string' ? attributes.language : 'plaintext';
const html = await highlighter.highlight(attributes.content, lang);
const html = await highlighter.codeToHtml(attributes.content, lang, {
wrap: config?.wrap,
defaultColor: config?.defaultColor,
transformers: config?.transformers,
});
// Use `unescapeHTML` to return `HTMLString` for Astro renderer to inline as HTML
return unescapeHTML(html) as any;

View file

@ -4,7 +4,11 @@ import { toText } from 'hast-util-to-text';
import { removePosition } from 'unist-util-remove-position';
import { visitParents } from 'unist-util-visit-parents';
type Highlighter = (code: string, language: string, options?: { meta?: string }) => Promise<string>;
type Highlighter = (
code: string,
language: string,
options?: { meta?: string },
) => Promise<Root | string>;
const languagePattern = /\blanguage-(\S+)\b/;
@ -73,12 +77,17 @@ export async function highlightCodeBlocks(tree: Root, highlighter: Highlighter)
for (const { node, language, grandParent, parent } of nodes) {
const meta = (node.data as any)?.meta ?? node.properties.metastring ?? undefined;
const code = toText(node, { whitespace: 'pre' });
// TODO: In Astro 5, have `highlighter()` return hast directly to skip expensive HTML parsing and serialization.
const html = await highlighter(code, language, { meta });
// The replacement returns a root node with 1 child, the `<pr>` element replacement.
const replacement = fromHtml(html, { fragment: true }).children[0] as Element;
// We just generated this node, so any positional information is invalid.
removePosition(replacement);
const result = await highlighter(code, language, { meta });
let replacement: Element;
if (typeof result === 'string') {
// The replacement returns a root node with 1 child, the `<pre>` element replacement.
replacement = fromHtml(result, { fragment: true }).children[0] as Element;
// We just generated this node, so any positional information is invalid.
removePosition(replacement);
} else {
replacement = result.children[0] as Element;
}
// We replace the parent in its parent with the new `<pre>` element.
const index = grandParent.children.indexOf(parent);

View file

@ -26,7 +26,12 @@ export { rehypeHeadingIds } from './rehype-collect-headings.js';
export { remarkCollectImages } from './remark-collect-images.js';
export { rehypePrism } from './rehype-prism.js';
export { rehypeShiki } from './rehype-shiki.js';
export { createShikiHighlighter, type ShikiHighlighter } from './shiki.js';
export {
createShikiHighlighter,
type ShikiHighlighter,
type CreateShikiHighlighterOptions,
type ShikiHighlighterHighlightOptions,
} from './shiki.js';
export * from './types.js';
export const markdownConfigDefaults: Required<AstroMarkdownOptions> = {

View file

@ -8,9 +8,20 @@ export const rehypeShiki: Plugin<[ShikiConfig?], Root> = (config) => {
let highlighterAsync: Promise<ShikiHighlighter> | undefined;
return async (tree) => {
highlighterAsync ??= createShikiHighlighter(config);
highlighterAsync ??= createShikiHighlighter({
langs: config?.langs,
theme: config?.theme,
themes: config?.themes,
});
const highlighter = await highlighterAsync;
await highlightCodeBlocks(tree, highlighter.highlight);
await highlightCodeBlocks(tree, (code, language, options) => {
return highlighter.codeToHast(code, language, {
meta: options?.meta,
wrap: config?.wrap,
defaultColor: config?.defaultColor,
transformers: config?.transformers,
});
});
};
};

View file

@ -1,27 +1,65 @@
import type { Properties } from 'hast';
import type { Properties, Root } from 'hast';
import {
type BundledLanguage,
type LanguageRegistration,
type ShikiTransformer,
type ThemeRegistration,
type ThemeRegistrationRaw,
createCssVariablesTheme,
getHighlighter,
createHighlighter,
isSpecialLang,
} from 'shiki';
import type { ShikiConfig } from './types.js';
import type { ThemePresets } from './types.js';
export interface ShikiHighlighter {
highlight(
codeToHast(
code: string,
lang?: string,
options?: {
inline?: boolean;
attributes?: Record<string, string>;
/**
* Raw `meta` information to be used by Shiki transformers
*/
meta?: string;
},
options?: ShikiHighlighterHighlightOptions,
): Promise<Root>;
codeToHtml(
code: string,
lang?: string,
options?: ShikiHighlighterHighlightOptions,
): Promise<string>;
}
/**
 * Options consumed once by `createShikiHighlighter` when the underlying Shiki
 * highlighter is constructed. Per-call rendering options are declared
 * separately in `ShikiHighlighterHighlightOptions`.
 */
export interface CreateShikiHighlighterOptions {
/**
 * Language registrations to load up front, in addition to `plaintext`, which is always loaded.
 */
langs?: LanguageRegistration[];
/**
 * Theme used when `themes` has no entries. Defaults to `'github-dark'`;
 * the `'css-variables'` preset is replaced with a generated CSS-variables theme.
 */
theme?: ThemePresets | ThemeRegistration | ThemeRegistrationRaw;
/**
 * Named themes. When at least one entry is present, these are loaded and used instead of `theme`.
 */
themes?: Record<string, ThemePresets | ThemeRegistration | ThemeRegistrationRaw>;
}
/**
 * Per-call options accepted by `codeToHast` and `codeToHtml` on the object
 * returned from `createShikiHighlighter`.
 */
export interface ShikiHighlighterHighlightOptions {
/**
 * Generate inline code element only, without the pre element wrapper.
 * When enabled, the root `pre` node is renamed to `code` and the inner
 * `code` node is replaced by its first child.
 */
inline?: boolean;
/**
 * Enable word wrapping.
 * - true: enabled. Appends `overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;` to the root style.
 * - false (or omitted): disabled. Appends `overflow-x: auto;` to the root style.
 * - null: All overflow styling removed. Code will overflow the element by default.
 */
wrap?: boolean | null;
/**
 * Chooses a theme from the "themes" option that you've defined as the default styling theme.
 * The value is forwarded unchanged as Shiki's `defaultColor` option.
 */
defaultColor?: 'light' | 'dark' | string | false;
/**
 * Shiki transformers to customize the generated HTML by manipulating the hast tree.
 * They are appended after the built-in transformer that applies the options in this interface.
 */
transformers?: ShikiTransformer[];
/**
 * Additional attributes to be added to the root code block element.
 * `class` and `style` are appended to the generated values; every other key is assigned as-is.
 */
attributes?: Record<string, string>;
/**
 * Raw `meta` information to be used by Shiki transformers.
 * Passed to Shiki as `{ __raw: meta }` when non-empty.
 */
meta?: string;
}
let _cssVariablesTheme: ReturnType<typeof createCssVariablesTheme>;
const cssVariablesTheme = () =>
_cssVariablesTheme ??
@ -31,113 +69,120 @@ export async function createShikiHighlighter({
langs = [],
theme = 'github-dark',
themes = {},
defaultColor,
wrap = false,
transformers = [],
}: ShikiConfig = {}): Promise<ShikiHighlighter> {
}: CreateShikiHighlighterOptions = {}): Promise<ShikiHighlighter> {
theme = theme === 'css-variables' ? cssVariablesTheme() : theme;
const highlighter = await getHighlighter({
const highlighter = await createHighlighter({
langs: ['plaintext', ...langs],
themes: Object.values(themes).length ? Object.values(themes) : [theme],
});
return {
async highlight(code, lang = 'plaintext', options) {
const loadedLanguages = highlighter.getLoadedLanguages();
async function highlight(
code: string,
lang = 'plaintext',
options: ShikiHighlighterHighlightOptions,
to: 'hast' | 'html',
) {
const loadedLanguages = highlighter.getLoadedLanguages();
if (!isSpecialLang(lang) && !loadedLanguages.includes(lang)) {
try {
await highlighter.loadLanguage(lang as BundledLanguage);
} catch (_err) {
// eslint-disable-next-line no-console
console.warn(
`[Shiki] The language "${lang}" doesn't exist, falling back to "plaintext".`,
);
lang = 'plaintext';
}
if (!isSpecialLang(lang) && !loadedLanguages.includes(lang)) {
try {
await highlighter.loadLanguage(lang as BundledLanguage);
} catch (_err) {
// eslint-disable-next-line no-console
console.warn(`[Shiki] The language "${lang}" doesn't exist, falling back to "plaintext".`);
lang = 'plaintext';
}
}
const themeOptions = Object.values(themes).length ? { themes } : { theme };
const inline = options?.inline ?? false;
const themeOptions = Object.values(themes).length ? { themes } : { theme };
const inline = options?.inline ?? false;
return highlighter.codeToHtml(code, {
...themeOptions,
defaultColor,
lang,
// NOTE: while we can spread `options.attributes` here so that Shiki can auto-serialize this as rendered
// attributes on the top-level tag, it's not clear whether it is fine to pass all attributes as meta, as
// they're technically not meta, nor parsed from Shiki's `parseMetaString` API.
meta: options?.meta ? { __raw: options?.meta } : undefined,
transformers: [
{
pre(node) {
// Swap to `code` tag if inline
if (inline) {
node.tagName = 'code';
}
return highlighter[to === 'html' ? 'codeToHtml' : 'codeToHast'](code, {
...themeOptions,
defaultColor: options.defaultColor,
lang,
// NOTE: while we can spread `options.attributes` here so that Shiki can auto-serialize this as rendered
// attributes on the top-level tag, it's not clear whether it is fine to pass all attributes as meta, as
// they're technically not meta, nor parsed from Shiki's `parseMetaString` API.
meta: options?.meta ? { __raw: options?.meta } : undefined,
transformers: [
{
pre(node) {
// Swap to `code` tag if inline
if (inline) {
node.tagName = 'code';
}
const {
class: attributesClass,
style: attributesStyle,
...rest
} = options?.attributes ?? {};
Object.assign(node.properties, rest);
const {
class: attributesClass,
style: attributesStyle,
...rest
} = options?.attributes ?? {};
Object.assign(node.properties, rest);
const classValue =
(normalizePropAsString(node.properties.class) ?? '') +
(attributesClass ? ` ${attributesClass}` : '');
const styleValue =
(normalizePropAsString(node.properties.style) ?? '') +
(attributesStyle ? `; ${attributesStyle}` : '');
const classValue =
(normalizePropAsString(node.properties.class) ?? '') +
(attributesClass ? ` ${attributesClass}` : '');
const styleValue =
(normalizePropAsString(node.properties.style) ?? '') +
(attributesStyle ? `; ${attributesStyle}` : '');
// Replace "shiki" class naming with "astro-code"
node.properties.class = classValue.replace(/shiki/g, 'astro-code');
// Replace "shiki" class naming with "astro-code"
node.properties.class = classValue.replace(/shiki/g, 'astro-code');
// Add data-language attribute
node.properties.dataLanguage = lang;
// Add data-language attribute
node.properties.dataLanguage = lang;
// Handle code wrapping
// if wrap=null, do nothing.
if (wrap === false) {
node.properties.style = styleValue + '; overflow-x: auto;';
} else if (wrap === true) {
node.properties.style =
styleValue + '; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;';
}
},
line(node) {
// Add "user-select: none;" for "+"/"-" diff symbols.
// Transform `<span class="line"><span style="...">+ something</span></span>
// into `<span class="line"><span style="..."><span style="user-select: none;">+</span> something</span></span>`
if (lang === 'diff') {
const innerSpanNode = node.children[0];
const innerSpanTextNode =
innerSpanNode?.type === 'element' && innerSpanNode.children?.[0];
// Handle code wrapping
// if wrap=null, do nothing.
if (options.wrap === false || options.wrap === undefined) {
node.properties.style = styleValue + '; overflow-x: auto;';
} else if (options.wrap === true) {
node.properties.style =
styleValue + '; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;';
}
},
line(node) {
// Add "user-select: none;" for "+"/"-" diff symbols.
// Transform `<span class="line"><span style="...">+ something</span></span>
// into `<span class="line"><span style="..."><span style="user-select: none;">+</span> something</span></span>`
if (lang === 'diff') {
const innerSpanNode = node.children[0];
const innerSpanTextNode =
innerSpanNode?.type === 'element' && innerSpanNode.children?.[0];
if (innerSpanTextNode && innerSpanTextNode.type === 'text') {
const start = innerSpanTextNode.value[0];
if (start === '+' || start === '-') {
innerSpanTextNode.value = innerSpanTextNode.value.slice(1);
innerSpanNode.children.unshift({
type: 'element',
tagName: 'span',
properties: { style: 'user-select: none;' },
children: [{ type: 'text', value: start }],
});
}
if (innerSpanTextNode && innerSpanTextNode.type === 'text') {
const start = innerSpanTextNode.value[0];
if (start === '+' || start === '-') {
innerSpanTextNode.value = innerSpanTextNode.value.slice(1);
innerSpanNode.children.unshift({
type: 'element',
tagName: 'span',
properties: { style: 'user-select: none;' },
children: [{ type: 'text', value: start }],
});
}
}
},
code(node) {
if (inline) {
return node.children[0] as typeof node;
}
},
}
},
...transformers,
],
});
code(node) {
if (inline) {
return node.children[0] as typeof node;
}
},
},
...(options.transformers ?? []),
],
});
}
return {
codeToHast(code, lang, options = {}) {
return highlight(code, lang, options, 'hast') as Promise<Root>;
},
codeToHtml(code, lang, options = {}) {
return highlight(code, lang, options, 'html') as Promise<string>;
},
};
}

View file

@ -1,15 +1,10 @@
import type * as hast from 'hast';
import type * as mdast from 'mdast';
import type { Options as RemarkRehypeOptions } from 'remark-rehype';
import type {
BuiltinTheme,
LanguageRegistration,
ShikiTransformer,
ThemeRegistration,
ThemeRegistrationRaw,
} from 'shiki';
import type { BuiltinTheme } from 'shiki';
import type * as unified from 'unified';
import type { DataMap, VFile } from 'vfile';
import type { CreateShikiHighlighterOptions, ShikiHighlighterHighlightOptions } from './shiki.js';
export type { Node } from 'unist';
@ -35,14 +30,9 @@ export type RemarkRehype = RemarkRehypeOptions;
export type ThemePresets = BuiltinTheme | 'css-variables';
export interface ShikiConfig {
langs?: LanguageRegistration[];
theme?: ThemePresets | ThemeRegistration | ThemeRegistrationRaw;
themes?: Record<string, ThemePresets | ThemeRegistration | ThemeRegistrationRaw>;
defaultColor?: 'light' | 'dark' | string | false;
wrap?: boolean | null;
transformers?: ShikiTransformer[];
}
export interface ShikiConfig
extends Pick<CreateShikiHighlighterOptions, 'langs' | 'theme' | 'themes'>,
Pick<ShikiHighlighterHighlightOptions, 'defaultColor' | 'wrap' | 'transformers'> {}
export interface AstroMarkdownOptions {
syntaxHighlight?: 'shiki' | 'prism' | false;

View file

@ -33,16 +33,25 @@ describe('shiki syntax highlighting', () => {
it('createShikiHighlighter works', async () => {
const highlighter = await createShikiHighlighter();
const html = await highlighter.highlight('const foo = "bar";', 'js');
const html = await highlighter.codeToHtml('const foo = "bar";', 'js');
assert.match(html, /astro-code github-dark/);
assert.match(html, /background-color:#24292e;color:#e1e4e8;/);
});
it('createShikiHighlighter works with codeToHast', async () => {
const highlighter = await createShikiHighlighter();
const hast = await highlighter.codeToHast('const foo = "bar";', 'js');
assert.match(hast.children[0].properties.class, /astro-code github-dark/);
assert.match(hast.children[0].properties.style, /background-color:#24292e;color:#e1e4e8;/);
});
it('diff +/- text has user-select: none', async () => {
const highlighter = await createShikiHighlighter();
const html = await highlighter.highlight(
const html = await highlighter.codeToHtml(
`\
- const foo = "bar";
+ const foo = "world";`,
@ -57,7 +66,7 @@ describe('shiki syntax highlighting', () => {
it('renders attributes', async () => {
const highlighter = await createShikiHighlighter();
const html = await highlighter.highlight(`foo`, 'js', {
const html = await highlighter.codeToHtml(`foo`, 'js', {
attributes: { 'data-foo': 'bar', autofocus: true },
});
@ -66,7 +75,10 @@ describe('shiki syntax highlighting', () => {
});
it('supports transformers that reads meta', async () => {
const highlighter = await createShikiHighlighter({
const highlighter = await createShikiHighlighter();
const html = await highlighter.codeToHtml(`foo`, 'js', {
meta: '{1,3-4}',
transformers: [
{
pre(node) {
@ -79,10 +91,6 @@ describe('shiki syntax highlighting', () => {
],
});
const html = await highlighter.highlight(`foo`, 'js', {
meta: '{1,3-4}',
});
assert.match(html, /data-test="\{1,3-4\}"/);
});