fix: Comic Shanns issues, new fonts structure (#8641)

Marcel Mraz 2024-10-21 01:11:53 +03:00 committed by GitHub
parent 15ca182333
commit 61623bbeba
279 changed files with 13267 additions and 488 deletions


@@ -0,0 +1,202 @@
/**
* Modified version of hb-subset bindings from "subset-font" package https://github.com/papandreou/subset-font/blob/3f711c8aa29a426c7f22655861abfb976950f527/index.js
*
* CHANGELOG:
* - removed dependency on node APIs to work inside the browser
* - removed dependency on fontverter for brotli compression
* - removed dependencies on lodash and p-limit
* - removed options for preserveNameIds, variationAxes, noLayoutClosure (not needed for now)
* - replaced text input with codepoints
* - rewritten in typescript and with esm modules
Copyright (c) 2012, Andreas Lind Petersen
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of the author nor the names of contributors may
be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// function HB_TAG(str) {
// return str.split("").reduce((a, ch) => {
// return (a << 8) + ch.charCodeAt(0);
// }, 0);
// }
function subset(
  hbSubsetWasm: any,
  heapu8: Uint8Array,
  font: ArrayBuffer,
  codePoints: ReadonlySet<number>,
) {
  const input = hbSubsetWasm.hb_subset_input_create_or_fail();
  if (input === 0) {
    throw new Error(
      "hb_subset_input_create_or_fail (harfbuzz) returned zero, indicating failure",
    );
  }

  const fontBuffer = hbSubsetWasm.malloc(font.byteLength);
  heapu8.set(new Uint8Array(font), fontBuffer);

  // Create the face
  const blob = hbSubsetWasm.hb_blob_create(
    fontBuffer,
    font.byteLength,
    2, // HB_MEMORY_MODE_WRITABLE
    0,
    0,
  );
  const face = hbSubsetWasm.hb_face_create(blob, 0);
  hbSubsetWasm.hb_blob_destroy(blob);

  // Do the equivalent of --font-features=*
  const layoutFeatures = hbSubsetWasm.hb_subset_input_set(
    input,
    6, // HB_SUBSET_SETS_LAYOUT_FEATURE_TAG
  );
  hbSubsetWasm.hb_set_clear(layoutFeatures);
  hbSubsetWasm.hb_set_invert(layoutFeatures);
// if (preserveNameIds) {
// const inputNameIds = harfbuzzJsWasm.hb_subset_input_set(
// input,
// 4, // HB_SUBSET_SETS_NAME_ID
// );
// for (const nameId of preserveNameIds) {
// harfbuzzJsWasm.hb_set_add(inputNameIds, nameId);
// }
// }
// if (noLayoutClosure) {
// harfbuzzJsWasm.hb_subset_input_set_flags(
// input,
// harfbuzzJsWasm.hb_subset_input_get_flags(input) | 0x00000200, // HB_SUBSET_FLAGS_NO_LAYOUT_CLOSURE
// );
// }
  // Add unicodes indices
  const inputUnicodes = hbSubsetWasm.hb_subset_input_unicode_set(input);
  for (const c of codePoints) {
    hbSubsetWasm.hb_set_add(inputUnicodes, c);
  }
// if (variationAxes) {
// for (const [axisName, value] of Object.entries(variationAxes)) {
// if (typeof value === "number") {
// // Simple case: Pin/instance the variation axis to a single value
// if (
// !harfbuzzJsWasm.hb_subset_input_pin_axis_location(
// input,
// face,
// HB_TAG(axisName),
// value,
// )
// ) {
// harfbuzzJsWasm.hb_face_destroy(face);
// harfbuzzJsWasm.free(fontBuffer);
// throw new Error(
// `hb_subset_input_pin_axis_location (harfbuzz) returned zero when pinning ${axisName} to ${value}, indicating failure. Maybe the axis does not exist in the font?`,
// );
// }
// } else if (value && typeof value === "object") {
// // Complex case: Reduce the variation space of the axis
// if (
// typeof value.min === "undefined" ||
// typeof value.max === "undefined"
// ) {
// harfbuzzJsWasm.hb_face_destroy(face);
// harfbuzzJsWasm.free(fontBuffer);
// throw new Error(
// `${axisName}: You must provide both a min and a max value when setting the axis range`,
// );
// }
// if (
// !harfbuzzJsWasm.hb_subset_input_set_axis_range(
// input,
// face,
// HB_TAG(axisName),
// value.min,
// value.max,
// // An explicit NaN makes harfbuzz use the existing default value, clamping to the new range if necessary
// value.default ?? NaN,
// )
// ) {
// harfbuzzJsWasm.hb_face_destroy(face);
// harfbuzzJsWasm.free(fontBuffer);
// throw new Error(
// `hb_subset_input_set_axis_range (harfbuzz) returned zero when setting the range of ${axisName} to [${value.min}; ${value.max}] and a default value of ${value.default}, indicating failure. Maybe the axis does not exist in the font?`,
// );
// }
// }
// }
// }
  let subset;
  try {
    subset = hbSubsetWasm.hb_subset_or_fail(face, input);
    if (subset === 0) {
      hbSubsetWasm.hb_face_destroy(face);
      hbSubsetWasm.free(fontBuffer);
      throw new Error(
        "hb_subset_or_fail (harfbuzz) returned zero, indicating failure. Maybe the input file is corrupted?",
      );
    }
  } finally {
    // Clean up
    hbSubsetWasm.hb_subset_input_destroy(input);
  }

  // Get result blob
  const result = hbSubsetWasm.hb_face_reference_blob(subset);

  const offset = hbSubsetWasm.hb_blob_get_data(result, 0);
  const subsetByteLength = hbSubsetWasm.hb_blob_get_length(result);
  if (subsetByteLength === 0) {
    hbSubsetWasm.hb_blob_destroy(result);
    hbSubsetWasm.hb_face_destroy(subset);
    hbSubsetWasm.hb_face_destroy(face);
    hbSubsetWasm.free(fontBuffer);
    throw new Error(
      "Failed to create subset font, maybe the input file is corrupted?",
    );
  }

  const subsetFont = new Uint8Array(
    heapu8.subarray(offset, offset + subsetByteLength),
  );

  // Clean up
  hbSubsetWasm.hb_blob_destroy(result);
  hbSubsetWasm.hb_face_destroy(subset);
  hbSubsetWasm.hb_face_destroy(face);
  hbSubsetWasm.free(fontBuffer);

  return subsetFont;
}

export default {
  subset,
};
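
For orientation, a minimal usage sketch (illustrative, not part of the changed files): `subset` takes the instantiated hb-subset wasm exports, a `Uint8Array` view over its linear memory, decompressed sfnt font data and the codepoints to keep, and returns the subsetted sfnt bytes. `hbWasmExports` and `sfntBuffer` are hypothetical placeholders; the loader in the next file is what actually wires these up.

import bindings from "./harfbuzz-bindings";

// hypothetical placeholders - in the real flow these come from the harfbuzz loader
declare const hbWasmExports: any; // instantiated hb-subset wasm exports
declare const sfntBuffer: ArrayBuffer; // decompressed (non-woff2) font data

// view over the wasm linear memory, as expected by the bindings
const heap = new Uint8Array((hbWasmExports.memory as WebAssembly.Memory).buffer);

// keep only the glyphs needed for "ABC" (codepoints 0x41-0x43)
const subsetBytes: Uint8Array = bindings.subset(
  hbWasmExports,
  heap,
  sfntBuffer,
  new Set([0x41, 0x42, 0x43]),
);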


@@ -0,0 +1,57 @@
/**
 * DON'T depend on anything from the outside like `promiseTry`, as this module is part of a separate lazy-loaded chunk.
 *
 * Including anything from the main chunk would include the whole chunk by default.
 * Even if it would be tree-shaken during build, it won't be tree-shaken in dev.
 *
 * In the future consider separating common utils into a separate shared chunk.
 */
import binary from "./harfbuzz-wasm";
import bindings from "./harfbuzz-bindings";

/**
 * Lazy loads wasm and respective bindings for font subsetting based on harfbuzzjs.
 */
let loadedWasm: ReturnType<typeof load> | null = null;

// TODO: consider adding support for fetching the wasm from a URL (external CDN, data URL, etc.)
const load = (): Promise<{
  subset: (
    fontBuffer: ArrayBuffer,
    codePoints: ReadonlySet<number>,
  ) => Uint8Array;
}> => {
  return new Promise(async (resolve, reject) => {
    try {
      const module = await WebAssembly.instantiate(binary);
      const harfbuzzJsWasm = module.instance.exports;
      // @ts-expect-error since `.buffer` is custom prop
      const heapu8 = new Uint8Array(harfbuzzJsWasm.memory.buffer);

      const hbSubset = {
        subset: (fontBuffer: ArrayBuffer, codePoints: ReadonlySet<number>) => {
          return bindings.subset(
            harfbuzzJsWasm,
            heapu8,
            fontBuffer,
            codePoints,
          );
        },
      };

      resolve(hbSubset);
    } catch (e) {
      reject(e);
    }
  });
};

// lazy load the default export
export default (): ReturnType<typeof load> => {
  if (!loadedWasm) {
    loadedWasm = load();
  }

  return loadedWasm;
};
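
A hedged usage sketch of the loader above (not part of the diff): the default export memoizes the wasm instantiation, so repeated calls reuse the same module. `decompressedFont` is a hypothetical placeholder for sfnt data, e.g. the output of the woff2 decompressor further below.

import loadHbSubset from "./harfbuzz-loader";

declare const decompressedFont: ArrayBuffer; // hypothetical sfnt input

// first call instantiates the wasm; subsequent calls resolve from the cached promise
const { subset } = await loadHbSubset();
const subsetBytes = subset(decompressedFont, new Set([0x2192])); // keep only "→"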

File diff suppressed because one or more lines are too long


@@ -0,0 +1,129 @@
import { WorkerPool } from "../workers";
import { isServerEnv, promiseTry } from "../utils";
import { WorkerInTheMainChunkError, WorkerUrlNotDefinedError } from "../errors";
import type { Commands } from "./subset-shared.chunk";

let shouldUseWorkers = typeof Worker !== "undefined";

/**
 * Tries to subset glyphs in a font based on the used codepoints, returning the font as a dataurl.
 * Under the hood it utilizes worker threads (Web Workers, if available), otherwise it falls back to the main thread.
 *
 * Check the following diagram for details: link.excalidraw.com/readonly/MbbnWPSWXgadXdtmzgeO
 *
 * @param arrayBuffer font data buffer in the woff2 format
 * @param codePoints codepoints used to subset the glyphs
 *
 * @returns font with subsetted glyphs (all glyphs in case of errors) converted into a dataurl
 */
export const subsetWoff2GlyphsByCodepoints = async (
  arrayBuffer: ArrayBuffer,
  codePoints: Array<number>,
): Promise<string> => {
  const { Commands, subsetToBase64, toBase64 } =
    await lazyLoadSharedSubsetChunk();

  if (!shouldUseWorkers) {
    return subsetToBase64(arrayBuffer, codePoints);
  }

  return promiseTry(async () => {
    try {
      const workerPool = await getOrCreateWorkerPool();
      // copy the buffer to avoid working on top of a detached array buffer in the fallback
      // i.e. in case the worker throws, the array buffer does not get automatically detached, even if the worker is terminated
      const arrayBufferCopy = arrayBuffer.slice(0);
      const result = await workerPool.postMessage(
        {
          command: Commands.Subset,
          arrayBuffer: arrayBufferCopy,
          codePoints,
        } as const,
        { transfer: [arrayBufferCopy] },
      );

      // encode on the main thread to avoid copying large binary strings (as dataurl) between threads
      return toBase64(result);
    } catch (e) {
      // don't use workers if they are failing
      shouldUseWorkers = false;

      if (
        // don't log the expected errors server-side
        !(
          isServerEnv() &&
          (e instanceof WorkerUrlNotDefinedError ||
            e instanceof WorkerInTheMainChunkError)
        )
      ) {
        // eslint-disable-next-line no-console
        console.error(
          "Failed to use workers for subsetting, falling back to the main thread.",
          e,
        );
      }

      // fallback to the main thread
      return subsetToBase64(arrayBuffer, codePoints);
    }
  });
};
// lazy-loaded and cached chunks
let subsetWorker: Promise<typeof import("./subset-worker.chunk")> | null = null;
let subsetShared: Promise<typeof import("./subset-shared.chunk")> | null = null;

const lazyLoadWorkerSubsetChunk = async () => {
  if (!subsetWorker) {
    subsetWorker = import("./subset-worker.chunk");
  }

  return subsetWorker;
};

const lazyLoadSharedSubsetChunk = async () => {
  if (!subsetShared) {
    // load dynamically to force create a shared chunk reused between the main thread and the worker thread
    subsetShared = import("./subset-shared.chunk");
  }

  return subsetShared;
};

// could be extended with multiple commands in the future
type SubsetWorkerData = {
  command: typeof Commands.Subset;
  arrayBuffer: ArrayBuffer;
  codePoints: Array<number>;
};

type SubsetWorkerResult<T extends SubsetWorkerData["command"]> =
  T extends typeof Commands.Subset ? ArrayBuffer : never;

let workerPool: Promise<
  WorkerPool<SubsetWorkerData, SubsetWorkerResult<SubsetWorkerData["command"]>>
> | null = null;

/**
 * Lazily initialize or get the worker pool singleton.
 *
 * @throws implicitly if anything goes wrong - worker pool creation, loading wasm, initializing the worker, etc.
 */
const getOrCreateWorkerPool = () => {
  if (!workerPool) {
    // immediate concurrency-friendly return, to ensure we have only one pool instance
    workerPool = promiseTry(async () => {
      const { WorkerUrl } = await lazyLoadWorkerSubsetChunk();

      const pool = WorkerPool.create<
        SubsetWorkerData,
        SubsetWorkerResult<SubsetWorkerData["command"]>
      >(WorkerUrl);

      return pool;
    });
  }

  return workerPool;
};
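
A minimal sketch of how a caller might use the public entry point above (illustrative; the buffer and text are assumptions): pass the woff2 buffer and the codepoints actually used, then await the resulting dataurl. Worker or wasm failures degrade gracefully, ultimately to a dataurl of the original, unsubsetted font.

import { subsetWoff2GlyphsByCodepoints } from "./subset-main";

declare const woff2Buffer: ArrayBuffer; // hypothetical: e.g. a fetched Comic Shanns woff2 file

// codepoints actually used in the scene, e.g. for the text "Hi!"
const codePoints = Array.from("Hi!", (char) => char.codePointAt(0)!);

// resolves to a `data:font/woff2;base64,...` url
const dataUrl = await subsetWoff2GlyphsByCodepoints(woff2Buffer, codePoints);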


@@ -0,0 +1,81 @@
/**
 * DON'T depend on anything from the outside like `promiseTry`, as this module is part of a separate lazy-loaded chunk.
 *
 * Including anything from the main chunk would include the whole chunk by default.
 * Even if it would be tree-shaken during build, it won't be tree-shaken in dev.
 *
 * In the future consider separating common utils into a separate shared chunk.
 */
import loadWoff2 from "./woff2/woff2-loader";
import loadHbSubset from "./harfbuzz/harfbuzz-loader";

/**
 * Shared commands between the main thread and worker threads.
 */
export const Commands = {
  Subset: "SUBSET",
} as const;

/**
 * Used by browser (main thread), node and jsdom, to subset the font based on the passed codepoints.
 *
 * @returns woff2 font as a base64 encoded string
 */
export const subsetToBase64 = async (
  arrayBuffer: ArrayBuffer,
  codePoints: Array<number>,
): Promise<string> => {
  try {
    const buffer = await subsetToBinary(arrayBuffer, codePoints);
    return toBase64(buffer);
  } catch (e) {
    console.error("Skipped glyph subsetting", e);
    // fallback to encoding the whole font in case of errors
    return toBase64(arrayBuffer);
  }
};

/**
 * Used by browser (worker thread) and as part of `subsetToBase64`, to subset the font based on the passed codepoints.
 *
 * @returns woff2 font as an ArrayBuffer, to avoid copying large strings between worker threads and the main thread.
 */
export const subsetToBinary = async (
  arrayBuffer: ArrayBuffer,
  codePoints: Array<number>,
): Promise<ArrayBuffer> => {
  // lazy load the wasm modules to avoid multiple initializations in case of concurrent triggers
  // IMPORTANT: could be expensive, as each new worker instance lazy loads these into its own memory ~ keep the # of workers small!
  const { compress, decompress } = await loadWoff2();
  const { subset } = await loadHbSubset();

  const decompressedBinary = decompress(arrayBuffer).buffer;
  const sfntSubset = subset(decompressedBinary, new Set(codePoints));
  const compressedBinary = compress(sfntSubset.buffer);

  return compressedBinary.buffer;
};

/**
 * Util for isomorphic browser (main thread), node and jsdom usage.
 *
 * Isn't used inside the worker to avoid copying large binary strings (as dataurl) between worker threads and the main thread.
 */
export const toBase64 = async (arrayBuffer: ArrayBuffer) => {
  let base64: string;

  if (typeof Buffer !== "undefined") {
    // node, jsdom
    base64 = Buffer.from(arrayBuffer).toString("base64");
  } else {
    // browser (main thread)
    // it's perfectly fine to treat each byte independently,
    // as we care only about turning individual bytes into codepoints,
    // not about multi-byte unicode characters
    const byteString = String.fromCharCode(...new Uint8Array(arrayBuffer));
    base64 = btoa(byteString);
  }

  return `data:font/woff2;base64,${base64}`;
};
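
To make the split of responsibilities concrete, a sketch of the two paths exposed above (illustrative, with hypothetical inputs): `subsetToBase64` runs the full decompress, subset, compress and dataurl pipeline with a whole-font fallback, while `subsetToBinary` plus `toBase64` is the worker-friendly variant where encoding happens on the caller's side.

import { subsetToBase64, subsetToBinary, toBase64 } from "./subset-shared.chunk";

declare const woff2Buffer: ArrayBuffer; // hypothetical woff2 input
const codePoints = [0x48, 0x69]; // "Hi"

// main thread / node / jsdom: one call, errors fall back to encoding the whole font
const dataUrl = await subsetToBase64(woff2Buffer, codePoints);

// worker-style split: keep the result binary, encode it where it's cheap to do so
const subsetBuffer = await subsetToBinary(woff2Buffer, codePoints);
const sameShapeDataUrl = await toBase64(subsetBuffer);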


@@ -0,0 +1,42 @@
/**
 * DON'T depend on anything from the outside like `promiseTry`, as this module is part of a separate lazy-loaded chunk.
 *
 * Including anything from the main chunk would include the whole chunk by default.
 * Even if it would be tree-shaken during build, it won't be tree-shaken in dev.
 *
 * In the future consider separating common utils into a separate shared chunk.
 */
import { Commands, subsetToBinary } from "./subset-shared.chunk";

/**
 * Due to this export (and the related dynamic import), this worker code will be included in the bundle automatically (as a separate chunk),
 * without the need for esbuild / vite / rollup plugins and special browser / server treatment.
 *
 * `import.meta.url` is undefined in nodejs
 */
export const WorkerUrl: URL | undefined = import.meta.url
  ? new URL(import.meta.url)
  : undefined;

// run only in the worker context
if (typeof window === "undefined" && typeof self !== "undefined") {
  self.onmessage = async (e: {
    data: {
      command: typeof Commands.Subset;
      arrayBuffer: ArrayBuffer;
      codePoints: Array<number>;
    };
  }) => {
    switch (e.data.command) {
      case Commands.Subset: {
        const buffer = await subsetToBinary(
          e.data.arrayBuffer,
          e.data.codePoints,
        );

        self.postMessage(buffer, { transfer: [buffer] });
        break;
      }
    }
  };
}
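
For illustration only: the commit drives this worker through the `WorkerPool` abstraction in subset-main.ts, but a bare-bones sketch of the message contract (with a hypothetical `woff2Buffer`) could look like this.

import { WorkerUrl } from "./subset-worker.chunk";
import { Commands } from "./subset-shared.chunk";

declare const woff2Buffer: ArrayBuffer; // hypothetical woff2 input

if (WorkerUrl) {
  const worker = new Worker(WorkerUrl, { type: "module" });

  worker.onmessage = (e: MessageEvent<ArrayBuffer>) => {
    // e.data is the subsetted woff2 binary, transferred back from the worker
    worker.terminate();
  };

  // transfer (not copy) the buffer into the worker; it becomes detached on this side
  worker.postMessage(
    { command: Commands.Subset, arrayBuffer: woff2Buffer, codePoints: [0x41] },
    [woff2Buffer],
  );
}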

File diff suppressed because it is too large Load diff


@@ -0,0 +1,76 @@
/**
 * DON'T depend on anything from the outside like `promiseTry`, as this module is part of a separate lazy-loaded chunk.
 *
 * Including anything from the main chunk would include the whole chunk by default.
 * Even if it would be tree-shaken during build, it won't be tree-shaken in dev.
 *
 * In the future consider separating common utils into a separate shared chunk.
 */
import binary from "./woff2-wasm";
import bindings from "./woff2-bindings";

/**
 * Lazy loads wasm and respective bindings for woff2 compression and decompression.
 */
type Vector = any;

let loadedWasm: ReturnType<typeof load> | null = null;

// re-map from the internal vector into a byte array
function convertFromVecToUint8Array(vector: Vector): Uint8Array {
  const arr = [];
  for (let i = 0, l = vector.size(); i < l; i++) {
    arr.push(vector.get(i));
  }

  return new Uint8Array(arr);
}

// TODO: consider adding support for fetching the wasm from a URL (external CDN, data URL, etc.)
const load = (): Promise<{
  compress: (buffer: ArrayBuffer) => Uint8Array;
  decompress: (buffer: ArrayBuffer) => Uint8Array;
}> => {
  return new Promise((resolve, reject) => {
    try {
      // initializing the module manually, so that we can pass in the wasm binary
      // note that `bindings.then` is not Promise/A+ compliant, hence the need for another explicit try/catch
      bindings({ wasmBinary: binary }).then(
        (module: {
          woff2Enc: (buffer: ArrayBuffer, byteLength: number) => Vector;
          woff2Dec: (buffer: ArrayBuffer, byteLength: number) => Vector;
        }) => {
          try {
            // re-export only the compress and decompress functions (also avoids an infinite loop inside the emscripten bindings)
            const woff2 = {
              compress: (buffer: ArrayBuffer) =>
                convertFromVecToUint8Array(
                  module.woff2Enc(buffer, buffer.byteLength),
                ),
              decompress: (buffer: ArrayBuffer) =>
                convertFromVecToUint8Array(
                  module.woff2Dec(buffer, buffer.byteLength),
                ),
            };

            resolve(woff2);
          } catch (e) {
            reject(e);
          }
        },
      );
    } catch (e) {
      reject(e);
    }
  });
};

// lazy loaded default export
export default (): ReturnType<typeof load> => {
  if (!loadedWasm) {
    loadedWasm = load();
  }

  return loadedWasm;
};
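
A short usage sketch of the loader above (illustrative; `woff2Buffer` is a placeholder): both directions go through the same lazily instantiated wasm module.

import loadWoff2 from "./woff2-loader";

declare const woff2Buffer: ArrayBuffer; // hypothetical woff2 input

const { compress, decompress } = await loadWoff2(); // instantiated once, then cached

const sfntBytes = decompress(woff2Buffer); // woff2 -> sfnt (ttf/otf) bytes
const rewrapped = compress(sfntBytes.buffer); // sfnt -> woff2 bytes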

File diff suppressed because one or more lines are too long