From 2734e646ca8460b1343393416184fa615530abce Mon Sep 17 00:00:00 2001 From: Marcel Mraz Date: Wed, 30 Oct 2024 15:24:12 +0200 Subject: [PATCH] chore: simplify line-break regexes, separate text wrapping (#8715) --- packages/excalidraw/components/App.tsx | 2 +- packages/excalidraw/element/embeddable.ts | 2 +- packages/excalidraw/element/newElement.ts | 2 +- packages/excalidraw/element/resizeElements.ts | 2 +- .../excalidraw/element/textElement.test.ts | 671 +----------------- packages/excalidraw/element/textElement.ts | 361 +--------- .../excalidraw/element/textWrapping.test.ts | 633 +++++++++++++++++ packages/excalidraw/element/textWrapping.ts | 568 +++++++++++++++ packages/excalidraw/element/textWysiwyg.tsx | 2 +- packages/excalidraw/fonts/Fonts.ts | 7 +- .../tests/linearElementEditor.test.tsx | 2 +- 11 files changed, 1213 insertions(+), 1039 deletions(-) create mode 100644 packages/excalidraw/element/textWrapping.test.ts create mode 100644 packages/excalidraw/element/textWrapping.ts diff --git a/packages/excalidraw/components/App.tsx b/packages/excalidraw/components/App.tsx index 3cb187ab87..5723a0602a 100644 --- a/packages/excalidraw/components/App.tsx +++ b/packages/excalidraw/components/App.tsx @@ -340,7 +340,6 @@ import { isValidTextContainer, measureText, normalizeText, - wrapText, } from "../element/textElement"; import { showHyperlinkTooltip, @@ -461,6 +460,7 @@ import { vectorNormalize, } from "../../math"; import { cropElement } from "../element/cropElement"; +import { wrapText } from "../element/textWrapping"; const AppContext = React.createContext(null!); const AppPropsContext = React.createContext(null!); diff --git a/packages/excalidraw/element/embeddable.ts b/packages/excalidraw/element/embeddable.ts index eada31a5be..9bc4a139b1 100644 --- a/packages/excalidraw/element/embeddable.ts +++ b/packages/excalidraw/element/embeddable.ts @@ -4,7 +4,7 @@ import type { ExcalidrawProps } from "../types"; import { getFontString, updateActiveTool } from 
"../utils"; import { setCursorForShape } from "../cursor"; import { newTextElement } from "./newElement"; -import { wrapText } from "./textElement"; +import { wrapText } from "./textWrapping"; import { isIframeElement } from "./typeChecks"; import type { ExcalidrawElement, diff --git a/packages/excalidraw/element/newElement.ts b/packages/excalidraw/element/newElement.ts index 55aa011f72..daeb06d5df 100644 --- a/packages/excalidraw/element/newElement.ts +++ b/packages/excalidraw/element/newElement.ts @@ -34,9 +34,9 @@ import { getResizedElementAbsoluteCoords } from "./bounds"; import { measureText, normalizeText, - wrapText, getBoundTextMaxWidth, } from "./textElement"; +import { wrapText } from "./textWrapping"; import { DEFAULT_ELEMENT_PROPS, DEFAULT_FONT_FAMILY, diff --git a/packages/excalidraw/element/resizeElements.ts b/packages/excalidraw/element/resizeElements.ts index 08ca5543f8..1fea04371d 100644 --- a/packages/excalidraw/element/resizeElements.ts +++ b/packages/excalidraw/element/resizeElements.ts @@ -47,10 +47,10 @@ import { handleBindTextResize, getBoundTextMaxWidth, getApproxMinLineHeight, - wrapText, measureText, getMinTextElementWidth, } from "./textElement"; +import { wrapText } from "./textWrapping"; import { LinearElementEditor } from "./linearElementEditor"; import { isInGroup } from "../groups"; import { mutateElbowArrow } from "./routing"; diff --git a/packages/excalidraw/element/textElement.test.ts b/packages/excalidraw/element/textElement.test.ts index 6275b762e3..cfc078c81e 100644 --- a/packages/excalidraw/element/textElement.test.ts +++ b/packages/excalidraw/element/textElement.test.ts @@ -1,4 +1,4 @@ -import { BOUND_TEXT_PADDING, FONT_FAMILY } from "../constants"; +import { FONT_FAMILY } from "../constants"; import { getLineHeight } from "../fonts"; import { API } from "../tests/helpers/api"; import { @@ -6,677 +6,10 @@ import { getContainerCoords, getBoundTextMaxWidth, getBoundTextMaxHeight, - wrapText, detectLineHeight, getLineHeightInPx, 
- parseTokens, } from "./textElement"; -import type { ExcalidrawTextElementWithContainer, FontString } from "./types"; - -describe("Test wrapText", () => { - // font is irrelevant as jsdom does not support FontFace API - // `measureText` width is mocked to return `text.length` by `jest-canvas-mock` - // https://github.com/hustcc/jest-canvas-mock/blob/master/src/classes/TextMetrics.js - const font = "10px Cascadia, Segoe UI Emoji" as FontString; - - it("should wrap the text correctly when word length is exactly equal to max width", () => { - const text = "Hello Excalidraw"; - // Length of "Excalidraw" is 100 and exacty equal to max width - const res = wrapText(text, font, 100); - expect(res).toEqual(`Hello\nExcalidraw`); - }); - - it("should return the text as is if max width is invalid", () => { - const text = "Hello Excalidraw"; - expect(wrapText(text, font, NaN)).toEqual(text); - expect(wrapText(text, font, -1)).toEqual(text); - expect(wrapText(text, font, Infinity)).toEqual(text); - }); - - it("should show the text correctly when max width reached", () => { - const text = "Hello😀"; - const maxWidth = 10; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("H\ne\nl\nl\no\n😀"); - }); - - it("should not wrap number when wrapping line", () => { - const text = "don't wrap this number 99,100.99"; - const maxWidth = 300; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("don't wrap this number\n99,100.99"); - }); - - it("should trim all trailing whitespaces", () => { - const text = "Hello "; - const maxWidth = 50; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("Hello"); - }); - - it("should trim all but one trailing whitespaces", () => { - const text = "Hello "; - const maxWidth = 60; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("Hello "); - }); - - it("should keep preceding whitespaces and trim all trailing whitespaces", () => { - const text = " Hello World"; - const maxWidth = 90; - const res = 
wrapText(text, font, maxWidth); - expect(res).toBe(" Hello\nWorld"); - }); - - it("should keep some preceding whitespaces, trim trailing whitespaces, but kep those that fit in the trailing line", () => { - const text = " Hello World "; - const maxWidth = 90; - const res = wrapText(text, font, maxWidth); - expect(res).toBe(" Hello\nWorld "); - }); - - it("should trim keep those whitespace that fit in the trailing line", () => { - const text = "Hello Wo rl d "; - const maxWidth = 100; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("Hello Wo\nrl d "); - }); - - it("should support multiple (multi-codepoint) emojis", () => { - const text = "😀🗺🔥👩🏽‍🦰👨‍👩‍👧‍👦🇨🇿"; - const maxWidth = 1; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("😀\n🗺\n🔥\n👩🏽‍🦰\n👨‍👩‍👧‍👦\n🇨🇿"); - }); - - it("should wrap the text correctly when text contains hyphen", () => { - let text = - "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects"; - const res = wrapText(text, font, 110); - expect(res).toBe( - `Wikipedia\nis hosted\nby\nWikimedia-\nFoundation,\na non-\nprofit\norganizatio\nn that also\nhosts a\nrange-of\nother\nprojects`, - ); - - text = "Hello thereusing-now"; - expect(wrapText(text, font, 100)).toEqual("Hello\nthereusing\n-now"); - }); - - it("should support wrapping nested lists", () => { - const text = `\tA) one tab\t\t- two tabs - 8 spaces`; - - const maxWidth = 100; - const res = wrapText(text, font, maxWidth); - expect(res).toBe(`\tA) one\ntab\t\t- two\ntabs\n- 8 spaces`); - - const maxWidth2 = 50; - const res2 = wrapText(text, font, maxWidth2); - expect(res2).toBe(`\tA)\none\ntab\n- two\ntabs\n- 8\nspace\ns`); - }); - - describe("When text is CJK", () => { - it("should break each CJK character when width is very small", () => { - // "안녕하세요" (Hangul) + "こんにちは世界" (Hiragana, Kanji) + "コンニチハ" (Katakana) + "你好" (Han) = "Hello Hello World Hello Hi" - const text = "안녕하세요こんにちは世界コンニチハ你好"; - const 
maxWidth = 10; - const res = wrapText(text, font, maxWidth); - expect(res).toBe( - "안\n녕\n하\n세\n요\nこ\nん\nに\nち\nは\n世\n界\nコ\nン\nニ\nチ\nハ\n你\n好", - ); - }); - - it("should break CJK text into longer segments when width is larger", () => { - // "안녕하세요" (Hangul) + "こんにちは世界" (Hiragana, Kanji) + "コンニチハ" (Katakana) + "你好" (Han) = "Hello Hello World Hello Hi" - const text = "안녕하세요こんにちは世界コンニチハ你好"; - const maxWidth = 30; - const res = wrapText(text, font, maxWidth); - - // measureText is mocked, so it's not precisely what would happen in prod - expect(res).toBe("안녕하\n세요こ\nんにち\nは世界\nコンニ\nチハ你\n好"); - }); - - it("should handle a combination of CJK, latin, emojis and whitespaces", () => { - const text = `a醫 醫 bb 你好 world-i-😀🗺🔥`; - - const maxWidth = 150; - const res = wrapText(text, font, maxWidth); - expect(res).toBe(`a醫 醫 bb 你\n好 world-i-😀🗺\n🔥`); - - const maxWidth2 = 50; - const res2 = wrapText(text, font, maxWidth2); - expect(res2).toBe(`a醫 醫\nbb 你\n好\nworld\n-i-😀\n🗺🔥`); - - const maxWidth3 = 30; - const res3 = wrapText(text, font, maxWidth3); - expect(res3).toBe(`a醫\n醫\nbb\n你好\nwor\nld-\ni-\n😀\n🗺\n🔥`); - }); - - it("should break before and after a regular CJK character", () => { - const text = "HelloたWorld"; - const maxWidth1 = 50; - const res1 = wrapText(text, font, maxWidth1); - expect(res1).toBe("Hello\nた\nWorld"); - - const maxWidth2 = 60; - const res2 = wrapText(text, font, maxWidth2); - expect(res2).toBe("Helloた\nWorld"); - }); - - it("should break before and after certain CJK symbols", () => { - const text = "こんにちは〃世界"; - const maxWidth1 = 50; - const res1 = wrapText(text, font, maxWidth1); - expect(res1).toBe("こんにちは\n〃世界"); - - const maxWidth2 = 60; - const res2 = wrapText(text, font, maxWidth2); - expect(res2).toBe("こんにちは〃\n世界"); - }); - - it("should break after, not before for certain CJK pairs", () => { - const text = "Hello た。"; - const maxWidth = 70; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("Hello\nた。"); - }); - - it("should break before, 
not after for certain CJK pairs", () => { - const text = "Hello「たWorld」"; - const maxWidth = 60; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("Hello\n「た\nWorld」"); - }); - - it("should break after, not before for certain CJK character pairs", () => { - const text = "「Helloた」World"; - const maxWidth = 70; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("「Hello\nた」World"); - }); - - it("should break Chinese sentences", () => { - const text = `中国你好!这是一个测试。 -我们来看看:人民币¥1234「很贵」 -(括号)、逗号,句号。空格 换行 全角符号…—`; - - const maxWidth1 = 80; - const res1 = wrapText(text, font, maxWidth1); - expect(res1).toBe(`中国你好!这是一\n个测试。 -我们来看看:人民\n币¥1234「很\n贵」 -(括号)、逗号,\n句号。空格 换行\n全角符号…—`); - - const maxWidth2 = 50; - const res2 = wrapText(text, font, maxWidth2); - expect(res2).toBe(`中国你好!\n这是一个测\n试。 -我们来看\n看:人民币\n¥1234\n「很贵」 -(括号)、\n逗号,句\n号。空格\n换行 全角\n符号…—`); - }); - }); - - it("should break Japanese sentences", () => { - const text = `日本こんにちは!これはテストです。 - 見てみましょう:円¥1234「高い」 - (括弧)、読点、句点。 - 空白 改行 全角記号…ー`; - - const maxWidth1 = 80; - const res1 = wrapText(text, font, maxWidth1); - expect(res1).toBe(`日本こんにちは!\nこれはテストで\nす。 - 見てみましょ\nう:円¥1234\n「高い」 - (括弧)、読\n点、句点。 - 空白 改行\n全角記号…ー`); - - const maxWidth2 = 50; - const res2 = wrapText(text, font, maxWidth2); - expect(res2).toBe(`日本こんに\nちは!これ\nはテストで\nす。 - 見てみ\nましょう:\n円\n¥1234\n「高い」 - (括\n弧)、読\n点、句点。 - 空白\n改行 全角\n記号…ー`); - }); - - it("should break Korean sentences", () => { - const text = `한국 안녕하세요! 이것은 테스트입니다. -우리 보자: 원화₩1234「비싸다」 -(괄호), 쉼표, 마침표. -공백 줄바꿈 전각기호…—`; - - const maxWidth1 = 80; - const res1 = wrapText(text, font, maxWidth1); - expect(res1).toBe(`한국 안녕하세\n요! 이것은 테\n스트입니다. -우리 보자: 원\n화₩1234「비\n싸다」 -(괄호), 쉼\n표, 마침표. -공백 줄바꿈 전\n각기호…—`); - - const maxWidth2 = 60; - const res2 = wrapText(text, font, maxWidth2); - expect(res2).toBe(`한국 안녕하\n세요! 이것\n은 테스트입\n니다. -우리 보자:\n원화\n₩1234\n「비싸다」 -(괄호),\n쉼표, 마침\n표. 
-공백 줄바꿈\n전각기호…—`); - }); - - describe("When text contains leading whitespaces", () => { - const text = " \t Hello world"; - - it("should preserve leading whitespaces", () => { - const maxWidth = 120; - const res = wrapText(text, font, maxWidth); - expect(res).toBe(" \t Hello\nworld"); - }); - - it("should break and collapse leading whitespaces when line breaks", () => { - const maxWidth = 60; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("\nHello\nworld"); - }); - - it("should break and collapse leading whitespaces whe words break", () => { - const maxWidth = 30; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("\nHel\nlo\nwor\nld"); - }); - }); - - describe("When text contains trailing whitespaces", () => { - it("shouldn't add new lines for trailing spaces", () => { - const text = "Hello whats up "; - const maxWidth = 200 - BOUND_TEXT_PADDING * 2; - const res = wrapText(text, font, maxWidth); - expect(res).toBe(text); - }); - - it("should ignore trailing whitespaces when line breaks", () => { - const text = "Hippopotomonstrosesquippedaliophobia ??????"; - const maxWidth = 400; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("Hippopotomonstrosesquippedaliophobia\n??????"); - }); - - it("should not ignore trailing whitespaces when word breaks", () => { - const text = "Hippopotomonstrosesquippedaliophobia ??????"; - const maxWidth = 300; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("Hippopotomonstrosesquippedalio\nphobia ??????"); - }); - - it("should ignore trailing whitespaces when word breaks and line breaks", () => { - const text = "Hippopotomonstrosesquippedaliophobia ??????"; - const maxWidth = 180; - const res = wrapText(text, font, maxWidth); - expect(res).toBe("Hippopotomonstrose\nsquippedaliophobia\n??????"); - }); - }); - - describe("When text doesn't contain new lines", () => { - const text = "Hello whats up"; - - [ - { - desc: "break all words when width of each word is less than 
container width", - width: 80, - res: `Hello\nwhats\nup`, - }, - { - desc: "break all characters when width of each character is less than container width", - width: 25, - res: `H -e -l -l -o -w -h -a -t -s -u -p`, - }, - { - desc: "break words as per the width", - - width: 140, - res: `Hello whats\nup`, - }, - { - desc: "fit the container", - - width: 250, - res: "Hello whats up", - }, - { - desc: "should push the word if its equal to max width", - width: 60, - res: `Hello -whats -up`, - }, - ].forEach((data) => { - it(`should ${data.desc}`, () => { - const res = wrapText(text, font, data.width - BOUND_TEXT_PADDING * 2); - expect(res).toEqual(data.res); - }); - }); - }); - - describe("When text contain new lines", () => { - const text = `Hello -whats up`; - [ - { - desc: "break all words when width of each word is less than container width", - width: 80, - res: `Hello\nwhats\nup`, - }, - { - desc: "break all characters when width of each character is less than container width", - width: 25, - res: `H -e -l -l -o -w -h -a -t -s -u -p`, - }, - { - desc: "break words as per the width", - - width: 150, - res: `Hello -whats up`, - }, - { - desc: "fit the container", - - width: 250, - res: `Hello -whats up`, - }, - ].forEach((data) => { - it(`should respect new lines and ${data.desc}`, () => { - const res = wrapText(text, font, data.width - BOUND_TEXT_PADDING * 2); - expect(res).toEqual(data.res); - }); - }); - }); - - describe("When text is long", () => { - const text = `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg break it now`; - [ - { - desc: "fit characters of long string as per container width", - width: 170, - res: `hellolongtextthi\nsiswhatsupwithyo\nuIamtypingggggan\ndtypinggg break\nit now`, - }, - { - desc: "fit characters of long string as per container width and break words as per the width", - - width: 130, - res: `hellolongtex -tthisiswhats -upwithyouIam -typingggggan -dtypinggg -break it now`, - }, - { - desc: "fit the long text when 
container width is greater than text length and move the rest to next line", - - width: 600, - res: `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg\nbreak it now`, - }, - ].forEach((data) => { - it(`should ${data.desc}`, () => { - const res = wrapText(text, font, data.width - BOUND_TEXT_PADDING * 2); - expect(res).toEqual(data.res); - }); - }); - }); - - describe("Test parseTokens", () => { - it("should tokenize latin", () => { - let text = "Excalidraw is a virtual collaborative whiteboard"; - - expect(parseTokens(text)).toEqual([ - "Excalidraw", - " ", - "is", - " ", - "a", - " ", - "virtual", - " ", - "collaborative", - " ", - "whiteboard", - ]); - - text = - "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects"; - expect(parseTokens(text)).toEqual([ - "Wikipedia", - " ", - "is", - " ", - "hosted", - " ", - "by", - " ", - "Wikimedia-", - " ", - "Foundation,", - " ", - "a", - " ", - "non-", - "profit", - " ", - "organization", - " ", - "that", - " ", - "also", - " ", - "hosts", - " ", - "a", - " ", - "range-", - "of", - " ", - "other", - " ", - "projects", - ]); - }); - - it("should not tokenize number", () => { - const text = "99,100.99"; - const tokens = parseTokens(text); - expect(tokens).toEqual(["99,100.99"]); - }); - - it("should tokenize joined emojis", () => { - const text = `😬🌍🗺🔥☂️👩🏽‍🦰👨‍👩‍👧‍👦👩🏾‍🔬🏳️‍🌈🧔‍♀️🧑‍🤝‍🧑🙅🏽‍♂️✅0️⃣🇨🇿🦅`; - const tokens = parseTokens(text); - - expect(tokens).toEqual([ - "😬", - "🌍", - "🗺", - "🔥", - "☂️", - "👩🏽‍🦰", - "👨‍👩‍👧‍👦", - "👩🏾‍🔬", - "🏳️‍🌈", - "🧔‍♀️", - "🧑‍🤝‍🧑", - "🙅🏽‍♂️", - "✅", - "0️⃣", - "🇨🇿", - "🦅", - ]); - }); - - it("should tokenize emojis mixed with mixed text", () => { - const text = `😬a🌍b🗺c🔥d☂️《👩🏽‍🦰》👨‍👩‍👧‍👦德👩🏾‍🔬こ🏳️‍🌈안🧔‍♀️g🧑‍🤝‍🧑h🙅🏽‍♂️e✅f0️⃣g🇨🇿10🦅#hash`; - const tokens = parseTokens(text); - - expect(tokens).toEqual([ - "😬", - "a", - "🌍", - "b", - "🗺", - "c", - "🔥", - "d", - "☂️", - "《", - "👩🏽‍🦰", - "》", - "👨‍👩‍👧‍👦", - "德", - "👩🏾‍🔬", - "こ", - "🏳️‍🌈", - 
"안", - "🧔‍♀️", - "g", - "🧑‍🤝‍🧑", - "h", - "🙅🏽‍♂️", - "e", - "✅", - "f0️⃣g", // bummer, but ok, as we traded kecaps not breaking (less common) for hash and numbers not breaking (more common) - "🇨🇿", - "10", // nice! do not break the number, as it's by default matched by \p{Emoji} - "🦅", - "#hash", // nice! do not break the hash, as it's by default matched by \p{Emoji} - ]); - }); - - it("should tokenize decomposed chars into their composed variants", () => { - // each input character is in a decomposed form - const text = "čでäぴέ다й한"; - expect(text.normalize("NFC").length).toEqual(8); - expect(text).toEqual(text.normalize("NFD")); - - const tokens = parseTokens(text); - expect(tokens.length).toEqual(8); - expect(tokens).toEqual(["č", "で", "ä", "ぴ", "έ", "다", "й", "한"]); - }); - - it("should tokenize artificial CJK", () => { - const text = `《道德經》醫-醫こんにちは世界!안녕하세요세계;다.다...원/달(((다)))[[1]]〚({((한))>)〛た…[Hello] World?ニューヨーク・¥3700.55す。090-1234-5678¥1,000〜$5,000「素晴らしい!」〔重要〕#1:Taro君30%は、(たなばた)〰¥110±¥570で20℃〜9:30〜10:00【一番】`; - - // [ - // '《道', '德', '經》', '醫-', - // '醫', 'こ', 'ん', 'に', - // 'ち', 'は', '世', '界!', - // '안', '녕', '하', '세', - // '요', '세', '계;', '다.', - // '다...', '원/', '달', '(((다)))', - // '[[1]]', '〚({((한))>)〛', 'た…', '[Hello]', - // ' ', 'World?', 'ニ', 'ュ', - // 'ー', 'ヨ', 'ー', 'ク・', - // '¥3700.55', 'す。', '090-', '1234-', - // '5678¥1,000', '〜', '$5,000', '「素', - // '晴', 'ら', 'し', 'い!」', - // '〔重', '要〕', '#', '1:', - // 'Taro', '君', '30%', 'は、', - // '(た', 'な', 'ば', 'た)', - // '〰', '¥110±', '¥570', 'で', - // '20℃', '〜', '9:30', '〜', - // '10:00', '【一', '番】' - // ] - const tokens = parseTokens(text); - - // Latin - expect(tokens).toContain("[[1]]"); - expect(tokens).toContain("[Hello]"); - expect(tokens).toContain("World?"); - expect(tokens).toContain("Taro"); - - // Chinese - expect(tokens).toContain("《道"); - expect(tokens).toContain("德"); - expect(tokens).toContain("經》"); - expect(tokens).toContain("醫-"); - expect(tokens).toContain("醫"); - - // Japanese - 
expect(tokens).toContain("こ"); - expect(tokens).toContain("ん"); - expect(tokens).toContain("に"); - expect(tokens).toContain("ち"); - expect(tokens).toContain("は"); - expect(tokens).toContain("世"); - expect(tokens).toContain("ニ"); - expect(tokens).toContain("ク・"); - expect(tokens).toContain("界!"); - expect(tokens).toContain("た…"); - expect(tokens).toContain("す。"); - expect(tokens).toContain("ュ"); - expect(tokens).toContain("ー"); - expect(tokens).toContain("「素"); - expect(tokens).toContain("晴"); - expect(tokens).toContain("ら"); - expect(tokens).toContain("し"); - expect(tokens).toContain("い!」"); - expect(tokens).toContain("君"); - expect(tokens).toContain("は、"); - expect(tokens).toContain("(た"); - expect(tokens).toContain("な"); - expect(tokens).toContain("ば"); - expect(tokens).toContain("た)"); - expect(tokens).toContain("で"); - expect(tokens).toContain("【一"); - expect(tokens).toContain("番】"); - - // Check for Korean - expect(tokens).toContain("안"); - expect(tokens).toContain("녕"); - expect(tokens).toContain("하"); - expect(tokens).toContain("세"); - expect(tokens).toContain("요"); - expect(tokens).toContain("세"); - expect(tokens).toContain("계;"); - expect(tokens).toContain("다."); - expect(tokens).toContain("다..."); - expect(tokens).toContain("원/"); - expect(tokens).toContain("달"); - expect(tokens).toContain("(((다)))"); - expect(tokens).toContain("〚({((한))>)〛"); - - // Numbers and units - expect(tokens).toContain("¥3700.55"); - expect(tokens).toContain("090-"); - expect(tokens).toContain("1234-"); - expect(tokens).toContain("5678¥1,000"); - expect(tokens).toContain("$5,000"); - expect(tokens).toContain("1:"); - expect(tokens).toContain("30%"); - expect(tokens).toContain("¥110±"); - expect(tokens).toContain("¥570"); - expect(tokens).toContain("20℃"); - expect(tokens).toContain("9:30"); - expect(tokens).toContain("10:00"); - - // Punctuation and symbols - expect(tokens).toContain("〜"); - expect(tokens).toContain("〰"); - expect(tokens).toContain("#"); - }); - }); -}); +import 
type { ExcalidrawTextElementWithContainer } from "./types"; describe("Test measureText", () => { describe("Test getContainerCoords", () => { diff --git a/packages/excalidraw/element/textElement.ts b/packages/excalidraw/element/textElement.ts index 9d72961b50..8c4bc59882 100644 --- a/packages/excalidraw/element/textElement.ts +++ b/packages/excalidraw/element/textElement.ts @@ -16,12 +16,12 @@ import { BOUND_TEXT_PADDING, DEFAULT_FONT_FAMILY, DEFAULT_FONT_SIZE, - ENV, TEXT_ALIGN, VERTICAL_ALIGN, } from "../constants"; import type { MaybeTransformHandleType } from "./transformHandles"; import { isTextElement } from "."; +import { wrapText } from "./textWrapping"; import { isBoundToContainer, isArrowElement } from "./typeChecks"; import { LinearElementEditor } from "./linearElementEditor"; import type { AppState } from "../types"; @@ -31,172 +31,6 @@ import { } from "./containerCache"; import type { ExtractSetType } from "../utility-types"; -/** - * Matches various emoji types. - * - * 1. basic emojis (😀, 🌍) - * 2. flags (🇨🇿) - * 3. multi-codepoint emojis: - * - skin tones (👍🏽) - * - variation selectors (☂️) - * - keycaps (1️⃣) - * - tag sequences (🏴󠁧󠁢󠁥󠁮󠁧󠁿) - * - emoji sequences (👨‍👩‍👧‍👦, 👩‍🚀, 🏳️‍🌈) - * - * Unicode points: - * - \uFE0F: presentation selector - * - \u20E3: enclosing keycap - * - \u200D: ZWJ (zero width joiner) - * - \u{E0020}-\u{E007E}: tags - * - \u{E007F}: cancel tag - * - * @see https://unicode.org/reports/tr51/#EBNF_and_Regex, with changes: - * - replaced \p{Emoji} with [\p{Extended_Pictographic}\p{Emoji_Presentation}], see more in `should tokenize emojis mixed with mixed text` test - * - replaced \p{Emod} with \p{Emoji_Modifier} as some do not understand the abbreviation (i.e. 
https://devina.io/redos-checker) - */ -const _EMOJI_CHAR = - /(\p{RI}\p{RI}|[\p{Extended_Pictographic}\p{Emoji_Presentation}](?:\p{Emoji_Modifier}|\uFE0F\u20E3?|[\u{E0020}-\u{E007E}]+\u{E007F})?(?:\u200D(?:\p{RI}\p{RI}|[\p{Emoji}](?:\p{Emoji_Modifier}|\uFE0F\u20E3?|[\u{E0020}-\u{E007E}]+\u{E007F})?))*)/u; - -/** - * Detect a CJK char, though does not include every possible char used in CJK texts, - * such as symbols and punctuations. - * - * By default every CJK is a breaking point, though CJK has additional breaking points, - * including full width punctuations or symbols (Chinese and Japanese) and western punctuations (Korean). - * - * Additional CJK breaking point rules: - * - expect a break before (lookahead), but not after (negative lookbehind), i.e. "(" or "(" - * - expect a break after (lookbehind), but not before (negative lookahead), i.e. ")" or ")" - * - expect a break always (lookahead and lookbehind), i.e. "〃" - */ -const _CJK_CHAR = - /\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}/u; - -/** - * Following characters break only with CJK, not with alphabetic characters. - * This is essential for Korean, as it uses alphabetic punctuation, but expects CJK-like breaking points. - * - * Hello((た)) → ["Hello", "((た))"] - * Hello((World)) → ["Hello((World))"] - */ -const _CJK_BREAK_NOT_AFTER_BUT_BEFORE = /<\(\[\{/u; -const _CJK_BREAK_NOT_BEFORE_BUT_AFTER = />\)\]\}.,:;\?!/u; -const _CJK_BREAK_ALWAYS = / 〃〜~〰#&*+-ー/=|¬ ̄¦/u; -const _CJK_SYMBOLS_AND_PUNCTUATION = - /()[]{}〈〉《》⦅⦆「」「」『』【】〖〗〔〕〘〙〚〛<>〝〞'〟・。゚゙,、.:;?!%ー/u; - -/** - * Following characters break with any character, even though are mostly used with CJK. 
- * - * Hello た。→ ["Hello", "た。"] - * ↑ DON'T BREAK "た。" (negative lookahead) - * Hello「た」 World → ["Hello", "「た」", "World"] - * ↑ DON'T BREAK "「た" (negative lookbehind) - * ↑ DON'T BREAK "た」"(negative lookahead) - * ↑ BREAK BEFORE "「" (lookahead) - * ↑ BREAK AFTER "」" (lookbehind) - */ -const _ANY_BREAK_NOT_AFTER_BUT_BEFORE = /([{〈《⦅「「『【〖〔〘〚<〝/u; -const _ANY_BREAK_NOT_BEFORE_BUT_AFTER = - /)]}〉》⦆」」』】〗〕〙〛>〞'〟・。゚゙,、.:;?!%±‥…\//u; - -/** - * Natural breaking points for any grammars. - * - * Hello-world - * ↑ BREAK AFTER "-" → ["Hello-", "world"] - * Hello world - * ↑ BREAK ALWAYS " " → ["Hello", " ", "world"] - */ -const _ANY_BREAK_AFTER = /-/u; -const _ANY_BREAK_ALWAYS = /\s/u; - -/** - * Simple fallback for browsers (mainly Safari < 16.4) that don't support "Lookbehind assertion". - * - * Browser support as of 10/2024: - * - 91% Lookbehind assertion https://caniuse.com/mdn-javascript_regular_expressions_lookbehind_assertion - * - 94% Unicode character class escape https://caniuse.com/mdn-javascript_regular_expressions_unicode_character_class_escape - * - * Does not include advanced CJK breaking rules, but covers most of the core cases, especially for latin. - */ -const BREAK_LINE_REGEX_SIMPLE = new RegExp( - `${_EMOJI_CHAR.source}|([${_ANY_BREAK_ALWAYS.source}${_CJK_CHAR.source}${_CJK_BREAK_ALWAYS.source}${_ANY_BREAK_AFTER.source}])`, - "u", -); - -// Hello World → ["Hello", " World"] -// ↑ BREAK BEFORE " " -// HelloたWorld → ["Hello", "たWorld"] -// ↑ BREAK BEFORE "た" -// Hello「World」→ ["Hello", "「World」"] -// ↑ BREAK BEFORE "「" -const getLookaheadBreakingPoints = () => { - const ANY_BREAKING_POINT = `(? 
{ - const ANY_BREAKING_POINT = `(?![${_ANY_BREAK_NOT_BEFORE_BUT_AFTER.source}])(?<=[${_ANY_BREAK_NOT_BEFORE_BUT_AFTER.source}${_ANY_BREAK_ALWAYS.source}${_ANY_BREAK_AFTER.source}])`; - const CJK_BREAKING_POINT = `(?![${_ANY_BREAK_NOT_BEFORE_BUT_AFTER.source}${_CJK_BREAK_NOT_BEFORE_BUT_AFTER.source}${_ANY_BREAK_AFTER.source}])(?<=[${_CJK_CHAR.source}${_CJK_BREAK_ALWAYS.source}][${_CJK_BREAK_NOT_BEFORE_BUT_AFTER.source}]*)`; - return new RegExp(`(?:${ANY_BREAKING_POINT}|${CJK_BREAKING_POINT})`, "u"); -}; - -/** - * Break a line based on the whitespaces, CJK / emoji chars and language specific breaking points, - * like hyphen for alphabetic and various full-width codepoints for CJK - especially Japanese, e.g.: - * - * "Hello 世界。🌎🗺" → ["Hello", " ", "世", "界。", "🌎", "🗺"] - * "Hello-world" → ["Hello-", "world"] - * "「Hello World」" → ["「Hello", " ", "World」"] - */ -const getBreakLineRegexAdvanced = () => - new RegExp( - `${_EMOJI_CHAR.source}|${getLookaheadBreakingPoints().source}|${ - getLookbehindBreakingPoints().source - }`, - "u", - ); - -let cachedBreakLineRegex: RegExp | undefined; - -// Lazy-load for browsers that don't support "Lookbehind assertion" -const getBreakLineRegex = () => { - if (!cachedBreakLineRegex) { - try { - cachedBreakLineRegex = getBreakLineRegexAdvanced(); - } catch { - cachedBreakLineRegex = BREAK_LINE_REGEX_SIMPLE; - } - } - - return cachedBreakLineRegex; -}; - -const CJK_REGEX = new RegExp( - `[${_CJK_CHAR.source}${_CJK_BREAK_ALWAYS.source}${_CJK_SYMBOLS_AND_PUNCTUATION.source}]`, - "u", -); - -const EMOJI_REGEX = new RegExp(`${_EMOJI_CHAR.source}`, "u"); - -export const containsCJK = (text: string) => { - return CJK_REGEX.test(text); -}; - -export const containsEmoji = (text: string) => { - return EMOJI_REGEX.test(text); -}; - export const normalizeText = (text: string) => { return ( normalizeEOL(text) @@ -510,7 +344,7 @@ let canvas: HTMLCanvasElement | undefined; * * `Math.ceil` of the final width adds additional buffer which stabilizes 
slight wrapping incosistencies. */ -const getLineWidth = ( +export const getLineWidth = ( text: string, font: FontString, forceAdvanceWidth?: true, @@ -575,197 +409,6 @@ export const getTextHeight = ( return getLineHeightInPx(fontSize, lineHeight) * lineCount; }; -export const parseTokens = (line: string) => { - const breakLineRegex = getBreakLineRegex(); - - // normalizing to single-codepoint composed chars due to canonical equivalence of multi-codepoint versions for chars like č, で (~ so that we don't break a line in between c and ˇ) - // filtering due to multi-codepoint chars like 👨‍👩‍👧‍👦, 👩🏽‍🦰 - return line.normalize("NFC").split(breakLineRegex).filter(Boolean); -}; - -// handles multi-byte chars (é, 中) and purposefully does not handle multi-codepoint char (👨‍👩‍👧‍👦, 👩🏽‍🦰) -const isSingleCharacter = (maybeSingleCharacter: string) => { - return ( - maybeSingleCharacter.codePointAt(0) !== undefined && - maybeSingleCharacter.codePointAt(1) === undefined - ); -}; - -const satisfiesWordInvariant = (word: string) => { - if (import.meta.env.MODE === ENV.TEST || import.meta.env.DEV) { - if (/\s/.test(word)) { - throw new Error("Word should not contain any whitespaces!"); - } - } -}; - -const wrapWord = ( - word: string, - font: FontString, - maxWidth: number, -): Array => { - // multi-codepoint emojis are already broken apart and shouldn't be broken further - if (EMOJI_REGEX.test(word)) { - return [word]; - } - - satisfiesWordInvariant(word); - - const lines: Array = []; - const chars = Array.from(word); - - let currentLine = ""; - let currentLineWidth = 0; - - for (const char of chars) { - const _charWidth = charWidth.calculate(char, font); - const testLineWidth = currentLineWidth + _charWidth; - - if (testLineWidth <= maxWidth) { - currentLine = currentLine + char; - currentLineWidth = testLineWidth; - continue; - } - - if (currentLine) { - lines.push(currentLine); - } - - currentLine = char; - currentLineWidth = _charWidth; - } - - if (currentLine) { - 
lines.push(currentLine); - } - - return lines; -}; - -const wrapLine = ( - line: string, - font: FontString, - maxWidth: number, -): string[] => { - const lines: Array = []; - const tokens = parseTokens(line); - const tokenIterator = tokens[Symbol.iterator](); - - let currentLine = ""; - let currentLineWidth = 0; - - let iterator = tokenIterator.next(); - - while (!iterator.done) { - const token = iterator.value; - const testLine = currentLine + token; - - // cache single codepoint whitespace, CJK or emoji width calc. as kerning should not apply here - const testLineWidth = isSingleCharacter(token) - ? currentLineWidth + charWidth.calculate(token, font) - : getLineWidth(testLine, font, true); - - // build up the current line, skipping length check for possibly trailing whitespaces - if (/\s/.test(token) || testLineWidth <= maxWidth) { - currentLine = testLine; - currentLineWidth = testLineWidth; - iterator = tokenIterator.next(); - continue; - } - - // current line is empty => just the token (word) is longer than `maxWidth` and needs to be wrapped - if (!currentLine) { - const wrappedWord = wrapWord(token, font, maxWidth); - const trailingLine = wrappedWord[wrappedWord.length - 1] ?? ""; - const precedingLines = wrappedWord.slice(0, -1); - - lines.push(...precedingLines); - - // trailing line of the wrapped word might -still be joined with next token/s - currentLine = trailingLine; - currentLineWidth = getLineWidth(trailingLine, font, true); - iterator = tokenIterator.next(); - } else { - // push & reset, but don't iterate on the next token, as we didn't use it yet! 
- lines.push(currentLine.trimEnd()); - - // purposefully not iterating and not setting `currentLine` to `token`, so that we could use a simple !currentLine check above - currentLine = ""; - currentLineWidth = 0; - } - } - - // iterator done, push the trailing line if exists - if (currentLine) { - const trailingLine = trimTrailingLine(currentLine, font, maxWidth); - lines.push(trailingLine); - } - - return lines; -}; - -// similarly to browsers, does not trim all whitespaces, but only those exceeding the maxWidth -const trimTrailingLine = (line: string, font: FontString, maxWidth: number) => { - const shouldTrimWhitespaces = getLineWidth(line, font, true) > maxWidth; - - if (!shouldTrimWhitespaces) { - return line; - } - - // defensively default to `trimeEnd` in case the regex does not match - let [, trimmedLine, whitespaces] = line.match(/^(.+?)(\s+)$/) ?? [ - line, - line.trimEnd(), - "", - ]; - - let trimmedLineWidth = getLineWidth(trimmedLine, font, true); - - for (const whitespace of Array.from(whitespaces)) { - const _charWidth = charWidth.calculate(whitespace, font); - const testLineWidth = trimmedLineWidth + _charWidth; - - if (testLineWidth > maxWidth) { - break; - } - - trimmedLine = trimmedLine + whitespace; - trimmedLineWidth = testLineWidth; - } - - return trimmedLine; -}; - -export const wrapText = ( - text: string, - font: FontString, - maxWidth: number, -): string => { - // if maxWidth is not finite or NaN which can happen in case of bugs in - // computation, we need to make sure we don't continue as we'll end up - // in an infinite loop - if (!Number.isFinite(maxWidth) || maxWidth < 0) { - return text; - } - - const lines: Array = []; - const originalLines = text.split("\n"); - - for (const originalLine of originalLines) { - const currentLineWidth = getLineWidth(originalLine, font, true); - - if (currentLineWidth <= maxWidth) { - lines.push(originalLine); - continue; - } - - const wrappedLine = wrapLine(originalLine, font, maxWidth); - 
lines.push(...wrappedLine); - } - - return lines.join("\n"); -}; - export const charWidth = (() => { const cachedCharWidth: { [key: FontString]: Array } = {}; diff --git a/packages/excalidraw/element/textWrapping.test.ts b/packages/excalidraw/element/textWrapping.test.ts new file mode 100644 index 0000000000..6c7bcb819d --- /dev/null +++ b/packages/excalidraw/element/textWrapping.test.ts @@ -0,0 +1,633 @@ +import { wrapText, parseTokens } from "./textWrapping"; +import type { FontString } from "./types"; + +describe("Test wrapText", () => { + // font is irrelevant as jsdom does not support FontFace API + // `measureText` width is mocked to return `text.length` by `jest-canvas-mock` + // https://github.com/hustcc/jest-canvas-mock/blob/master/src/classes/TextMetrics.js + const font = "10px Cascadia, Segoe UI Emoji" as FontString; + + it("should wrap the text correctly when word length is exactly equal to max width", () => { + const text = "Hello Excalidraw"; + // Length of "Excalidraw" is 100 and exacty equal to max width + const res = wrapText(text, font, 100); + expect(res).toEqual(`Hello\nExcalidraw`); + }); + + it("should return the text as is if max width is invalid", () => { + const text = "Hello Excalidraw"; + expect(wrapText(text, font, NaN)).toEqual(text); + expect(wrapText(text, font, -1)).toEqual(text); + expect(wrapText(text, font, Infinity)).toEqual(text); + }); + + it("should show the text correctly when max width reached", () => { + const text = "Hello😀"; + const maxWidth = 10; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("H\ne\nl\nl\no\n😀"); + }); + + it("should not wrap number when wrapping line", () => { + const text = "don't wrap this number 99,100.99"; + const maxWidth = 300; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("don't wrap this number\n99,100.99"); + }); + + it("should trim all trailing whitespaces", () => { + const text = "Hello "; + const maxWidth = 50; + const res = wrapText(text, font, 
maxWidth); + expect(res).toBe("Hello"); + }); + + it("should trim all but one trailing whitespaces", () => { + const text = "Hello "; + const maxWidth = 60; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hello "); + }); + + it("should keep preceding whitespaces and trim all trailing whitespaces", () => { + const text = " Hello World"; + const maxWidth = 90; + const res = wrapText(text, font, maxWidth); + expect(res).toBe(" Hello\nWorld"); + }); + + it("should keep some preceding whitespaces, trim trailing whitespaces, but kep those that fit in the trailing line", () => { + const text = " Hello World "; + const maxWidth = 90; + const res = wrapText(text, font, maxWidth); + expect(res).toBe(" Hello\nWorld "); + }); + + it("should trim keep those whitespace that fit in the trailing line", () => { + const text = "Hello Wo rl d "; + const maxWidth = 100; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hello Wo\nrl d "); + }); + + it("should support multiple (multi-codepoint) emojis", () => { + const text = "😀🗺🔥👩🏽‍🦰👨‍👩‍👧‍👦🇨🇿"; + const maxWidth = 1; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("😀\n🗺\n🔥\n👩🏽‍🦰\n👨‍👩‍👧‍👦\n🇨🇿"); + }); + + it("should wrap the text correctly when text contains hyphen", () => { + let text = + "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects"; + const res = wrapText(text, font, 110); + expect(res).toBe( + `Wikipedia\nis hosted\nby\nWikimedia-\nFoundation,\na non-\nprofit\norganizatio\nn that also\nhosts a\nrange-of\nother\nprojects`, + ); + + text = "Hello thereusing-now"; + expect(wrapText(text, font, 100)).toEqual("Hello\nthereusing\n-now"); + }); + + it("should support wrapping nested lists", () => { + const text = `\tA) one tab\t\t- two tabs - 8 spaces`; + + const maxWidth = 100; + const res = wrapText(text, font, maxWidth); + expect(res).toBe(`\tA) one\ntab\t\t- two\ntabs\n- 8 spaces`); + + const maxWidth2 = 50; + const 
res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe(`\tA)\none\ntab\n- two\ntabs\n- 8\nspace\ns`); + }); + + describe("When text is CJK", () => { + it("should break each CJK character when width is very small", () => { + // "안녕하세요" (Hangul) + "こんにちは世界" (Hiragana, Kanji) + "コンニチハ" (Katakana) + "你好" (Han) = "Hello Hello World Hello Hi" + const text = "안녕하세요こんにちは世界コンニチハ你好"; + const maxWidth = 10; + const res = wrapText(text, font, maxWidth); + expect(res).toBe( + "안\n녕\n하\n세\n요\nこ\nん\nに\nち\nは\n世\n界\nコ\nン\nニ\nチ\nハ\n你\n好", + ); + }); + + it("should break CJK text into longer segments when width is larger", () => { + // "안녕하세요" (Hangul) + "こんにちは世界" (Hiragana, Kanji) + "コンニチハ" (Katakana) + "你好" (Han) = "Hello Hello World Hello Hi" + const text = "안녕하세요こんにちは世界コンニチハ你好"; + const maxWidth = 30; + const res = wrapText(text, font, maxWidth); + + // measureText is mocked, so it's not precisely what would happen in prod + expect(res).toBe("안녕하\n세요こ\nんにち\nは世界\nコンニ\nチハ你\n好"); + }); + + it("should handle a combination of CJK, latin, emojis and whitespaces", () => { + const text = `a醫 醫 bb 你好 world-i-😀🗺🔥`; + + const maxWidth = 150; + const res = wrapText(text, font, maxWidth); + expect(res).toBe(`a醫 醫 bb 你\n好 world-i-😀🗺\n🔥`); + + const maxWidth2 = 50; + const res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe(`a醫 醫\nbb 你\n好\nworld\n-i-😀\n🗺🔥`); + + const maxWidth3 = 30; + const res3 = wrapText(text, font, maxWidth3); + expect(res3).toBe(`a醫\n醫\nbb\n你好\nwor\nld-\ni-\n😀\n🗺\n🔥`); + }); + + it("should break before and after a regular CJK character", () => { + const text = "HelloたWorld"; + const maxWidth1 = 50; + const res1 = wrapText(text, font, maxWidth1); + expect(res1).toBe("Hello\nた\nWorld"); + + const maxWidth2 = 60; + const res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe("Helloた\nWorld"); + }); + + it("should break before and after certain CJK symbols", () => { + const text = "こんにちは〃世界"; + const maxWidth1 = 50; + const res1 = wrapText(text, font, 
maxWidth1); + expect(res1).toBe("こんにちは\n〃世界"); + + const maxWidth2 = 60; + const res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe("こんにちは〃\n世界"); + }); + + it("should break after, not before for certain CJK pairs", () => { + const text = "Hello た。"; + const maxWidth = 70; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hello\nた。"); + }); + + it("should break before, not after for certain CJK pairs", () => { + const text = "Hello「たWorld」"; + const maxWidth = 60; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hello\n「た\nWorld」"); + }); + + it("should break after, not before for certain CJK character pairs", () => { + const text = "「Helloた」World"; + const maxWidth = 70; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("「Hello\nた」World"); + }); + + it("should break Chinese sentences", () => { + const text = `中国你好!这是一个测试。 +我们来看看:人民币¥1234「很贵」 +(括号)、逗号,句号。空格 换行 全角符号…—`; + + const maxWidth1 = 80; + const res1 = wrapText(text, font, maxWidth1); + expect(res1).toBe(`中国你好!这是一\n个测试。 +我们来看看:人民\n币¥1234「很\n贵」 +(括号)、逗号,\n句号。空格 换行\n全角符号…—`); + + const maxWidth2 = 50; + const res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe(`中国你好!\n这是一个测\n试。 +我们来看\n看:人民币\n¥1234\n「很贵」 +(括号)、\n逗号,句\n号。空格\n换行 全角\n符号…—`); + }); + + it("should break Japanese sentences", () => { + const text = `日本こんにちは!これはテストです。 + 見てみましょう:円¥1234「高い」 + (括弧)、読点、句点。 + 空白 改行 全角記号…ー`; + + const maxWidth1 = 80; + const res1 = wrapText(text, font, maxWidth1); + expect(res1).toBe(`日本こんにちは!\nこれはテストで\nす。 + 見てみましょ\nう:円¥1234\n「高い」 + (括弧)、読\n点、句点。 + 空白 改行\n全角記号…ー`); + + const maxWidth2 = 50; + const res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe(`日本こんに\nちは!これ\nはテストで\nす。 + 見てみ\nましょう:\n円\n¥1234\n「高い」 + (括\n弧)、読\n点、句点。 + 空白\n改行 全角\n記号…ー`); + }); + + it("should break Korean sentences", () => { + const text = `한국 안녕하세요! 이것은 테스트입니다. +우리 보자: 원화₩1234「비싸다」 +(괄호), 쉼표, 마침표. 
+공백 줄바꿈 전각기호…—`; + + const maxWidth1 = 80; + const res1 = wrapText(text, font, maxWidth1); + expect(res1).toBe(`한국 안녕하세\n요! 이것은 테\n스트입니다. +우리 보자: 원\n화₩1234「비\n싸다」 +(괄호), 쉼\n표, 마침표. +공백 줄바꿈 전\n각기호…—`); + + const maxWidth2 = 60; + const res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe(`한국 안녕하\n세요! 이것\n은 테스트입\n니다. +우리 보자:\n원화\n₩1234\n「비싸다」 +(괄호),\n쉼표, 마침\n표. +공백 줄바꿈\n전각기호…—`); + }); + }); + + describe("When text contains leading whitespaces", () => { + const text = " \t Hello world"; + + it("should preserve leading whitespaces", () => { + const maxWidth = 120; + const res = wrapText(text, font, maxWidth); + expect(res).toBe(" \t Hello\nworld"); + }); + + it("should break and collapse leading whitespaces when line breaks", () => { + const maxWidth = 60; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("\nHello\nworld"); + }); + + it("should break and collapse leading whitespaces whe words break", () => { + const maxWidth = 30; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("\nHel\nlo\nwor\nld"); + }); + }); + + describe("When text contains trailing whitespaces", () => { + it("shouldn't add new lines for trailing spaces", () => { + const text = "Hello whats up "; + const maxWidth = 190; + const res = wrapText(text, font, maxWidth); + expect(res).toBe(text); + }); + + it("should ignore trailing whitespaces when line breaks", () => { + const text = "Hippopotomonstrosesquippedaliophobia ??????"; + const maxWidth = 400; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hippopotomonstrosesquippedaliophobia\n??????"); + }); + + it("should not ignore trailing whitespaces when word breaks", () => { + const text = "Hippopotomonstrosesquippedaliophobia ??????"; + const maxWidth = 300; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hippopotomonstrosesquippedalio\nphobia ??????"); + }); + + it("should ignore trailing whitespaces when word breaks and line breaks", () => { + const text = 
"Hippopotomonstrosesquippedaliophobia ??????"; + const maxWidth = 180; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hippopotomonstrose\nsquippedaliophobia\n??????"); + }); + }); + + describe("When text doesn't contain new lines", () => { + const text = "Hello whats up"; + + [ + { + desc: "break all words when width of each word is less than container width", + width: 70, + res: `Hello\nwhats\nup`, + }, + { + desc: "break all characters when width of each character is less than container width", + width: 15, + res: `H\ne\nl\nl\no\nw\nh\na\nt\ns\nu\np`, + }, + { + desc: "break words as per the width", + + width: 130, + res: `Hello whats\nup`, + }, + { + desc: "fit the container", + + width: 240, + res: "Hello whats up", + }, + { + desc: "push the word if its equal to max width", + width: 50, + res: `Hello\nwhats\nup`, + }, + ].forEach((data) => { + it(`should ${data.desc}`, () => { + const res = wrapText(text, font, data.width); + expect(res).toEqual(data.res); + }); + }); + }); + + describe("When text contain new lines", () => { + const text = `Hello\n whats up`; + [ + { + desc: "break all words when width of each word is less than container width", + width: 70, + res: `Hello\n whats\nup`, + }, + { + desc: "break all characters when width of each character is less than container width", + width: 15, + res: `H\ne\nl\nl\no\n\nw\nh\na\nt\ns\nu\np`, + }, + { + desc: "break words as per the width", + width: 140, + res: `Hello\n whats up`, + }, + ].forEach((data) => { + it(`should respect new lines and ${data.desc}`, () => { + const res = wrapText(text, font, data.width); + expect(res).toEqual(data.res); + }); + }); + }); + + describe("When text is long", () => { + const text = `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg break it now`; + [ + { + desc: "fit characters of long string as per container width", + width: 160, + res: `hellolongtextthi\nsiswhatsupwithyo\nuIamtypingggggan\ndtypinggg break\nit now`, + }, + { + desc: "fit 
characters of long string as per container width and break words as per the width", + + width: 120, + res: `hellolongtex\ntthisiswhats\nupwithyouIam\ntypingggggan\ndtypinggg\nbreak it now`, + }, + { + desc: "fit the long text when container width is greater than text length and move the rest to next line", + + width: 590, + res: `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg\nbreak it now`, + }, + ].forEach((data) => { + it(`should ${data.desc}`, () => { + const res = wrapText(text, font, data.width); + expect(res).toEqual(data.res); + }); + }); + }); + + describe("Test parseTokens", () => { + it("should tokenize latin", () => { + let text = "Excalidraw is a virtual collaborative whiteboard"; + + expect(parseTokens(text)).toEqual([ + "Excalidraw", + " ", + "is", + " ", + "a", + " ", + "virtual", + " ", + "collaborative", + " ", + "whiteboard", + ]); + + text = + "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects"; + expect(parseTokens(text)).toEqual([ + "Wikipedia", + " ", + "is", + " ", + "hosted", + " ", + "by", + " ", + "Wikimedia-", + " ", + "Foundation,", + " ", + "a", + " ", + "non-", + "profit", + " ", + "organization", + " ", + "that", + " ", + "also", + " ", + "hosts", + " ", + "a", + " ", + "range-", + "of", + " ", + "other", + " ", + "projects", + ]); + }); + + it("should not tokenize number", () => { + const text = "99,100.99"; + const tokens = parseTokens(text); + expect(tokens).toEqual(["99,100.99"]); + }); + + it("should tokenize joined emojis", () => { + const text = `😬🌍🗺🔥☂️👩🏽‍🦰👨‍👩‍👧‍👦👩🏾‍🔬🏳️‍🌈🧔‍♀️🧑‍🤝‍🧑🙅🏽‍♂️✅0️⃣🇨🇿🦅`; + const tokens = parseTokens(text); + + expect(tokens).toEqual([ + "😬", + "🌍", + "🗺", + "🔥", + "☂️", + "👩🏽‍🦰", + "👨‍👩‍👧‍👦", + "👩🏾‍🔬", + "🏳️‍🌈", + "🧔‍♀️", + "🧑‍🤝‍🧑", + "🙅🏽‍♂️", + "✅", + "0️⃣", + "🇨🇿", + "🦅", + ]); + }); + + it("should tokenize emojis mixed with mixed text", () => { + const text = 
`😬a🌍b🗺c🔥d☂️《👩🏽‍🦰》👨‍👩‍👧‍👦德👩🏾‍🔬こ🏳️‍🌈안🧔‍♀️g🧑‍🤝‍🧑h🙅🏽‍♂️e✅f0️⃣g🇨🇿10🦅#hash`; + const tokens = parseTokens(text); + + expect(tokens).toEqual([ + "😬", + "a", + "🌍", + "b", + "🗺", + "c", + "🔥", + "d", + "☂️", + "《", + "👩🏽‍🦰", + "》", + "👨‍👩‍👧‍👦", + "德", + "👩🏾‍🔬", + "こ", + "🏳️‍🌈", + "안", + "🧔‍♀️", + "g", + "🧑‍🤝‍🧑", + "h", + "🙅🏽‍♂️", + "e", + "✅", + "f0️⃣g", // bummer, but ok, as we traded kecaps not breaking (less common) for hash and numbers not breaking (more common) + "🇨🇿", + "10", // nice! do not break the number, as it's by default matched by \p{Emoji} + "🦅", + "#hash", // nice! do not break the hash, as it's by default matched by \p{Emoji} + ]); + }); + + it("should tokenize decomposed chars into their composed variants", () => { + // each input character is in a decomposed form + const text = "čでäぴέ다й한"; + expect(text.normalize("NFC").length).toEqual(8); + expect(text).toEqual(text.normalize("NFD")); + + const tokens = parseTokens(text); + expect(tokens.length).toEqual(8); + expect(tokens).toEqual(["č", "で", "ä", "ぴ", "έ", "다", "й", "한"]); + }); + + it("should tokenize artificial CJK", () => { + const text = `《道德經》醫-醫こんにちは世界!안녕하세요세계;요』,다.다...원/달(((다)))[[1]]〚({((한))>)〛(「た」)た…[Hello] \t World?ニューヨーク・¥3700.55す。090-1234-5678¥1,000〜$5,000「素晴らしい!」〔重要〕#1:Taro君30%は、(たなばた)〰¥110±¥570で20℃〜9:30〜10:00【一番】`; + // [ + // '《道', '德', '經》', '醫-', + // '醫', 'こ', 'ん', 'に', + // 'ち', 'は', '世', '界!', + // '안', '녕', '하', '세', + // '요', '세', '계;', '요』,', + // '다.', '다...', '원/', '달', + // '(((다)))', '[[1]]', '〚({((한))>)〛', '(「た」)', + // 'た…', '[Hello]', ' ', '\t', + // ' ', 'World?', 'ニ', 'ュ', + // 'ー', 'ヨ', 'ー', 'ク・', + // '¥3700.55', 'す。', '090-', '1234-', + // '5678', '¥1,000〜', '$5,000', '「素', + // '晴', 'ら', 'し', 'い!」', + // '〔重', '要〕', '#', '1:', + // 'Taro', '君', '30%', 'は、', + // '(た', 'な', 'ば', 'た)', + // '〰', '¥110±', '¥570', 'で', + // '20℃〜', '9:30〜', '10:00', '【一', + // '番】' + // ] + const tokens = parseTokens(text); + + // Latin + expect(tokens).toContain("[[1]]"); + 
expect(tokens).toContain("[Hello]"); + expect(tokens).toContain("World?"); + expect(tokens).toContain("Taro"); + + // Chinese + expect(tokens).toContain("《道"); + expect(tokens).toContain("德"); + expect(tokens).toContain("經》"); + expect(tokens).toContain("醫-"); + expect(tokens).toContain("醫"); + + // Japanese + expect(tokens).toContain("こ"); + expect(tokens).toContain("ん"); + expect(tokens).toContain("に"); + expect(tokens).toContain("ち"); + expect(tokens).toContain("は"); + expect(tokens).toContain("世"); + expect(tokens).toContain("ク・"); + expect(tokens).toContain("界!"); + expect(tokens).toContain("た…"); + expect(tokens).toContain("す。"); + expect(tokens).toContain("ュ"); + expect(tokens).toContain("「素"); + expect(tokens).toContain("晴"); + expect(tokens).toContain("ら"); + expect(tokens).toContain("し"); + expect(tokens).toContain("い!」"); + expect(tokens).toContain("君"); + expect(tokens).toContain("は、"); + expect(tokens).toContain("(た"); + expect(tokens).toContain("な"); + expect(tokens).toContain("ば"); + expect(tokens).toContain("た)"); + expect(tokens).toContain("で"); + expect(tokens).toContain("【一"); + expect(tokens).toContain("番】"); + + // Check for Korean + expect(tokens).toContain("안"); + expect(tokens).toContain("녕"); + expect(tokens).toContain("하"); + expect(tokens).toContain("세"); + expect(tokens).toContain("요"); + expect(tokens).toContain("세"); + expect(tokens).toContain("계;"); + expect(tokens).toContain("요』,"); + expect(tokens).toContain("다."); + expect(tokens).toContain("다..."); + expect(tokens).toContain("원/"); + expect(tokens).toContain("달"); + expect(tokens).toContain("(((다)))"); + expect(tokens).toContain("〚({((한))>)〛"); + expect(tokens).toContain("(「た」)"); + + // Numbers and units + expect(tokens).toContain("¥3700.55"); + expect(tokens).toContain("090-"); + expect(tokens).toContain("1234-"); + expect(tokens).toContain("5678"); + expect(tokens).toContain("¥1,000〜"); + expect(tokens).toContain("$5,000"); + expect(tokens).toContain("1:"); + 
expect(tokens).toContain("30%"); + expect(tokens).toContain("¥110±"); + expect(tokens).toContain("20℃〜"); + expect(tokens).toContain("9:30〜"); + expect(tokens).toContain("10:00"); + + // Punctuation and symbols + expect(tokens).toContain(" "); + expect(tokens).toContain("\t"); + expect(tokens).toContain(" "); + expect(tokens).toContain("ニ"); + expect(tokens).toContain("ー"); + expect(tokens).toContain("ヨ"); + expect(tokens).toContain("〰"); + expect(tokens).toContain("#"); + }); + }); +}); diff --git a/packages/excalidraw/element/textWrapping.ts b/packages/excalidraw/element/textWrapping.ts new file mode 100644 index 0000000000..597f62e151 --- /dev/null +++ b/packages/excalidraw/element/textWrapping.ts @@ -0,0 +1,568 @@ +import { ENV } from "../constants"; +import { charWidth, getLineWidth } from "./textElement"; +import type { FontString } from "./types"; + +let cachedCjkRegex: RegExp | undefined; +let cachedLineBreakRegex: RegExp | undefined; +let cachedEmojiRegex: RegExp | undefined; + +/** + * Test if a given text contains any CJK characters (including symbols, punctuation, etc,). + */ +export const containsCJK = (text: string) => { + if (!cachedCjkRegex) { + cachedCjkRegex = Regex.class(...Object.values(CJK)); + } + + return cachedCjkRegex.test(text); +}; + +const getLineBreakRegex = () => { + if (!cachedLineBreakRegex) { + try { + cachedLineBreakRegex = getLineBreakRegexAdvanced(); + } catch { + cachedLineBreakRegex = getLineBreakRegexSimple(); + } + } + + return cachedLineBreakRegex; +}; + +const getEmojiRegex = () => { + if (!cachedEmojiRegex) { + cachedEmojiRegex = getEmojiRegexUnicode(); + } + + return cachedEmojiRegex; +}; + +/** + * Common symbols used across different languages. + */ +const COMMON = { + /** + * Natural breaking points for any grammars. 
+ * + * Hello world + * ↑ BREAK ALWAYS " " → ["Hello", " ", "world"] + * Hello-world + * ↑ BREAK AFTER "-" → ["Hello-", "world"] + */ + WHITESPACE: /\s/u, + HYPHEN: /-/u, + /** + * Generally do not break, unless closed symbol is followed by an opening symbol. + * + * Also, western punctation is often used in modern Korean and expects to be treated + * similarly to the CJK opening and closing symbols. + * + * Hello(한글)→ ["Hello", "(한", "글)"] + * ↑ BREAK BEFORE "(" + * ↑ BREAK AFTER ")" + */ + OPENING: /<\(\[\{/u, + CLOSING: />\)\]\}.,:;!\?…\//u, +}; + +/** + * Characters and symbols used in Chinese, Japanese and Korean. + */ +const CJK = { + /** + * Every CJK breaks before and after, unless it's paired with an opening or closing symbol. + * + * Does not include every possible char used in CJK texts, such as currency, parentheses or punctuation. + */ + CHAR: /\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}`'^〃〰〆#&*+-ー/\=|¦〒¬ ̄/u, + /** + * Opening and closing CJK punctuation breaks before and after all such characters (in case of many), + * and creates pairs with neighboring characters. + * + * Hello た。→ ["Hello", "た。"] + * ↑ DON'T BREAK "た。" + * * Hello「た」 World → ["Hello", "「た」", "World"] + * ↑ DON'T BREAK "「た" + * ↑ DON'T BREAK "た" + * ↑ BREAK BEFORE "「" + * ↑ BREAK AFTER "」" + */ + // eslint-disable-next-line prettier/prettier + OPENING:/([{〈《⦅「「『【〖〔〘〚<〝/u, + CLOSING: /)]}〉》⦆」」』】〗〕〙〛>。.,、〟‥?!:;・〜〞/u, + /** + * Currency symbols break before, not after + * + * Price¥100 → ["Price", "¥100"] + * ↑ BREAK BEFORE "¥" + */ + CURRENCY: /¥₩£¢$/u, +}; + +const EMOJI = { + FLAG: /\p{RI}\p{RI}/u, + JOINER: + /(?:\p{Emoji_Modifier}|\uFE0F\u20E3?|[\u{E0020}-\u{E007E}]+\u{E007F})?/u, + ZWJ: /\u200D/u, + ANY: /[\p{Emoji}]/u, + MOST: /[\p{Extended_Pictographic}\p{Emoji_Presentation}]/u, +}; + +/** + * Simple fallback for browsers (mainly Safari < 16.4) that don't support "Lookbehind assertion". 
+ * + * Browser support as of 10/2024: + * - 91% Lookbehind assertion https://caniuse.com/mdn-javascript_regular_expressions_lookbehind_assertion + * - 94% Unicode character class escape https://caniuse.com/mdn-javascript_regular_expressions_unicode_character_class_escape + * + * Does not include advanced CJK breaking rules, but covers most of the core cases, especially for latin. + */ +const getLineBreakRegexSimple = () => + Regex.or( + getEmojiRegex(), + Break.On(COMMON.HYPHEN, COMMON.WHITESPACE, CJK.CHAR), + ); + +/** + * Specifies the line breaking rules based for alphabetic-based languages, + * Chinese, Japanese, Korean and Emojis. + * + * "Hello-world" → ["Hello-", "world"] + * "Hello 「世界。」🌎🗺" → ["Hello", " ", "「世", "界。」", "🌎", "🗺"] + */ +const getLineBreakRegexAdvanced = () => + Regex.or( + // Unicode-defined regex for (multi-codepoint) Emojis + getEmojiRegex(), + // Rules for whitespace and hyphen + Break.Before(COMMON.WHITESPACE).Build(), + Break.After(COMMON.WHITESPACE, COMMON.HYPHEN).Build(), + // Rules for CJK (chars, symbols, currency) + Break.Before(CJK.CHAR, CJK.CURRENCY) + .NotPrecededBy(COMMON.OPENING, CJK.OPENING) + .Build(), + Break.After(CJK.CHAR) + .NotFollowedBy(COMMON.HYPHEN, COMMON.CLOSING, CJK.CLOSING) + .Build(), + // Rules for opening and closing punctuation + Break.BeforeMany(CJK.OPENING).NotPrecededBy(COMMON.OPENING).Build(), + Break.AfterMany(CJK.CLOSING).NotFollowedBy(COMMON.CLOSING).Build(), + Break.AfterMany(COMMON.CLOSING).FollowedBy(COMMON.OPENING).Build(), + ); + +/** + * Matches various emoji types. + * + * 1. basic emojis (😀, 🌍) + * 2. flags (🇨🇿) + * 3. 
multi-codepoint emojis: + * - skin tones (👍🏽) + * - variation selectors (☂️) + * - keycaps (1️⃣) + * - tag sequences (🏴󠁧󠁢󠁥󠁮󠁧󠁿) + * - emoji sequences (👨‍👩‍👧‍👦, 👩‍🚀, 🏳️‍🌈) + * + * Unicode points: + * - \uFE0F: presentation selector + * - \u20E3: enclosing keycap + * - \u200D: zero width joiner + * - \u{E0020}-\u{E007E}: tags + * - \u{E007F}: cancel tag + * + * @see https://unicode.org/reports/tr51/#EBNF_and_Regex, with changes: + * - replaced \p{Emoji} with [\p{Extended_Pictographic}\p{Emoji_Presentation}], see more in `should tokenize emojis mixed with mixed text` test + * - replaced \p{Emod} with \p{Emoji_Modifier} as some engines do not understand the abbreviation (i.e. https://devina.io/redos-checker) + */ +const getEmojiRegexUnicode = () => + Regex.group( + Regex.or( + EMOJI.FLAG, + Regex.and( + EMOJI.MOST, + EMOJI.JOINER, + Regex.build( + `(?:${EMOJI.ZWJ.source}(?:${EMOJI.FLAG.source}|${EMOJI.ANY.source}${EMOJI.JOINER.source}))*`, + ), + ), + ), + ); + +/** + * Regex utilities for unicode character classes. + */ +const Regex = { + /** + * Builds a regex from a string. + */ + build: (regex: string): RegExp => new RegExp(regex, "u"), + /** + * Joins regexes into a single string. + */ + join: (...regexes: RegExp[]): string => regexes.map((x) => x.source).join(""), + /** + * Joins regexes into a single regex as with "and" operator. + */ + and: (...regexes: RegExp[]): RegExp => Regex.build(Regex.join(...regexes)), + /** + * Joins regexes into a single regex with "or" operator. + */ + or: (...regexes: RegExp[]): RegExp => + Regex.build(regexes.map((x) => x.source).join("|")), + /** + * Puts regexes into a matching group. + */ + group: (...regexes: RegExp[]): RegExp => + Regex.build(`(${Regex.join(...regexes)})`), + /** + * Puts regexes into a character class. 
+ */ + class: (...regexes: RegExp[]): RegExp => + Regex.build(`[${Regex.join(...regexes)}]`), +}; + +/** + * Human-readable lookahead and lookbehind utilities for defining line break + * opportunities between pairs of character classes. + */ +const Break = { + /** + * Break on the given class of characters. + */ + On: (...regexes: RegExp[]) => { + const joined = Regex.join(...regexes); + return Regex.build(`([${joined}])`); + }, + /** + * Break before the given class of characters. + */ + Before: (...regexes: RegExp[]) => { + const joined = Regex.join(...regexes); + const builder = () => Regex.build(`(?=[${joined}])`); + return Break.Chain(builder) as Omit< + ReturnType, + "FollowedBy" + >; + }, + /** + * Break after the given class of characters. + */ + After: (...regexes: RegExp[]) => { + const joined = Regex.join(...regexes); + const builder = () => Regex.build(`(?<=[${joined}])`); + return Break.Chain(builder) as Omit< + ReturnType, + "PreceededBy" + >; + }, + /** + * Break before one or multiple characters of the same class. + */ + BeforeMany: (...regexes: RegExp[]) => { + const joined = Regex.join(...regexes); + const builder = () => Regex.build(`(?, + "FollowedBy" + >; + }, + /** + * Break after one or multiple character from the same class. + */ + AfterMany: (...regexes: RegExp[]) => { + const joined = Regex.join(...regexes); + const builder = () => Regex.build(`(?<=[${joined}])(?![${joined}])`); + return Break.Chain(builder) as Omit< + ReturnType, + "PreceededBy" + >; + }, + /** + * Do not break before the given class of characters. + */ + NotBefore: (...regexes: RegExp[]) => { + const joined = Regex.join(...regexes); + const builder = () => Regex.build(`(?![${joined}])`); + return Break.Chain(builder) as Omit< + ReturnType, + "NotFollowedBy" + >; + }, + /** + * Do not break after the given class of characters. 
+ */ + NotAfter: (...regexes: RegExp[]) => { + const joined = Regex.join(...regexes); + const builder = () => Regex.build(`(?, + "NotPrecededBy" + >; + }, + Chain: (rootBuilder: () => RegExp) => ({ + /** + * Build the root regex. + */ + Build: rootBuilder, + /** + * Specify additional class of characters that should precede the root regex. + */ + PreceededBy: (...regexes: RegExp[]) => { + const root = rootBuilder(); + const preceeded = Break.After(...regexes).Build(); + const builder = () => Regex.and(preceeded, root); + return Break.Chain(builder) as Omit< + ReturnType, + "PreceededBy" + >; + }, + /** + * Specify additional class of characters that should follow the root regex. + */ + FollowedBy: (...regexes: RegExp[]) => { + const root = rootBuilder(); + const followed = Break.Before(...regexes).Build(); + const builder = () => Regex.and(root, followed); + return Break.Chain(builder) as Omit< + ReturnType, + "FollowedBy" + >; + }, + /** + * Specify additional class of characters that should not precede the root regex. + */ + NotPrecededBy: (...regexes: RegExp[]) => { + const root = rootBuilder(); + const notPreceeded = Break.NotAfter(...regexes).Build(); + const builder = () => Regex.and(notPreceeded, root); + return Break.Chain(builder) as Omit< + ReturnType, + "NotPrecededBy" + >; + }, + /** + * Specify additional class of characters that should not follow the root regex. + */ + NotFollowedBy: (...regexes: RegExp[]) => { + const root = rootBuilder(); + const notFollowed = Break.NotBefore(...regexes).Build(); + const builder = () => Regex.and(root, notFollowed); + return Break.Chain(builder) as Omit< + ReturnType, + "NotFollowedBy" + >; + }, + }), +}; + +/** + * Breaks the line into the tokens based on the found line break opporutnities. 
*/
export const parseTokens = (line: string): string[] => {
  const breakLineRegex = getLineBreakRegex();

  // Compose to NFC before splitting: canonically equivalent multi-codepoint
  // forms of chars like č, で must not be broken between the base char and
  // its combining mark (e.g. between c and ˇ).
  // `filter(Boolean)` drops the empty strings produced by `split`; the original
  // note attributes them to multi-codepoint chars like 👨‍👩‍👧‍👦, 👩🏽‍🦰.
  return line.normalize("NFC").split(breakLineRegex).filter(Boolean);
};

/**
 * Wraps the original text into the lines based on the given width.
 *
 * The text is split on "\n" and every line is handled independently: a line
 * whose measured width already fits within `maxWidth` is kept untouched, a
 * longer one is delegated to `wrapLine`.
 *
 * @param text - text to wrap, possibly containing "\n" line breaks
 * @param font - font string forwarded to the width measurements
 * @param maxWidth - maximum allowed width of a single line
 * @returns the resulting lines joined with "\n"; `text` itself, unchanged,
 * when `maxWidth` is negative, `NaN` or infinite
 */
export const wrapText = (
  text: string,
  font: FontString,
  maxWidth: number,
): string => {
  // if maxWidth is not finite or NaN which can happen in case of bugs in
  // computation, we need to make sure we don't continue as we'll end up
  // in an infinite loop
  if (!Number.isFinite(maxWidth) || maxWidth < 0) {
    return text;
  }

  const lines: string[] = [];
  const originalLines = text.split("\n");

  for (const originalLine of originalLines) {
    const currentLineWidth = getLineWidth(originalLine, font, true);

    // fast path: the line fits as it is
    if (currentLineWidth <= maxWidth) {
      lines.push(originalLine);
      continue;
    }

    const wrappedLine = wrapLine(originalLine, font, maxWidth);
    lines.push(...wrappedLine);
  }

  return lines.join("\n");
};

/**
 * Wraps the original line into the lines based on the given width.
 *
 * Tokens produced by `parseTokens` are appended greedily to the current line:
 * - a token containing whitespace is always appended, without a width check;
 * - a token that no longer fits closes the current line (pushed with its
 *   trailing whitespace removed) and is retried against an empty line;
 * - a token that does not fit even on an empty line is split by `wrapWord`;
 *   its last fragment stays open so that following tokens may still join it.
 *
 * The last line is passed through `trimLine` before being pushed.
 *
 * @param line - the line to wrap (`wrapText` passes lines already split on "\n")
 * @param font - font string forwarded to the width measurements
 * @param maxWidth - maximum allowed width of a single line
 * @returns the wrapped lines, in order
 */
const wrapLine = (
  line: string,
  font: FontString,
  maxWidth: number,
): string[] => {
  const lines: string[] = [];
  const tokens = parseTokens(line);
  const tokenIterator = tokens[Symbol.iterator]();

  let currentLine = "";
  let currentLineWidth = 0;

  let iterator = tokenIterator.next();

  while (!iterator.done) {
    const token = iterator.value;
    const testLine = currentLine + token;

    // cache single codepoint whitespace, CJK or emoji width calc. as kerning should not apply here
    const testLineWidth = isSingleCharacter(token)
      ? currentLineWidth + charWidth.calculate(token, font)
      : getLineWidth(testLine, font, true);

    // build up the current line, skipping length check for possibly trailing whitespaces
    if (/\s/.test(token) || testLineWidth <= maxWidth) {
      currentLine = testLine;
      currentLineWidth = testLineWidth;
      iterator = tokenIterator.next();
      continue;
    }

    // current line is empty => just the token (word) is longer than `maxWidth` and needs to be wrapped
    if (!currentLine) {
      const wrappedWord = wrapWord(token, font, maxWidth);
      const trailingLine = wrappedWord[wrappedWord.length - 1] ?? "";
      const precedingLines = wrappedWord.slice(0, -1);

      lines.push(...precedingLines);

      // trailing line of the wrapped word might still be joined with next token/s
      currentLine = trailingLine;
      currentLineWidth = getLineWidth(trailingLine, font, true);
      iterator = tokenIterator.next();
    } else {
      // push & reset, but don't iterate on the next token, as we didn't use it yet!
      lines.push(currentLine.trimEnd());

      // purposefully not iterating and not setting `currentLine` to `token`, so that we could use a simple !currentLine check above
      currentLine = "";
      currentLineWidth = 0;
    }
  }

  // iterator done, push the trailing line if exists
  if (currentLine) {
    const trailingLine = trimLine(currentLine, font, maxWidth);
    lines.push(trailingLine);
  }

  return lines;
};

/**
 * Wraps the word into the lines based on the given width.
*/
const wrapWord = (
  word: string,
  font: FontString,
  maxWidth: number,
): string[] => {
  // multi-codepoint emojis are already broken apart and shouldn't be broken further
  if (getEmojiRegex().test(word)) {
    return [word];
  }

  satisfiesWordInvariant(word);

  const lines: string[] = [];

  let currentLine = "";
  let currentLineWidth = 0;

  // `Array.from` iterates by code point, so surrogate pairs are never split
  for (const char of Array.from(word)) {
    const _charWidth = charWidth.calculate(char, font);
    const testLineWidth = currentLineWidth + _charWidth;

    if (testLineWidth <= maxWidth) {
      currentLine = currentLine + char;
      currentLineWidth = testLineWidth;
      continue;
    }

    // the char does not fit: close the current line (if any) and start a new
    // one with this char, even when the char alone is wider than `maxWidth`
    if (currentLine) {
      lines.push(currentLine);
    }

    currentLine = char;
    currentLineWidth = _charWidth;
  }

  if (currentLine) {
    lines.push(currentLine);
  }

  return lines;
};

/**
 * Similarly to browsers, does not trim all trailing whitespaces, but only those exceeding the `maxWidth`.
 *
 * @param line - the line to trim
 * @param font - font string forwarded to the width measurements
 * @param maxWidth - maximum allowed width of the line
 * @returns `line` untouched when its measured width is within `maxWidth`;
 * otherwise the line without its trailing whitespace run, followed by as many
 * of those whitespace characters (in order) as still fit within `maxWidth`
 */
const trimLine = (line: string, font: FontString, maxWidth: number) => {
  const shouldTrimWhitespaces = getLineWidth(line, font, true) > maxWidth;

  if (!shouldTrimWhitespaces) {
    return line;
  }

  // defensively default to `trimEnd` in case the regex does not match
  const [, content, whitespaces] = line.match(/^(.+?)(\s+)$/) ?? [
    line,
    line.trimEnd(),
    "",
  ];

  let trimmedLine = content;
  let trimmedLineWidth = getLineWidth(trimmedLine, font, true);

  // re-append the trailing whitespaces one by one until the next one would overflow
  for (const whitespace of Array.from(whitespaces)) {
    const _charWidth = charWidth.calculate(whitespace, font);
    const testLineWidth = trimmedLineWidth + _charWidth;

    if (testLineWidth > maxWidth) {
      break;
    }

    trimmedLine = trimmedLine + whitespace;
    trimmedLineWidth = testLineWidth;
  }

  return trimmedLine;
};

/**
 * Check if the given string is a single character.
 *
 * Handles multi-byte chars (é, 中) and purposefully does not handle multi-codepoint char (👨‍👩‍👧‍👦, 👩🏽‍🦰).
 *
 * NOTE: the check relies on `codePointAt(1)`, which indexes UTF-16 code units, so a lone
 * code point above U+FFFF (stored as a surrogate pair) is also reported as not single.
+ */ +const isSingleCharacter = (maybeSingleCharacter: string) => { + return ( + maybeSingleCharacter.codePointAt(0) !== undefined && + maybeSingleCharacter.codePointAt(1) === undefined + ); +}; + +/** + * Invariant for the word wrapping algorithm: a word must not contain any whitespace. + * + * The check is active only when import.meta.env.MODE equals ENV.TEST or import.meta.env.DEV is truthy; otherwise this function does nothing. + * + * @param word - the word to validate + * @throws Error when the check is active and the word contains a whitespace character + */ +const satisfiesWordInvariant = (word: string) => { + if (import.meta.env.MODE === ENV.TEST || import.meta.env.DEV) { + if (/\s/.test(word)) { + throw new Error("Word should not contain any whitespaces!"); + } + } +}; diff --git a/packages/excalidraw/element/textWysiwyg.tsx b/packages/excalidraw/element/textWysiwyg.tsx index 23778cb7b5..5663af8f8b 100644 --- a/packages/excalidraw/element/textWysiwyg.tsx +++ b/packages/excalidraw/element/textWysiwyg.tsx @@ -27,13 +27,13 @@ import { getTextWidth, normalizeText, redrawTextBoundingBox, - wrapText, getBoundTextMaxHeight, getBoundTextMaxWidth, computeContainerDimensionForBoundText, computeBoundTextPosition, getBoundTextElement, } from "./textElement"; +import { wrapText } from "./textWrapping"; import { actionDecreaseFontSize, actionIncreaseFontSize, diff --git a/packages/excalidraw/fonts/Fonts.ts b/packages/excalidraw/fonts/Fonts.ts index 31b5ad000d..3e307e7daa 100644 --- a/packages/excalidraw/fonts/Fonts.ts +++ b/packages/excalidraw/fonts/Fonts.ts @@ -7,11 +7,8 @@ import { getFontFamilyFallbacks, } from "../constants"; import { isTextElement } from "../element"; -import { - charWidth, - containsCJK, - getContainerElement, -} from "../element/textElement"; +import { charWidth, getContainerElement } from "../element/textElement"; +import { containsCJK } from "../element/textWrapping"; import { ShapeCache } from "../scene/ShapeCache"; import { getFontString, PromisePool, promiseTry } from "../utils"; import { ExcalidrawFontFace } from "./ExcalidrawFontFace"; diff --git a/packages/excalidraw/tests/linearElementEditor.test.tsx b/packages/excalidraw/tests/linearElementEditor.test.tsx index 5df260d1d5..3341d2da31 100644 ---
a/packages/excalidraw/tests/linearElementEditor.test.tsx +++ b/packages/excalidraw/tests/linearElementEditor.test.tsx @@ -20,7 +20,6 @@ import { LinearElementEditor } from "../element/linearElementEditor"; import { act, queryByTestId, queryByText } from "@testing-library/react"; import { getBoundTextElementPosition, - wrapText, getBoundTextMaxWidth, } from "../element/textElement"; import * as textElementUtils from "../element/textElement"; @@ -29,6 +28,7 @@ import { vi } from "vitest"; import { arrayToMap } from "../utils"; import type { GlobalPoint } from "../../math"; import { pointCenter, pointFrom } from "../../math"; +import { wrapText } from "../element/textWrapping"; const renderInteractiveScene = vi.spyOn( InteractiveCanvas,