123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267 |
- /**
- * This file is part of Threema Web.
- *
- * Threema Web is free software: you can redistribute it and/or modify it
- * under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
- * General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with Threema Web. If not, see <http://www.gnu.org/licenses/>.
- */
/**
 * The token types emitted by the tokenizer.
 */
export const enum TokenType {
    Text,        // A run of plain text (carries a `value`)
    Newline,     // A '\n' character
    Asterisk,    // '*' markup character (rendered bold, see cssClasses)
    Underscore,  // '_' markup character (rendered italic, see cssClasses)
    Tilde,       // '~' markup character (rendered strikethrough, see cssClasses)
}
/**
 * A single token produced by `tokenize`.
 * `value` is only populated for `TokenType.Text` tokens.
 */
export interface Token {
    kind: TokenType;
    value?: string;
}
// The markup characters.
// Maps each markup token type back to the literal character it was parsed from.
const markupChars = {
    [TokenType.Asterisk]: '*',
    [TokenType.Underscore]: '_',
    [TokenType.Tilde]: '~',
};
// CSS classes for the HTML markup.
// Maps each markup token type to the class placed on the generated <span>.
const cssClasses = {
    [TokenType.Asterisk]: 'text-bold',
    [TokenType.Underscore]: 'text-italic',
    [TokenType.Tilde]: 'text-strike',
};
- /**
- * Return whether the specified token type is a markup token.
- */
- function isMarkupToken(tokenType: TokenType) {
- return markupChars.hasOwnProperty(tokenType);
- }
- /**
- * Return whether the specified character is a boundary character.
- * When `character` is undefined, the function will return true.
- */
- function isBoundary(character?: string): boolean {
- return character === undefined || /[\s.,!?¡¿‽⸮;:&(){}\[\]⟨⟩‹›«»'"‘’“”*~\-_…⋯᠁]/.test(character);
- }
- /**
- * Return whether the specified character is a URL boundary character.
- * When `character` is undefined, the function will return true.
- *
- * Characters that may be in an URL according to RFC 3986:
- * ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;=%
- */
- function isUrlBoundary(character?: string): boolean {
- return character === undefined || !/[a-zA-Z0-9\-._~:/?#\[\]@!$&'()*+,;=%]/.test(character);
- }
- /**
- * Return whether the specified string starts an URL.
- */
- function isUrlStart(substring: string): boolean {
- return substring.match(/^[a-zA-Z]+:\/\//) != null;
- }
- /**
- * This function accepts a string and returns a list of tokens.
- */
- export function tokenize(text: string): Token[] {
- const tokens = [];
- let textBuf = '';
- let matchingUrl = false;
- const pushTextBufToken = () => {
- if (textBuf.length > 0) {
- tokens.push({ kind: TokenType.Text, value: textBuf });
- textBuf = '';
- }
- };
- for (let i = 0; i < text.length; i++) {
- const currentChar = text[i];
- // Detect URLs
- if (!matchingUrl) {
- matchingUrl = isUrlStart(text.substring(i));
- }
- // URLs have a limited set of boundary characters, therefore we need to
- // treat them separately.
- if (matchingUrl) {
- textBuf += currentChar;
- const nextIsUrlBoundary = isUrlBoundary(text[i + 1]);
- if (nextIsUrlBoundary) {
- pushTextBufToken();
- matchingUrl = false;
- }
- } else {
- const prevIsBoundary = isBoundary(text[i - 1]);
- const nextIsBoundary = isBoundary(text[i + 1]);
- if (currentChar === '*' && (prevIsBoundary || nextIsBoundary)) {
- pushTextBufToken();
- tokens.push({ kind: TokenType.Asterisk });
- } else if (currentChar === '_' && (prevIsBoundary || nextIsBoundary)) {
- pushTextBufToken();
- tokens.push({ kind: TokenType.Underscore });
- } else if (currentChar === '~' && (prevIsBoundary || nextIsBoundary)) {
- pushTextBufToken();
- tokens.push({ kind: TokenType.Tilde });
- } else if (currentChar === '\n') {
- pushTextBufToken();
- tokens.push({ kind: TokenType.Newline });
- } else {
- textBuf += currentChar;
- }
- }
- }
- pushTextBufToken();
- return tokens;
- }
- export function parse(tokens: Token[]): string {
- const stack: Token[] = [];
- // Booleans to avoid searching the stack.
- // This is used for optimization.
- const tokensPresent = {
- [TokenType.Asterisk]: false,
- [TokenType.Underscore]: false,
- [TokenType.Tilde]: false,
- };
- // Helper: When called with a value, mark the token type as present or not.
- // When called without a value, return whether this token type is present.
- const hasToken = (token: TokenType, value?: boolean) => {
- if (value === undefined) {
- return tokensPresent[token];
- }
- tokensPresent[token] = value;
- };
- // Helper: Consume the stack, return a string.
- const consumeStack = () => {
- let textBuf = '';
- for (const token of stack) {
- switch (token.kind) {
- case TokenType.Text:
- textBuf += token.value;
- break;
- case TokenType.Asterisk:
- case TokenType.Underscore:
- case TokenType.Tilde:
- textBuf += markupChars[token.kind];
- break;
- case TokenType.Newline:
- throw new Error('Unexpected newline token on stack');
- default:
- throw new Error('Unknown token on stack: ' + token.kind);
- }
- }
- // Clear stack
- // https://stackoverflow.com/a/1232046
- stack.splice(0, stack.length);
- return textBuf;
- };
- // Helper: Pop the stack, throw an exception if it's empty
- const popStack = () => {
- const stackTop = stack.pop();
- if (stackTop === undefined) {
- throw new Error('Stack is empty');
- }
- return stackTop;
- };
- // Helper: Add markup HTML to the stack
- const pushMarkup = (textParts: string[], cssClass: string) => {
- let html = `<span class="${cssClass}">`;
- for (let i = textParts.length - 1; i >= 0; i--) {
- html += textParts[i];
- }
- html += '</span>';
- stack.push({ kind: TokenType.Text, value: html });
- };
- // Process the tokens. Add them to a stack. When a token pair is complete
- // (e.g. the second asterisk is found), pop the stack until you find the
- // matching token and convert everything in between to formatted text.
- for (const token of tokens) {
- switch (token.kind) {
- // Keep text as-is
- case TokenType.Text:
- stack.push(token);
- break;
- // If a markup token is found, try to find a matching token.
- case TokenType.Asterisk:
- case TokenType.Underscore:
- case TokenType.Tilde:
- // Optimization: Only search the stack if a token with this token type exists
- if (hasToken(token.kind)) {
- // Pop tokens from the stack. If a matching token was found, apply
- // markup to the text parts in between those two tokens.
- const textParts = [];
- while (true) {
- const stackTop = popStack();
- if (stackTop.kind === TokenType.Text) {
- textParts.push(stackTop.value);
- } else if (stackTop.kind === token.kind) {
- if (textParts.length > 0) {
- pushMarkup(textParts, cssClasses[token.kind]);
- } else {
- // If this happens, then two markup chars were following each other (e.g. **hello).
- // In that case, just keep them as regular text characters, without applying any markup.
- const markupChar = markupChars[token.kind];
- stack.push({ kind: TokenType.Text, value: markupChar + markupChar });
- }
- hasToken(token.kind, false);
- break;
- } else if (isMarkupToken(stackTop.kind)) {
- textParts.push(markupChars[stackTop.kind]);
- } else {
- throw new Error('Unknown token on stack: ' + token.kind);
- }
- hasToken(stackTop.kind, false);
- }
- } else {
- stack.push(token);
- hasToken(token.kind, true);
- }
- break;
- // Don't apply formatting across newlines, consume the current stack!
- case TokenType.Newline:
- stack.push({ kind: TokenType.Text, value: consumeStack() + '\n' });
- hasToken(TokenType.Asterisk, false);
- hasToken(TokenType.Underscore, false);
- hasToken(TokenType.Tilde, false);
- break;
- default:
- throw new Error('Invalid token kind: ' + token.kind);
- }
- }
- // Concatenate processed tokens
- return consumeStack();
- }
- export function markify(text: string): string {
- return parse(tokenize(text));
- }
|