markup_parser.ts 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. /**
  2. * This file is part of Threema Web.
  3. *
  4. * Threema Web is free software: you can redistribute it and/or modify it
  5. * under the terms of the GNU Affero General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or (at
  7. * your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Affero General Public License
  15. * along with Threema Web. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. export const enum TokenType {
  18. Text,
  19. Newline,
  20. Asterisk,
  21. Underscore,
  22. Tilde,
  23. }
  24. export interface Token {
  25. kind: TokenType;
  26. value?: string;
  27. }
  28. // The markup characters.
  29. const markupChars = {
  30. [TokenType.Asterisk]: '*',
  31. [TokenType.Underscore]: '_',
  32. [TokenType.Tilde]: '~',
  33. };
  34. // CSS classes for the HTML markup.
  35. const cssClasses = {
  36. [TokenType.Asterisk]: 'text-bold',
  37. [TokenType.Underscore]: 'text-italic',
  38. [TokenType.Tilde]: 'text-strike',
  39. };
  40. /**
  41. * Return whether the specified token type is a markup token.
  42. */
  43. function isMarkupToken(tokenType: TokenType) {
  44. return markupChars.hasOwnProperty(tokenType);
  45. }
  46. /**
  47. * Return whether the specified character is a boundary character.
  48. * When `character` is undefined, the function will return true.
  49. */
  50. function isBoundary(character?: string) {
  51. return character === undefined || /[\s.,!?¡¿‽⸮;:&(){}\[\]⟨⟩‹›«»'"‘’“”*~\-_…⋯᠁]/.test(character);
  52. }
  53. /**
  54. * This function accepts a string and returns a list of tokens.
  55. */
  56. export function tokenize(text: string): Token[] {
  57. const tokens = [];
  58. let textBuf = '';
  59. const pushTextBufToken = () => {
  60. if (textBuf.length > 0) {
  61. tokens.push({ kind: TokenType.Text, value: textBuf });
  62. textBuf = '';
  63. }
  64. };
  65. for (let i = 0; i < text.length; i++) {
  66. const currentChar = text[i];
  67. const prevIsBoundary = isBoundary(text[i - 1]);
  68. const nextIsBoundary = isBoundary(text[i + 1]);
  69. if (currentChar === '*' && (prevIsBoundary || nextIsBoundary)) {
  70. pushTextBufToken();
  71. tokens.push({ kind: TokenType.Asterisk });
  72. } else if (currentChar === '_' && (prevIsBoundary || nextIsBoundary)) {
  73. pushTextBufToken();
  74. tokens.push({ kind: TokenType.Underscore });
  75. } else if (currentChar === '~' && (prevIsBoundary || nextIsBoundary)) {
  76. pushTextBufToken();
  77. tokens.push({ kind: TokenType.Tilde });
  78. } else if (currentChar === '\n') {
  79. pushTextBufToken();
  80. tokens.push({ kind: TokenType.Newline });
  81. } else {
  82. textBuf += currentChar;
  83. }
  84. }
  85. pushTextBufToken();
  86. return tokens;
  87. }
  88. export function parse(tokens: Token[]): string {
  89. const stack: Token[] = [];
  90. // Booleans to avoid searching the stack.
  91. // This is used for optimization.
  92. const tokensPresent = {
  93. [TokenType.Asterisk]: false,
  94. [TokenType.Underscore]: false,
  95. [TokenType.Tilde]: false,
  96. };
  97. // Helper: When called with a value, mark the token type as present or not.
  98. // When called without a value, return whether this token type is present.
  99. const hasToken = (token: TokenType, value?: boolean) => {
  100. if (value === undefined) {
  101. return tokensPresent[token];
  102. }
  103. tokensPresent[token] = value;
  104. };
  105. // Helper: Consume the stack, return a string.
  106. const consumeStack = () => {
  107. let textBuf = '';
  108. for (const token of stack) {
  109. switch (token.kind) {
  110. case TokenType.Text:
  111. textBuf += token.value;
  112. break;
  113. case TokenType.Asterisk:
  114. case TokenType.Underscore:
  115. case TokenType.Tilde:
  116. textBuf += markupChars[token.kind];
  117. break;
  118. case TokenType.Newline:
  119. throw new Error('Unexpected newline token on stack');
  120. default:
  121. throw new Error('Unknown token on stack: ' + token.kind);
  122. }
  123. }
  124. // Clear stack
  125. // https://stackoverflow.com/a/1232046
  126. stack.splice(0, stack.length);
  127. return textBuf;
  128. };
  129. // Helper: Pop the stack, throw an exception if it's empty
  130. const popStack = () => {
  131. const stackTop = stack.pop();
  132. if (stackTop === undefined) {
  133. throw new Error('Stack is empty');
  134. }
  135. return stackTop;
  136. };
  137. // Helper: Add markup HTML to the stack
  138. const pushMarkup = (textParts: string[], cssClass: string) => {
  139. let html = `<span class="${cssClass}">`;
  140. for (let i = textParts.length - 1; i >= 0; i--) {
  141. html += textParts[i];
  142. }
  143. html += '</span>';
  144. stack.push({ kind: TokenType.Text, value: html });
  145. };
  146. // Process the tokens. Add them to a stack. When a token pair is complete
  147. // (e.g. the second asterisk is found), pop the stack until you find the
  148. // matching token and convert everything in between to formatted text.
  149. for (const token of tokens) {
  150. switch (token.kind) {
  151. // Keep text as-is
  152. case TokenType.Text:
  153. stack.push(token);
  154. break;
  155. // If a markup token is found, try to find a matching token.
  156. case TokenType.Asterisk:
  157. case TokenType.Underscore:
  158. case TokenType.Tilde:
  159. // Optimization: Only search the stack if a token with this token type exists
  160. if (hasToken(token.kind)) {
  161. // Pop tokens from the stack. If a matching token was found, apply
  162. // markup to the text parts in between those two tokens.
  163. const textParts = [];
  164. while (true) {
  165. const stackTop = popStack();
  166. if (stackTop.kind === TokenType.Text) {
  167. textParts.push(stackTop.value);
  168. } else if (stackTop.kind === token.kind) {
  169. if (textParts.length > 0) {
  170. pushMarkup(textParts, cssClasses[token.kind]);
  171. } else {
  172. // If this happens, then two markup chars were following each other (e.g. **hello).
  173. // In that case, just keep them as regular text characters, without applying any markup.
  174. const markupChar = markupChars[token.kind];
  175. stack.push({ kind: TokenType.Text, value: markupChar + markupChar });
  176. }
  177. hasToken(token.kind, false);
  178. break;
  179. } else if (isMarkupToken(stackTop.kind)) {
  180. textParts.push(markupChars[stackTop.kind]);
  181. } else {
  182. throw new Error('Unknown token on stack: ' + token.kind);
  183. }
  184. hasToken(stackTop.kind, false);
  185. }
  186. } else {
  187. stack.push(token);
  188. hasToken(token.kind, true);
  189. }
  190. break;
  191. // Don't apply formatting across newlines, consume the current stack!
  192. case TokenType.Newline:
  193. stack.push({ kind: TokenType.Text, value: consumeStack() + '\n' });
  194. hasToken(TokenType.Asterisk, false);
  195. hasToken(TokenType.Underscore, false);
  196. hasToken(TokenType.Tilde, false);
  197. break;
  198. default:
  199. throw new Error('Invalid token kind: ' + token.kind);
  200. }
  201. }
  202. // Concatenate processed tokens
  203. return consumeStack();
  204. }
  205. export function markify(text: string): string {
  206. return parse(tokenize(text));
  207. }