markup_parser.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. /**
  2. * Copyright © 2016-2020 Threema GmbH (https://threema.ch/).
  3. *
  4. * This file is part of Threema Web.
  5. *
  6. * Threema Web is free software: you can redistribute it and/or modify it
  7. * under the terms of the GNU Affero General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  14. * General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Affero General Public License
  17. * along with Threema Web. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. // tslint:disable:max-line-length
  20. import {markify, parse, tokenize, TokenType} from '../../src/markup_parser';
  21. describe('Markup Parser', () => {
  // Specs for `tokenize`: each spec feeds one raw string to the tokenizer and
  // compares the returned token list against the exact expected sequence.
  describe('tokenizer', () => {
    // Builders for expected tokens. Each call returns a fresh object so that
    // no token instance is shared between expectations.
    const txt = (value) => ({ kind: TokenType.Text, value });
    const asterisk = () => ({ kind: TokenType.Asterisk });
    const underscore = () => ({ kind: TokenType.Underscore });
    const newline = () => ({ kind: TokenType.Newline });

    it('simple', () => {
      expect(tokenize('hello *there*!')).toEqual([
        txt('hello '),
        asterisk(),
        txt('there'),
        asterisk(),
        txt('!'),
      ]);
    });

    it('nested', () => {
      expect(tokenize('this is *_nested_*!')).toEqual([
        txt('this is '),
        asterisk(),
        underscore(),
        txt('nested'),
        underscore(),
        asterisk(),
        txt('!'),
      ]);
    });

    // Markup characters surrounded by word characters are expected to come
    // back as part of a single text token.
    it('ignore if not along boundary', () => {
      expect(tokenize('this*is_not~at-boundary')).toEqual([
        txt('this*is_not~at-boundary'),
      ]);
    });

    // The underscores inside the URL are expected to stay in the text token.
    it('ignore markup in URLs', () => {
      expect(tokenize('ignore if *in* a link: https://example.com/pic_-_a.jpg')).toEqual([
        txt('ignore if '),
        asterisk(),
        txt('in'),
        asterisk(),
        txt(' a link: https://example.com/pic_-_a.jpg'),
      ]);
    });

    // Line breaks are expected as dedicated newline tokens, not as text.
    it('with newlines', () => {
      expect(tokenize('hello\n*world*\n')).toEqual([
        txt('hello'),
        newline(),
        asterisk(),
        txt('world'),
        asterisk(),
        newline(),
      ]);
    });
  });
  // Specs for `parse`: each spec hands a hand-built token list to the parser
  // and compares the returned HTML string against the exact expected markup.
  describe('parser', () => {
    // Builders for input tokens. Each call returns a fresh object so that
    // every spec passes its own token instances to the parser.
    const txt = (value) => ({ kind: TokenType.Text, value });
    const asterisk = () => ({ kind: TokenType.Asterisk });
    const underscore = () => ({ kind: TokenType.Underscore });
    const tilde = () => ({ kind: TokenType.Tilde });

    it('simple text without formatting', () => {
      const rendered = parse([txt('hello world')]);
      expect(rendered).toEqual('hello world');
    });

    it('simple bold text', () => {
      const rendered = parse([
        txt('hello '),
        asterisk(),
        txt('bold'),
        asterisk(),
      ]);
      expect(rendered).toEqual('hello <span class="text-bold">bold</span>');
    });

    it('simple italic text', () => {
      const rendered = parse([
        txt('hello '),
        underscore(),
        txt('italic'),
        underscore(),
      ]);
      expect(rendered).toEqual('hello <span class="text-italic">italic</span>');
    });

    it('simple strikethrough text', () => {
      const rendered = parse([
        txt('hello '),
        tilde(),
        txt('strikethrough'),
        tilde(),
      ]);
      expect(rendered).toEqual('hello <span class="text-strike">strikethrough</span>');
    });

    // Italic opened and closed inside bold: both spans are expected.
    it('correct nesting', () => {
      const rendered = parse([
        txt('hello '),
        asterisk(),
        txt('bold and '),
        underscore(),
        txt('italic'),
        underscore(),
        asterisk(),
      ]);
      expect(rendered).toEqual('hello <span class="text-bold">bold and <span class="text-italic">italic</span></span>');
    });

    // Bold closes before the underscore opened inside it: only the bold span
    // is expected, and both underscores come back as literal characters.
    it('incorrect nesting', () => {
      const rendered = parse([
        asterisk(),
        txt('hi '),
        underscore(),
        txt('there'),
        asterisk(),
        underscore(),
      ]);
      expect(rendered).toEqual('<span class="text-bold">hi _there</span>_');
    });
  });
  /**
   * Run `markify` over a table of cases and assert every result.
   *
   * @param cases - Iterable of `[input, expected]` pairs. `input` is passed to
   *   `markify`; `expected` is the value its result must equal.
   * @throws {Error} If an entry is not an array of exactly two elements. This
   *   surfaces a malformed table entry directly instead of comparing against
   *   `undefined` or silently ignoring extra elements.
   */
  function testPatterns(cases) {
    let index = 0;
    for (const testcase of cases) {
      if (!Array.isArray(testcase) || testcase.length !== 2) {
        throw new Error(`testPatterns: case #${index} must be an [input, expected] pair`);
      }
      const [input, expected] = testcase;
      expect(markify(input)).toEqual(expected);
      index += 1;
    }
  }
  // Specs for `markify`: every case pairs an input string with the exact
  // output string expected back, and is checked through `testPatterns`.
  describe('markify', () => {
    // Builds a case whose expected output is identical to its input.
    const unchanged = (text) => [text, text];

    it('detects bold text', () => {
      const cases = [
        ['*bold text (not italic)*', '<span class="text-bold">bold text (not italic)</span>'],
      ];
      testPatterns(cases);
    });

    it('detects italic text', () => {
      const cases = [
        unchanged('This text is not italic.'),
        ['_This text is italic._', '<span class="text-italic">This text is italic.</span>'],
        ['This text is _partially_ italic', 'This text is <span class="text-italic">partially</span> italic'],
        ['This text has _two_ _italic_ bits', 'This text has <span class="text-italic">two</span> <span class="text-italic">italic</span> bits'],
      ];
      testPatterns(cases);
    });

    it('detects strikethrough text', () => {
      const cases = [
        ['so ~strikethrough~', 'so <span class="text-strike">strikethrough</span>'],
      ];
      testPatterns(cases);
    });

    it('detects mixed markup', () => {
      const cases = [
        ['*bold text with _italic_ *', '<span class="text-bold">bold text with <span class="text-italic">italic</span> </span>'],
        ['*part bold,* _part italic_', '<span class="text-bold">part bold,</span> <span class="text-italic">part italic</span>'],
        ['_italic text with *bold* _', '<span class="text-italic">italic text with <span class="text-bold">bold</span> </span>'],
      ];
      testPatterns(cases);
    });

    // Markup directly next to punctuation or an HTML tag is expected to apply.
    it('is applied on word boundaries', () => {
      const cases = [
        ['(*bold*)', '(<span class="text-bold">bold</span>)'],
        ['¡*Threema* es fantástico!', '¡<span class="text-bold">Threema</span> es fantástico!'],
        ['«_great_ service»', '«<span class="text-italic">great</span> service»'],
        ['"_great_" service', '"<span class="text-italic">great</span>" service'],
        ['*bold*…', '<span class="text-bold">bold</span>…'],
        ['_<a href="https://threema.ch">Threema</a>_', '<span class="text-italic"><a href="https://threema.ch">Threema</a></span>'],
      ];
      testPatterns(cases);
    });

    // Markup characters inside a word, or wrapped by an HTML tag, are
    // expected to pass through untouched.
    it('is only applied on word boundaries', () => {
      const cases = [
        unchanged('so not_really_italic'),
        unchanged('invalid*bold*stuff'),
        unchanged('no~strike~through'),
        ['*bold_but_no~strike~through*', '<span class="text-bold">bold_but_no~strike~through</span>'],
        unchanged('<_< >_>'),
        unchanged('<a href="https://threema.ch">_Threema_</a>'),
      ];
      testPatterns(cases);
    });

    it('does not break URLs', () => {
      const cases = [
        ['https://en.wikipedia.org/wiki/Java_class_file *nice*', 'https://en.wikipedia.org/wiki/Java_class_file <span class="text-bold">nice</span>'],
        unchanged('https://example.com/_output_/'),
        unchanged('https://example.com/*output*/'),
        unchanged('https://example.com?_twitter_impression=true'),
        unchanged('https://example.com?__twitter_impression=true'),
        unchanged('https://example.com?___twitter_impression=true'),
        unchanged('https://example.com/image_-_1.jpg'),
      ];
      testPatterns(cases);
    });

    it('ignores invalid markup', () => {
      const cases = [
        unchanged('*invalid markup (do not parse)_'),
        unchanged('random *asterisk'),
        unchanged('***three asterisks'),
        ['***three asterisks*', '**<span class="text-bold">three asterisks</span>'],
      ];
      testPatterns(cases);
    });

    // A line break anywhere between the markers is expected to disable the
    // markup for that span.
    it('ignores markup with \\n (newline)', () => {
      const cases = [
        unchanged('*First line\n and a new one. (do not parse)*'),
        unchanged('*\nbegins with linebreak. (do not parse)*'),
        unchanged('*Just some text. But it ends with newline (do not parse)\n*'),
      ];
      testPatterns(cases);
    });
  });
  247. });