6 سال پیش · 6d8ae7e058
--- a/src/markup_parser.ts
+++ b/src/markup_parser.ts
@@ -53,16 +53,35 @@ function isMarkupToken(tokenType: TokenType) {
 
				  * Return whether the specified character is a boundary character.
			
 
				  * When `character` is undefined, the function will return true.
			
 
				  */
			
 
				-function isBoundary(character?: string) {
			
 
				+function isBoundary(character?: string): boolean {
			
 
				     return character === undefined || /[\s.,!?¡¿‽⸮;:&(){}\[\]⟨⟩‹›«»'"‘’“”*~\-_…⋯᠁]/.test(character);
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * Return whether the specified character is a URL boundary character.
			
 
				+ * When `character` is undefined, the function will return true.
			
 
				+ *
			
 
				+ * Characters that may be in an URL according to RFC 3986:
			
 
				+ * ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;=%
			
 
				+ */
			
 
				+function isUrlBoundary(character?: string): boolean {
			
 
				+    return character === undefined || !/[a-zA-Z0-9\-._~:/?#\[\]@!$&'()*+,;=%]/.test(character);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Return whether the specified string starts an URL.
			
 
				+ */
			
 
				+function isUrlStart(substring: string): boolean {
			
 
				+    return substring.match(/^[a-zA-Z]+:\/\//) != null;
			
 
				+}
			
 
				+
			
 
				 /**
			
 
				  * This function accepts a string and returns a list of tokens.
			
 
				  */
			
 
				 export function tokenize(text: string): Token[] {
			
 
				     const tokens = [];
			
 
				     let textBuf = '';
			
 
				+    let matchingUrl = false;
			
 
				 
			
 
				     const pushTextBufToken = () => {
			
 
				         if (textBuf.length > 0) {
			
@@ -73,23 +92,39 @@ export function tokenize(text: string): Token[] {
 
				 
			
 
				     for (let i = 0; i < text.length; i++) {
			
 
				         const currentChar = text[i];
			
 
				-        const prevIsBoundary = isBoundary(text[i - 1]);
			
 
				-        const nextIsBoundary = isBoundary(text[i + 1]);
			
 
				-
			
 
				-        if (currentChar === '*' && (prevIsBoundary || nextIsBoundary)) {
			
 
				-            pushTextBufToken();
			
 
				-            tokens.push({ kind: TokenType.Asterisk });
			
 
				-        } else if (currentChar === '_' && (prevIsBoundary || nextIsBoundary)) {
			
 
				-            pushTextBufToken();
			
 
				-            tokens.push({ kind: TokenType.Underscore });
			
 
				-        } else if (currentChar === '~' && (prevIsBoundary || nextIsBoundary)) {
			
 
				-            pushTextBufToken();
			
 
				-            tokens.push({ kind: TokenType.Tilde });
			
 
				-        } else if (currentChar === '\n') {
			
 
				-            pushTextBufToken();
			
 
				-            tokens.push({ kind: TokenType.Newline });
			
 
				-        } else {
			
 
				+
			
 
				+        // Detect URLs
			
 
				+        if (!matchingUrl) {
			
 
				+            matchingUrl = isUrlStart(text.substring(i));
			
 
				+        }
			
 
				+
			
 
				+        // URLs have a limited set of boundary characters, therefore we need to
			
 
				+        // treat them separately.
			
 
				+        if (matchingUrl) {
			
 
				             textBuf += currentChar;
			
 
				+            const nextIsUrlBoundary = isUrlBoundary(text[i + 1]);
			
 
				+            if (nextIsUrlBoundary) {
			
 
				+                pushTextBufToken();
			
 
				+                matchingUrl = false;
			
 
				+            }
			
 
				+        } else {
			
 
				+            const prevIsBoundary = isBoundary(text[i - 1]);
			
 
				+            const nextIsBoundary = isBoundary(text[i + 1]);
			
 
				+            if (currentChar === '*' && (prevIsBoundary || nextIsBoundary)) {
			
 
				+                pushTextBufToken();
			
 
				+                tokens.push({ kind: TokenType.Asterisk });
			
 
				+            } else if (currentChar === '_' && (prevIsBoundary || nextIsBoundary)) {
			
 
				+                pushTextBufToken();
			
 
				+                tokens.push({ kind: TokenType.Underscore });
			
 
				+            } else if (currentChar === '~' && (prevIsBoundary || nextIsBoundary)) {
			
 
				+                pushTextBufToken();
			
 
				+                tokens.push({ kind: TokenType.Tilde });
			
 
				+            } else if (currentChar === '\n') {
			
 
				+                pushTextBufToken();
			
 
				+                tokens.push({ kind: TokenType.Newline });
			
 
				+            } else {
			
 
				+                textBuf += currentChar;
			
 
				+            }
			
 
				         }
			
 
				     }
			
 
				 
			
--- a/tests/ts/markup_parser.ts
+++ b/tests/ts/markup_parser.ts
@@ -56,15 +56,15 @@ describe('Markup Parser', () => {
 
				             ]);
			
 
				         });
			
 
				 
			
 
				-        it('ignore in URLs', function() {
			
 
				-            const text = 'ignore if *in* a link: https://example.com/_pub_/horse.jpg';
			
 
				+        it('ignore markup in URLs', function() {
			
 
				+            const text = 'ignore if *in* a link: https://example.com/pic_-_a.jpg';
			
 
				             const tokens = tokenize(text);
			
 
				             expect(tokens).toEqual([
			
 
				                 { kind: TokenType.Text, value: 'ignore if ' },
			
 
				                 { kind: TokenType.Asterisk },
			
 
				                 { kind: TokenType.Text, value: 'in' },
			
 
				                 { kind: TokenType.Asterisk },
			
 
				-                { kind: TokenType.Text, value: ' a link: https://example.com/_pub_/horse.jpg' },
			
 
				+                { kind: TokenType.Text, value: ' a link: https://example.com/pic_-_a.jpg' },
			
 
				             ]);
			
 
				         });
			
 
				 
			
@@ -246,6 +246,8 @@ describe('Markup Parser', () => {
 
				                  'https://example.com?__twitter_impression=true'],
			
 
				                 ['https://example.com?___twitter_impression=true',
			
 
				                  'https://example.com?___twitter_impression=true'],
			
 
				+                ['https://example.com/image_-_1.jpg',
			
 
				+                 'https://example.com/image_-_1.jpg'],
			
 
				             ]);
			
 
				         });