9 months ago · 49fe137553
--- a/src/lib/utils/index.ts
+++ b/src/lib/utils/index.ts
@@ -20,6 +20,8 @@ import markedExtension from '$lib/utils/marked/extension';
 
				 import markedKatexExtension from '$lib/utils/marked/katex-extension';
			
 
				 import hljs from 'highlight.js';
			
 
				 
			
 
				+import { specialCases } from '$lib/utils/processResponseContent/special-cases'
			
 
				+
			
 
				 //////////////////////////
			
 
				 // Helper functions
			
 
				 //////////////////////////
			
@@ -90,6 +92,9 @@ export const sanitizeResponseContent = (content: string) => {
 
				 };
			
 
				 
			
 
				 export const processResponseContent = (content: string) => {
			
 
				+	// This function is used to process the response content 
			
 
				+	// before the response content is rendered.
			
 
				+	content = specialCases(content);
			
 
				 	return content.trim();
			
 
				 };
			
 
				 
			
@@ -324,6 +329,7 @@ export const copyToClipboard = async (text, formatted = false) => {
 
				 		};
			
 
				 		marked.use(markedKatexExtension(options));
			
 
				 		marked.use(markedExtension(options));
			
 
				+		// DEVELOPER NOTE: Go to `$lib/components/chat/Messages/Markdown.svelte` to add extra markdown extensions for rendering.
			
 
				 
			
 
				 		const htmlContent = marked.parse(text);
			
 
				 
			
--- a/src/lib/utils/processResponseContent/special-cases.ts
+++ b/src/lib/utils/processResponseContent/special-cases.ts
@@ -0,0 +1,81 @@
 
				+/* Explanation:

			
 
				+ *   This file handles special cases of LLM output not following markdown syntax.

			
 
				+ *   It obeys the rule of modifying original text as **LITTLE** as possible.

			
 
				+ *   Detailed documentation of rendering problems is provided in comments.

			
 
				+ *   More special cases can be added in future.

			
 
				+ * Note:

			
 
				+ *   It should NOT handle the case unless there is clear evidence that it occurs.

			
 
				+ *   It only deals with special cases, especially with non-English characters, not general ones.

			
 
				+ *   If other general issues are found, new files shall be added to the folder `$lib/utils/processResponseContent/`,

			
 
				+ *   and function `processResponseContent` in `$lib/utils/index.ts` should be updated accordingly.

			
 
				+ */

			
 
				+

			
 
				/**
				 * Patches LLM output that does not follow markdown syntax closely enough to render.
				 *
				 * The input is split on '\n', every line is examined on its own, and the
				 * (possibly adjusted) lines are joined back together with '\n'.
				 */
				export const specialCases = (src: string): string => {
					// 1. Chinese (CN)
					//
					// 1.1. Problems caused by Chinese parentheses
					//   Description:
					//     When `*` has Chinese parentheses directly inside it, the markdown parser
					//     ignores the bold or italic style.
					//     - e.g. `**中文名（English）**中文内容` is rendered literally instead of as
					//            `<strong>中文名（English）</strong>中文内容`.
					//   Solution:
					//     Adding a space before and after the bold/italic part solves the problem.
					//     - e.g. `**中文名（English）**中文内容` -> ` **中文名（English）** 中文内容`
					//   Note:
					//     A similar problem exists with English parentheses and other full-width
					//     delimiters. They are not handled here because they are less likely to
					//     appear in LLM output; change this in the future if needed.
					const patchLine = (text: string): string => {
						// Nothing to do unless the line has both a Chinese character and a `*`.
						if (!/[\u4e00-\u9fa5]/.test(text) || !text.includes('*')) {
							return text;
						}
						// 1.1.1. Handle **bold** with Chinese parentheses.
						const boldHandled = processCN_01(text, '**', '（', '）');
						// 1.1.2. Handle *italic* with Chinese parentheses.
						return processCN_01(boldHandled, '*', '（', '）');
					};

					return src
						.split('\n')
						.map((text) => patchLine(text))
						.join('\n');
				};

			
 
				+

			
 
				+//////////////////////////

			
 
				+// Helper functions

			
 
				+//////////////////////////

			
 
				+

			
 
				/** Returns true when `char` contains a character of the Unicode Han script. */
				function isChineseChar(char: string): boolean {
					return /\p{Script=Han}/u.test(char);
				}

				/** Backslash-escapes regular-expression metacharacters so `string` can be embedded in a pattern literally. */
				function escapeRegExp(string: string): string {
					return string.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
				}


				//////////////////////////
				// Main functions
				//////////////////////////

				/**
				 * Handles cases `1.1.1` and `1.1.2`: pads an emphasis span with one space on each
				 * side when its content touches a Chinese parenthesis.
				 *
				 * A span is padded only when all of the following hold:
				 *   - its content starts with `leftSymbol` or ends with `rightSymbol`;
				 *   - nothing precedes it on the line, or the preceding character is Chinese;
				 *   - nothing follows it on the line, or the following character is Chinese.
				 * Every other span, and all text outside the spans, is returned untouched.
				 *
				 * @param line        A single line of text (no line breaks expected).
				 * @param symbol      Emphasis delimiter, e.g. `**` or `*`.
				 * @param leftSymbol  Opening parenthesis to look for at the start of the content.
				 * @param rightSymbol Closing parenthesis to look for at the end of the content.
				 * @returns The line with qualifying spans surrounded by spaces.
				 */
				function processCN_01(line: string, symbol: string, leftSymbol: string, rightSymbol: string): string {
					const escapedSymbol = escapeRegExp(symbol);
					// Match only the emphasis span itself. The look-arounds keep `*` from matching
					// inside `**`. Neighbouring text is deliberately NOT captured: anything that is
					// part of the match would have to be re-emitted by the replacer or be lost.
					const regex = new RegExp(
						`(?<!${escapedSymbol})(${escapedSymbol})([^${escapedSymbol}]+)(${escapedSymbol})(?!${escapedSymbol})`,
						'g'
					);
					return line.replace(
						regex,
						(match: string, left: string, content: string, right: string, offset: number, whole: string) => {
							const before = whole.slice(0, offset);
							const after = whole.slice(offset + match.length);
							// Read whole code points so Han characters outside the BMP (surrogate pairs)
							// are not split in half before being classified.
							const prevChar = Array.from(before.slice(-2)).pop() ?? '';
							const nextChar = Array.from(after.slice(0, 2))[0] ?? '';

							const touchesParenthesis = content.startsWith(leftSymbol) || content.endsWith(rightSymbol);
							const leftOk = before.length === 0 || isChineseChar(prevChar);
							const rightOk = after.length === 0 || isChineseChar(nextChar);

							if (touchesParenthesis && leftOk && rightOk) {
								return ` ${left}${content}${right} `;
							}
							return match;
						}
					);
				}