|
@@ -90,44 +90,45 @@ export const sanitizeResponseContent = (content: string) => {
|
|
|
};
|
|
|
|
|
|
export const processResponseContent = (content: string) => {
|
|
|
+ content = processChineseContent(content);
|
|
|
+ return content.trim();
|
|
|
+};
|
|
|
+
|
|
|
+function isChineseChar(char: string): boolean {
|
|
|
+ return /\p{Script=Han}/u.test(char);
|
|
|
+}
|
|
|
+
|
|
|
+// Tackle "Model output issue not following the standard Markdown/LaTeX format" in Chinese.
|
|
|
+function processChineseContent(content: string): string {
|
|
|
// This function is used to process the response content before the response content is rendered.
|
|
|
const lines = content.split('\n');
|
|
|
const processedLines = lines.map((line) => {
|
|
|
if (/[\u4e00-\u9fa5]/.test(line)) {
|
|
|
- line = processChineseContent(line);
|
|
|
+ // Problems caused by Chinese parentheses
|
|
|
+ /* Description:
|
|
|
+ * When `*` has Chinese parentheses on the inside, the markdown parser ignores the bold or italic style.
|
|
|
+ * - e.g. `**中文名(English)**中文内容` will be rendered as literal text,
|
|
|
+ * instead of `<strong>中文名(English)</strong>中文内容`.
|
|
|
+ * Solution:
|
|
|
+ * Adding a `space` before and after the bold/italic part can solve the problem.
|
|
|
+ * - e.g. `**中文名(English)**中文内容` -> ` **中文名(English)** 中文内容`
|
|
|
+ * Note:
|
|
|
+ * A similar problem was found with English parentheses and other full delimiters,
|
|
|
+ * but they are not handled here because they are less likely to appear in LLM output.
|
|
|
+ * Change the behavior in the future if needed.
|
|
|
+ */
|
|
|
+ if (line.includes('*')) {
|
|
|
+ // Handle **bold** with Chinese parentheses
|
|
|
+ line = processChineseParentheses(line, '**', '(', ')');
|
|
|
+ // Handle *italic* with Chinese parentheses
|
|
|
+ line = processChineseParentheses(line, '*', '(', ')');
|
|
|
+ }
|
|
|
}
|
|
|
return line;
|
|
|
});
|
|
|
content = processedLines.join('\n');
|
|
|
- return content.trim();
|
|
|
-};
|
|
|
|
|
|
-// Tackle "Model output issue not following the standard Markdown/LaTeX format" in Chinese.
|
|
|
-function processChineseContent(line: string): string {
|
|
|
- // Problems caused by Chinese parentheses
|
|
|
- /* Discription:
|
|
|
- * When `*` has Chinese parentheses on the inside, markdown parser ignore bold or italic style.
|
|
|
- * - e.g. `**中文名(English)**中文内容` will be parsed directly,
|
|
|
- * instead of `<strong>中文名(English)</strong>中文内容`.
|
|
|
- * Solution:
|
|
|
- * Adding a `space` before and after the bold/italic part can solve the problem.
|
|
|
- * - e.g. `**中文名(English)**中文内容` -> ` **中文名(English)** 中文内容`
|
|
|
- * Note:
|
|
|
- * Similar problem was found with English parentheses and other full delimiters,
|
|
|
- * but they are not handled here because they are less likely to appear in LLM output.
|
|
|
- * Change the behavior in future if needed.
|
|
|
- */
|
|
|
- if (line.includes('*')) {
|
|
|
- // Handle **bold** with Chinese parentheses
|
|
|
- line = processChineseParentheses(line, '**', '(', ')');
|
|
|
- // Handle *italic* with Chinese parentheses
|
|
|
- line = processChineseParentheses(line, '*', '(', ')');
|
|
|
- }
|
|
|
- return line;
|
|
|
-}
|
|
|
-
|
|
|
-function isChineseChar(char: string): boolean {
|
|
|
- return /\p{Script=Han}/u.test(char);
|
|
|
+ return content;
|
|
|
}
|
|
|
|
|
|
// Helper function for `processChineseContent`
|