Bläddra i källkod

enh/refac: temp chat file upload behaviour

client-side content extraction
Timothy Jaeryang Baek 3 månader sedan
förälder
incheckning
8d84b4c2a4

+ 6 - 0
backend/open_webui/retrieval/utils.py

@@ -471,6 +471,12 @@ def get_sources_from_files(
                 "documents": [[doc.get("content") for doc in file.get("docs")]],
                 "metadatas": [[doc.get("metadata") for doc in file.get("docs")]],
             }
+        elif file.get("type") == "text":
+            # Text File
+            query_result = {
+                "documents": [[file.get("content")]],
+                "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],
+            }
         elif file.get("type") == "note":
             # Note Attached
             note = Notes.get_note_by_id(file.get("id"))

+ 198 - 0
package-lock.json

@@ -67,6 +67,7 @@
 				"mermaid": "^11.6.0",
 				"paneforge": "^0.0.6",
 				"panzoom": "^9.4.3",
+				"pdfjs-dist": "^5.3.93",
 				"prosemirror-collab": "^1.3.1",
 				"prosemirror-commands": "^1.6.0",
 				"prosemirror-example-setup": "^1.2.3",
@@ -2083,6 +2084,191 @@
 			"resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz",
 			"integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw=="
 		},
+		"node_modules/@napi-rs/canvas": {
+			"version": "0.1.73",
+			"resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.73.tgz",
+			"integrity": "sha512-9iwPZrNlCK4rG+vWyDvyvGeYjck9MoP0NVQP6N60gqJNFA1GsN0imG05pzNsqfCvFxUxgiTYlR8ff0HC1HXJiw==",
+			"license": "MIT",
+			"optional": true,
+			"workspaces": [
+				"e2e/*"
+			],
+			"engines": {
+				"node": ">= 10"
+			},
+			"optionalDependencies": {
+				"@napi-rs/canvas-android-arm64": "0.1.73",
+				"@napi-rs/canvas-darwin-arm64": "0.1.73",
+				"@napi-rs/canvas-darwin-x64": "0.1.73",
+				"@napi-rs/canvas-linux-arm-gnueabihf": "0.1.73",
+				"@napi-rs/canvas-linux-arm64-gnu": "0.1.73",
+				"@napi-rs/canvas-linux-arm64-musl": "0.1.73",
+				"@napi-rs/canvas-linux-riscv64-gnu": "0.1.73",
+				"@napi-rs/canvas-linux-x64-gnu": "0.1.73",
+				"@napi-rs/canvas-linux-x64-musl": "0.1.73",
+				"@napi-rs/canvas-win32-x64-msvc": "0.1.73"
+			}
+		},
+		"node_modules/@napi-rs/canvas-android-arm64": {
+			"version": "0.1.73",
+			"resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.73.tgz",
+			"integrity": "sha512-s8dMhfYIHVv7gz8BXg3Nb6cFi950Y0xH5R/sotNZzUVvU9EVqHfkqiGJ4UIqu+15UhqguT6mI3Bv1mhpRkmMQw==",
+			"cpu": [
+				"arm64"
+			],
+			"license": "MIT",
+			"optional": true,
+			"os": [
+				"android"
+			],
+			"engines": {
+				"node": ">= 10"
+			}
+		},
+		"node_modules/@napi-rs/canvas-darwin-arm64": {
+			"version": "0.1.73",
+			"resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.73.tgz",
+			"integrity": "sha512-bLPCq8Yyq1vMdVdIpQAqmgf6VGUknk8e7NdSZXJJFOA9gxkJ1RGcHOwoXo7h0gzhHxSorg71hIxyxtwXpq10Rw==",
+			"cpu": [
+				"arm64"
+			],
+			"license": "MIT",
+			"optional": true,
+			"os": [
+				"darwin"
+			],
+			"engines": {
+				"node": ">= 10"
+			}
+		},
+		"node_modules/@napi-rs/canvas-darwin-x64": {
+			"version": "0.1.73",
+			"resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.73.tgz",
+			"integrity": "sha512-GR1CcehDjdNYXN3bj8PIXcXfYLUUOQANjQpM+KNnmpRo7ojsuqPjT7ZVH+6zoG/aqRJWhiSo+ChQMRazZlRU9g==",
+			"cpu": [
+				"x64"
+			],
+			"license": "MIT",
+			"optional": true,
+			"os": [
+				"darwin"
+			],
+			"engines": {
+				"node": ">= 10"
+			}
+		},
+		"node_modules/@napi-rs/canvas-linux-arm-gnueabihf": {
+			"version": "0.1.73",
+			"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.73.tgz",
+			"integrity": "sha512-cM7F0kBJVFio0+U2iKSW4fWSfYQ8CPg4/DRZodSum/GcIyfB8+UPJSRM1BvvlcWinKLfX1zUYOwonZX9IFRRcw==",
+			"cpu": [
+				"arm"
+			],
+			"license": "MIT",
+			"optional": true,
+			"os": [
+				"linux"
+			],
+			"engines": {
+				"node": ">= 10"
+			}
+		},
+		"node_modules/@napi-rs/canvas-linux-arm64-gnu": {
+			"version": "0.1.73",
+			"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.73.tgz",
+			"integrity": "sha512-PMWNrMON9uz9klz1B8ZY/RXepQSC5dxxHQTowfw93Tb3fLtWO5oNX2k9utw7OM4ypT9BUZUWJnDQ5bfuXc/EUQ==",
+			"cpu": [
+				"arm64"
+			],
+			"license": "MIT",
+			"optional": true,
+			"os": [
+				"linux"
+			],
+			"engines": {
+				"node": ">= 10"
+			}
+		},
+		"node_modules/@napi-rs/canvas-linux-arm64-musl": {
+			"version": "0.1.73",
+			"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.73.tgz",
+			"integrity": "sha512-lX0z2bNmnk1PGZ+0a9OZwI2lPPvWjRYzPqvEitXX7lspyLFrOzh2kcQiLL7bhyODN23QvfriqwYqp5GreSzVvA==",
+			"cpu": [
+				"arm64"
+			],
+			"license": "MIT",
+			"optional": true,
+			"os": [
+				"linux"
+			],
+			"engines": {
+				"node": ">= 10"
+			}
+		},
+		"node_modules/@napi-rs/canvas-linux-riscv64-gnu": {
+			"version": "0.1.73",
+			"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.73.tgz",
+			"integrity": "sha512-QDQgMElwxAoADsSR3UYvdTTQk5XOyD9J5kq15Z8XpGwpZOZsSE0zZ/X1JaOtS2x+HEZL6z1S6MF/1uhZFZb5ig==",
+			"cpu": [
+				"riscv64"
+			],
+			"license": "MIT",
+			"optional": true,
+			"os": [
+				"linux"
+			],
+			"engines": {
+				"node": ">= 10"
+			}
+		},
+		"node_modules/@napi-rs/canvas-linux-x64-gnu": {
+			"version": "0.1.73",
+			"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.73.tgz",
+			"integrity": "sha512-wbzLJrTalQrpyrU1YRrO6w6pdr5vcebbJa+Aut5QfTaW9eEmMb1WFG6l1V+cCa5LdHmRr8bsvl0nJDU/IYDsmw==",
+			"cpu": [
+				"x64"
+			],
+			"license": "MIT",
+			"optional": true,
+			"os": [
+				"linux"
+			],
+			"engines": {
+				"node": ">= 10"
+			}
+		},
+		"node_modules/@napi-rs/canvas-linux-x64-musl": {
+			"version": "0.1.73",
+			"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.73.tgz",
+			"integrity": "sha512-xbfhYrUufoTAKvsEx2ZUN4jvACabIF0h1F5Ik1Rk4e/kQq6c+Dwa5QF0bGrfLhceLpzHT0pCMGMDeQKQrcUIyA==",
+			"cpu": [
+				"x64"
+			],
+			"license": "MIT",
+			"optional": true,
+			"os": [
+				"linux"
+			],
+			"engines": {
+				"node": ">= 10"
+			}
+		},
+		"node_modules/@napi-rs/canvas-win32-x64-msvc": {
+			"version": "0.1.73",
+			"resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.73.tgz",
+			"integrity": "sha512-YQmHXBufFBdWqhx+ympeTPkMfs3RNxaOgWm59vyjpsub7Us07BwCcmu1N5kildhO8Fm0syoI2kHnzGkJBLSvsg==",
+			"cpu": [
+				"x64"
+			],
+			"license": "MIT",
+			"optional": true,
+			"os": [
+				"win32"
+			],
+			"engines": {
+				"node": ">= 10"
+			}
+		},
 		"node_modules/@nodelib/fs.scandir": {
 			"version": "2.1.5",
 			"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
@@ -9504,6 +9690,18 @@
 				"node": "*"
 			}
 		},
+		"node_modules/pdfjs-dist": {
+			"version": "5.3.93",
+			"resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.3.93.tgz",
+			"integrity": "sha512-w3fQKVL1oGn8FRyx5JUG5tnbblggDqyx2XzA5brsJ5hSuS+I0NdnJANhmeWKLjotdbPQucLBug5t0MeWr0AAdg==",
+			"license": "Apache-2.0",
+			"engines": {
+				"node": ">=20.16.0 || >=22.3.0"
+			},
+			"optionalDependencies": {
+				"@napi-rs/canvas": "^0.1.71"
+			}
+		},
 		"node_modules/pend": {
 			"version": "1.2.0",
 			"resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",

+ 1 - 0
package.json

@@ -111,6 +111,7 @@
 		"mermaid": "^11.6.0",
 		"paneforge": "^0.0.6",
 		"panzoom": "^9.4.3",
+		"pdfjs-dist": "^5.3.93",
 		"prosemirror-collab": "^1.3.1",
 		"prosemirror-commands": "^1.6.0",
 		"prosemirror-example-setup": "^1.2.3",

+ 1 - 1
src/lib/components/chat/Chat.svelte

@@ -1597,7 +1597,7 @@
 		let files = JSON.parse(JSON.stringify(chatFiles));
 		files.push(
 			...(userMessage?.files ?? []).filter((item) =>
-				['doc', 'file', 'note', 'collection'].includes(item.type)
+				['doc', 'text', 'file', 'note', 'collection'].includes(item.type)
 			)
 		);
 		// Remove duplicates

+ 69 - 33
src/lib/components/chat/MessageInput.svelte

@@ -1,4 +1,8 @@
 <script lang="ts">
+	import * as pdfjs from 'pdfjs-dist';
+	import * as pdfWorker from 'pdfjs-dist/build/pdf.worker.mjs';
+	pdfjs.GlobalWorkerOptions.workerSrc = import.meta.url + 'pdfjs-dist/build/pdf.worker.mjs';
+
 	import DOMPurify from 'dompurify';
 	import { marked } from 'marked';
 	import heic2any from 'heic2any';
@@ -23,13 +27,15 @@
 		tools,
 		user as _user,
 		showControls,
-		TTSWorker
+		TTSWorker,
+		temporaryChatEnabled
 	} from '$lib/stores';
 
 	import {
 		blobToFile,
 		compressImage,
 		createMessagesList,
+		extractContentFromFile,
 		extractCurlyBraceWords,
 		extractInputVariables,
 		getCurrentDateTime,
@@ -529,47 +535,77 @@
 
 		files = [...files, fileItem];
 
-		try {
-			// If the file is an audio file, provide the language for STT.
-			let metadata = null;
-			if (
-				(file.type.startsWith('audio/') || file.type.startsWith('video/')) &&
-				$settings?.audio?.stt?.language
-			) {
-				metadata = {
-					language: $settings?.audio?.stt?.language
-				};
-			}
+		if (!$temporaryChatEnabled) {
+			try {
+				// If the file is an audio file, provide the language for STT.
+				let metadata = null;
+				if (
+					(file.type.startsWith('audio/') || file.type.startsWith('video/')) &&
+					$settings?.audio?.stt?.language
+				) {
+					metadata = {
+						language: $settings?.audio?.stt?.language
+					};
+				}
 
-			// During the file upload, file content is automatically extracted.
-			const uploadedFile = await uploadFile(localStorage.token, file, metadata);
+				// During the file upload, file content is automatically extracted.
+				const uploadedFile = await uploadFile(localStorage.token, file, metadata);
 
-			if (uploadedFile) {
-				console.log('File upload completed:', {
-					id: uploadedFile.id,
-					name: fileItem.name,
-					collection: uploadedFile?.meta?.collection_name
-				});
+				if (uploadedFile) {
+					console.log('File upload completed:', {
+						id: uploadedFile.id,
+						name: fileItem.name,
+						collection: uploadedFile?.meta?.collection_name
+					});
+
+					if (uploadedFile.error) {
+						console.warn('File upload warning:', uploadedFile.error);
+						toast.warning(uploadedFile.error);
+					}
+
+					fileItem.status = 'uploaded';
+					fileItem.file = uploadedFile;
+					fileItem.id = uploadedFile.id;
+					fileItem.collection_name =
+						uploadedFile?.meta?.collection_name || uploadedFile?.collection_name;
+					fileItem.url = `${WEBUI_API_BASE_URL}/files/${uploadedFile.id}`;
 
-				if (uploadedFile.error) {
-					console.warn('File upload warning:', uploadedFile.error);
-					toast.warning(uploadedFile.error);
+					files = files;
+				} else {
+					files = files.filter((item) => item?.itemId !== tempItemId);
 				}
+			} catch (e) {
+				toast.error(`${e}`);
+				files = files.filter((item) => item?.itemId !== tempItemId);
+			}
+		} else {
+			// If temporary chat is enabled, we just add the file to the list without uploading it.
+
+			const content = await extractContentFromFile(file, pdfjsLib).catch((error) => {
+				toast.error(
+					$i18n.t('Failed to extract content from the file: {{error}}', { error: error })
+				);
+				return null;
+			});
+
+			if (content === null) {
+				toast.error($i18n.t('Failed to extract content from the file.'));
+				files = files.filter((item) => item?.itemId !== tempItemId);
+				return null;
+			} else {
+				console.log('Extracted content from file:', {
+					name: file.name,
+					size: file.size,
+					content: content
+				});
 
 				fileItem.status = 'uploaded';
-				fileItem.file = uploadedFile;
-				fileItem.id = uploadedFile.id;
-				fileItem.collection_name =
-					uploadedFile?.meta?.collection_name || uploadedFile?.collection_name;
-				fileItem.url = `${WEBUI_API_BASE_URL}/files/${uploadedFile.id}`;
+				fileItem.type = 'text';
+				fileItem.content = content;
+				fileItem.id = uuidv4(); // Temporary ID for the file
 
 				files = files;
-			} else {
-				files = files.filter((item) => item?.itemId !== tempItemId);
 			}
-		} catch (e) {
-			toast.error(`${e}`);
-			files = files.filter((item) => item?.itemId !== tempItemId);
 		}
 	};
 

+ 71 - 0
src/lib/utils/index.ts

@@ -1507,3 +1507,74 @@ export const parseJsonValue = (value: string): any => {
 
 	return value;
 };
+
/** Minimal surface of the pdf.js module that `extractContentFromFile` relies on. */
type PdfJsLike = {
	getDocument(src: { data: ArrayBuffer }): {
		promise: Promise<{
			numPages: number;
			getPage(pageNumber: number): Promise<{
				getTextContent(): Promise<{ items: ReadonlyArray<object> }>;
			}>;
		}>;
		destroy?(): unknown;
	};
};

/**
 * Extracts plain text from a file on the client, without uploading it.
 *
 * - PDFs (MIME type `application/pdf` or a `.pdf` extension) are parsed with the
 *   supplied pdf.js module: the text items of each page are joined with spaces and
 *   every page is terminated with a newline.
 * - Files with a `text/*` MIME type or a known text extension are read as text.
 * - Anything else is still attempted as text as a last resort.
 *
 * @param file - The file to read. A missing `name` is treated as "no extension".
 * @param pdfjsLib - pdf.js module exposing `getDocument`; only needed for PDFs.
 * @returns The extracted text.
 * @throws Error when a PDF is given without `pdfjsLib`, when reading the file fails,
 *   or (fallback path) when an unrecognised file cannot be read as text.
 */
export const extractContentFromFile = async (
	file: Blob & { name?: string },
	pdfjsLib: PdfJsLike | null = null
): Promise<string> => {
	// Known text file extensions, used when the browser reports no usable MIME type.
	const textExtensions = [
		'.txt',
		'.md',
		'.csv',
		'.json',
		'.js',
		'.ts',
		'.css',
		'.html',
		'.xml',
		'.yaml',
		'.yml',
		'.rtf'
	];

	// Lower-cased extension including the leading dot, or '' when there is none.
	const getExtension = (filename: string): string => {
		const dot = filename.lastIndexOf('.');
		return dot === -1 ? '' : filename.slice(dot).toLowerCase();
	};

	// Uses pdf.js to pull the text layer out of every page.
	const extractPdfText = async (): Promise<string> => {
		if (!pdfjsLib) {
			throw new Error('pdfjsLib is required for PDF extraction');
		}

		const loadingTask = pdfjsLib.getDocument({ data: await file.arrayBuffer() });
		try {
			const pdf = await loadingTask.promise;
			let allText = '';
			for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
				const page = await pdf.getPage(pageNum);
				const content = await page.getTextContent();
				// Only real text items carry `str`; skip anything else so the output
				// never contains the literal word "undefined".
				const strings = content.items.flatMap((item) => {
					const str = (item as { str?: unknown }).str;
					return typeof str === 'string' ? [str] : [];
				});
				allText += strings.join(' ') + '\n';
			}
			return allText;
		} finally {
			// Release the document and its worker; otherwise each extracted PDF leaks one.
			await loadingTask.destroy?.();
		}
	};

	// Reads the file as text via FileReader (keeps FileReader's encoding detection).
	const readAsText = (): Promise<string> =>
		new Promise<string>((resolve, reject) => {
			const reader = new FileReader();
			reader.onload = () => resolve(typeof reader.result === 'string' ? reader.result : '');
			// Reject with a real Error rather than the ProgressEvent, so callers that
			// interpolate the error into a message get something readable.
			reader.onerror = () => reject(reader.error ?? new Error('Failed to read file'));
			reader.readAsText(file);
		});

	const type = file.type || '';
	const name = file.name ?? '';
	const ext = getExtension(name);

	// PDF check
	if (type === 'application/pdf' || ext === '.pdf') {
		return await extractPdfText();
	}

	// Text check (plain or common text-based)
	if (type.startsWith('text/') || textExtensions.includes(ext)) {
		return await readAsText();
	}

	// Fallback: try to read as text, if decodable
	try {
		return await readAsText();
	} catch {
		throw new Error('Unsupported or non-text file type: ' + (name || type));
	}
};