webui : handle PDF input (as text or image) + convert pasted long content to file (#13562)

* webui : handle PDF input (as text or image)

* handle the case where pdf image + server without mtmd

* fix bug missing pages
This commit is contained in:
Xuan-Son Nguyen 2025-05-15 14:24:50 +02:00 committed by GitHub
parent c753d7bed0
commit 3cc1f1f1d2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 425 additions and 22 deletions

Binary file not shown.

View file

@ -18,6 +18,7 @@
"dexie": "^4.0.11",
"highlight.js": "^11.10.0",
"katex": "^0.16.15",
"pdfjs-dist": "^5.2.133",
"postcss": "^8.4.49",
"react": "^18.3.1",
"react-dom": "^18.3.1",
@ -988,7 +989,7 @@
"version": "0.3.8",
"resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.8.tgz",
"integrity": "sha512-imAbBGkb+ebQyxKgzv5Hu2nmROxoDOXHh80evxdoXNOrvAnVx7zimzc1Oo5h9RlfV4vPXaE2iM5pOFbvOCClWA==",
"dev": true,
"devOptional": true,
"license": "MIT",
"dependencies": {
"@jridgewell/set-array": "^1.2.1",
@ -1003,7 +1004,7 @@
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz",
"integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==",
"dev": true,
"devOptional": true,
"license": "MIT",
"engines": {
"node": ">=6.0.0"
@ -1013,30 +1014,224 @@
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.2.1.tgz",
"integrity": "sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==",
"dev": true,
"devOptional": true,
"license": "MIT",
"engines": {
"node": ">=6.0.0"
}
},
"node_modules/@jridgewell/source-map": {
"version": "0.3.6",
"resolved": "https://registry.npmjs.org/@jridgewell/source-map/-/source-map-0.3.6.tgz",
"integrity": "sha512-1ZJTZebgqllO79ue2bm3rIGud/bOe0pP5BjSRCRxxYkEZS8STV7zN84UBbiYu7jy+eCKSnVIUgoWWE/tt+shMQ==",
"license": "MIT",
"optional": true,
"peer": true,
"dependencies": {
"@jridgewell/gen-mapping": "^0.3.5",
"@jridgewell/trace-mapping": "^0.3.25"
}
},
"node_modules/@jridgewell/sourcemap-codec": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz",
"integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==",
"dev": true,
"devOptional": true,
"license": "MIT"
},
"node_modules/@jridgewell/trace-mapping": {
"version": "0.3.25",
"resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz",
"integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==",
"dev": true,
"devOptional": true,
"license": "MIT",
"dependencies": {
"@jridgewell/resolve-uri": "^3.1.0",
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@napi-rs/canvas": {
"version": "0.1.70",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.70.tgz",
"integrity": "sha512-nD6NGa4JbNYSZYsTnLGrqe9Kn/lCkA4ybXt8sx5ojDqZjr2i0TWAHxx/vhgfjX+i3hCdKWufxYwi7CfXqtITSA==",
"license": "MIT",
"optional": true,
"engines": {
"node": ">= 10"
},
"optionalDependencies": {
"@napi-rs/canvas-android-arm64": "0.1.70",
"@napi-rs/canvas-darwin-arm64": "0.1.70",
"@napi-rs/canvas-darwin-x64": "0.1.70",
"@napi-rs/canvas-linux-arm-gnueabihf": "0.1.70",
"@napi-rs/canvas-linux-arm64-gnu": "0.1.70",
"@napi-rs/canvas-linux-arm64-musl": "0.1.70",
"@napi-rs/canvas-linux-riscv64-gnu": "0.1.70",
"@napi-rs/canvas-linux-x64-gnu": "0.1.70",
"@napi-rs/canvas-linux-x64-musl": "0.1.70",
"@napi-rs/canvas-win32-x64-msvc": "0.1.70"
}
},
"node_modules/@napi-rs/canvas-android-arm64": {
"version": "0.1.70",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.70.tgz",
"integrity": "sha512-I/YOuQ0wbkVYxVaYtCgN42WKTYxNqFA0gTcTrHIGG1jfpDSyZWII/uHcjOo4nzd19io6Y4+/BqP8E5hJgf9OmQ==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-darwin-arm64": {
"version": "0.1.70",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.70.tgz",
"integrity": "sha512-4pPGyXetHIHkw2TOJHujt3mkCP8LdDu8+CT15ld9Id39c752RcI0amDHSuMLMQfAjvusA9B5kKxazwjMGjEJpQ==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-darwin-x64": {
"version": "0.1.70",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.70.tgz",
"integrity": "sha512-+2N6Os9LbkmDMHL+raknrUcLQhsXzc5CSXRbXws9C3pv/mjHRVszQ9dhFUUe9FjfPhCJznO6USVdwOtu7pOrzQ==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-arm-gnueabihf": {
"version": "0.1.70",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.70.tgz",
"integrity": "sha512-QjscX9OaKq/990sVhSMj581xuqLgiaPVMjjYvWaCmAJRkNQ004QfoSMEm3FoTqM4DRoquP8jvuEXScVJsc1rqQ==",
"cpu": [
"arm"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-arm64-gnu": {
"version": "0.1.70",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.70.tgz",
"integrity": "sha512-LNakMOwwqwiHIwMpnMAbFRczQMQ7TkkMyATqFCOtUJNlE6LPP/QiUj/mlFrNbUn/hctqShJ60gWEb52ZTALbVw==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-arm64-musl": {
"version": "0.1.70",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.70.tgz",
"integrity": "sha512-wBTOllEYNfJCHOdZj9v8gLzZ4oY3oyPX8MSRvaxPm/s7RfEXxCyZ8OhJ5xAyicsDdbE5YBZqdmaaeP5+xKxvtg==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-riscv64-gnu": {
"version": "0.1.70",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.70.tgz",
"integrity": "sha512-GVUUPC8TuuFqHip0rxHkUqArQnlzmlXmTEBuXAWdgCv85zTCFH8nOHk/YCF5yo0Z2eOm8nOi90aWs0leJ4OE5Q==",
"cpu": [
"riscv64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-x64-gnu": {
"version": "0.1.70",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.70.tgz",
"integrity": "sha512-/kvUa2lZRwGNyfznSn5t1ShWJnr/m5acSlhTV3eXECafObjl0VBuA1HJw0QrilLpb4Fe0VLywkpD1NsMoVDROQ==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-linux-x64-musl": {
"version": "0.1.70",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.70.tgz",
"integrity": "sha512-aqlv8MLpycoMKRmds7JWCfVwNf1fiZxaU7JwJs9/ExjTD8lX2KjsO7CTeAj5Cl4aEuzxUWbJPUUE2Qu9cZ1vfg==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@napi-rs/canvas-win32-x64-msvc": {
"version": "0.1.70",
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.70.tgz",
"integrity": "sha512-Q9QU3WIpwBTVHk4cPfBjGHGU4U0llQYRXgJtFtYqqGNEOKVN4OT6PQ+ve63xwIPODMpZ0HHyj/KLGc9CWc3EtQ==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@nodelib/fs.scandir": {
"version": "2.1.5",
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
@ -2002,7 +2197,7 @@
"version": "8.14.0",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.0.tgz",
"integrity": "sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==",
"dev": true,
"devOptional": true,
"license": "MIT",
"bin": {
"acorn": "bin/acorn"
@ -2186,6 +2381,14 @@
"devOptional": true,
"license": "MIT/X11"
},
"node_modules/buffer-from": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz",
"integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==",
"license": "MIT",
"optional": true,
"peer": true
},
"node_modules/callsites": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
@ -4843,6 +5046,18 @@
"node": ">=8"
}
},
"node_modules/pdfjs-dist": {
"version": "5.2.133",
"resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.2.133.tgz",
"integrity": "sha512-abE6ZWDxztt+gGFzfm4bX2ggfxUk9wsDEoFzIJm9LozaY3JdXR7jyLK4Bjs+XLXplCduuWS1wGhPC4tgTn/kzg==",
"license": "Apache-2.0",
"engines": {
"node": ">=20.16.0 || >=22.3.0"
},
"optionalDependencies": {
"@napi-rs/canvas": "^0.1.67"
}
},
"node_modules/picocolors": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
@ -5753,6 +5968,17 @@
"node": ">=8"
}
},
"node_modules/source-map": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
"integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==",
"license": "BSD-3-Clause",
"optional": true,
"peer": true,
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/source-map-js": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
@ -5762,6 +5988,18 @@
"node": ">=0.10.0"
}
},
"node_modules/source-map-support": {
"version": "0.5.21",
"resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.21.tgz",
"integrity": "sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w==",
"license": "MIT",
"optional": true,
"peer": true,
"dependencies": {
"buffer-from": "^1.0.0",
"source-map": "^0.6.0"
}
},
"node_modules/space-separated-tokens": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/space-separated-tokens/-/space-separated-tokens-2.0.2.tgz",
@ -5859,6 +6097,34 @@
"node": ">=6"
}
},
"node_modules/terser": {
"version": "5.39.1",
"resolved": "https://registry.npmjs.org/terser/-/terser-5.39.1.tgz",
"integrity": "sha512-Mm6+uad0ZuDtcV8/4uOZQDQ8RuiC5Pu+iZRedJtF7yA/27sPL7d++In/AJKpWZlU3SYMPPkVfwetn6sgZ66pUA==",
"license": "BSD-2-Clause",
"optional": true,
"peer": true,
"dependencies": {
"@jridgewell/source-map": "^0.3.3",
"acorn": "^8.8.2",
"commander": "^2.20.0",
"source-map-support": "~0.5.20"
},
"bin": {
"terser": "bin/terser"
},
"engines": {
"node": ">=10"
}
},
"node_modules/terser/node_modules/commander": {
"version": "2.20.3",
"resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz",
"integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==",
"license": "MIT",
"optional": true,
"peer": true
},
"node_modules/textlinestream": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/textlinestream/-/textlinestream-1.1.1.tgz",

View file

@ -21,6 +21,7 @@
"dexie": "^4.0.11",
"highlight.js": "^11.10.0",
"katex": "^0.16.15",
"pdfjs-dist": "^5.2.133",
"postcss": "^8.4.49",
"react": "^18.3.1",
"react-dom": "^18.3.1",

View file

@ -16,6 +16,8 @@ export const CONFIG_DEFAULT = {
showTokensPerSecond: false,
showThoughtInProgress: false,
excludeThoughtOnReq: true,
pasteLongTextToFileLen: 2500,
pdfAsImage: false,
// make sure these default values are in sync with `common.h`
samplers: 'edkypmxt',
temperature: 0.8,
@ -43,6 +45,8 @@ export const CONFIG_DEFAULT = {
export const CONFIG_INFO: Record<string, string> = {
apiKey: 'Set the API Key if you are using --api-key option for the server.',
systemMessage: 'The starting message that defines how model should behave.',
pasteLongTextToFileLen:
'On pasting long text, it will be converted to a file. You can control the file length by setting the value of this parameter. Value 0 means disable.',
samplers:
'The order at which samplers are applied, in simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',
temperature:

View file

@ -306,6 +306,7 @@ function ChatInput({
onStop: () => void;
isGenerating: boolean;
}) {
const { config } = useAppContext();
const [isDrag, setIsDrag] = useState(false);
return (
@ -328,7 +329,28 @@ function ChatInput({
{({ getRootProps, getInputProps }) => (
<div
className="flex flex-col rounded-xl border-1 border-base-content/30 p-3 w-full"
// when a file is pasted to the input, we handle it here
// if a text is pasted, and if it is long text, we will convert it to a file
onPasteCapture={(e: ClipboardEvent<HTMLInputElement>) => {
const text = e.clipboardData.getData('text/plain');
if (
text.length > 0 &&
config.pasteLongTextToFileLen > 0 &&
text.length > config.pasteLongTextToFileLen
) {
// if the text is too long, we will convert it to a file
extraContext.addItems([
{
type: 'context',
name: 'Pasted Content',
content: text,
},
]);
e.preventDefault();
return;
}
// if a file is pasted, we will handle it here
const files = Array.from(e.clipboardData.items)
.filter((item) => item.kind === 'file')
.map((item) => item.getAsFile())

View file

@ -100,6 +100,16 @@ const SETTING_SECTIONS: SettingSection[] = [
key,
}) as SettingFieldInput
),
{
type: SettingInputType.SHORT_INPUT,
label: 'Paste length to file',
key: 'pasteLongTextToFileLen',
},
{
type: SettingInputType.CHECKBOX,
label: 'Parse PDF as image instead of text',
key: 'pdfAsImage',
},
],
},
{
@ -452,10 +462,10 @@ function SettingsModalLongInput({
label?: string;
}) {
return (
<label className="form-control mb-2">
<div className="label inline">{label || configKey}</div>
<label className="form-control">
<div className="label inline text-sm">{label || configKey}</div>
<textarea
className="textarea textarea-bordered h-24"
className="textarea textarea-bordered h-24 mb-2"
placeholder={`Default: ${CONFIG_DEFAULT[configKey] || 'none'}`}
value={value}
onChange={(e) => onChange(e.target.value)}
@ -482,9 +492,7 @@ function SettingsModalShortInput({
<>
{/* on mobile, we simply show the help message here */}
{helpMsg && (
<div className="block md:hidden mb-1">
<b>{label || configKey}</b>
<br />
<div className="block mb-1 opacity-75">
<p className="text-xs">{helpMsg}</p>
</div>
)}
@ -493,11 +501,6 @@ function SettingsModalShortInput({
<div tabIndex={0} role="button" className="font-bold hidden md:block">
{label || configKey}
</div>
{helpMsg && (
<div className="dropdown-content menu bg-base-100 rounded-box z-10 w-64 p-2 shadow mt-4">
{helpMsg}
</div>
)}
</div>
<input
type="text"

View file

@ -2,6 +2,17 @@ import { useState } from 'react';
import { MessageExtra } from '../utils/types';
import toast from 'react-hot-toast';
import { useAppContext } from '../utils/app.context';
import * as pdfjs from 'pdfjs-dist';
import pdfjsWorkerSrc from 'pdfjs-dist/build/pdf.worker.min.mjs?url';
import { TextContent, TextItem } from 'pdfjs-dist/types/src/display/api';
pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorkerSrc;
// This file handles uploading extra context items (a.k.a files)
// It allows processing these kinds of files:
// - image files (converted to base64)
// - text files (including code files)
// - pdf (converted to text, or to one image per page when the `pdfAsImage` setting is enabled and the server supports vision)
// Interface describing the API returned by the hook
export interface ChatExtraContextApi {
@ -13,7 +24,7 @@ export interface ChatExtraContextApi {
}
export function useChatExtraContext(): ChatExtraContextApi {
const { serverProps } = useAppContext();
const { serverProps, config } = useAppContext();
const [items, setItems] = useState<MessageExtra[]>([]);
const addItems = (newItems: MessageExtra[]) => {
@ -28,6 +39,8 @@ export function useChatExtraContext(): ChatExtraContextApi {
setItems([]);
};
const isSupportVision = serverProps?.modalities?.vision;
const onFileAdded = (files: File[]) => {
for (const file of files) {
const mimeType = file.type;
@ -38,7 +51,7 @@ export function useChatExtraContext(): ChatExtraContextApi {
}
if (mimeType.startsWith('image/')) {
if (!serverProps?.modalities?.vision) {
if (!isSupportVision) {
toast.error('Multimodal is not supported by this server or model.');
break;
}
@ -69,7 +82,43 @@ export function useChatExtraContext(): ChatExtraContextApi {
toast.error('Video and audio files are not supported yet.');
break;
} else if (mimeType.startsWith('application/pdf')) {
toast.error('PDF files are not supported yet.');
if (config.pdfAsImage && !isSupportVision) {
toast(
'Multimodal is not supported, PDF will be converted to text instead of image.'
);
break;
}
const promise =
config.pdfAsImage && isSupportVision
? convertPDFToImage(file).then((base64Urls) => {
addItems(
base64Urls.map((base64Url) => ({
type: 'imageFile',
name: file.name,
base64Url,
}))
);
})
: convertPDFToText(file).then((content) => {
if (isSupportVision) {
toast.success(
'PDF file converted to text. You can also convert it to image, see in Settings.'
);
}
addItems([
{
type: 'textFile',
name: file.name,
content,
},
]);
});
promise.catch((error) => {
console.error(error);
toast.error('Failed to parse PDF file.');
});
break;
} else {
// Because there can be many text file types (like code file), we will not check the mime type
@ -105,11 +154,69 @@ export function useChatExtraContext(): ChatExtraContextApi {
};
}
/**
 * Reads the whole content of a file into memory.
 *
 * @param file The file (or blob) to read.
 * @returns The raw bytes of the file. An empty file yields an empty buffer.
 * @throws Rejects when the file cannot be read or the read is aborted.
 */
async function getFileAsBuffer(file: File): Promise<ArrayBuffer> {
  // Preferred path: the promise-based Blob API rejects on failure by itself.
  if (typeof file.arrayBuffer === 'function') {
    return file.arrayBuffer();
  }
  // Fallback for environments that lack Blob.arrayBuffer().
  return new Promise<ArrayBuffer>((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      if (reader.result instanceof ArrayBuffer) {
        resolve(reader.result);
      } else {
        reject(new Error('Failed to read file.'));
      }
    };
    // Without these handlers a failed/aborted read would leave the promise
    // pending forever and the caller would never report the error.
    reader.onerror = () => {
      reject(reader.error ?? new Error('Failed to read file.'));
    };
    reader.onabort = () => {
      reject(new Error('Failed to read file.'));
    };
    reader.readAsArrayBuffer(file);
  });
}
/**
 * Extracts the text of every page of a PDF file.
 *
 * @param file The PDF file to parse.
 * @returns All text items of the document, in page order, joined with '\n'.
 * @throws Rejects when the file cannot be read or pdf.js fails to parse it.
 */
async function convertPDFToText(file: File): Promise<string> {
  const buffer = await getFileAsBuffer(file);
  const pdf = await pdfjs.getDocument(buffer).promise;
  try {
    // pdf.js page numbers are 1-based.
    const pageNumbers = Array.from({ length: pdf.numPages }, (_, i) => i + 1);
    const textContents: TextContent[] = await Promise.all(
      pageNumbers.map((pageNumber) =>
        pdf.getPage(pageNumber).then((page) => page.getTextContent())
      )
    );
    const textItems = textContents.flatMap((textContent) =>
      // items may also hold marked-content entries, which carry no `str`
      textContent.items.map((item) => (item as TextItem).str ?? '')
    );
    return textItems.join('\n');
  } finally {
    // Release the document and its worker; otherwise every uploaded PDF
    // keeps its parsed data (and worker) alive for the page lifetime.
    await pdf.destroy();
  }
}
/**
 * Renders every page of a PDF file to an image.
 *
 * Pages are rendered one after another so that at most one canvas is alive
 * at a time and a failure on any page cannot leave other renders running
 * in the background with nobody handling their rejection.
 *
 * @param file The PDF file to render.
 * @returns One data URL per page (as produced by `canvas.toDataURL()`), in page order.
 * @throws Rejects when the file cannot be read, pdf.js fails to parse or
 *   render it, or a 2D canvas context cannot be obtained.
 */
async function convertPDFToImage(file: File): Promise<string[]> {
  const buffer = await getFileAsBuffer(file);
  const doc = await pdfjs.getDocument(buffer).promise;
  try {
    const images: string[] = [];
    // pdf.js page numbers are 1-based.
    for (let i = 1; i <= doc.numPages; i++) {
      const page = await doc.getPage(i);
      const viewport = page.getViewport({ scale: 1.5 });
      const canvas = document.createElement('canvas');
      const ctx = canvas.getContext('2d');
      if (!ctx) {
        throw new Error('Failed to get 2D context from canvas');
      }
      canvas.width = viewport.width;
      canvas.height = viewport.height;
      await page.render({ canvasContext: ctx, viewport: viewport }).promise;
      images.push(canvas.toDataURL());
      // Shrink the canvas so its backing store can be reclaimed right away.
      canvas.width = 0;
      canvas.height = 0;
    }
    return images;
  } finally {
    // Release the document and its worker; otherwise every uploaded PDF
    // keeps its parsed data (and worker) alive for the page lifetime.
    await doc.destroy();
  }
}
// WARN: vibe code below
// This code is a heuristic to determine if a string is likely not binary.
// It is necessary because input file can have various mime types which we don't have time to investigate.
// For example, a python file can be text/plain, application/x-python, etc.
export function isLikelyNotBinary(str: string): boolean {
function isLikelyNotBinary(str: string): boolean {
const options = {
prefixLength: 1024 * 10, // Check the first 10KB of the string
suspiciousCharThresholdRatio: 0.15, // Allow up to 15% suspicious chars

View file

@ -7,7 +7,7 @@ import * as fflate from 'fflate';
/* eslint-disable */
const MAX_BUNDLE_SIZE = 1.5 * 1024 * 1024; // only increase when absolutely necessary
const MAX_BUNDLE_SIZE = 2 * 1024 * 1024; // only increase when absolutely necessary
const GUIDE_FOR_FRONTEND = `
<!--