From 0321342722fa3727c9dd9dc834758db35d6e84e1 Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Wed, 17 Jun 2026 17:45:43 -0700 Subject: [PATCH 1/3] feat(files): stream large CSV previews and add import-as-table --- apps/sim/app/api/table/import-async/route.ts | 3 +- .../[id]/files/[fileId]/csv-preview/route.ts | 50 +++++++ .../components/file-viewer/csv-import.ts | 60 ++++++++ .../file-viewer/csv-table-preview.tsx | 49 +++++++ .../components/file-viewer/file-viewer.tsx | 30 ++++ .../files/components/file-viewer/index.ts | 2 +- .../components/file-viewer/preview-panel.tsx | 43 +++++- .../components/file-viewer/text-editor.tsx | 2 + .../workspace/[workspaceId]/files/files.tsx | 8 +- .../mothership-view/mothership-view.tsx | 16 ++- apps/sim/hooks/queries/tables.ts | 32 +++++ .../sim/hooks/queries/workspace-file-table.ts | 50 +++++++ apps/sim/lib/api/contracts/tables.ts | 6 + .../lib/api/contracts/workspace-file-table.ts | 48 +++++++ .../file-parsers/csv-preview-slice.test.ts | 103 +++++++++++++ .../sim/lib/file-parsers/csv-preview-slice.ts | 136 ++++++++++++++++++ apps/sim/lib/table/import-runner.ts | 17 ++- scripts/check-api-validation-contracts.ts | 4 +- 18 files changed, 640 insertions(+), 19 deletions(-) create mode 100644 apps/sim/app/api/workspaces/[id]/files/[fileId]/csv-preview/route.ts create mode 100644 apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/csv-import.ts create mode 100644 apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/csv-table-preview.tsx create mode 100644 apps/sim/hooks/queries/workspace-file-table.ts create mode 100644 apps/sim/lib/api/contracts/workspace-file-table.ts create mode 100644 apps/sim/lib/file-parsers/csv-preview-slice.test.ts create mode 100644 apps/sim/lib/file-parsers/csv-preview-slice.ts diff --git a/apps/sim/app/api/table/import-async/route.ts b/apps/sim/app/api/table/import-async/route.ts index 0d5b6a418a6..2123852b59b 100644 --- a/apps/sim/app/api/table/import-async/route.ts +++ 
b/apps/sim/app/api/table/import-async/route.ts @@ -38,7 +38,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => { const parsed = await parseRequest(importTableAsyncContract, request, {}) if (!parsed.success) return parsed.response - const { workspaceId, fileKey, fileName } = parsed.data.body + const { workspaceId, fileKey, fileName, keepSource } = parsed.data.body const permission = await getUserEntityPermissions(userId, 'workspace', workspaceId) if (permission !== 'write' && permission !== 'admin') { @@ -111,6 +111,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => { fileName, delimiter, mode: 'create', + keepSource, } if (isTriggerDevEnabled) { // Trigger.dev runs the import outside the web container, so it survives app deploys. diff --git a/apps/sim/app/api/workspaces/[id]/files/[fileId]/csv-preview/route.ts b/apps/sim/app/api/workspaces/[id]/files/[fileId]/csv-preview/route.ts new file mode 100644 index 00000000000..3856eb3c05b --- /dev/null +++ b/apps/sim/app/api/workspaces/[id]/files/[fileId]/csv-preview/route.ts @@ -0,0 +1,50 @@ +import { createLogger } from '@sim/logger' +import { type NextRequest, NextResponse } from 'next/server' +import { getWorkspaceCsvPreviewContract } from '@/lib/api/contracts/workspace-file-table' +import { parseRequest } from '@/lib/api/server' +import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid' +import { withRouteHandler } from '@/lib/core/utils/with-route-handler' +import { getCsvPreviewSlice } from '@/lib/file-parsers/csv-preview-slice' +import { parseWorkspaceFileKey } from '@/lib/uploads/contexts/workspace/workspace-file-manager' +import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils' + +const logger = createLogger('WorkspaceCsvPreviewAPI') + +export const runtime = 'nodejs' +export const dynamic = 'force-dynamic' + +export const GET = withRouteHandler( + async (request: NextRequest, context: { params: Promise<{ id: string; fileId: string }> }) => { 
+ const authResult = await checkSessionOrInternalAuth(request, { requireWorkflowId: false }) + if (!authResult.success || !authResult.userId) { + return NextResponse.json({ error: 'Authentication required' }, { status: 401 }) + } + const userId = authResult.userId + + const parsed = await parseRequest(getWorkspaceCsvPreviewContract, request, context) + if (!parsed.success) return parsed.response + const { id: workspaceId } = parsed.data.params + const { key } = parsed.data.query + + const permission = await getUserEntityPermissions(userId, 'workspace', workspaceId) + if (!permission) { + return NextResponse.json({ error: 'Access denied' }, { status: 403 }) + } + + // The key is client-supplied — confine it to this workspace's storage prefix so a caller + // can't read another workspace's object. + if (parseWorkspaceFileKey(key) !== workspaceId) { + return NextResponse.json({ error: 'Invalid file key for workspace' }, { status: 400 }) + } + + const slice = await getCsvPreviewSlice({ key, context: 'workspace', signal: request.signal }) + + logger.info('CSV preview served', { + workspaceId, + rows: slice.rows.length, + truncated: slice.truncated, + }) + + return NextResponse.json({ success: true, ...slice }) + } +) diff --git a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/csv-import.ts b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/csv-import.ts new file mode 100644 index 00000000000..aba5fbcf406 --- /dev/null +++ b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/csv-import.ts @@ -0,0 +1,60 @@ +'use client' + +import { useCallback, useEffect, useRef } from 'react' +import { generateId } from '@sim/utils/id' +import { useRouter } from 'next/navigation' +import { toast } from '@/components/emcn' +import { CSV_PREVIEW_MAX_ROWS } from '@/lib/api/contracts/workspace-file-table' +import type { WorkspaceFileRecord } from '@/lib/uploads/contexts/workspace' +import { useImportFileAsTable } from 
'@/hooks/queries/tables' +import { useImportTrayStore } from '@/stores/table/import-tray/store' + +export type CsvImportFileDescriptor = Pick + +/** + * Wires the "Import as a table" affordance for a capped CSV preview. When the preview is + * `truncated`, raises a one-time warning toast whose action kicks off a background import of the + * existing workspace file — no re-upload, source preserved — and navigates to the new table. + */ +export function useCsvTruncationImport( + workspaceId: string, + file: CsvImportFileDescriptor, + truncated: boolean +) { + const router = useRouter() + const importFile = useImportFileAsTable() + + const importAsTable = useCallback(() => { + const pendingId = `pending_${generateId()}` + useImportTrayStore + .getState() + .startUpload({ uploadId: pendingId, workspaceId, title: file.name }) + toast.success(`Importing "${file.name}" as a table`, { + description: 'This runs in the background.', + action: { + label: 'View tables', + onClick: () => router.push(`/workspace/${workspaceId}/tables`), + }, + }) + importFile.mutate( + { workspaceId, fileKey: file.key, fileName: file.name }, + { + onSuccess: () => useImportTrayStore.getState().endUpload(pendingId), + onError: () => useImportTrayStore.getState().endUpload(pendingId), + } + ) + // importFile.mutate and router are stable references + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [workspaceId, file.key, file.name]) + + // Surface the cap as a warning toast with an import action, once per file. 
+ const notifiedKeyRef = useRef(null) + useEffect(() => { + if (!truncated || notifiedKeyRef.current === file.key) return + notifiedKeyRef.current = file.key + toast.warning(`Showing the first ${CSV_PREVIEW_MAX_ROWS.toLocaleString()} rows`, { + description: 'Import this file as a table to view all of its rows.', + action: { label: 'Import as a table', onClick: importAsTable }, + }) + }, [truncated, file.key, importAsTable]) +} diff --git a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/csv-table-preview.tsx b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/csv-table-preview.tsx new file mode 100644 index 00000000000..6b39c4eadb3 --- /dev/null +++ b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/csv-table-preview.tsx @@ -0,0 +1,49 @@ +'use client' + +import { memo } from 'react' +import type { WorkspaceFileRecord } from '@/lib/uploads/contexts/workspace' +import { useWorkspaceCsvPreview } from '@/hooks/queries/workspace-file-table' +import { useCsvTruncationImport } from './csv-import' +import { DataTable } from './data-table' +import { PreviewError, PreviewLoadingFrame, resolvePreviewError } from './preview-shared' + +/** + * Read-only preview for a CSV that is too large to load fully into the editor. Streams only the + * first {@link CSV_PREVIEW_MAX_ROWS} rows from storage; when there are more, a warning toast offers + * "Import as a table", which builds a full Table from the file (memory-safe streaming import). + */ +export const CsvTablePreview = memo(function CsvTablePreview({ + file, + workspaceId, +}: { + file: WorkspaceFileRecord + workspaceId: string +}) { + const version = Number(new Date(file.updatedAt)) || file.size + const { + data, + isLoading, + error: fetchError, + } = useWorkspaceCsvPreview(workspaceId, file.id, file.key, version) + useCsvTruncationImport(workspaceId, file, data?.truncated ?? false) + + const error = resolvePreviewError((fetchError as Error | null) ?? 
null, null) + if (error) return + if (isLoading || !data) { + return + } + + if (data.headers.length === 0) { + return ( +
+

No data to display

+
+ ) + } + + return ( +
+ +
+ ) +}) diff --git a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/file-viewer.tsx b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/file-viewer.tsx index f20d1762ccf..d3c6fb21ece 100644 --- a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/file-viewer.tsx +++ b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/file-viewer.tsx @@ -13,6 +13,7 @@ import { useDocPreviewBinary } from './use-doc-preview-binary' export type { StreamingMode } from './text-editor-state' +import { CsvTablePreview } from './csv-table-preview' import { DocxPreview } from './docx-preview' import { ImagePreview } from './image-preview' import type { PdfDocumentSource } from './pdf-viewer' @@ -34,6 +35,13 @@ const PdfViewerCore = dynamic(() => import('./pdf-viewer').then((m) => m.PdfView const logger = createLogger('FileViewer') +/** + * CSVs at or below this size load fully into the editor (editable, with an inline preview). + * Larger CSVs would OOM the browser on `response.text()`, so they render a read-only, + * server-streamed preview of the first rows instead (see {@link CsvTablePreview}). + */ +const CSV_INLINE_EDIT_MAX_BYTES = 5 * 1024 * 1024 + export function isTextEditable(file: { type: string; name: string }): boolean { return resolveFileCategory(file.type, file.name) === 'text-editable' } @@ -42,6 +50,22 @@ export function isPreviewable(file: { type: string; name: string }): boolean { return resolvePreviewType(file.type, file.name) !== null } +/** + * A CSV larger than {@link CSV_INLINE_EDIT_MAX_BYTES} is shown as a streamed, read-only preview — + * the editor would OOM loading the whole file. The viewer renders {@link CsvTablePreview} for it, + * and toolbars use this to hide the edit/split/save controls (there is no editor to switch to). 
+ */ +export function isCsvStreamOnly(file: { + type: string | null + name: string + size?: number | null +}): boolean { + return ( + resolvePreviewType(file.type, file.name) === 'csv' && + (file.size ?? 0) > CSV_INLINE_EDIT_MAX_BYTES + ) +} + export type PreviewMode = 'editor' | 'split' | 'preview' interface FileViewerProps { @@ -76,6 +100,12 @@ export function FileViewer({ const category = resolveFileCategory(file.type, file.name) if (category === 'text-editable') { + // A large CSV can't be loaded whole into the editor (the browser OOMs on the full text). + // Render a streamed, read-only preview of the first rows + an "Import as a table" path instead. + if (isCsvStreamOnly(file)) { + return + } + return ( void @@ -85,6 +89,8 @@ export const PreviewPanel = memo(function PreviewPanel({ content, mimeType, filename, + workspaceId, + fileKey, isStreaming, disableAutoScroll, onCheckboxToggle, @@ -101,7 +107,14 @@ export const PreviewPanel = memo(function PreviewPanel({ /> ) if (previewType === 'html') return - if (previewType === 'csv') return + if (previewType === 'csv') + return ( + + ) if (previewType === 'svg') return if (previewType === 'mermaid') return @@ -1150,8 +1163,17 @@ function MermaidFilePreview({ content, isStreaming }: { content: string; isStrea ) } -const CsvPreview = memo(function CsvPreview({ content }: { content: string }) { - const { headers, rows } = useMemo(() => parseCsv(content), [content]) +const CsvPreview = memo(function CsvPreview({ + content, + workspaceId, + file, +}: { + content: string + workspaceId: string + file: CsvImportFileDescriptor +}) { + const { headers, rows, truncated } = useMemo(() => parseCsv(content), [content]) + useCsvTruncationImport(workspaceId, file, truncated) if (headers.length === 0) { return ( @@ -1168,15 +1190,22 @@ const CsvPreview = memo(function CsvPreview({ content }: { content: string }) { ) }) -function parseCsv(text: string): { headers: string[]; rows: string[][] } { +/** + * Parses CSV text for the 
inline preview, capping at {@link CSV_PREVIEW_MAX_ROWS} rows so a + * small-but-many-rows file doesn't render thousands of ``s. Slices before parsing so only + * the capped rows are processed; `truncated` drives the "Import as a table" footer. + */ +function parseCsv(text: string): { headers: string[]; rows: string[][]; truncated: boolean } { const lines = text.split('\n').filter((line) => line.trim().length > 0) - if (lines.length === 0) return { headers: [], rows: [] } + if (lines.length === 0) return { headers: [], rows: [], truncated: false } const delimiter = detectDelimiter(lines[0]) const headers = parseCsvLine(lines[0], delimiter) - const rows = lines.slice(1).map((line) => parseCsvLine(line, delimiter)) + const dataLines = lines.slice(1) + const truncated = dataLines.length > CSV_PREVIEW_MAX_ROWS + const rows = dataLines.slice(0, CSV_PREVIEW_MAX_ROWS).map((line) => parseCsvLine(line, delimiter)) - return { headers, rows } + return { headers, rows, truncated } } function detectDelimiter(line: string): string { diff --git a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/text-editor.tsx b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/text-editor.tsx index 8cdb1cce6f1..60b1ec2bc8a 100644 --- a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/text-editor.tsx +++ b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/text-editor.tsx @@ -755,6 +755,8 @@ export const TextEditor = memo(function TextEditor({ content={content} mimeType={file.type} filename={file.name} + workspaceId={workspaceId} + fileKey={file.key} isStreaming={isStreaming} disableAutoScroll={disableStreamingAutoScroll} onCheckboxToggle={canEdit && !isStreaming ? 
handleCheckboxToggle : undefined} diff --git a/apps/sim/app/workspace/[workspaceId]/files/files.tsx b/apps/sim/app/workspace/[workspaceId]/files/files.tsx index b4ff6d2706d..53e1ba66cf5 100644 --- a/apps/sim/app/workspace/[workspaceId]/files/files.tsx +++ b/apps/sim/app/workspace/[workspaceId]/files/files.tsx @@ -65,6 +65,7 @@ import { FileRowContextMenu } from '@/app/workspace/[workspaceId]/files/componen import type { PreviewMode } from '@/app/workspace/[workspaceId]/files/components/file-viewer' import { FileViewer, + isCsvStreamOnly, isPreviewable, isTextEditable, } from '@/app/workspace/[workspaceId]/files/components/file-viewer' @@ -1389,8 +1390,11 @@ export function Files() { const fileActions = useMemo(() => { if (!selectedFile) return [] - const canEditText = isTextEditable(selectedFile) - const canPreview = isPreviewable(selectedFile) + // A large CSV renders as a read-only streamed preview (no editor), so it gets neither the + // Save action nor the edit/split/preview toggle — just like a non-editable file. 
+ const streamOnly = isCsvStreamOnly(selectedFile) + const canEditText = isTextEditable(selectedFile) && !streamOnly + const canPreview = isPreviewable(selectedFile) && !streamOnly const hasSplitView = canEditText && canPreview const saveLabel = diff --git a/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/mothership-view.tsx b/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/mothership-view.tsx index ae6857a1044..529cf04b509 100644 --- a/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/mothership-view.tsx +++ b/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/mothership-view.tsx @@ -5,7 +5,10 @@ import type { FilePreviewSession } from '@/lib/copilot/request/session' import { cn } from '@/lib/core/utils/cn' import { getFileExtension } from '@/lib/uploads/utils/file-utils' import type { PreviewMode } from '@/app/workspace/[workspaceId]/files/components/file-viewer' -import { RICH_PREVIEWABLE_EXTENSIONS } from '@/app/workspace/[workspaceId]/files/components/file-viewer' +import { + isCsvStreamOnly, + RICH_PREVIEWABLE_EXTENSIONS, +} from '@/app/workspace/[workspaceId]/files/components/file-viewer' import { useMothershipResources } from '@/app/workspace/[workspaceId]/home/components/mothership-resources-context' import { hasRenderableFilePreviewContent } from '@/app/workspace/[workspaceId]/home/hooks/preview' import type { @@ -13,6 +16,7 @@ import type { MothershipResource, } from '@/app/workspace/[workspaceId]/home/types' import { useUserPermissionsContext } from '@/app/workspace/[workspaceId]/providers/workspace-permissions-provider' +import { useWorkspaceFiles } from '@/hooks/queries/workspace-files' import { ResourceActions, ResourceContent, ResourceTabs } from './components' const PREVIEW_CYCLE: Record = { @@ -82,10 +86,18 @@ export const MothershipView = memo( setPreviewMode('preview') } + // A large CSV renders read-only (streamed) with no editor, so it must not offer the + // 
edit/split/preview toggle. Its size lives on the file record, not the resource tab. + const { data: files } = useWorkspaceFiles(workspaceId, 'active', { + enabled: active?.type === 'file', + }) + const activeFile = active?.type === 'file' ? files?.find((f) => f.id === active.id) : undefined + const isActivePreviewable = canEdit && active?.type === 'file' && - RICH_PREVIEWABLE_EXTENSIONS.has(getFileExtension(active.title)) + RICH_PREVIEWABLE_EXTENSIONS.has(getFileExtension(active.title)) && + !(activeFile && isCsvStreamOnly(activeFile)) return (
{ + const response = await requestJson(importTableAsyncContract, { + body: { workspaceId, fileKey, fileName, keepSource: true }, + }) + return response.data + }, + onError: (error) => { + logger.error('Failed to start import from file:', error) + toast.error(error.message, { duration: 5000 }) + }, + onSettled: () => { + queryClient.invalidateQueries({ queryKey: tableKeys.lists() }) + }, + }) +} + export type CsvImportMode = 'append' | 'replace' interface ImportCsvIntoTableAsyncParams { diff --git a/apps/sim/hooks/queries/workspace-file-table.ts b/apps/sim/hooks/queries/workspace-file-table.ts new file mode 100644 index 00000000000..8538c97d26f --- /dev/null +++ b/apps/sim/hooks/queries/workspace-file-table.ts @@ -0,0 +1,50 @@ +import { useQuery } from '@tanstack/react-query' +import { requestJson } from '@/lib/api/client/request' +import { + getWorkspaceCsvPreviewContract, + type WorkspaceCsvPreviewResponse, +} from '@/lib/api/contracts/workspace-file-table' + +/** + * Query keys for the streamed CSV file-viewer preview. `key` (storage object key) and + * `version` (the record's `updatedAt`) are folded in so a re-upload or edit busts the cache. + */ +export const workspaceFileTableKeys = { + all: ['workspaceFileTable'] as const, + previews: () => [...workspaceFileTableKeys.all, 'preview'] as const, + preview: (workspaceId: string, fileId: string, key: string, version?: number) => + [...workspaceFileTableKeys.previews(), workspaceId, fileId, key, version ?? ''] as const, +} + +async function fetchWorkspaceCsvPreview( + workspaceId: string, + fileId: string, + key: string, + version: number | undefined, + signal?: AbortSignal +): Promise { + return requestJson(getWorkspaceCsvPreviewContract, { + params: { id: workspaceId, fileId }, + query: version != null ? { key, v: version } : { key }, + signal, + }) +} + +/** + * Fetches the first {@link CSV_PREVIEW_MAX_ROWS} rows of a CSV via the streaming preview route. 
+ * The server reads only that prefix from storage, so this is safe for arbitrarily large files. + */ +export function useWorkspaceCsvPreview( + workspaceId: string, + fileId: string, + key: string, + version?: number, + options?: { enabled?: boolean } +) { + return useQuery({ + queryKey: workspaceFileTableKeys.preview(workspaceId, fileId, key, version), + queryFn: ({ signal }) => fetchWorkspaceCsvPreview(workspaceId, fileId, key, version, signal), + enabled: !!workspaceId && !!fileId && !!key && (options?.enabled ?? true), + staleTime: 30 * 1000, + }) +} diff --git a/apps/sim/lib/api/contracts/tables.ts b/apps/sim/lib/api/contracts/tables.ts index d5e4c7fa5c6..3e9dd3862a0 100644 --- a/apps/sim/lib/api/contracts/tables.ts +++ b/apps/sim/lib/api/contracts/tables.ts @@ -397,6 +397,12 @@ export const importTableAsyncBodySchema = z.object({ workspaceId: z.string().min(1, 'Workspace ID is required'), fileKey: z.string().min(1, 'fileKey is required'), fileName: z.string().min(1, 'fileName is required'), + /** + * Keep the source file after the import finishes. Set when importing an existing workspace + * file (e.g. from the file viewer) so the user's file isn't deleted; omitted for the normal + * upload-then-import flow where the uploaded object is single-use. + */ + keepSource: z.boolean().optional(), }) export type ImportTableAsyncBody = z.input diff --git a/apps/sim/lib/api/contracts/workspace-file-table.ts b/apps/sim/lib/api/contracts/workspace-file-table.ts new file mode 100644 index 00000000000..a6b14234a69 --- /dev/null +++ b/apps/sim/lib/api/contracts/workspace-file-table.ts @@ -0,0 +1,48 @@ +import { z } from 'zod' +import { workspaceIdSchema } from '@/lib/api/contracts/primitives' +import { + type ContractJsonResponse, + type ContractParamsInput, + type ContractQueryInput, + defineRouteContract, +} from '@/lib/api/contracts/types' + +/** + * Maximum rows returned by the CSV file-viewer preview. 
The viewer streams only this + * many rows from storage; beyond it the user imports the file as a table to see the rest. + */ +export const CSV_PREVIEW_MAX_ROWS = 1_000 + +export const workspaceCsvPreviewParamsSchema = z.object({ + id: workspaceIdSchema, + fileId: z.string().min(1, 'File ID is required'), +}) + +export const workspaceCsvPreviewQuerySchema = z.object({ + /** Storage object key — drives the access check and busts the cache on re-upload. */ + key: z.string().min(1, 'File key is required'), + /** Content version (the file record's `updatedAt` epoch ms) — busts the cache on edit. */ + v: z.coerce.number().optional(), +}) + +export const workspaceCsvPreviewResponseSchema = z.object({ + success: z.literal(true), + headers: z.array(z.string()), + rows: z.array(z.array(z.string())), + /** True when the file has more than {@link CSV_PREVIEW_MAX_ROWS} data rows. */ + truncated: z.boolean(), +}) + +export const getWorkspaceCsvPreviewContract = defineRouteContract({ + method: 'GET', + path: '/api/workspaces/[id]/files/[fileId]/csv-preview', + params: workspaceCsvPreviewParamsSchema, + query: workspaceCsvPreviewQuerySchema, + response: { mode: 'json', schema: workspaceCsvPreviewResponseSchema }, +}) + +export type WorkspaceCsvPreviewParams = ContractParamsInput +export type WorkspaceCsvPreviewQuery = ContractQueryInput +export type WorkspaceCsvPreviewResponse = ContractJsonResponse< + typeof getWorkspaceCsvPreviewContract +> diff --git a/apps/sim/lib/file-parsers/csv-preview-slice.test.ts b/apps/sim/lib/file-parsers/csv-preview-slice.test.ts new file mode 100644 index 00000000000..51d8574e46d --- /dev/null +++ b/apps/sim/lib/file-parsers/csv-preview-slice.test.ts @@ -0,0 +1,103 @@ +/** + * @vitest-environment node + */ +import { Readable } from 'node:stream' +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { mockDownloadFileStream } = vi.hoisted(() => ({ + mockDownloadFileStream: vi.fn(), +})) + 
+vi.mock('@/lib/uploads/core/storage-service', () => ({ + downloadFileStream: mockDownloadFileStream, +})) + +import { CSV_PREVIEW_MAX_ROWS } from '@/lib/api/contracts/workspace-file-table' +import { getCsvPreviewSlice } from '@/lib/file-parsers/csv-preview-slice' + +function streamOf(text: string): Readable { + // Array-wrapped so the whole text is one chunk (a bare Buffer/string is iterated element-wise). + return Readable.from([Buffer.from(text, 'utf-8')]) +} + +const args = { key: 'workspace/ws_1/file.csv', context: 'workspace' as const } + +function csvWithRows(dataRows: number): string { + const lines = ['h1,h2'] + for (let i = 0; i < dataRows; i++) lines.push(`${i},x`) + return lines.join('\n') +} + +describe('getCsvPreviewSlice', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('returns headers and every row when under the cap', async () => { + mockDownloadFileStream.mockResolvedValue(streamOf('a,b\n1,2\n3,4\n')) + const slice = await getCsvPreviewSlice(args) + expect(slice.headers).toEqual(['a', 'b']) + expect(slice.rows).toEqual([ + ['1', '2'], + ['3', '4'], + ]) + expect(slice.truncated).toBe(false) + }) + + it('caps at CSV_PREVIEW_MAX_ROWS and flags truncated', async () => { + mockDownloadFileStream.mockResolvedValue(streamOf(csvWithRows(CSV_PREVIEW_MAX_ROWS + 500))) + const slice = await getCsvPreviewSlice(args) + expect(slice.rows).toHaveLength(CSV_PREVIEW_MAX_ROWS) + expect(slice.truncated).toBe(true) + }) + + it('is not truncated at exactly the cap', async () => { + mockDownloadFileStream.mockResolvedValue(streamOf(csvWithRows(CSV_PREVIEW_MAX_ROWS))) + const slice = await getCsvPreviewSlice(args) + expect(slice.rows).toHaveLength(CSV_PREVIEW_MAX_ROWS) + expect(slice.truncated).toBe(false) + }) + + it('detects a semicolon delimiter', async () => { + mockDownloadFileStream.mockResolvedValue(streamOf('a;b;c\n1;2;3\n')) + const slice = await getCsvPreviewSlice(args) + expect(slice.headers).toEqual(['a', 'b', 'c']) + 
expect(slice.rows).toEqual([['1', '2', '3']]) + }) + + it('detects a tab delimiter', async () => { + mockDownloadFileStream.mockResolvedValue(streamOf('a\tb\n1\t2\n')) + const slice = await getCsvPreviewSlice(args) + expect(slice.headers).toEqual(['a', 'b']) + expect(slice.rows).toEqual([['1', '2']]) + }) + + it('returns empty for an empty file', async () => { + mockDownloadFileStream.mockResolvedValue(streamOf('')) + const slice = await getCsvPreviewSlice(args) + expect(slice).toEqual({ headers: [], rows: [], truncated: false }) + }) + + it('tolerates ragged rows', async () => { + mockDownloadFileStream.mockResolvedValue(streamOf('a,b,c\n1,2\n4,5,6,7\n')) + const slice = await getCsvPreviewSlice(args) + expect(slice.headers).toEqual(['a', 'b', 'c']) + expect(slice.rows[0]).toEqual(['1', '2']) + }) + + it('truncates an oversized cell', async () => { + const big = 'x'.repeat(3000) + mockDownloadFileStream.mockResolvedValue(streamOf(`a\n${big}\n`)) + const slice = await getCsvPreviewSlice(args) + expect(slice.rows[0][0].length).toBeLessThan(3000) + }) + + it('destroys the source stream after reading the slice', async () => { + const source = streamOf(csvWithRows(CSV_PREVIEW_MAX_ROWS + 50)) + const destroySpy = vi.spyOn(source, 'destroy') + mockDownloadFileStream.mockResolvedValue(source) + const slice = await getCsvPreviewSlice(args) + expect(slice.truncated).toBe(true) + expect(destroySpy).toHaveBeenCalled() + }) +}) diff --git a/apps/sim/lib/file-parsers/csv-preview-slice.ts b/apps/sim/lib/file-parsers/csv-preview-slice.ts new file mode 100644 index 00000000000..a78ec99c6ed --- /dev/null +++ b/apps/sim/lib/file-parsers/csv-preview-slice.ts @@ -0,0 +1,136 @@ +import { Readable } from 'node:stream' +import { truncate } from '@sim/utils/string' +import { parse as parseCsvStream } from 'csv-parse' +import { CSV_PREVIEW_MAX_ROWS } from '@/lib/api/contracts/workspace-file-table' +import type { StorageContext } from '@/lib/uploads/config' +import { downloadFileStream } 
from '@/lib/uploads/core/storage-service' + +/** Cap a single cell so one pathological field can't bloat the preview payload. */ +const MAX_CELL_LENGTH = 2_000 + +/** Read at most this many bytes while sniffing the first line for the delimiter. */ +const DELIMITER_SNIFF_MAX_BYTES = 256 * 1024 + +interface CsvPreviewSliceArgs { + key: string + context: StorageContext + signal?: AbortSignal +} + +export interface CsvPreviewSlice { + headers: string[] + rows: string[][] + /** True when the file has more than {@link CSV_PREVIEW_MAX_ROWS} data rows. */ + truncated: boolean +} + +/** + * Detects the CSV delimiter from a header line by frequency. Mirrors the file viewer's + * client-side heuristic (comma / tab / semicolon) so server-streamed previews match. + */ +function detectDelimiter(line: string): string { + const commaCount = (line.match(/,/g) || []).length + const tabCount = (line.match(/\t/g) || []).length + const semiCount = (line.match(/;/g) || []).length + if (tabCount > commaCount && tabCount > semiCount) return '\t' + if (semiCount > commaCount) return ';' + return ',' +} + +function cell(value: unknown): string { + return truncate(String(value ?? ''), MAX_CELL_LENGTH) +} + +/** + * Streams the first {@link CSV_PREVIEW_MAX_ROWS} rows of a CSV/TSV from storage without + * ever buffering the whole file. The source stream is destroyed as soon as enough rows are + * read (one past the cap, to detect truncation), so a multi-GB file costs O(rows) of memory. + */ +export async function getCsvPreviewSlice({ + key, + context, + signal, +}: CsvPreviewSliceArgs): Promise<CsvPreviewSlice> { + const source = await downloadFileStream({ key, context }) + const onAbort = () => source.destroy() + signal?.addEventListener('abort', onAbort, { once: true }) + + const reader = source[Symbol.asyncIterator]() + + try { + // Pull chunks until the first newline so the delimiter can be sniffed before parsing.
+ const sniffed: Buffer[] = [] + let firstLine = '' + let sniffedBytes = 0 + while (true) { + const { value, done } = await reader.next() + if (done) break + const chunk = Buffer.isBuffer(value) ? value : Buffer.from(value) + sniffed.push(chunk) + sniffedBytes += chunk.length + const combined = Buffer.concat(sniffed).toString('utf-8') + const nl = combined.indexOf('\n') + if (nl !== -1) { + firstLine = combined.slice(0, nl) + break + } + firstLine = combined + if (sniffedBytes >= DELIMITER_SNIFF_MAX_BYTES) break + } + + if (sniffed.length === 0) { + return { headers: [], rows: [], truncated: false } + } + + const delimiter = detectDelimiter(firstLine) + const parser = parseCsvStream({ + columns: false, + skip_empty_lines: true, + trim: true, + relax_column_count: true, + relax_quotes: true, + skip_records_with_error: true, + cast: false, + bom: true, + delimiter, + }) + + // Re-feed the sniffed prefix, then drain the rest of the source into the parser. + async function* rejoin() { + for (const chunk of sniffed) yield chunk + while (true) { + const { value, done } = await reader.next() + if (done) return + yield value + } + } + const piped = Readable.from(rejoin()) + piped.on('error', (err) => parser.destroy(err)) + piped.pipe(parser) + + let headers: string[] = [] + let headersSet = false + const rows: string[][] = [] + let truncated = false + + for await (const record of parser as AsyncIterable<string[]>) { + if (!headersSet) { + headers = record.map(cell) + headersSet = true + continue + } + if (rows.length >= CSV_PREVIEW_MAX_ROWS) { + truncated = true + break + } + rows.push(record.map(cell)) + } + + piped.destroy() + parser.destroy() + return { headers, rows, truncated } + } finally { + signal?.removeEventListener('abort', onAbort) + source.destroy() + } +} diff --git a/apps/sim/lib/table/import-runner.ts b/apps/sim/lib/table/import-runner.ts index 5391d22a238..1b6ec861782 100644 --- a/apps/sim/lib/table/import-runner.ts +++ b/apps/sim/lib/table/import-runner.ts @@ -60,6
+60,12 @@ export interface TableImportPayload { mapping?: CsvHeaderMapping /** (append/replace) CSV headers to auto-create as new columns (types inferred from the sample). */ createColumns?: string[] + /** + * Keep the source file after the import is terminal. Set when importing an existing workspace + * file (the file viewer's "Import as a table") so the user's file isn't deleted. Defaults to + * deleting, since the normal flow uploads a single-use copy per import. + */ + keepSource?: boolean } /** @@ -350,9 +356,12 @@ export async function runTableImport(payload: TableImportPayload): Promise // Release the storage stream so its HTTP connection doesn't leak on failure. source?.destroy() // The uploaded source file is single-use (a fresh upload per import) — delete it once the - // import is terminal so the workspace bucket doesn't accumulate. Best-effort. - await deleteFile({ key: fileKey, context: 'workspace' }).catch((err) => { - logger.warn(`[${requestId}] Failed to delete imported file`, { fileKey, err }) - }) + // import is terminal so the workspace bucket doesn't accumulate. Best-effort. When importing + // an existing workspace file (keepSource), the file is the user's own — never delete it. 
+ if (!payload.keepSource) { + await deleteFile({ key: fileKey, context: 'workspace' }).catch((err) => { + logger.warn(`[${requestId}] Failed to delete imported file`, { fileKey, err }) + }) + } } } diff --git a/scripts/check-api-validation-contracts.ts b/scripts/check-api-validation-contracts.ts index cee05f4d2b1..bfd7a169d3c 100644 --- a/scripts/check-api-validation-contracts.ts +++ b/scripts/check-api-validation-contracts.ts @@ -9,8 +9,8 @@ const QUERY_HOOKS_DIR = path.join(ROOT, 'apps/sim/hooks/queries') const SELECTOR_HOOKS_DIR = path.join(ROOT, 'apps/sim/hooks/selectors') const BASELINE = { - totalRoutes: 852, - zodRoutes: 852, + totalRoutes: 853, + zodRoutes: 853, nonZodRoutes: 0, } as const From 81ca9706dfdb6ca9719d4b59168d5abd964c865c Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Wed, 17 Jun 2026 18:18:45 -0700 Subject: [PATCH 2/3] fix(files): validate fileId in csv-preview route, guard double-import, fix sniff perf and toggle flash --- .../[id]/files/[fileId]/csv-preview/route.ts | 20 ++++++++++++------- .../components/file-viewer/csv-import.ts | 12 +++++++++-- .../mothership-view/mothership-view.tsx | 5 ++++- .../sim/lib/file-parsers/csv-preview-slice.ts | 12 +++++++---- 4 files changed, 35 insertions(+), 14 deletions(-) diff --git a/apps/sim/app/api/workspaces/[id]/files/[fileId]/csv-preview/route.ts b/apps/sim/app/api/workspaces/[id]/files/[fileId]/csv-preview/route.ts index 3856eb3c05b..0fd9553a4c4 100644 --- a/apps/sim/app/api/workspaces/[id]/files/[fileId]/csv-preview/route.ts +++ b/apps/sim/app/api/workspaces/[id]/files/[fileId]/csv-preview/route.ts @@ -5,7 +5,7 @@ import { parseRequest } from '@/lib/api/server' import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid' import { withRouteHandler } from '@/lib/core/utils/with-route-handler' import { getCsvPreviewSlice } from '@/lib/file-parsers/csv-preview-slice' -import { parseWorkspaceFileKey } from '@/lib/uploads/contexts/workspace/workspace-file-manager' +import { getWorkspaceFile } from 
'@/lib/uploads/contexts/workspace/workspace-file-manager' import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils' const logger = createLogger('WorkspaceCsvPreviewAPI') @@ -23,7 +23,7 @@ export const GET = withRouteHandler( const parsed = await parseRequest(getWorkspaceCsvPreviewContract, request, context) if (!parsed.success) return parsed.response - const { id: workspaceId } = parsed.data.params + const { id: workspaceId, fileId } = parsed.data.params const { key } = parsed.data.query const permission = await getUserEntityPermissions(userId, 'workspace', workspaceId) @@ -31,13 +31,19 @@ export const GET = withRouteHandler( return NextResponse.json({ error: 'Access denied' }, { status: 403 }) } - // The key is client-supplied — confine it to this workspace's storage prefix so a caller - // can't read another workspace's object. - if (parseWorkspaceFileKey(key) !== workspaceId) { - return NextResponse.json({ error: 'Invalid file key for workspace' }, { status: 400 }) + // Resolve the file record (active, in this workspace) and read from its authoritative key — + // never the client-supplied one. This rejects archived/deleted files and keys with no live + // row, matching the access guarantees of /api/files/serve. 
+ const record = await getWorkspaceFile(workspaceId, fileId) + if (!record || record.key !== key) { + return NextResponse.json({ error: 'File not found' }, { status: 404 }) } - const slice = await getCsvPreviewSlice({ key, context: 'workspace', signal: request.signal }) + const slice = await getCsvPreviewSlice({ + key: record.key, + context: 'workspace', + signal: request.signal, + }) logger.info('CSV preview served', { workspaceId, diff --git a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/csv-import.ts b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/csv-import.ts index aba5fbcf406..d852820bfc1 100644 --- a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/csv-import.ts +++ b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/csv-import.ts @@ -24,7 +24,13 @@ export function useCsvTruncationImport( const router = useRouter() const importFile = useImportFileAsTable() + // Guards against a double-tap on the toast action kicking off two parallel imports of the same + // file. Reset once the kickoff settles so a failed import can be retried. 
+ const importingRef = useRef(false) + const importAsTable = useCallback(() => { + if (importingRef.current) return + importingRef.current = true const pendingId = `pending_${generateId()}` useImportTrayStore .getState() @@ -39,8 +45,10 @@ export function useCsvTruncationImport( importFile.mutate( { workspaceId, fileKey: file.key, fileName: file.name }, { - onSuccess: () => useImportTrayStore.getState().endUpload(pendingId), - onError: () => useImportTrayStore.getState().endUpload(pendingId), + onSettled: () => { + importingRef.current = false + useImportTrayStore.getState().endUpload(pendingId) + }, } ) // importFile.mutate and router are stable references diff --git a/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/mothership-view.tsx b/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/mothership-view.tsx index 529cf04b509..1daa3e64381 100644 --- a/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/mothership-view.tsx +++ b/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/mothership-view.tsx @@ -88,7 +88,7 @@ export const MothershipView = memo( // A large CSV renders read-only (streamed) with no editor, so it must not offer the // edit/split/preview toggle. Its size lives on the file record, not the resource tab. - const { data: files } = useWorkspaceFiles(workspaceId, 'active', { + const { data: files, isLoading: filesLoading } = useWorkspaceFiles(workspaceId, 'active', { enabled: active?.type === 'file', }) const activeFile = active?.type === 'file' ? files?.find((f) => f.id === active.id) : undefined @@ -97,6 +97,9 @@ export const MothershipView = memo( canEdit && active?.type === 'file' && RICH_PREVIEWABLE_EXTENSIONS.has(getFileExtension(active.title)) && + // Wait for the record before deciding — otherwise the toggle flashes on for a large CSV + // until its size loads and we can tell it's read-only. 
+ !filesLoading && !(activeFile && isCsvStreamOnly(activeFile)) return ( diff --git a/apps/sim/lib/file-parsers/csv-preview-slice.ts b/apps/sim/lib/file-parsers/csv-preview-slice.ts index a78ec99c6ed..f7053119950 100644 --- a/apps/sim/lib/file-parsers/csv-preview-slice.ts +++ b/apps/sim/lib/file-parsers/csv-preview-slice.ts @@ -59,6 +59,10 @@ export async function getCsvPreviewSlice({ try { // Pull chunks until the first newline so the delimiter can be sniffed before parsing. + // Accumulate the header line incrementally — appending each chunk's decoded text rather than + // re-concatenating the whole buffer each iteration (which would be O(n²) for a header split + // across many small chunks). The delimiter chars (`,` `\t` `;`) are ASCII, so a multi-byte + // character split at a chunk boundary can't introduce a false delimiter into the count. const sniffed: Buffer[] = [] let firstLine = '' let sniffedBytes = 0 @@ -68,13 +72,13 @@ export async function getCsvPreviewSlice({ const chunk = Buffer.isBuffer(value) ? 
value : Buffer.from(value) sniffed.push(chunk) sniffedBytes += chunk.length - const combined = Buffer.concat(sniffed).toString('utf-8') - const nl = combined.indexOf('\n') + const text = chunk.toString('utf-8') + const nl = text.indexOf('\n') if (nl !== -1) { - firstLine = combined.slice(0, nl) + firstLine += text.slice(0, nl) break } - firstLine = combined + firstLine += text if (sniffedBytes >= DELIMITER_SNIFF_MAX_BYTES) break } From d9de35d77695dcffd5970c07435580183f892ec7 Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Wed, 17 Jun 2026 18:38:25 -0700 Subject: [PATCH 3/3] fix(files): scope mothership preview-toggle loading guard to CSV files only --- .../home/components/mothership-view/mothership-view.tsx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/mothership-view.tsx b/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/mothership-view.tsx index 1daa3e64381..ba4dce41d4c 100644 --- a/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/mothership-view.tsx +++ b/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/mothership-view.tsx @@ -92,14 +92,16 @@ export const MothershipView = memo( enabled: active?.type === 'file', }) const activeFile = active?.type === 'file' ? files?.find((f) => f.id === active.id) : undefined + const isActiveCsv = active?.type === 'file' && getFileExtension(active.title) === 'csv' const isActivePreviewable = canEdit && active?.type === 'file' && RICH_PREVIEWABLE_EXTENSIONS.has(getFileExtension(active.title)) && - // Wait for the record before deciding — otherwise the toggle flashes on for a large CSV - // until its size loads and we can tell it's read-only. - !filesLoading && + // Only a CSV's previewability depends on its size (large = read-only, no editor). 
Wait for + // the record before deciding so the toggle doesn't flash on for a large CSV — but don't gate + // other rich types (markdown, html, svg, …) on the file list loading. + !(isActiveCsv && filesLoading) && !(activeFile && isCsvStreamOnly(activeFile)) return (