diff --git a/client/dive-common/apispec.ts b/client/dive-common/apispec.ts index 55fca437c..763cde7e9 100644 --- a/client/dive-common/apispec.ts +++ b/client/dive-common/apispec.ts @@ -418,6 +418,78 @@ export interface SegmentationStatusResponse { ready?: boolean; } +/** + * Text Query Types for open-vocabulary detection/segmentation + */ + +/** A single detection returned from a text query */ +export interface TextQueryDetection { + /** Bounding box [x1, y1, x2, y2] */ + box: [number, number, number, number]; + /** Polygon coordinates as [x, y] pairs */ + polygon?: [number, number][]; + /** Confidence score */ + score: number; + /** Label/class name (often the query text) */ + label: string; + /** Low-res mask for refinement (optional) */ + lowResMask?: number[][]; +} + +export interface TextQueryRequest { + /** Path to the image file */ + imagePath: string; + /** Text query describing what to find (e.g., "fish", "person swimming") */ + text: string; + /** Confidence threshold for detections (default: 0.3) */ + boxThreshold?: number; + /** Maximum number of detections to return (default: 10) */ + maxDetections?: number; + /** Optional boxes to refine [x1, y1, x2, y2][] */ + boxes?: [number, number, number, number][]; + /** Optional keypoints for refinement [x, y][] */ + points?: [number, number][]; + /** Labels for points: 1 for foreground, 0 for background */ + pointLabels?: number[]; + /** Optional masks to refine */ + masks?: number[][][]; +} + +export interface TextQueryResponse { + /** Whether the query succeeded */ + success: boolean; + /** Error message if failed */ + error?: string; + /** List of detections found */ + detections?: TextQueryDetection[]; + /** The original query text */ + query?: string; + /** Whether fallback method was used (no native text support) */ + fallback?: boolean; +} + +export interface RefineDetectionsRequest { + /** Path to the image file */ + imagePath: string; + /** Detections to refine */ + detections: TextQueryDetection[]; + /** Optional additional keypoints for refinement [x, y][] */ + points?: [number, number][]; + /** Labels for additional points: 1 for foreground, 0 for background */ + pointLabels?: number[]; + /** Whether to include refined masks in response */ + refineMasks?: boolean; +} + +export interface RefineDetectionsResponse { + /** Whether the refinement succeeded */ + success: boolean; + /** Error message if failed */ + error?: string; + /** Refined detections */ + detections?: TextQueryDetection[]; +} + export { provideApi, useApi, diff --git a/client/dive-common/components/EditorMenu.vue b/client/dive-common/components/EditorMenu.vue index 4366737c6..761c189e7 100644 --- a/client/dive-common/components/EditorMenu.vue +++ b/client/dive-common/components/EditorMenu.vue @@ -80,11 +80,18 @@ export default defineComponent({ type: Boolean, default: true, }, + textQueryEnabled: { + type: Boolean, + default: false, + }, }, emits: [ 'set-annotation-state', 'update:tail-settings', 'update:show-user-created-icon', + 'text-query-init', + 'text-query', + 'text-query-all-frames', ], setup(props, { emit }) { const toolTimeTimeout = ref(null); @@ -103,6 +110,59 @@ export default defineComponent({ localStorage.setItem(STORAGE_KEY, String(value)); }); + // Text query state + const textQueryDialogOpen = ref(false); + const textQueryInput = ref(''); + const textQueryLoading = ref(false); + const textQueryThreshold = ref(0.3); + const textQueryInitializing = ref(false); + const textQueryServiceError = ref(''); + const textQueryAllFrames = ref(false); + + const openTextQueryDialog = () => { + textQueryDialogOpen.value = true; + textQueryInput.value = ''; + textQueryServiceError.value = ''; + textQueryAllFrames.value = false; + textQueryInitializing.value = true; + emit('text-query-init'); + }; + + const closeTextQueryDialog = () => { + textQueryDialogOpen.value = false; + textQueryInput.value = ''; + textQueryServiceError.value = ''; + textQueryInitializing.value = false; + textQueryAllFrames.value = false; + }; + + const onTextQueryServiceReady = (success: boolean, error?: string) => { + textQueryInitializing.value = false; + if (!success) { + textQueryServiceError.value = error || 'Text query service is not available'; + } + }; + + const submitTextQuery = () => { + if (!textQueryInput.value.trim()) { + return; + } + textQueryLoading.value = true; + if (textQueryAllFrames.value) { + emit('text-query-all-frames', { + text: textQueryInput.value.trim(), + boxThreshold: textQueryThreshold.value, + }); + } else { + emit('text-query', { + text: textQueryInput.value.trim(), + boxThreshold: textQueryThreshold.value, + }); + } + closeTextQueryDialog(); + textQueryLoading.value = false; + }; + const modeToolTips = { Creating: { rectangle: 'Drag to draw rectangle. Press ESC to exit.', @@ -151,6 +211,18 @@ export default defineComponent({ ...r.mousetrap(), ], })), + /* Text Query button included alongside other annotation types (desktop only) */ + ...(props.textQueryEnabled ? [{ + id: 'Text Query', + icon: 'mdi-text-search', + active: false, + description: 'Text Query', + mousetrap: [{ + bind: 't', + handler: () => openTextQueryDialog(), + }], + click: () => openTextQueryDialog(), + }] : []), ]; }); @@ -253,6 +325,18 @@ export default defineComponent({ segmentationPredicting, segmentationLoading, segmentationTooltip, + // Text query + textQueryDialogOpen, + textQueryInput, + textQueryLoading, + textQueryThreshold, + textQueryInitializing, + textQueryServiceError, + textQueryAllFrames, + openTextQueryDialog, + closeTextQueryDialog, + onTextQueryServiceReady, + submitTextQuery, }; }, }); @@ -434,6 +518,108 @@ export default defineComponent({ @update:show-user-created-icon="$emit('update:show-user-created-icon', $event)" /> + + + + + + + mdi-text-search + + Text Query + + + +
+ +

+ Loading text query model... +

+
+ +
+ + mdi-alert-circle + +

+ {{ textQueryServiceError }} +

+
+ + +

+ Textual query support uses architectures derived from Meta's SAM3 project +

+
+ + + + {{ textQueryServiceError ? 'Close' : 'Cancel' }} + + + Search + + +
+
diff --git a/client/dive-common/components/Viewer.vue b/client/dive-common/components/Viewer.vue index ea2580422..7bd5d57bd 100644 --- a/client/dive-common/components/Viewer.vue +++ b/client/dive-common/components/Viewer.vue @@ -127,6 +127,10 @@ export default defineComponent({ type: Array as PropType, default: () => [], }, + textQueryEnabled: { + type: Boolean, + default: false, + }, }, setup(props, { emit }) { const { prompt, visible } = usePrompt(); @@ -170,6 +174,17 @@ export default defineComponent({ const controlsRef = ref(); const controlsHeight = ref(0); const controlsCollapsed = ref(false); + const editorMenuRef = ref(); + + /** + * Forward text query service ready status to EditorMenu + * Called by ViewerLoader when text query service initialization completes + */ + function onTextQueryServiceReady(success: boolean, error?: string) { + if (editorMenuRef.value?.onTextQueryServiceReady) { + editorMenuRef.value.onTextQueryServiceReady(success, error); + } + } const sideBarCollapsed = ref(false); // Sidebar mode: 'left', 'bottom', or 'collapsed' @@ -1233,6 +1248,8 @@ export default defineComponent({ controlsHeight, controlsCollapsed, sideBarCollapsed, + editorMenuRef, + onTextQueryServiceReady, sidebarMode, cycleSidebarMode, sidebarModeIcon, @@ -1417,6 +1434,7 @@ export default defineComponent({