diff --git a/samples/VideoGenerationPOC/Program.cs b/samples/VideoGenerationPOC/Program.cs
new file mode 100644
index 00000000000..78afef12b63
--- /dev/null
+++ b/samples/VideoGenerationPOC/Program.cs
@@ -0,0 +1,316 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// Video Generation POC — Microsoft.Extensions.AI general-purpose CLI
+//
+// Usage examples:
+//   dotnet run -- generate "A cat playing piano"
+//   dotnet run -- generate "She turns and smiles" --input reference.jpg
+//   dotnet run -- generate "A tracking shot of DotBot" --character char_abc123
+//   dotnet run -- upload-character DotBot --input clip.mp4
+//   dotnet run -- edit "Change the sky to sunset" --video video_abc123
+//   dotnet run -- extend "Continue the scene" --video video_abc123
+//
+// All commands print machine-parseable lines (OPERATION_ID, CHARACTER_ID) for scripting.
+
+using System.CommandLine;
+using System.Drawing;
+using System.Text.Json.Nodes;
+using Microsoft.Extensions.AI;
+using Microsoft.Extensions.Logging;
+using OpenAI;
+
+// ── Shared options ─────────────────────────────────────────────────────────
+var modelOption = new Option<string>("--model", () => "sora-2", "Model ID.");
+var outputOption = new Option<string?>("--output", "Output file path (.mp4). Omit for URI-only display.");
+var durationOption = new Option<int?>("--duration", "Duration in seconds.");
+var widthOption = new Option<int>("--width", () => 1280, "Video width.");
+var heightOption = new Option<int>("--height", () => 720, "Video height.");
+var formatOption = new Option<string>("--format", () => "data", "Response format: data or uri.");
+
+// ── generate ───────────────────────────────────────────────────────────────
+var generatePromptArg = new Argument<string>("prompt", "Text prompt describing the video to generate.");
+var inputOption = new Option<string[]>("--input", "Input file(s) — images for image-to-video, or a video for editing.") { AllowMultipleArgumentsPerToken = true };
+var characterOption = new Option<string[]>("--character", "Character ID(s) to include.") { AllowMultipleArgumentsPerToken = true };
+
+var generateCommand = new Command("generate", "Generate a new video from a text prompt (optionally with input images and characters).")
+{
+    generatePromptArg, modelOption, outputOption, inputOption, characterOption, durationOption, widthOption, heightOption, formatOption,
+};
+
+generateCommand.SetHandler(async (context) =>
+{
+    string prompt = context.ParseResult.GetValueForArgument(generatePromptArg);
+    string model = context.ParseResult.GetValueForOption(modelOption)!;
+    string? outputPath = context.ParseResult.GetValueForOption(outputOption);
+    string[] inputPaths = context.ParseResult.GetValueForOption(inputOption) ?? [];
+    string[] characterIds = context.ParseResult.GetValueForOption(characterOption) ?? [];
+    int? duration = context.ParseResult.GetValueForOption(durationOption);
+    int? width = context.ParseResult.GetValueForOption(widthOption);
+    int? height = context.ParseResult.GetValueForOption(heightOption);
+    string format = context.ParseResult.GetValueForOption(formatOption)!;
+
+    using var generator = CreateGenerator(model);
+
+    List<DataContent>? inputMedia = await LoadInputFilesAsync(inputPaths);
+    if (inputMedia is null && inputPaths.Length > 0)
+    {
+        context.ExitCode = 1;
+        return;
+    }
+
+    var options = BuildOptions(duration, width, height, format, characterIds);
+    var request = new VideoGenerationRequest(prompt);
+    if (inputMedia is { Count: > 0 })
+    {
+        request.StartFrame = inputMedia[0];
+    }
+
+    var operation = await generator.GenerateAsync(request, options);
+    await CompleteAndSaveAsync(operation, options, outputPath);
+});
+
+// ── upload-character ───────────────────────────────────────────────────────
+var charNameArg = new Argument<string>("name", "Name for the character (mention this name verbatim in prompts).");
+var charInputOption = new Option<string>("--input", "Video file (.mp4) to upload as the character source.") { IsRequired = true };
+
+var uploadCharCommand = new Command("upload-character", "Upload a video clip as a reusable character asset.")
+{
+    charNameArg, charInputOption, modelOption,
+};
+
+uploadCharCommand.SetHandler(async (context) =>
+{
+    string name = context.ParseResult.GetValueForArgument(charNameArg);
+    string inputPath = context.ParseResult.GetValueForOption(charInputOption)!;
+    string model = context.ParseResult.GetValueForOption(modelOption)!;
+
+    if (!File.Exists(inputPath))
+    {
+        Console.Error.WriteLine($"Error: File not found: {inputPath}");
+        context.ExitCode = 1;
+        return;
+    }
+
+    using var generator = CreateGenerator(model);
+    DataContent videoContent = await DataContent.LoadFromAsync(inputPath);
+
+    Console.WriteLine($"Uploading character '{name}' from {inputPath} ({videoContent.Data.Length} bytes)...");
+    string characterId = await generator.UploadVideoCharacterAsync(name, videoContent);
+
+    Console.WriteLine($"CHARACTER_ID: {characterId}");
+});
+
+// ── edit ───────────────────────────────────────────────────────────────────
+var editPromptArg = new Argument<string>("prompt", "Prompt describing the edit to apply.");
+var editVideoOption = new Option<string>("--video", "Video ID of the generation to edit.") { IsRequired = true };
+
+var editCommand = new Command("edit", "Edit an existing video by ID.")
+{
+    editPromptArg, editVideoOption, modelOption, outputOption, formatOption,
+};
+
+editCommand.SetHandler(async (context) =>
+{
+    string prompt = context.ParseResult.GetValueForArgument(editPromptArg);
+    string videoId = context.ParseResult.GetValueForOption(editVideoOption)!;
+    string model = context.ParseResult.GetValueForOption(modelOption)!;
+    string? outputPath = context.ParseResult.GetValueForOption(outputOption);
+    string format = context.ParseResult.GetValueForOption(formatOption)!;
+
+    using var generator = CreateGenerator(model);
+
+    var options = BuildOptions(duration: null, width: null, height: null, format, characterIds: []);
+    var request = new VideoGenerationRequest(prompt)
+    {
+        OperationKind = VideoOperationKind.Edit,
+        SourceVideoId = videoId,
+    };
+
+    var operation = await generator.GenerateAsync(request, options);
+    await CompleteAndSaveAsync(operation, options, outputPath);
+});
+
+// ── extend ─────────────────────────────────────────────────────────────────
+var extendPromptArg = new Argument<string>("prompt", "Prompt describing how the scene should continue.");
+var extendVideoOption = new Option<string>("--video", "Video ID of the completed video to extend.") { IsRequired = true };
+
+var extendCommand = new Command("extend", "Extend a completed video by ID.")
+{
+    extendPromptArg, extendVideoOption, modelOption, outputOption, durationOption, formatOption,
+};
+
+extendCommand.SetHandler(async (context) =>
+{
+    string prompt = context.ParseResult.GetValueForArgument(extendPromptArg);
+    string videoId = context.ParseResult.GetValueForOption(extendVideoOption)!;
+    string model = context.ParseResult.GetValueForOption(modelOption)!;
+    string? outputPath = context.ParseResult.GetValueForOption(outputOption);
+    int? duration = context.ParseResult.GetValueForOption(durationOption);
+    string format = context.ParseResult.GetValueForOption(formatOption)!;
+
+    using var generator = CreateGenerator(model);
+
+    var options = BuildOptions(duration, width: null, height: null, format, characterIds: []);
+    var request = new VideoGenerationRequest(prompt)
+    {
+        OperationKind = VideoOperationKind.Extend,
+        SourceVideoId = videoId,
+    };
+
+    var operation = await generator.GenerateAsync(request, options);
+    await CompleteAndSaveAsync(operation, options, outputPath);
+});
+
+// ── Root command ───────────────────────────────────────────────────────────
+var rootCommand = new RootCommand("Video Generation POC — Microsoft.Extensions.AI CLI for video generation, editing, extending, and character management.")
+{
+    generateCommand,
+    uploadCharCommand,
+    editCommand,
+    extendCommand,
+};
+
+return await rootCommand.InvokeAsync(args);
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Helpers
+// ═══════════════════════════════════════════════════════════════════════════
+
+// Builds an OpenAI-backed IVideoGenerator for the given model, wrapped with
+// logging and OpenTelemetry. Exits the process if OPENAI_API_KEY is unset.
+static IVideoGenerator CreateGenerator(string model)
+{
+    string? apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY");
+    if (string.IsNullOrEmpty(apiKey))
+    {
+        Console.Error.WriteLine("Error: Set the OPENAI_API_KEY environment variable.");
+        Environment.Exit(1);
+    }
+
+    var loggerFactory = LoggerFactory.Create(b => b.AddConsole().SetMinimumLevel(LogLevel.Debug));
+    var openAIClient = new OpenAIClient(apiKey);
+
+    return openAIClient
+        .GetVideoClient()
+        .AsIVideoGenerator(model)
+        .AsBuilder()
+        .UseLogging(loggerFactory)
+        .UseOpenTelemetry(loggerFactory)
+        .Build();
+}
+
+// Translates raw CLI values into VideoGenerationOptions. Character IDs ride in
+// AdditionalProperties as a provider-specific JSON payload.
+static VideoGenerationOptions BuildOptions(int? duration, int? width, int? height, string format, string[] characterIds)
+{
+    var options = new VideoGenerationOptions
+    {
+        ResponseFormat = string.Equals(format, "uri", StringComparison.OrdinalIgnoreCase)
+            ? VideoGenerationResponseFormat.Uri
+            : VideoGenerationResponseFormat.Data,
+    };
+
+    if (duration.HasValue)
+    {
+        options.Duration = TimeSpan.FromSeconds(duration.Value);
+    }
+
+    if (width.HasValue && height.HasValue)
+    {
+        options.VideoSize = new Size(width.Value, height.Value);
+    }
+
+    if (characterIds.Length > 0)
+    {
+        var chars = new JsonArray();
+        foreach (string charId in characterIds)
+        {
+            chars.Add(new JsonObject { ["id"] = charId });
+        }
+
+        options.AdditionalProperties = new() { ["characters"] = chars };
+    }
+
+    return options;
+}
+
+// Loads each input path as DataContent. Returns null both for "no inputs given"
+// and for "an input was missing" — callers disambiguate via inputPaths.Length.
+static async Task<List<DataContent>?> LoadInputFilesAsync(string[] inputPaths)
+{
+    if (inputPaths.Length == 0)
+    {
+        return null;
+    }
+
+    var media = new List<DataContent>();
+    foreach (string path in inputPaths)
+    {
+        if (!File.Exists(path))
+        {
+            Console.Error.WriteLine($"Error: Input file not found: {path}");
+            return null;
+        }
+
+        DataContent loaded = await DataContent.LoadFromAsync(path);
+        media.Add(loaded);
+        Console.WriteLine($"  Loaded: {path} ({loaded.MediaType}, {loaded.Data.Length} bytes)");
+    }
+
+    return media;
+}
+
+// Waits for the operation to finish (reporting progress), then saves or prints
+// each resulting content item. Multiple results get an _{index} filename suffix.
+static async Task CompleteAndSaveAsync(VideoGenerationOperation operation, VideoGenerationOptions options, string? outputPath)
+{
+    Console.WriteLine($"OPERATION_ID: {operation.OperationId}");
+    Console.WriteLine($"  Status: {operation.Status}");
+
+    var sw = System.Diagnostics.Stopwatch.StartNew();
+    await operation.WaitForCompletionAsync(
+        new Progress<VideoGenerationProgress>(p =>
+            Console.WriteLine($"  Progress: {p.Status}{(p.PercentComplete.HasValue ? $" ({p.PercentComplete}%)" : string.Empty)}")));
+
+    sw.Stop();
+    Console.WriteLine($"  Completed in {sw.Elapsed.TotalSeconds:F1}s");
+
+    if (operation.Usage is { } usage)
+    {
+        Console.WriteLine($"  Tokens: input={usage.InputTokenCount}, output={usage.OutputTokenCount}, total={usage.TotalTokenCount}");
+    }
+
+    var contents = await operation.GetContentsAsync(options);
+    Console.WriteLine($"  {contents.Count} content item(s)");
+
+    for (int i = 0; i < contents.Count; i++)
+    {
+        switch (contents[i])
+        {
+            case DataContent dc:
+                if (outputPath is not null)
+                {
+                    string filePath = contents.Count == 1
+                        ? outputPath
+                        : Path.Combine(
+                            Path.GetDirectoryName(outputPath) ?? ".",
+                            $"{Path.GetFileNameWithoutExtension(outputPath)}_{i}{Path.GetExtension(outputPath)}");
+
+                    string? dir = Path.GetDirectoryName(filePath);
+                    if (dir is not null)
+                    {
+                        Directory.CreateDirectory(dir);
+                    }
+
+                    await dc.SaveToAsync(filePath);
+                    Console.WriteLine($"  [{i}] Saved: {filePath} ({dc.Data.Length} bytes, {dc.MediaType})");
+                }
+                else
+                {
+                    Console.WriteLine($"  [{i}] DataContent: {dc.Data.Length} bytes ({dc.MediaType})");
+                }
+
+                break;
+
+            case UriContent uc:
+                Console.WriteLine($"  [{i}] URI: {uc.Uri} ({uc.MediaType})");
+                break;
+
+            default:
+                Console.WriteLine($"  [{i}] {contents[i].GetType().Name}");
+                break;
+        }
+    }
+}
diff --git a/samples/VideoGenerationPOC/VideoGenerationPOC.csproj b/samples/VideoGenerationPOC/VideoGenerationPOC.csproj
new file mode 100644
index 00000000000..2572c21a941
--- /dev/null
+++ b/samples/VideoGenerationPOC/VideoGenerationPOC.csproj
@@ -0,0 +1,18 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net10.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+    <NoWarn>$(NoWarn);MEAI001;OPENAI001</NoWarn>
+  </PropertyGroup>
+
+  <!-- NOTE(review): the ItemGroup contents (Project/PackageReference entries) were
+       destroyed when angle-bracketed markup was stripped in transit — restore them
+       from the original patch before applying; only the property values above survived. -->
+  <ItemGroup>
+  </ItemGroup>
+
+</Project>
diff --git a/samples/VideoGenerationPOC/demo-dotbot.ps1 b/samples/VideoGenerationPOC/demo-dotbot.ps1
new file mode 100644
index 00000000000..989462c6f21
--- /dev/null
+++ b/samples/VideoGenerationPOC/demo-dotbot.ps1
@@ -0,0 +1,244 @@
+#!/usr/bin/env pwsh
+# Video Generation POC — end-to-end DotNetBot scenario
+#
+# Prerequisites:
+# - OPENAI_API_KEY environment variable set +# - Reference image at $ReferenceImage (or pass -ReferenceImage path) +# +# This script demonstrates: +# 1. Image-to-video generation from a reference image +# 2. Character upload from the generated clip +# 3. Two character-consistent generations (surfing + groceries) +# 4. Editing the surfing video (sunset palette shift) +# 5. Extending the grocery video (fruit juggling) +# +# Resume: The script saves progress to a state file in the output directory. +# If a step already completed (output file + ID exist), it is skipped. +# Pass -Reset to start fresh. + +param( + [string]$ReferenceImage = "my-dotnet-bot-mod.png", + [string]$OutputDir = "..\..\artifacts\demo-output", + [string]$Model = "sora-2", + [switch]$Reset +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = "Stop" + +if (-not (Test-Path $ReferenceImage)) { + Write-Error "Reference image not found: $ReferenceImage" + exit 1 +} + +New-Item -ItemType Directory -Path $OutputDir -Force | Out-Null + +# ── State management ──────────────────────────────────────────────────── +$stateFile = Join-Path $OutputDir "demo-state.json" + +if ($Reset -and (Test-Path $stateFile)) { + Remove-Item $stateFile -Force + Write-Host "State file cleared." 
-ForegroundColor Yellow +} + +function Get-State { + if (Test-Path $stateFile) { + return Get-Content $stateFile -Raw | ConvertFrom-Json -AsHashtable + } + return @{} +} + +function Set-State { + param([string]$Key, [string]$Value) + $s = Get-State + $s[$Key] = $Value + $s | ConvertTo-Json | Set-Content $stateFile +} + +# ── Tool helpers ──────────────────────────────────────────────────────── +function Invoke-Tool { + param([string]$Label, [string[]]$Arguments) + Write-Host "" + Write-Host ("=" * 70) -ForegroundColor Cyan + Write-Host " $Label" -ForegroundColor Cyan + Write-Host ("=" * 70) -ForegroundColor Cyan + Write-Host "> dotnet run --project $PSScriptRoot -- $($Arguments -join ' ')" -ForegroundColor DarkGray + + $output = & dotnet run --project $PSScriptRoot -- @Arguments 2>&1 + $output | ForEach-Object { Write-Host " $_" } + if ($LASTEXITCODE -ne 0) { + Write-Error "Tool exited with code $LASTEXITCODE" + exit $LASTEXITCODE + } + return ($output | Out-String) +} + +function Extract-Id { + param([string]$Output, [string]$Prefix) + if ($Output -match "$Prefix\:\s*(\S+)") { + return $Matches[1] + } + Write-Error "Could not find $Prefix in tool output." 
+ exit 1 +} + +function Skip-OrRun { + param( + [string]$StateKey, + [string]$Label, + [string]$OutputFile, + [scriptblock]$Action + ) + $state = Get-State + if ($state.ContainsKey($StateKey) -and ((-not $OutputFile) -or (Test-Path $OutputFile))) { + Write-Host "" + Write-Host ("=" * 70) -ForegroundColor DarkGray + Write-Host " SKIP: $Label (already completed — $StateKey=$($state[$StateKey]))" -ForegroundColor DarkGray + Write-Host ("=" * 70) -ForegroundColor DarkGray + return $state[$StateKey] + } + $id = & $Action + Set-State $StateKey $id + return $id +} + +# ───────────────────────────────────────────────────────────────────────── +# Step 1: Generate a 4-second intro clip from the reference image +# ───────────────────────────────────────────────────────────────────────── +$introPath = Join-Path $OutputDir "01_DotNetBot_intro.mp4" +$introId = Skip-OrRun "introId" "Step 1: Image-to-video — DotNetBot intro (4s)" $introPath { + $out = Invoke-Tool "Step 1: Image-to-video — DotNetBot intro (4s)" @( + "generate", + "A smooth 360-degree tracking shot around a cute spherical robot called DotNetBot. He has an spherical shape with antenna on his head that remains stationary, two arms and legs, and a belt with buckle that reads '.NET'. The camera orbits to show every side, then DotNetBot lifts his right arm to give the shaka hand sign extending thumb and pinky finger.", + "--input", $ReferenceImage, + "--model", $Model, + "--duration", "4", + "--output", $introPath + ) + $id = Extract-Id $out "OPERATION_ID" + Write-Host " >> Intro video ID: $id" -ForegroundColor Green + return $id +} + +# ───────────────────────────────────────────────────────────────────────── +# Step 1b: Trim the intro clip to ≤4.0 s for character upload +# OpenAI requires character reference videos to be between 2–4 seconds, +# but generated clips may slightly overshoot. Use ffmpeg to hard-trim. 
+# ───────────────────────────────────────────────────────────────────────── +$trimmedPath = Join-Path $OutputDir "01b_DotNetBot_intro_trimmed.mp4" +if ((Test-Path $introPath) -and -not (Test-Path $trimmedPath)) { + Write-Host "" + Write-Host ("=" * 70) -ForegroundColor Cyan + Write-Host " Step 1b: Trimming intro clip to 3.9 s with ffmpeg (re-encode)" -ForegroundColor Cyan + Write-Host ("=" * 70) -ForegroundColor Cyan + & ffmpeg -y -i $introPath -t 3.9 $trimmedPath 2>&1 | ForEach-Object { Write-Host " $_" } + if ($LASTEXITCODE -ne 0) { + Write-Error "ffmpeg trim failed (exit code $LASTEXITCODE). Is ffmpeg installed?" + exit 1 + } + Write-Host " >> Trimmed clip: $trimmedPath" -ForegroundColor Green +} elseif (Test-Path $trimmedPath) { + Write-Host "" + Write-Host ("=" * 70) -ForegroundColor DarkGray + Write-Host " SKIP: Step 1b — trimmed clip already exists" -ForegroundColor DarkGray + Write-Host ("=" * 70) -ForegroundColor DarkGray +} + +# ───────────────────────────────────────────────────────────────────────── +# Step 2: Upload the trimmed intro clip as character "DotNetBot" +# ───────────────────────────────────────────────────────────────────────── +$charId = Skip-OrRun "charId" "Step 2: Upload character 'DotNetBot'" "" { + $out = Invoke-Tool "Step 2: Upload character 'DotNetBot'" @( + "upload-character", "DotNetBot", + "--input", $trimmedPath, + "--model", $Model + ) + $id = Extract-Id $out "CHARACTER_ID" + Write-Host " >> Character ID: $id" -ForegroundColor Green + return $id +} + +# ───────────────────────────────────────────────────────────────────────── +# Step 3: Generate DotNetBot surfing (with character) +# ───────────────────────────────────────────────────────────────────────── +$surfPath = Join-Path $OutputDir "03_DotNetBot_surfing.mp4" +$surfId = Skip-OrRun "surfId" "Step 3: DotNetBot goes surfing" $surfPath { + $out = Invoke-Tool "Step 3: DotNetBot goes surfing" @( + "generate", + "A cinematic wide shot of DotNetBot surfing a massive turquoise wave 
at golden hour. Water sprays around him as he crouches on the board, sun flare behind.", + "--character", $charId, + "--model", $Model, + "--duration", "8", + "--output", $surfPath + ) + $id = Extract-Id $out "OPERATION_ID" + Write-Host " >> Surfing video ID: $id" -ForegroundColor Green + return $id +} + +# ───────────────────────────────────────────────────────────────────────── +# Step 4: Generate DotNetBot buying groceries (with character) +# ───────────────────────────────────────────────────────────────────────── +$groceryPath = Join-Path $OutputDir "04_DotNetBot_groceries.mp4" +$groceryId = Skip-OrRun "groceryId" "Step 4: DotNetBot buys groceries" $groceryPath { + $out = Invoke-Tool "Step 4: DotNetBot buys groceries" @( + "generate", + "A medium shot of DotNetBot rolling through a colorful outdoor market, picking up oranges and tomatoes with his small arms and placing them in a basket.", + "--character", $charId, + "--model", $Model, + "--duration", "8", + "--output", $groceryPath + ) + $id = Extract-Id $out "OPERATION_ID" + Write-Host " >> Grocery video ID: $id" -ForegroundColor Green + return $id +} + +# ───────────────────────────────────────────────────────────────────────── +# Step 5: Edit the surfing video — shift to sunset tones +# ───────────────────────────────────────────────────────────────────────── +$editPath = Join-Path $OutputDir "05_DotNetBot_surfing_sunset.mp4" +$editId = Skip-OrRun "editId" "Step 5: Edit surfing video — warm sunset palette" $editPath { + $out = Invoke-Tool "Step 5: Edit surfing video — warm sunset palette" @( + "edit", + "Shift the entire color palette to warm sunset tones - deep oranges, soft pinks, and golden highlights. 
The water turns a deep amber and the sky glows.", + "--video", $surfId, + "--model", $Model, + "--output", $editPath + ) + $id = Extract-Id $out "OPERATION_ID" + Write-Host " >> Edit video ID: $id" -ForegroundColor Green + return $id +} + +# ───────────────────────────────────────────────────────────────────────── +# Step 6: Extend the grocery video — DotNetBot juggles fruit +# ───────────────────────────────────────────────────────────────────────── +$extendPath = Join-Path $OutputDir "06_DotNetBot_groceries_extended.mp4" +$extendId = Skip-OrRun "extendId" "Step 6: Extend grocery video — fruit juggling exit" $extendPath { + $out = Invoke-Tool "Step 6: Extend grocery video — fruit juggling exit" @( + "extend", + "Continue the scene as DotNetBot leaves the market stall juggling three oranges, rolling away happily while vendors cheer in the background.", + "--video", $groceryId, + "--model", $Model, + "--duration", "8", + "--output", $extendPath + ) + $id = Extract-Id $out "OPERATION_ID" + Write-Host " >> Extended video ID: $id" -ForegroundColor Green + return $id +} + +# ───────────────────────────────────────────────────────────────────────── +# Summary +# ───────────────────────────────────────────────────────────────────────── +Write-Host "" +Write-Host ("=" * 70) -ForegroundColor Cyan +Write-Host " All done! 
Output files:" -ForegroundColor Cyan +Write-Host ("=" * 70) -ForegroundColor Cyan +Get-ChildItem $OutputDir -Filter "*.mp4" | ForEach-Object { + Write-Host " $_" -ForegroundColor Green +} +Write-Host "" +Write-Host " State: $stateFile" -ForegroundColor DarkGray +Write-Host " (pass -Reset to start fresh)" -ForegroundColor DarkGray diff --git a/samples/VideoGenerationPOC/my-dotnet-bot-mod.png b/samples/VideoGenerationPOC/my-dotnet-bot-mod.png new file mode 100644 index 00000000000..6b47b49d6e6 Binary files /dev/null and b/samples/VideoGenerationPOC/my-dotnet-bot-mod.png differ diff --git a/samples/VideoProviders/GoogleVeo/GoogleVeo.csproj b/samples/VideoProviders/GoogleVeo/GoogleVeo.csproj new file mode 100644 index 00000000000..552fe62669e --- /dev/null +++ b/samples/VideoProviders/GoogleVeo/GoogleVeo.csproj @@ -0,0 +1,16 @@ + + + + Exe + net10.0 + enable + enable + $(NoWarn);MEAI001 + + + + + + + + diff --git a/samples/VideoProviders/GoogleVeo/GoogleVeoVideoGenerationOperation.cs b/samples/VideoProviders/GoogleVeo/GoogleVeoVideoGenerationOperation.cs new file mode 100644 index 00000000000..fc80089baaf --- /dev/null +++ b/samples/VideoProviders/GoogleVeo/GoogleVeoVideoGenerationOperation.cs @@ -0,0 +1,182 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Text.Json; +using Microsoft.Extensions.AI; + +namespace GoogleVeo; + +/// +/// Tracks an in-flight Google Veo operation, polling the Gemini operations API. +/// +/// +/// Polling: GET https://generativelanguage.googleapis.com/v1beta/{operationName}?key={apiKey} +/// Response includes "done": true when complete, with "response.generatedVideos" containing results. 
+/// </remarks>
+internal sealed class GoogleVeoVideoGenerationOperation : VideoGenerationOperation
+{
+    private const string BaseUrl = "https://generativelanguage.googleapis.com/v1beta";
+
+    private readonly HttpClient _httpClient;
+    private readonly string _apiKey;
+    private string? _status;
+    private string? _failureReason;
+    private bool _done;
+    private readonly List<string> _videoUris = [];
+
+    public GoogleVeoVideoGenerationOperation(string operationName, string apiKey, HttpClient httpClient, string modelId)
+    {
+        OperationId = operationName;
+        ModelId = modelId;
+        _apiKey = apiKey;
+        _httpClient = httpClient;
+        _status = "PROCESSING";
+    }
+
+    public override string? OperationId { get; }
+
+    public override string? Status => _status;
+
+    public override int? PercentComplete => _done ? 100 : null; // Veo doesn't report percent
+
+    public override bool IsCompleted => _done;
+
+    public override string? FailureReason => _failureReason;
+
+    /// <summary>Polls the operations endpoint once and updates status/results from the response.</summary>
+    public override async Task UpdateAsync(CancellationToken cancellationToken = default)
+    {
+        string url = $"{BaseUrl}/{OperationId}?key={_apiKey}";
+        using var response = await _httpClient.GetAsync(url, cancellationToken);
+        string body = await response.Content.ReadAsStringAsync(cancellationToken);
+        response.EnsureSuccessStatusCode();
+
+        using var doc = JsonDocument.Parse(body);
+        var root = doc.RootElement;
+
+        _done = root.TryGetProperty("done", out var doneProp) && doneProp.GetBoolean();
+
+        if (root.TryGetProperty("error", out var error))
+        {
+            _status = "FAILED";
+            _failureReason = error.TryGetProperty("message", out var msg) ? msg.GetString() : "Unknown error";
+            _done = true;
+        }
+        else if (_done)
+        {
+            _status = "SUCCEEDED";
+
+            // Parse generated videos — predictLongRunning response format:
+            // response.generateVideoResponse.generatedSamples[].video.uri
+            if (root.TryGetProperty("response", out var resp) &&
+                resp.TryGetProperty("generateVideoResponse", out var videoResponse))
+            {
+                if (videoResponse.TryGetProperty("generatedSamples", out var samples))
+                {
+                    _videoUris.Clear();
+                    foreach (var sample in samples.EnumerateArray())
+                    {
+                        if (sample.TryGetProperty("video", out var videoObj) &&
+                            videoObj.TryGetProperty("uri", out var uri))
+                        {
+                            _videoUris.Add(uri.GetString()!);
+                        }
+                    }
+                }
+
+                // Check if the response was filtered by safety (RAI) filters
+                if (_videoUris.Count == 0)
+                {
+                    string? filterReason = null;
+                    if (videoResponse.TryGetProperty("raiMediaFilteredCount", out var filteredCount) && filteredCount.GetInt32() > 0)
+                    {
+                        filterReason = $"Video was filtered by safety filters ({filteredCount.GetInt32()} filtered).";
+                        if (videoResponse.TryGetProperty("raiMediaFilteredReasons", out var reasons))
+                        {
+                            filterReason += $" Reasons: {reasons}";
+                        }
+                    }
+                    else
+                    {
+                        filterReason = $"No videos in response. Full response: {resp}";
+                    }
+
+                    _status = "FAILED";
+                    _failureReason = filterReason;
+                }
+            }
+            else if (root.TryGetProperty("response", out var rawResp))
+            {
+                _status = "FAILED";
+                _failureReason = $"Unexpected response format: {rawResp}";
+            }
+            else
+            {
+                _status = "FAILED";
+                _failureReason = $"No response payload in completed operation. Raw: {root}";
+            }
+        }
+        else
+        {
+            _status = "PROCESSING";
+        }
+    }
+
+    /// <summary>Polls until the operation completes, then throws if it failed.</summary>
+    public override async Task WaitForCompletionAsync(
+        IProgress<VideoGenerationProgress>? progress = null,
+        CancellationToken cancellationToken = default)
+    {
+        while (!IsCompleted)
+        {
+            // Veo docs recommend ~10 second polling for video generation
+            await Task.Delay(TimeSpan.FromSeconds(10), cancellationToken);
+            await UpdateAsync(cancellationToken);
+            progress?.Report(new VideoGenerationProgress(_status, PercentComplete));
+        }
+
+        if (_status == "FAILED")
+        {
+            throw new InvalidOperationException($"Video generation failed: {_failureReason}");
+        }
+    }
+
+    /// <summary>Returns the generated videos as URIs or downloaded bytes, per options.ResponseFormat.</summary>
+    public override async Task<IReadOnlyList<AIContent>> GetContentsAsync(
+        VideoGenerationOptions? options = null,
+        CancellationToken cancellationToken = default)
+    {
+        if (!IsCompleted || _status == "FAILED")
+        {
+            throw new InvalidOperationException("The operation has not completed successfully.");
+        }
+
+        if (_videoUris.Count == 0)
+        {
+            await UpdateAsync(cancellationToken);
+        }
+
+        if (_videoUris.Count == 0)
+        {
+            throw new InvalidOperationException("No video URIs available after completion.");
+        }
+
+        var results = new List<AIContent>();
+        foreach (string videoUri in _videoUris)
+        {
+            // Append API key to download URI
+            string downloadUri = videoUri.Contains('?') ? $"{videoUri}&key={_apiKey}" : $"{videoUri}?key={_apiKey}";
+
+            if (options?.ResponseFormat == VideoGenerationResponseFormat.Uri)
+            {
+                results.Add(new UriContent(new Uri(videoUri), "video/mp4"));
+            }
+            else
+            {
+                using var response = await _httpClient.GetAsync(downloadUri, cancellationToken);
+                response.EnsureSuccessStatusCode();
+                byte[] data = await response.Content.ReadAsByteArrayAsync(cancellationToken);
+                results.Add(new DataContent(data, "video/mp4"));
+            }
+        }
+
+        return results;
+    }
+}
diff --git a/samples/VideoProviders/GoogleVeo/GoogleVeoVideoGenerator.cs b/samples/VideoProviders/GoogleVeo/GoogleVeoVideoGenerator.cs
new file mode 100644
index 00000000000..54cc293991c
--- /dev/null
+++ b/samples/VideoProviders/GoogleVeo/GoogleVeoVideoGenerator.cs
@@ -0,0 +1,271 @@
+// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Drawing;
using System.Net.Http.Headers;
using System.Text.Json;
using System.Text.Json.Nodes;
using Microsoft.Extensions.AI;

namespace GoogleVeo;

/// <summary>
/// Implements <see cref="IVideoGenerator"/> for Google Veo via the Gemini API.
/// Supports text-to-video, image-to-video, reference images, and video extension.
/// </summary>
/// <remarks>
/// API Reference: https://ai.google.dev/gemini-api/docs/video
/// Endpoint: POST https://generativelanguage.googleapis.com/v1beta/models/{model}:predictLongRunning
/// Polling:  GET  https://generativelanguage.googleapis.com/v1beta/{operation.name}
/// </remarks>
internal sealed class GoogleVeoVideoGenerator : IVideoGenerator
{
    private const string BaseUrl = "https://generativelanguage.googleapis.com/v1beta";
    private readonly HttpClient _httpClient;
    private readonly bool _disposeHttpClient; // only dispose clients this instance created
    private readonly string _apiKey;
    private readonly string _modelId;

    public GoogleVeoVideoGenerator(string apiKey, string modelId = "veo-3.1-generate-preview", HttpClient? httpClient = null)
    {
        _apiKey = apiKey;
        _modelId = modelId;

        // BUG FIX: remember whether we own the client — Dispose() must not tear down
        // an HttpClient that was injected by (and may still be used by) the caller.
        _disposeHttpClient = httpClient is null;
        _httpClient = httpClient ?? new HttpClient();
        _httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
    }

    /// <summary>Starts a Veo video generation and returns an operation handle for polling.</summary>
    /// <param name="request">The generation request (prompt, frames, reference images, source video).</param>
    /// <param name="options">Optional settings; <c>ModelId</c> overrides the constructor default.</param>
    /// <param name="cancellationToken">Cancels the HTTP call.</param>
    /// <returns>An operation that polls the Gemini long-running-operation endpoint.</returns>
    /// <exception cref="HttpRequestException">The API returned a non-success status; the message includes the response body.</exception>
    public async Task<VideoGenerationOperation> GenerateAsync(
        VideoGenerationRequest request,
        VideoGenerationOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        string model = options?.ModelId ?? _modelId;

        // Build the instance object (prompt, image, reference images, last frame, extension).
        var instance = new JsonObject();

        // Text prompt (required for most operations)
        if (request.Prompt is not null)
        {
            instance["prompt"] = request.Prompt;
        }

        // Image for image-to-video
        if (request.OperationKind == VideoOperationKind.Create && request.StartFrame is not null)
        {
            var image = GetImageNode(request.StartFrame);
            if (image is not null)
            {
                instance["image"] = image;
            }
        }

        // Reference images — the first-class property takes precedence over AdditionalProperties.
        if (request.ReferenceImages is { Count: > 0 } refImages)
        {
            var refArray = new JsonArray();
            foreach (var refImg in refImages)
            {
                var imgNode = BuildImageNode(refImg);
                if (imgNode is not null)
                {
                    refArray.Add(new JsonObject { ["referenceImage"] = new JsonObject { ["image"] = imgNode } });
                }
            }

            if (refArray.Count > 0)
            {
                instance["referenceImages"] = refArray;
            }
        }
        else if (options?.AdditionalProperties?.TryGetValue("referenceImages", out object? refImgs) == true && refImgs is JsonArray refArrayLegacy)
        {
            // Round-trip through text to detach the nodes from their current parent before attaching here.
            instance["referenceImages"] = JsonNode.Parse(refArrayLegacy.ToJsonString())!;
        }

        // Last frame for first+last frame interpolation
        if (request.EndFrame is not null)
        {
            instance["lastFrame"] = new JsonObject
            {
                ["image"] = BuildImageNode(request.EndFrame),
            };
        }
        else if (options?.AdditionalProperties?.TryGetValue("lastFrameImage", out object? lastFrame) == true)
        {
            instance["lastFrame"] = new JsonObject
            {
                ["image"] = BuildImageNode(lastFrame),
            };
        }

        // Video extension
        if (request.OperationKind == VideoOperationKind.Extend && request.SourceVideoId is not null)
        {
            instance["extensionSourceVideoId"] = request.SourceVideoId;
        }

        JsonObject parameters = BuildParameters(request, options);

        // Wrap in the instances/parameters envelope expected by predictLongRunning.
        var body = new JsonObject
        {
            ["instances"] = new JsonArray { instance },
        };
        if (parameters.Count > 0)
        {
            body["parameters"] = parameters;
        }

        string url = $"{BaseUrl}/models/{model}:predictLongRunning?key={_apiKey}";
        string json = body.ToJsonString();
        using var content = new StringContent(json, System.Text.Encoding.UTF8, "application/json");
        using var response = await _httpClient.PostAsync(url, content, cancellationToken);

        string responseBody = await response.Content.ReadAsStringAsync(cancellationToken);
        if (!response.IsSuccessStatusCode)
        {
            throw new HttpRequestException($"Google Veo API error {(int)response.StatusCode} ({response.StatusCode}): {responseBody}");
        }

        // BUG FIX: JsonDocument rents pooled buffers and must be disposed.
        using var result = JsonDocument.Parse(responseBody);
        string operationName = result.RootElement.GetProperty("name").GetString()!;

        return new GoogleVeoVideoGenerationOperation(operationName, _apiKey, _httpClient, model);
    }

    /// <summary>Builds the Veo "parameters" object (generation config) from the options and request.</summary>
    private static JsonObject BuildParameters(VideoGenerationRequest request, VideoGenerationOptions? options)
    {
        var parameters = new JsonObject();

        if (options?.AdditionalProperties?.TryGetValue("personGeneration", out object? personGen) == true && personGen is string personGenStr)
        {
            parameters["personGeneration"] = personGenStr;
        }

        if (options?.Duration is { } duration)
        {
            parameters["durationSeconds"] = (int)duration.TotalSeconds;
        }

        if (options?.VideoSize is { } size)
        {
            parameters["resolution"] = MapResolution(size);
        }

        if (options?.AspectRatio is { } aspectRatio)
        {
            parameters["aspectRatio"] = aspectRatio;
        }
        else if (options?.AdditionalProperties?.TryGetValue("aspectRatio", out object? ar) == true && ar is string arStr)
        {
            parameters["aspectRatio"] = arStr;
        }

        if (options?.AdditionalProperties?.TryGetValue("numberOfVideos", out object? numVids) == true && numVids is int numVidsInt)
        {
            parameters["numberOfVideos"] = numVidsInt;
        }
        else if (options?.Count is { } count)
        {
            parameters["numberOfVideos"] = count;
        }

        // Negative prompt — prefer the first-class property on the request, fall back to AdditionalProperties.
        string? negativePrompt = request.NegativePrompt;
        if (negativePrompt is null && options?.AdditionalProperties?.TryGetValue("negativePrompt", out object? negPrompt) == true && negPrompt is string negPromptStr)
        {
            negativePrompt = negPromptStr;
        }

        if (negativePrompt is not null)
        {
            parameters["negativePrompt"] = negativePrompt;
        }

        if (options?.GenerateAudio is bool genAudio)
        {
            parameters["generateAudio"] = genAudio;
        }
        else if (options?.AdditionalProperties?.TryGetValue("generateAudio", out object? genAudioObj) == true && genAudioObj is bool genAudioBool)
        {
            parameters["generateAudio"] = genAudioBool;
        }

        if (options?.Seed is int seed)
        {
            parameters["seed"] = seed;
        }
        else if (options?.AdditionalProperties?.TryGetValue("seed", out object? seedObj) == true && seedObj is int seedInt)
        {
            parameters["seed"] = seedInt;
        }

        return parameters;
    }

    /// <inheritdoc />
    public object? GetService(Type serviceType, object? serviceKey = null)
    {
        if (serviceKey is null && serviceType.IsInstanceOfType(this))
        {
            return this;
        }

        return null;
    }

    /// <summary>Disposes the <see cref="HttpClient"/> only when this instance created it.</summary>
    public void Dispose()
    {
        if (_disposeHttpClient)
        {
            _httpClient.Dispose();
        }
    }

    /// <summary>Maps a start-frame <see cref="AIContent"/> to a Veo image node (inline base64 bytes or a GCS URI).</summary>
    private static JsonNode? GetImageNode(AIContent content)
    {
        if (content is DataContent dc && (dc.MediaType?.StartsWith("image/", StringComparison.OrdinalIgnoreCase) ?? false) && dc.Data.Length > 0)
        {
            return new JsonObject
            {
                ["bytesBase64Encoded"] = Convert.ToBase64String(dc.Data.ToArray()),
                ["mimeType"] = dc.MediaType,
            };
        }

        if (content is UriContent uc && uc.Uri is not null)
        {
            return new JsonObject
            {
                ["gcsUri"] = uc.Uri.ToString(),
            };
        }

        return null;
    }

    /// <summary>
    /// Maps a loosely-typed image value (<see cref="AIContent"/>, local file path, pre-built
    /// <see cref="JsonNode"/>, or anything whose string form is a GCS URI) to a Veo image node.
    /// </summary>
    private static JsonNode? BuildImageNode(object imageData)
    {
        if (imageData is AIContent aiContent)
        {
            return GetImageNode(aiContent);
        }

        if (imageData is string path && File.Exists(path))
        {
            byte[] bytes = File.ReadAllBytes(path);
            return new JsonObject
            {
                ["bytesBase64Encoded"] = Convert.ToBase64String(bytes),
                ["mimeType"] = "image/png", // assumes PNG input — TODO confirm or sniff the actual type
            };
        }

        if (imageData is JsonNode node)
        {
            return node;
        }

        // Fall back to treating the value's string form as a GCS URI.
        return new JsonObject { ["gcsUri"] = imageData.ToString() };
    }

    /// <summary>Maps pixel dimensions onto Veo's named resolution tiers (lossy by design).</summary>
    private static string MapResolution(Size size)
    {
        int maxDim = Math.Max(size.Width, size.Height);
        return maxDim switch
        {
            <= 720 => "720p",
            <= 1080 => "1080p",
            _ => "4k",
        };
    }
}
diff --git a/samples/VideoProviders/GoogleVeo/Program.cs b/samples/VideoProviders/GoogleVeo/Program.cs
new file mode 100644
index 00000000000..1815e081492
--- /dev/null
+++ b/samples/VideoProviders/GoogleVeo/Program.cs
@@ -0,0 +1,226 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// Google Veo (Gemini API) Video Generation Sample
//
// Usage examples:
//   dotnet run -- generate "A cinematic drone shot of a coastline at sunset"
//   dotnet run -- generate "A character walks through" --image reference.jpg
//   dotnet run -- generate "Smooth transition" --image first.jpg --last-frame last.jpg
//
// Environment:
//   GOOGLE_API_KEY — your Gemini API key from https://aistudio.google.com/apikey

using System.CommandLine;
using System.Text.Json.Nodes;
using GoogleVeo;
using Microsoft.Extensions.AI;

var modelOption = new Option<string>("--model", () => "veo-3.1-generate-preview",
    "Model: veo-3.1-generate-preview, veo-3.1-fast-preview, veo-3, veo-2.");
var outputOption = new Option<string?>("--output", "Output file path (.mp4).");
var durationOption = new Option<int?>("--duration", "Duration in seconds (4, 6, or 8).");
var resolutionOption = new Option<string>("--resolution", () => "720p", "Resolution: 720p, 1080p, 4k.");
var aspectRatioOption = new Option<string?>("--aspect-ratio", "Aspect ratio (e.g. 16:9, 9:16).");
var formatOption = new Option<string>("--format", () => "data", "Response format: data or uri.");
var countOption = new Option<int?>("--count", "Number of videos to generate.");
var negativePromptOption = new Option<string?>("--negative-prompt", "What to avoid in the video.");
var audioOption = new Option<bool>("--audio", () => false, "Generate audio (Veo 3+ only).");
var seedOption = new Option<int?>("--seed", "Seed for reproducibility.");
var personGenOption = new Option<string?>("--person-generation", "Person generation policy: dont_allow, allow_adult.");

// ── generate ──────────────────────────────────────────────────────────────
var promptArg = new Argument<string>("prompt", "Text prompt.");
var imageOption = new Option<string?>("--image", "Input image for image-to-video.");
var lastFrameOption = new Option<string?>("--last-frame", "Last frame image for interpolation.");
var refImagesOption = new Option<string[]>("--ref-image", "Reference image(s) for style guidance (up to 3).") { AllowMultipleArgumentsPerToken = true };
var refTypeOption = new Option<string>("--ref-type", () => "style", "Reference type: style or subject.");

var generateCommand = new Command("generate", "Generate video from text, image-to-video, or frame interpolation.")
{
    promptArg, modelOption, outputOption, imageOption, lastFrameOption,
    refImagesOption, refTypeOption,
    durationOption, resolutionOption, aspectRatioOption, countOption,
    negativePromptOption, audioOption, seedOption, personGenOption, formatOption,
};

generateCommand.SetHandler(async (context) =>
{
    string prompt = context.ParseResult.GetValueForArgument(promptArg);
    string model = context.ParseResult.GetValueForOption(modelOption)!;
    string? outputPath = context.ParseResult.GetValueForOption(outputOption);
    string? imagePath = context.ParseResult.GetValueForOption(imageOption);
    string? lastFrame = context.ParseResult.GetValueForOption(lastFrameOption);
    string[]? refImages = context.ParseResult.GetValueForOption(refImagesOption);
    string refType = context.ParseResult.GetValueForOption(refTypeOption)!;
    int? duration = context.ParseResult.GetValueForOption(durationOption);
    string resolution = context.ParseResult.GetValueForOption(resolutionOption)!;
    string? aspectRatio = context.ParseResult.GetValueForOption(aspectRatioOption);
    int? count = context.ParseResult.GetValueForOption(countOption);
    string? negativePrompt = context.ParseResult.GetValueForOption(negativePromptOption);
    bool audio = context.ParseResult.GetValueForOption(audioOption);
    int? seed = context.ParseResult.GetValueForOption(seedOption);
    string? personGen = context.ParseResult.GetValueForOption(personGenOption);
    string format = context.ParseResult.GetValueForOption(formatOption)!;

    using var generator = CreateGenerator(model);

    // Input image becomes the request's media (image-to-video start frame).
    List<AIContent>? media = null;
    if (imagePath is not null)
    {
        media = [await DataContent.LoadFromAsync(imagePath)];
    }

    var options = new VideoGenerationOptions
    {
        ModelId = model,
        Count = count,
        ResponseFormat = string.Equals(format, "uri", StringComparison.OrdinalIgnoreCase)
            ? VideoGenerationResponseFormat.Uri
            : VideoGenerationResponseFormat.Data,
        AdditionalProperties = [],
    };

    if (duration.HasValue)
    {
        options.Duration = TimeSpan.FromSeconds(duration.Value);
    }

    // The generator maps pixel sizes back to Veo's named tiers.
    options.VideoSize = resolution switch
    {
        "1080p" => new System.Drawing.Size(1920, 1080),
        "4k" => new System.Drawing.Size(3840, 2160),
        _ => new System.Drawing.Size(1280, 720),
    };

    if (aspectRatio is not null)
    {
        options.AspectRatio = aspectRatio;
    }

    if (audio)
    {
        options.GenerateAudio = true;
    }

    if (seed.HasValue)
    {
        options.Seed = seed.Value;
    }

    if (personGen is not null)
    {
        options.AdditionalProperties["personGeneration"] = personGen;
    }

    // Last frame for interpolation
    if (lastFrame is not null)
    {
        byte[] lastFrameBytes = await File.ReadAllBytesAsync(lastFrame);
        // NOTE(review): this uses the "imageBytes" key while the reference images below use
        // "bytesBase64Encoded" — confirm which key the target API version expects.
        var lastFrameNode = new JsonObject
        {
            ["imageBytes"] = Convert.ToBase64String(lastFrameBytes),
            ["mimeType"] = "image/png",
        };
        options.AdditionalProperties["lastFrameImage"] = lastFrameNode;
    }

    // Reference images
    if (refImages is { Length: > 0 })
    {
        var refs = new JsonArray();
        foreach (string refImg in refImages)
        {
            byte[] refBytes = await File.ReadAllBytesAsync(refImg);
            refs.Add(new JsonObject
            {
                ["image"] = new JsonObject
                {
                    ["bytesBase64Encoded"] = Convert.ToBase64String(refBytes),
                    ["mimeType"] = "image/png",
                },
                ["referenceType"] = refType.ToUpperInvariant(),
            });
        }

        options.AdditionalProperties["referenceImages"] = refs;
    }

    var request = new VideoGenerationRequest(prompt, media);

    if (negativePrompt is not null)
    {
        request.NegativePrompt = negativePrompt;
    }

    var operation = await generator.GenerateAsync(request, options);
    await CompleteAndSaveAsync(operation, options, outputPath);
});

// ── Root ──────────────────────────────────────────────────────────────────
var rootCommand = new RootCommand("Google Veo (Gemini API) video generation sample using MEAI IVideoGenerator.")
{
    generateCommand,
};

return await rootCommand.InvokeAsync(args);

// ═══════════════════════════════════════════════════════════════════════════
// Helpers
// ═══════════════════════════════════════════════════════════════════════════

// Resolves the API key from the environment and builds the Veo generator; exits with a
// helpful message when the key is missing.
static IVideoGenerator CreateGenerator(string model)
{
    string? apiKey = Environment.GetEnvironmentVariable("GOOGLE_API_KEY");
    if (string.IsNullOrEmpty(apiKey))
    {
        Console.Error.WriteLine("Error: Set the GOOGLE_API_KEY environment variable.");
        Console.Error.WriteLine("Get a key at https://aistudio.google.com/apikey");
        Environment.Exit(1);
    }

    return new GoogleVeoVideoGenerator(apiKey, model);
}

// Waits for the operation to finish, then prints each resulting content item and
// saves DataContent results when an output path was requested.
static async Task CompleteAndSaveAsync(VideoGenerationOperation operation, VideoGenerationOptions options, string? outputPath)
{
    Console.WriteLine($"OPERATION: {operation.OperationId}");
    Console.WriteLine($"  Status: {operation.Status}");

    var sw = System.Diagnostics.Stopwatch.StartNew();
    await operation.WaitForCompletionAsync(
        new Progress<VideoGenerationProgress>(p =>
            Console.WriteLine($"  Progress: {p.Status}{(p.PercentComplete.HasValue ? $" ({p.PercentComplete}%)" : "")}")));

    sw.Stop();
    Console.WriteLine($"  Completed in {sw.Elapsed.TotalSeconds:F1}s");

    var contents = await operation.GetContentsAsync(options);
    Console.WriteLine($"  {contents.Count} content item(s)");

    for (int i = 0; i < contents.Count; i++)
    {
        switch (contents[i])
        {
            case DataContent dc when outputPath is not null:
            {
                // Suffix the index when multiple videos share one requested path.
                string savePath = contents.Count > 1
                    ? Path.Combine(
                        Path.GetDirectoryName(outputPath) ?? ".",
                        $"{Path.GetFileNameWithoutExtension(outputPath)}_{i}{Path.GetExtension(outputPath)}")
                    : outputPath;

                // BUG FIX: Path.GetDirectoryName returns "" (not null) for a bare file name,
                // and Directory.CreateDirectory("") throws — only create when a directory exists.
                string? dir = Path.GetDirectoryName(savePath);
                if (!string.IsNullOrEmpty(dir))
                {
                    Directory.CreateDirectory(dir);
                }

                await dc.SaveToAsync(savePath);
                Console.WriteLine($"  [{i}] Saved: {savePath} ({dc.Data.Length} bytes)");
                break;
            }

            case DataContent dc:
                Console.WriteLine($"  [{i}] DataContent: {dc.Data.Length} bytes ({dc.MediaType})");
                break;
            case UriContent uc:
                Console.WriteLine($"  [{i}] URI: {uc.Uri}");
                break;
            default:
                Console.WriteLine($"  [{i}] {contents[i].GetType().Name}");
                break;
        }
    }
}
diff --git a/samples/VideoProviders/GoogleVeo/README.md b/samples/VideoProviders/GoogleVeo/README.md
new file mode 100644
index 00000000000..083211f4ab5
--- /dev/null
+++ b/samples/VideoProviders/GoogleVeo/README.md
@@ -0,0 +1,79 @@
# Google Veo (Gemini API) Video Generation Sample

This sample demonstrates using the **Microsoft.Extensions.AI** `IVideoGenerator` abstraction with Google's Veo models via the Gemini API.

## Getting Access

1. Go to [https://aistudio.google.com/apikey](https://aistudio.google.com/apikey)
2. Create a Gemini API key
3. Veo models may require specific Google Cloud billing or allowlist access
4. See [Google Veo docs](https://ai.google.dev/gemini-api/docs/video) for feature availability

## Environment Setup

```bash
export GOOGLE_API_KEY="AIza..."
```

## Models

| Model | ID | Features |
|---|---|---|
| Veo 3.1 | `veo-3.1-generate-preview` | Text/image-to-video, extension, reference images, interpolation, 720p-4k, audio |
| Veo 3.1 Fast | `veo-3.1-fast-preview` | Same features, faster generation, lower quality |
| Veo 3 | `veo-3` | Text-to-video with native audio, 720p-1080p |
| Veo 2 | `veo-2` | Text/image-to-video, 720p-4k |

## Supported Operations

| Operation | MEAI Mapping | Notes |
|---|---|---|
| Text-to-video | `VideoOperationKind.Create`, no `StartFrame` | Prompt-only generation |
| Image-to-video | `VideoOperationKind.Create` + `StartFrame` (image) | Image as starting reference |
| First+last frame interpolation | `StartFrame` + `EndFrame` | Generate video between two frames |
| Reference images (up to 3) | `ReferenceImages` | Style/subject transfer with `reference_type` |
| Video extension | `VideoOperationKind.Extend` | Extend up to 20 times (7s each, 720p only) |
| Multiple outputs | `VideoGenerationOptions.Count` | Generate 1-4 videos from one request |

## Usage

```bash
# Text-to-video
dotnet run -- generate "A cinematic drone shot of a coastline at sunset" --output sunset.mp4

# Image-to-video
dotnet run -- generate "The scene comes alive" --image photo.jpg --output scene.mp4

# First+last frame interpolation
dotnet run -- generate "Smooth transition between frames" --image first.jpg --last-frame last.jpg --output interp.mp4

# Reference images for style
dotnet run -- generate "A character walking" --ref-image style1.png --ref-image style2.png --ref-type style

# With audio (Veo 3+)
dotnet run -- generate "A thunderstorm over a city" --model veo-3 --audio --output storm.mp4

# High resolution, specific duration
dotnet run -- generate "A serene lake" --resolution 4k --duration 8 --output lake.mp4

# With negative prompt
dotnet run -- generate "A person walking" --negative-prompt "blurry, distorted" --person-generation allow_adult

# Multiple outputs
dotnet run -- generate "A sunset" --count 4 --output sunset.mp4
```

## API Gaps / Limitations

- **Reference images with typed purpose**: Veo supports `referenceImages` with `referenceType` ("REFERENCE_TYPE_STYLE" or "REFERENCE_TYPE_SUBJECT"), allowing up to 3 images for style/subject transfer. MEAI's `ReferenceImages` collection maps well to this but doesn't include the `referenceType` metadata — provider-specific `AdditionalProperties` can be used for that.
- **First/last frame interpolation**: Veo generates a video between two keyframe images. MEAI's `StartFrame` and `EndFrame` properties map directly to this.
- **Native audio generation**: Veo 3+ can generate synchronized audio with video. MEAI has no audio-related option.
- **Negative prompts**: Veo supports `negativePrompt` to exclude unwanted elements. Not part of the core MEAI options.
- **Resolution as named tier**: Veo uses `"720p"`, `"1080p"`, `"4k"` — not pixel dimensions. The `VideoSize` abstraction works but the mapping is lossy.
- **Aspect ratio as string**: Veo uses `"16:9"`, `"9:16"` etc. `VideoSize` can encode this but it's different from the ratio concept each provider uses.
- **Duration as string**: Veo requires `durationSeconds` as a string (`"4"`, `"6"`, `"8"`). The `TimeSpan Duration` maps fine but the valid values are model-specific.
- **Person generation policy**: Veo has `personGeneration` (`"dont_allow"`, `"allow_adult"`) — a safety control with no MEAI equivalent.
- **Seed**: Reproducibility parameter not part of core MEAI options.
- **Video extension**: Extension works by passing frames from previous videos. The Veo API requires using the Gemini Files API to upload the source video first, making `SourceVideoId` insufficient as a simple string ID.
- **Operation polling model**: Veo returns a Gemini LRO (Long Running Operation) with `operations.get()`. The `VideoGenerationOperation.UpdateAsync()` pattern maps well to this.
- **Multiple videos**: Veo can generate 1-4 videos per request via `numberOfVideos`. MEAI's `Count` option maps to this, but `GetContentsAsync` returns them all in one list.
diff --git a/samples/VideoProviders/LumaAI/LumaAI.csproj b/samples/VideoProviders/LumaAI/LumaAI.csproj
new file mode 100644
index 00000000000..552fe62669e
--- /dev/null
+++ b/samples/VideoProviders/LumaAI/LumaAI.csproj
@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">

  <!-- NOTE(review): element tags were lost in extraction; reconstructed from the visible
       values — verify package/project references against the sibling GoogleVeo sample. -->
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <NoWarn>$(NoWarn);MEAI001</NoWarn>
  </PropertyGroup>

  <ItemGroup>
    <!-- TODO confirm: references to Microsoft.Extensions.AI and System.CommandLine -->
  </ItemGroup>

</Project>
diff --git a/samples/VideoProviders/LumaAI/LumaVideoGenerationOperation.cs b/samples/VideoProviders/LumaAI/LumaVideoGenerationOperation.cs
new file mode 100644
index 00000000000..5c473aff1c3
--- /dev/null
+++ b/samples/VideoProviders/LumaAI/LumaVideoGenerationOperation.cs
@@ -0,0 +1,127 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Text.Json;
using Microsoft.Extensions.AI;

namespace LumaAI;

/// <summary>
/// Tracks an in-flight Luma AI video generation, polling GET /v1/generations/{id} for status.
/// </summary>
internal sealed class LumaVideoGenerationOperation : VideoGenerationOperation
{
    private const string BaseUrl = "https://api.lumalabs.ai/dream-machine/v1";

    private readonly HttpClient _httpClient;
    private string? _status;
    private string? _failureReason;
    private string? _videoUrl;

    public LumaVideoGenerationOperation(string operationId, HttpClient httpClient, string modelId)
    {
        OperationId = operationId;
        ModelId = modelId;
        _httpClient = httpClient;
        _status = "queued"; // Luma generations start in the "queued" state
    }

    public override string? OperationId { get; }

    public override string? Status => _status;

    // Luma exposes no numeric progress; map its states to coarse percentages.
    public override int? PercentComplete => _status switch
    {
        "completed" => 100,
        "failed" => null,
        "dreaming" => 50, // Luma uses "dreaming" for in-progress
        _ => 0,
    };

    public override bool IsCompleted => _status is "completed" or "failed";

    public override string? FailureReason => _failureReason;

    /// <summary>Fetches the latest state, failure reason, and video URL from the API.</summary>
    public override async Task UpdateAsync(CancellationToken cancellationToken = default)
    {
        using var response = await _httpClient.GetAsync($"{BaseUrl}/generations/{OperationId}", cancellationToken);
        string body = await response.Content.ReadAsStringAsync(cancellationToken);
        response.EnsureSuccessStatusCode();

        using var doc = JsonDocument.Parse(body);
        var root = doc.RootElement;

        _status = root.GetProperty("state").GetString();
        if (root.TryGetProperty("failure_reason", out var fr) && fr.ValueKind == JsonValueKind.String)
        {
            _failureReason = fr.GetString();
        }

        if (root.TryGetProperty("assets", out var assets) &&
            assets.TryGetProperty("video", out var video) &&
            video.ValueKind == JsonValueKind.String)
        {
            _videoUrl = video.GetString();
        }
    }

    /// <summary>Polls until the generation reaches a terminal state ("completed" or "failed").</summary>
    /// <exception cref="InvalidOperationException">The generation ended in the "failed" state.</exception>
    public override async Task WaitForCompletionAsync(
        IProgress<VideoGenerationProgress>? progress = null,
        CancellationToken cancellationToken = default)
    {
        while (!IsCompleted)
        {
            // BUG FIX: poll before delaying so a generation that finishes quickly is observed
            // immediately (the original waited a full 5 s before the first status check).
            await UpdateAsync(cancellationToken);
            progress?.Report(new VideoGenerationProgress(_status, PercentComplete));

            if (IsCompleted)
            {
                break;
            }

            await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken);
        }

        if (_status == "failed")
        {
            throw new InvalidOperationException($"Video generation failed: {_failureReason}");
        }
    }

    /// <summary>Returns the completed video, either as a URI or downloaded bytes.</summary>
    /// <exception cref="InvalidOperationException">The operation has not completed successfully, or no video URL is available.</exception>
    public override async Task<IReadOnlyList<AIContent>> GetContentsAsync(
        VideoGenerationOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        if (!IsCompleted || _status == "failed")
        {
            throw new InvalidOperationException("The operation has not completed successfully.");
        }

        if (_videoUrl is null)
        {
            // Re-fetch to get the video URL
            await UpdateAsync(cancellationToken);
        }

        if (_videoUrl is null)
        {
            throw new InvalidOperationException("No video URL available after completion.");
        }

        if (options?.ResponseFormat == VideoGenerationResponseFormat.Uri)
        {
            return [new UriContent(new Uri(_videoUrl), "video/mp4")];
        }

        // Download the video data.
        // NOTE(review): this client carries the Luma Authorization header set by the generator;
        // confirm whether sending it to the asset CDN host is acceptable.
        using var response = await _httpClient.GetAsync(_videoUrl, cancellationToken);
        response.EnsureSuccessStatusCode();
        byte[] data = await response.Content.ReadAsByteArrayAsync(cancellationToken);
        return [new DataContent(data, "video/mp4")];
    }

    /// <summary>Builds an extend request that references this generation as the source.</summary>
    public override VideoGenerationRequest CreateExtensionRequest(string? prompt = null)
    {
        // Luma extend uses keyframes with type=generation, id=<this generation>
        return new VideoGenerationRequest
        {
            Prompt = prompt,
            SourceVideoId = OperationId,
            OperationKind = VideoOperationKind.Extend,
        };
    }
}
diff --git a/samples/VideoProviders/LumaAI/LumaVideoGenerator.cs b/samples/VideoProviders/LumaAI/LumaVideoGenerator.cs
new file mode 100644
index 00000000000..8fdc3876ec3
--- /dev/null
+++ b/samples/VideoProviders/LumaAI/LumaVideoGenerator.cs
@@ -0,0 +1,194 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Drawing;
using System.Net.Http.Headers;
using System.Text.Json;
using System.Text.Json.Nodes;
using Microsoft.Extensions.AI;

namespace LumaAI;

/// <summary>
/// Implements <see cref="IVideoGenerator"/> for the Luma AI Dream Machine API.
/// Supports text-to-video, image-to-video, extend, and keyframe interpolation.
/// </summary>
/// <remarks>
/// API Reference: https://docs.lumalabs.ai/docs/video-generation
/// Endpoint: https://api.lumalabs.ai/dream-machine/v1/generations
/// </remarks>
internal sealed class LumaVideoGenerator : IVideoGenerator
{
    private const string BaseUrl = "https://api.lumalabs.ai/dream-machine/v1";
    private readonly HttpClient _httpClient;
    private readonly bool _disposeHttpClient; // only dispose clients this instance created
    private readonly string _modelId;

    public LumaVideoGenerator(string apiKey, string modelId = "ray-2", HttpClient? httpClient = null)
    {
        _modelId = modelId;

        // BUG FIX: remember whether we own the client — Dispose() must not tear down
        // an HttpClient that was injected by (and may still be used by) the caller.
        _disposeHttpClient = httpClient is null;
        _httpClient = httpClient ?? new HttpClient();
        _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
        _httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
    }

    /// <summary>Starts a Dream Machine generation and returns an operation handle for polling.</summary>
    /// <param name="request">The generation request (prompt, frames, source generation ID).</param>
    /// <param name="options">Optional settings; <c>ModelId</c> overrides the constructor default.</param>
    /// <param name="cancellationToken">Cancels the HTTP call.</param>
    /// <returns>An operation that polls GET /v1/generations/{id}.</returns>
    /// <exception cref="HttpRequestException">The API returned a non-success status; the message includes the response body.</exception>
    public async Task<VideoGenerationOperation> GenerateAsync(
        VideoGenerationRequest request,
        VideoGenerationOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        string model = options?.ModelId ?? _modelId;
        var body = new JsonObject { ["prompt"] = request.Prompt, ["model"] = model };

        // Duration — Luma expects a string like "5s".
        if (options?.Duration is { } duration)
        {
            body["duration"] = $"{(int)duration.TotalSeconds}s";
        }

        // Resolution
        if (options?.VideoSize is { } size)
        {
            body["resolution"] = MapResolution(size);
        }

        // Aspect ratio — prefer first-class property, fall back to AdditionalProperties
        string? aspectRatio = options?.AspectRatio;
        if (aspectRatio is null && options?.AdditionalProperties?.TryGetValue("aspect_ratio", out object? ar) == true && ar is string arStr)
        {
            aspectRatio = arStr;
        }

        if (aspectRatio is not null)
        {
            body["aspect_ratio"] = aspectRatio;
        }

        // Loop
        if (options?.AdditionalProperties?.TryGetValue("loop", out object? loop) == true && loop is bool loopBool)
        {
            body["loop"] = loopBool;
        }

        // Concepts — round-trip through text to detach nodes from their current parent.
        if (options?.AdditionalProperties?.TryGetValue("concepts", out object? concepts) == true && concepts is JsonArray conceptsArray)
        {
            body["concepts"] = JsonNode.Parse(conceptsArray.ToJsonString())!;
        }

        // Callback URL
        if (options?.AdditionalProperties?.TryGetValue("callback_url", out object? cbUrl) == true && cbUrl is string cbUrlStr)
        {
            body["callback_url"] = cbUrlStr;
        }

        // Build keyframes based on operation kind.
        var keyframes = new JsonObject();

        switch (request.OperationKind)
        {
            case VideoOperationKind.Create:
                // Image-to-video: StartFrame becomes frame0, EndFrame becomes frame1 (interpolation).
                if (request.StartFrame is not null)
                {
                    AddImageKeyframe(keyframes, "frame0", request.StartFrame);
                }

                if (request.EndFrame is not null)
                {
                    AddImageKeyframe(keyframes, "frame1", request.EndFrame);
                }

                break;

            case VideoOperationKind.Extend:
                // Extend: use SourceVideoId as frame0 generation reference
                if (request.SourceVideoId is not null)
                {
                    keyframes["frame0"] = new JsonObject
                    {
                        ["type"] = "generation",
                        ["id"] = request.SourceVideoId,
                    };
                }

                break;

            case VideoOperationKind.Edit:
                // Luma doesn't have a direct "edit" endpoint — map to video-to-video via keyframes
                if (request.SourceVideoId is not null)
                {
                    keyframes["frame0"] = new JsonObject
                    {
                        ["type"] = "generation",
                        ["id"] = request.SourceVideoId,
                    };
                }

                break;
        }

        if (keyframes.Count > 0)
        {
            body["keyframes"] = keyframes;
        }

        string json = body.ToJsonString();
        using var content = new StringContent(json, System.Text.Encoding.UTF8, "application/json");
        using var response = await _httpClient.PostAsync($"{BaseUrl}/generations", content, cancellationToken);

        string responseBody = await response.Content.ReadAsStringAsync(cancellationToken);

        // Consistency fix: surface the response body in failures like the GoogleVeo generator
        // (EnsureSuccessStatusCode would discard the API's error detail).
        if (!response.IsSuccessStatusCode)
        {
            throw new HttpRequestException($"Luma API error {(int)response.StatusCode} ({response.StatusCode}): {responseBody}");
        }

        // BUG FIX: JsonDocument rents pooled buffers and must be disposed.
        using var result = JsonDocument.Parse(responseBody);
        string operationId = result.RootElement.GetProperty("id").GetString()!;

        return new LumaVideoGenerationOperation(operationId, _httpClient, model);
    }

    /// <inheritdoc />
    public object? GetService(Type serviceType, object? serviceKey = null)
    {
        if (serviceKey is null && serviceType.IsInstanceOfType(this))
        {
            return this;
        }

        return null;
    }

    /// <summary>Disposes the <see cref="HttpClient"/> only when this instance created it.</summary>
    public void Dispose()
    {
        if (_disposeHttpClient)
        {
            _httpClient.Dispose();
        }
    }

    /// <summary>Adds an image keyframe ("frame0"/"frame1") from either a URI or inline data content.</summary>
    private static void AddImageKeyframe(JsonObject keyframes, string frameKey, AIContent content)
    {
        if (content is UriContent uc && uc.Uri is not null)
        {
            keyframes[frameKey] = new JsonObject
            {
                ["type"] = "image",
                ["url"] = uc.Uri.ToString(),
            };
        }
        else if (content is DataContent dc && dc.Data.Length > 0)
        {
            // Reuse the content's existing data URI if present; otherwise synthesize one.
            string dataUri = dc.Uri ?? $"data:{dc.MediaType ?? "image/png"};base64,{Convert.ToBase64String(dc.Data.ToArray())}";
            keyframes[frameKey] = new JsonObject
            {
                ["type"] = "image",
                ["url"] = dataUri,
            };
        }
    }

    /// <summary>Maps pixel dimensions onto Luma's named resolution tiers (lossy by design).</summary>
    private static string MapResolution(Size size)
    {
        int maxDim = Math.Max(size.Width, size.Height);
        return maxDim switch
        {
            <= 540 => "540p",
            <= 720 => "720p",
            <= 1080 => "1080p",
            _ => "4k",
        };
    }
}
diff --git a/samples/VideoProviders/LumaAI/Program.cs b/samples/VideoProviders/LumaAI/Program.cs
new file mode 100644
index 00000000000..255db72f6ad
--- /dev/null
+++ b/samples/VideoProviders/LumaAI/Program.cs
@@ -0,0 +1,199 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// Luma AI (Dream Machine) Video Generation Sample
//
// Usage examples:
//   dotnet run -- generate "A tiger walking through snow"
//   dotnet run -- generate "The scene continues" --image start-frame.jpg
//   dotnet run -- extend "The tiger starts running" --video <generation-id>
//
// Environment:
//   LUMA_API_KEY — your Luma API key from https://lumalabs.ai/dream-machine/api/keys

using System.CommandLine;
using LumaAI;
using Microsoft.Extensions.AI;

var modelOption = new Option<string>("--model", () => "ray-2", "Model ID (ray-2 or ray-flash-2).");
var outputOption = new Option<string?>("--output", "Output file path (.mp4).");
var durationOption = new Option<string?>("--duration", "Duration string (e.g. '5s').");
var resolutionOption = new Option<string>("--resolution", () => "720p", "Resolution: 540p, 720p, 1080p, 4k.");
var aspectRatioOption = new Option<string?>("--aspect-ratio", "Aspect ratio (e.g. 16:9).");
var loopOption = new Option<bool>("--loop", () => false, "Whether to loop the video.");
var formatOption = new Option<string>("--format", () => "data", "Response format: data or uri.");

// ── generate ──────────────────────────────────────────────────────────────
var generatePromptArg = new Argument<string>("prompt", "Text prompt.");
var imageOption = new Option<string?>("--image", "Input image file for image-to-video (HTTPS URL or local file).");
var endImageOption = new Option<string?>("--end-image", "End frame image for keyframe interpolation.");

var generateCommand = new Command("generate", "Generate a video from text or image.")
{
    generatePromptArg, modelOption, outputOption, imageOption, endImageOption,
    durationOption, resolutionOption, aspectRatioOption, loopOption, formatOption,
};

generateCommand.SetHandler(async (context) =>
{
    string prompt = context.ParseResult.GetValueForArgument(generatePromptArg);
    string model = context.ParseResult.GetValueForOption(modelOption)!;
    string? outputPath = context.ParseResult.GetValueForOption(outputOption);
    string? imagePath = context.ParseResult.GetValueForOption(imageOption);
    string? endImagePath = context.ParseResult.GetValueForOption(endImageOption);
    string? duration = context.ParseResult.GetValueForOption(durationOption);
    string resolution = context.ParseResult.GetValueForOption(resolutionOption)!;
    string? aspectRatio = context.ParseResult.GetValueForOption(aspectRatioOption);
    bool loop = context.ParseResult.GetValueForOption(loopOption);
    string format = context.ParseResult.GetValueForOption(formatOption)!;

    using var generator = CreateGenerator(model);

    // First media item becomes the start frame; optional second item is the end frame.
    List<AIContent>? media = null;
    if (imagePath is not null)
    {
        media = [await DataContent.LoadFromAsync(imagePath)];
        if (endImagePath is not null)
        {
            media.Add(await DataContent.LoadFromAsync(endImagePath));
        }
    }

    var options = BuildOptions(duration, resolution, aspectRatio, loop, format);
    var request = new VideoGenerationRequest(prompt, media);
    var operation = await generator.GenerateAsync(request, options);
    await CompleteAndSaveAsync(operation, options, outputPath);
});

// ── extend ────────────────────────────────────────────────────────────────
var extendPromptArg = new Argument<string>("prompt", "Prompt for the extension.");
var extendVideoOption = new Option<string>("--video", "Generation ID to extend.") { IsRequired = true };

var extendCommand = new Command("extend", "Extend a completed video generation.")
{
    extendPromptArg, extendVideoOption, modelOption, outputOption, formatOption,
};

extendCommand.SetHandler(async (context) =>
{
    string prompt = context.ParseResult.GetValueForArgument(extendPromptArg);
    string videoId = context.ParseResult.GetValueForOption(extendVideoOption)!;
    string model = context.ParseResult.GetValueForOption(modelOption)!;
    string? outputPath = context.ParseResult.GetValueForOption(outputOption);
    string format = context.ParseResult.GetValueForOption(formatOption)!;

    using var generator = CreateGenerator(model);
    var options = BuildOptions(null, "720p", null, false, format);
    var request = new VideoGenerationRequest(prompt)
    {
        OperationKind = VideoOperationKind.Extend,
        SourceVideoId = videoId,
    };

    var operation = await generator.GenerateAsync(request, options);
    await CompleteAndSaveAsync(operation, options, outputPath);
});

// ── Root ──────────────────────────────────────────────────────────────────
var rootCommand = new RootCommand("Luma AI (Dream Machine) video generation sample using MEAI IVideoGenerator.")
{
    generateCommand,
    extendCommand,
};

return await rootCommand.InvokeAsync(args);

// ═══════════════════════════════════════════════════════════════════════════
// Helpers
// ═══════════════════════════════════════════════════════════════════════════

// Resolves the API key from the environment and builds the Luma generator; exits with a
// helpful message when the key is missing.
static IVideoGenerator CreateGenerator(string model)
{
    string? apiKey = Environment.GetEnvironmentVariable("LUMA_API_KEY");
    if (string.IsNullOrEmpty(apiKey))
    {
        Console.Error.WriteLine("Error: Set the LUMA_API_KEY environment variable.");
        Console.Error.WriteLine("Get a key at https://lumalabs.ai/dream-machine/api/keys");
        Environment.Exit(1);
    }

    return new LumaVideoGenerator(apiKey, model);
}

// Translates CLI arguments into VideoGenerationOptions for the Luma generator.
static VideoGenerationOptions BuildOptions(string? duration, string resolution, string? aspectRatio, bool loop, string format)
{
    var options = new VideoGenerationOptions
    {
        ResponseFormat = string.Equals(format, "uri", StringComparison.OrdinalIgnoreCase)
            ? VideoGenerationResponseFormat.Uri
            : VideoGenerationResponseFormat.Data,
        AdditionalProperties = [],
    };

    if (duration is not null)
    {
        // Try to parse "5s" → 5 seconds
        if (int.TryParse(duration.TrimEnd('s'), out int secs))
        {
            options.Duration = TimeSpan.FromSeconds(secs);
        }
    }

    if (aspectRatio is not null)
    {
        options.AspectRatio = aspectRatio;
    }

    if (loop)
    {
        options.AdditionalProperties["loop"] = true;
    }

    // Map resolution string to VideoSize for the abstraction
    options.VideoSize = resolution switch
    {
        "540p" => new System.Drawing.Size(960, 540),
        "1080p" => new System.Drawing.Size(1920, 1080),
        "4k" => new System.Drawing.Size(3840, 2160),
        _ => new System.Drawing.Size(1280, 720),
    };

    return options;
}

// Waits for the operation to finish, then prints each resulting content item and
// saves DataContent results when an output path was requested.
static async Task CompleteAndSaveAsync(VideoGenerationOperation operation, VideoGenerationOptions options, string? outputPath)
{
    Console.WriteLine($"OPERATION_ID: {operation.OperationId}");
    Console.WriteLine($"  Status: {operation.Status}");

    var sw = System.Diagnostics.Stopwatch.StartNew();
    await operation.WaitForCompletionAsync(
        new Progress<VideoGenerationProgress>(p =>
            Console.WriteLine($"  Progress: {p.Status}{(p.PercentComplete.HasValue ? $" ({p.PercentComplete}%)" : "")}")));

    sw.Stop();
    Console.WriteLine($"  Completed in {sw.Elapsed.TotalSeconds:F1}s");

    var contents = await operation.GetContentsAsync(options);
    Console.WriteLine($"  {contents.Count} content item(s)");

    for (int i = 0; i < contents.Count; i++)
    {
        switch (contents[i])
        {
            case DataContent dc when outputPath is not null:
            {
                // BUG FIX: suffix the index so multiple results don't silently overwrite
                // each other at the same path.
                string savePath = contents.Count > 1
                    ? Path.Combine(
                        Path.GetDirectoryName(outputPath) ?? ".",
                        $"{Path.GetFileNameWithoutExtension(outputPath)}_{i}{Path.GetExtension(outputPath)}")
                    : outputPath;

                // BUG FIX: Path.GetDirectoryName returns "" (not null) for a bare file name,
                // and Directory.CreateDirectory("") throws — only create when a directory exists.
                string? dir = Path.GetDirectoryName(savePath);
                if (!string.IsNullOrEmpty(dir))
                {
                    Directory.CreateDirectory(dir);
                }

                await dc.SaveToAsync(savePath);
                Console.WriteLine($"  [{i}] Saved: {savePath} ({dc.Data.Length} bytes)");
                break;
            }

            case DataContent dc:
                Console.WriteLine($"  [{i}] DataContent: {dc.Data.Length} bytes ({dc.MediaType})");
                break;
            case UriContent uc:
                Console.WriteLine($"  [{i}] URI: {uc.Uri}");
                break;
            default:
                Console.WriteLine($"  [{i}] {contents[i].GetType().Name}");
                break;
        }
    }
}
diff --git a/samples/VideoProviders/LumaAI/README.md b/samples/VideoProviders/LumaAI/README.md
new file mode 100644
index 00000000000..4d530c0556b
--- /dev/null
+++ b/samples/VideoProviders/LumaAI/README.md
@@ -0,0 +1,64 @@
# Luma AI (Dream Machine) Video Generation Sample

This sample demonstrates using the **Microsoft.Extensions.AI** `IVideoGenerator` abstraction with Luma AI's Dream Machine API (Ray 2 models).

## Getting Access

1. Go to [https://lumalabs.ai/dream-machine/api/keys](https://lumalabs.ai/dream-machine/api/keys)
2. Sign in or create a Luma account
3. Create an API key
4. 
Check billing at [https://lumalabs.ai/dream-machine/api/billing/overview](https://lumalabs.ai/dream-machine/api/billing/overview)
+
+## Environment Setup
+
+```bash
+export LUMA_API_KEY="luma-xxxx"
+```
+
+## Models
+
+| Model | ID | Notes |
+|---|---|---|
+| Ray 2 | `ray-2` | Full quality, supports 540p–4k |
+| Ray 2 Flash | `ray-flash-2` | Faster, lower cost |
+
+## Supported Operations
+
+| Operation | MEAI Mapping | Notes |
+|---|---|---|
+| Text-to-video | `VideoOperationKind.Create`, no `StartFrame` | Basic prompt → video |
+| Image-to-video (start frame) | `VideoOperationKind.Create` + `StartFrame` (image) | Image as first frame (`keyframes.frame0`) |
+| Image-to-video (start+end frames) | `VideoOperationKind.Create` + `StartFrame` + `EndFrame` | Two images as keyframes (`frame0`+`frame1`) for interpolation |
+| Extend video | `VideoOperationKind.Extend` + `SourceVideoId` | Extend using the generation ID of a completed video |
+| Reverse extend | `AdditionalProperties` | Extend backwards — requires provider-specific keyframe manipulation |
+| Video interpolation | `AdditionalProperties` | Interpolate between two generation IDs |
+
+## Usage
+
+```bash
+# Text-to-video
+dotnet run -- generate "A tiger walking through snow" --output tiger.mp4
+
+# Image-to-video with start frame
+dotnet run -- generate "The scene comes alive" --image start.jpg --output scene.mp4
+
+# Start + end frame interpolation
+dotnet run -- generate "Smooth transition" --image start.jpg --end-image end.jpg
+
+# Extend a completed video (pass the generation ID of a finished video)
+dotnet run -- extend "The tiger starts running" --video <generation-id> --output extended.mp4
+
+# With options
+dotnet run -- generate "A neon cityscape" --model ray-2 --resolution 1080p --aspect-ratio 16:9 --loop --duration 5s
+```
+
+## API Gaps / Limitations
+
+- **Image URLs only**: Luma requires HTTPS URLs for `promptImage`, not data URIs. The sample sends data URIs but the API may reject them — callers may need to pre-upload to a CDN. 
+- **No direct edit**: There is no video editing endpoint; `VideoOperationKind.Edit` is mapped to keyframe continuation which is not true editing. +- **Reverse extend**: Requires setting `SourceVideoId` as `frame1` (not `frame0`). This requires provider-specific handling not captured by the current abstraction. +- **Concepts/camera motion**: Luma supports "concepts" (e.g., `dolly_zoom`) and camera motion keywords in prompts. These are prompt-level, no dedicated API field. +- **Callback URL**: Luma supports `callback_url` for push-based status updates — not part of the MEAI polling model. +- **Modify Video**: Luma has a separate `/modify-video` endpoint for video editing (not modeled here). +- **Reframe**: Luma supports video/image reframing to different aspect ratios — a unique feature. +- **Add Audio**: Luma has a separate endpoint to add audio to a completed generation. diff --git a/samples/VideoProviders/MultiProviderPOC/MultiProviderPOC.csproj b/samples/VideoProviders/MultiProviderPOC/MultiProviderPOC.csproj new file mode 100644 index 00000000000..d5d23595d8e --- /dev/null +++ b/samples/VideoProviders/MultiProviderPOC/MultiProviderPOC.csproj @@ -0,0 +1,18 @@ + + + + Exe + net10.0 + enable + enable + $(NoWarn);MEAI001;OPENAI001 + + + + + + + + + + diff --git a/samples/VideoProviders/MultiProviderPOC/Program.cs b/samples/VideoProviders/MultiProviderPOC/Program.cs new file mode 100644 index 00000000000..34b7ac0de8b --- /dev/null +++ b/samples/VideoProviders/MultiProviderPOC/Program.cs @@ -0,0 +1,796 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Multi-Provider Video Generation POC +// +// Demonstrates using IVideoGenerator with multiple providers through a unified CLI. +// Supports: OpenAI (Sora), Google Veo, Runway, and Luma AI. 
+// +// Usage: +// dotnet run -- generate --provider openai "A cat playing piano" +// dotnet run -- generate --provider veo "A cinematic drone shot" --audio --aspect-ratio 16:9 +// dotnet run -- generate --provider runway "A bunny hopping" --seed 42 +// dotnet run -- generate --provider luma "A tiger in snow" --aspect-ratio 16:9 +// dotnet run -- image-to-video --provider openai "The scene comes alive" --image photo.jpg +// dotnet run -- edit --provider openai "Change sky to sunset" --video +// dotnet run -- extend --provider openai "Continue the scene" --video +// +// Environment variables (set the ones for the providers you plan to use): +// OPENAI_API_KEY — OpenAI API key +// GOOGLE_API_KEY — Google Gemini API key +// RUNWAY_API_KEY — Runway API key +// LUMA_API_KEY — Luma AI API key + +using System.CommandLine; +using System.Drawing; +using System.Net.Http.Headers; +using System.Text.Json; +using System.Text.Json.Nodes; +using Microsoft.Extensions.AI; +using Microsoft.Extensions.Logging; +using OpenAI; + +// ── Shared options ───────────────────────────────────────────────────────── +var providerOption = new Option("--provider", () => VideoProvider.OpenAI, "Video generation provider."); +var modelOption = new Option("--model", "Model ID (provider-specific). Uses provider default if omitted."); +var outputOption = new Option("--output", "Output file path (.mp4)."); +var durationOption = new Option("--duration", "Duration in seconds."); +var aspectRatioOption = new Option("--aspect-ratio", "Aspect ratio (e.g. 
16:9, 9:16, 1:1)."); +var seedOption = new Option("--seed", "Seed for reproducible generation."); +var audioOption = new Option("--audio", () => false, "Generate audio (Veo 3+ only)."); +var negativePromptOption = new Option("--negative-prompt", "What to avoid in the generated video."); +var widthOption = new Option("--width", "Video width in pixels."); +var heightOption = new Option("--height", "Video height in pixels."); +var formatOption = new Option("--format", () => "data", "Response format: data or uri."); + +// ── generate (text-to-video) ─────────────────────────────────────────────── +var generatePromptArg = new Argument("prompt", "Text prompt describing the video."); +var generateCommand = new Command("generate", "Generate a video from a text prompt.") +{ + generatePromptArg, providerOption, modelOption, outputOption, + durationOption, aspectRatioOption, seedOption, audioOption, + negativePromptOption, widthOption, heightOption, formatOption, +}; + +generateCommand.SetHandler(async (context) => +{ + string prompt = context.ParseResult.GetValueForArgument(generatePromptArg); + var provider = context.ParseResult.GetValueForOption(providerOption); + string? model = context.ParseResult.GetValueForOption(modelOption); + string? 
outputPath = context.ParseResult.GetValueForOption(outputOption); + var opts = ParseSharedOptions(context, model); + + using var generator = CreateGenerator(provider, model); + var request = new VideoGenerationRequest(prompt); + ApplyNegativePrompt(request, context.ParseResult.GetValueForOption(negativePromptOption)); + + var operation = await generator.GenerateAsync(request, opts); + await CompleteAndSaveAsync(operation, opts, outputPath); +}); + +// ── image-to-video ───────────────────────────────────────────────────────── +var i2vPromptArg = new Argument("prompt", "Text prompt."); +var imageOption = new Option("--image", "Input image file (path or URL).") { IsRequired = true }; + +var i2vCommand = new Command("image-to-video", "Generate video from an image + text prompt.") +{ + i2vPromptArg, imageOption, providerOption, modelOption, outputOption, + durationOption, aspectRatioOption, seedOption, audioOption, + negativePromptOption, widthOption, heightOption, formatOption, +}; + +i2vCommand.SetHandler(async (context) => +{ + string prompt = context.ParseResult.GetValueForArgument(i2vPromptArg); + string imagePath = context.ParseResult.GetValueForOption(imageOption)!; + var provider = context.ParseResult.GetValueForOption(providerOption); + string? model = context.ParseResult.GetValueForOption(modelOption); + string? 
outputPath = context.ParseResult.GetValueForOption(outputOption); + var opts = ParseSharedOptions(context, model); + + List media; + if (imagePath.StartsWith("http", StringComparison.OrdinalIgnoreCase)) + { + media = [new UriContent(new Uri(imagePath), "image/jpeg")]; + } + else + { + media = [await DataContent.LoadFromAsync(imagePath)]; + } + + using var generator = CreateGenerator(provider, model); + var request = new VideoGenerationRequest(prompt, media); + ApplyNegativePrompt(request, context.ParseResult.GetValueForOption(negativePromptOption)); + + var operation = await generator.GenerateAsync(request, opts); + await CompleteAndSaveAsync(operation, opts, outputPath); +}); + +// ── edit ─────────────────────────────────────────────────────────────────── +var editPromptArg = new Argument("prompt", "Prompt describing the edit."); +var editVideoOption = new Option("--video", "Video ID to edit.") { IsRequired = true }; +var editInputOption = new Option("--input", "Video file to upload for editing (if no --video ID)."); + +var editCommand = new Command("edit", "Edit an existing video.") +{ + editPromptArg, editVideoOption, editInputOption, providerOption, + modelOption, outputOption, formatOption, +}; + +editCommand.SetHandler(async (context) => +{ + string prompt = context.ParseResult.GetValueForArgument(editPromptArg); + string videoId = context.ParseResult.GetValueForOption(editVideoOption)!; + string? inputPath = context.ParseResult.GetValueForOption(editInputOption); + var provider = context.ParseResult.GetValueForOption(providerOption); + string? model = context.ParseResult.GetValueForOption(modelOption); + string? outputPath = context.ParseResult.GetValueForOption(outputOption); + string format = context.ParseResult.GetValueForOption(formatOption)!; + + using var generator = CreateGenerator(provider, model); + var opts = new VideoGenerationOptions + { + ModelId = model, + ResponseFormat = ParseFormat(format), + }; + + List? 
media = null; + if (inputPath is not null) + { + media = [await DataContent.LoadFromAsync(inputPath)]; + } + + var request = new VideoGenerationRequest(prompt, media) + { + OperationKind = VideoOperationKind.Edit, + SourceVideoId = videoId, + }; + + var operation = await generator.GenerateAsync(request, opts); + await CompleteAndSaveAsync(operation, opts, outputPath); +}); + +// ── extend ───────────────────────────────────────────────────────────────── +var extendPromptArg = new Argument("prompt", "Prompt for extending the video."); +var extendVideoOption = new Option("--video", "Video ID to extend.") { IsRequired = true }; + +var extendCommand = new Command("extend", "Extend a completed video.") +{ + extendPromptArg, extendVideoOption, providerOption, + modelOption, outputOption, durationOption, formatOption, +}; + +extendCommand.SetHandler(async (context) => +{ + string prompt = context.ParseResult.GetValueForArgument(extendPromptArg); + string videoId = context.ParseResult.GetValueForOption(extendVideoOption)!; + var provider = context.ParseResult.GetValueForOption(providerOption); + string? model = context.ParseResult.GetValueForOption(modelOption); + string? outputPath = context.ParseResult.GetValueForOption(outputOption); + int? 
duration = context.ParseResult.GetValueForOption(durationOption); + string format = context.ParseResult.GetValueForOption(formatOption)!; + + using var generator = CreateGenerator(provider, model); + var opts = new VideoGenerationOptions + { + ModelId = model, + ResponseFormat = ParseFormat(format), + }; + + if (duration.HasValue) + { + opts.Duration = TimeSpan.FromSeconds(duration.Value); + } + + var request = new VideoGenerationRequest(prompt) + { + OperationKind = VideoOperationKind.Extend, + SourceVideoId = videoId, + }; + + var operation = await generator.GenerateAsync(request, opts); + await CompleteAndSaveAsync(operation, opts, outputPath); +}); + +// ── Root ─────────────────────────────────────────────────────────────────── +var rootCommand = new RootCommand("Multi-provider video generation POC using MEAI IVideoGenerator.\nSupports: OpenAI (Sora), Google Veo, Runway, Luma AI.") +{ + generateCommand, + i2vCommand, + editCommand, + extendCommand, +}; + +return await rootCommand.InvokeAsync(args); + +// ═══════════════════════════════════════════════════════════════════════════ +// Provider factory +// ═══════════════════════════════════════════════════════════════════════════ +static IVideoGenerator CreateGenerator(VideoProvider provider, string? model) => provider switch +{ + VideoProvider.OpenAI => CreateOpenAI(model), + VideoProvider.Veo => CreateGoogleVeo(model), + VideoProvider.Runway => CreateRunway(model), + VideoProvider.Luma => CreateLuma(model), + _ => throw new ArgumentException($"Unknown provider: {provider}"), +}; + +static IVideoGenerator CreateOpenAI(string? model) +{ + string apiKey = RequireEnvVar("OPENAI_API_KEY", "https://platform.openai.com/api-keys"); + var loggerFactory = LoggerFactory.Create(b => b.AddConsole().SetMinimumLevel(LogLevel.Debug)); + return new OpenAIClient(apiKey) + .GetVideoClient() + .AsIVideoGenerator(model ?? 
"sora-2") + .AsBuilder() + .UseLogging(loggerFactory) + .UseOpenTelemetry(loggerFactory) + .Build(); +} + +static IVideoGenerator CreateGoogleVeo(string? model) +{ + string apiKey = RequireEnvVar("GOOGLE_API_KEY", "https://aistudio.google.com/apikey"); + return new GoogleVeoVideoGenerator(apiKey, model ?? "veo-3.1-generate-preview"); +} + +static IVideoGenerator CreateRunway(string? model) +{ + string apiKey = RequireEnvVar("RUNWAY_API_KEY", "https://dev.runwayml.com/"); + return new RunwayVideoGenerator(apiKey, model ?? "gen4_turbo"); +} + +static IVideoGenerator CreateLuma(string? model) +{ + string apiKey = RequireEnvVar("LUMA_API_KEY", "https://lumalabs.ai/dream-machine/api/keys"); + return new LumaVideoGenerator(apiKey, model ?? "ray-2"); +} + +static string RequireEnvVar(string name, string url) +{ + string? value = Environment.GetEnvironmentVariable(name); + if (string.IsNullOrEmpty(value)) + { + Console.Error.WriteLine($"Error: Set the {name} environment variable."); + Console.Error.WriteLine($"Get a key at {url}"); + Environment.Exit(1); + } + + return value!; +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Helpers +// ═══════════════════════════════════════════════════════════════════════════ +VideoGenerationOptions ParseSharedOptions(System.CommandLine.Invocation.InvocationContext context, string? model) +{ + int? duration = context.ParseResult.GetValueForOption(durationOption); + string? aspectRatio = context.ParseResult.GetValueForOption(aspectRatioOption); + int? seed = context.ParseResult.GetValueForOption(seedOption); + bool audio = context.ParseResult.GetValueForOption(audioOption); + int? width = context.ParseResult.GetValueForOption(widthOption); + int? 
height = context.ParseResult.GetValueForOption(heightOption); + string format = context.ParseResult.GetValueForOption(formatOption)!; + + var opts = new VideoGenerationOptions + { + ModelId = model, + ResponseFormat = ParseFormat(format), + }; + + if (duration.HasValue) + { + opts.Duration = TimeSpan.FromSeconds(duration.Value); + } + + if (aspectRatio is not null) + { + opts.AspectRatio = aspectRatio; + } + + if (seed.HasValue) + { + opts.Seed = seed.Value; + } + + if (audio) + { + opts.GenerateAudio = true; + } + + if (width.HasValue && height.HasValue) + { + opts.VideoSize = new Size(width.Value, height.Value); + } + + return opts; +} + +static void ApplyNegativePrompt(VideoGenerationRequest request, string? negativePrompt) +{ + if (negativePrompt is not null) + { + request.NegativePrompt = negativePrompt; + } +} + +static VideoGenerationResponseFormat ParseFormat(string format) + => string.Equals(format, "uri", StringComparison.OrdinalIgnoreCase) + ? VideoGenerationResponseFormat.Uri + : VideoGenerationResponseFormat.Data; + +static async Task CompleteAndSaveAsync(VideoGenerationOperation operation, VideoGenerationOptions options, string? outputPath) +{ + Console.WriteLine($"OPERATION_ID: {operation.OperationId}"); + Console.WriteLine($" Provider: {operation.GetType().Name}"); + Console.WriteLine($" Status: {operation.Status}"); + + var sw = System.Diagnostics.Stopwatch.StartNew(); + await operation.WaitForCompletionAsync( + new Progress(p => + Console.WriteLine($" Progress: {p.Status}{(p.PercentComplete.HasValue ? $" ({p.PercentComplete}%)" : string.Empty)}"))); + + sw.Stop(); + Console.WriteLine($" Completed in {sw.Elapsed.TotalSeconds:F1}s"); + + var contents = await operation.GetContentsAsync(options); + Console.WriteLine($" {contents.Count} content item(s)"); + + for (int i = 0; i < contents.Count; i++) + { + string savePath = outputPath is not null && contents.Count > 1 + ? Path.Combine( + Path.GetDirectoryName(outputPath) ?? 
".", + $"{Path.GetFileNameWithoutExtension(outputPath)}_{i}{Path.GetExtension(outputPath)}") + : outputPath!; + + switch (contents[i]) + { + case DataContent dc when outputPath is not null: + Directory.CreateDirectory(Path.GetDirectoryName(savePath) ?? "."); + await dc.SaveToAsync(savePath); + Console.WriteLine($" [{i}] Saved: {savePath} ({dc.Data.Length} bytes, {dc.MediaType})"); + break; + case DataContent dc: + Console.WriteLine($" [{i}] DataContent: {dc.Data.Length} bytes ({dc.MediaType})"); + break; + case UriContent uc: + Console.WriteLine($" [{i}] URI: {uc.Uri} ({uc.MediaType})"); + break; + default: + Console.WriteLine($" [{i}] {contents[i].GetType().Name}"); + break; + } + } +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Types +// ═══════════════════════════════════════════════════════════════════════════ +enum VideoProvider { OpenAI, Veo, Runway, Luma } + +// ═══════════════════════════════════════════════════════════════════════════ +// Inline provider implementations +// (In production, these would be separate NuGet packages or project references) +// ═══════════════════════════════════════════════════════════════════════════ + +// ─── Google Veo ──────────────────────────────────────────────────────────── +internal sealed class GoogleVeoVideoGenerator : IVideoGenerator +{ + private const string BaseUrl = "https://generativelanguage.googleapis.com/v1beta"; + private readonly HttpClient _httpClient; + private readonly string _apiKey; + private readonly string _modelId; + + public GoogleVeoVideoGenerator(string apiKey, string modelId, HttpClient? httpClient = null) + { + _apiKey = apiKey; + _modelId = modelId; + _httpClient = httpClient ?? new HttpClient(); + _httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json")); + } + + public async Task GenerateAsync( + VideoGenerationRequest request, VideoGenerationOptions? 
options = null, CancellationToken cancellationToken = default) + { + string model = options?.ModelId ?? _modelId; + + // Build the instance object (prompt, image) + var instance = new JsonObject(); + if (request.Prompt is not null) instance["prompt"] = request.Prompt; + + if (request.OperationKind == VideoOperationKind.Create && request.StartFrame is not null) + { + if (request.StartFrame is DataContent dc && (dc.MediaType?.StartsWith("image/", StringComparison.OrdinalIgnoreCase) ?? false) && dc.Data.Length > 0) + { + instance["image"] = new JsonObject { ["bytesBase64Encoded"] = Convert.ToBase64String(dc.Data.ToArray()), ["mimeType"] = dc.MediaType }; + } + else if (request.StartFrame is UriContent uc) + { + instance["image"] = new JsonObject { ["gcsUri"] = uc.Uri.ToString() }; + } + } + + // Build the parameters object (generation config) + var parameters = new JsonObject(); + if (options?.Duration is { } dur) parameters["durationSeconds"] = (int)dur.TotalSeconds; + if (options?.AspectRatio is { } ar) parameters["aspectRatio"] = ar; + if (options?.Count is { } cnt) parameters["numberOfVideos"] = cnt; + if (options?.Seed is int seed) parameters["seed"] = seed; + if (options?.GenerateAudio == true) parameters["generateAudio"] = true; + if (request.NegativePrompt is { } neg) parameters["negativePrompt"] = neg; + if (options?.AdditionalProperties?.TryGetValue("personGeneration", out object? 
pg) == true && pg is string pgs) parameters["personGeneration"] = pgs; + + // Wrap in instances/parameters envelope for predictLongRunning + var body = new JsonObject + { + ["instances"] = new JsonArray { instance }, + }; + if (parameters.Count > 0) body["parameters"] = parameters; + + string url = $"{BaseUrl}/models/{model}:predictLongRunning?key={_apiKey}"; + using var content = new StringContent(body.ToJsonString(), System.Text.Encoding.UTF8, "application/json"); + using var response = await _httpClient.PostAsync(url, content, cancellationToken); + string responseBody = await response.Content.ReadAsStringAsync(cancellationToken); + if (!response.IsSuccessStatusCode) + { + throw new HttpRequestException($"Google Veo API error {(int)response.StatusCode} ({response.StatusCode}): {responseBody}"); + } + var result = JsonDocument.Parse(responseBody); + string opName = result.RootElement.GetProperty("name").GetString()!; + return new GoogleVeoVideoGenerationOperation(opName, _apiKey, _httpClient, model); + } + + public object? GetService(Type serviceType, object? serviceKey = null) + => serviceKey is null && serviceType.IsInstanceOfType(this) ? this : null; + + public void Dispose() => _httpClient.Dispose(); +} + +internal sealed class GoogleVeoVideoGenerationOperation : VideoGenerationOperation +{ + private const string BaseUrl = "https://generativelanguage.googleapis.com/v1beta"; + private readonly HttpClient _httpClient; + private readonly string _apiKey; + private bool _done; + private string? _status; + private string? _failureReason; + private readonly List _videoUris = []; + + public GoogleVeoVideoGenerationOperation(string opName, string apiKey, HttpClient httpClient, string modelId) + { + OperationId = opName; ModelId = modelId; _apiKey = apiKey; _httpClient = httpClient; _status = "PROCESSING"; + } + + public override string? OperationId { get; } + public override string? Status => _status; + public override int? PercentComplete => _done ? 
100 : null; + public override bool IsCompleted => _done; + public override string? FailureReason => _failureReason; + + public override async Task UpdateAsync(CancellationToken cancellationToken = default) + { + using var resp = await _httpClient.GetAsync($"{BaseUrl}/{OperationId}?key={_apiKey}", cancellationToken); + string body = await resp.Content.ReadAsStringAsync(cancellationToken); + if (!resp.IsSuccessStatusCode) + { + throw new HttpRequestException($"Google Veo poll error {(int)resp.StatusCode} ({resp.StatusCode}): {body}"); + } + using var doc = JsonDocument.Parse(body); + var root = doc.RootElement; + _done = root.TryGetProperty("done", out var d) && d.GetBoolean(); + if (root.TryGetProperty("error", out var err)) { _failureReason = err.ToString(); _status = "FAILED"; _done = true; } + else if (_done) + { + _status = "COMPLETED"; + _videoUris.Clear(); + // predictLongRunning response: response.generateVideoResponse.generatedSamples[].video.uri + if (root.TryGetProperty("response", out var response) && + response.TryGetProperty("generateVideoResponse", out var videoResponse)) + { + if (videoResponse.TryGetProperty("generatedSamples", out var samples)) + foreach (var s in samples.EnumerateArray()) + if (s.TryGetProperty("video", out var video) && video.TryGetProperty("uri", out var uri)) + _videoUris.Add(uri.GetString()!); + + if (_videoUris.Count == 0) + { + string? reason = null; + if (videoResponse.TryGetProperty("raiMediaFilteredCount", out var fc) && fc.GetInt32() > 0) + { + reason = $"Video filtered by safety filters ({fc.GetInt32()} filtered)."; + if (videoResponse.TryGetProperty("raiMediaFilteredReasons", out var reasons)) reason += $" Reasons: {reasons}"; + } + else + { + reason = $"No videos in response. 
Full response: {response}"; + } + + _status = "FAILED"; + _failureReason = reason; + } + } + else + { + _status = "FAILED"; + _failureReason = $"Unexpected response format: {root}"; + } + } + } + + public override async Task WaitForCompletionAsync(IProgress? progress = null, CancellationToken cancellationToken = default) + { + while (!IsCompleted) + { + await Task.Delay(TimeSpan.FromSeconds(10), cancellationToken); + await UpdateAsync(cancellationToken); + progress?.Report(new VideoGenerationProgress(_status, PercentComplete)); + } + + if (_status == "FAILED") throw new InvalidOperationException($"Video generation failed: {_failureReason}"); + } + + public override async Task> GetContentsAsync(VideoGenerationOptions? options = null, CancellationToken cancellationToken = default) + { + if (!IsCompleted) throw new InvalidOperationException("Not completed."); + if (_videoUris.Count == 0) await UpdateAsync(cancellationToken); + var results = new List(); + foreach (var uri in _videoUris) + { + // Append API key to download URI + string downloadUri = uri.Contains('?') ? $"{uri}&key={_apiKey}" : $"{uri}?key={_apiKey}"; + if (options?.ResponseFormat == VideoGenerationResponseFormat.Uri) { results.Add(new UriContent(new Uri(uri), "video/mp4")); continue; } + using var r = await _httpClient.GetAsync(downloadUri, cancellationToken); r.EnsureSuccessStatusCode(); + results.Add(new DataContent(await r.Content.ReadAsByteArrayAsync(cancellationToken), "video/mp4")); + } + + return results; + } +} + +// ─── Runway ──────────────────────────────────────────────────────────────── +internal sealed class RunwayVideoGenerator : IVideoGenerator +{ + private const string BaseUrl = "https://api.dev.runwayml.com"; + private readonly HttpClient _httpClient; + private readonly string _modelId; + + public RunwayVideoGenerator(string apiKey, string modelId, HttpClient? httpClient = null) + { + _modelId = modelId; + _httpClient = httpClient ?? 
new HttpClient(); + _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey); + _httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json")); + _httpClient.DefaultRequestHeaders.Add("X-Runway-Version", "2024-11-06"); + } + + public async Task GenerateAsync( + VideoGenerationRequest request, VideoGenerationOptions? options = null, CancellationToken cancellationToken = default) + { + string model = options?.ModelId ?? _modelId; + string endpoint; + JsonObject body; + + bool hasVideo = request.SourceVideo is DataContent svDc && svDc.MediaType?.StartsWith("video/", StringComparison.OrdinalIgnoreCase) == true; + bool hasImage = request.StartFrame is DataContent sfDc && sfDc.MediaType?.StartsWith("image/", StringComparison.OrdinalIgnoreCase) == true; + + if (request.OperationKind == VideoOperationKind.Edit && hasVideo) + { + endpoint = "/v1/video_to_video"; + body = new JsonObject { ["model"] = "gen4_aleph", ["promptText"] = request.Prompt ?? "" }; + string? vidUri = GetMediaUri(request.SourceVideo); + if (vidUri is not null) body["videoUri"] = vidUri; + } + else if (hasImage) + { + endpoint = "/v1/image_to_video"; + string? imgUri = GetMediaUri(request.StartFrame); + string ratio = options?.AspectRatio is { } ar ? MapAspectRatio(ar) : "1280:720"; + body = new JsonObject { ["model"] = model, ["promptText"] = request.Prompt ?? "", ["promptImage"] = imgUri ?? "", ["ratio"] = ratio }; + } + else + { + endpoint = "/v1/text_to_video"; + string ratio = options?.AspectRatio is { } ar ? MapAspectRatio(ar) : "1280:720"; + body = new JsonObject { ["model"] = model, ["promptText"] = request.Prompt ?? 
"", ["ratio"] = ratio }; + } + + if (options?.Duration is { } dur) body["duration"] = (int)dur.TotalSeconds; + if (options?.Seed is int seed) body["seed"] = seed; + + using var content = new StringContent(body.ToJsonString(), System.Text.Encoding.UTF8, "application/json"); + using var resp = await _httpClient.PostAsync($"{BaseUrl}{endpoint}", content, cancellationToken); + string responseBody = await resp.Content.ReadAsStringAsync(cancellationToken); + resp.EnsureSuccessStatusCode(); + string taskId = JsonDocument.Parse(responseBody).RootElement.GetProperty("id").GetString()!; + return new RunwayVideoGenerationOperation(taskId, _httpClient, model); + } + + public object? GetService(Type serviceType, object? serviceKey = null) + => serviceKey is null && serviceType.IsInstanceOfType(this) ? this : null; + public void Dispose() => _httpClient.Dispose(); + + private static string? GetMediaUri(AIContent? content) + { + if (content is null) return null; + if (content is UriContent uc) return uc.Uri.ToString(); + if (content is DataContent dc && dc.Data.Length > 0) + return dc.Uri ?? $"data:{dc.MediaType ?? "application/octet-stream"};base64,{Convert.ToBase64String(dc.Data.ToArray())}"; + return null; + } + + private static string MapAspectRatio(string ar) => ar switch + { + "16:9" => "1280:720", "9:16" => "720:1280", "1:1" => "960:960", + "4:3" => "1104:832", "3:4" => "832:1104", _ => ar, + }; +} + +internal sealed class RunwayVideoGenerationOperation : VideoGenerationOperation +{ + private const string BaseUrl = "https://api.dev.runwayml.com"; + private readonly HttpClient _httpClient; + private string? _status; + private string? _failureReason; + private string? _outputUrl; + + public RunwayVideoGenerationOperation(string taskId, HttpClient httpClient, string modelId) + { + OperationId = taskId; ModelId = modelId; _httpClient = httpClient; _status = "PENDING"; + } + + public override string? OperationId { get; } + public override string? 
Status => _status; + public override int? PercentComplete => _status switch { "SUCCEEDED" => 100, "RUNNING" => 50, "THROTTLED" => 10, _ => 0 }; + public override bool IsCompleted => _status is "SUCCEEDED" or "FAILED"; + public override string? FailureReason => _failureReason; + + public override async Task UpdateAsync(CancellationToken cancellationToken = default) + { + using var resp = await _httpClient.GetAsync($"{BaseUrl}/v1/tasks/{OperationId}", cancellationToken); + string body = await resp.Content.ReadAsStringAsync(cancellationToken); + resp.EnsureSuccessStatusCode(); + using var doc = JsonDocument.Parse(body); + var root = doc.RootElement; + _status = root.GetProperty("status").GetString(); + if (root.TryGetProperty("failure", out var f) && f.ValueKind == JsonValueKind.String) _failureReason = f.GetString(); + if (root.TryGetProperty("output", out var o) && o.ValueKind == JsonValueKind.Array && o.GetArrayLength() > 0) _outputUrl = o[0].GetString(); + else if (root.TryGetProperty("output", out var o2) && o2.ValueKind == JsonValueKind.String) _outputUrl = o2.GetString(); + } + + public override async Task WaitForCompletionAsync(IProgress? progress = null, CancellationToken cancellationToken = default) + { + while (!IsCompleted) + { + await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken); + await UpdateAsync(cancellationToken); + progress?.Report(new VideoGenerationProgress(_status, PercentComplete)); + } + + if (_status == "FAILED") throw new InvalidOperationException($"Task failed: {_failureReason}"); + } + + public override async Task> GetContentsAsync(VideoGenerationOptions? 
options = null, CancellationToken cancellationToken = default) + { + if (!IsCompleted) throw new InvalidOperationException("Not completed."); + if (_outputUrl is null) await UpdateAsync(cancellationToken); + if (_outputUrl is null) throw new InvalidOperationException("No output URL."); + if (options?.ResponseFormat == VideoGenerationResponseFormat.Uri) return [new UriContent(new Uri(_outputUrl), "video/mp4")]; + using var r = await _httpClient.GetAsync(_outputUrl, cancellationToken); r.EnsureSuccessStatusCode(); + return [new DataContent(await r.Content.ReadAsByteArrayAsync(cancellationToken), "video/mp4")]; + } +} + +// ─── Luma AI ─────────────────────────────────────────────────────────────── +internal sealed class LumaVideoGenerator : IVideoGenerator +{ + private const string BaseUrl = "https://api.lumalabs.ai/dream-machine/v1"; + private readonly HttpClient _httpClient; + private readonly string _modelId; + + public LumaVideoGenerator(string apiKey, string modelId, HttpClient? httpClient = null) + { + _modelId = modelId; + _httpClient = httpClient ?? new HttpClient(); + _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey); + _httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json")); + } + + public async Task GenerateAsync( + VideoGenerationRequest request, VideoGenerationOptions? options = null, CancellationToken cancellationToken = default) + { + string model = options?.ModelId ?? 
_modelId; + var body = new JsonObject { ["prompt"] = request.Prompt, ["model"] = model }; + if (options?.Duration is { } dur) body["duration"] = $"{(int)dur.TotalSeconds}s"; + if (options?.AspectRatio is { } ar) body["aspect_ratio"] = ar; + + var keyframes = new JsonObject(); + if (request.OperationKind == VideoOperationKind.Create && request.StartFrame is not null) + { + if (request.StartFrame is UriContent uc) keyframes["frame0"] = new JsonObject { ["type"] = "image", ["url"] = uc.Uri.ToString() }; + else if (request.StartFrame is DataContent dc && dc.Data.Length > 0) + keyframes["frame0"] = new JsonObject { ["type"] = "image", ["url"] = dc.Uri ?? $"data:{dc.MediaType ?? "image/png"};base64,{Convert.ToBase64String(dc.Data.ToArray())}" }; + + if (request.EndFrame is not null) + { + if (request.EndFrame is UriContent euc) keyframes["frame1"] = new JsonObject { ["type"] = "image", ["url"] = euc.Uri.ToString() }; + else if (request.EndFrame is DataContent edc && edc.Data.Length > 0) + keyframes["frame1"] = new JsonObject { ["type"] = "image", ["url"] = edc.Uri ?? $"data:{edc.MediaType ?? "image/png"};base64,{Convert.ToBase64String(edc.Data.ToArray())}" }; + } + } + else if (request.OperationKind == VideoOperationKind.Extend && request.SourceVideoId is not null) + { + keyframes["frame0"] = new JsonObject { ["type"] = "generation", ["id"] = request.SourceVideoId }; + } + + if (keyframes.Count > 0) body["keyframes"] = keyframes; + + using var content = new StringContent(body.ToJsonString(), System.Text.Encoding.UTF8, "application/json"); + using var resp = await _httpClient.PostAsync($"{BaseUrl}/generations", content, cancellationToken); + string responseBody = await resp.Content.ReadAsStringAsync(cancellationToken); + resp.EnsureSuccessStatusCode(); + string opId = JsonDocument.Parse(responseBody).RootElement.GetProperty("id").GetString()!; + return new LumaVideoGenerationOperation(opId, _httpClient, model); + } + + public object? GetService(Type serviceType, object? 
serviceKey = null) + => serviceKey is null && serviceType.IsInstanceOfType(this) ? this : null; + public void Dispose() => _httpClient.Dispose(); +} + +internal sealed class LumaVideoGenerationOperation : VideoGenerationOperation +{ + private const string BaseUrl = "https://api.lumalabs.ai/dream-machine/v1"; + private readonly HttpClient _httpClient; + private string? _status; + private string? _failureReason; + private string? _videoUrl; + + public LumaVideoGenerationOperation(string opId, HttpClient httpClient, string modelId) + { + OperationId = opId; ModelId = modelId; _httpClient = httpClient; _status = "queued"; + } + + public override string? OperationId { get; } + public override string? Status => _status; + public override int? PercentComplete => _status switch { "completed" => 100, "dreaming" => 50, _ => 0 }; + public override bool IsCompleted => _status is "completed" or "failed"; + public override string? FailureReason => _failureReason; + + public override async Task UpdateAsync(CancellationToken cancellationToken = default) + { + using var resp = await _httpClient.GetAsync($"{BaseUrl}/generations/{OperationId}", cancellationToken); + string body = await resp.Content.ReadAsStringAsync(cancellationToken); + resp.EnsureSuccessStatusCode(); + using var doc = JsonDocument.Parse(body); + var root = doc.RootElement; + _status = root.GetProperty("state").GetString(); + if (root.TryGetProperty("failure_reason", out var fr) && fr.ValueKind == JsonValueKind.String) _failureReason = fr.GetString(); + if (root.TryGetProperty("assets", out var assets) && assets.TryGetProperty("video", out var v) && v.ValueKind == JsonValueKind.String) _videoUrl = v.GetString(); + } + + public override async Task WaitForCompletionAsync(IProgress? 
progress = null, CancellationToken cancellationToken = default) + { + while (!IsCompleted) + { + await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken); + await UpdateAsync(cancellationToken); + progress?.Report(new VideoGenerationProgress(_status, PercentComplete)); + } + + if (_status == "failed") throw new InvalidOperationException($"Generation failed: {_failureReason}"); + } + + public override async Task> GetContentsAsync(VideoGenerationOptions? options = null, CancellationToken cancellationToken = default) + { + if (!IsCompleted) throw new InvalidOperationException("Not completed."); + if (_videoUrl is null) await UpdateAsync(cancellationToken); + if (_videoUrl is null) throw new InvalidOperationException("No video URL."); + if (options?.ResponseFormat == VideoGenerationResponseFormat.Uri) return [new UriContent(new Uri(_videoUrl), "video/mp4")]; + using var r = await _httpClient.GetAsync(_videoUrl, cancellationToken); r.EnsureSuccessStatusCode(); + return [new DataContent(await r.Content.ReadAsByteArrayAsync(cancellationToken), "video/mp4")]; + } + + public override VideoGenerationRequest CreateExtensionRequest(string? prompt = null) + => new() { Prompt = prompt, SourceVideoId = OperationId, OperationKind = VideoOperationKind.Extend }; +} diff --git a/samples/VideoProviders/MultiProviderPOC/README.md b/samples/VideoProviders/MultiProviderPOC/README.md new file mode 100644 index 00000000000..3109f34c371 --- /dev/null +++ b/samples/VideoProviders/MultiProviderPOC/README.md @@ -0,0 +1,70 @@ +# Multi-Provider Video Generation POC + +Unified CLI that demonstrates `IVideoGenerator` across four providers — OpenAI (Sora), Google Veo, Runway, and Luma AI — using the same MEAI abstractions with first-class properties. 
+
+## Prerequisites
+
+Set API keys for the providers you want to test:
+
+| Provider | Environment Variable | Get a Key |
+|----------|---------------------|-----------|
+| OpenAI | `OPENAI_API_KEY` | [platform.openai.com/api-keys](https://platform.openai.com/api-keys) |
+| Google Veo | `GOOGLE_API_KEY` | [aistudio.google.com/apikey](https://aistudio.google.com/apikey) |
+| Runway | `RUNWAY_API_KEY` | [dev.runwayml.com](https://dev.runwayml.com/) |
+| Luma AI | `LUMA_API_KEY` | [lumalabs.ai/dream-machine/api/keys](https://lumalabs.ai/dream-machine/api/keys) |
+
+## Quick Start
+
+```bash
+# Text-to-video with OpenAI
+dotnet run -- generate --provider openai "A cat playing piano" --output cat.mp4
+
+# Text-to-video with Google Veo + audio + negative prompt
+dotnet run -- generate --provider veo "Birds on a lake" --audio --negative-prompt "people, cars" --output birds.mp4
+
+# Text-to-video with Runway + seed for reproducibility
+dotnet run -- generate --provider runway "A dancer spinning" --seed 42 --output dancer.mp4
+
+# Text-to-video with Luma + aspect ratio
+dotnet run -- generate --provider luma "Flowers blooming" --aspect-ratio 9:16 --output flowers.mp4
+
+# Image-to-video (any provider)
+dotnet run -- image-to-video --provider openai "The scene comes alive" --image photo.jpg --output scene.mp4
+
+# Edit a video (OpenAI, Runway)
+dotnet run -- edit --provider openai "Change to sunset colors" --video <video-id> --output edited.mp4
+
+# Extend a video (OpenAI, Luma)
+dotnet run -- extend --provider openai "The scene continues" --video <video-id> --output extended.mp4
+```
+
+## Demo Script
+
+Run the automated demo that exercises each provider's supported features:
+
+```powershell
+# Auto-detect providers from environment variables
+./demo-multi-provider.ps1
+
+# Run specific providers
+./demo-multi-provider.ps1 -Providers "openai,veo"
+
+# With a reference image for image-to-video tests
+./demo-multi-provider.ps1 -ReferenceImage myimage.png
+
+# Reset state and start fresh
+./demo-multi-provider.ps1 -Reset +``` + +## Feature Matrix + +| Feature | OpenAI | Google Veo | Runway | Luma AI | +|---------|:------:|:----------:|:------:|:-------:| +| Text-to-video | ✅ | ✅ | ✅ | ✅ | +| Image-to-video | ✅ | ✅ | ✅ | ✅ | +| Video edit | ✅ | ❌ | ✅ | ❌ | +| Video extend | ✅ | ✅ | ❌ | ✅ | +| `AspectRatio` | via Size | ✅ | ✅ | ✅ | +| `Seed` | ❌ | ✅ | ✅ | ❌ | +| `GenerateAudio` | ❌ | ✅ | ❌ | ❌ | +| `NegativePrompt` | ❌ | ✅ | ❌ | ❌ | diff --git a/samples/VideoProviders/MultiProviderPOC/demo-multi-provider.ps1 b/samples/VideoProviders/MultiProviderPOC/demo-multi-provider.ps1 new file mode 100644 index 00000000000..e7e19571ca7 --- /dev/null +++ b/samples/VideoProviders/MultiProviderPOC/demo-multi-provider.ps1 @@ -0,0 +1,349 @@ +#!/usr/bin/env pwsh +# Multi-Provider Video Generation Demo +# +# Runs through relevant scenarios for each provider based on feature support. +# Set the environment variables for the providers you want to test. +# +# Environment variables: +# OPENAI_API_KEY — OpenAI Sora +# GOOGLE_API_KEY — Google Veo +# RUNWAY_API_KEY — Runway +# LUMA_API_KEY — Luma AI +# +# Usage: +# ./demo-multi-provider.ps1 # Run all configured providers +# ./demo-multi-provider.ps1 -Providers openai,veo # Run specific providers +# ./demo-multi-provider.ps1 -Reset # Clear state and start fresh + + + +param( + [string]$Providers = "", # Comma-separated: openai,veo,runway,luma (empty = auto-detect) + [string]$OutputDir = "..\..\artifacts\multi-provider-output", + [string]$ReferenceImage = "", # Optional image for image-to-video tests + [switch]$Reset +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = "Stop" + +$ProjectDir = $PSScriptRoot + +# ── State management ──────────────────────────────────────────────────── +New-Item -ItemType Directory -Path $OutputDir -Force | Out-Null +$stateFile = Join-Path $OutputDir "demo-state.json" + +if ($Reset -and (Test-Path $stateFile)) { + Remove-Item $stateFile -Force + Write-Host "State cleared." 
-ForegroundColor Yellow +} + +function Get-State { + if (Test-Path $stateFile) { return Get-Content $stateFile -Raw | ConvertFrom-Json -AsHashtable } + return @{} +} + +function Set-State([string]$Key, [string]$Value) { + $s = Get-State; $s[$Key] = $Value + $s | ConvertTo-Json | Set-Content $stateFile +} + +# ── Detect available providers ────────────────────────────────────────── +$providerMap = @{ + openai = "OPENAI_API_KEY" + veo = "GOOGLE_API_KEY" + runway = "RUNWAY_API_KEY" + luma = "LUMA_API_KEY" +} + +if ($Providers -ne "") { + $activeProviders = @($Providers -split "," | ForEach-Object { $_.Trim().ToLower() }) +} else { + $activeProviders = @() + foreach ($p in $providerMap.Keys) { + $envVar = $providerMap[$p] + if ([Environment]::GetEnvironmentVariable($envVar)) { + $activeProviders += $p + } + } +} + +if ($activeProviders.Count -eq 0) { + Write-Error "No providers configured. Set at least one API key environment variable." + exit 1 +} + +Write-Host "" +Write-Host ("=" * 70) -ForegroundColor Cyan +Write-Host " Multi-Provider Video Generation Demo" -ForegroundColor Cyan +Write-Host " Active providers: $($activeProviders -join ', ')" -ForegroundColor Cyan +Write-Host ("=" * 70) -ForegroundColor Cyan + +# ── Helpers ───────────────────────────────────────────────────────────── +function Invoke-Tool([string]$Label, [string[]]$Arguments) { + Write-Host "" + Write-Host ("─" * 70) -ForegroundColor Cyan + Write-Host " $Label" -ForegroundColor Cyan + Write-Host ("─" * 70) -ForegroundColor Cyan + Write-Host "> dotnet run --project $ProjectDir -- $($Arguments -join ' ')" -ForegroundColor DarkGray + + $output = & dotnet run --project $ProjectDir -- @Arguments 2>&1 + $output | ForEach-Object { Write-Host " $_" } + if ($LASTEXITCODE -ne 0) { + Write-Warning "Tool exited with code $LASTEXITCODE" + return "" + } + return ($output | Out-String) +} + +function Extract-Id([string]$Output, [string]$Prefix) { + if ($Output -match "$Prefix\:\s*(\S+)") { return $Matches[1] } + 
return "" +} + +function Skip-OrRun([string]$StateKey, [string]$Label, [string]$OutputFile, [scriptblock]$Action) { + $state = Get-State + if ($state.ContainsKey($StateKey) -and $state[$StateKey] -ne "" -and + ((-not $OutputFile) -or (Test-Path $OutputFile))) { + Write-Host "" + Write-Host ("─" * 70) -ForegroundColor DarkGray + Write-Host " SKIP: $Label ($StateKey=$($state[$StateKey]))" -ForegroundColor DarkGray + return $state[$StateKey] + } + $id = & $Action + if ($id -ne "") { Set-State $StateKey $id } + return $id +} + +# ═══════════════════════════════════════════════════════════════════════════ +# Provider-specific scenarios +# ═══════════════════════════════════════════════════════════════════════════ + +# ─── OpenAI (Sora) ───────────────────────────────────────────────────── +function Run-OpenAI { + Write-Host "" + Write-Host ("═" * 70) -ForegroundColor Green + Write-Host " OPENAI (Sora)" -ForegroundColor Green + Write-Host " Features: text-to-video, image-to-video, edit, extend, characters" -ForegroundColor Green + Write-Host ("═" * 70) -ForegroundColor Green + + # 1. Text-to-video + $t2vPath = Join-Path $OutputDir "openai_01_text2video.mp4" + $t2vId = Skip-OrRun "openai_t2v" "OpenAI: Text-to-video" $t2vPath { + $out = Invoke-Tool "OpenAI: Text-to-video (8s, 1280x720)" @( + "generate", "--provider", "openai", + "A smooth tracking shot through a neon-lit cyberpunk city at night. Rain reflects colorful lights on the wet streets.", + "--duration", "8", "--width", "1280", "--height", "720", + "--output", $t2vPath + ) + return (Extract-Id $out "OPERATION_ID") + } + + # 2. 
Image-to-video (if reference image provided) + if ($ReferenceImage -ne "" -and (Test-Path $ReferenceImage)) { + $i2vPath = Join-Path $OutputDir "openai_02_image2video.mp4" + $i2vId = Skip-OrRun "openai_i2v" "OpenAI: Image-to-video" $i2vPath { + $out = Invoke-Tool "OpenAI: Image-to-video from reference" @( + "image-to-video", "--provider", "openai", + "A cinematic slow-motion shot inspired by the image, camera slowly orbiting around the subject.", + "--image", $ReferenceImage, "--duration", "4", + "--output", $i2vPath + ) + return (Extract-Id $out "OPERATION_ID") + } + } + + # 3. Edit (requires previous video) + if ($t2vId -ne "") { + $editPath = Join-Path $OutputDir "openai_03_edit.mp4" + Skip-OrRun "openai_edit" "OpenAI: Edit video" $editPath { + $out = Invoke-Tool "OpenAI: Edit — shift to warm sunset palette" @( + "edit", "--provider", "openai", + "Shift the entire color palette to warm golden sunset tones with soft amber highlights.", + "--video", $t2vId, + "--output", $editPath + ) + return (Extract-Id $out "OPERATION_ID") + } | Out-Null + } + + # 4. Extend (requires previous video) + if ($t2vId -ne "") { + $extPath = Join-Path $OutputDir "openai_04_extend.mp4" + Skip-OrRun "openai_extend" "OpenAI: Extend video" $extPath { + $out = Invoke-Tool "OpenAI: Extend — continue the scene" @( + "extend", "--provider", "openai", + "The camera rises above the buildings to reveal a stunning panoramic view of the cyberpunk skyline.", + "--video", $t2vId, "--duration", "8", + "--output", $extPath + ) + return (Extract-Id $out "OPERATION_ID") + } | Out-Null + } +} + +# ─── Google Veo ──────────────────────────────────────────────────────── +function Run-Veo { + Write-Host "" + Write-Host ("═" * 70) -ForegroundColor Green + Write-Host " GOOGLE VEO" -ForegroundColor Green + Write-Host " Features: text-to-video, image-to-video, native audio, negative prompt, seed, aspect ratio" -ForegroundColor Green + Write-Host ("═" * 70) -ForegroundColor Green + + # 1. 
Text-to-video with negative prompt + $t2vPath = Join-Path $OutputDir "veo_01_text2video.mp4" + Skip-OrRun "veo_t2v" "Veo: Text-to-video" $t2vPath { + $out = Invoke-Tool "Veo: Text-to-video + negative prompt" @( + "generate", "--provider", "veo", + "A serene mountain lake at dawn, birds singing, gentle water ripples.", + "--negative-prompt", "people, buildings, cars, text, watermark", + "--aspect-ratio", "16:9", + "--duration", "8", + "--output", $t2vPath + ) + return (Extract-Id $out "OPERATION_ID") + } | Out-Null + + # 2. Text-to-video with seed for reproducibility + $seedPath = Join-Path $OutputDir "veo_02_seeded.mp4" + Skip-OrRun "veo_seed" "Veo: Seeded generation" $seedPath { + $out = Invoke-Tool "Veo: Text-to-video with seed=42" @( + "generate", "--provider", "veo", + "A colorful hot air balloon festival with dozens of balloons taking off at sunrise.", + "--seed", "42", + "--aspect-ratio", "9:16", + "--duration", "6", + "--output", $seedPath + ) + return (Extract-Id $out "OPERATION_ID") + } | Out-Null + + # 3. Image-to-video (if reference image provided) + if ($ReferenceImage -ne "" -and (Test-Path $ReferenceImage)) { + $i2vPath = Join-Path $OutputDir "veo_03_image2video.mp4" + Skip-OrRun "veo_i2v" "Veo: Image-to-video" $i2vPath { + $out = Invoke-Tool "Veo: Image-to-video" @( + "image-to-video", "--provider", "veo", + "The scene in the image comes to life with natural movement and ambient sounds.", + "--image", $ReferenceImage, + "--duration", "4", + "--output", $i2vPath + ) + return (Extract-Id $out "OPERATION_ID") + } | Out-Null + } +} + +# ─── Runway ──────────────────────────────────────────────────────────── +function Run-Runway { + Write-Host "" + Write-Host ("═" * 70) -ForegroundColor Green + Write-Host " RUNWAY" -ForegroundColor Green + Write-Host " Features: text-to-video, image-to-video, video-to-video, seed" -ForegroundColor Green + Write-Host ("═" * 70) -ForegroundColor Green + + # 1. 
Text-to-video with seed + $t2vPath = Join-Path $OutputDir "runway_01_text2video.mp4" + Skip-OrRun "runway_t2v" "Runway: Text-to-video" $t2vPath { + $out = Invoke-Tool "Runway: Text-to-video with seed" @( + "generate", "--provider", "runway", + "A graceful ballet dancer performing a spin in an empty theater, dramatic lighting.", + "--seed", "12345", + "--duration", "5", + "--aspect-ratio", "16:9", + "--output", $t2vPath + ) + return (Extract-Id $out "OPERATION_ID") + } | Out-Null + + # 2. Image-to-video (if reference image provided) + if ($ReferenceImage -ne "" -and (Test-Path $ReferenceImage)) { + $i2vPath = Join-Path $OutputDir "runway_02_image2video.mp4" + Skip-OrRun "runway_i2v" "Runway: Image-to-video" $i2vPath { + $out = Invoke-Tool "Runway: Image-to-video" @( + "image-to-video", "--provider", "runway", + "The image gradually transforms into a cinematic scene with camera movement.", + "--image", $ReferenceImage, + "--duration", "5", + "--output", $i2vPath + ) + return (Extract-Id $out "OPERATION_ID") + } | Out-Null + } +} + +# ─── Luma AI ─────────────────────────────────────────────────────────── +function Run-Luma { + Write-Host "" + Write-Host ("═" * 70) -ForegroundColor Green + Write-Host " LUMA AI (Dream Machine)" -ForegroundColor Green + Write-Host " Features: text-to-video, image-to-video, extend, aspect ratio, keyframes" -ForegroundColor Green + Write-Host ("═" * 70) -ForegroundColor Green + + # 1. Text-to-video with aspect ratio + $t2vPath = Join-Path $OutputDir "luma_01_text2video.mp4" + $t2vId = Skip-OrRun "luma_t2v" "Luma: Text-to-video" $t2vPath { + $out = Invoke-Tool "Luma: Text-to-video with 9:16 aspect ratio" @( + "generate", "--provider", "luma", + "A time-lapse of flowers blooming in a garden, petals unfurling in sunlight.", + "--aspect-ratio", "9:16", + "--output", $t2vPath + ) + return (Extract-Id $out "OPERATION_ID") + } + + # 2. 
Image-to-video (if reference image provided) + if ($ReferenceImage -ne "" -and (Test-Path $ReferenceImage)) { + $i2vPath = Join-Path $OutputDir "luma_02_image2video.mp4" + Skip-OrRun "luma_i2v" "Luma: Image-to-video" $i2vPath { + $out = Invoke-Tool "Luma: Image-to-video from keyframe" @( + "image-to-video", "--provider", "luma", + "The image comes to life — subjects begin to move naturally.", + "--image", $ReferenceImage, + "--output", $i2vPath + ) + return (Extract-Id $out "OPERATION_ID") + } | Out-Null + } + + # 3. Extend (requires previous video) + if ($t2vId -ne "") { + $extPath = Join-Path $OutputDir "luma_03_extend.mp4" + Skip-OrRun "luma_extend" "Luma: Extend video" $extPath { + $out = Invoke-Tool "Luma: Extend — continue blooming scene" @( + "extend", "--provider", "luma", + "The garden continues to bloom as butterflies arrive and the sun moves across the sky.", + "--video", $t2vId, + "--output", $extPath + ) + return (Extract-Id $out "OPERATION_ID") + } | Out-Null + } +} + +# ═══════════════════════════════════════════════════════════════════════════ +# Run scenarios for each active provider +# ═══════════════════════════════════════════════════════════════════════════ +foreach ($p in $activeProviders) { + switch ($p) { + "openai" { Run-OpenAI } + "veo" { Run-Veo } + "runway" { Run-Runway } + "luma" { Run-Luma } + default { Write-Warning "Unknown provider: $p" } + } +} + +# ── Summary ───────────────────────────────────────────────────────────── +Write-Host "" +Write-Host ("═" * 70) -ForegroundColor Cyan +Write-Host " Demo complete! 
Output files:" -ForegroundColor Cyan +Write-Host ("═" * 70) -ForegroundColor Cyan +if (Test-Path $OutputDir) { + Get-ChildItem $OutputDir -Filter "*.mp4" | ForEach-Object { + Write-Host " $($_.Name) ($([math]::Round($_.Length / 1MB, 1)) MB)" -ForegroundColor Green + } +} +Write-Host "" +Write-Host " State: $stateFile" -ForegroundColor DarkGray +Write-Host " (pass -Reset to start fresh)" -ForegroundColor DarkGray diff --git a/samples/VideoProviders/PROVIDER_COMPARISON.md b/samples/VideoProviders/PROVIDER_COMPARISON.md new file mode 100644 index 00000000000..ea29409f73e --- /dev/null +++ b/samples/VideoProviders/PROVIDER_COMPARISON.md @@ -0,0 +1,216 @@ +# Video Generation Provider Comparison & MEAI API Gap Analysis + +This document summarizes findings from implementing `IVideoGenerator` across four providers (OpenAI Sora, Google Veo, Runway, and Luma AI), identifies gaps in the current MEAI abstractions, and recommends potential API additions. + +## Provider Feature Matrix + +| Feature | OpenAI (Sora) | Google Veo 3.1 | Runway | Luma AI (Ray 2) | +|---|:---:|:---:|:---:|:---:| +| **Text-to-video** | ✅ | ✅ | ✅ | ✅ | +| **Image-to-video** | ✅ | ✅ | ✅ (i2v endpoint) | ✅ (keyframe) | +| **Video edit** | ✅ | ❌ | ✅ (v2v with gen4_aleph) | ❌¹ | +| **Video extend** | ✅ | ✅ (up to 20×) | ❌ | ✅ (fwd + reverse) | +| **Characters / Avatars** | ✅ (upload video) | ❌ | ✅ (act_two + avatars) | ❌ | +| **Reference images** | ❌ | ✅ (up to 3, typed) | ✅ (v2v references) | ❌ | +| **First+last frame interp** | ❌ | ✅ | ❌ | ✅ (frame0 + frame1) | +| **Native audio** | ❌ | ✅ (Veo 3+) | ❌² | ❌³ | +| **Negative prompt** | ❌ | ✅ | ❌ | ❌ | +| **Seed / reproducibility** | ❌ | ✅ | ✅ | ❌ | +| **Resolution control** | ✅ (WxH pixels) | ✅ (720p/1080p/4k) | ✅ (ratio string) | ✅ (540p–4k) | +| **Aspect ratio** | Implied via Size | ✅ (string) | ✅ (ratio string) | ✅ (string) | +| **Duration** | ✅ (string enum: 4/8/12) | ✅ (string enum: 4/6/8) | ✅ (integer: 2–10) | ✅ (string: "5s") | +| **Multiple outputs** 
| ❌ | ✅ (1–4) | ❌ | ❌ |
+| **Looping video** | ❌ | ❌ | ❌ | ✅ |
+| **Callback/webhook** | ❌ | ❌ | ❌ | ✅ |
+| **Content moderation params** | ❌ | ✅ (personGeneration) | ✅ (publicFigureThreshold) | ❌ |
+
+¹ Luma has a separate "Modify Video" endpoint not covered in this evaluation.
+² Runway provides separate sound effect, TTS, and speech-to-speech endpoints.
+³ Luma has a separate "Add Audio" endpoint.
+
+## Async Polling Patterns
+
+All four providers use an async task/operation model that maps well to `VideoGenerationOperation`:
+
+| Provider | Submit | Poll | ID Format |
+|---|---|---|---|
+| OpenAI | `POST /videos/generations` | `GET /videos/generations/{id}` | `vg_xxxxx` |
+| Google Veo | `POST /models/{model}:generateVideos` | `GET /{operation.name}` | `operations/xxx` |
+| Runway | `POST /v1/{type}_to_video` | `GET /v1/tasks/{id}` | UUID |
+| Luma AI | `POST /dream-machine/v1/generations` | `GET /dream-machine/v1/generations/{id}` | UUID |
+
+**Assessment**: The `VideoGenerationOperation` pattern (submit → poll → download) is well-suited for all providers. The polling interval varies (5s for Runway/Luma, 10s for Veo).
+
+## Input Media Handling
+
+| Provider | Image Input | Video Input | Data URI Support |
+|---|---|---|---|
+| OpenAI | Data URI in JSON body | Multipart upload | ✅ (images) |
+| Google Veo | Base64 bytes in JSON | Gemini Files API | ✅ (inline base64) |
+| Runway | HTTPS URL or data URI | HTTPS URL or data URI | ✅ |
+| Luma AI | HTTPS URL only¹ | HTTPS URL only | ❌ |
+
+¹ Luma documentation says HTTPS URLs; data URI support is undocumented.
+
+**Assessment**: Most providers accept data URIs or inline base64, making `DataContent` a good abstraction. However, Luma's URL-only requirement means some providers will require an out-of-band upload step.
+
+## Identified API Gaps
+
+### Gap 1: Seed / Reproducibility (HIGH PRIORITY)
+
+**Problem**: 2 of 4 providers support a `seed` parameter for reproducible generation. 
Currently this requires `AdditionalProperties["seed"]`.
+
+**Recommendation**: Add `int? Seed` to `VideoGenerationOptions`.
+
+```csharp
+/// Seed for reproducible generation. Same seed + same parameters ≈ same output.
+public int? Seed { get; set; }
+```
+
+**Providers**: Google Veo ✅, Runway ✅, Luma ❌, OpenAI ❌
+
+---
+
+### Gap 2: Aspect Ratio (HIGH PRIORITY)
+
+**Problem**: Every provider has a concept of aspect ratio (`"16:9"`, `"9:16"`, `"1:1"`, etc.) separate from pixel resolution. The current `VideoSize` property encodes pixel dimensions, but ratio is the primary concept for most providers. Mapping `Size(1280, 720)` → `"16:9"` is lossy and ambiguous.
+
+**Recommendation**: Add `string? AspectRatio` to `VideoGenerationOptions`.
+
+```csharp
+/// Aspect ratio of the generated video (e.g., "16:9", "9:16", "1:1").
+public string? AspectRatio { get; set; }
+```
+
+**Providers**: Google Veo ✅, Runway ✅, Luma ✅, OpenAI (implicit via Size)
+
+---
+
+### Gap 3: Negative Prompt (MEDIUM PRIORITY)
+
+**Problem**: Google Veo supports `negativePrompt` to exclude unwanted elements. This is a concept that exists broadly in image generation (Stable Diffusion, DALL-E) and may appear in more video providers.
+
+**Recommendation**: Add `string? NegativePrompt` to `VideoGenerationRequest`.
+
+```csharp
+/// Describes what to avoid in the generated video.
+public string? NegativePrompt { get; set; }
+```
+
+**Providers**: Google Veo ✅ (others may add support)
+
+---
+
+### Gap 4: Reference Images with Purpose (MEDIUM PRIORITY)
+
+**Problem**: Google Veo supports up to 3 reference images, each with a `referenceType` ("STYLE" or "SUBJECT"). Runway's video-to-video supports reference images for style transfer.
+
+**Status**: ✅ ADDRESSED — `VideoGenerationRequest.ReferenceImages` (`IList<AIContent>?`) provides a first-class collection for reference images. The `referenceType` metadata can be provided via provider-specific `AdditionalProperties` on the individual `AIContent` items. 
+ +**Providers**: Google Veo ✅ (3 refs, typed), Runway ✅ (1 ref for v2v) + +--- + +### Gap 5: Audio Generation (LOW-MEDIUM PRIORITY) + +**Problem**: Google Veo 3+ generates synchronized audio with video natively. Luma and Runway offer separate audio endpoints. As video AI evolves, audio-with-video will likely become standard. + +**Recommendation**: Add `bool? GenerateAudio` to `VideoGenerationOptions`. + +```csharp +/// Whether to generate synchronized audio alongside the video. +public bool? GenerateAudio { get; set; } +``` + +Alternatively, this could be modeled as part of `MediaType` (e.g., `"video/mp4; codecs=avc1,mp4a"`) but that's less ergonomic. + +**Providers**: Google Veo ✅ (native), Luma ✅ (separate endpoint), Runway ✅ (separate endpoint) + +--- + +### Gap 6: Keyframe / Interpolation (LOW PRIORITY) + +**Problem**: Both Luma and Google Veo support first+last frame interpolation — providing a start and end image and generating the video in between. + +**Status**: ✅ ADDRESSED — `VideoGenerationRequest.StartFrame` and `VideoGenerationRequest.EndFrame` provide first-class properties for first/last frame interpolation. + +--- + +### Gap 7: Reverse Extend (LOW PRIORITY) + +**Problem**: Luma supports extending a video backwards (generating content leading up to the existing video). This is conceptually different from forward extension. + +**Recommendation**: Consider adding `ReverseExtend` to `VideoOperationKind`, or leave as `AdditionalProperties`. + +--- + +### Gap 8: Looping Video (LOW PRIORITY) + +**Problem**: Luma supports `loop: true` to generate seamlessly looping video. + +**Recommendation**: Leave as `AdditionalProperties["loop"]` unless more providers add support. + +--- + +### Gap 9: Content Moderation / Safety Parameters (LOW PRIORITY) + +**Problem**: Both Runway (`publicFigureThreshold`) and Google Veo (`personGeneration`) have provider-specific content moderation controls. These are safety parameters rather than creative controls. 
+ +**Recommendation**: Leave as `AdditionalProperties` — these are inherently provider-specific policies. + +--- + +## Problems Encountered During Implementation + +### 1. Runway's Separate Endpoints + +Runway uses three separate endpoints (`text_to_video`, `image_to_video`, `video_to_video`) rather than a single unified endpoint. The `IVideoGenerator.GenerateAsync` single-method approach requires the implementation to inspect `StartFrame`/`SourceVideo` properties to determine which endpoint to call. This works cleanly with the new typed properties. + +### 2. Luma's URL-Only Image Input + +Luma requires HTTPS URLs for images — it doesn't accept data URIs or inline base64. Implementations targeting Luma need an upload step before generation, which is outside the scope of `GenerateAsync`. The `UriContent` type helps, but `DataContent` users will need pre-upload. + +### 3. Google Veo's Extension Model + +Veo video extension requires uploading the source video through the Gemini Files API first, then referencing it. A simple `SourceVideoId` string is insufficient for the multi-step extension workflow. The extension operation also has limitations (720p only, 7s segments, up to 20 times). + +### 4. Ratio vs Size Ambiguity + +Every provider has a different approach to sizing: +- **OpenAI**: Width × Height pixels (e.g., 1280×720) +- **Google Veo**: Named resolution string + optional aspect ratio +- **Runway**: Fixed ratio strings (e.g., `"1280:720"`, `"1104:832"`) +- **Luma**: Named resolution (540p/720p/1080p/4k) + optional aspect ratio + +The `Size VideoSize` property maps well to OpenAI but requires lossy conversion for others. Adding `AspectRatio` as a separate property would help significantly. + +### 5. 
Duration Representation + +All providers handle duration differently: +- **OpenAI**: String enum (`"4"`, `"8"`, `"12"`) +- **Google Veo**: String enum (`"4"`, `"6"`, `"8"`) +- **Runway**: Integer (2–10) +- **Luma**: String with unit (`"5s"`) + +`TimeSpan Duration` is a good neutral abstraction, but the valid values are provider- and model-specific. Documentation should make clear that providers snap to supported values. + +## Summary of Recommendations + +| Priority | Recommendation | Rationale | +|---|---|---| +| **HIGH** | Add `string? AspectRatio` to `VideoGenerationOptions` | Universal concept across all providers, lossy via `VideoSize` alone | +| **HIGH** | Add `int? Seed` to `VideoGenerationOptions` | 3 of 4 providers support it, common for iterative creative workflows | +| **MEDIUM** | Add `string? NegativePrompt` to `VideoGenerationRequest` | Proven concept from image gen; Veo supports it, others likely will | +| **MEDIUM** | Add typed reference media concept | Veo + Runway use reference images with purpose; different from input media | +| **LOW-MED** | Add `bool? GenerateAudio` to `VideoGenerationOptions` | Growing trend for integrated audio; 3 providers offer it in some form | +| **LOW** | Consider `ReverseExtend` in `VideoOperationKind` | Luma-specific for now, but a useful concept for storytelling | + +## What Works Well + +- **`VideoGenerationOperation` pattern**: The submit → poll → download lifecycle maps perfectly to all four providers. +- **`VideoOperationKind` enum**: Create/Edit/Extend covers the core operations well. +- **`StartFrame`/`EndFrame`/`ReferenceImages` properties**: Handles image-to-video, interpolation, and reference images with clear semantics for all providers. +- **`AdditionalProperties` escape hatch**: Provider-specific features (concepts, camera motion, content moderation) flow through cleanly. 
+- **`GetService()` pattern**: Enables provider-specific extensions (like OpenAI's `UploadVideoCharacterAsync`) without polluting the interface. +- **`VideoGenerationResponseFormat`**: Uri vs Data choice is useful for all providers. +- **`TimeSpan Duration`**: Clean neutral type that each provider maps to its own format. diff --git a/samples/VideoProviders/Runway/Program.cs b/samples/VideoProviders/Runway/Program.cs new file mode 100644 index 00000000000..23e981ca7b9 --- /dev/null +++ b/samples/VideoProviders/Runway/Program.cs @@ -0,0 +1,230 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Runway Video Generation Sample +// +// Usage examples: +// dotnet run -- text-to-video "A cute bunny hopping in a meadow" +// dotnet run -- image-to-video "The scene comes alive" --image bunny.jpg +// dotnet run -- video-to-video "Add easter elements" --video https://example.com/bunny.mp4 +// +// Environment: +// RUNWAY_API_KEY — your Runway API key from https://dev.runwayml.com/ + +using System.CommandLine; +using Microsoft.Extensions.AI; +using Runway; + +var modelOption = new Option("--model", () => "gen4_turbo", "Model (gen4.5, gen4_turbo, gen4_aleph, veo3.1, veo3)."); +var outputOption = new Option("--output", "Output file path (.mp4)."); +var durationOption = new Option("--duration", "Duration in seconds (2-10)."); +var seedOption = new Option("--seed", "Seed for reproducibility."); +var formatOption = new Option("--format", () => "data", "Response format: data or uri."); + +// ── text-to-video ─────────────────────────────────────────────────────────── +var t2vPromptArg = new Argument("prompt", "Text prompt."); +var t2vRatioOption = new Option("--ratio", () => "1280:720", "Output ratio (1280:720, 720:1280)."); + +var t2vCommand = new Command("text-to-video", "Generate video from text only.") +{ + t2vPromptArg, modelOption, outputOption, durationOption, seedOption, 
t2vRatioOption, formatOption, +}; + +t2vCommand.SetHandler(async (context) => +{ + string prompt = context.ParseResult.GetValueForArgument(t2vPromptArg); + string model = context.ParseResult.GetValueForOption(modelOption)!; + string? outputPath = context.ParseResult.GetValueForOption(outputOption); + int? duration = context.ParseResult.GetValueForOption(durationOption); + int? seed = context.ParseResult.GetValueForOption(seedOption); + string format = context.ParseResult.GetValueForOption(formatOption)!; + + using var generator = CreateGenerator(model); + var options = BuildOptions(model, duration, seed, format); + var request = new VideoGenerationRequest(prompt); + var operation = await generator.GenerateAsync(request, options); + await CompleteAndSaveAsync(operation, options, outputPath); +}); + +// ── image-to-video ────────────────────────────────────────────────────────── +var i2vPromptArg = new Argument("prompt", "Text prompt to describe the video."); +var i2vImageOption = new Option("--image", "Input image (file path or HTTPS URL).") { IsRequired = true }; + +var i2vCommand = new Command("image-to-video", "Generate video from an image + prompt.") +{ + i2vPromptArg, i2vImageOption, modelOption, outputOption, durationOption, seedOption, formatOption, +}; + +i2vCommand.SetHandler(async (context) => +{ + string prompt = context.ParseResult.GetValueForArgument(i2vPromptArg); + string imagePath = context.ParseResult.GetValueForOption(i2vImageOption)!; + string model = context.ParseResult.GetValueForOption(modelOption)!; + string? outputPath = context.ParseResult.GetValueForOption(outputOption); + int? duration = context.ParseResult.GetValueForOption(durationOption); + int? 
seed = context.ParseResult.GetValueForOption(seedOption); + string format = context.ParseResult.GetValueForOption(formatOption)!; + + using var generator = CreateGenerator(model); + + List media; + if (imagePath.StartsWith("http", StringComparison.OrdinalIgnoreCase)) + { + media = [new UriContent(new Uri(imagePath), "image/jpeg")]; + } + else + { + media = [await DataContent.LoadFromAsync(imagePath)]; + } + + var options = BuildOptions(model, duration, seed, format); + var request = new VideoGenerationRequest(prompt, media); + var operation = await generator.GenerateAsync(request, options); + await CompleteAndSaveAsync(operation, options, outputPath); +}); + +// ── video-to-video ────────────────────────────────────────────────────────── +var v2vPromptArg = new Argument("prompt", "Prompt describing the style transfer."); +var v2vVideoOption = new Option("--video", "Source video (file path or HTTPS URL).") { IsRequired = true }; +var v2vRefImageOption = new Option("--reference", "Reference image for style guidance."); + +var v2vCommand = new Command("video-to-video", "Transform a video with gen4_aleph.") +{ + v2vPromptArg, v2vVideoOption, v2vRefImageOption, outputOption, seedOption, formatOption, +}; + +v2vCommand.SetHandler(async (context) => +{ + string prompt = context.ParseResult.GetValueForArgument(v2vPromptArg); + string videoPath = context.ParseResult.GetValueForOption(v2vVideoOption)!; + string? refImage = context.ParseResult.GetValueForOption(v2vRefImageOption); + string? outputPath = context.ParseResult.GetValueForOption(outputOption); + int? 
seed = context.ParseResult.GetValueForOption(seedOption); + string format = context.ParseResult.GetValueForOption(formatOption)!; + + using var generator = CreateGenerator("gen4_aleph"); + + List media; + if (videoPath.StartsWith("http", StringComparison.OrdinalIgnoreCase)) + { + media = [new UriContent(new Uri(videoPath), "video/mp4")]; + } + else + { + media = [await DataContent.LoadFromAsync(videoPath)]; + } + + var options = BuildOptions("gen4_aleph", null, seed, format); + + // Add reference image if provided + if (refImage is not null) + { + var refs = new System.Text.Json.Nodes.JsonArray + { + new System.Text.Json.Nodes.JsonObject + { + ["type"] = "image", + ["uri"] = refImage.StartsWith("http", StringComparison.OrdinalIgnoreCase) + ? refImage + : $"data:image/png;base64,{Convert.ToBase64String(await File.ReadAllBytesAsync(refImage))}", + }, + }; + options.AdditionalProperties ??= []; + options.AdditionalProperties["references"] = refs; + } + + var request = new VideoGenerationRequest(prompt, media) + { + OperationKind = VideoOperationKind.Edit, + }; + + var operation = await generator.GenerateAsync(request, options); + await CompleteAndSaveAsync(operation, options, outputPath); +}); + +// ── Root ──────────────────────────────────────────────────────────────────── +var rootCommand = new RootCommand("Runway video generation sample using MEAI IVideoGenerator.") +{ + t2vCommand, + i2vCommand, + v2vCommand, +}; + +return await rootCommand.InvokeAsync(args); + +// ═══════════════════════════════════════════════════════════════════════════ +// Helpers +// ═══════════════════════════════════════════════════════════════════════════ +static IVideoGenerator CreateGenerator(string model) +{ + string? 
/// <summary>Creates a Runway-backed generator, exiting the process if RUNWAY_API_KEY is not set.</summary>
static IVideoGenerator CreateGenerator(string model)
{
    string? apiKey = Environment.GetEnvironmentVariable("RUNWAY_API_KEY");
    if (string.IsNullOrEmpty(apiKey))
    {
        Console.Error.WriteLine("Error: Set the RUNWAY_API_KEY environment variable.");
        Console.Error.WriteLine("Get a key at https://dev.runwayml.com/");
        Environment.Exit(1);
    }

    return new RunwayVideoGenerator(apiKey, model);
}

/// <summary>Builds <see cref="VideoGenerationOptions"/> from the shared CLI options.</summary>
/// <param name="model">The Runway model ID to request.</param>
/// <param name="duration">Optional duration in seconds; omitted when null.</param>
/// <param name="seed">Optional seed for reproducibility; omitted when null.</param>
/// <param name="format">"uri" for URL-only results; anything else downloads bytes.</param>
static VideoGenerationOptions BuildOptions(string model, int? duration, int? seed, string format)
{
    var options = new VideoGenerationOptions
    {
        ModelId = model,
        ResponseFormat = string.Equals(format, "uri", StringComparison.OrdinalIgnoreCase)
            ? VideoGenerationResponseFormat.Uri
            : VideoGenerationResponseFormat.Data,
    };

    if (duration.HasValue)
    {
        options.Duration = TimeSpan.FromSeconds(duration.Value);
    }

    if (seed.HasValue)
    {
        options.Seed = seed.Value;
    }

    return options;
}

/// <summary>
/// Waits for <paramref name="operation"/> to complete, printing progress, then downloads
/// the resulting content, saving to <paramref name="outputPath"/> when provided.
/// </summary>
static async Task CompleteAndSaveAsync(VideoGenerationOperation operation, VideoGenerationOptions options, string? outputPath)
{
    Console.WriteLine($"TASK_ID: {operation.OperationId}");
    Console.WriteLine($"  Status: {operation.Status}");

    var sw = System.Diagnostics.Stopwatch.StartNew();
    await operation.WaitForCompletionAsync(
        new Progress<VideoGenerationProgress>(p =>
            Console.WriteLine($"  Progress: {p.Status}{(p.PercentComplete.HasValue ? $" ({p.PercentComplete}%)" : "")}")));

    sw.Stop();
    Console.WriteLine($"  Completed in {sw.Elapsed.TotalSeconds:F1}s");

    var contents = await operation.GetContentsAsync(options);
    Console.WriteLine($"  {contents.Count} content item(s)");

    for (int i = 0; i < contents.Count; i++)
    {
        switch (contents[i])
        {
            case DataContent dc when outputPath is not null:
            {
                // Path.GetDirectoryName returns "" (not null) for a bare file name like
                // "out.mp4", and Directory.CreateDirectory("") throws — so only create
                // the directory when the path actually contains one.
                string? outputDir = Path.GetDirectoryName(outputPath);
                if (!string.IsNullOrEmpty(outputDir))
                {
                    Directory.CreateDirectory(outputDir);
                }

                await dc.SaveToAsync(outputPath);
                Console.WriteLine($"  [{i}] Saved: {outputPath} ({dc.Data.Length} bytes)");
                break;
            }

            case DataContent dc:
                Console.WriteLine($"  [{i}] DataContent: {dc.Data.Length} bytes ({dc.MediaType})");
                break;
            case UriContent uc:
                Console.WriteLine($"  [{i}] URI: {uc.Uri}");
                break;
            default:
                Console.WriteLine($"  [{i}] {contents[i].GetType().Name}");
                break;
        }
    }
}
API version header `X-Runway-Version: 2024-11-06` is required on all requests + +## Environment Setup + +```bash +export RUNWAY_API_KEY="rw_xxxx" +``` + +## Models + +| Model | ID | Capabilities | Credits/sec | +|---|---|---|---| +| Gen-4.5 | `gen4.5` | Text-to-video only | 12 | +| Gen-4 Turbo | `gen4_turbo` | Text-to-video, image-to-video | 4 | +| Gen-4 Aleph | `gen4_aleph` | Video-to-video only | 4 | +| Veo 3.1 | `veo3.1` | Text-to-video (via Runway) | 4 | +| Veo 3 | `veo3` | Text-to-video (via Runway) | 4 | + +## Supported Operations + +| Operation | MEAI Mapping | Endpoint | +|---|---|---| +| Text-to-video | `VideoOperationKind.Create`, no `StartFrame` | `POST /v1/text_to_video` | +| Image-to-video | `VideoOperationKind.Create` + `StartFrame` (image) | `POST /v1/image_to_video` | +| Video-to-video | `VideoOperationKind.Edit` + `SourceVideo` (video) | `POST /v1/video_to_video` | + +## Usage + +```bash +# Text-to-video +dotnet run -- text-to-video "A cute bunny hopping in a meadow" --output bunny.mp4 + +# Image-to-video +dotnet run -- image-to-video "The scene comes alive" --image bunny.jpg --duration 10 --output scene.mp4 + +# Video-to-video (gen4_aleph) with style reference +dotnet run -- video-to-video "Add easter elements" --video https://example.com/cats.mp4 --reference style.jpg --output styled.mp4 + +# With seed for reproducibility +dotnet run -- text-to-video "A sunset over mountains" --seed 42 --output sunset.mp4 +``` + +## API Gaps / Limitations + +- **No extend**: Runway does not have an endpoint for extending a completed video. `VideoOperationKind.Extend` cannot be mapped. +- **Separate endpoints**: Runway uses three separate endpoints (`text_to_video`, `image_to_video`, `video_to_video`) requiring the implementation to dispatch based on input media type, rather than a single unified endpoint. +- **Ratio vs Size**: Runway uses fixed ratio strings (`"1280:720"`, `"720:1280"`, etc.) rather than arbitrary pixel dimensions. 
The `VideoSize` → ratio mapping loses information. +- **Character performance** (`act_two`): Runway has a unique `character_performance` endpoint for driving a character with a reference video. This is fundamentally different from OpenAI's character system and has no MEAI equivalent. +- **Seed**: Available via `AdditionalProperties` — consider promoting to a first-class option. +- **Image position**: Runway's `image_to_video` accepts an array of `PromptImages` with `position` (currently only `"first"`). MEAI models this via `StartFrame` for the first frame. +- **Duration as integer**: Runway passes duration as an integer (2-10), while OpenAI requires a string enum. The MEAI `TimeSpan Duration` maps cleanly to both. +- **Video-to-video references**: `gen4_aleph` supports `references` (array of image references for style). These could be modeled via `ReferenceImages` on the request. +- **Content moderation**: Runway has `contentModeration.publicFigureThreshold` — provider-specific safety control. +- **No resolution control for v2v**: For video-to-video, the output resolution is determined by the input video. diff --git a/samples/VideoProviders/Runway/Runway.csproj b/samples/VideoProviders/Runway/Runway.csproj new file mode 100644 index 00000000000..552fe62669e --- /dev/null +++ b/samples/VideoProviders/Runway/Runway.csproj @@ -0,0 +1,16 @@ + + + + Exe + net10.0 + enable + enable + $(NoWarn);MEAI001 + + + + + + + + diff --git a/samples/VideoProviders/Runway/RunwayVideoGenerationOperation.cs b/samples/VideoProviders/Runway/RunwayVideoGenerationOperation.cs new file mode 100644 index 00000000000..a1aa8fe78d6 --- /dev/null +++ b/samples/VideoProviders/Runway/RunwayVideoGenerationOperation.cs @@ -0,0 +1,132 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Text.Json; +using Microsoft.Extensions.AI; + +namespace Runway; + +/// +/// Tracks an in-flight Runway task, polling GET /v1/tasks/{id} for status. +/// +/// +/// Runway task states: PENDING, THROTTLED, RUNNING, SUCCEEDED, FAILED. +/// The API recommends polling no more than once every 5 seconds. +/// +internal sealed class RunwayVideoGenerationOperation : VideoGenerationOperation +{ + private const string BaseUrl = "https://api.dev.runwayml.com"; + + private readonly HttpClient _httpClient; + private string? _status; + private string? _failureReason; + private string? _outputUrl; + private int? _progressPercent; + + public RunwayVideoGenerationOperation(string operationId, HttpClient httpClient, string modelId) + { + OperationId = operationId; + ModelId = modelId; + _httpClient = httpClient; + _status = "PENDING"; + } + + public override string? OperationId { get; } + + public override string? Status => _status; + + public override int? PercentComplete => _progressPercent ?? _status switch + { + "SUCCEEDED" => 100, + "FAILED" => null, + "RUNNING" => 50, + "THROTTLED" => 10, + _ => 0, + }; + + public override bool IsCompleted => _status is "SUCCEEDED" or "FAILED"; + + public override string? 
FailureReason => _failureReason; + + public override async Task UpdateAsync(CancellationToken cancellationToken = default) + { + using var response = await _httpClient.GetAsync($"{BaseUrl}/v1/tasks/{OperationId}", cancellationToken); + string body = await response.Content.ReadAsStringAsync(cancellationToken); + response.EnsureSuccessStatusCode(); + + using var doc = JsonDocument.Parse(body); + var root = doc.RootElement; + + _status = root.GetProperty("status").GetString(); + + if (root.TryGetProperty("failure", out var failure) && failure.ValueKind == JsonValueKind.String) + { + _failureReason = failure.GetString(); + } + + if (root.TryGetProperty("progress", out var prog) && prog.TryGetDouble(out double progressVal)) + { + _progressPercent = (int)(progressVal * 100); + } + + // Output can be a single URL or an array + if (root.TryGetProperty("output", out var output)) + { + if (output.ValueKind == JsonValueKind.Array && output.GetArrayLength() > 0) + { + _outputUrl = output[0].GetString(); + } + else if (output.ValueKind == JsonValueKind.String) + { + _outputUrl = output.GetString(); + } + } + } + + public override async Task WaitForCompletionAsync( + IProgress? progress = null, + CancellationToken cancellationToken = default) + { + while (!IsCompleted) + { + await Task.Delay(TimeSpan.FromSeconds(5), cancellationToken); + await UpdateAsync(cancellationToken); + progress?.Report(new VideoGenerationProgress(_status, PercentComplete)); + } + + if (_status == "FAILED") + { + throw new InvalidOperationException($"Video generation failed: {_failureReason}"); + } + } + + public override async Task> GetContentsAsync( + VideoGenerationOptions? 
options = null, + CancellationToken cancellationToken = default) + { + if (!IsCompleted || _status == "FAILED") + { + throw new InvalidOperationException("The operation has not completed successfully."); + } + + if (_outputUrl is null) + { + await UpdateAsync(cancellationToken); + } + + if (_outputUrl is null) + { + throw new InvalidOperationException("No output URL available after completion."); + } + + if (options?.ResponseFormat == VideoGenerationResponseFormat.Uri) + { + return [new UriContent(new Uri(_outputUrl), "video/mp4")]; + } + + using var response = await _httpClient.GetAsync(_outputUrl, cancellationToken); + response.EnsureSuccessStatusCode(); + byte[] data = await response.Content.ReadAsByteArrayAsync(cancellationToken); + return [new DataContent(data, "video/mp4")]; + } +} diff --git a/samples/VideoProviders/Runway/RunwayVideoGenerator.cs b/samples/VideoProviders/Runway/RunwayVideoGenerator.cs new file mode 100644 index 00000000000..b0cd93f4a0e --- /dev/null +++ b/samples/VideoProviders/Runway/RunwayVideoGenerator.cs @@ -0,0 +1,237 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Drawing; +using System.Net.Http.Headers; +using System.Text.Json; +using System.Text.Json.Nodes; +using Microsoft.Extensions.AI; + +namespace Runway; + +/// +/// Implements for the Runway API. +/// Supports text-to-video, image-to-video, and video-to-video (gen4_aleph). 
/// <summary>
/// Implements <see cref="IVideoGenerator"/> for the Runway API.
/// Supports text-to-video, image-to-video, and video-to-video (gen4_aleph).
/// </summary>
/// <remarks>
/// API Reference: https://docs.dev.runwayml.com/api
/// Endpoints:
///   POST /v1/text_to_video
///   POST /v1/image_to_video
///   POST /v1/video_to_video
///   GET  /v1/tasks/{id}
/// </remarks>
internal sealed class RunwayVideoGenerator : IVideoGenerator
{
    private const string BaseUrl = "https://api.dev.runwayml.com";
    private const string ApiVersion = "2024-11-06";
    private readonly HttpClient _httpClient;
    private readonly string _modelId;

    // Only dispose HttpClient instances this generator created itself; a caller-supplied
    // client is owned by the caller and may be shared with other components.
    private readonly bool _ownsHttpClient;

    public RunwayVideoGenerator(string apiKey, string modelId = "gen4_turbo", HttpClient? httpClient = null)
    {
        _modelId = modelId;
        _ownsHttpClient = httpClient is null;
        _httpClient = httpClient ?? new HttpClient();

        // NOTE(review): this mutates DefaultRequestHeaders even on a caller-supplied client;
        // assumes the client is dedicated to Runway calls — confirm with callers.
        _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
        _httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
        _httpClient.DefaultRequestHeaders.Add("X-Runway-Version", ApiVersion);
    }

    /// <summary>
    /// Submits the request to the appropriate Runway endpoint (dispatching on operation kind
    /// and input media, since Runway has no unified endpoint) and returns a pollable operation.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    /// <exception cref="HttpRequestException">The submission returned a non-success status code.</exception>
    public async Task<VideoGenerationOperation> GenerateAsync(
        VideoGenerationRequest request,
        VideoGenerationOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        string model = options?.ModelId ?? _modelId;
        string endpoint;
        JsonObject body;

        if (request.OperationKind == VideoOperationKind.Edit && request.SourceVideo is not null)
        {
            // Video-to-video (gen4_aleph only)
            endpoint = "/v1/video_to_video";
            body = BuildVideoToVideoBody(request, model, options);
        }
        else if (request.StartFrame is not null)
        {
            // Image-to-video
            endpoint = "/v1/image_to_video";
            body = BuildImageToVideoBody(request, model, options);
        }
        else
        {
            // Text-to-video
            endpoint = "/v1/text_to_video";
            body = BuildTextToVideoBody(request, model, options);
        }

        string json = body.ToJsonString();
        using var content = new StringContent(json, System.Text.Encoding.UTF8, "application/json");
        using var response = await _httpClient.PostAsync($"{BaseUrl}{endpoint}", content, cancellationToken);

        string responseBody = await response.Content.ReadAsStringAsync(cancellationToken);
        if (!response.IsSuccessStatusCode)
        {
            // Preserve the error payload; EnsureSuccessStatusCode would discard it.
            throw new HttpRequestException($"Runway request to {endpoint} failed ({(int)response.StatusCode}): {responseBody}");
        }

        // JsonDocument is IDisposable and was previously leaked here.
        using JsonDocument result = JsonDocument.Parse(responseBody);
        string taskId = result.RootElement.GetProperty("id").GetString()!;

        return new RunwayVideoGenerationOperation(taskId, _httpClient, model);
    }

    public object? GetService(Type serviceType, object? serviceKey = null) =>
        serviceKey is null && serviceType.IsInstanceOfType(this) ? this : null;

    public void Dispose()
    {
        if (_ownsHttpClient)
        {
            _httpClient.Dispose();
        }
    }
MapAspectRatio(ar) : MapRatio(options?.VideoSize), + }; + + if (options?.Duration is { } duration) + { + body["duration"] = (int)duration.TotalSeconds; + } + + AddSeed(body, options); + return body; + } + + private static JsonObject BuildImageToVideoBody(VideoGenerationRequest request, string model, VideoGenerationOptions? options) + { + string? imageUri = GetContentUri(request.StartFrame); + + var body = new JsonObject + { + ["model"] = model, + ["promptText"] = request.Prompt ?? string.Empty, + ["promptImage"] = imageUri ?? string.Empty, + ["ratio"] = options?.AspectRatio is { } ar ? MapAspectRatio(ar) : MapRatioImageToVideo(options?.VideoSize), + }; + + if (options?.Duration is { } duration) + { + body["duration"] = (int)duration.TotalSeconds; + } + + AddSeed(body, options); + return body; + } + + private static JsonObject BuildVideoToVideoBody(VideoGenerationRequest request, string model, VideoGenerationOptions? options) + { + string? videoUri = GetContentUri(request.SourceVideo); + + var body = new JsonObject + { + ["model"] = "gen4_aleph", // video-to-video only supports gen4_aleph + ["promptText"] = request.Prompt ?? string.Empty, + ["videoUri"] = videoUri ?? string.Empty, + }; + + // Reference images for style transfer + if (options?.AdditionalProperties?.TryGetValue("references", out object? refs) == true && refs is JsonArray refsArray) + { + body["references"] = JsonNode.Parse(refsArray.ToJsonString())!; + } + + AddSeed(body, options); + return body; + } + + private static void AddSeed(JsonObject body, VideoGenerationOptions? options) + { + // Prefer first-class Seed property, fall back to AdditionalProperties + if (options?.Seed is int seed) + { + body["seed"] = seed; + } + else if (options?.AdditionalProperties?.TryGetValue("seed", out object? seedObj) == true && seedObj is int seedInt) + { + body["seed"] = seedInt; + } + } + + private static string? GetContentUri(AIContent? 
content) + { + if (content is null) + { + return null; + } + + if (content is UriContent uc && uc.Uri is not null) + { + return uc.Uri.ToString(); + } + + if (content is DataContent dc && dc.Data.Length > 0) + { + return dc.Uri ?? $"data:{dc.MediaType ?? "application/octet-stream"};base64,{Convert.ToBase64String(dc.Data.ToArray())}"; + } + + return null; + } + + private static string MapRatio(Size? size) + { + if (size is null) + { + return "1280:720"; + } + + double ratio = (double)size.Value.Width / size.Value.Height; + return ratio > 1.5 ? "1280:720" : "720:1280"; + } + + private static string MapRatioImageToVideo(Size? size) + { + if (size is null) + { + return "1280:720"; + } + + // Runway image-to-video supports more ratios + double ratio = (double)size.Value.Width / size.Value.Height; + if (ratio > 2.0) + { + return "1584:672"; + } + + if (ratio > 1.2) + { + return "1280:720"; + } + + if (ratio > 0.9) + { + return "960:960"; + } + + return "720:1280"; + } + + /// Maps an aspect ratio string like "16:9" to a Runway ratio string like "1280:720". 
+ private static string MapAspectRatio(string aspectRatio) => aspectRatio switch + { + "16:9" => "1280:720", + "9:16" => "720:1280", + "1:1" => "960:960", + "4:3" => "1104:832", + "3:4" => "832:1104", + _ => aspectRatio.Replace(':', ':'), // pass through as-is if already in Runway format + }; +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonUtilities.Defaults.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonUtilities.Defaults.cs index 6feffa455c0..4f46189aac3 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonUtilities.Defaults.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonUtilities.Defaults.cs @@ -159,6 +159,9 @@ private static JsonSerializerOptions CreateDefaultOptions() [JsonSerializable(typeof(ImageGenerationOptions))] [JsonSerializable(typeof(ImageGenerationResponse))] + // IVideoGenerator + [JsonSerializable(typeof(VideoGenerationOptions))] + // IHostedFileClient [JsonSerializable(typeof(HostedFileClientOptions))] [JsonSerializable(typeof(HostedFileClientMetadata))] diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/DelegatingVideoGenerator.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/DelegatingVideoGenerator.cs new file mode 100644 index 00000000000..725de70a076 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/DelegatingVideoGenerator.cs @@ -0,0 +1,70 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Provides an optional base class for an that passes through calls to another instance. 
/// <summary>
/// Provides an optional base class for an <see cref="IVideoGenerator"/> that passes through calls to another instance.
/// </summary>
/// <remarks>
/// This is recommended as a base type when building generators that can be chained in any order around an underlying <see cref="IVideoGenerator"/>.
/// The default implementation simply passes each call to the inner generator instance.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)]
public class DelegatingVideoGenerator : IVideoGenerator
{
    /// <summary>
    /// Initializes a new instance of the <see cref="DelegatingVideoGenerator"/> class.
    /// </summary>
    /// <param name="innerGenerator">The wrapped generator instance.</param>
    /// <exception cref="ArgumentNullException"><paramref name="innerGenerator"/> is <see langword="null"/>.</exception>
    protected DelegatingVideoGenerator(IVideoGenerator innerGenerator)
    {
        InnerGenerator = Throw.IfNull(innerGenerator);
    }

    /// <summary>Gets the inner <see cref="IVideoGenerator"/>.</summary>
    protected IVideoGenerator InnerGenerator { get; }

    /// <inheritdoc />
    public void Dispose()
    {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }

    /// <inheritdoc />
    public virtual Task<VideoGenerationOperation> GenerateAsync(
        VideoGenerationRequest request, VideoGenerationOptions? options = null, CancellationToken cancellationToken = default) =>
        InnerGenerator.GenerateAsync(request, options, cancellationToken);

    /// <inheritdoc />
    public virtual object? GetService(Type serviceType, object? serviceKey = null)
    {
        _ = Throw.IfNull(serviceType);

        // A null key with a matching type is answered by this instance; anything else
        // (including any non-null key, whose meaning is unknown here) is delegated inward.
        return serviceKey is null && serviceType.IsInstanceOfType(this)
            ? this
            : InnerGenerator.GetService(serviceType, serviceKey);
    }

    /// <summary>Provides a mechanism for releasing unmanaged resources.</summary>
    /// <param name="disposing"><see langword="true"/> if being called from <see cref="Dispose()"/>; otherwise, <see langword="false"/>.</param>
+ protected virtual void Dispose(bool disposing) + { + if (disposing) + { + InnerGenerator.Dispose(); + } + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/IVideoGenerator.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/IVideoGenerator.cs new file mode 100644 index 00000000000..5502508d912 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/IVideoGenerator.cs @@ -0,0 +1,39 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a generator of videos. +/// +[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] +public interface IVideoGenerator : IDisposable +{ + /// + /// Submits a video generation request and returns a that can be used to + /// monitor progress, wait for completion, and download the generated content. + /// + /// The video generation request containing the prompt and optional media inputs. + /// The video generation options to configure the request. + /// The to monitor for cancellation requests. The default is . + /// is . + /// A representing the submitted video generation job. + Task GenerateAsync(VideoGenerationRequest request, VideoGenerationOptions? options = null, CancellationToken cancellationToken = default); + + /// Asks the for an object of the specified type . + /// The type of object being requested. + /// An optional key that can be used to help identify the target service. + /// The found object, otherwise . + /// is . + /// + /// The purpose of this method is to allow for the retrieval of strongly typed services that might be provided by the , + /// including itself or any services it might be wrapping. + /// + object? 
GetService(Type serviceType, object? serviceKey = null); +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGenerationOperation.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGenerationOperation.cs new file mode 100644 index 00000000000..3c3623d29d9 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGenerationOperation.cs @@ -0,0 +1,115 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json.Serialization; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents an in-flight or completed video generation operation. +/// +/// +/// +/// When is called, the provider submits a video generation +/// job and returns a immediately. The caller can then: +/// +/// +/// Check and for the current state. +/// Call to poll for updated status. +/// Call to poll until the operation reaches a terminal state. +/// Call to download the generated video content. +/// Call or to derive +/// follow-up requests from a completed video. +/// +/// +/// Providers implement this abstract class to supply their own polling, download, and derived-request logic. +/// Provider-specific operations (e.g., character upload) can be exposed as additional public methods on +/// the concrete subclass. +/// +/// +[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] +public abstract class VideoGenerationOperation +{ + /// Gets the provider-specific identifier for this operation. + public abstract string? OperationId { get; } + + /// Gets the current status of the operation (e.g., "queued", "in_progress", "completed", "failed"). + public abstract string? 
Status { get; }

    /// <summary>Gets the completion percentage (0–100), or <see langword="null"/> if not available.</summary>
    public abstract int? PercentComplete { get; }

    /// <summary>Gets a value indicating whether the operation has reached a terminal state.</summary>
    public abstract bool IsCompleted { get; }

    /// <summary>Gets the failure reason if the operation failed, or <see langword="null"/>.</summary>
    public abstract string? FailureReason { get; }

    /// <summary>Gets or sets the model ID used for the operation.</summary>
    public string? ModelId { get; set; }

    /// <summary>Gets or sets usage details for the video generation operation.</summary>
    public UsageDetails? Usage { get; set; }

    /// <summary>Gets or sets the raw representation of the operation from an underlying implementation.</summary>
    [JsonIgnore]
    public object? RawRepresentation { get; set; }

    /// <summary>Gets or sets any additional properties associated with the operation.</summary>
    public AdditionalPropertiesDictionary? AdditionalProperties { get; set; }

    /// <summary>Polls the provider for the current status of this operation.</summary>
    /// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests.</param>
    /// <returns>A task that completes when the status has been refreshed.</returns>
    public abstract Task UpdateAsync(CancellationToken cancellationToken = default);

    /// <summary>Polls the provider until the operation reaches a terminal state.</summary>
    /// <param name="progress">An optional <see cref="IProgress{T}"/> to receive progress updates during waiting.</param>
    /// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests.</param>
    /// <returns>A task that completes when the operation has finished.</returns>
    /// <exception cref="InvalidOperationException">The operation failed.</exception>
    public abstract Task WaitForCompletionAsync(
        IProgress<VideoGenerationProgress>? progress = null,
        CancellationToken cancellationToken = default);

    /// <summary>Downloads the completed video content.</summary>
    /// <param name="options">Optional options that may influence the download (e.g., <see cref="VideoGenerationOptions.ResponseFormat"/>).</param>
    /// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests.</param>
    /// <returns>The generated video content items.</returns>
    /// <exception cref="InvalidOperationException">The operation has not completed successfully.</exception>
    public abstract Task<IReadOnlyList<AIContent>> GetContentsAsync(
        VideoGenerationOptions? options = null,
        CancellationToken cancellationToken = default);

    /// <summary>Creates a <see cref="VideoGenerationRequest"/> to edit this completed video.</summary>
    /// <param name="prompt">The prompt describing the desired edits.</param>
    /// <returns>A <see cref="VideoGenerationRequest"/> configured for editing.</returns>
    public virtual VideoGenerationRequest CreateEditRequest(string prompt)
    {
        return new VideoGenerationRequest
        {
            Prompt = prompt,
            SourceVideoId = OperationId,
            OperationKind = VideoOperationKind.Edit,
        };
    }

    /// <summary>Creates a <see cref="VideoGenerationRequest"/> to extend this completed video.</summary>
    /// <param name="prompt">An optional prompt to guide the extension.</param>
    /// <returns>A <see cref="VideoGenerationRequest"/> configured for extension.</returns>
    public virtual VideoGenerationRequest CreateExtensionRequest(string? prompt = null)
    {
        return new VideoGenerationRequest
        {
            Prompt = prompt,
            SourceVideoId = OperationId,
            OperationKind = VideoOperationKind.Extend,
        };
    }
}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGenerationOptions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGenerationOptions.cs
new file mode 100644
index 00000000000..9e23eea6a82
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGenerationOptions.cs
@@ -0,0 +1,151 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Diagnostics.CodeAnalysis;
using System.Drawing;
using System.Text.Json.Serialization;
using Microsoft.Shared.DiagnosticIds;

namespace Microsoft.Extensions.AI;

/// <summary>Represents the options for a video generation request.</summary>
[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)]
public class VideoGenerationOptions
{
    /// <summary>Initializes a new instance of the <see cref="VideoGenerationOptions"/> class.</summary>
    public VideoGenerationOptions()
    {
    }

    /// <summary>Initializes a new instance of the <see cref="VideoGenerationOptions"/> class, performing a shallow copy of all properties from <paramref name="other"/>.</summary>
    protected VideoGenerationOptions(VideoGenerationOptions?
other)
    {
        if (other is null)
        {
            return;
        }

        AdditionalProperties = other.AdditionalProperties?.Clone();
        AspectRatio = other.AspectRatio;
        Count = other.Count;
        Duration = other.Duration;
        FramesPerSecond = other.FramesPerSecond;
        GenerateAudio = other.GenerateAudio;
        MediaType = other.MediaType;
        ModelId = other.ModelId;
        RawRepresentationFactory = other.RawRepresentationFactory;
        ResponseFormat = other.ResponseFormat;
        Seed = other.Seed;
        VideoSize = other.VideoSize;
    }

    /// <summary>
    /// Gets or sets the desired aspect ratio for the generated video (e.g., "16:9", "9:16", "1:1").
    /// </summary>
    public string? AspectRatio { get; set; }

    /// <summary>
    /// Gets or sets the number of videos to generate.
    /// </summary>
    public int? Count { get; set; }

    /// <summary>
    /// Gets or sets the desired duration for the generated video.
    /// </summary>
    /// <remarks>
    /// If a provider only supports fixed durations, the closest supported duration is used.
    /// </remarks>
    public TimeSpan? Duration { get; set; }

    /// <summary>
    /// Gets or sets the desired frames per second for the generated video.
    /// </summary>
    public int? FramesPerSecond { get; set; }

    /// <summary>
    /// Gets or sets whether to generate synchronized audio alongside the video.
    /// </summary>
    public bool? GenerateAudio { get; set; }

    /// <summary>
    /// Gets or sets the media type (also known as MIME type) of the generated video.
    /// </summary>
    public string? MediaType { get; set; }

    /// <summary>
    /// Gets or sets the model ID to use for video generation.
    /// </summary>
    public string? ModelId { get; set; }

    /// <summary>
    /// Gets or sets a callback responsible for creating the raw representation of the video generation options from an underlying implementation.
    /// </summary>
    /// <remarks>
    /// The underlying <see cref="IVideoGenerator"/> implementation can have its own representation of options.
    /// When <see cref="IVideoGenerator.GenerateAsync"/> is invoked with a <see cref="VideoGenerationOptions"/>,
    /// that implementation can convert the provided options into its own representation in order to use it while performing
    /// the operation. For situations where a consumer knows which concrete <see cref="IVideoGenerator"/> is being used
    /// and how it represents options, a new instance of that implementation-specific options type can be returned by this
    /// callback for the <see cref="IVideoGenerator"/> implementation to use instead of creating a new instance.
    /// Such implementations might mutate the supplied options instance further based on other settings supplied on this
    /// <see cref="VideoGenerationOptions"/> instance or from other inputs, therefore, it is strongly recommended to not
    /// return shared instances and instead make the callback return a new instance on each call.
    /// This is typically used to set an implementation-specific setting that isn't otherwise exposed from the strongly typed
    /// properties on <see cref="VideoGenerationOptions"/>.
    /// </remarks>
    [JsonIgnore]
    public Func<IVideoGenerator, object?>? RawRepresentationFactory { get; set; }

    /// <summary>
    /// Gets or sets the response format of the generated video.
    /// </summary>
    public VideoGenerationResponseFormat? ResponseFormat { get; set; }

    /// <summary>
    /// Gets or sets a seed value for reproducible generation.
    /// </summary>
    public int? Seed { get; set; }

    /// <summary>
    /// Gets or sets the size (resolution) of the generated video.
    /// </summary>
    /// <remarks>
    /// If a provider only supports fixed sizes, the closest supported size is used.
    /// </remarks>
    public Size? VideoSize { get; set; }

    /// <summary>Gets or sets any additional properties associated with the options.</summary>
    /// <remarks>
    /// This dictionary can be used to pass provider-specific settings that are not covered by
    /// the strongly typed properties on this class. Refer to provider documentation for supported keys.
    /// Unknown keys are typically forwarded as-is to the provider's API request body.
    /// </remarks>
    public AdditionalPropertiesDictionary? AdditionalProperties { get; set; }

    /// <summary>Produces a clone of the current instance.</summary>
    /// <returns>A clone of the current instance.</returns>
    public virtual VideoGenerationOptions Clone() => new(this);
}

/// <summary>
/// Represents the requested response format of the generated video.
/// </summary>
[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)]
public enum VideoGenerationResponseFormat
{
    /// <summary>
    /// The generated video is returned as a URI pointing to the video resource.
    /// </summary>
    Uri,

    /// <summary>
    /// The generated video is returned as in-memory video data.
    /// </summary>
    Data,

    /// <summary>
    /// The generated video is returned as a hosted resource identifier, which can be used to retrieve the video later.
    /// </summary>
    Hosted,
}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGenerationProgress.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGenerationProgress.cs
new file mode 100644
index 00000000000..afdc405fc63
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGenerationProgress.cs
@@ -0,0 +1,64 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Diagnostics.CodeAnalysis;
using Microsoft.Shared.DiagnosticIds;

namespace Microsoft.Extensions.AI;

/// <summary>
/// Represents the progress of a video generation operation.
/// </summary>
[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)]
public readonly struct VideoGenerationProgress : IEquatable<VideoGenerationProgress>
{
    /// <summary>Initializes a new instance of the <see cref="VideoGenerationProgress"/> struct.</summary>
    /// <param name="status">The current status of the video generation (e.g. "queued", "in_progress", "completed").</param>
    /// <param name="percentComplete">The completion percentage, from 0 to 100, or <see langword="null"/> if not available.</param>
    public VideoGenerationProgress(string? status, int? percentComplete)
    {
        Status = status;
        PercentComplete = percentComplete;
    }

    /// <summary>
    /// Gets the current status of the video generation (e.g. "queued", "in_progress", "completed", "failed").
    /// </summary>
    public string? Status { get; }

    /// <summary>
    /// Gets the completion percentage, from 0 to 100, or <see langword="null"/> if not available.
    /// </summary>
    public int? PercentComplete { get; }

    /// <summary>Determines whether two <see cref="VideoGenerationProgress"/> instances are equal.</summary>
    public static bool operator ==(VideoGenerationProgress left, VideoGenerationProgress right)
    {
        return left.Equals(right);
    }

    /// <summary>Determines whether two <see cref="VideoGenerationProgress"/> instances are not equal.</summary>
    public static bool operator !=(VideoGenerationProgress left, VideoGenerationProgress right)
    {
        return !left.Equals(right);
    }

    /// <inheritdoc />
    public bool Equals(VideoGenerationProgress other) =>
        string.Equals(Status, other.Status, StringComparison.Ordinal) && PercentComplete == other.PercentComplete;

    /// <inheritdoc />
    public override bool Equals(object? obj) => obj is VideoGenerationProgress other && Equals(other);

    /// <inheritdoc />
    public override int GetHashCode()
    {
#if NET
        return HashCode.Combine(Status, PercentComplete);
#else
        // Manual combine for down-level targets; 397 is the conventional odd-prime multiplier.
        int hash = Status?.GetHashCode() ?? 0;
        return (hash * 397) ^ PercentComplete.GetHashCode();
#endif
    }
}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGenerationRequest.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGenerationRequest.cs
new file mode 100644
index 00000000000..2e330fe9308
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGenerationRequest.cs
@@ -0,0 +1,80 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using Microsoft.Shared.DiagnosticIds;

namespace Microsoft.Extensions.AI;

/// <summary>Represents a request for video generation.</summary>
[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)]
public class VideoGenerationRequest
{
    /// <summary>Initializes a new instance of the <see cref="VideoGenerationRequest"/> class.</summary>
    public VideoGenerationRequest()
    {
    }

    /// <summary>Initializes a new instance of the <see cref="VideoGenerationRequest"/> class.</summary>
    /// <param name="prompt">The prompt to guide the video generation.</param>
+ public VideoGenerationRequest(string prompt) + { + Prompt = prompt; + } + + /// Gets or sets the prompt to guide the video generation. + public string? Prompt { get; set; } + + /// Gets or sets a negative prompt describing what to avoid in the generated video. + public string? NegativePrompt { get; set; } + + /// Gets or sets the kind of video operation to perform. + /// + /// Defaults to . Set to or + /// when working with an existing video referenced by + /// or . + /// + public VideoOperationKind OperationKind { get; set; } + + /// Gets or sets the provider-specific ID of an existing video to edit or extend. + /// + /// This is typically the of a previously completed + /// video generation. Use or + /// to create a request with this property set. + /// + public string? SourceVideoId { get; set; } + + /// Gets or sets the starting frame image for image-to-video generation. + /// + /// When provided with , the provider uses this image as the + /// initial frame from which the video is generated. Typically an image content such as + /// or with an image media type. + /// + public AIContent? StartFrame { get; set; } + + /// Gets or sets the ending frame image for video interpolation. + /// + /// When provided alongside , providers that support frame interpolation + /// generate a video that transitions from to this ending frame. + /// Not all providers support this feature. + /// + public AIContent? EndFrame { get; set; } + + /// Gets or sets reference images for style or subject guidance. + /// + /// Reference images influence the visual style or subject matter of the generated video without + /// being used as literal frames. For example, a provider may use these for style transfer or + /// subject consistency. Not all providers support this feature. + /// + public IList? ReferenceImages { get; set; } + + /// Gets or sets the source video content for editing. + /// + /// Used when is and the source + /// video is provided as content rather than by ID. 
Typically a or + /// with a video media type. To reference a previously generated video + /// by its ID, use instead. + /// + public AIContent? SourceVideo { get; set; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGeneratorExtensions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGeneratorExtensions.cs new file mode 100644 index 00000000000..89dabcdff5a --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGeneratorExtensions.cs @@ -0,0 +1,201 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Net.Mime; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// Provides extension methods for . +[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] +public static class VideoGeneratorExtensions +{ + /// Asks the for an object of type . + /// The type of the object to be retrieved. + /// The generator. + /// An optional key that can be used to help identify the target service. + /// The found object, otherwise . + /// is . + /// + /// The purpose of this method is to allow for the retrieval of strongly typed services that may be provided by the , + /// including itself or any services it might be wrapping. + /// + public static TService? GetService(this IVideoGenerator generator, object? serviceKey = null) + { + _ = Throw.IfNull(generator); + + return generator.GetService(typeof(TService), serviceKey) is TService service ? service : default; + } + + /// + /// Asks the for an object of the specified type + /// and throws an exception if one isn't available. + /// + /// The generator. + /// The type of object being requested. 
+ /// An optional key that can be used to help identify the target service. + /// The found object. + /// is . + /// is . + /// No service of the requested type for the specified key is available. + /// + /// The purpose of this method is to allow for the retrieval of services that are required to be provided by the , + /// including itself or any services it might be wrapping. + /// + public static object GetRequiredService(this IVideoGenerator generator, Type serviceType, object? serviceKey = null) + { + _ = Throw.IfNull(generator); + _ = Throw.IfNull(serviceType); + + return + generator.GetService(serviceType, serviceKey) ?? + throw Throw.CreateMissingServiceException(serviceType, serviceKey); + } + + /// + /// Asks the for an object of type + /// and throws an exception if one isn't available. + /// + /// The type of the object to be retrieved. + /// The generator. + /// An optional key that can be used to help identify the target service. + /// The found object. + /// is . + /// No service of the requested type for the specified key is available. + /// + /// The purpose of this method is to allow for the retrieval of strongly typed services that are required to be provided by the , + /// including itself or any services it might be wrapping. + /// + public static TService GetRequiredService(this IVideoGenerator generator, object? serviceKey = null) + { + _ = Throw.IfNull(generator); + + if (generator.GetService(typeof(TService), serviceKey) is not TService service) + { + throw Throw.CreateMissingServiceException(typeof(TService), serviceKey); + } + + return service; + } + + /// + /// Generates videos based on a text prompt. + /// + /// The video generator. + /// The prompt to guide the video generation. + /// The video generation options to configure the request. + /// The to monitor for cancellation requests. The default is . + /// or is . + /// A representing the submitted video generation job. 
+ public static Task GenerateVideoAsync( + this IVideoGenerator generator, + string prompt, + VideoGenerationOptions? options = null, + CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(generator); + _ = Throw.IfNull(prompt); + + return generator.GenerateAsync(new VideoGenerationRequest { Prompt = prompt }, options, cancellationToken); + } + + /// + /// Submits an edit request for existing video content using the specified prompt. + /// + /// The video generator. + /// The source video content to edit. + /// The prompt to guide the video editing. + /// The video generation options to configure the request. + /// The to monitor for cancellation requests. The default is . + /// , , or is . + /// A representing the submitted video generation job. + public static Task EditVideoAsync( + this IVideoGenerator generator, + AIContent sourceVideo, + string prompt, + VideoGenerationOptions? options = null, + CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(generator); + _ = Throw.IfNull(sourceVideo); + _ = Throw.IfNull(prompt); + + return generator.GenerateAsync( + new VideoGenerationRequest { Prompt = prompt, SourceVideo = sourceVideo, OperationKind = VideoOperationKind.Edit }, + options, cancellationToken); + } + + /// + /// Submits an edit request for a single video using the specified prompt. + /// + /// The video generator. + /// The single video to use as input. + /// The prompt to guide the video editing. + /// The video generation options to configure the request. + /// The to monitor for cancellation requests. The default is . + /// , , or is . + /// A representing the submitted video generation job. + public static Task EditVideoAsync( + this IVideoGenerator generator, + DataContent sourceVideo, + string prompt, + VideoGenerationOptions? 
options = null, + CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(generator); + _ = Throw.IfNull(sourceVideo); + _ = Throw.IfNull(prompt); + + return generator.GenerateAsync( + new VideoGenerationRequest { Prompt = prompt, SourceVideo = sourceVideo, OperationKind = VideoOperationKind.Edit }, + options, cancellationToken); + } + + /// + /// Submits an edit request for video data provided as a byte array. + /// + /// The video generator. + /// The byte array containing the video data to use as input. + /// The filename for the video data. + /// The prompt to guide the video generation. + /// The video generation options to configure the request. + /// The to monitor for cancellation requests. The default is . + /// + /// , , or is . + /// + /// A representing the submitted video generation job. + public static Task EditVideoAsync( + this IVideoGenerator generator, + ReadOnlyMemory sourceVideoData, + string fileName, + string prompt, + VideoGenerationOptions? options = null, + CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(generator); + _ = Throw.IfNull(fileName); + _ = Throw.IfNull(prompt); + + string mediaType = GetMediaTypeFromFileName(fileName); + var dataContent = new DataContent(sourceVideoData, mediaType) { Name = fileName }; + + return generator.GenerateAsync( + new VideoGenerationRequest { Prompt = prompt, SourceVideo = dataContent, OperationKind = VideoOperationKind.Edit }, + options, cancellationToken); + } + + /// + /// Gets the media type based on the file extension. + /// + /// The filename to extract the media type from. + /// The inferred media type. + private static string GetMediaTypeFromFileName(string fileName) + { + return MediaTypeMap.GetMediaType(fileName) ?? 
"video/mp4";
    }
}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGeneratorMetadata.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGeneratorMetadata.cs
new file mode 100644
index 00000000000..8dd966fff4d
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoGeneratorMetadata.cs
@@ -0,0 +1,44 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Diagnostics.CodeAnalysis;
using Microsoft.Shared.DiagnosticIds;

namespace Microsoft.Extensions.AI;

/// <summary>Provides metadata about an <see cref="IVideoGenerator"/>.</summary>
[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)]
public class VideoGeneratorMetadata
{
    /// <summary>Initializes a new instance of the <see cref="VideoGeneratorMetadata"/> class.</summary>
    /// <param name="providerName">
    /// The name of the video generation provider, if applicable. Where possible, this should map to the
    /// appropriate name defined in the OpenTelemetry Semantic Conventions for Generative AI systems.
    /// </param>
    /// <param name="providerUri">The URL for accessing the video generation provider, if applicable.</param>
    /// <param name="defaultModelId">The ID of the video generation model used by default, if applicable.</param>
    public VideoGeneratorMetadata(string? providerName = null, Uri? providerUri = null, string? defaultModelId = null)
    {
        DefaultModelId = defaultModelId;
        ProviderName = providerName;
        ProviderUri = providerUri;
    }

    /// <summary>Gets the name of the video generation provider.</summary>
    /// <remarks>
    /// Where possible, this maps to the appropriate name defined in the
    /// OpenTelemetry Semantic Conventions for Generative AI systems.
    /// </remarks>
    public string? ProviderName { get; }

    /// <summary>Gets the URL for accessing the video generation provider.</summary>
    public Uri? ProviderUri { get; }

    /// <summary>Gets the ID of the default model used by this video generator.</summary>
    /// <remarks>
    /// This value can be <see langword="null"/> if no default model is set on the corresponding <see cref="IVideoGenerator"/>.
    /// An individual request may override this value via <see cref="VideoGenerationOptions.ModelId"/>.
    /// </remarks>
    public string? DefaultModelId { get; }
}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoOperationKind.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoOperationKind.cs
new file mode 100644
index 00000000000..78dcda46f39
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Video/VideoOperationKind.cs
@@ -0,0 +1,31 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics.CodeAnalysis;
using Microsoft.Shared.DiagnosticIds;

namespace Microsoft.Extensions.AI;

/// <summary>
/// Specifies the kind of video generation operation to perform.
/// </summary>
[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)]
public enum VideoOperationKind
{
    /// <summary>
    /// Create a new video from a text prompt, optionally guided by a starting frame image
    /// supplied via <see cref="VideoGenerationRequest.StartFrame"/>.
    /// </summary>
    Create,

    /// <summary>
    /// Edit an existing video identified by <see cref="VideoGenerationRequest.SourceVideoId"/>
    /// or provided via <see cref="VideoGenerationRequest.SourceVideo"/>.
    /// </summary>
    Edit,

    /// <summary>
    /// Extend an existing video identified by <see cref="VideoGenerationRequest.SourceVideoId"/>.
+ /// + Extend, +} diff --git a/src/Libraries/Microsoft.Extensions.AI.OpenAI/Microsoft.Extensions.AI.OpenAI.csproj b/src/Libraries/Microsoft.Extensions.AI.OpenAI/Microsoft.Extensions.AI.OpenAI.csproj index dfd21f5451f..c9ddf95e00e 100644 --- a/src/Libraries/Microsoft.Extensions.AI.OpenAI/Microsoft.Extensions.AI.OpenAI.csproj +++ b/src/Libraries/Microsoft.Extensions.AI.OpenAI/Microsoft.Extensions.AI.OpenAI.csproj @@ -28,6 +28,7 @@ true true true + true true diff --git a/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIClientExtensions.cs b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIClientExtensions.cs index 9a5ebd0d06a..7dcc48168f3 100644 --- a/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIClientExtensions.cs +++ b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIClientExtensions.cs @@ -11,7 +11,10 @@ using System.Text.Json; using System.Text.Json.Nodes; using System.Text.Json.Serialization; +using System.Threading; +using System.Threading.Tasks; using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; using OpenAI; using OpenAI.Assistants; using OpenAI.Audio; @@ -21,6 +24,7 @@ using OpenAI.Files; using OpenAI.Images; using OpenAI.Responses; +using OpenAI.Videos; #pragma warning disable MEAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. #pragma warning disable SA1515 // Single-line comment should be preceded by blank line @@ -184,6 +188,84 @@ public static ITextToSpeechClient AsITextToSpeechClient(this AudioClient audioCl public static IImageGenerator AsIImageGenerator(this ImageClient imageClient) => new OpenAIImageGenerator(imageClient); + /// Gets an for use with this . + /// The client. + /// The model ID to use for video generation (e.g. "sora-2"). + /// An that can be used to generate videos via the . + /// is . + /// + /// + /// The returned submits video generation jobs and returns + /// instances. 
The endpoint is chosen based on the + /// and : + /// + /// + /// + /// Text-to-video (): Generates a new video from + /// the text prompt via POST /videos. + /// + /// + /// Image-to-video (): When + /// contains image content, uses it as an + /// input_reference via POST /videos. + /// + /// + /// Edit by video ID (): When + /// is set, edits the video via + /// POST /videos/edits. + /// + /// + /// Edit by upload (): When + /// contains video content and no + /// is set, uploads the video for editing. + /// + /// + /// Extend (): When + /// is set, extends the video via + /// POST /videos/extensions. + /// + /// + /// + /// Character IDs can be included in the create request by passing a characters key + /// in as a JSON array. Characters + /// can also be uploaded via . + /// + /// + /// Any keys in are forwarded + /// as-is to the OpenAI API request body. + /// + /// + [Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] + public static IVideoGenerator AsIVideoGenerator(this VideoClient videoClient, string? modelId = null) => + new OpenAIVideoGenerator(videoClient, modelId); + + /// + /// Uploads a character asset from a video for use in subsequent video generation requests. + /// + /// The video generator backed by an OpenAI . + /// The name of the character. + /// The video content containing the character. + /// The to monitor for cancellation requests. + /// The provider-specific character ID that can be passed in + /// under the "characters" key. + /// The is not + /// backed by an OpenAI . + [Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] + public static Task UploadVideoCharacterAsync( + this IVideoGenerator generator, + string name, + DataContent videoContent, + CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(generator); + _ = Throw.IfNull(videoContent); + + OpenAIVideoGenerator openAIGenerator = generator.GetService() + ?? 
throw new InvalidOperationException("The video generator is not backed by an OpenAI VideoClient."); + + return openAIGenerator.UploadVideoCharacterAsync(name, videoContent, cancellationToken); + } + /// Gets an for use with this . /// The client. /// The number of dimensions to generate in each embedding. diff --git a/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIVideoGenerationOperation.cs b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIVideoGenerationOperation.cs new file mode 100644 index 00000000000..7aab67537ed --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIVideoGenerationOperation.cs @@ -0,0 +1,149 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.ClientModel; +using System.ClientModel.Primitives; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; +using OpenAI.Videos; + +namespace Microsoft.Extensions.AI; + +/// +/// Represents an OpenAI video generation operation returned by . +/// +/// +/// Use to upload character assets +/// that can be referenced in subsequent video generation requests. +/// +[Experimental(DiagnosticIds.Experiments.AIOpenAIVideoClient)] +public sealed class OpenAIVideoGenerationOperation : VideoGenerationOperation +{ + /// Default polling interval for checking video generation status. + private static readonly TimeSpan _defaultPollingInterval = TimeSpan.FromSeconds(10); + + private readonly VideoClient _videoClient; + private string? _operationId; + private string? _status; + private int? _percentComplete; + private string? _failureReason; + + /// Initializes a new instance of the class. + internal OpenAIVideoGenerationOperation(VideoClient videoClient, string operationId, string status, int? 
percentComplete)
    {
        _videoClient = videoClient;
        _operationId = operationId;
        _status = status;
        _percentComplete = percentComplete;
    }

    /// <inheritdoc />
    public override string? OperationId => _operationId;

    /// <inheritdoc />
    public override string? Status => _status;

    /// <inheritdoc />
    public override int? PercentComplete => _percentComplete;

    /// <inheritdoc />
    public override bool IsCompleted => IsTerminalStatus(_status);

    /// <inheritdoc />
    public override string? FailureReason => _failureReason;

    /// <inheritdoc />
    public override async Task UpdateAsync(CancellationToken cancellationToken = default)
    {
        // Nothing to refresh if the job was never submitted or has already reached a terminal state.
        if (_operationId is null || IsCompleted)
        {
            return;
        }

        var opts = new RequestOptions { CancellationToken = cancellationToken };
        ClientResult result = await _videoClient.GetVideoAsync(_operationId, opts).ConfigureAwait(false);
        ParseStatus(result.GetRawResponse().Content);
    }

    /// <inheritdoc />
    public override async Task WaitForCompletionAsync(
        IProgress<VideoGenerationProgress>? progress = null,
        CancellationToken cancellationToken = default)
    {
        // Report the initial state before the first poll.
        progress?.Report(new VideoGenerationProgress(_status, _percentComplete));

        while (!IsCompleted)
        {
            await Task.Delay(_defaultPollingInterval, cancellationToken).ConfigureAwait(false);
            await UpdateAsync(cancellationToken).ConfigureAwait(false);
            progress?.Report(new VideoGenerationProgress(_status, _percentComplete));
        }

        if (string.Equals(_status, "failed", StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException(_failureReason ?? "Video generation failed.");
        }
    }

    /// <inheritdoc />
    public override async Task<IReadOnlyList<AIContent>> GetContentsAsync(
        VideoGenerationOptions? options = null,
        CancellationToken cancellationToken = default)
    {
        if (!IsCompleted)
        {
            throw new InvalidOperationException("The operation has not completed. Call WaitForCompletionAsync first.");
        }

        if (string.Equals(_status, "failed", StringComparison.OrdinalIgnoreCase))
        {
            throw new InvalidOperationException(_failureReason ?? "Video generation failed.");
        }

        string contentType = options?.MediaType ?? "video/mp4";

        // For URI/hosted formats, return a link to the content endpoint instead of downloading bytes.
        if (options?.ResponseFormat is VideoGenerationResponseFormat.Uri or
            VideoGenerationResponseFormat.Hosted)
        {
            string baseUrl = _videoClient.Endpoint.ToString().TrimEnd('/');
            var videoUri = new Uri($"{baseUrl}/videos/{_operationId}/content");
            return [new UriContent(videoUri, contentType)];
        }

        var dlOpts = new RequestOptions { CancellationToken = cancellationToken };
        ClientResult downloadResult = await _videoClient.DownloadVideoAsync(
            _operationId!, options: dlOpts).ConfigureAwait(false);
        BinaryData videoData = downloadResult.GetRawResponse().Content;
        return [new DataContent(videoData.ToMemory(), contentType)];
    }

    /// <summary>Determines whether <paramref name="status"/> represents a terminal job state.</summary>
    private static bool IsTerminalStatus(string? status) =>
        string.Equals(status, "completed", StringComparison.OrdinalIgnoreCase) ||
        string.Equals(status, "failed", StringComparison.OrdinalIgnoreCase) ||
        string.Equals(status, "expired", StringComparison.OrdinalIgnoreCase);

    /// <summary>Parses status fields from a video job JSON response.</summary>
+ private void ParseStatus(BinaryData content) + { + using JsonDocument doc = JsonDocument.Parse(content); + _status = doc.RootElement.GetProperty("status").GetString(); + + if (doc.RootElement.TryGetProperty("progress", out JsonElement progressEl) && + progressEl.TryGetInt32(out int pct)) + { + _percentComplete = pct; + } + + if (string.Equals(_status, "failed", StringComparison.OrdinalIgnoreCase) && + doc.RootElement.TryGetProperty("error", out JsonElement errorEl) && + errorEl.TryGetProperty("message", out JsonElement msgEl)) + { + _failureReason = msgEl.GetString(); + } + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIVideoGenerator.cs b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIVideoGenerator.cs new file mode 100644 index 00000000000..9a5b6568eaf --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIVideoGenerator.cs @@ -0,0 +1,463 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.ClientModel; +using System.ClientModel.Primitives; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Drawing; +using System.IO; +using System.Text; +using System.Text.Json; +using System.Text.Json.Nodes; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; +using Microsoft.Shared.Text; +using OpenAI.Videos; + +namespace Microsoft.Extensions.AI; + +/// Represents an for an OpenAI . +/// +/// +/// This implementation uses the OpenAI video generation API. Video generation is asynchronous: +/// submits a generation job and returns an +/// that can be used to poll for completion and download the result. +/// +/// The endpoint chosen depends on +/// and : +/// +/// +/// Text-to-video (): When +/// is , +/// creates a new video from the text prompt via POST /videos. 
+/// +/// +/// Image-to-video (): When +/// contains image content +/// (e.g., image/png), uses the image as an input_reference to guide +/// new video creation via POST /videos. A sends the +/// image URL in JSON; a uploads the image bytes via +/// multipart/form-data. +/// +/// +/// Edit by video ID (): When +/// is set, edits the specified +/// video via POST /videos/edits. +/// +/// +/// Edit by upload (): When +/// contains video content +/// (e.g., video/mp4) and no +/// is set, uploads the video for editing via POST /videos/edits with multipart/form-data. +/// +/// +/// Extend (): When +/// is set, extends the completed +/// video via POST /videos/extensions. +/// +/// +/// +/// Character IDs can be included in the create request by passing a characters +/// key in as a JSON array. +/// Characters are reusable visual assets that can be uploaded via +/// . +/// +/// +[Experimental(DiagnosticIds.Experiments.AIOpenAIVideoClient)] +internal sealed class OpenAIVideoGenerator : IVideoGenerator +{ + /// Metadata about the client. + private readonly VideoGeneratorMetadata _metadata; + + /// The underlying . + private readonly VideoClient _videoClient; + + /// The model to use for video generation. + private readonly string? _defaultModelId; + + /// Initializes a new instance of the class for the specified . + /// The underlying client. + /// The model ID to use for video generation (e.g. "sora-2"). + /// is . + public OpenAIVideoGenerator(VideoClient videoClient, string? modelId = null) + { + _videoClient = Throw.IfNull(videoClient); + _defaultModelId = modelId; + + _metadata = new("openai", videoClient.Endpoint, modelId); + } + + /// + public async Task GenerateAsync( + VideoGenerationRequest request, + VideoGenerationOptions? options = null, + CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(request); + + string? prompt = request.Prompt; + _ = Throw.IfNull(prompt); + + string modelId = options?.ModelId ?? _defaultModelId ?? 
"sora-2"; + + // Determine input media based on the operation kind + DataContent? videoEditContent = null; + DataContent? imageReferenceData = null; + UriContent? imageReferenceUri = null; + + if (request.OperationKind == VideoOperationKind.Create && + request.StartFrame is { } startFrame) + { + if (startFrame is DataContent dc && dc.Data.Length > 0 && IsImageMediaType(dc.MediaType)) + { + imageReferenceData = dc; + } + else if (startFrame is UriContent uc && IsImageMediaType(uc.MediaType)) + { + imageReferenceUri = uc; + } + } + else if (request.OperationKind == VideoOperationKind.Edit && + request.SourceVideoId is null && + request.SourceVideo is DataContent editDc && editDc.Data.Length > 0 && IsVideoMediaType(editDc.MediaType)) + { + videoEditContent = editDc; + } + + // Route to the appropriate endpoint and submit the video generation job + RequestOptions reqOpts = new() { CancellationToken = cancellationToken }; + ClientResult createResult; + + if (request.OperationKind == VideoOperationKind.Extend && request.SourceVideoId is not null) + { + // POST /videos/extensions — extend a completed video + JsonObject body = new() + { + ["prompt"] = prompt, + ["video"] = new JsonObject { ["id"] = request.SourceVideoId }, + }; + + if (options?.Duration is TimeSpan extDuration) + { + body["seconds"] = ((int)extDuration.TotalSeconds).ToInvariantString(); + } + + ForwardAdditionalProperties(body, options); + using var extendContent = CreateJsonContent(body); + using PipelineMessage extendMsg = CreatePipelineRequest( + _videoClient, "/videos/extensions", extendContent, + "application/json", reqOpts); + await _videoClient.Pipeline.SendAsync(extendMsg).ConfigureAwait(false); + createResult = ClientResult.FromResponse(extendMsg.Response!); + } + else if (request.OperationKind == VideoOperationKind.Edit && request.SourceVideoId is not null) + { + // POST /videos/edits — edit an existing video by ID + JsonObject body = new() + { + ["prompt"] = prompt, + ["video"] = new JsonObject 
{ ["id"] = request.SourceVideoId }, + }; + + ForwardAdditionalProperties(body, options); + using var editContent = CreateJsonContent(body); + using PipelineMessage editMsg = CreatePipelineRequest( + _videoClient, "/videos/edits", editContent, + "application/json", reqOpts); + await _videoClient.Pipeline.SendAsync(editMsg).ConfigureAwait(false); + createResult = ClientResult.FromResponse(editMsg.Response!); + } + else if (videoEditContent is not null) + { + // POST /videos/edits — edit an uploaded video via multipart + JsonObject formFields = new() + { + ["prompt"] = prompt, + ["model"] = modelId, + }; + + ForwardAdditionalProperties(formFields, options); + using BinaryContent editMultipart = BuildMultipartContent( + formFields, videoEditContent, "video", + out string editContentType); + using PipelineMessage editUploadMsg = CreatePipelineRequest( + _videoClient, "/videos/edits", editMultipart, + editContentType, reqOpts); + await _videoClient.Pipeline.SendAsync(editUploadMsg).ConfigureAwait(false); + createResult = ClientResult.FromResponse(editUploadMsg.Response!); + } + else + { + // POST /videos — text-to-video or image-to-video + JsonObject requestBody = new() + { + ["model"] = modelId, + ["prompt"] = prompt, + }; + + if (options?.VideoSize is Size size) + { + requestBody["size"] = $"{size.Width}x{size.Height}"; + } + + if (options?.Duration is TimeSpan duration) + { + requestBody["seconds"] = ((int)duration.TotalSeconds).ToInvariantString(); + } + + if (options?.Count is int count && count > 1) + { + requestBody["n"] = count; + } + + ForwardAdditionalProperties(requestBody, options); + + if (imageReferenceUri is not null) + { + requestBody["input_reference"] = new JsonObject + { + ["image_url"] = imageReferenceUri.Uri.ToString(), + }; + } + else if (imageReferenceData is not null) + { + // The API expects input_reference as a JSON object, not a multipart file. + // Convert to a data URI so the image bytes are sent inline. 
+ string mediaType = imageReferenceData.MediaType ?? "application/octet-stream"; + string base64 = Convert.ToBase64String(imageReferenceData.Data.ToArray()); + requestBody["input_reference"] = new JsonObject + { + ["image_url"] = $"data:{mediaType};base64,{base64}", + }; + } + + using var content = CreateJsonContent(requestBody); + createResult = await _videoClient.CreateVideoAsync( + content, "application/json", reqOpts).ConfigureAwait(false); + } + + // Parse the creation response to get the video ID and initial status + using JsonDocument createDoc = JsonDocument.Parse( + createResult.GetRawResponse().Content); + string videoId = createDoc.RootElement.GetProperty("id").GetString()!; + string status = createDoc.RootElement.GetProperty("status").GetString()!; + int? progressPercent = null; + if (createDoc.RootElement.TryGetProperty("progress", out JsonElement progEl) && + progEl.TryGetInt32(out int pct)) + { + progressPercent = pct; + } + + return new OpenAIVideoGenerationOperation(_videoClient, videoId, status, progressPercent); + } + + /// + public object? GetService(Type serviceType, object? serviceKey = null) => + serviceType is null ? throw new ArgumentNullException(nameof(serviceType)) : + serviceKey is not null ? null : + serviceType == typeof(VideoGeneratorMetadata) ? _metadata : + serviceType == typeof(VideoClient) ? _videoClient : + serviceType.IsInstanceOfType(this) ? this : + null; + + /// + void IDisposable.Dispose() + { + // Nothing to dispose. Implementation required for the IVideoGenerator interface. + } + + /// Uploads a character asset from a video for use in subsequent video generation requests. 
+ internal async Task UploadVideoCharacterAsync( + string name, + DataContent videoContent, + CancellationToken cancellationToken = default) + { + string boundary = $"----MEAI{Guid.NewGuid():N}"; + string contentType = $"multipart/form-data; boundary={boundary}"; + + using var ms = new MemoryStream(); + WriteFormField(ms, boundary, "name", name); + + string fileName = videoContent.Name ?? "character.mp4"; + string mediaType = videoContent.MediaType ?? "video/mp4"; + WriteFilePart(ms, boundary, "video", fileName, mediaType, videoContent.Data); + WriteString(ms, $"--{boundary}--\r\n"); + + using BinaryContent content = BinaryContent.Create(new BinaryData(ms.ToArray())); + RequestOptions reqOpts = new() { CancellationToken = cancellationToken }; + using PipelineMessage message = CreatePipelineRequest( + _videoClient, "/videos/characters", content, contentType, reqOpts); + + await _videoClient.Pipeline.SendAsync(message).ConfigureAwait(false); + + PipelineResponse response = message.Response!; + using JsonDocument doc = JsonDocument.Parse(response.Content); + JsonElement root = doc.RootElement; + + // The API may return an error object with a "message" property. + if (root.TryGetProperty("error", out JsonElement errorElement)) + { + string errorMessage = errorElement.TryGetProperty("message", out JsonElement msgElement) + ? msgElement.GetString() ?? "Unknown error" + : errorElement.ToString(); + throw new InvalidOperationException($"Character upload failed: {errorMessage}"); + } + + if (root.TryGetProperty("id", out JsonElement idElement)) + { + return idElement.GetString()!; + } + + throw new InvalidOperationException( + $"Character upload response did not contain an 'id' property. Response: {response.Content}"); + } + + /// Creates a for a POST request to a path not yet exposed by the SDK. 
+ private static PipelineMessage CreatePipelineRequest( + VideoClient videoClient, string path, BinaryContent content, + string contentType, RequestOptions options) + { + string baseUrl = videoClient.Endpoint.ToString().TrimEnd('/'); + Uri uri = new($"{baseUrl}{path}"); + PipelineMessageClassifier classifier = PipelineMessageClassifier.Create( + stackalloc ushort[] { 200, 201, 202 }); + PipelineMessage message = videoClient.Pipeline.CreateMessage( + uri, "POST", classifier); + message.Request.Headers.Set("Content-Type", contentType); + message.Request.Headers.Set("Accept", "application/json"); + message.Request.Content = content; + message.Apply(options); + return message; + } + + /// Determines whether the given media type represents a video format. + private static bool IsVideoMediaType(string? mediaType) => + mediaType is not null && + mediaType.StartsWith("video/", StringComparison.OrdinalIgnoreCase); + + /// Determines whether the given media type represents an image format. + /// Treats or unspecified media types as images for backward compatibility. + private static bool IsImageMediaType(string? mediaType) => + mediaType is null || + mediaType.StartsWith("image/", StringComparison.OrdinalIgnoreCase); + + /// Forwards additional properties to the JSON body. + private static void ForwardAdditionalProperties(JsonObject body, VideoGenerationOptions? options) + { + if (options?.AdditionalProperties is not { } props) + { + return; + } + + foreach (KeyValuePair prop in props) + { + body[prop.Key] = ToJsonNode(prop.Value); + } + } + + /// Creates a containing the serialized JSON object. + private static Utf8JsonBinaryContent CreateJsonContent(JsonObject body) + { + var content = new Utf8JsonBinaryContent(); + body.WriteTo(content.JsonWriter); + return content; + } + + /// Builds a multipart/form-data body containing the form fields and a file part. 
+    private static BinaryContent BuildMultipartContent(
+        JsonObject formFields,
+        DataContent fileContent,
+        string filePartName,
+        out string contentType)
+    {
+        // Random boundary; the "----MEAI" prefix makes the payload easy to identify in traces.
+        string boundary = $"----MEAI{Guid.NewGuid():N}";
+        contentType = $"multipart/form-data; boundary={boundary}";
+
+        using var ms = new MemoryStream();
+
+        foreach (KeyValuePair<string, JsonNode?> prop in formFields)
+        {
+            if (prop.Value is null)
+            {
+                continue;
+            }
+
+            // JSON string values are written raw (unquoted); any other node uses its JSON text.
+            string fieldValue =
+                prop.Value is JsonValue jsonValue &&
+                jsonValue.TryGetValue(out string? stringValue)
+                ? stringValue
+                : prop.Value.ToString();
+
+            WriteFormField(ms, boundary, prop.Key, fieldValue);
+        }
+
+        string fileName = fileContent.Name ?? filePartName;
+        string mediaType =
+            fileContent.MediaType ?? "application/octet-stream";
+        WriteFilePart(ms, boundary, filePartName, fileName, mediaType, fileContent.Data);
+
+        WriteString(ms, $"--{boundary}--\r\n");
+
+        return BinaryContent.Create(new BinaryData(ms.ToArray()));
+    }
+
+    /// <summary>
+    /// Escapes a value for use inside a quoted string in a Content-Disposition header.
+    /// Line breaks are removed and double quotes become %22 (the WHATWG multipart escaping),
+    /// so a hostile or unlucky field/file name cannot break the multipart framing.
+    /// </summary>
+    private static string EscapeMultipartToken(string value) =>
+        value.Replace("\r", string.Empty).Replace("\n", string.Empty).Replace("\"", "%22");
+
+    /// <summary>Writes a simple text form field to a multipart stream.</summary>
+    private static void WriteFormField(
+        MemoryStream ms, string boundary, string name, string value)
+    {
+        string header =
+            $"--{boundary}\r\n" +
+            $"Content-Disposition: form-data; name=\"{EscapeMultipartToken(name)}\"\r\n\r\n" +
+            $"{value}\r\n";
+        WriteString(ms, header);
+    }
+
+    /// <summary>Writes a file part (headers plus raw bytes) to a multipart stream.</summary>
+    private static void WriteFilePart(
+        MemoryStream ms,
+        string boundary,
+        string name,
+        string fileName,
+        string mediaType,
+        ReadOnlyMemory<byte> data)
+    {
+        string header =
+            $"--{boundary}\r\n" +
+            $"Content-Disposition: form-data; name=\"{EscapeMultipartToken(name)}\"; " +
+            $"filename=\"{EscapeMultipartToken(fileName)}\"\r\n" +
+            $"Content-Type: {mediaType}\r\n\r\n";
+        WriteString(ms, header);
+#if NET
+        ms.Write(data.Span);
+#else
+        byte[] bytes = data.ToArray();
+        ms.Write(bytes, 0, bytes.Length);
+#endif
+        WriteString(ms, "\r\n");
+    }
+
+    /// <summary>Writes a UTF-8 string to a stream.</summary>
+ private static void WriteString(MemoryStream ms, string value) + { + byte[] bytes = Encoding.UTF8.GetBytes(value); + ms.Write(bytes, 0, bytes.Length); + } + + /// Converts an value to a in an AOT-safe manner. + private static JsonNode? ToJsonNode(object? value) => value switch + { + null => null, + JsonNode node => node, + string s => JsonValue.Create(s), + bool b => JsonValue.Create(b), + int i => JsonValue.Create(i), + long l => JsonValue.Create(l), + float f => JsonValue.Create(f), + double d => JsonValue.Create(d), + _ => JsonValue.Create(value.ToString()!), + }; +} diff --git a/src/Libraries/Microsoft.Extensions.AI.OpenAI/Utf8JsonBinaryContent.cs b/src/Libraries/Microsoft.Extensions.AI.OpenAI/Utf8JsonBinaryContent.cs new file mode 100644 index 00000000000..49a4244d0d5 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.OpenAI/Utf8JsonBinaryContent.cs @@ -0,0 +1,50 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.ClientModel; +using System.IO; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.Extensions.AI; + +/// A that writes UTF-8 JSON directly to the pipeline stream. 
+internal sealed class Utf8JsonBinaryContent : BinaryContent
+{
+    /// <summary>Buffer that accumulates the UTF-8 JSON payload.</summary>
+    private readonly MemoryStream _buffer = new();
+
+    /// <summary>Stream-backed <see cref="BinaryContent"/> wrapper used to copy the buffer to the target stream.</summary>
+    private readonly BinaryContent _bufferContent;
+
+    public Utf8JsonBinaryContent()
+    {
+        _bufferContent = Create(_buffer);
+        JsonWriter = new Utf8JsonWriter(_buffer);
+    }
+
+    /// <summary>Gets the writer callers use to emit the JSON payload.</summary>
+    public Utf8JsonWriter JsonWriter { get; }
+
+    /// <inheritdoc/>
+    public override bool TryComputeLength(out long length)
+    {
+        // Committed bytes are already in the buffer; pending bytes are still inside the writer.
+        length = JsonWriter.BytesCommitted + JsonWriter.BytesPending;
+        return true;
+    }
+
+    /// <inheritdoc/>
+    public override void WriteTo(Stream stream, CancellationToken cancellationToken = default)
+    {
+        JsonWriter.Flush();
+        _bufferContent.WriteTo(stream, cancellationToken);
+    }
+
+    /// <inheritdoc/>
+    public override async Task WriteToAsync(Stream stream, CancellationToken cancellationToken = default)
+    {
+        await JsonWriter.FlushAsync(cancellationToken).ConfigureAwait(false);
+        await _bufferContent.WriteToAsync(stream, cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public override void Dispose()
+    {
+        // Dispose the writer first so any pending bytes are flushed into the buffer before it is released.
+        JsonWriter.Dispose();
+        _bufferContent.Dispose();
+        _buffer.Dispose();
+    }
+}
diff --git a/src/Libraries/Microsoft.Extensions.AI/ChatCompletion/OpenTelemetryVideoGenerator.cs b/src/Libraries/Microsoft.Extensions.AI/ChatCompletion/OpenTelemetryVideoGenerator.cs
new file mode 100644
index 00000000000..ba0fa330f7d
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI/ChatCompletion/OpenTelemetryVideoGenerator.cs
@@ -0,0 +1,336 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Diagnostics.Metrics; +using System.Drawing; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +#pragma warning disable SA1111 // Closing parenthesis should be on line of last parameter +#pragma warning disable SA1113 // Comma should be on the same line as previous parameter + +namespace Microsoft.Extensions.AI; + +/// Represents a delegating video generator that implements the OpenTelemetry Semantic Conventions for Generative AI systems. +/// +/// This class provides an implementation of the Semantic Conventions for Generative AI systems v1.40, defined at . +/// The specification is still experimental and subject to change; as such, the telemetry output by this client is also subject to change. +/// +[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] +public sealed class OpenTelemetryVideoGenerator : DelegatingVideoGenerator +{ + private readonly ActivitySource _activitySource; + private readonly Meter _meter; + + private readonly Histogram _tokenUsageHistogram; + private readonly Histogram _operationDurationHistogram; + + private readonly string? _defaultModelId; + private readonly string? _providerName; + private readonly string? _serverAddress; + private readonly int _serverPort; + + private readonly ILogger? _logger; + + /// Initializes a new instance of the class. + /// The underlying . + /// The to use for emitting any logging data from the client. + /// An optional source name that will be used on the telemetry data. + public OpenTelemetryVideoGenerator(IVideoGenerator innerGenerator, ILogger? logger = null, string? 
sourceName = null) + : base(innerGenerator) + { + Debug.Assert(innerGenerator is not null, "Should have been validated by the base ctor"); + + _logger = logger; + + if (innerGenerator!.GetService() is VideoGeneratorMetadata metadata) + { + _defaultModelId = metadata.DefaultModelId; + _providerName = metadata.ProviderName; + _serverAddress = metadata.ProviderUri?.Host; + _serverPort = metadata.ProviderUri?.Port ?? 0; + } + + string name = string.IsNullOrEmpty(sourceName) ? OpenTelemetryConsts.DefaultSourceName : sourceName!; + _activitySource = new(name); + _meter = new(name); + + _tokenUsageHistogram = _meter.CreateHistogram( + OpenTelemetryConsts.GenAI.Client.TokenUsage.Name, + OpenTelemetryConsts.TokensUnit, + OpenTelemetryConsts.GenAI.Client.TokenUsage.Description, + advice: new() { HistogramBucketBoundaries = OpenTelemetryConsts.GenAI.Client.TokenUsage.ExplicitBucketBoundaries } + ); + + _operationDurationHistogram = _meter.CreateHistogram( + OpenTelemetryConsts.GenAI.Client.OperationDuration.Name, + OpenTelemetryConsts.SecondsUnit, + OpenTelemetryConsts.GenAI.Client.OperationDuration.Description, + advice: new() { HistogramBucketBoundaries = OpenTelemetryConsts.GenAI.Client.OperationDuration.ExplicitBucketBoundaries } + ); + } + + /// + protected override void Dispose(bool disposing) + { + if (disposing) + { + _activitySource.Dispose(); + _meter.Dispose(); + } + + base.Dispose(disposing); + } + + /// + /// Gets or sets a value indicating whether potentially sensitive information should be included in telemetry. + /// + /// + /// if potentially sensitive information should be included in telemetry; + /// if telemetry shouldn't include raw inputs and outputs. + /// The default value is , unless the OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT + /// environment variable is set to "true" (case-insensitive). 
+ /// + /// + /// By default, telemetry includes metadata, such as token counts, but not raw inputs + /// and outputs, such as message content, function call arguments, and function call results. + /// The default value can be overridden by setting the OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT + /// environment variable to "true". Explicitly setting this property will override the environment variable. + /// + public bool EnableSensitiveData { get; set; } = TelemetryHelpers.EnableSensitiveDataDefault; + + /// + public override object? GetService(Type serviceType, object? serviceKey = null) => + serviceType == typeof(ActivitySource) ? _activitySource : + base.GetService(serviceType, serviceKey); + + /// + public async override Task GenerateAsync( + VideoGenerationRequest request, VideoGenerationOptions? options = null, CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(request); + + using Activity? activity = CreateAndConfigureActivity(request, options); + Stopwatch? stopwatch = _operationDurationHistogram.Enabled ? Stopwatch.StartNew() : null; + string? requestModelId = options?.ModelId ?? _defaultModelId; + + VideoGenerationOperation? operation = null; + Exception? error = null; + try + { + operation = await base.GenerateAsync(request, options, cancellationToken).ConfigureAwait(false); + return operation; + } + catch (Exception ex) + { + error = ex; + throw; + } + finally + { + TraceResponse(activity, requestModelId, operation, error, stopwatch); + } + } + + /// Creates an activity for a video generation request, or returns if not enabled. + private Activity? CreateAndConfigureActivity(VideoGenerationRequest request, VideoGenerationOptions? options) + { + Activity? activity = null; + if (_activitySource.HasListeners()) + { + string? modelId = options?.ModelId ?? _defaultModelId; + + activity = _activitySource.StartActivity( + string.IsNullOrWhiteSpace(modelId) ? 
OpenTelemetryConsts.GenAI.GenerateContentName : $"{OpenTelemetryConsts.GenAI.GenerateContentName} {modelId}", + ActivityKind.Client); + + if (activity is { IsAllDataRequested: true }) + { + _ = activity + .AddTag(OpenTelemetryConsts.GenAI.Operation.Name, OpenTelemetryConsts.GenAI.GenerateContentName) + .AddTag(OpenTelemetryConsts.GenAI.Output.Type, OpenTelemetryConsts.TypeVideo) + .AddTag(OpenTelemetryConsts.GenAI.Request.Model, modelId) + .AddTag(OpenTelemetryConsts.GenAI.Provider.Name, _providerName); + + if (_serverAddress is not null) + { + _ = activity + .AddTag(OpenTelemetryConsts.Server.Address, _serverAddress) + .AddTag(OpenTelemetryConsts.Server.Port, _serverPort); + } + + if (options is not null) + { + if (options.Count is int count) + { + _ = activity.AddTag(OpenTelemetryConsts.GenAI.Request.ChoiceCount, count); + } + + if (options.VideoSize is Size size) + { + _ = activity + .AddTag("gen_ai.request.video.width", size.Width) + .AddTag("gen_ai.request.video.height", size.Height); + } + + if (options.Duration is TimeSpan duration) + { + _ = activity.AddTag("gen_ai.request.video.duration", duration.TotalSeconds); + } + + if (options.FramesPerSecond is int fps) + { + _ = activity.AddTag("gen_ai.request.video.fps", fps); + } + } + + if (EnableSensitiveData) + { + List content = []; + + if (request.Prompt is not null) + { + content.Add(new TextContent(request.Prompt)); + } + + if (request.StartFrame is not null) + { + content.Add(request.StartFrame); + } + + if (request.EndFrame is not null) + { + content.Add(request.EndFrame); + } + + if (request.ReferenceImages is not null) + { + content.AddRange(request.ReferenceImages); + } + + if (request.SourceVideo is not null) + { + content.Add(request.SourceVideo); + } + + _ = activity.AddTag( + OpenTelemetryConsts.GenAI.Input.Messages, + OpenTelemetryChatClient.SerializeChatMessages([new(ChatRole.User, content)])); + + if (options?.AdditionalProperties is { } props) + { + foreach (KeyValuePair prop in props) + { + _ 
= activity.AddTag(prop.Key, prop.Value); + } + } + } + } + } + + return activity; + } + + /// Adds video generation operation information to the activity. + private void TraceResponse( + Activity? activity, + string? requestModelId, + VideoGenerationOperation? operation, + Exception? error, + Stopwatch? stopwatch) + { + if (_operationDurationHistogram.Enabled && stopwatch is not null) + { + TagList tags = default; + + AddMetricTags(ref tags, requestModelId); + if (error is not null) + { + tags.Add(OpenTelemetryConsts.Error.Type, error.GetType().FullName); + } + + _operationDurationHistogram.Record(stopwatch.Elapsed.TotalSeconds, tags); + } + + if (error is not null) + { + _ = activity? + .AddTag(OpenTelemetryConsts.Error.Type, error.GetType().FullName) + .SetStatus(ActivityStatusCode.Error, error.Message); + + if (_logger is not null) + { + OpenTelemetryLog.OperationException(_logger, error); + } + } + + if (operation is not null) + { + if (activity is { IsAllDataRequested: true }) + { + _ = activity.AddTag("gen_ai.operation.id", operation.OperationId); + _ = activity.AddTag("gen_ai.operation.status", operation.Status); + } + + if (operation.Usage is { } usage) + { + if (_tokenUsageHistogram.Enabled) + { + if (usage.InputTokenCount is long inputTokens) + { + TagList tags = default; + tags.Add(OpenTelemetryConsts.GenAI.Token.Type, OpenTelemetryConsts.TokenTypeInput); + AddMetricTags(ref tags, requestModelId); + _tokenUsageHistogram.Record((int)inputTokens, tags); + } + + if (usage.OutputTokenCount is long outputTokens) + { + TagList tags = default; + tags.Add(OpenTelemetryConsts.GenAI.Token.Type, OpenTelemetryConsts.TokenTypeOutput); + AddMetricTags(ref tags, requestModelId); + _tokenUsageHistogram.Record((int)outputTokens, tags); + } + } + + if (activity is { IsAllDataRequested: true }) + { + if (usage.InputTokenCount is long inputTokens) + { + _ = activity.AddTag(OpenTelemetryConsts.GenAI.Usage.InputTokens, (int)inputTokens); + } + + if (usage.OutputTokenCount is 
long outputTokens) + { + _ = activity.AddTag(OpenTelemetryConsts.GenAI.Usage.OutputTokens, (int)outputTokens); + } + } + } + } + + void AddMetricTags(ref TagList tags, string? requestModelId) + { + tags.Add(OpenTelemetryConsts.GenAI.Operation.Name, OpenTelemetryConsts.GenAI.GenerateContentName); + + if (requestModelId is not null) + { + tags.Add(OpenTelemetryConsts.GenAI.Request.Model, requestModelId); + } + + tags.Add(OpenTelemetryConsts.GenAI.Provider.Name, _providerName); + + if (_serverAddress is string endpointAddress) + { + tags.Add(OpenTelemetryConsts.Server.Address, endpointAddress); + tags.Add(OpenTelemetryConsts.Server.Port, _serverPort); + } + } + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI/ChatCompletion/OpenTelemetryVideoGeneratorBuilderExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/ChatCompletion/OpenTelemetryVideoGeneratorBuilderExtensions.cs new file mode 100644 index 00000000000..402a89e7841 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/ChatCompletion/OpenTelemetryVideoGeneratorBuilderExtensions.cs @@ -0,0 +1,43 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// Provides extensions for configuring instances. +[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] +public static class OpenTelemetryVideoGeneratorBuilderExtensions +{ + /// + /// Adds OpenTelemetry support to the video generator pipeline, following the OpenTelemetry Semantic Conventions for Generative AI systems. + /// + /// + /// The draft specification this follows is available at . 
+ /// The specification is still experimental and subject to change; as such, the telemetry output by this client is also subject to change. + /// + /// The . + /// An optional to use to create a logger for logging events. + /// An optional source name that will be used on the telemetry data. + /// An optional callback that can be used to configure the instance. + /// The . + public static VideoGeneratorBuilder UseOpenTelemetry( + this VideoGeneratorBuilder builder, + ILoggerFactory? loggerFactory = null, + string? sourceName = null, + Action? configure = null) => + Throw.IfNull(builder).Use((innerGenerator, services) => + { + loggerFactory ??= services.GetService(); + + var g = new OpenTelemetryVideoGenerator(innerGenerator, loggerFactory?.CreateLogger(typeof(OpenTelemetryVideoGenerator)), sourceName); + configure?.Invoke(g); + + return g; + }); +} diff --git a/src/Libraries/Microsoft.Extensions.AI/Image/ImageGeneratorBuilderServiceCollectionExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/Image/ImageGeneratorBuilderServiceCollectionExtensions.cs index 3413e9957cb..ab615e03e15 100644 --- a/src/Libraries/Microsoft.Extensions.AI/Image/ImageGeneratorBuilderServiceCollectionExtensions.cs +++ b/src/Libraries/Microsoft.Extensions.AI/Image/ImageGeneratorBuilderServiceCollectionExtensions.cs @@ -13,26 +13,26 @@ namespace Microsoft.Extensions.DependencyInjection; [Experimental(DiagnosticIds.Experiments.AIImageGeneration, UrlFormat = DiagnosticIds.UrlFormat)] public static class ImageGeneratorBuilderServiceCollectionExtensions { - /// Registers a singleton in the . + /// Registers an in the . /// The to which the generator should be added. /// The inner that represents the underlying backend. /// The service lifetime for the generator. Defaults to . /// An that can be used to build a pipeline around the inner generator. /// or is . - /// The generator is registered as a singleton service. + /// The generator is registered with the specified . 
public static ImageGeneratorBuilder AddImageGenerator( this IServiceCollection serviceCollection, IImageGenerator innerGenerator, ServiceLifetime lifetime = ServiceLifetime.Singleton) => AddImageGenerator(serviceCollection, _ => innerGenerator, lifetime); - /// Registers a singleton in the . + /// Registers an in the . /// The to which the generator should be added. /// A callback that produces the inner that represents the underlying backend. /// The service lifetime for the generator. Defaults to . /// An that can be used to build a pipeline around the inner generator. /// or is . - /// The generator is registered as a singleton service. + /// The generator is registered with the specified . public static ImageGeneratorBuilder AddImageGenerator( this IServiceCollection serviceCollection, Func innerGeneratorFactory, @@ -46,14 +46,14 @@ public static ImageGeneratorBuilder AddImageGenerator( return builder; } - /// Registers a keyed singleton in the . + /// Registers a keyed in the . /// The to which the generator should be added. /// The key with which to associate the generator. /// The inner that represents the underlying backend. /// The service lifetime for the generator. Defaults to . /// An that can be used to build a pipeline around the inner generator. - /// , , or is . - /// The generator is registered as a scoped service. + /// or is . + /// The generator is registered with the specified . public static ImageGeneratorBuilder AddKeyedImageGenerator( this IServiceCollection serviceCollection, object? serviceKey, @@ -61,14 +61,14 @@ public static ImageGeneratorBuilder AddKeyedImageGenerator( ServiceLifetime lifetime = ServiceLifetime.Singleton) => AddKeyedImageGenerator(serviceCollection, serviceKey, _ => innerGenerator, lifetime); - /// Registers a keyed singleton in the . + /// Registers a keyed in the . /// The to which the generator should be added. /// The key with which to associate the generator. 
/// A callback that produces the inner that represents the underlying backend. /// The service lifetime for the generator. Defaults to . /// An that can be used to build a pipeline around the inner generator. - /// , , or is . - /// The generator is registered as a scoped service. + /// or is . + /// The generator is registered with the specified . public static ImageGeneratorBuilder AddKeyedImageGenerator( this IServiceCollection serviceCollection, object? serviceKey, diff --git a/src/Libraries/Microsoft.Extensions.AI/OpenTelemetryConsts.cs b/src/Libraries/Microsoft.Extensions.AI/OpenTelemetryConsts.cs index 8ffbd0b9dec..ba7f49a110a 100644 --- a/src/Libraries/Microsoft.Extensions.AI/OpenTelemetryConsts.cs +++ b/src/Libraries/Microsoft.Extensions.AI/OpenTelemetryConsts.cs @@ -21,6 +21,7 @@ internal static class OpenTelemetryConsts public const string TypeText = "text"; public const string TypeJson = "json"; public const string TypeImage = "image"; + public const string TypeVideo = "video"; public const string TypeAudio = "audio"; public const string TokenTypeInput = "input"; diff --git a/src/Libraries/Microsoft.Extensions.AI/Video/ConfigureOptionsVideoGenerator.cs b/src/Libraries/Microsoft.Extensions.AI/Video/ConfigureOptionsVideoGenerator.cs new file mode 100644 index 00000000000..21522420b20 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/Video/ConfigureOptionsVideoGenerator.cs @@ -0,0 +1,54 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// Represents a delegating video generator that configures a instance used by the remainder of the pipeline. 
+[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] +public sealed class ConfigureOptionsVideoGenerator : DelegatingVideoGenerator +{ + /// The callback delegate used to configure options. + private readonly Action _configureOptions; + + /// Initializes a new instance of the class with the specified callback. + /// The inner generator. + /// + /// The delegate to invoke to configure the instance. It is passed a clone of the caller-supplied instance + /// (or a newly constructed instance if the caller-supplied instance is ). + /// + /// or is . + /// + /// The delegate is passed either a new instance of if + /// the caller didn't supply a instance, or a clone (via of the caller-supplied + /// instance if one was supplied. + /// + public ConfigureOptionsVideoGenerator(IVideoGenerator innerGenerator, Action configure) + : base(innerGenerator) + { + _configureOptions = Throw.IfNull(configure); + } + + /// + public override async Task GenerateAsync( + VideoGenerationRequest request, VideoGenerationOptions? options = null, CancellationToken cancellationToken = default) + { + return await base.GenerateAsync(request, Configure(options), cancellationToken); + } + + /// Creates and configures the to pass along to the inner generator. + private VideoGenerationOptions Configure(VideoGenerationOptions? options) + { + options = options?.Clone() ?? new(); + + _configureOptions(options); + + return options; + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI/Video/ConfigureOptionsVideoGeneratorBuilderExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/Video/ConfigureOptionsVideoGeneratorBuilderExtensions.cs new file mode 100644 index 00000000000..25b9b958abf --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/Video/ConfigureOptionsVideoGeneratorBuilderExtensions.cs @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// Provides extensions for configuring instances. +[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] +public static class ConfigureOptionsVideoGeneratorBuilderExtensions +{ + /// + /// Adds a callback that configures a to be passed to the next generator in the pipeline. + /// + /// The . + /// + /// The delegate to invoke to configure the instance. + /// It is passed a clone of the caller-supplied instance (or a newly constructed instance if the caller-supplied instance is ). + /// + /// or is . + /// + /// This method can be used to set default options. The delegate is passed either a new instance of + /// if the caller didn't supply a instance, or a clone (via ) + /// of the caller-supplied instance if one was supplied. + /// + /// The . + public static VideoGeneratorBuilder ConfigureOptions( + this VideoGeneratorBuilder builder, Action configure) + { + _ = Throw.IfNull(builder); + _ = Throw.IfNull(configure); + + return builder.Use(innerGenerator => new ConfigureOptionsVideoGenerator(innerGenerator, configure)); + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI/Video/LoggingVideoGenerator.cs b/src/Libraries/Microsoft.Extensions.AI/Video/LoggingVideoGenerator.cs new file mode 100644 index 00000000000..f57762c7b49 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/Video/LoggingVideoGenerator.cs @@ -0,0 +1,123 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// A delegating video generator that logs video generation operations to an . +/// +/// +/// The provided implementation of is thread-safe for concurrent use so long as the +/// employed is also thread-safe for concurrent use. +/// +/// +/// When the employed enables , the contents of +/// prompts and options are logged. These prompts and options may contain sensitive application data. +/// is disabled by default and should never be enabled in a production environment. +/// Prompts and options are not logged at other logging levels. +/// +/// +[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] +public partial class LoggingVideoGenerator : DelegatingVideoGenerator +{ + /// An instance used for all logging. + private readonly ILogger _logger; + + /// The to use for serialization of state written to the logger. + private JsonSerializerOptions _jsonSerializerOptions; + + /// Initializes a new instance of the class. + /// The underlying . + /// An instance that will be used for all logging. + /// or is . + public LoggingVideoGenerator(IVideoGenerator innerGenerator, ILogger logger) + : base(innerGenerator) + { + _logger = Throw.IfNull(logger); + _jsonSerializerOptions = AIJsonUtilities.DefaultOptions; + } + + /// Gets or sets JSON serialization options to use when serializing logging data. + /// The value being set is . + public JsonSerializerOptions JsonSerializerOptions + { + get => _jsonSerializerOptions; + set => _jsonSerializerOptions = Throw.IfNull(value); + } + + /// + public override async Task GenerateAsync( + VideoGenerationRequest request, VideoGenerationOptions? 
options = null, CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(request); + + if (_logger.IsEnabled(LogLevel.Debug)) + { + if (_logger.IsEnabled(LogLevel.Trace)) + { + LogInvokedSensitive(nameof(GenerateAsync), request.Prompt ?? string.Empty, AsJson(options), AsJson(this.GetService())); + } + else + { + LogInvoked(nameof(GenerateAsync)); + } + } + + try + { + var operation = await base.GenerateAsync(request, options, cancellationToken); + + if (_logger.IsEnabled(LogLevel.Debug)) + { + if (_logger.IsEnabled(LogLevel.Trace)) + { + LogCompletedSensitive(nameof(GenerateAsync), $"OperationId={operation.OperationId}, Status={operation.Status}"); + } + else + { + LogCompleted(nameof(GenerateAsync)); + } + } + + return operation; + } + catch (OperationCanceledException) + { + LogInvocationCanceled(nameof(GenerateAsync)); + throw; + } + catch (Exception ex) + { + LogInvocationFailed(nameof(GenerateAsync), ex); + throw; + } + } + + private string AsJson(T value) => TelemetryHelpers.AsJson(value, _jsonSerializerOptions); + + [LoggerMessage(LogLevel.Debug, "{MethodName} invoked.")] + private partial void LogInvoked(string methodName); + + [LoggerMessage(LogLevel.Trace, "{MethodName} invoked: Prompt: {Prompt}. Options: {VideoGenerationOptions}. 
Metadata: {VideoGeneratorMetadata}.")] + private partial void LogInvokedSensitive(string methodName, string prompt, string videoGenerationOptions, string videoGeneratorMetadata); + + [LoggerMessage(LogLevel.Debug, "{MethodName} completed.")] + private partial void LogCompleted(string methodName); + + [LoggerMessage(LogLevel.Trace, "{MethodName} completed: {VideoGenerationOperation}.")] + private partial void LogCompletedSensitive(string methodName, string videoGenerationOperation); + + [LoggerMessage(LogLevel.Debug, "{MethodName} canceled.")] + private partial void LogInvocationCanceled(string methodName); + + [LoggerMessage(LogLevel.Error, "{MethodName} failed.")] + private partial void LogInvocationFailed(string methodName, Exception error); +} diff --git a/src/Libraries/Microsoft.Extensions.AI/Video/LoggingVideoGeneratorBuilderExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/Video/LoggingVideoGeneratorBuilderExtensions.cs new file mode 100644 index 00000000000..22d42ae6130 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/Video/LoggingVideoGeneratorBuilderExtensions.cs @@ -0,0 +1,58 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// Provides extensions for configuring instances. +[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] +public static class LoggingVideoGeneratorBuilderExtensions +{ + /// Adds logging to the video generator pipeline. + /// The . + /// + /// An optional used to create a logger with which logging should be performed. 
+ /// If not supplied, a required instance will be resolved from the service provider. + /// + /// An optional callback that can be used to configure the instance. + /// The . + /// is . + /// + /// + /// When the employed enables , the contents of + /// prompts and options are logged. These prompts and options may contain sensitive application data. + /// is disabled by default and should never be enabled in a production environment. + /// Prompts and options are not logged at other logging levels. + /// + /// + public static VideoGeneratorBuilder UseLogging( + this VideoGeneratorBuilder builder, + ILoggerFactory? loggerFactory = null, + Action? configure = null) + { + _ = Throw.IfNull(builder); + + return builder.Use((innerGenerator, services) => + { + loggerFactory ??= services.GetRequiredService(); + + // If the factory we resolve is for the null logger, the LoggingVideoGenerator will end up + // being an expensive nop, so skip adding it and just return the inner generator. + if (loggerFactory == NullLoggerFactory.Instance) + { + return innerGenerator; + } + + var videoGenerator = new LoggingVideoGenerator(innerGenerator, loggerFactory.CreateLogger(typeof(LoggingVideoGenerator))); + configure?.Invoke(videoGenerator); + return videoGenerator; + }); + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI/Video/VideoGeneratorBuilder.cs b/src/Libraries/Microsoft.Extensions.AI/Video/VideoGeneratorBuilder.cs new file mode 100644 index 00000000000..016dc4debc2 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/Video/VideoGeneratorBuilder.cs @@ -0,0 +1,86 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// A builder for creating pipelines of . 
+[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] +public sealed class VideoGeneratorBuilder +{ + private readonly Func _innerGeneratorFactory; + + /// The registered generator factory instances. + private List>? _generatorFactories; + + /// Initializes a new instance of the class. + /// The inner that represents the underlying backend. + /// is . + public VideoGeneratorBuilder(IVideoGenerator innerGenerator) + { + _ = Throw.IfNull(innerGenerator); + _innerGeneratorFactory = _ => innerGenerator; + } + + /// Initializes a new instance of the class. + /// A callback that produces the inner that represents the underlying backend. + /// is . + public VideoGeneratorBuilder(Func innerGeneratorFactory) + { + _innerGeneratorFactory = Throw.IfNull(innerGeneratorFactory); + } + + /// Builds an that represents the entire pipeline. Calls to this instance will pass through each of the pipeline stages in turn. + /// + /// The that should provide services to the instances. + /// If null, an empty will be used. + /// + /// An instance of that represents the entire pipeline. + public IVideoGenerator Build(IServiceProvider? services = null) + { + services ??= EmptyServiceProvider.Instance; + var videoGenerator = _innerGeneratorFactory(services); + + // To match intuitive expectations, apply the factories in reverse order, so that the first factory added is the outermost. + if (_generatorFactories is not null) + { + for (var i = _generatorFactories.Count - 1; i >= 0; i--) + { + videoGenerator = _generatorFactories[i](videoGenerator, services) ?? + throw new InvalidOperationException( + $"The {nameof(VideoGeneratorBuilder)} entry at index {i} returned null. " + + $"Ensure that the callbacks passed to {nameof(Use)} return non-null {nameof(IVideoGenerator)} instances."); + } + } + + return videoGenerator; + } + + /// Adds a factory for an intermediate video generator to the video generator pipeline. + /// The generator factory function. 
+ /// The updated instance. + /// is . + public VideoGeneratorBuilder Use(Func generatorFactory) + { + _ = Throw.IfNull(generatorFactory); + + return Use((innerGenerator, _) => generatorFactory(innerGenerator)); + } + + /// Adds a factory for an intermediate video generator to the video generator pipeline. + /// The generator factory function. + /// The updated instance. + /// is . + public VideoGeneratorBuilder Use(Func generatorFactory) + { + _ = Throw.IfNull(generatorFactory); + + (_generatorFactories ??= []).Add(generatorFactory); + return this; + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI/Video/VideoGeneratorBuilderServiceCollectionExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/Video/VideoGeneratorBuilderServiceCollectionExtensions.cs new file mode 100644 index 00000000000..23514815e2a --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/Video/VideoGeneratorBuilderServiceCollectionExtensions.cs @@ -0,0 +1,85 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Extensions.AI; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.DependencyInjection; + +/// Provides extension methods for registering with a . +[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] +public static class VideoGeneratorBuilderServiceCollectionExtensions +{ + /// Registers an in the . + /// The to which the generator should be added. + /// The inner that represents the underlying backend. + /// The service lifetime for the generator. Defaults to . + /// An that can be used to build a pipeline around the inner generator. + /// or is . + /// The generator is registered with the specified . 
+ public static VideoGeneratorBuilder AddVideoGenerator( + this IServiceCollection serviceCollection, + IVideoGenerator innerGenerator, + ServiceLifetime lifetime = ServiceLifetime.Singleton) + => AddVideoGenerator(serviceCollection, _ => innerGenerator, lifetime); + + /// Registers an in the . + /// The to which the generator should be added. + /// A callback that produces the inner that represents the underlying backend. + /// The service lifetime for the generator. Defaults to . + /// An that can be used to build a pipeline around the inner generator. + /// or is . + /// The generator is registered with the specified . + public static VideoGeneratorBuilder AddVideoGenerator( + this IServiceCollection serviceCollection, + Func innerGeneratorFactory, + ServiceLifetime lifetime = ServiceLifetime.Singleton) + { + _ = Throw.IfNull(serviceCollection); + _ = Throw.IfNull(innerGeneratorFactory); + + var builder = new VideoGeneratorBuilder(innerGeneratorFactory); + serviceCollection.Add(new ServiceDescriptor(typeof(IVideoGenerator), builder.Build, lifetime)); + return builder; + } + + /// Registers a keyed in the . + /// The to which the generator should be added. + /// The key with which to associate the generator. + /// The inner that represents the underlying backend. + /// The service lifetime for the generator. Defaults to . + /// An that can be used to build a pipeline around the inner generator. + /// or is . + /// The generator is registered with the specified . + public static VideoGeneratorBuilder AddKeyedVideoGenerator( + this IServiceCollection serviceCollection, + object? serviceKey, + IVideoGenerator innerGenerator, + ServiceLifetime lifetime = ServiceLifetime.Singleton) + => AddKeyedVideoGenerator(serviceCollection, serviceKey, _ => innerGenerator, lifetime); + + /// Registers a keyed in the . + /// The to which the generator should be added. + /// The key with which to associate the generator. 
+ /// A callback that produces the inner that represents the underlying backend. + /// The service lifetime for the generator. Defaults to . + /// An that can be used to build a pipeline around the inner generator. + /// or is . + /// The generator is registered with the specified . + public static VideoGeneratorBuilder AddKeyedVideoGenerator( + this IServiceCollection serviceCollection, + object? serviceKey, + Func innerGeneratorFactory, + ServiceLifetime lifetime = ServiceLifetime.Singleton) + { + _ = Throw.IfNull(serviceCollection); + _ = Throw.IfNull(innerGeneratorFactory); + + var builder = new VideoGeneratorBuilder(innerGeneratorFactory); + serviceCollection.Add(new ServiceDescriptor(typeof(IVideoGenerator), serviceKey, factory: (services, serviceKey) => builder.Build(services), lifetime)); + return builder; + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI/Video/VideoGeneratorBuilderVideoGeneratorExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/Video/VideoGeneratorBuilderVideoGeneratorExtensions.cs new file mode 100644 index 00000000000..142441f65ad --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/Video/VideoGeneratorBuilderVideoGeneratorExtensions.cs @@ -0,0 +1,29 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.DiagnosticIds; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// Provides extension methods for working with in the context of . +[Experimental(DiagnosticIds.Experiments.AIVideoGeneration, UrlFormat = DiagnosticIds.UrlFormat)] +public static class VideoGeneratorBuilderVideoGeneratorExtensions +{ + /// Creates a new using as its inner generator. + /// The generator to use as the inner generator. + /// The new instance. + /// is . 
+ /// + /// This method is equivalent to using the constructor directly, + /// specifying as the inner generator. + /// + public static VideoGeneratorBuilder AsBuilder(this IVideoGenerator innerGenerator) + { + _ = Throw.IfNull(innerGenerator); + + return new VideoGeneratorBuilder(innerGenerator); + } +} diff --git a/src/Shared/DiagnosticIds/DiagnosticIds.cs b/src/Shared/DiagnosticIds/DiagnosticIds.cs index 94cc1a1f04a..e3e45c13e04 100644 --- a/src/Shared/DiagnosticIds/DiagnosticIds.cs +++ b/src/Shared/DiagnosticIds/DiagnosticIds.cs @@ -49,6 +49,7 @@ internal static class Experiments // All AI experiments share a diagnostic ID but have different // constants to manage which experiment each API belongs to. internal const string AIImageGeneration = AIExperiments; + internal const string AIVideoGeneration = AIExperiments; internal const string AISpeechToText = AIExperiments; internal const string AITextToSpeech = AIExperiments; internal const string AIMcpServers = AIExperiments; @@ -67,6 +68,7 @@ internal static class Experiments internal const string AIOpenAIResponses = "OPENAI001"; internal const string AIOpenAIAssistants = "OPENAI001"; internal const string AIOpenAIImageClient = "OPENAI001"; + internal const string AIOpenAIVideoClient = "OPENAI001"; internal const string AIOpenAIAudio = "OPENAI001"; internal const string AIOpenAIReasoning = "OPENAI001"; internal const string AIOpenAIRealtime = "OPENAI002"; diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/TestVideoGenerationOperation.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/TestVideoGenerationOperation.cs new file mode 100644 index 00000000000..0419a54e394 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/TestVideoGenerationOperation.cs @@ -0,0 +1,51 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.Extensions.AI; + +public class TestVideoGenerationOperation : VideoGenerationOperation +{ + private string? _operationId; + private string? _status; + private int? _percentComplete; + + public TestVideoGenerationOperation( + string? operationId = "test-op-id", + string? status = "completed", + int? percentComplete = 100) + { + _operationId = operationId; + _status = status; + _percentComplete = percentComplete; + } + + public override string? OperationId => _operationId; + + public override string? Status => _status; + + public override int? PercentComplete => _percentComplete; + + public override bool IsCompleted => + string.Equals(_status, "completed", StringComparison.OrdinalIgnoreCase) || + string.Equals(_status, "failed", StringComparison.OrdinalIgnoreCase); + + public override string? FailureReason => null; + + public IList? Contents { get; set; } + + public override Task UpdateAsync(CancellationToken cancellationToken = default) => Task.CompletedTask; + + public override Task WaitForCompletionAsync( + IProgress? progress = null, + CancellationToken cancellationToken = default) => Task.CompletedTask; + + public override Task> GetContentsAsync( + VideoGenerationOptions? options = null, + CancellationToken cancellationToken = default) + => Task.FromResult>(Contents ?? new List()); +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/TestVideoGenerator.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/TestVideoGenerator.cs new file mode 100644 index 00000000000..e32dd595166 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/TestVideoGenerator.cs @@ -0,0 +1,46 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.Extensions.AI; + +public sealed class TestVideoGenerator : IVideoGenerator +{ + public TestVideoGenerator() + { + GetServiceCallback = DefaultGetServiceCallback; + } + + public IServiceProvider? Services { get; set; } + + public Func>? GenerateVideosAsyncCallback { get; set; } + + public Func GetServiceCallback { get; set; } + + public bool DisposeInvoked { get; private set; } + + private object? DefaultGetServiceCallback(Type serviceType, object? serviceKey) + => serviceType is not null && serviceKey is null && serviceType.IsInstanceOfType(this) ? this : null; + + public Task GenerateAsync( + VideoGenerationRequest request, + VideoGenerationOptions? options = null, + CancellationToken cancellationToken = default) + { + return GenerateVideosAsyncCallback?.Invoke(request, options, cancellationToken) ?? + Task.FromResult(new TestVideoGenerationOperation()); + } + + public object? GetService(Type serviceType, object? serviceKey = null) + { + return GetServiceCallback.Invoke(serviceType, serviceKey); + } + + public void Dispose() + { + DisposeInvoked = true; + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Video/DelegatingVideoGeneratorTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Video/DelegatingVideoGeneratorTests.cs new file mode 100644 index 00000000000..fa06547f4f2 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Video/DelegatingVideoGeneratorTests.cs @@ -0,0 +1,100 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Threading.Tasks; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class DelegatingVideoGeneratorTests +{ + [Fact] + public void RequiresInnerVideoGenerator() + { + Assert.Throws("innerGenerator", () => new TestDelegatingVideoGenerator(null!)); + } + + [Fact] + public async Task GenerateVideosAsyncDefaultsToInnerGeneratorAsync() + { + var expectedOperation = new TestVideoGenerationOperation(); + using var inner = new TestVideoGenerator + { + GenerateVideosAsyncCallback = (request, options, ct) => Task.FromResult(expectedOperation) + }; + + using var delegating = new TestDelegatingVideoGenerator(inner); + var result = await delegating.GenerateAsync(new VideoGenerationRequest("Test")); + Assert.Same(expectedOperation, result); + } + + [Fact] + public void GetServiceThrowsForNullType() + { + using var inner = new TestVideoGenerator(); + using var generator = new TestDelegatingVideoGenerator(inner); + Assert.Throws("serviceType", () => generator.GetService(null!)); + } + + [Fact] + public void GetServiceReturnsSelfIfCompatibleWithRequestAndKeyIsNull() + { + using var inner = new TestVideoGenerator(); + using var generator = new TestDelegatingVideoGenerator(inner); + Assert.Same(generator, generator.GetService(typeof(DelegatingVideoGenerator))); + Assert.Same(generator, generator.GetService(typeof(IVideoGenerator))); + } + + [Fact] + public void GetServiceDelegatesToInnerIfKeyIsNotNull() + { + using var inner = new TestVideoGenerator + { + GetServiceCallback = (type, key) => key is not null ? "inner-result" : null + }; + + using var generator = new TestDelegatingVideoGenerator(inner); + Assert.Equal("inner-result", generator.GetService(typeof(string), "someKey")); + } + + [Fact] + public void GetServiceDelegatesToInnerIfNotCompatibleWithRequest() + { + using var inner = new TestVideoGenerator + { + GetServiceCallback = (type, key) => type == typeof(string) ? 
"inner-result" : null + }; + + using var generator = new TestDelegatingVideoGenerator(inner); + Assert.Equal("inner-result", generator.GetService(typeof(string))); + } + + [Fact] + public void Dispose_SetsFlag() + { + using var inner = new TestVideoGenerator(); + var generator = new TestDelegatingVideoGenerator(inner); + Assert.False(inner.DisposeInvoked); + generator.Dispose(); + Assert.True(inner.DisposeInvoked); + } + + [Fact] + public void Dispose_MultipleCallsSafe() + { + using var inner = new TestVideoGenerator(); + var generator = new TestDelegatingVideoGenerator(inner); + generator.Dispose(); + generator.Dispose(); + Assert.True(inner.DisposeInvoked); + } + + private sealed class TestDelegatingVideoGenerator : DelegatingVideoGenerator + { + public TestDelegatingVideoGenerator(IVideoGenerator innerGenerator) + : base(innerGenerator) + { + } + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Video/VideoGenerationOptionsTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Video/VideoGenerationOptionsTests.cs new file mode 100644 index 00000000000..2b092ee79b7 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Video/VideoGenerationOptionsTests.cs @@ -0,0 +1,156 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Drawing;
using System.Text.Json;
using Xunit;

namespace Microsoft.Extensions.AI;

/// <summary>Unit tests for <see cref="VideoGenerationOptions"/>: defaults, property round-trip, cloning, and JSON serialization.</summary>
public class VideoGenerationOptionsTests
{
    [Fact]
    public void Constructor_Defaults()
    {
        // A freshly constructed instance must leave every option unset (null).
        var options = new VideoGenerationOptions();

        Assert.Null(options.AspectRatio);
        Assert.Null(options.Count);
        Assert.Null(options.Duration);
        Assert.Null(options.FramesPerSecond);
        Assert.Null(options.GenerateAudio);
        Assert.Null(options.MediaType);
        Assert.Null(options.ModelId);
        Assert.Null(options.RawRepresentationFactory);
        Assert.Null(options.ResponseFormat);
        Assert.Null(options.Seed);
        Assert.Null(options.VideoSize);
        Assert.Null(options.AdditionalProperties);
    }

    [Fact]
    public void Properties_Roundtrip()
    {
        // Every settable property must read back the value assigned to it.
        var options = new VideoGenerationOptions
        {
            AspectRatio = "16:9",
            Count = 3,
            Duration = TimeSpan.FromSeconds(15),
            FramesPerSecond = 30,
            GenerateAudio = true,
            MediaType = "video/webm",
            ModelId = "sora",
            ResponseFormat = VideoGenerationResponseFormat.Data,
            Seed = 42,
            VideoSize = new Size(1280, 720),
            AdditionalProperties = new() { ["key"] = "value" },
        };

        Assert.Equal("16:9", options.AspectRatio);
        Assert.Equal(3, options.Count);
        Assert.Equal(TimeSpan.FromSeconds(15), options.Duration);
        Assert.Equal(30, options.FramesPerSecond);
        Assert.True(options.GenerateAudio);
        Assert.Equal("video/webm", options.MediaType);
        Assert.Equal("sora", options.ModelId);
        Assert.Equal(VideoGenerationResponseFormat.Data, options.ResponseFormat);
        Assert.Equal(42, options.Seed);
        Assert.Equal(new Size(1280, 720), options.VideoSize);
        Assert.Equal("value", options.AdditionalProperties["key"]);
    }

    [Fact]
    public void Clone_CreatesIndependentCopy()
    {
        var original = new VideoGenerationOptions
        {
            AspectRatio = "9:16",
            Count = 2,
            Duration = TimeSpan.FromSeconds(5),
            FramesPerSecond = 24,
            GenerateAudio = true,
            MediaType = "video/mp4",
            ModelId = "model-1",
            ResponseFormat = VideoGenerationResponseFormat.Uri,
            Seed = 123,
            VideoSize = new Size(1920, 1080),
            AdditionalProperties = new() { ["key"] = "value" },
        };

        var clone = original.Clone();

        // The clone must be a distinct instance with equal scalar values and an
        // independently-owned AdditionalProperties dictionary (shallow-copy contract).
        Assert.NotSame(original, clone);
        Assert.Equal(original.AspectRatio, clone.AspectRatio);
        Assert.Equal(original.Count, clone.Count);
        Assert.Equal(original.Duration, clone.Duration);
        Assert.Equal(original.FramesPerSecond, clone.FramesPerSecond);
        Assert.Equal(original.GenerateAudio, clone.GenerateAudio);
        Assert.Equal(original.MediaType, clone.MediaType);
        Assert.Equal(original.ModelId, clone.ModelId);
        Assert.Equal(original.ResponseFormat, clone.ResponseFormat);
        Assert.Equal(original.Seed, clone.Seed);
        Assert.Equal(original.VideoSize, clone.VideoSize);
        Assert.NotSame(original.AdditionalProperties, clone.AdditionalProperties);
    }

    [Fact]
    public void Clone_FromNull_ReturnsDefaults()
    {
        // The copy constructor must tolerate a null source and behave like the default ctor.
        var options = new DerivedVideoGenerationOptions(null);
        Assert.Null(options.AspectRatio);
        Assert.Null(options.Count);
        Assert.Null(options.Duration);
        Assert.Null(options.GenerateAudio);
        Assert.Null(options.ModelId);
        Assert.Null(options.Seed);
    }

    [Theory]
    [InlineData(VideoGenerationResponseFormat.Uri)]
    [InlineData(VideoGenerationResponseFormat.Data)]
    [InlineData(VideoGenerationResponseFormat.Hosted)]
    public void ResponseFormat_EnumValues(VideoGenerationResponseFormat format)
    {
        var options = new VideoGenerationOptions { ResponseFormat = format };
        Assert.Equal(format, options.ResponseFormat);
    }

    [Fact]
    public void JsonSerialization_Roundtrip()
    {
        var options = new VideoGenerationOptions
        {
            AspectRatio = "1:1",
            Count = 2,
            Duration = TimeSpan.FromSeconds(10),
            FramesPerSecond = 24,
            GenerateAudio = true,
            MediaType = "video/mp4",
            ModelId = "test-model",
            Seed = 99,
            VideoSize = new Size(640, 480),
            ResponseFormat = VideoGenerationResponseFormat.Data,
            AdditionalProperties = new() { ["custom"] = "prop" },
        };

        string json = JsonSerializer.Serialize(options, AIJsonUtilities.DefaultOptions);

        // FIX: the Deserialize call was missing its <VideoGenerationOptions> type
        // argument (stripped from the patch), which does not compile.
        var deserialized = JsonSerializer.Deserialize<VideoGenerationOptions>(json, AIJsonUtilities.DefaultOptions);

        Assert.NotNull(deserialized);
        Assert.Equal(options.AspectRatio, deserialized!.AspectRatio);
        Assert.Equal(options.Count, deserialized.Count);
        Assert.Equal(options.GenerateAudio, deserialized.GenerateAudio);
        Assert.Equal(options.MediaType, deserialized.MediaType);
        Assert.Equal(options.ModelId, deserialized.ModelId);
        Assert.Equal(options.ResponseFormat, deserialized.ResponseFormat);
        Assert.Equal(options.Seed, deserialized.Seed);
    }

    // Exposes the protected copy constructor so Clone_FromNull_ReturnsDefaults can exercise it.
    private class DerivedVideoGenerationOptions : VideoGenerationOptions
    {
        public DerivedVideoGenerationOptions(VideoGenerationOptions? other)
            : base(other)
        {
        }
    }
}
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Threading.Tasks;
using Xunit;

namespace Microsoft.Extensions.AI;

/// <summary>
/// Unit tests for the <see cref="IVideoGenerator"/> extension methods:
/// GetService/GetRequiredService, GenerateVideoAsync, and EditVideoAsync.
/// </summary>
public class VideoGeneratorExtensionsTests
{
    // NOTE(review): generic type arguments throughout this file were stripped when
    // the patch was captured (xUnit has no non-generic Assert.Throws(string, Action)
    // overload, so the originals must have had them). They are restored below to the
    // values implied by the assertions — confirm against the original commit.

    [Fact]
    public void GetService_Generic_NullGenerator_Throws()
    {
        Assert.Throws<ArgumentNullException>("generator", () => ((IVideoGenerator)null!).GetService<object>());
    }

    [Fact]
    public void GetService_Generic_ReturnsService()
    {
        using var generator = new TestVideoGenerator();

        var result = generator.GetService<IVideoGenerator>();

        Assert.Same(generator, result);
    }

    [Fact]
    public void GetRequiredService_NullGenerator_Throws()
    {
        Assert.Throws<ArgumentNullException>("generator", () => ((IVideoGenerator)null!).GetRequiredService(typeof(IVideoGenerator)));
    }

    [Fact]
    public void GetRequiredService_NullType_Throws()
    {
        using var generator = new TestVideoGenerator();
        Assert.Throws<ArgumentNullException>("serviceType", () => generator.GetRequiredService(null!));
    }

    [Fact]
    public void GetRequiredService_ServiceNotAvailable_Throws()
    {
        using var generator = new TestVideoGenerator();
        Assert.Throws<InvalidOperationException>(() => generator.GetRequiredService(typeof(string)));
    }

    [Fact]
    public void GetRequiredService_Generic_ServiceNotAvailable_Throws()
    {
        using var generator = new TestVideoGenerator();
        Assert.Throws<InvalidOperationException>(() => generator.GetRequiredService<string>());
    }

    [Fact]
    public async Task GenerateVideosAsync_NullGenerator_Throws()
    {
        await Assert.ThrowsAsync<ArgumentNullException>("generator", () =>
            ((IVideoGenerator)null!).GenerateVideoAsync("Test"));
    }

    [Fact]
    public async Task GenerateVideosAsync_NullPrompt_Throws()
    {
        using var generator = new TestVideoGenerator();
        await Assert.ThrowsAsync<ArgumentNullException>("prompt", () =>
            generator.GenerateVideoAsync(null!));
    }

    [Fact]
    public async Task GenerateVideosAsync_CallsGenerateAsync()
    {
        // The convenience overload must forward the prompt into a request with no start frame.
        VideoGenerationRequest? capturedRequest = null;
        using var generator = new TestVideoGenerator
        {
            GenerateVideosAsyncCallback = (request, options, ct) =>
            {
                capturedRequest = request;
                return Task.FromResult(new TestVideoGenerationOperation());
            }
        };

        await generator.GenerateVideoAsync("A cat video");

        Assert.NotNull(capturedRequest);
        Assert.Equal("A cat video", capturedRequest!.Prompt);
        Assert.Null(capturedRequest.StartFrame);
    }

    [Fact]
    public async Task EditVideosAsync_NullGenerator_Throws()
    {
        await Assert.ThrowsAsync<ArgumentNullException>("generator", () =>
            ((IVideoGenerator)null!).EditVideoAsync(new DataContent("dGVzdA=="u8.ToArray(), "video/mp4"), "prompt"));
    }

    [Fact]
    public async Task EditVideosAsync_NullSourceVideo_Throws()
    {
        using var generator = new TestVideoGenerator();
        await Assert.ThrowsAsync<ArgumentNullException>("sourceVideo", () =>
            generator.EditVideoAsync((AIContent)null!, "prompt"));
    }

    [Fact]
    public async Task EditVideosAsync_NullPrompt_Throws()
    {
        using var generator = new TestVideoGenerator();
        await Assert.ThrowsAsync<ArgumentNullException>("prompt", () =>
            generator.EditVideoAsync(new DataContent("dGVzdA=="u8.ToArray(), "video/mp4"), null!));
    }

    [Fact]
    public async Task EditVideoAsync_DataContent_CallsGenerateAsync()
    {
        VideoGenerationRequest? capturedRequest = null;
        using var generator = new TestVideoGenerator
        {
            GenerateVideosAsyncCallback = (request, options, ct) =>
            {
                capturedRequest = request;
                return Task.FromResult(new TestVideoGenerationOperation());
            }
        };

        var originalVideo = new DataContent("dGVzdA=="u8.ToArray(), "video/mp4");
        await generator.EditVideoAsync(originalVideo, "Make it faster");

        Assert.NotNull(capturedRequest);
        Assert.Equal("Make it faster", capturedRequest!.Prompt);
        Assert.NotNull(capturedRequest.SourceVideo);
    }

    [Fact]
    public async Task EditVideoAsync_ByteArray_CallsGenerateAsync()
    {
        VideoGenerationRequest? capturedRequest = null;
        using var generator = new TestVideoGenerator
        {
            GenerateVideosAsyncCallback = (request, options, ct) =>
            {
                capturedRequest = request;
                return Task.FromResult(new TestVideoGenerationOperation());
            }
        };

        await generator.EditVideoAsync(new byte[] { 1, 2, 3, 4 }, "test.mp4", "Add effects");

        Assert.NotNull(capturedRequest);
        Assert.Equal("Add effects", capturedRequest!.Prompt);
        Assert.NotNull(capturedRequest.SourceVideo);
    }

    [Fact]
    public async Task EditVideoAsync_ByteArray_NullFileName_Throws()
    {
        using var generator = new TestVideoGenerator();
        await Assert.ThrowsAsync<ArgumentNullException>("fileName", () =>
            generator.EditVideoAsync(new byte[] { 1 }, null!, "prompt"));
    }

    [Fact]
    public async Task EditVideoAsync_ByteArray_NullPrompt_Throws()
    {
        using var generator = new TestVideoGenerator();
        await Assert.ThrowsAsync<ArgumentNullException>("prompt", () =>
            generator.EditVideoAsync(new byte[] { 1 }, "test.mp4", null!));
    }
}
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using Xunit;

namespace Microsoft.Extensions.AI;

/// <summary>Unit tests for <see cref="VideoGeneratorMetadata"/> construction.</summary>
public class VideoGeneratorMetadataTests
{
    [Fact]
    public void Constructor_NullValues()
    {
        // The parameterless constructor leaves all metadata unpopulated.
        VideoGeneratorMetadata metadata = new();

        Assert.Null(metadata.ProviderName);
        Assert.Null(metadata.ProviderUri);
        Assert.Null(metadata.DefaultModelId);
    }

    [Fact]
    public void Constructor_WithValues()
    {
        Uri providerUri = new("https://api.example.com/v1");

        VideoGeneratorMetadata metadata = new("test-provider", providerUri, "sora");

        // Each constructor argument must surface through its corresponding property.
        Assert.Equal("test-provider", metadata.ProviderName);
        Assert.Equal(providerUri, metadata.ProviderUri);
        Assert.Equal("sora", metadata.DefaultModelId);
    }
}
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// NOTE(review): this span of the patch contains three test files —
// VideoGeneratorTests.cs, VideoGeneratorIntegrationTests.cs, and
// OpenAIVideoGeneratorIntegrationTests.cs — reproduced here in order.

using System;
using System.Diagnostics.CodeAnalysis;
using System.Drawing;
using System.Threading.Tasks;
using Microsoft.TestUtilities;
using Xunit;

#pragma warning disable CA2214 // Do not call overridable methods in constructors
#pragma warning disable OPENAI001 // Experimental OpenAI APIs

namespace Microsoft.Extensions.AI;

/// <summary>Unit tests for the <see cref="IVideoGenerator"/> contract, exercised via <see cref="TestVideoGenerator"/>.</summary>
public class VideoGeneratorTests
{
    [Fact]
    public void GetService_WithServiceKey_ReturnsNull()
    {
        // Keyed service lookups are not supported by the test generator.
        using var generator = new TestVideoGenerator();
        Assert.Null(generator.GetService(typeof(IVideoGenerator), "key"));
    }

    [Fact]
    public void GetService_WithoutServiceKey_CallsCallback()
    {
        using var generator = new TestVideoGenerator();
        var result = generator.GetService(typeof(IVideoGenerator));
        Assert.Same(generator, result);
    }

    [Fact]
    public async Task GenerateVideosAsync_CallsCallback()
    {
        var expectedRequest = new VideoGenerationRequest("Test prompt");
        var expectedOperation = new TestVideoGenerationOperation();

        using var generator = new TestVideoGenerator
        {
            GenerateVideosAsyncCallback = (request, options, ct) =>
            {
                // The request instance must flow through unmodified.
                Assert.Same(expectedRequest, request);
                return Task.FromResult(expectedOperation);
            }
        };

        var result = await generator.GenerateAsync(expectedRequest);
        Assert.Same(expectedOperation, result);
    }

    [Fact]
    public async Task GenerateVideosAsync_NoCallback_ReturnsDefaultOperation()
    {
        using var generator = new TestVideoGenerator();
        var result = await generator.GenerateAsync(new VideoGenerationRequest("Test"));
        Assert.NotNull(result);
        Assert.True(result.IsCompleted);
    }

    [Fact]
    public void Dispose_SetsFlag()
    {
        var generator = new TestVideoGenerator();
        Assert.False(generator.DisposeInvoked);
        generator.Dispose();
        Assert.True(generator.DisposeInvoked);
    }

    [Fact]
    public void Dispose_MultipleCallsSafe()
    {
        var generator = new TestVideoGenerator();
        generator.Dispose();
        generator.Dispose(); // Should not throw
        Assert.True(generator.DisposeInvoked);
    }

    [Fact]
    public async Task GenerateVideosAsync_WithOptions_PassesThroughCorrectly()
    {
        var options = new VideoGenerationOptions
        {
            Count = 2,
            VideoSize = new Size(1920, 1080),
            MediaType = "video/mp4",
            ModelId = "sora",
            Duration = TimeSpan.FromSeconds(10),
            FramesPerSecond = 24,
            ResponseFormat = VideoGenerationResponseFormat.Data
        };

        VideoGenerationOptions? capturedOptions = null;

        using var generator = new TestVideoGenerator
        {
            GenerateVideosAsyncCallback = (request, opts, ct) =>
            {
                capturedOptions = opts;
                return Task.FromResult(new TestVideoGenerationOperation());
            }
        };

        await generator.GenerateAsync(new VideoGenerationRequest("Test"), options);

        Assert.NotNull(capturedOptions);
        Assert.Equal(2, capturedOptions!.Count);
        Assert.Equal(new Size(1920, 1080), capturedOptions.VideoSize);
        Assert.Equal("video/mp4", capturedOptions.MediaType);
        Assert.Equal("sora", capturedOptions.ModelId);
        Assert.Equal(TimeSpan.FromSeconds(10), capturedOptions.Duration);
        Assert.Equal(24, capturedOptions.FramesPerSecond);
        Assert.Equal(VideoGenerationResponseFormat.Data, capturedOptions.ResponseFormat);
    }

    [Fact]
    public async Task GenerateVideosAsync_WithEditRequest_PassesThroughCorrectly()
    {
        var sourceVideo = new DataContent("dGVzdA=="u8.ToArray(), "video/mp4");
        var request = new VideoGenerationRequest("Edit this") { SourceVideo = sourceVideo };

        VideoGenerationRequest? capturedRequest = null;

        using var generator = new TestVideoGenerator
        {
            GenerateVideosAsyncCallback = (req, opts, ct) =>
            {
                capturedRequest = req;
                return Task.FromResult(new TestVideoGenerationOperation());
            }
        };

        await generator.GenerateAsync(request);

        Assert.NotNull(capturedRequest);
        Assert.Equal("Edit this", capturedRequest!.Prompt);
        Assert.NotNull(capturedRequest.SourceVideo);
    }
}

/// <summary>
/// Base class for provider integration tests of <see cref="IVideoGenerator"/>.
/// Derived classes supply a concrete generator via <see cref="CreateGenerator"/>;
/// tests are skipped when no generator is configured.
/// </summary>
public abstract class VideoGeneratorIntegrationTests : IDisposable
{
    private readonly IVideoGenerator? _generator;

    protected VideoGeneratorIntegrationTests()
    {
        _generator = CreateGenerator();
    }

    public void Dispose()
    {
        _generator?.Dispose();
        GC.SuppressFinalize(this);
    }

    /// <summary>Creates the provider-specific generator, or null when the provider is not configured.</summary>
    protected abstract IVideoGenerator? CreateGenerator();

    [ConditionalFact]
    public virtual async Task GenerateVideosAsync_SingleVideoGeneration()
    {
        SkipIfNotEnabled();

        var options = new VideoGenerationOptions
        {
            Count = 1
        };

        var operation = await _generator.GenerateVideoAsync("A simple animation of a bouncing ball", options);

        Assert.NotNull(operation);
        Assert.NotNull(operation.OperationId);

        await operation.WaitForCompletionAsync();
        Assert.True(operation.IsCompleted);

        var contents = await operation.GetContentsAsync();
        Assert.NotEmpty(contents);

        // Providers may return either a hosted URI or inline bytes; both are acceptable.
        var content = Assert.Single(contents);
        switch (content)
        {
            case UriContent uc:
                Assert.StartsWith("http", uc.Uri.Scheme, StringComparison.Ordinal);
                break;

            case DataContent dc:
                Assert.False(dc.Data.IsEmpty);
                Assert.StartsWith("video/", dc.MediaType, StringComparison.Ordinal);
                break;

            default:
                Assert.Fail($"Unexpected content type: {content.GetType()}");
                break;
        }
    }

    [ConditionalFact]
    public virtual async Task GenerateVideosAsync_MultipleVideos()
    {
        SkipIfNotEnabled();

        var options = new VideoGenerationOptions
        {
            Count = 2
        };

        var operation = await _generator.GenerateVideoAsync("A cat sitting on a table", options);

        Assert.NotNull(operation);

        await operation.WaitForCompletionAsync();
        Assert.True(operation.IsCompleted);

        var contents = await operation.GetContentsAsync();
        Assert.NotEmpty(contents);
        Assert.Equal(2, contents.Count);

        foreach (var content in contents)
        {
            // FIX: Assert.IsType was missing its <DataContent> type argument
            // (stripped from the patch); use its typed return value in place of the cast.
            var dataContent = Assert.IsType<DataContent>(content);
            Assert.False(dataContent.Data.IsEmpty);
            Assert.StartsWith("video/", dataContent.MediaType, StringComparison.Ordinal);
        }
    }

    /// <summary>Throws <see cref="SkipTestException"/> when integration tests are disabled or no generator was created.</summary>
    [MemberNotNull(nameof(_generator))]
    protected void SkipIfNotEnabled()
    {
        string? skipIntegration = TestRunnerConfiguration.Instance["SkipIntegrationTests"];

        if (skipIntegration is not null || _generator is null)
        {
            throw new SkipTestException("Generator is not enabled.");
        }
    }
}

/// <summary>Runs the shared integration suite against the OpenAI video generator adapter.</summary>
public class OpenAIVideoGeneratorIntegrationTests : VideoGeneratorIntegrationTests
{
    protected override IVideoGenerator? CreateGenerator()
        => IntegrationTestHelpers.GetOpenAIClient()?
            .GetVideoClient()
            .AsIVideoGenerator(TestRunnerConfiguration.Instance["OpenAI:VideoModel"] ?? "sora");
}
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

#pragma warning disable OPENAI001 // Experimental OpenAI APIs

using System;
using System.ClientModel;
using System.ClientModel.Primitives;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using OpenAI;
using OpenAI.Videos;
using Xunit;

namespace Microsoft.Extensions.AI;

/// <summary>
/// Unit tests for the OpenAI <see cref="IVideoGenerator"/> adapter, using a fake
/// HTTP transport to capture and validate the wire-level request bodies.
/// </summary>
public class OpenAIVideoGeneratorTests
{
    // NOTE(review): generic type arguments in this file were stripped when the patch
    // was captured; they are restored below to the values implied by the assertions.
    // The exact arguments in GetService_ReturnsExpectedServices are inferred from the
    // pattern used by sibling OpenAI adapter tests — confirm against the original.

    [Fact]
    public void AsIVideoGenerator_InvalidArgs_Throws()
    {
        Assert.Throws<ArgumentNullException>("videoClient", () => ((VideoClient)null!).AsIVideoGenerator());
    }

    [Fact]
    public void AsIVideoGenerator_OpenAIClient_ProducesExpectedMetadata()
    {
        Uri endpoint = new("http://localhost/some/endpoint");
        string model = "sora";

        var client = new OpenAIClient(new ApiKeyCredential("key"), new OpenAIClientOptions { Endpoint = endpoint });

        IVideoGenerator videoGenerator = client.GetVideoClient().AsIVideoGenerator(model);
        var metadata = videoGenerator.GetService<VideoGeneratorMetadata>();
        Assert.Equal(endpoint, metadata?.ProviderUri);
        Assert.Equal(model, metadata?.DefaultModelId);
    }

    [Fact]
    public void GetService_ReturnsExpectedServices()
    {
        var client = new OpenAIClient(new ApiKeyCredential("key"));
        IVideoGenerator videoGenerator = client.GetVideoClient().AsIVideoGenerator("sora");

        Assert.Same(videoGenerator, videoGenerator.GetService<IVideoGenerator>());
        Assert.Same(videoGenerator, videoGenerator.GetService<object>());
        Assert.NotNull(videoGenerator.GetService<VideoClient>());
        Assert.NotNull(videoGenerator.GetService<VideoGeneratorMetadata>());
    }

    [Fact]
    public async Task GenerateAsync_CreateWithDuration_EmitsSecondsAsString()
    {
        string? capturedBody = null;
        using var handler = new RoutingHandler(request =>
        {
            capturedBody = request.Content!.ReadAsStringAsync().Result;
            return CreateVideoResponse("vid_123", "queued");
        });
        using HttpClient httpClient = new(handler);
        using var generator = CreateGenerator(httpClient);

        await generator.GenerateAsync(
            new VideoGenerationRequest { Prompt = "test" },
            new VideoGenerationOptions { Duration = TimeSpan.FromSeconds(8) });

        Assert.NotNull(capturedBody);
        using var doc = JsonDocument.Parse(capturedBody);
        JsonElement root = doc.RootElement;

        // The API requires seconds as a string enum, not an integer
        Assert.Equal(JsonValueKind.String, root.GetProperty("seconds").ValueKind);
        Assert.Equal("8", root.GetProperty("seconds").GetString());
    }

    [Fact]
    public async Task GenerateAsync_ExtendWithDuration_EmitsSecondsAsString()
    {
        string? capturedBody = null;
        using var handler = new RoutingHandler(request =>
        {
            capturedBody = request.Content!.ReadAsStringAsync().Result;
            return CreateVideoResponse("vid_456", "queued");
        });
        using HttpClient httpClient = new(handler);
        using var generator = CreateGenerator(httpClient);

        await generator.GenerateAsync(
            new VideoGenerationRequest
            {
                Prompt = "continue the scene",
                OperationKind = VideoOperationKind.Extend,
                SourceVideoId = "vid_original",
            },
            new VideoGenerationOptions { Duration = TimeSpan.FromSeconds(12) });

        Assert.NotNull(capturedBody);
        using var doc = JsonDocument.Parse(capturedBody);
        JsonElement root = doc.RootElement;

        Assert.Equal(JsonValueKind.String, root.GetProperty("seconds").ValueKind);
        Assert.Equal("12", root.GetProperty("seconds").GetString());
    }

    [Fact]
    public async Task GenerateAsync_ImageReferenceData_SentAsJsonDataUri()
    {
        string? capturedBody = null;
        string? capturedContentType = null;
        using var handler = new RoutingHandler(request =>
        {
            capturedContentType = request.Content!.Headers.ContentType?.ToString();
            capturedBody = request.Content!.ReadAsStringAsync().Result;
            return CreateVideoResponse("vid_789", "queued");
        });
        using HttpClient httpClient = new(handler);
        using var generator = CreateGenerator(httpClient);

        byte[] imageBytes = [0x89, 0x50, 0x4E, 0x47]; // PNG magic bytes
        await generator.GenerateAsync(
            new VideoGenerationRequest
            {
                Prompt = "animate this image",
                StartFrame = new DataContent(imageBytes, "image/png"),
            });

        // Should be JSON, not multipart
        Assert.NotNull(capturedContentType);
        Assert.Contains("application/json", capturedContentType);

        Assert.NotNull(capturedBody);
        using var doc = JsonDocument.Parse(capturedBody);
        JsonElement root = doc.RootElement;

        // input_reference should be a JSON object with image_url as a data URI
        Assert.True(root.TryGetProperty("input_reference", out JsonElement inputRef));
        Assert.Equal(JsonValueKind.Object, inputRef.ValueKind);
        string imageUrl = inputRef.GetProperty("image_url").GetString()!;
        Assert.StartsWith("data:image/png;base64,", imageUrl);
    }

    [Fact]
    public async Task UploadVideoCharacterAsync_ApiError_ThrowsWithMessage()
    {
        using var handler = new RoutingHandler(_ =>
            new HttpResponseMessage(HttpStatusCode.OK)
            {
                Content = new StringContent(
                    """{"error": {"message": "Video too long"}}""",
                    Encoding.UTF8,
                    "application/json"),
            });
        using HttpClient httpClient = new(handler);
        using var generator = CreateGenerator(httpClient);

        // NOTE(review): exception type restored as InvalidOperationException — confirm.
        var ex = await Assert.ThrowsAsync<InvalidOperationException>(
            () => generator.UploadVideoCharacterAsync(
                "TestChar",
                new DataContent(new byte[100], "video/mp4")));

        Assert.Contains("Video too long", ex.Message);
    }

    [Fact]
    public async Task UploadVideoCharacterAsync_MissingId_ThrowsWithResponseBody()
    {
        using var handler = new RoutingHandler(_ =>
            new HttpResponseMessage(HttpStatusCode.OK)
            {
                Content = new StringContent(
                    """{"unexpected": "response"}""",
                    Encoding.UTF8,
                    "application/json"),
            });
        using HttpClient httpClient = new(handler);
        using var generator = CreateGenerator(httpClient);

        var ex = await Assert.ThrowsAsync<InvalidOperationException>(
            () => generator.UploadVideoCharacterAsync(
                "TestChar",
                new DataContent(new byte[100], "video/mp4")));

        Assert.Contains("did not contain an 'id' property", ex.Message);
        Assert.Contains("unexpected", ex.Message);
    }

    [Fact]
    public async Task UploadVideoCharacterAsync_Success_ReturnsCharacterId()
    {
        using var handler = new RoutingHandler(_ =>
            new HttpResponseMessage(HttpStatusCode.OK)
            {
                Content = new StringContent(
                    """{"id": "char_abc123", "created_at": 0, "name": "TestChar"}""",
                    Encoding.UTF8,
                    "application/json"),
            });
        using HttpClient httpClient = new(handler);
        using var generator = CreateGenerator(httpClient);

        string id = await generator.UploadVideoCharacterAsync(
            "TestChar",
            new DataContent(new byte[100], "video/mp4"));

        Assert.Equal("char_abc123", id);
    }

    /// <summary>Builds an adapter whose pipeline routes all HTTP traffic through the supplied client.</summary>
    private static IVideoGenerator CreateGenerator(HttpClient httpClient) =>
        new OpenAIClient(
            new ApiKeyCredential("test-key"),
            new OpenAIClientOptions { Transport = new HttpClientPipelineTransport(httpClient) })
        .GetVideoClient()
        .AsIVideoGenerator("sora-2");

    /// <summary>Creates a minimal successful video-operation response body.</summary>
    private static HttpResponseMessage CreateVideoResponse(string id, string status)
    {
        string json = $$"""{"id": "{{id}}", "status": "{{status}}"}""";
        return new HttpResponseMessage(HttpStatusCode.OK)
        {
            Content = new StringContent(json, Encoding.UTF8, "application/json"),
        };
    }

    /// <summary>Test handler that delegates every request to the supplied callback.</summary>
    private sealed class RoutingHandler(Func<HttpRequestMessage, HttpResponseMessage> handler) : HttpMessageHandler
    {
        protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) =>
            Task.FromResult(handler(request));
    }
}
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Threading.Tasks;
using Xunit;

namespace Microsoft.Extensions.AI;

/// <summary>
/// Unit tests for <see cref="ConfigureOptionsVideoGenerator"/>: argument validation and
/// the clone-then-configure semantics applied to per-call options.
/// </summary>
public class ConfigureOptionsVideoGeneratorTests
{
    [Fact]
    public void InvalidArgs_Throws()
    {
        using var generator = new TestVideoGenerator();

        // FIX: the <ArgumentNullException> type arguments were stripped from the
        // patch; xUnit has no Assert.Throws(string, Action) overload without one.
        Assert.Throws<ArgumentNullException>("innerGenerator", () => new ConfigureOptionsVideoGenerator(null!, _ => { }));
        Assert.Throws<ArgumentNullException>("configure", () => new ConfigureOptionsVideoGenerator(generator, null!));
    }

    [Fact]
    public async Task ConfigureCallback_ReceivesClonedOptions()
    {
        var originalOptions = new VideoGenerationOptions { ModelId = "original-model" };
        VideoGenerationOptions? capturedOptions = null;

        using var inner = new TestVideoGenerator
        {
            GenerateVideosAsyncCallback = (request, options, ct) =>
            {
                capturedOptions = options;
                return Task.FromResult(new TestVideoGenerationOperation());
            }
        };

        using var configured = new ConfigureOptionsVideoGenerator(inner, opts =>
        {
            opts.ModelId = "configured-model";
        });

        await configured.GenerateAsync(new VideoGenerationRequest("Test"), originalOptions);

        // The callback must receive a clone, so the caller's options stay untouched.
        Assert.NotNull(capturedOptions);
        Assert.NotSame(originalOptions, capturedOptions);
        Assert.Equal("configured-model", capturedOptions!.ModelId);
        Assert.Equal("original-model", originalOptions.ModelId); // Original unchanged
    }

    [Fact]
    public async Task ConfigureCallback_WithNullOptions_CreatesNewInstance()
    {
        VideoGenerationOptions? capturedOptions = null;

        using var inner = new TestVideoGenerator
        {
            GenerateVideosAsyncCallback = (request, options, ct) =>
            {
                capturedOptions = options;
                return Task.FromResult(new TestVideoGenerationOperation());
            }
        };

        using var configured = new ConfigureOptionsVideoGenerator(inner, opts =>
        {
            opts.ModelId = "new-model";
        });

        // A null options argument must still yield a fresh instance for the callback.
        await configured.GenerateAsync(new VideoGenerationRequest("Test"), null);

        Assert.NotNull(capturedOptions);
        Assert.Equal("new-model", capturedOptions!.ModelId);
    }
}
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Logging.Testing;
using Xunit;

namespace Microsoft.Extensions.AI;

/// <summary>
/// Unit tests for <see cref="LoggingVideoGenerator"/>: argument validation, the
/// no-op-logger short-circuit in UseLogging, and level-dependent log content
/// (Trace includes sensitive request data, Debug does not, Information logs nothing).
/// </summary>
public class LoggingVideoGeneratorTests
{
    [Fact]
    public void LoggingVideoGenerator_InvalidArgs_Throws()
    {
        // FIX: the <ArgumentNullException> type arguments were stripped from the
        // patch; xUnit has no Assert.Throws(string, Action) overload without one.
        Assert.Throws<ArgumentNullException>("innerGenerator", () => new LoggingVideoGenerator(null!, NullLogger.Instance));
        Assert.Throws<ArgumentNullException>("logger", () => new LoggingVideoGenerator(new TestVideoGenerator(), null!));
    }

    [Fact]
    public void UseLogging_AvoidsInjectingNopGenerator()
    {
        using var innerGenerator = new TestVideoGenerator();

        // With a null logger factory the wrapper must not be added at all.
        Assert.Null(innerGenerator.AsBuilder().UseLogging(NullLoggerFactory.Instance).Build().GetService(typeof(LoggingVideoGenerator)));
        Assert.Same(innerGenerator, innerGenerator.AsBuilder().UseLogging(NullLoggerFactory.Instance).Build().GetService(typeof(IVideoGenerator)));

        using var factory = LoggerFactory.Create(b => b.AddFakeLogging());
        Assert.NotNull(innerGenerator.AsBuilder().UseLogging(factory).Build().GetService(typeof(LoggingVideoGenerator)));

        // When a real factory is resolvable from DI, the wrapper is injected;
        // an explicit NullLoggerFactory still suppresses it.
        ServiceCollection c = new();
        c.AddFakeLogging();
        var services = c.BuildServiceProvider();
        Assert.NotNull(innerGenerator.AsBuilder().UseLogging().Build(services).GetService(typeof(LoggingVideoGenerator)));
        Assert.NotNull(innerGenerator.AsBuilder().UseLogging(null).Build(services).GetService(typeof(LoggingVideoGenerator)));
        Assert.Null(innerGenerator.AsBuilder().UseLogging(NullLoggerFactory.Instance).Build(services).GetService(typeof(LoggingVideoGenerator)));
    }

    [Theory]
    [InlineData(LogLevel.Trace)]
    [InlineData(LogLevel.Debug)]
    [InlineData(LogLevel.Information)]
    public async Task GenerateVideosAsync_LogsInvocationAndCompletion(LogLevel level)
    {
        var collector = new FakeLogCollector();

        ServiceCollection c = new();
        c.AddLogging(b => b.AddProvider(new FakeLoggerProvider(collector)).SetMinimumLevel(level));
        var services = c.BuildServiceProvider();

        using IVideoGenerator innerGenerator = new TestVideoGenerator
        {
            GenerateVideosAsyncCallback = (request, options, cancellationToken) =>
            {
                return Task.FromResult(new TestVideoGenerationOperation());
            },
        };

        using IVideoGenerator generator = innerGenerator
            .AsBuilder()
            .UseLogging()
            .Build(services);

        await generator.GenerateAsync(
            new VideoGenerationRequest("A beautiful sunset"),
            new VideoGenerationOptions { ModelId = "sora" });

        var logs = collector.GetSnapshot();
        if (level is LogLevel.Trace)
        {
            // Trace includes the raw prompt and options (sensitive data).
            Assert.Collection(logs,
                entry => Assert.True(
                    entry.Message.Contains($"{nameof(IVideoGenerator.GenerateAsync)} invoked:") &&
                    entry.Message.Contains("A beautiful sunset") &&
                    entry.Message.Contains("sora")),
                entry => Assert.Contains($"{nameof(IVideoGenerator.GenerateAsync)} completed:", entry.Message));
        }
        else if (level is LogLevel.Debug)
        {
            // Debug logs the event but redacts the payload.
            Assert.Collection(logs,
                entry => Assert.True(entry.Message.Contains($"{nameof(IVideoGenerator.GenerateAsync)} invoked.") && !entry.Message.Contains("A beautiful sunset")),
                entry => Assert.True(entry.Message.Contains($"{nameof(IVideoGenerator.GenerateAsync)} completed.") && !entry.Message.Contains("sora")));
        }
        else
        {
            Assert.Empty(logs);
        }
    }

    [Theory]
    [InlineData(LogLevel.Trace)]
    [InlineData(LogLevel.Debug)]
    [InlineData(LogLevel.Information)]
    public async Task GenerateVideosAsync_WithSourceVideo_LogsInvocationAndCompletion(LogLevel level)
    {
        var collector = new FakeLogCollector();
        using ILoggerFactory loggerFactory = LoggerFactory.Create(b => b.AddProvider(new FakeLoggerProvider(collector)).SetMinimumLevel(level));

        using IVideoGenerator innerGenerator = new TestVideoGenerator
        {
            GenerateVideosAsyncCallback = (request, options, cancellationToken) =>
            {
                return Task.FromResult(new TestVideoGenerationOperation());
            }
        };

        using IVideoGenerator generator = innerGenerator
            .AsBuilder()
            .UseLogging(loggerFactory)
            .Build();

        AIContent sourceVideo = new DataContent((byte[])[1, 2, 3, 4], "video/mp4");
        await generator.GenerateAsync(
            new VideoGenerationRequest("Make it more colorful") { SourceVideo = sourceVideo },
            new VideoGenerationOptions { ModelId = "sora" });

        var logs = collector.GetSnapshot();
        if (level is LogLevel.Trace)
        {
            Assert.Collection(logs,
                entry => Assert.True(
                    entry.Message.Contains($"{nameof(IVideoGenerator.GenerateAsync)} invoked:") &&
                    entry.Message.Contains("Make it more colorful") &&
                    entry.Message.Contains("sora")),
                entry => Assert.Contains($"{nameof(IVideoGenerator.GenerateAsync)} completed", entry.Message));
        }
        else if (level is LogLevel.Debug)
        {
            Assert.Collection(logs,
                entry => Assert.True(entry.Message.Contains($"{nameof(IVideoGenerator.GenerateAsync)} invoked.") && !entry.Message.Contains("Make it more colorful")),
                entry => Assert.True(entry.Message.Contains($"{nameof(IVideoGenerator.GenerateAsync)} completed.") && !entry.Message.Contains("sora")));
        }
        else
        {
            Assert.Empty(logs);
        }
    }
}
+ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text.RegularExpressions; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Testing; +using OpenTelemetry.Trace; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class OpenTelemetryVideoGeneratorTests +{ + [Fact] + public void InvalidArgs_Throws() + { + Assert.Throws("innerGenerator", () => new OpenTelemetryVideoGenerator(null!)); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task ExpectedInformationLogged_Async(bool enableSensitiveData) + { + var sourceName = Guid.NewGuid().ToString(); + var activities = new List(); + using var tracerProvider = OpenTelemetry.Sdk.CreateTracerProviderBuilder() + .AddSource(sourceName) + .AddInMemoryExporter(activities) + .Build(); + + using var innerGenerator = new TestVideoGenerator + { + GenerateVideosAsyncCallback = async (request, options, cancellationToken) => + { + await Task.Yield(); + + return new TestVideoGenerationOperation + { + Usage = new() + { + InputTokenCount = 10, + OutputTokenCount = 20, + TotalTokenCount = 30, + }, + }; + }, + + GetServiceCallback = (serviceType, serviceKey) => + serviceType == typeof(VideoGeneratorMetadata) ? 
new VideoGeneratorMetadata("testservice", new Uri("http://localhost:12345/something"), "amazingmodel") : + null, + }; + + using var g = innerGenerator + .AsBuilder() + .UseOpenTelemetry(null, sourceName, configure: instance => + { + instance.EnableSensitiveData = enableSensitiveData; + }) + .Build(); + + VideoGenerationRequest request = new() + { + Prompt = "This is the input prompt.", + SourceVideo = new UriContent("http://example/input.mp4", "video/mp4"), + }; + + VideoGenerationOptions options = new() + { + Count = 2, + VideoSize = new(1920, 1080), + Duration = TimeSpan.FromSeconds(10), + FramesPerSecond = 24, + MediaType = "video/mp4", + ModelId = "mycoolvideomodel", + AdditionalProperties = new() + { + ["service_tier"] = "value1", + ["SomethingElse"] = "value2", + }, + }; + + await g.GenerateAsync(request, options); + + var activity = Assert.Single(activities); + + Assert.NotNull(activity.Id); + Assert.NotEmpty(activity.Id); + + Assert.Equal("localhost", activity.GetTagItem("server.address")); + Assert.Equal(12345, (int)activity.GetTagItem("server.port")!); + + Assert.Equal("generate_content mycoolvideomodel", activity.DisplayName); + Assert.Equal("testservice", activity.GetTagItem("gen_ai.provider.name")); + + Assert.Equal("mycoolvideomodel", activity.GetTagItem("gen_ai.request.model")); + Assert.Equal(2, activity.GetTagItem("gen_ai.request.choice.count")); + Assert.Equal(1920, activity.GetTagItem("gen_ai.request.video.width")); + Assert.Equal(1080, activity.GetTagItem("gen_ai.request.video.height")); + Assert.Equal(10.0, activity.GetTagItem("gen_ai.request.video.duration")); + Assert.Equal(24, activity.GetTagItem("gen_ai.request.video.fps")); + Assert.Equal(enableSensitiveData ? "value1" : null, activity.GetTagItem("service_tier")); + Assert.Equal(enableSensitiveData ? 
"value2" : null, activity.GetTagItem("SomethingElse")); + + Assert.Equal(10, activity.GetTagItem("gen_ai.usage.input_tokens")); + Assert.Equal(20, activity.GetTagItem("gen_ai.usage.output_tokens")); + + Assert.True(activity.Duration.TotalMilliseconds > 0); + + var tags = activity.Tags.ToDictionary(kvp => kvp.Key, kvp => kvp.Value); + + // Operation metadata is always recorded + Assert.Equal("test-op-id", activity.GetTagItem("gen_ai.operation.id")); + Assert.Equal("completed", activity.GetTagItem("gen_ai.operation.status")); + + if (enableSensitiveData) + { + Assert.Equal(ReplaceWhitespace(""" + [ + { + "role": "user", + "parts": [ + { + "type": "text", + "content": "This is the input prompt." + }, + { + "type": "uri", + "uri": "http://example/input.mp4", + "mime_type": "video/mp4", + "modality": "video" + } + ] + } + ] + """), ReplaceWhitespace(tags["gen_ai.input.messages"])); + } + else + { + Assert.False(tags.ContainsKey("gen_ai.input.messages")); + } + + static string ReplaceWhitespace(string? input) => Regex.Replace(input ?? "", @"\s+", " ").Trim(); + } + + [Fact] + public async Task ExceptionLogged_Async() + { + var sourceName = Guid.NewGuid().ToString(); + var activities = new List(); + using var tracerProvider = OpenTelemetry.Sdk.CreateTracerProviderBuilder() + .AddSource(sourceName) + .AddInMemoryExporter(activities) + .Build(); + + var collector = new FakeLogCollector(); + using var loggerFactory = LoggerFactory.Create(b => b.AddProvider(new FakeLoggerProvider(collector))); + + var expectedException = new InvalidOperationException("test exception message"); + + using var innerGenerator = new TestVideoGenerator + { + GenerateVideosAsyncCallback = (request, options, cancellationToken) => throw expectedException, + GetServiceCallback = (serviceType, serviceKey) => + serviceType == typeof(VideoGeneratorMetadata) ? 
new VideoGeneratorMetadata("testservice", new Uri("http://localhost:12345"), "testmodel") : + null, + }; + + using var g = innerGenerator + .AsBuilder() + .UseOpenTelemetry(loggerFactory, sourceName) + .Build(); + + await Assert.ThrowsAsync(() => + g.GenerateAsync(new VideoGenerationRequest { Prompt = "a cat video" })); + + var activity = Assert.Single(activities); + + // Existing error behavior is preserved + Assert.Equal(expectedException.GetType().FullName, activity.GetTagItem("error.type")); + Assert.Equal(ActivityStatusCode.Error, activity.Status); + + // Exception is logged via ILogger + var logEntry = Assert.Single(collector.GetSnapshot()); + Assert.Equal("gen_ai.client.operation.exception", logEntry.Id.Name); + Assert.Equal(LogLevel.Warning, logEntry.Level); + Assert.Same(expectedException, logEntry.Exception); + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Tests/Video/SingletonVideoGeneratorExtensions.cs b/test/Libraries/Microsoft.Extensions.AI.Tests/Video/SingletonVideoGeneratorExtensions.cs new file mode 100644 index 00000000000..93f2905e73a --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Tests/Video/SingletonVideoGeneratorExtensions.cs @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +namespace Microsoft.Extensions.AI; + +public static class SingletonVideoGeneratorExtensions +{ + public static VideoGeneratorBuilder UseSingletonMiddleware(this VideoGeneratorBuilder builder) + => builder.Use((inner, services) + => new VideoGeneratorDependencyInjectionPatterns.SingletonMiddleware(inner, services)); +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Tests/Video/VideoGeneratorBuilderTests.cs b/test/Libraries/Microsoft.Extensions.AI.Tests/Video/VideoGeneratorBuilderTests.cs new file mode 100644 index 00000000000..dbc4608e3c7 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Tests/Video/VideoGeneratorBuilderTests.cs @@ -0,0 +1,103 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using Microsoft.Extensions.DependencyInjection; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class VideoGeneratorBuilderTests +{ + [Fact] + public void PassesServiceProviderToFactories() + { + var expectedServiceProvider = new ServiceCollection().BuildServiceProvider(); + using TestVideoGenerator expectedInnerGenerator = new(); + using TestVideoGenerator expectedOuterGenerator = new(); + + var builder = new VideoGeneratorBuilder(services => + { + Assert.Same(expectedServiceProvider, services); + return expectedInnerGenerator; + }); + + builder.Use((innerGenerator, serviceProvider) => + { + Assert.Same(expectedServiceProvider, serviceProvider); + Assert.Same(expectedInnerGenerator, innerGenerator); + return expectedOuterGenerator; + }); + + Assert.Same(expectedOuterGenerator, builder.Build(expectedServiceProvider)); + } + + [Fact] + public void BuildsPipelineInOrderAdded() + { + using TestVideoGenerator expectedInnerGenerator = new(); + var builder = new VideoGeneratorBuilder(expectedInnerGenerator); + + builder.Use(next => new InnerGeneratorCapturingVideoGenerator("First", next)); + builder.Use(next => new 
InnerGeneratorCapturingVideoGenerator("Second", next));
+        builder.Use(next => new InnerGeneratorCapturingVideoGenerator("Third", next));
+
+        var first = (InnerGeneratorCapturingVideoGenerator)builder.Build();
+
+        Assert.Equal("First", first.Name);
+        var second = (InnerGeneratorCapturingVideoGenerator)first.InnerGenerator;
+        Assert.Equal("Second", second.Name);
+        var third = (InnerGeneratorCapturingVideoGenerator)second.InnerGenerator;
+        Assert.Equal("Third", third.Name);
+        Assert.Same(expectedInnerGenerator, third.InnerGenerator);
+    }
+
+    [Fact]
+    public void DoesNotAcceptNullInnerService()
+    {
+        Assert.Throws<ArgumentNullException>("innerGenerator", () => new VideoGeneratorBuilder((IVideoGenerator)null!));
+        Assert.Throws<ArgumentNullException>("innerGenerator", () => ((IVideoGenerator)null!).AsBuilder());
+    }
+
+    [Fact]
+    public void DoesNotAcceptNullFactories()
+    {
+        Assert.Throws<ArgumentNullException>("innerGeneratorFactory", () => new VideoGeneratorBuilder((Func<IServiceProvider, IVideoGenerator>)null!));
+    }
+
+    [Fact]
+    public void DoesNotAllowFactoriesToReturnNull()
+    {
+        using var innerGenerator = new TestVideoGenerator();
+        VideoGeneratorBuilder builder = new(innerGenerator);
+        builder.Use(_ => null!);
+        var ex = Assert.Throws<InvalidOperationException>(() => builder.Build());
+        Assert.Contains("entry at index 0", ex.Message);
+    }
+
+    [Fact]
+    public void UsesEmptyServiceProviderWhenNoServicesProvided()
+    {
+        using var innerGenerator = new TestVideoGenerator();
+        VideoGeneratorBuilder builder = new(innerGenerator);
+        builder.Use((innerGenerator, serviceProvider) =>
+        {
+            Assert.Null(serviceProvider.GetService(typeof(object)));
+
+            var keyedServiceProvider = Assert.IsAssignableFrom<IKeyedServiceProvider>(serviceProvider);
+            Assert.Null(keyedServiceProvider.GetKeyedService(typeof(object), "key"));
+            Assert.Throws<InvalidOperationException>(() => keyedServiceProvider.GetRequiredKeyedService(typeof(object), "key"));
+
+            return innerGenerator;
+        });
+        builder.Build();
+    }
+
+    private sealed class InnerGeneratorCapturingVideoGenerator(string name, IVideoGenerator innerGenerator) : DelegatingVideoGenerator(innerGenerator)
+    {
+#pragma
warning disable S3604 // False positive: Member initializer values should not be redundant + public string Name { get; } = name; +#pragma warning restore S3604 + public new IVideoGenerator InnerGenerator => base.InnerGenerator; + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Tests/Video/VideoGeneratorDependencyInjectionPatterns.cs b/test/Libraries/Microsoft.Extensions.AI.Tests/Video/VideoGeneratorDependencyInjectionPatterns.cs new file mode 100644 index 00000000000..da9fe96150b --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Tests/Video/VideoGeneratorDependencyInjectionPatterns.cs @@ -0,0 +1,178 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using Microsoft.Extensions.DependencyInjection; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class VideoGeneratorDependencyInjectionPatterns +{ + private IServiceCollection ServiceCollection { get; } = new ServiceCollection(); + + [Fact] + public void CanRegisterSingletonUsingFactory() + { + // Arrange/Act + ServiceCollection.AddVideoGenerator(services => new TestVideoGenerator { Services = services }) + .UseSingletonMiddleware(); + + // Assert + var services = ServiceCollection.BuildServiceProvider(); + using var scope1 = services.CreateScope(); + using var scope2 = services.CreateScope(); + + var instance1 = scope1.ServiceProvider.GetRequiredService(); + var instance1Copy = scope1.ServiceProvider.GetRequiredService(); + var instance2 = scope2.ServiceProvider.GetRequiredService(); + + // Each scope gets the same instance, because it's singleton + var instance = Assert.IsType(instance1); + Assert.Same(instance, instance1Copy); + Assert.Same(instance, instance2); + Assert.IsType(instance.InnerGenerator); + } + + [Fact] + public void CanRegisterSingletonUsingSharedInstance() + { + // Arrange/Act + using var singleton = new TestVideoGenerator(); + 
ServiceCollection.AddVideoGenerator(singleton)
+            .UseSingletonMiddleware();
+
+        // Assert
+        var services = ServiceCollection.BuildServiceProvider();
+        using var scope1 = services.CreateScope();
+        using var scope2 = services.CreateScope();
+
+        var instance1 = scope1.ServiceProvider.GetRequiredService<IVideoGenerator>();
+        var instance1Copy = scope1.ServiceProvider.GetRequiredService<IVideoGenerator>();
+        var instance2 = scope2.ServiceProvider.GetRequiredService<IVideoGenerator>();
+
+        // Each scope gets the same instance, because it's singleton
+        var instance = Assert.IsType<SingletonMiddleware>(instance1);
+        Assert.Same(instance, instance1Copy);
+        Assert.Same(instance, instance2);
+        Assert.IsType<TestVideoGenerator>(instance.InnerGenerator);
+    }
+
+    [Fact]
+    public void CanRegisterKeyedSingletonUsingFactory()
+    {
+        // Arrange/Act
+        ServiceCollection.AddKeyedVideoGenerator("mykey", services => new TestVideoGenerator { Services = services })
+            .UseSingletonMiddleware();
+
+        // Assert
+        var services = ServiceCollection.BuildServiceProvider();
+        using var scope1 = services.CreateScope();
+        using var scope2 = services.CreateScope();
+
+        Assert.Null(services.GetService<IVideoGenerator>());
+
+        var instance1 = scope1.ServiceProvider.GetRequiredKeyedService<IVideoGenerator>("mykey");
+        var instance1Copy = scope1.ServiceProvider.GetRequiredKeyedService<IVideoGenerator>("mykey");
+        var instance2 = scope2.ServiceProvider.GetRequiredKeyedService<IVideoGenerator>("mykey");
+
+        // Each scope gets the same instance, because it's singleton
+        var instance = Assert.IsType<SingletonMiddleware>(instance1);
+        Assert.Same(instance, instance1Copy);
+        Assert.Same(instance, instance2);
+        Assert.IsType<TestVideoGenerator>(instance.InnerGenerator);
+    }
+
+    [Fact]
+    public void CanRegisterKeyedSingletonUsingSharedInstance()
+    {
+        // Arrange/Act
+        using var singleton = new TestVideoGenerator();
+        ServiceCollection.AddKeyedVideoGenerator("mykey", singleton)
+            .UseSingletonMiddleware();
+
+        // Assert
+        var services = ServiceCollection.BuildServiceProvider();
+        using var scope1 = services.CreateScope();
+        using var scope2 = services.CreateScope();
+
+        Assert.Null(services.GetService<IVideoGenerator>());
+
+        
var instance1 = scope1.ServiceProvider.GetRequiredKeyedService<IVideoGenerator>("mykey");
+        var instance1Copy = scope1.ServiceProvider.GetRequiredKeyedService<IVideoGenerator>("mykey");
+        var instance2 = scope2.ServiceProvider.GetRequiredKeyedService<IVideoGenerator>("mykey");
+
+        // Each scope gets the same instance, because it's singleton
+        var instance = Assert.IsType<SingletonMiddleware>(instance1);
+        Assert.Same(instance, instance1Copy);
+        Assert.Same(instance, instance2);
+        Assert.IsType<TestVideoGenerator>(instance.InnerGenerator);
+    }
+
+    [Theory]
+    [InlineData(null)]
+    [InlineData(ServiceLifetime.Singleton)]
+    [InlineData(ServiceLifetime.Scoped)]
+    [InlineData(ServiceLifetime.Transient)]
+    public void AddVideoGenerator_RegistersExpectedLifetime(ServiceLifetime? lifetime)
+    {
+        ServiceCollection sc = new();
+        ServiceLifetime expectedLifetime = lifetime ?? ServiceLifetime.Singleton;
+        VideoGeneratorBuilder builder = lifetime.HasValue
+            ? sc.AddVideoGenerator(services => new TestVideoGenerator(), lifetime.Value)
+            : sc.AddVideoGenerator(services => new TestVideoGenerator());
+
+        ServiceDescriptor sd = Assert.Single(sc);
+        Assert.Equal(typeof(IVideoGenerator), sd.ServiceType);
+        Assert.False(sd.IsKeyedService);
+        Assert.Null(sd.ImplementationInstance);
+        Assert.NotNull(sd.ImplementationFactory);
+        Assert.IsType<TestVideoGenerator>(sd.ImplementationFactory(null!));
+        Assert.Equal(expectedLifetime, sd.Lifetime);
+    }
+
+    [Theory]
+    [InlineData(null)]
+    [InlineData(ServiceLifetime.Singleton)]
+    [InlineData(ServiceLifetime.Scoped)]
+    [InlineData(ServiceLifetime.Transient)]
+    public void AddKeyedVideoGenerator_RegistersExpectedLifetime(ServiceLifetime? lifetime)
+    {
+        ServiceCollection sc = new();
+        ServiceLifetime expectedLifetime = lifetime ?? ServiceLifetime.Singleton;
+        VideoGeneratorBuilder builder = lifetime.HasValue
+            ?
sc.AddKeyedVideoGenerator("key", services => new TestVideoGenerator(), lifetime.Value)
+            : sc.AddKeyedVideoGenerator("key", services => new TestVideoGenerator());
+
+        ServiceDescriptor sd = Assert.Single(sc);
+        Assert.Equal(typeof(IVideoGenerator), sd.ServiceType);
+        Assert.True(sd.IsKeyedService);
+        Assert.Equal("key", sd.ServiceKey);
+        Assert.Null(sd.KeyedImplementationInstance);
+        Assert.NotNull(sd.KeyedImplementationFactory);
+        Assert.IsType<TestVideoGenerator>(sd.KeyedImplementationFactory(null!, null!));
+        Assert.Equal(expectedLifetime, sd.Lifetime);
+    }
+
+    [Fact]
+    public void AddKeyedVideoGenerator_WorksWithNullServiceKey()
+    {
+        ServiceCollection sc = new();
+        sc.AddKeyedVideoGenerator(null, _ => new TestVideoGenerator());
+
+        ServiceDescriptor sd = Assert.Single(sc);
+        Assert.Equal(typeof(IVideoGenerator), sd.ServiceType);
+        Assert.False(sd.IsKeyedService);
+        Assert.Null(sd.ServiceKey);
+        Assert.Null(sd.ImplementationInstance);
+        Assert.NotNull(sd.ImplementationFactory);
+        Assert.IsType<TestVideoGenerator>(sd.ImplementationFactory(null!));
+        Assert.Equal(ServiceLifetime.Singleton, sd.Lifetime);
+    }
+
+    public class SingletonMiddleware(IVideoGenerator inner, IServiceProvider services) : DelegatingVideoGenerator(inner)
+    {
+        public new IVideoGenerator InnerGenerator => base.InnerGenerator;
+        public IServiceProvider Services => services;
+    }
+}