diff --git a/README.md b/README.md
index f7ab5b7..ffe01e6 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,19 @@ scribe transcribe meeting.wav --language es    # Spanish
 scribe transcribe meeting.wav --language fr    # French
 ```
 
+### Live streaming from microphone
+
+```bash
+scribe stream                      # multilingual (25 languages, ~11s latency)
+scribe stream --engine nemotron    # English-only, low latency (~560ms), with punctuation
+scribe stream --format jsonl       # JSONL output (one JSON object per line)
+scribe stream --output meeting.txt # save to file while streaming
+```
+
+> **Note on streaming engines:**
+> - **Default** (Parakeet TDT v3): Supports all 25 languages including Spanish. Higher latency (~11s for confirmed text) because it uses a batch model in sliding windows. Live preview appears on screen while speaking.
+> - **Nemotron** (`--engine nemotron`): English-only but much faster (~560ms latency). Includes punctuation and capitalization. Recommended for English-only meetings.
+
 ### Pre-download models
 
 ```bash
@@ -143,8 +156,10 @@ Tested on Apple Silicon (M-series):
 |------|-------|---------|
 | Transcription only | ~130x real-time | 4-min file in 1.7s |
 | Transcription + diarization | ~30x real-time | 4-min file in 7.5s |
+| Live streaming (Nemotron) | ~560ms latency | English, with punctuation |
+| Live streaming (default) | ~11s latency | 25 languages |
 
-Models are downloaded automatically on first use (~600MB for ASR, ~50MB for diarization).
+Models are downloaded automatically on first use (~600MB per model). First run may take a minute.
 
 ## Requirements
 
@@ -159,7 +174,7 @@ Bulgarian, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, G
 
 scribe is built on the shoulders of excellent open-source projects:
 
-- **[NVIDIA Parakeet](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3)** (CC-BY-4.0) — The speech recognition model that powers transcription
+- **[NVIDIA Parakeet](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3)** (CC-BY-4.0) — Speech recognition models (TDT v3 for batch, Nemotron for streaming)
 - **[FluidAudio](https://github.com/FluidInference/FluidAudio)** (Apache 2.0) by FluidInference — CoreML speech processing SDK for Apple Silicon
 - **[pyannote.audio](https://github.com/pyannote/pyannote-audio)** (MIT) by Herve Bredin — The diarization model architecture
 - **[swift-argument-parser](https://github.com/apple/swift-argument-parser)** (Apache 2.0) by Apple — CLI argument parsing
diff --git a/Sources/scribe/Commands/Stream.swift b/Sources/scribe/Commands/Stream.swift
new file mode 100644
index 0000000..705dbb2
--- /dev/null
+++ b/Sources/scribe/Commands/Stream.swift
@@ -0,0 +1,574 @@
+import ArgumentParser
+import AVFoundation
+import FluidAudio
+import Foundation
+
+/// Thread-safe state for tracking incremental transcript output.
+private actor StreamState {
+    private var lastFullTranscript = ""
+    private var printedLength = 0
+
+    /// Given the full accumulated transcript, return only the new portion.
+    /// Handles model revisions by finding the longest common prefix.
+    func getNewText(_ fullTranscript: String) -> String? {
+        let text = fullTranscript.trimmingCharacters(in: .whitespaces)
+        guard !text.isEmpty else { return nil }
+        guard text != lastFullTranscript else { return nil }
+
+        lastFullTranscript = text
+
+        // Find how much of the text we've already printed
+        if text.count > printedLength {
+            let newPortion = String(text.dropFirst(printedLength)).trimmingCharacters(in: .whitespaces)
+            if !newPortion.isEmpty {
+                printedLength = text.count
+                return newPortion
+            }
+        }
+
+        return nil
+    }
+
+    /// Get the live preview (last N chars of full transcript).
+    func getPreview(_ fullTranscript: String, maxLen: Int = 100) -> String? {
+        let text = fullTranscript.trimmingCharacters(in: .whitespaces)
+        guard !text.isEmpty else { return nil }
+        guard text != lastFullTranscript else { return nil }
+        return String(text.suffix(maxLen))
+    }
+}
+
+struct Stream: AsyncParsableCommand {
+    static let configuration = CommandConfiguration(
+        abstract: "Stream live audio transcription from microphone."
+    )
+
+    @Option(name: .long, help: "Output format: text or jsonl.")
+    var format: StreamOutputFormat = .text
+
+    @Option(name: .long, help: "Streaming engine: default (multilingual, higher latency) or nemotron (English-only, low latency ~560ms).")
+    var engine: StreamEngine = .default
+
+    @Option(name: .shortAndLong, help: "Save transcript to file.")
+    var output: String?
+
+    @Option(name: .long, help: "Save captured audio to WAV file. After streaming ends, the saved audio is automatically re-transcribed with the batch engine for higher accuracy.")
+    var saveAudio: String?
+
+    @Flag(name: .long, help: "Show status information.")
+    var verbose: Bool = false
+
+    @Option(name: .long, help: "Read audio from a WAV/audio file instead of microphone (for testing/eval). Exits when file is consumed.")
+    var audioFile: String?
+
+    func run() async throws {
+        switch engine {
+        case .nemotron:
+            try await runNemotron()
+        case .default:
+            try await runSlidingWindow()
+        }
+    }
+
+    // MARK: - Nemotron Engine (English-only, low latency)
+
+    private func runNemotron() async throws {
+        log("Initializing streaming ASR (Nemotron 1120ms, English-only)...")
+        log("Downloading model if needed (first run only, ~600MB)...")
+
+        let engine = StreamingAsrEngineFactory.create(.nemotron1120ms)
+
+        do {
+            try await engine.loadModels()
+        } catch {
+            log("Model loading failed: \(error.localizedDescription)")
+            log("Cleaning cache and retrying download...")
+
+            let cacheDir = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask).first!
+                .appendingPathComponent("FluidAudio/Models/nemotron-streaming")
+            try? FileManager.default.removeItem(at: cacheDir)
+
+            let freshEngine = StreamingAsrEngineFactory.create(.nemotron1120ms)
+            try await freshEngine.loadModels()
+            log("Retry successful. Warming up...")
+            try await warmUpEngine(freshEngine)
+            log("Ready.")
+            try await runNemotronWithEngine(freshEngine)
+            return
+        }
+
+        log("Models loaded. Warming up...")
+        try await warmUpEngine(engine)
+        log("Ready.")
+        try await runNemotronWithEngine(engine)
+    }
+
+    /// Force CoreML model compilation by running one silent chunk through the engine.
+    /// Without this, the first real audio chunk blocks for 10-20s while CoreML compiles
+    /// the neural network, causing all live output to batch up and appear at once.
+    private func warmUpEngine(_ engine: any StreamingAsrEngine) async throws {
+        guard let fmt = AVAudioFormat(
+            commonFormat: .pcmFormatFloat32, sampleRate: 16000, channels: 1, interleaved: false
+        ) else { return }
+
+        // One complete chunk of silence (1120ms = 17920 samples at 16kHz).
+        let chunkSamples: AVAudioFrameCount = 17920
+        guard let silence = AVAudioPCMBuffer(pcmFormat: fmt, frameCapacity: chunkSamples) else { return }
+        silence.frameLength = chunkSamples
+        // Buffer is zero-initialized by default — silence.
+
+        nonisolated(unsafe) let buf = silence
+        try await engine.appendAudio(buf)
+        try await engine.processBufferedAudio()
+        // Discard the silence tokens so they don't pollute the real transcript.
+        try await engine.reset()
+    }
+
+    /// Unified entry point for the Nemotron pipeline. Both file and mic modes
+    /// flow through the same drain loop — the only difference is the audio source.
+    private func runNemotronWithEngine(_ engine: any StreamingAsrEngine) async throws {
+        let startTime = Date()
+        let outputFile = openOutputFile()
+
+        // Open audio recording file if --save-audio was passed.
+        let audioWriter = try openAudioWriter()
+
+        // Shared queue between the audio source (file or mic) and the drain loop.
+        // AsyncStream.Continuation.yield() is non-async and audio-thread-safe — this is
+        // FluidAudio's own pattern (see SlidingWindowAsrManager:63-65, 182-184).
+        let (stream, continuation) = AsyncStream<AVAudioPCMBuffer>.makeStream()
+
+        // Start the source. Mic mode keeps the AVAudioEngine and DispatchSourceSignal
+        // alive in `micResources` for the lifetime of the stream.
+        var micResources: NemotronMicResources? = nil
+        if let path = audioFile {
+            try await feedNemotronFromFile(path: path, continuation: continuation)
+        } else {
+            micResources = try startMicSource(continuation: continuation)
+        }
+
+        // Drain loop runs until the continuation finishes (file end or SIGINT).
+        try await runNemotronDrainLoop(
+            engine: engine,
+            stream: stream,
+            startTime: startTime,
+            outputFile: outputFile,
+            audioWriter: audioWriter
+        )
+
+        // Cleanup mic resources after the drain loop has flushed via finish().
+        if let res = micResources {
+            res.audioEngine.stop()
+            res.audioEngine.inputNode.removeTap(onBus: 0)
+            res.signalSource.cancel()
+        }
+
+        outputFile?.closeFile()
+
+        // If we recorded audio, run batch transcription for a polished final transcript.
+        if let audioPath = saveAudio, audioWriter != nil {
+            log("Saved audio to: \(audioPath)")
+            log("Running batch transcription for polished output...")
+            try await runBatchTranscription(audioPath: audioPath, outputFile: outputFile)
+        }
+    }
+
+    /// Drain the audio queue, feed the engine, poll the partial transcript, and emit
+    /// deltas. Used for both mic and file modes — the queue is the only difference.
+    private func runNemotronDrainLoop(
+        engine: any StreamingAsrEngine,
+        stream: AsyncStream<AVAudioPCMBuffer>,
+        startTime: Date,
+        outputFile: FileHandle?,
+        audioWriter: AVAudioFile? = nil
+    ) async throws {
+        let state = StreamState()
+
+        for await buffer in stream {
+            // Save audio for later batch processing if --save-audio was passed.
+            if let writer = audioWriter {
+                try? writer.write(from: buffer)
+            }
+
+            nonisolated(unsafe) let buf = buffer
+            try await engine.appendAudio(buf)
+            try await engine.processBufferedAudio()
+
+            // Live preview to stderr (text format only) — overwritten on each tick.
+            let current = await engine.getPartialTranscript()
+            emitLivePreview(current, startTime: startTime)
+
+            // Delta emission to stdout (and output file).
+            if let newText = await state.getNewText(current) {
+                emitDelta(newText, startTime: startTime, outputFile: outputFile)
+            }
+        }
+
+        // Stream ended — flush any tail audio via finish() and emit remaining content.
+        // This is the bug fix from the previous phase: finish() may decode tokens that
+        // never reached the per-chunk path, so we always run its result through the
+        // delta logic to capture them.
+        let finalText = try await engine.finish()
+        if let newText = await state.getNewText(finalText) {
+            emitDelta(newText, startTime: startTime, outputFile: outputFile)
+        }
+
+        // Newline so the final stderr preview line doesn't run into the next prompt.
+        if format == .text {
+            FileHandle.standardError.write(Data("\n".utf8))
+        }
+    }
+
+    /// File source adapter — reads the whole file via AudioConverter (16kHz mono Float32),
+    /// pushes it as a single buffer, and finishes the continuation so the drain loop exits.
+    private func feedNemotronFromFile(
+        path: String,
+        continuation: AsyncStream<AVAudioPCMBuffer>.Continuation
+    ) async throws {
+        let url = URL(fileURLWithPath: path)
+        guard FileManager.default.fileExists(atPath: url.path) else {
+            throw ValidationError("Audio file not found: \(path)")
+        }
+
+        if verbose { log("Loading audio file: \(path)") }
+        let converter = AudioConverter()
+        let samples = try converter.resampleAudioFile(url)
+        let duration = Double(samples.count) / 16000.0
+        if verbose {
+            log(String(format: "Loaded %.0fs (%.1f min), %d samples", duration, duration / 60, samples.count))
+        }
+        log("Streaming from file (no microphone)...")
+
+        guard let buffer = makeMonoFloat32Buffer(from: samples) else {
+            throw ValidationError("Failed to allocate audio buffer")
+        }
+        continuation.yield(buffer)
+        continuation.finish()
+    }
+
+    /// Mic source adapter — installs the input tap, pre-resamples each tap buffer to
+    /// 16kHz mono Float32 (matching the file path), yields it onto the shared stream,
+    /// and wires SIGINT to a DispatchSourceSignal that finishes the continuation
+    /// (replacing the brutal Darwin.exit(0) that used to drop tail audio).
+    private func startMicSource(
+        continuation: AsyncStream<AVAudioPCMBuffer>.Continuation
+    ) throws -> NemotronMicResources {
+        let audioEngine = AVAudioEngine()
+        let inputNode = audioEngine.inputNode
+        let inputFormat = inputNode.outputFormat(forBus: 0)
+
+        if verbose {
+            log("Microphone: \(inputFormat.sampleRate)Hz, \(inputFormat.channelCount) ch")
+        }
+
+        // AudioConverter is a final class with only immutable state — safe to call
+        // resampleBuffer() from the audio render thread.
+        nonisolated(unsafe) let converter = AudioConverter()
+        nonisolated(unsafe) let cont = continuation
+
+        inputNode.installTap(onBus: 0, bufferSize: 4096, format: inputFormat) { buffer, _ in
+            do {
+                let samples = try converter.resampleBuffer(buffer)
+                if let outBuffer = makeMonoFloat32Buffer(from: samples) {
+                    cont.yield(outBuffer)
+                }
+            } catch {
+                // Drop the buffer on resample error rather than crash the audio thread.
+            }
+        }
+
+        try audioEngine.start()
+        log("Listening (English, low latency)... press Ctrl+C to stop")
+
+        // Graceful shutdown via DispatchSourceSignal — replaces Darwin.exit(0).
+        // The handler runs on a regular dispatch queue (not the signal context), so
+        // it's safe to call continuation.finish() from here.
+        signal(SIGINT, SIG_IGN)
+        let sigSource = DispatchSource.makeSignalSource(signal: SIGINT, queue: .global())
+        sigSource.setEventHandler {
+            FileHandle.standardError.write(Data("\n[scribe] Stopping...\n".utf8))
+            cont.finish()
+        }
+        sigSource.resume()
+
+        return NemotronMicResources(audioEngine: audioEngine, signalSource: sigSource)
+    }
+
+    // MARK: - Nemotron emission helpers
+
+    /// Live preview line on stderr (text format only). Overwrites itself with `\r`.
+    private func emitLivePreview(_ fullTranscript: String, startTime: Date) {
+        guard format == .text else { return }
+        let preview = String(fullTranscript.trimmingCharacters(in: .whitespaces).suffix(100))
+        guard !preview.isEmpty else { return }
+        let elapsed = Date().timeIntervalSince(startTime)
+        let ts = formatStreamTimestamp(elapsed)
+        FileHandle.standardError.write(Data("\r\u{1B}[K[\(ts)] \(preview)".utf8))
+    }
+
+    /// Emit a confirmed delta to stdout (and output file). Format-aware.
+    ///
+    /// Text mode: tokens flow naturally without timestamps. A newline is inserted
+    /// after sentence-ending punctuation (. ? !) so output reads as prose:
+    ///   "Hello computer. Can you help me with something?\n"
+    ///
+    /// JSONL mode: one JSON object per delta, with timestamp (for machine consumption).
+    private func emitDelta(_ newText: String, startTime: Date, outputFile: FileHandle?) {
+        let elapsed = Date().timeIntervalSince(startTime)
+
+        switch format {
+        case .text:
+            // Clear the live preview line before writing confirmed text.
+            FileHandle.standardError.write(Data("\r\u{1B}[K".utf8))
+
+            // Stream text naturally: append tokens, newline on sentence boundaries.
+            var textToWrite = newText
+            // Insert newline after sentence-ending punctuation followed by a space or end.
+            textToWrite = textToWrite.replacingOccurrences(of: ". ", with: ".\n")
+            textToWrite = textToWrite.replacingOccurrences(of: "? ", with: "?\n")
+            textToWrite = textToWrite.replacingOccurrences(of: "! ", with: "!\n")
+            // Also handle punctuation at the very end of the delta.
+            if textToWrite.hasSuffix(".") || textToWrite.hasSuffix("?") || textToWrite.hasSuffix("!") {
+                textToWrite += "\n"
+            }
+
+            // Write WITHOUT trailing newline (tokens append naturally on the same line).
+            FileHandle.standardOutput.write(Data(textToWrite.utf8))
+            fflush(stdout)
+            outputFile?.write(Data(textToWrite.utf8))
+
+        case .jsonl:
+            let jsonObj: [String: Any] = [
+                "time": round(elapsed * 10) / 10,
+                "text": newText,
+            ]
+            guard let data = try? JSONSerialization.data(withJSONObject: jsonObj, options: [.sortedKeys]),
+                  let jsonStr = String(data: data, encoding: .utf8) else {
+                return
+            }
+            print(jsonStr)
+            fflush(stdout)
+            outputFile?.write(Data((jsonStr + "\n").utf8))
+        }
+    }
+
+    private func openOutputFile() -> FileHandle? {
+        guard let outputPath = output else { return nil }
+        FileManager.default.createFile(atPath: outputPath, contents: nil)
+        return FileHandle(forWritingAtPath: outputPath)
+    }
+
+    /// Open an AVAudioFile for writing 16kHz mono Float32 WAV — the same format the
+    /// engine sees, so the saved file can be batch-transcribed directly.
+    private func openAudioWriter() throws -> AVAudioFile? {
+        guard let path = saveAudio else { return nil }
+        guard let fmt = AVAudioFormat(
+            commonFormat: .pcmFormatFloat32, sampleRate: 16000, channels: 1, interleaved: false
+        ) else {
+            throw ValidationError("Failed to create audio format for recording")
+        }
+        return try AVAudioFile(forWriting: URL(fileURLWithPath: path), settings: fmt.settings)
+    }
+
+    /// Run batch transcription (Parakeet TDT v3) on a saved audio file for a polished
+    /// final transcript. Called after the stream ends when --save-audio is used.
+    private func runBatchTranscription(audioPath: String, outputFile: FileHandle?) async throws {
+        let converter = AudioConverter()
+        let samples = try converter.resampleAudioFile(URL(fileURLWithPath: audioPath))
+        let duration = Double(samples.count) / 16000.0
+
+        let asrModels = try await AsrModels.downloadAndLoad(version: .v3)
+        let asrManager = AsrManager()
+        try await asrManager.initialize(models: asrModels)
+
+        let startTime = Date()
+        let asrResult = try await asrManager.transcribe(samples, source: .system)
+        let elapsed = Date().timeIntervalSince(startTime)
+
+        log(String(format: "Batch transcription: %.0fs audio in %.1fs (%.0fx real-time)",
+                   duration, elapsed, duration / elapsed))
+
+        // Emit the polished transcript.
+        let text = asrResult.text.trimmingCharacters(in: .whitespaces)
+
+        FileHandle.standardError.write(Data("\n--- Polished transcript (batch) ---\n".utf8))
+        print(text)
+        fflush(stdout)
+
+        if let file = outputFile {
+            file.write(Data("\n--- Polished transcript (batch) ---\n".utf8))
+            file.write(Data((text + "\n").utf8))
+        }
+    }
+
+    // MARK: - SlidingWindow Engine (Multilingual, default)
+
+    private func runSlidingWindow() async throws {
+        if audioFile != nil {
+            throw ValidationError("--audio-file is not yet supported with the multilingual engine. Use --engine nemotron for now.")
+        }
+
+        log("Initializing streaming ASR (Parakeet TDT v3, multilingual)...")
+        log("Downloading model if needed (first run only, ~600MB)...")
+
+        let streamManager = SlidingWindowAsrManager(config: .streaming)
+        try await streamManager.start(source: .microphone)
+
+        log("Models loaded.")
+
+        let audioEngine = AVAudioEngine()
+        let inputNode = audioEngine.inputNode
+        let inputFormat = inputNode.outputFormat(forBus: 0)
+
+        if verbose {
+            log("Microphone: \(inputFormat.sampleRate)Hz, \(inputFormat.channelCount) ch")
+        }
+
+        let startTime = Date()
+        var outputFile: FileHandle? = nil
+        var lastConfirmedText = ""
+        var lastVolatileText = ""
+
+        if let outputPath = output {
+            FileManager.default.createFile(atPath: outputPath, contents: nil)
+            outputFile = FileHandle(forWritingAtPath: outputPath)
+        }
+
+        setupSignalHandler()
+
+        inputNode.installTap(onBus: 0, bufferSize: 4096, format: inputFormat) { buffer, _ in
+            nonisolated(unsafe) let buf = buffer
+            streamManager.streamAudio(buf)
+        }
+
+        try audioEngine.start()
+        log("Listening (multilingual)... press Ctrl+C to stop")
+
+        let updates = await streamManager.transcriptionUpdates
+        for await update in updates {
+            let text = update.text.trimmingCharacters(in: .whitespaces)
+            guard !text.isEmpty else { continue }
+
+            let elapsed = Date().timeIntervalSince(startTime)
+
+            if update.isConfirmed {
+                guard text != lastConfirmedText else { continue }
+                lastConfirmedText = text
+                lastVolatileText = ""
+                emitLine(text: text, elapsed: elapsed, outputFile: outputFile)
+            } else {
+                guard text != lastVolatileText else { continue }
+                lastVolatileText = text
+
+                switch format {
+                case .text:
+                    let ts = formatStreamTimestamp(elapsed)
+                    let preview = String(text.suffix(100))
+                    FileHandle.standardError.write(Data("\r\u{1B}[K[\(ts)] \(preview)".utf8))
+                case .jsonl:
+                    break
+                }
+            }
+        }
+
+        audioEngine.stop()
+        inputNode.removeTap(onBus: 0)
+        _ = try await streamManager.finish()
+    }
+
+    // MARK: - Helpers
+
+    private func emitLine(text: String, elapsed: Double, outputFile: FileHandle?) {
+        let line: String
+
+        switch format {
+        case .text:
+            let ts = formatStreamTimestamp(elapsed)
+            FileHandle.standardError.write(Data("\r\u{1B}[K".utf8))
+            line = "[\(ts)] \(text)"
+        case .jsonl:
+            let jsonObj: [String: Any] = [
+                "time": round(elapsed * 10) / 10,
+                "text": text,
+            ]
+            if let data = try? JSONSerialization.data(withJSONObject: jsonObj, options: [.sortedKeys]),
+               let jsonStr = String(data: data, encoding: .utf8) {
+                line = jsonStr
+            } else {
+                return
+            }
+        }
+
+        print(line)
+        fflush(stdout)
+
+        if let file = outputFile {
+            file.write(Data((line + "\n").utf8))
+        }
+    }
+
+    private func setupSignalHandler() {
+        signal(SIGINT) { _ in
+            FileHandle.standardError.write(Data("\n".utf8))
+            Darwin.exit(0)
+        }
+    }
+
+    private func log(_ message: String) {
+        FileHandle.standardError.write(Data("[scribe] \(message)\n".utf8))
+    }
+}
+
+private func formatStreamTimestamp(_ seconds: Double) -> String {
+    let totalSeconds = Int(seconds)
+    let hours = totalSeconds / 3600
+    let minutes = (totalSeconds % 3600) / 60
+    let secs = totalSeconds % 60
+    if hours > 0 {
+        return String(format: "%02d:%02d:%02d", hours, minutes, secs)
+    }
+    return String(format: "%02d:%02d", minutes, secs)
+}
+
+/// Resources held alive by mic mode for the duration of a stream session.
+/// The AVAudioEngine and DispatchSourceSignal must outlive the drain loop —
+/// dropping them would stop audio capture and cancel the signal handler.
+private struct NemotronMicResources {
+    let audioEngine: AVAudioEngine
+    let signalSource: any DispatchSourceSignal
+}
+
+/// Wrap a `[Float]` of 16kHz mono samples into a fresh AVAudioPCMBuffer.
+/// Used by both the file source (whole-file buffer) and the mic source
+/// (per-tap-buffer wrapping after resampling).
+private func makeMonoFloat32Buffer(from samples: [Float]) -> AVAudioPCMBuffer? {
+    guard let format = AVAudioFormat(
+        commonFormat: .pcmFormatFloat32,
+        sampleRate: 16000,
+        channels: 1,
+        interleaved: false
+    ) else {
+        return nil
+    }
+    let frameCount = AVAudioFrameCount(samples.count)
+    guard frameCount > 0,
+          let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
+        return nil
+    }
+    buffer.frameLength = frameCount
+    if let channelData = buffer.floatChannelData {
+        samples.withUnsafeBufferPointer { src in
+            memcpy(channelData[0], src.baseAddress!, samples.count * MemoryLayout<Float>.stride)
+        }
+    }
+    return buffer
+}
+
+enum StreamOutputFormat: String, ExpressibleByArgument, CaseIterable, Sendable {
+    case text, jsonl
+}
+
+enum StreamEngine: String, ExpressibleByArgument, CaseIterable, Sendable {
+    case `default`
+    case nemotron
+}
diff --git a/Sources/scribe/ScribeCLI.swift b/Sources/scribe/ScribeCLI.swift
index 8d2e877..8a58abb 100644
--- a/Sources/scribe/ScribeCLI.swift
+++ b/Sources/scribe/ScribeCLI.swift
@@ -6,7 +6,7 @@ struct ScribeCLI: AsyncParsableCommand {
         commandName: "scribe",
         abstract: "State-of-the-art local audio transcription with speaker diarization for macOS.",
         version: "0.2.1",
-        subcommands: [Transcribe.self, Models.self],
+        subcommands: [Transcribe.self, Stream.self, Models.self],
         defaultSubcommand: Transcribe.self
     )
 }
diff --git a/experiments/eval/eval.py b/experiments/eval/eval.py
index 1ce2a98..0649bef 100644
--- a/experiments/eval/eval.py
+++ b/experiments/eval/eval.py
@@ -235,24 +235,28 @@ def load_dipco(session_ids: list[str]) -> list[dict]:
 # ========== Core eval functions ==========
 
 def run_scribe(wav_path: Path, model: str = "large-v3-turbo",
-               scribe_bin: str = "scribe") -> tuple[str, float]:
-    """Run scribe transcribe on an audio file. Returns (transcript, elapsed_secs)."""
-    # Build command — v0.2+ doesn't have --model flag (uses Parakeet by default)
+               scribe_bin: str = "scribe", mode: str = "batch") -> tuple[str, float]:
+    """Run scribe on an audio file. Returns (transcript, elapsed_secs).
+
+    mode="batch":     calls `scribe transcribe ...` (offline, full audio at once)
+    mode="streaming": calls `scribe stream --audio-file ...` (chunked through streaming engine)
+    """
+    if mode == "streaming":
+        return _run_scribe_streaming(wav_path, scribe_bin)
+    return _run_scribe_batch(wav_path, model, scribe_bin)
+
+
+def _run_scribe_batch(wav_path: Path, model: str, scribe_bin: str) -> tuple[str, float]:
     cmd = [scribe_bin, "transcribe", str(wav_path), "--format", "txt"]
 
-    # Check if scribe supports --model flag (v0.1.x only)
+    # Check if scribe supports --model flag (v0.1.x only — v0.2+ uses Parakeet by default)
     version_result = subprocess.run([scribe_bin, "--version"], capture_output=True, text=True)
     version = version_result.stdout.strip()
     if version.startswith("0.1"):
         cmd.extend(["--model", model])
 
     start = time.monotonic()
-    result = subprocess.run(
-        cmd,
-        capture_output=True,
-        text=True,
-        timeout=1800,
-    )
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=1800)
     elapsed = time.monotonic() - start
 
     if result.returncode != 0:
@@ -263,13 +267,48 @@ def run_scribe(wav_path: Path, model: str = "large-v3-turbo",
     text_parts = []
     for line in lines:
         cleaned = re.sub(r"^\[[\d:]+\]\s*(?:(?:Speaker\s+\d+|Unknown):\s*)?", "", line)
-        cleaned = re.sub(r"\*([^*]+)\*", r"\1", cleaned)  # Remove italic markers
+        cleaned = re.sub(r"\*([^*]+)\*", r"\1", cleaned)
         cleaned = cleaned.strip()
         if cleaned and cleaned != "-":
             text_parts.append(cleaned)
 
-    transcript = " ".join(text_parts)
-    return transcript, elapsed
+    return " ".join(text_parts), elapsed
+
+
+def _run_scribe_streaming(wav_path: Path, scribe_bin: str) -> tuple[str, float]:
+    """Feed an audio file through `scribe stream --audio-file` and parse JSONL output.
+
+    Each JSONL line is `{"text": "...", "time": 1.2}` representing a confirmed delta.
+    Concatenate the `text` fields in order to build the full hypothesis.
+    """
+    cmd = [
+        scribe_bin, "stream",
+        "--audio-file", str(wav_path),
+        "--engine", "nemotron",
+        "--format", "jsonl",
+    ]
+
+    start = time.monotonic()
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=1800)
+    elapsed = time.monotonic() - start
+
+    if result.returncode != 0:
+        raise RuntimeError(f"scribe stream failed on {wav_path}: {result.stderr}")
+
+    text_parts = []
+    for line in result.stdout.strip().split("\n"):
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            obj = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        text = obj.get("text", "").strip()
+        if text:
+            text_parts.append(text)
+
+    return " ".join(text_parts), elapsed
 
 
 def compute_wer(reference: str, hypothesis: str) -> dict:
@@ -298,7 +337,7 @@ def get_scribe_version(scribe_bin: str = "scribe") -> str:
 
 
 def run_eval(entries: list[dict], model: str, scribe_version: str,
-             scribe_bin: str = "scribe") -> list[dict]:
+             scribe_bin: str = "scribe", mode: str = "batch") -> list[dict]:
     """Run evaluation on all entries."""
     results = []
 
@@ -308,17 +347,21 @@ def run_eval(entries: list[dict], model: str, scribe_version: str,
         dur_min = entry["duration_secs"] / 60
 
         print(f"\n--- {fid}: {label} ({dur_min:.1f}m, {entry['speakers']} speakers) ---")
-        print(f"  Transcribing with {model}...")
+        if mode == "streaming":
+            print(f"  Streaming via scribe stream --audio-file (engine: nemotron)...")
+        else:
+            print(f"  Transcribing with {model}...")
 
         try:
-            hypothesis, elapsed = run_scribe(entry["wav_path"], model=model, scribe_bin=scribe_bin)
+            hypothesis, elapsed = run_scribe(entry["wav_path"], model=model, scribe_bin=scribe_bin, mode=mode)
         except Exception as e:
             print(f"  ERROR: {e}")
             results.append({
                 "file_id": fid,
                 "dataset": entry["dataset"],
                 "label": label,
-                "model": model,
+                "model": "nemotron-streaming" if mode == "streaming" else model,
+                "mode": mode,
                 "duration_secs": round(entry["duration_secs"], 1),
                 "speakers": entry["speakers"],
                 "scribe_version": scribe_version,
@@ -361,7 +404,8 @@ def run_eval(entries: list[dict], model: str, scribe_version: str,
             "file_id": fid,
             "dataset": entry["dataset"],
             "label": label,
-            "model": model,
+            "model": "nemotron-streaming" if mode == "streaming" else model,
+            "mode": mode,
             "duration_secs": round(entry["duration_secs"], 1),
             "speakers": entry["speakers"],
             "scribe_version": scribe_version,
@@ -391,7 +435,7 @@ def write_csv(results: list[dict], output_path: Path):
         return
 
     fieldnames = [
-        "file_id", "dataset", "label", "model",
+        "file_id", "dataset", "label", "model", "mode",
         "duration_secs", "speakers", "scribe_version",
         "processing_secs", "rtf",
         "wer", "mer", "wil",
@@ -460,10 +504,13 @@ def main():
     parser.add_argument("--download-only", action="store_true")
     parser.add_argument("--output", type=str, help="Output CSV path")
     parser.add_argument("--scribe-bin", default="scribe", help="Path to scribe binary")
+    parser.add_argument("--mode", choices=["batch", "streaming"], default="batch",
+                        help="Eval mode: 'batch' (scribe transcribe) or 'streaming' (scribe stream --audio-file)")
     args = parser.parse_args()
 
     scribe_version = get_scribe_version(args.scribe_bin)
     print(f"scribe version: {scribe_version} ({args.scribe_bin})")
+    print(f"Mode: {args.mode}")
     print(f"Model: {args.model}")
     print(f"Dataset: {args.dataset}")
 
@@ -481,8 +528,11 @@ def main():
         entries.extend(load_tedlium(file_ids))
 
     if args.dataset in ("mls-spanish", "all"):
-        print("\n--- Loading MLS Spanish ---")
-        entries.extend(load_mls_spanish(args.max_files))
+        if args.mode == "streaming":
+            print("\n--- Skipping MLS Spanish (Nemotron streaming engine is English-only) ---")
+        else:
+            print("\n--- Loading MLS Spanish ---")
+            entries.extend(load_mls_spanish(args.max_files))
 
     if args.dataset == "dipco":
         print("\n--- Loading DiPCo ---")
@@ -499,12 +549,15 @@ def main():
 
     # Run eval
     print(f"\n--- Running evaluation ({len(entries)} files) ---")
-    results = run_eval(entries, args.model, scribe_version, scribe_bin=args.scribe_bin)
+    results = run_eval(entries, args.model, scribe_version, scribe_bin=args.scribe_bin, mode=args.mode)
 
     # Write results
-    output_path = Path(args.output) if args.output else (
-        RESULTS_DIR / f"scribe-{scribe_version}-{args.model}-{args.dataset}.csv"
-    )
+    if args.output:
+        output_path = Path(args.output)
+    elif args.mode == "streaming":
+        output_path = RESULTS_DIR / f"scribe-{scribe_version}-streaming-{args.dataset}.csv"
+    else:
+        output_path = RESULTS_DIR / f"scribe-{scribe_version}-{args.model}-{args.dataset}.csv"
     write_csv(results, output_path)
     print_summary(results)
 
diff --git a/experiments/eval/results/scribe-0.2.1-large-v3-turbo-earnings21.csv b/experiments/eval/results/scribe-0.2.1-large-v3-turbo-earnings21.csv
index faf6aed..9c3ce7e 100644
--- a/experiments/eval/results/scribe-0.2.1-large-v3-turbo-earnings21.csv
+++ b/experiments/eval/results/scribe-0.2.1-large-v3-turbo-earnings21.csv
@@ -1,6 +1,6 @@
-file_id,dataset,label,model,duration_secs,speakers,scribe_version,processing_secs,rtf,wer,mer,wil,substitutions,deletions,insertions,ref_words,hyp_words,proper_noun_total,proper_noun_found,proper_noun_recall,error
-4386541,earnings21,Cumulus Media Inc,large-v3-turbo,1097.1,5,0.2.1,8.1,0.0073,0.1473,0.1404,0.2086,208,59,133,2715,2789,30,22,0.7333,
-4387332,earnings21,Zagg Inc,large-v3-turbo,1310.2,6,0.2.1,10.5,0.008,0.127,0.1226,0.1878,287,75,142,3969,4036,36,26,0.7222,
-4392809,earnings21,Lands End Inc,large-v3-turbo,1438.0,6,0.2.1,11.0,0.0076,0.1081,0.1052,0.1482,183,143,109,4025,3991,30,24,0.8,
-4384683,earnings21,One Gas Inc,large-v3-turbo,1476.8,8,0.2.1,10.6,0.0072,0.1487,0.1419,0.1934,199,166,170,3599,3603,40,29,0.725,
-4367318,earnings21,AcelRX Pharmaceuticals Inc,large-v3-turbo,1720.8,7,0.2.1,12.5,0.0073,0.1221,0.1192,0.1727,248,174,106,4323,4255,42,25,0.5952,
+file_id,dataset,label,model,mode,duration_secs,speakers,scribe_version,processing_secs,rtf,wer,mer,wil,substitutions,deletions,insertions,ref_words,hyp_words,proper_noun_total,proper_noun_found,proper_noun_recall,error
+4386541,earnings21,Cumulus Media Inc,large-v3-turbo,batch,1097.1,5,0.2.1,8.2,0.0075,0.1473,0.1404,0.2086,208,59,133,2715,2789,30,22,0.7333,
+4387332,earnings21,Zagg Inc,large-v3-turbo,batch,1310.2,6,0.2.1,10.4,0.008,0.127,0.1226,0.1878,287,75,142,3969,4036,36,26,0.7222,
+4392809,earnings21,Lands End Inc,large-v3-turbo,batch,1438.0,6,0.2.1,11.0,0.0076,0.1081,0.1052,0.1482,183,143,109,4025,3991,30,24,0.8,
+4384683,earnings21,One Gas Inc,large-v3-turbo,batch,1476.8,8,0.2.1,10.7,0.0073,0.1487,0.1419,0.1934,199,166,170,3599,3603,40,29,0.725,
+4367318,earnings21,AcelRX Pharmaceuticals Inc,large-v3-turbo,batch,1720.8,7,0.2.1,12.5,0.0073,0.1221,0.1192,0.1727,248,174,106,4323,4255,42,25,0.5952,
diff --git a/experiments/eval/results/scribe-0.2.1-large-v3-turbo-mls-spanish.csv b/experiments/eval/results/scribe-0.2.1-large-v3-turbo-mls-spanish.csv
index 19ad949..d8aafb7 100644
--- a/experiments/eval/results/scribe-0.2.1-large-v3-turbo-mls-spanish.csv
+++ b/experiments/eval/results/scribe-0.2.1-large-v3-turbo-mls-spanish.csv
@@ -1,4 +1,4 @@
-file_id,dataset,label,model,duration_secs,speakers,scribe_version,processing_secs,rtf,wer,mer,wil,substitutions,deletions,insertions,ref_words,hyp_words,error
-chapter_11991,mls-spanish,MLS Spanish chapter_11991,large-v3-turbo,4019.9,1,0.2.1,49.9,0.0124,0.1489,0.1473,0.2229,786,520,100,9443,9023,
-chapter_1378,mls-spanish,MLS Spanish chapter_1378,large-v3-turbo,5640.8,1,0.2.1,70.6,0.0125,0.0499,0.0495,0.0799,461,145,121,14580,14556,
-chapter_567,mls-spanish,MLS Spanish chapter_567,large-v3-turbo,5262.6,1,0.2.1,66.1,0.0126,0.0787,0.0777,0.1228,646,230,186,13489,13445,
+file_id,dataset,label,model,mode,duration_secs,speakers,scribe_version,processing_secs,rtf,wer,mer,wil,substitutions,deletions,insertions,ref_words,hyp_words,proper_noun_total,proper_noun_found,proper_noun_recall,error
+chapter_11991,mls-spanish,MLS Spanish chapter_11991,large-v3-turbo,batch,4019.9,1,0.2.1,50.2,0.0125,0.1489,0.1473,0.2229,786,520,100,9443,9023,0,0,0.0,
+chapter_1378,mls-spanish,MLS Spanish chapter_1378,large-v3-turbo,batch,5640.8,1,0.2.1,70.6,0.0125,0.0499,0.0495,0.0799,461,145,121,14580,14556,0,0,0.0,
+chapter_567,mls-spanish,MLS Spanish chapter_567,large-v3-turbo,batch,5262.6,1,0.2.1,66.7,0.0127,0.0787,0.0777,0.1228,646,230,186,13489,13445,0,0,0.0,
diff --git a/experiments/eval/results/scribe-0.2.1-large-v3-turbo-tedlium.csv b/experiments/eval/results/scribe-0.2.1-large-v3-turbo-tedlium.csv
index 07f94a6..8c8b894 100644
--- a/experiments/eval/results/scribe-0.2.1-large-v3-turbo-tedlium.csv
+++ b/experiments/eval/results/scribe-0.2.1-large-v3-turbo-tedlium.csv
@@ -1,6 +1,6 @@
-file_id,dataset,label,model,duration_secs,speakers,scribe_version,processing_secs,rtf,wer,mer,wil,substitutions,deletions,insertions,ref_words,hyp_words,error
-talk_0,tedlium,TED talk talk_0,large-v3-turbo,1249.0,1,0.2.1,8.6,0.0068,0.0652,0.0646,0.0889,72,90,27,2897,2834,
-talk_1,tedlium,TED talk talk_1,large-v3-turbo,1505.8,1,0.2.1,11.1,0.0074,0.0663,0.065,0.091,117,82,89,4342,4349,
-talk_2,tedlium,TED talk talk_2,large-v3-turbo,834.0,1,0.2.1,6.3,0.0076,0.0734,0.073,0.1027,73,89,13,2383,2307,
-talk_3,tedlium,TED talk talk_3,large-v3-turbo,1095.5,1,0.2.1,7.8,0.0071,0.0378,0.0376,0.0545,52,42,20,3015,2993,
-talk_4,tedlium,TED talk talk_4,large-v3-turbo,459.1,1,0.2.1,3.6,0.0078,0.0563,0.0552,0.0776,35,19,31,1510,1522,
+file_id,dataset,label,model,mode,duration_secs,speakers,scribe_version,processing_secs,rtf,wer,mer,wil,substitutions,deletions,insertions,ref_words,hyp_words,proper_noun_total,proper_noun_found,proper_noun_recall,error
+talk_0,tedlium,TED talk talk_0,large-v3-turbo,batch,1249.0,1,0.2.1,8.7,0.007,0.0652,0.0646,0.0889,72,90,27,2897,2834,0,0,0.0,
+talk_1,tedlium,TED talk talk_1,large-v3-turbo,batch,1505.8,1,0.2.1,11.2,0.0074,0.0663,0.065,0.091,117,82,89,4342,4349,0,0,0.0,
+talk_2,tedlium,TED talk talk_2,large-v3-turbo,batch,834.0,1,0.2.1,6.3,0.0076,0.0734,0.073,0.1027,73,89,13,2383,2307,0,0,0.0,
+talk_3,tedlium,TED talk talk_3,large-v3-turbo,batch,1095.5,1,0.2.1,7.8,0.0071,0.0378,0.0376,0.0545,52,42,20,3015,2993,0,0,0.0,
+talk_4,tedlium,TED talk talk_4,large-v3-turbo,batch,459.1,1,0.2.1,3.6,0.0079,0.0563,0.0552,0.0776,35,19,31,1510,1522,0,0,0.0,
diff --git a/experiments/eval/results/scribe-0.2.1-streaming-earnings21.csv b/experiments/eval/results/scribe-0.2.1-streaming-earnings21.csv
new file mode 100644
index 0000000..da380c5
--- /dev/null
+++ b/experiments/eval/results/scribe-0.2.1-streaming-earnings21.csv
@@ -0,0 +1,6 @@
+file_id,dataset,label,model,mode,duration_secs,speakers,scribe_version,processing_secs,rtf,wer,mer,wil,substitutions,deletions,insertions,ref_words,hyp_words,proper_noun_total,proper_noun_found,proper_noun_recall,error
+4386541,earnings21,Cumulus Media Inc,nemotron-streaming,streaming,1097.1,5,0.2.1,102.9,0.0938,0.2063,0.1987,0.2615,182,274,104,2715,2545,30,22,0.7333,
+4387332,earnings21,Zagg Inc,nemotron-streaming,streaming,1310.2,6,0.2.1,121.9,0.093,0.2273,0.2216,0.2789,238,562,102,3969,3509,36,23,0.6389,
+4392809,earnings21,Lands End Inc,nemotron-streaming,streaming,1438.0,6,0.2.1,130.1,0.0905,0.1588,0.1535,0.1988,189,312,138,4025,3851,30,22,0.7333,
+4384683,earnings21,One Gas Inc,nemotron-streaming,streaming,1476.8,8,0.2.1,137.5,0.0931,0.1814,0.1739,0.2262,197,300,156,3599,3455,40,26,0.65,
+4367318,earnings21,AcelRX Pharmaceuticals Inc,nemotron-streaming,streaming,1720.8,7,0.2.1,167.1,0.0971,0.1594,0.1558,0.208,238,353,98,4323,4068,42,24,0.5714,
diff --git a/experiments/eval/results/scribe-0.2.1-streaming-tedlium.csv b/experiments/eval/results/scribe-0.2.1-streaming-tedlium.csv
new file mode 100644
index 0000000..1e4c22c
--- /dev/null
+++ b/experiments/eval/results/scribe-0.2.1-streaming-tedlium.csv
@@ -0,0 +1,6 @@
+file_id,dataset,label,model,mode,duration_secs,speakers,scribe_version,processing_secs,rtf,wer,mer,wil,substitutions,deletions,insertions,ref_words,hyp_words,proper_noun_total,proper_noun_found,proper_noun_recall,error
+talk_0,tedlium,TED talk talk_0,nemotron-streaming,streaming,1249.0,1,0.2.1,106.7,0.0854,0.0746,0.0743,0.0986,72,133,11,2897,2775,0,0,0.0,
+talk_1,tedlium,TED talk talk_1,nemotron-streaming,streaming,1505.8,1,0.2.1,139.3,0.0925,0.0905,0.0903,0.1215,140,242,11,4342,4111,0,0,0.0,
+talk_2,tedlium,TED talk talk_2,nemotron-streaming,streaming,834.0,1,0.2.1,75.3,0.0903,0.0793,0.0788,0.1116,81,93,15,2383,2305,0,0,0.0,
+talk_3,tedlium,TED talk talk_3,nemotron-streaming,streaming,1095.5,1,0.2.1,101.8,0.0929,0.0441,0.044,0.0641,62,64,7,3015,2958,0,0,0.0,
+talk_4,tedlium,TED talk talk_4,nemotron-streaming,streaming,459.1,1,0.2.1,44.3,0.0965,0.0609,0.0606,0.0776,26,58,8,1510,1460,0,0,0.0,
diff --git a/experiments/eval/summarize.py b/experiments/eval/summarize.py
index d915d88..8dc6031 100644
--- a/experiments/eval/summarize.py
+++ b/experiments/eval/summarize.py
@@ -121,25 +121,33 @@ def print_markdown(results: list[dict]):
                 row += " — |"
         print(row)
 
-    # --- Proper noun recall section (Earnings-21 only) ---
+    # --- Proper noun recall section (Earnings-21 only, split by mode) ---
     pn_results = [r for r in results if r["dataset"] == "earnings21" and int(r.get("proper_noun_total", 0)) > 0]
     if pn_results:
         print("\n## Proper Noun Recall (Earnings-21)\n")
         print("WER averages all words equally. Proper noun recall measures how many entity-tagged ")
         print("phrases (PERSON, ORG, GPE, PRODUCT, LAW, WORK_OF_ART, FAC) appear in the hypothesis.\n")
-        print("| File | Company | WER | Proper Noun Recall |")
-        print("|---|---|---|---|")
-        total_pn = found_pn = 0
-        for r in sorted(pn_results, key=lambda x: x["file_id"]):
-            wer = float(r["wer"])
-            pn_total = int(r["proper_noun_total"])
-            pn_found = int(r["proper_noun_found"])
-            pn_recall = float(r["proper_noun_recall"])
-            print(f"| {r['file_id']} | {r['label']} | {wer:.1%} | {pn_recall:.1%} ({pn_found}/{pn_total}) |")
-            total_pn += pn_total
-            found_pn += pn_found
-        agg_recall = found_pn / total_pn if total_pn else 0
-        print(f"\n**Aggregate**: {agg_recall:.1%} ({found_pn}/{total_pn} entities)")
+
+        modes = sorted(set(r.get("mode", "batch") for r in pn_results))
+        for mode in modes:
+            mode_results = [r for r in pn_results if r.get("mode", "batch") == mode]
+            if not mode_results:
+                continue
+            label_model = mode_results[0]["model"]
+            print(f"\n### {mode} ({label_model})\n")
+            print("| File | Company | WER | Proper Noun Recall |")
+            print("|---|---|---|---|")
+            total_pn = found_pn = 0
+            for r in sorted(mode_results, key=lambda x: x["file_id"]):
+                wer = float(r["wer"])
+                pn_total = int(r["proper_noun_total"])
+                pn_found = int(r["proper_noun_found"])
+                pn_recall = float(r["proper_noun_recall"])
+                print(f"| {r['file_id']} | {r['label']} | {wer:.1%} | {pn_recall:.1%} ({pn_found}/{pn_total}) |")
+                total_pn += pn_total
+                found_pn += pn_found
+            agg_recall = found_pn / total_pn if total_pn else 0
+            print(f"\n**Aggregate**: {agg_recall:.1%} ({found_pn}/{total_pn} entities)")
 
     # --- Table 2: Per-file detail for primary model ---
     primary_model = "large-v3-turbo"