Custom Providers (Subclassing)

Bring your own LLM, embedding, TTS, image, video, transcription, or 3D backend to Blazen — one typed trait, seven bindings

CustomProvider is Blazen’s flagship integration point. Every public binding exposes it as the same foreign-implementable typed trait: subclass the class (or conform to the protocol) from Python, Node, WASM, Go, Swift, Kotlin, or Ruby, override one or more of 16 typed methods, and the resulting handle plugs directly into agents, workflows, pipelines, Memory, and the batch helpers — exactly like a built-in provider.

What is CustomProvider?

CustomProvider is a single trait with 16 async methods covering every capability Blazen knows how to dispatch:

  • 3 completion / embedding methods — complete, stream, embed
  • 13 typed compute methods — text_to_speech, generate_music, generate_sfx, clone_voice, list_voices, delete_voice, generate_image, upscale_image, text_to_video, image_to_video, transcribe, generate_3d, remove_background

The same conceptual API exists in all 7 bindings. Names follow each language’s idiomatic case (text_to_speech in Python / Ruby, textToSpeech in JS / Swift / Kotlin, TextToSpeech in Go), but the semantics are identical. Every method you do not override raises a typed Unsupported error when called — no stub returns, no silent failures.

Concrete-handle types vary by binding:

| Binding | Subclass / conform to | Handle returned by factories |
|---|---|---|
| Python | class MyProv(CustomProvider): ... | CustomProvider |
| Node | class MyProv extends CustomProvider { ... } | CustomProvider |
| WASM | class MyProv { ... } + CustomProvider.fromJsObject(id, instance) | CustomProvider |
| Go | type MyProv struct { blazen.UnsupportedCustomProvider } | *CustomProviderHandle |
| Swift | class MyProv: CustomProvider { ... } | CustomProviderHandle |
| Kotlin | class MyProv : CustomProviderBase() { ... } | CustomProviderHandle |
| Ruby | class MyProv < Blazen::CustomProvider; ... end | CustomProviderHandle |

Quick start — subclass to add a TTS provider

Each example below overrides text_to_speech only; every other method inherits the Unsupported default.

# Python
from blazen import CustomProvider, SpeechRequest, AudioResult

class ElevenLabsTTS(CustomProvider):
    def __init__(self, api_key: str):
        super().__init__(provider_id="elevenlabs")
        self._api_key = api_key

    async def text_to_speech(self, request: SpeechRequest) -> AudioResult:
        audio_bytes = await elevenlabs_call(self._api_key, request.text, request.voice)
        return AudioResult(
            audio=[{"media": {"base64": base64.b64encode(audio_bytes).decode(),
                              "media_type": "mpeg"}}],
            timing={"total_ms": 0, "queue_ms": None, "execution_ms": None},
            metadata={},
        )

provider = ElevenLabsTTS(api_key="sk-...")
result = await provider.text_to_speech(SpeechRequest(text="hi", voice="rachel"))
// Node — extend CustomProvider; the constructor detects the subclass and
// installs a JsCustomProviderAdapter automatically.
import { ApiProtocol, CustomProvider } from "blazen";

/** TTS-only provider: textToSpeech is the single overridden capability. */
class ElevenLabsTTS extends CustomProvider {
  constructor(private apiKey: string) {
    // ApiProtocol.custom() forwards dispatched methods to this subclass.
    super({ providerId: "elevenlabs", protocol: ApiProtocol.custom() });
  }

  /** Calls the backend and returns one base64-encoded audio clip. */
  async textToSpeech(request) {
    // elevenlabsCall is a placeholder for your own client; it is not defined here.
    const audio = await elevenlabsCall(this.apiKey, request.text, request.voice);
    return {
      audio: [{
        media: { base64: Buffer.from(audio).toString("base64"), mediaType: "mpeg" },
      }],
      timing: { totalMs: 0, queueMs: null, executionMs: null },
      metadata: {},
    };
  }
}

const provider = new ElevenLabsTTS("sk-...");
const audio = await provider.textToSpeech({ text: "hi", voice: "rachel" });
// WASM — there's no class-extends across the wasm-bindgen boundary, so wrap
// a plain JS instance via CustomProvider.fromJsObject(...).
import { CustomProvider } from "@zorpx/blazen";

/** Plain JS object implementing only textToSpeech. */
class ElevenLabsTTS {
  constructor(apiKey) { this.apiKey = apiKey; }

  /** Calls the backend and returns one base64-encoded audio clip. */
  async textToSpeech(request) {
    // elevenlabsCall is a placeholder for your own client; it is assumed to
    // resolve to raw bytes (ArrayBuffer or Uint8Array), as in the Node example.
    const bytes = new Uint8Array(
      await elevenlabsCall(this.apiKey, request.text, request.voice));
    // btoa() only accepts a binary string; handing it a byte buffer would
    // base64-encode the buffer's string form instead of the audio itself.
    let binary = "";
    for (const byte of bytes) binary += String.fromCharCode(byte);
    return {
      audio: [{ media: { base64: btoa(binary), mediaType: "mpeg" } }],
      timing: { totalMs: 0 },
      metadata: {},
    };
  }
}

const provider = CustomProvider.fromJsObject("elevenlabs", new ElevenLabsTTS("sk-..."));
const audio = await provider.textToSpeech({ text: "hi", voice: "rachel" });
// Go — embed UnsupportedCustomProvider for the Unsupported defaults, then
// override the methods you implement.
package main

import (
    "encoding/base64" // required by base64.StdEncoding in TextToSpeech

    "github.com/zorpxinc/blazen-go"
)

// ElevenLabsTTS implements only TextToSpeech; the embedded
// UnsupportedCustomProvider supplies every other method.
type ElevenLabsTTS struct {
    blazen.UnsupportedCustomProvider
    apiKey string
}

// ProviderId returns the identifier this provider registers under.
func (p *ElevenLabsTTS) ProviderId() string { return "elevenlabs" }

// TextToSpeech calls the backend and returns one base64-encoded audio clip.
// Backend errors are returned unchanged alongside an empty AudioResult.
func (p *ElevenLabsTTS) TextToSpeech(req blazen.SpeechRequest) (blazen.AudioResult, error) {
    // elevenlabsCall is a placeholder for your own client; it is not defined here.
    audio, err := elevenlabsCall(p.apiKey, req.Text, req.Voice)
    if err != nil {
        return blazen.AudioResult{}, err
    }
    return blazen.AudioResult{
        Audio: []blazen.GeneratedAudio{{
            Media: blazen.MediaOutput{Base64: base64.StdEncoding.EncodeToString(audio), MediaType: "mpeg"},
        }},
    }, nil
}

handle := blazen.CustomProviderFrom(&ElevenLabsTTS{apiKey: "sk-..."})
audio, err := handle.TextToSpeech(blazen.SpeechRequest{Text: "hi", Voice: blazen.Some("rachel")})
// Swift — protocol-extension defaults supply Unsupported for every method.
// Conform a class and override what you support.
import BlazenSwift
import Foundation  // base64EncodedString() below is a Foundation API

/// TTS-only provider: `textToSpeech` is the single implemented capability.
final class ElevenLabsTTS: CustomProvider {
    let apiKey: String
    init(apiKey: String) { self.apiKey = apiKey }

    /// Identifier this provider registers under.
    func providerId() -> String { "elevenlabs" }

    /// Calls the backend and returns one base64-encoded audio clip.
    /// - Throws: Whatever `elevenlabsCall` throws.
    func textToSpeech(request: SpeechRequest) async throws -> AudioResult {
        // elevenlabsCall is a placeholder for your own client; it is not
        // defined here and is assumed to return Foundation `Data`.
        let audio = try await elevenlabsCall(apiKey, request.text, request.voice)
        return AudioResult(
            audio: [GeneratedAudio(media: MediaOutput(
                base64: audio.base64EncodedString(), mediaType: "mpeg"))],
            timing: nil, metadata: [:])
    }
}

let handle = Providers.custom(ElevenLabsTTS(apiKey: "sk-..."))
let result = try await handle.textToSpeech(
    request: SpeechRequest(text: "hi", voice: "rachel"))
// Kotlin — extend CustomProviderBase (abstract class with Unsupported-throwing
// suspend defaults). Kotlin interfaces can't carry suspend default methods,
// so the base class is the idiomatic path.
import dev.zorpx.blazen.*

/** TTS-only provider: textToSpeech is the single overridden capability. */
class ElevenLabsTts(private val apiKey: String) : CustomProviderBase() {
    /** Identifier this provider registers under. */
    override fun providerId(): String = "elevenlabs"

    /** Calls the backend and returns one base64-encoded audio clip. */
    override suspend fun textToSpeech(request: SpeechRequest): AudioResult {
        // elevenlabsCall is a placeholder for your own client; it is not defined here.
        val audio = elevenlabsCall(apiKey, request.text, request.voice)
        // NOTE(review): encodeBase64() is assumed to be an extension already in
        // scope (e.g. from an HTTP client library) — confirm before copying.
        return AudioResult(
            audio = listOf(GeneratedAudio(
                media = MediaOutput(base64 = audio.encodeBase64(), mediaType = "mpeg"))),
            timing = null, metadata = emptyMap())
    }
}

val handle: CustomProviderHandle = Blazen.customProvider(ElevenLabsTts("sk-..."))
val result = handle.textToSpeech(SpeechRequest(text = "hi", voice = "rachel"))
# Ruby — subclass Blazen::CustomProvider and override any of 16 typed
# methods. Returning a Hash matching the result-record's serde schema, a
# Blazen::*Result wrapper, or raising a Blazen::*Error all round-trip back
# through the cabi vtable as typed values.
require "base64"
require "blazen"

# TTS-only provider: text_to_speech is the single overridden capability.
class ElevenLabsTts < Blazen::CustomProvider
  def initialize(api_key)
    super()
    @api_key = api_key
  end

  # Identifier this provider registers under.
  def provider_id = "elevenlabs"

  # Returns one base64-encoded audio clip as a plain Hash.
  #
  # NOTE(review): unlike the other bindings' examples, this one ignores the
  # incoming request (hence `_request`) and always synthesizes the fixed text
  # "hello" with the voice "rachel". A real provider would read the text and
  # voice from the request instead.
  def text_to_speech(_request)
    # elevenlabs_call is a placeholder for your own client; it is not defined here.
    audio = elevenlabs_call(@api_key, "hello", "rachel")
    {
      "audio" => [
        {
          "media" => {
            "base64"     => Base64.strict_encode64(audio),
            "media_type" => { "type" => "mp3" },
            "metadata"   => {},
          },
        },
      ],
      "timing"   => { "total_ms" => 0 },
      "metadata" => {},
    }
    # Equivalently: Blazen::AudioResult.new(<same hash>) — the trampoline
    # accepts either a Hash (it JSON-encodes and asks the cabi to build
    # the handle) or a pre-constructed Blazen::*Result wrapper (it steals
    # the cabi handle via #take_ptr!).
  end
end

handle = Blazen::CustomProvider.from_subclass(ElevenLabsTts.new("sk-..."))
# `handle` is a Blazen::CustomProviderHandle the rest of Blazen treats as
# a regular provider.

Quick start — factory for OpenAI-compatible servers

Ollama, LM Studio, vLLM, llama.cpp’s server, TGI, and any other OpenAI-chat-completions-wire-format backend ship as built-in factories. No subclassing required:

# Python
from blazen import CustomProvider, OpenAiCompatConfig

p = CustomProvider.ollama(model="llama3.1")
p = CustomProvider.lm_studio(model="my-model", host="127.0.0.1", port=1234)
p = CustomProvider.openai_compat("vllm", OpenAiCompatConfig(
    base_url="http://gpu:8000/v1", default_model="meta-llama/Llama-3-70B"))
// Node — one factory call per OpenAI-compatible backend.
import { CustomProvider } from "blazen";

const ollama   = CustomProvider.ollama("llama3.1");
const lmStudio = CustomProvider.lmStudio("my-model", "127.0.0.1", 1234);
const vllm     = CustomProvider.openaiCompat("vllm", {
  baseUrl: "http://gpu:8000/v1", defaultModel: "meta-llama/Llama-3-70B",
});
// WASM
import { CustomProvider } from "@zorpx/blazen";

const ollama = CustomProvider.ollama("llama3.1");
const lmStd  = CustomProvider.lmStudio("my-model");
// Go — one factory call per OpenAI-compatible backend.
ollama   := blazen.Ollama("localhost", 11434, "llama3.1")
lmStudio := blazen.LMStudio("localhost", 1234, "my-model")
vllm     := blazen.OpenAICompat("vllm", &blazen.OpenAICompatConfig{
    BaseURL: "http://gpu:8000/v1", DefaultModel: "meta-llama/Llama-3-70B"})
// Swift
let ollama = Providers.ollama(model: "llama3.1")
let lmStd  = Providers.lmStudio(host: "127.0.0.1", port: 1234, model: "my-model")
// Kotlin
val ollama = Blazen.ollama(model = "llama3.1")
val lmStd  = Blazen.lmStudio(model = "my-model", port = 1234u)
# Ruby — one factory call per OpenAI-compatible backend.
ollama    = Blazen.ollama(model: "llama3.1")
lm_studio = Blazen.lm_studio(model: "my-model", port: 1234)

All factories return a CustomProvider / CustomProviderHandle that exposes the same 16 methods as a subclassed implementation, but only complete / stream / embed succeed — the 13 compute methods raise Unsupported because the OpenAI chat-completions wire format does not cover those capabilities.

The 16 methods

| Method | Argument | Return | Role |
|---|---|---|---|
| complete | CompletionRequest (messages + options) | CompletionResponse | chat completion |
| stream | CompletionRequest + sink | void (streaming chunks via sink / callback) | chat completion |
| embed | List<String> | EmbeddingResponse | embeddings |
| text_to_speech | SpeechRequest | AudioResult | speech synthesis |
| generate_music | MusicRequest | AudioResult | music generation |
| generate_sfx | MusicRequest | AudioResult | sound-effect generation |
| clone_voice | VoiceCloneRequest | VoiceHandle | voice cloning |
| list_voices | (none) | List<VoiceHandle> | voice library listing |
| delete_voice | VoiceHandle | void | voice library cleanup |
| generate_image | ImageRequest | ImageResult | image generation |
| upscale_image | UpscaleRequest | ImageResult | image upscale |
| text_to_video | VideoRequest | VideoResult | text-to-video |
| image_to_video | VideoRequest | VideoResult | image-to-video |
| transcribe | TranscriptionRequest | TranscriptionResult | audio transcription |
| generate_3d | ThreeDRequest | ThreeDResult | 3D model generation |
| remove_background | BackgroundRemovalRequest | ImageResult | background removal |

Each binding ships idiomatic record types backing these arguments and returns. Python and Ruby use snake_case fields; Node, WASM, Swift, and Kotlin use camelCase; Go uses exported PascalCase fields (e.g. MediaType). The Rust definitions in blazen-llm are the source of truth; bindings are regenerated from the workspace by scripts/regen-bindings.sh and pnpm --filter blazen run build.

Instance defaults

Every CustomProvider / BaseProvider exposes builder methods to set defaults applied to every call before delegation. The four universal builders are:

  • with_system_prompt(prompt) — prepend a system message
  • with_tools(tools) — default tool list for completion
  • with_response_format(format) — default response_format (e.g. JSON-mode)
  • with_before_request(hook) — universal pre-flight hook seeing the Rust method name ("complete", "text_to_speech", …) and the request body for mutation

A completion-specific with_before_completion(hook) typed-request hook is also available; nine role-specific *ProviderDefaults types (AudioSpeechProviderDefaults, AudioMusicProviderDefaults, VoiceCloningProviderDefaults, ImageGenerationProviderDefaults, ImageUpscaleProviderDefaults, VideoProviderDefaults, TranscriptionProviderDefaults, ThreeDProviderDefaults, BackgroundRemovalProviderDefaults) carry their own before-hook for each compute capability.

# Python — chain builders on the inherited BaseProvider surface
provider = (
    CustomProvider.ollama(model="llama3.1")
    .with_system_prompt("Be terse.")
    .with_response_format({"type": "json_object"})
    .with_before_request(lambda method, body: body.setdefault("trace_id", "abc"))
)
// Node — same builder surface on every CustomProvider
const provider = CustomProvider.ollama("llama3.1")
  .withSystemPrompt("Be terse.")
  .withResponseFormat({ type: "json_object" })
  .withBeforeRequest(async (method, body) => { body.trace_id = "abc"; });
// Rust — builders return a new handle with mutated defaults.
// NOTE(review): the snippet below is Rust syntax only. Swift and Kotlin
// presumably expose the same builders under camelCase names (e.g.
// withSystemPrompt) — confirm against those bindings.
let provider = ollama("localhost", 11434, "llama3.1")
    .with_system_prompt("Be terse.")
    .with_response_format(serde_json::json!({"type": "json_object"}));
// Go — JSON-encoded variants of with_tools_json and with_response_format_json
provider := blazen.Ollama("localhost", 11434, "llama3.1")
provider = provider.WithSystemPrompt("Be terse.")
provider = provider.WithResponseFormatJson(`{"type":"json_object"}`)
# Ruby — fluent builder chain on the CustomProviderHandle
provider = Blazen.ollama(model: "llama3.1")
  .with_system_prompt("Be terse.")
  .with_response_format_json('{"type":"json_object"}')

Per-role defaults plug in via with_defaults(...) or a role-specific builder such as with_speech_defaults(...):

# Python
from blazen import CustomProvider, AudioSpeechProviderDefaults

# Attach a per-role "before-TTS" hook that mutates SpeechRequest
# (stamp_speech_request and my_tts_provider are placeholders for your own
# hook function and provider instance; neither is defined in this snippet.)
defaults = AudioSpeechProviderDefaults(before=stamp_speech_request)
provider = my_tts_provider.with_speech_defaults(defaults)
// Kotlin
val provider = Blazen.ollama(model = "llama3.1")
    .withDefaults(CompletionProviderDefaults(systemPrompt = "Be terse."))

Typed extract<T> extraction

BaseProvider.extract(schema, messages) is the ergonomic structured-output API. Each binding accepts whatever schema container is idiomatic for its language:

# Python — pydantic BaseModel subclass
from pydantic import BaseModel

class Recipe(BaseModel):
    title: str
    ingredients: list[str]
    steps: list[str]

provider = CustomProvider.ollama(model="llama3.1")
recipe = await provider.extract(Recipe, [
    ChatMessage.user("Give me a chocolate chip cookie recipe."),
])
print(recipe.title)
// Node — zod schema or raw JSON Schema
import { z } from "zod";

const Recipe = z.object({
  title: z.string(),
  ingredients: z.array(z.string()),
  steps: z.array(z.string()),
});

const provider = CustomProvider.ollama("llama3.1");
const recipe = await provider.extract(Recipe, [
  { role: "user", content: "Give me a cookie recipe." },
]);
// WASM — JSON Schema object (zod schemas converted via zod-to-json-schema)
const provider = CustomProvider.ollama("llama3.1");
const recipe = await provider.extract(
  {
    type: "object",
    properties: {
      title: { type: "string" },
      ingredients: { type: "array", items: { type: "string" } },
    },
    required: ["title", "ingredients"],
  },
  [{ role: "user", content: "Give me a cookie recipe." }],
);
// Go — JSON Schema as a string; result is a JSON string the caller unmarshals.
// (Shown as the body of a function that returns error.)
schema := `{
    "type":"object",
    "properties":{"title":{"type":"string"}},
    "required":["title"]
}`
provider := blazen.Ollama("localhost", 11434, "llama3.1")
jsonStr, err := provider.AsBaseProvider().Extract(schema, messages)
if err != nil {
    // An unused err is a compile error in Go, and jsonStr is not meaningful
    // when the call failed.
    return err
}
var recipe Recipe
// Surface malformed or schema-mismatched JSON instead of silently leaving
// recipe zero-valued.
if err := json.Unmarshal([]byte(jsonStr), &recipe); err != nil {
    return err
}
// Swift — any Decodable type. The wrapper derives JSON Schema and decodes.
struct Recipe: Decodable {
    let title: String
    let ingredients: [String]
    let steps: [String]
}

let provider = Providers.ollama(model: "llama3.1")
let recipe: Recipe = try await provider.extract(
    schema: Recipe.self, messages: messages)
// Kotlin — KSerializer<T> from kotlinx.serialization.
import kotlinx.serialization.Serializable
import kotlinx.serialization.serializer

@Serializable
data class Recipe(val title: String, val ingredients: List<String>)

val provider = Blazen.ollama(model = "llama3.1")
val recipe: Recipe = provider.extract(Recipe.serializer(), messages)
# Ruby — schema as a Ruby Hash; result is a Hash the caller validates.
schema = {
  "type" => "object",
  "properties" => { "title" => { "type" => "string" } },
  "required" => ["title"],
}
provider = Blazen.ollama(model: "llama3.1")
recipe = provider.extract(schema, messages)  # => { "title" => "..." }

Internally extract populates response_format with the JSON Schema, issues complete(), parses the assistant content as JSON, and validates against the supplied schema container.

ApiProtocol reference

ApiProtocol is the enum that selects how a CustomProvider talks to its backend. There are two variants:

  • ApiProtocol.openai(config) — Blazen owns HTTP, SSE parsing, tool serialization, and embeddings; the foreign side only supplies the endpoint config (OpenAiCompatConfig).
  • ApiProtocol.custom() — Blazen forwards every dispatched method through to the foreign-implemented CustomProvider.

The four factories pick the right variant automatically:

  • ollama / lm_studio → ApiProtocol.openai({ base_url: ... })
  • openai_compat(provider_id, config) → ApiProtocol.openai(config)
  • from_subclass / from_js_object / Providers.custom(...) / Blazen.customProvider(...) / CustomProviderFrom(...) → ApiProtocol.custom()

You can construct it explicitly when calling CustomProvider(provider_id, protocol=...) directly, but the factories cover every realistic use case.

Limitations

One limitation to flag for V1:

  • Node prototype-walk re-entrance. When the Node CustomProvider base constructor detects subclassing it walks the prototype chain to find user-defined methods. If a subclass’s constructor invokes a capability method on this before super() returns, the override has not yet been installed and the call will see the base class’s Unsupported-raising default. Always finish super() before dispatching capability methods on this.

See also

  • Batch Completions — subclassed providers work with the batch helper
  • Memory — drop in custom embedders and backends
  • Media Generation — the typed compute surfaces your CustomProvider plugs into
  • Local Inference — pair subclassing with ModelManager for per-pool memory budgeting (CPU RAM and GPU VRAM) on in-process providers