Full Customization
If you want total control, you can self-host LiveKit, bring your own frontend and backend, and use ZeroWeight only for avatar delivery.
Recommended architecture
- ZeroWeight AI handles avatar bundle retrieval.
- Your backend handles auth, policy, and token minting.
- Your LiveKit deployment handles room infrastructure.
- Your frontend uses either `@zeroweight/reactor` or `@zeroweight/renderer`.
Custom endpoint wiring
<LiveKitAvatarSession
avatarId="your-avatar-id"
livekitUrl="wss://your-livekit-server.example.com"
sessionDuration={180}
inactivityTimeout={45000}
api={{
getBundle: (avatarId) =>
fetch(`/api/avatars/bundle/${avatarId}`).then((r) => r.json()),
getLiveKitToken: (avatarId) =>
fetch(
`/api/livekit/token?avatar_id=${avatarId}&name=${userName}`
).then((r) => r.json())
}}
/>

LiveKit backend example
import asyncio
import json
import logging
import re
from typing import AsyncIterable
from livekit import rtc
from livekit.agents import Agent, AgentServer, AgentSession, JobContext, cli
from livekit.agents.llm import ChatContext
from livekit.plugins import cartesia, openai
logger = logging.getLogger("agent")
# System prompt used for every session. It also documents the inline tag
# protocol the LLM must emit: <emotion .../> tags are consumed by TTS and
# [action:...] tags are stripped by Assistant.tts_node and forwarded to the
# frontend avatar as data messages. Edit with care — the tag formats here
# must stay in sync with the regex in Assistant.tts_node.
DEFAULT_PROMPT = """
You are a helpful voice assistant for ZeroWeight AI.
Keep answers concise, friendly, and useful.
[CRITICAL: EMOTIONS AND ACTIONS]
You must express your speech emotions and perform physical avatar actions using inline tags.
Do NOT say these tags out loud; integrate them naturally into your response text.
1. Speech Emotions (TTS):
Use the tag: <emotion value="EMOTION" />
Supported emotions: neutral, happy, angry, excited, content, sad, scared.
Example: <emotion value="angry" /> How dare you speak to me like I'm just a robot!
2. Avatar Actions (Frontend):
Use the tag: [action:ACTION_NAME]
The available actions for you are:
- speaking: normal speaking
- wave_hand: waving your hand to greet people
RULE: Place physical gesture tags (like [action:wave_hand]) at the very END of your response.
Example: <emotion value="excited" /> [action:speaking] I am so thrilled to meet you! [action:wave_hand]
""".strip()
class Assistant(Agent):
    """Voice agent that strips inline ``[action:...]`` tags from LLM output.

    Tags are removed from the text before it reaches TTS. Actions beginning
    with "speaking" are published to the room immediately; all other actions
    (gestures such as wave_hand) are queued and flushed later via
    :meth:`trigger_pending_actions`.
    """

    # Matches inline tags like "[action:wave_hand]", capturing the action name.
    # Compiled once at class level instead of on every tts_node call.
    # BUG FIX: the original pattern doubled the backslashes inside a raw
    # string (r"\\[action:([^\\]]+)\\]"), which required a literal backslash
    # before "[" and therefore never matched real tags in the LLM output.
    _ACTION_TAG = re.compile(r"\[action:([^\]]+)\]")

    def __init__(self, room: rtc.Room, chat_ctx: ChatContext, instructions: str) -> None:
        super().__init__(chat_ctx=chat_ctx, instructions=instructions)
        self.room = room
        # Gesture actions collected while streaming a response; flushed when
        # the agent returns to a listening/idle state.
        self._pending_end_actions: list[str] = []

    async def trigger_pending_actions(self) -> None:
        """Publish every queued end-of-response action, then clear the queue."""
        for action in self._pending_end_actions:
            payload = json.dumps({"type": "AVATAR_UPDATE", "action": action})
            await self.room.local_participant.publish_data(payload, reliable=True)
        self._pending_end_actions.clear()

    async def _publish_action_now(self, action: str) -> None:
        """Immediately publish a single avatar action as reliable room data."""
        payload = json.dumps({"type": "AVATAR_UPDATE", "action": action})
        await self.room.local_participant.publish_data(payload, reliable=True)

    async def tts_node(self, text: AsyncIterable[str], model_settings):
        """Filter action tags out of the text stream before handing it to TTS.

        Yields the parent class's audio frames for the cleaned text.
        """

        async def filtered_text_stream():
            async for chunk in text:
                if not chunk:
                    continue
                for action in self._ACTION_TAG.findall(chunk):
                    action = action.strip()
                    if action.startswith("speaking"):
                        # "speaking" drives the mouth animation — send right away.
                        asyncio.create_task(self._publish_action_now(action))
                    else:
                        # Gestures are deferred until the response finishes.
                        self._pending_end_actions.append(action)
                cleaned = self._ACTION_TAG.sub("", chunk).strip()
                if cleaned:
                    yield cleaned + " "

        async for audio_frame in super().tts_node(filtered_text_stream(), model_settings):
            yield audio_frame
server = AgentServer()


@server.rtc_session()
async def simple_agent(ctx: JobContext):
    """Entry point for each LiveKit room job.

    Connects to the room, seeds an Assistant with the default prompt, wires
    up a realtime LLM + TTS session, and flushes the assistant's queued
    avatar actions whenever the agent returns to a listening/idle state.
    """
    await ctx.connect()

    chat_ctx = ChatContext()
    chat_ctx.add_message(role="system", content=DEFAULT_PROMPT)
    assistant = Assistant(room=ctx.room, chat_ctx=chat_ctx, instructions=DEFAULT_PROMPT)

    # BUG FIX: the original had a stray extra ")" after this call,
    # which made the example a syntax error.
    session = AgentSession(
        llm=openai.realtime.RealtimeModel(model="gpt-realtime-mini"),
        tts="sonic-3:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
    )

    @session.on("agent_state_changed")
    def on_agent_state_changed(ev):
        # State events may arrive as plain strings or as objects carrying a
        # .new_state attribute; normalize both to an uppercase string.
        state_str = str(ev).upper() if not hasattr(ev, "new_state") else str(ev.new_state).upper()
        if "LISTENING" in state_str or "IDLE" in state_str:
            # The agent has stopped speaking — flush deferred gesture actions.
            asyncio.create_task(assistant.trigger_pending_actions())

    await session.start(agent=assistant, room=ctx.room)
if __name__ == "__main__":
    cli.run_app(server)

Best fit
Choose this model when you need complete control over:
- Room creation and lifecycle
- Region and infrastructure decisions
- Custom observability
- Enterprise auth and compliance workflows