---
title: "Multi-Tenant Cost Savings for Bursty Workloads"
description: "How much capacity does pooling N independent bursty tenants actually save? An end-to-end discrete-event simulation."
author: "Adam Fillion"
date: "2026-06-06"
categories: [multi-tenancy, simulation, distributed-systems]
draft: false
---
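Each tenant's request stream is modeled as a two-state Markov-modulated Poisson process (MMPP) that alternates between a calm state (0.2 req/s) and a burst state (20 req/s). All tenants feed a single shared server with unlimited concurrency and a fixed 1.0 s service time, so every request is admitted immediately. The peak in-flight request count observed on that server is the capacity you'd need to provision to serve the whole pool without queueing.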

```{python}
#| code-fold: true
#| code-summary: "Show simulation source (multi_tenant_provisioning.py)"
#| eval: false
"""Bursty multi-tenant load: measuring max concurrency for capacity provisioning."""
from __future__ import annotations
import random
from dataclasses import dataclass
from typing import TYPE_CHECKING
from happysimulator import Data, Entity, Event, Instant, Probe, Simulation, SimulationSummary
if TYPE_CHECKING:
from collections.abc import Generator
class MMPPSource(Entity):
    """Request source driven by a two-state Markov-modulated Poisson process.

    The source is always in one of two states, ``"calm"`` or ``"burst"``
    (it starts in ``"calm"``). While in a state it produces arrivals separated
    by exponentially distributed gaps at that state's rate, and it remains in
    the state for an exponentially distributed dwell whose mean is that
    state's mean dwell. Every accepted arrival is forwarded to ``target`` as
    a new event of type ``event_type``.

    Args:
        name: Entity name, passed through to ``Entity.__init__``.
        target: Entity that receives the generated request events.
        calm_rate: Arrival rate used while in the calm state.
        burst_rate: Arrival rate used while in the burst state.
        calm_mean_dwell_s: Mean length of one visit to the calm state.
        burst_mean_dwell_s: Mean length of one visit to the burst state.
        stop_after: Optional cut-off; no internal event is scheduled later
            than this. ``None`` means the source never stops on its own.
        event_type: ``event_type`` given to the events sent to ``target``.
        rng: Random generator used for every draw; a fresh, unseeded
            ``random.Random()`` is created when omitted.

    NOTE(review): rates are presumably per second and dwell means in seconds
    (``_s`` suffix) -- confirm against how the time type adds plain numbers.
    """

    def __init__(self, name, target, *, calm_rate, burst_rate,
                 calm_mean_dwell_s, burst_mean_dwell_s,
                 stop_after=None, event_type="Request", rng=None):
        super().__init__(name)
        self.target = target
        self.event_type = event_type
        self.stop_after = stop_after

        # Per-state parameters of the modulated process.
        self.calm_rate = calm_rate
        self.burst_rate = burst_rate
        self.calm_mean_dwell_s = calm_mean_dwell_s
        self.burst_mean_dwell_s = burst_mean_dwell_s

        if rng is None:
            rng = random.Random()
        self._rng = rng

        self.state = "calm"
        self.generated_count = 0  # number of requests emitted so far
        # Bumped on every state change; arrivals carry the epoch they were
        # scheduled under so that stale ones can be recognised and dropped.
        self._arrival_epoch = 0

    @property
    def current_rate(self):
        """Arrival rate belonging to the state the source is in right now."""
        if self.state == "burst":
            return self.burst_rate
        return self.calm_rate

    def _mean_dwell(self):
        """Mean dwell time of the state the source is in right now."""
        if self.state == "burst":
            return self.burst_mean_dwell_s
        return self.calm_mean_dwell_s

    def _draw_dwell(self):
        """Draw an exponentially distributed dwell for the current state."""
        return self._rng.expovariate(1.0 / self._mean_dwell())

    def _within_horizon(self, when):
        """Return True when ``when`` is not later than ``stop_after`` (or no cut-off is set)."""
        return self.stop_after is None or when <= self.stop_after

    def _transition_event(self, when):
        """Build the self-targeted event that flips the state at ``when``."""
        return Event(time=when, event_type="_StateTransition", target=self)

    def _arrival_event(self, when):
        """Build a self-targeted arrival at ``when``, tagged with the current epoch."""
        return Event(time=when, event_type="_Arrival", target=self,
                     context={"epoch": self._arrival_epoch})

    def start_events(self, start_time):
        """Return the initial events for this source, measured from ``start_time``.

        At most two events are produced: the first state transition and the
        first arrival. Either is omitted when it would fall after
        ``stop_after``; the arrival is also omitted when the current rate is
        not positive.
        """
        initial = []

        # The dwell is drawn before the inter-arrival gap; the draw order
        # determines the random stream, so it must not be swapped.
        dwell = self._draw_dwell()
        first_flip = start_time + dwell
        if self._within_horizon(first_flip):
            initial.append(self._transition_event(first_flip))

        rate = self.current_rate
        if rate > 0:
            gap = self._rng.expovariate(rate)
            first_arrival = start_time + gap
            if self._within_horizon(first_arrival):
                initial.append(self._arrival_event(first_arrival))

        return initial

    def handle_event(self, event):
        """Process one of the source's own events and return the follow-up events.

        ``"_StateTransition"`` and ``"_Arrival"`` events are handled; any
        other event type yields an empty list.
        """
        kind = event.event_type
        if kind == "_StateTransition":
            return self._on_state_transition()
        if kind == "_Arrival":
            return self._on_arrival(event)
        return []

    def _on_state_transition(self):
        """Flip the state, then schedule the next flip and a fresh arrival."""
        # "calm" -> "burst"; anything else -> "calm".
        self.state = {"calm": "burst"}.get(self.state, "calm")
        # Invalidate the arrival that was pending under the previous state.
        self._arrival_epoch += 1

        follow_ups = []
        dwell = self._draw_dwell()
        next_flip = self.now + dwell
        if self._within_horizon(next_flip):
            follow_ups.append(self._transition_event(next_flip))

        # Restart the arrival stream at the new state's rate.
        upcoming = self._schedule_next_arrival()
        if upcoming is not None:
            follow_ups.append(upcoming)
        return follow_ups

    def _on_arrival(self, event):
        """Emit a request to ``target`` and schedule the next arrival."""
        if event.context.get("epoch") != self._arrival_epoch:
            # Scheduled before the most recent state change: ignore it.
            return []

        self.generated_count += 1
        request = Event(time=self.now, event_type=self.event_type, target=self.target,
                        context={"created_at": self.now, "request_id": self.generated_count})

        emitted = [request]
        upcoming = self._schedule_next_arrival()
        if upcoming is not None:
            emitted.append(upcoming)
        return emitted

    def _schedule_next_arrival(self):
        """Return the next arrival event after ``self.now``, or None.

        None is returned when the current rate is not positive or when the
        drawn arrival time falls after ``stop_after``.
        """
        rate = self.current_rate
        if rate <= 0:
            return None
        gap = self._rng.expovariate(rate)
        when = self.now + gap
        if self.stop_after is not None and when > self.stop_after:
            return None
        return self._arrival_event(when)

    def downstream_entities(self):
        """Return the entities this source sends events to (just ``target``)."""
        return [self.target]
class ConcurrencyMeasuringServer(Entity):
    """Unlimited-concurrency server with a fixed service time.

    Every incoming event is admitted immediately; nothing is queued or
    rejected. The server only tracks how many requests are being served at
    once and the highest value that count has reached.

    Args:
        name: Entity name, passed through to ``Entity.__init__``.
        service_time_s: Value yielded once per request while it is in service.
            NOTE(review): presumably interpreted by the simulation framework
            as a delay in seconds -- confirm.
    """

    def __init__(self, name, *, service_time_s=1.0):
        super().__init__(name)
        self.service_time_s = service_time_s
        self.in_flight = 0  # requests currently being served
        self.max_concurrency = 0  # high-water mark of in_flight
        self.completed_count = 0  # requests whose service ran to completion

    def handle_event(self, event):
        """Serve one request: generator that yields the service time once.

        ``in_flight`` is raised on entry and always lowered again, while
        ``completed_count`` is advanced only when the generator is resumed
        normally after the yield.
        """
        self.in_flight += 1
        self.max_concurrency = max(self.max_concurrency, self.in_flight)
        try:
            yield self.service_time_s
        finally:
            # Release the slot on every exit path, including the generator
            # being closed (GeneratorExit) before the service time elapsed.
            self.in_flight -= 1
        # Only reached when service finished normally. Counting inside the
        # ``finally`` would also count requests abandoned mid-service (e.g.
        # still in flight when the simulation ends) as completed.
        self.completed_count += 1
def run_simulation(*, num_tenants=1, duration_s=600.0, drain_s=5.0,
                   calm_rate=0.2, burst_rate=20.0,
                   calm_mean_dwell_s=50.0, burst_mean_dwell_s=2.0,
                   service_time_s=1.0, probe_interval_s=0.05, seed=42):
    """Simulate N independent MMPP tenants feeding one shared server.

    The peak ``in_flight`` value on the returned server is the capacity
    required to serve the pool without queueing.

    Args:
        num_tenants: Number of independent ``MMPPSource`` tenants to create.
        duration_s: Time after which the tenants stop scheduling events.
        drain_s: Extra simulated time after ``duration_s`` before the run ends.
        calm_rate: Per-tenant arrival rate in the calm state.
        burst_rate: Per-tenant arrival rate in the burst state.
        calm_mean_dwell_s: Mean dwell of a tenant in the calm state.
        burst_mean_dwell_s: Mean dwell of a tenant in the burst state.
        service_time_s: Fixed service time of the shared server.
        probe_interval_s: Sampling interval of the ``in_flight`` probe.
        seed: Base seed. When not None it seeds the global ``random`` module
            and gives tenant ``i`` its own generator seeded with
            ``seed * 1_000_000 + i``; when None every generator is unseeded.

    Returns:
        A ``(server, in_flight_data)`` pair: the server entity after the run
        and the data object returned by ``Probe.on`` for ``in_flight``.
    """
    reproducible = seed is not None
    if reproducible:
        random.seed(seed)

    horizon = Instant.from_seconds(duration_s)
    server = ConcurrencyMeasuringServer("Server", service_time_s=service_time_s)

    def tenant_rng(index):
        # Each tenant gets its own stream so tenants stay independent of one
        # another and of the global generator.
        if not reproducible:
            return random.Random()
        return random.Random((seed or 0) * 1_000_000 + index)

    sources = [
        MMPPSource(
            f"Tenant-{index}",
            target=server,
            calm_rate=calm_rate,
            burst_rate=burst_rate,
            calm_mean_dwell_s=calm_mean_dwell_s,
            burst_mean_dwell_s=burst_mean_dwell_s,
            stop_after=horizon,
            rng=tenant_rng(index),
        )
        for index in range(num_tenants)
    ]

    in_flight_probe, in_flight_data = Probe.on(server, "in_flight", interval=probe_interval_s)

    # Run past the arrival horizon by drain_s so late requests can finish.
    sim = Simulation(
        start_time=Instant.Epoch,
        duration=duration_s + drain_s,
        entities=[*sources, server],
        probes=[in_flight_probe],
    )

    # Collect every tenant's initial events, in tenant order.
    bootstrap = [
        first_event
        for tenant in sources
        for first_event in tenant.start_events(Instant.Epoch)
    ]
    sim.schedule(bootstrap)
    sim.run()

    return server, in_flight_data
# Reproduce headline numbers
# python examples/performance/multi_tenant_provisioning.py --comparison --duration 600
```