Multi-Tenant Cost Savings for Bursty Workloads

multi-tenancy
simulation
distributed-systems
How much capacity does pooling N independent bursty tenants actually save? An end-to-end discrete-event simulation.
Author

Adam Fillion

Published

June 6, 2026

Each tenant is a Markov-modulated Poisson process (calm: 0.2 req/s, burst: 20 req/s). A shared server with unlimited concurrency and 1.0 s service time absorbs all of them. The peak in-flight count is the capacity you’d need to provision.

Aggregate in-flight requests as the tenant pool grows from 1 to 1,000 tenants. Per-tenant cost collapses: 30 → 5.6 → 2.43 → 1.38 slots.
Show simulation source (multi_tenant_provisioning.py)
"""Bursty multi-tenant load: measuring max concurrency for capacity provisioning."""

from __future__ import annotations
import random
from dataclasses import dataclass
from typing import TYPE_CHECKING

from happysimulator import Data, Entity, Event, Instant, Probe, Simulation, SimulationSummary

if TYPE_CHECKING:
    from collections.abc import Generator


class MMPPSource(Entity):
    """Request generator driven by a two-state Markov-modulated Poisson process.

    The source alternates between a ``"calm"`` and a ``"burst"`` state. The
    time spent in each state is drawn from an exponential distribution with
    the configured mean dwell, and while in a state, arrivals are generated
    with exponential inter-arrival times at that state's rate. Each arrival
    is forwarded to ``target`` as an event of type ``event_type``.

    Args:
        name: Entity name passed to the base class.
        target: Entity that receives the generated request events.
        calm_rate: Arrival rate (events per second) in the calm state.
        burst_rate: Arrival rate (events per second) in the burst state.
        calm_mean_dwell_s: Mean seconds spent in the calm state.
        burst_mean_dwell_s: Mean seconds spent in the burst state.
        stop_after: Optional cutoff time; nothing is scheduled past it.
        event_type: Event type string used for emitted requests.
        rng: Optional ``random.Random``; a fresh unseeded one is used if omitted.
    """

    def __init__(self, name, target, *, calm_rate, burst_rate,
                 calm_mean_dwell_s, burst_mean_dwell_s,
                 stop_after=None, event_type="Request", rng=None):
        super().__init__(name)
        self.target = target
        self.calm_rate = calm_rate
        self.burst_rate = burst_rate
        self.calm_mean_dwell_s = calm_mean_dwell_s
        self.burst_mean_dwell_s = burst_mean_dwell_s
        self.stop_after = stop_after
        self.event_type = event_type
        if rng is None:
            rng = random.Random()
        self._rng = rng
        self.state = "calm"
        self.generated_count = 0
        # Bumped on every state change so arrivals scheduled under the
        # previous state's rate can be recognised and dropped.
        self._arrival_epoch = 0

    @property
    def current_rate(self):
        """Arrival rate of the state the source is currently in."""
        if self.state == "burst":
            return self.burst_rate
        return self.calm_rate

    def _mean_dwell(self):
        """Mean dwell time (seconds) of the current state."""
        if self.state == "burst":
            return self.burst_mean_dwell_s
        return self.calm_mean_dwell_s

    def _before_cutoff(self, when):
        """Return True when ``when`` does not fall past ``stop_after``."""
        return self.stop_after is None or when <= self.stop_after

    def _transition_after(self, base):
        """Draw a dwell time and build the next state-transition event.

        Returns None when the transition would land past the cutoff.
        """
        when = base + self._rng.expovariate(1.0 / self._mean_dwell())
        if not self._before_cutoff(when):
            return None
        return Event(time=when, event_type="_StateTransition", target=self)

    def _arrival_after(self, base, rate):
        """Draw an inter-arrival gap at ``rate`` and build the arrival event.

        The caller is responsible for checking that ``rate`` is positive.
        Returns None when the arrival would land past the cutoff.
        """
        when = base + self._rng.expovariate(rate)
        if not self._before_cutoff(when):
            return None
        return Event(time=when, event_type="_Arrival", target=self,
                     context={"epoch": self._arrival_epoch})

    def start_events(self, start_time):
        """Build the bootstrap events: first transition, then first arrival."""
        pending = []
        # The dwell is drawn before the inter-arrival gap; keep that order
        # so seeded runs stay reproducible.
        transition = self._transition_after(start_time)
        if transition is not None:
            pending.append(transition)
        rate = self.current_rate
        if rate > 0:
            arrival = self._arrival_after(start_time, rate)
            if arrival is not None:
                pending.append(arrival)
        return pending

    def handle_event(self, event):
        """Dispatch internal events; unknown event types produce nothing."""
        kind = event.event_type
        if kind == "_StateTransition":
            return self._on_state_transition()
        if kind == "_Arrival":
            return self._on_arrival(event)
        return []

    def _on_state_transition(self):
        """Flip state, invalidate pending arrivals, and reschedule both chains."""
        if self.state == "calm":
            self.state = "burst"
        else:
            self.state = "calm"
        self._arrival_epoch += 1
        # Evaluated left to right: dwell draw first, then inter-arrival draw.
        candidates = [self._transition_after(self.now), self._schedule_next_arrival()]
        return [ev for ev in candidates if ev is not None]

    def _on_arrival(self, event):
        """Emit one request to the target and schedule the following arrival."""
        if event.context.get("epoch") != self._arrival_epoch:
            # Scheduled before the last state change; superseded.
            return []
        self.generated_count += 1
        request = Event(time=self.now, event_type=self.event_type, target=self.target,
                        context={"created_at": self.now, "request_id": self.generated_count})
        follow_up = self._schedule_next_arrival()
        if follow_up is None:
            return [request]
        return [request, follow_up]

    def _schedule_next_arrival(self):
        """Return the next arrival event from the current time, or None.

        None is returned when the current rate is not positive or the next
        arrival would fall past ``stop_after``.
        """
        rate = self.current_rate
        if rate <= 0:
            return None
        return self._arrival_after(self.now, rate)

    def downstream_entities(self):
        """Entities this source sends events to."""
        return [self.target]


class ConcurrencyMeasuringServer(Entity):
    """Unlimited-concurrency server with a fixed service time.

    Every incoming event is treated as one request. The server never queues
    or rejects; it only keeps counters:

    * ``in_flight``: requests currently being served.
    * ``max_concurrency``: highest value ``in_flight`` has reached.
    * ``completed_count``: requests whose service time fully elapsed.

    Args:
        name: Entity name passed to the base class.
        service_time_s: Value yielded for each request as its service delay.
    """

    def __init__(self, name, *, service_time_s=1.0):
        super().__init__(name)
        self.service_time_s = service_time_s
        self.in_flight = 0
        self.max_concurrency = 0
        self.completed_count = 0

    def handle_event(self, event):
        """Serve one request as a generator that yields the service delay.

        ``in_flight`` is decremented in ``finally`` so it stays balanced even
        if the generator is closed or an exception is thrown into it while
        the request is still pending. ``completed_count`` is only incremented
        when the generator is resumed normally after the yield, so requests
        abandoned mid-service are not reported as completed.
        """
        self.in_flight += 1
        if self.in_flight > self.max_concurrency:
            self.max_concurrency = self.in_flight
        try:
            yield self.service_time_s
        finally:
            self.in_flight -= 1
        # Reached only on normal resumption; close()/throw() skip this line.
        self.completed_count += 1


def run_simulation(*, num_tenants=1, duration_s=600.0, drain_s=5.0,
                   calm_rate=0.2, burst_rate=20.0,
                   calm_mean_dwell_s=50.0, burst_mean_dwell_s=2.0,
                   service_time_s=1.0, probe_interval_s=0.05, seed=42):
    """Run ``num_tenants`` independent MMPP sources against one shared server.

    The server's peak ``in_flight`` is the capacity needed to serve the whole
    pool without queueing.

    Args:
        num_tenants: Number of MMPP sources to create.
        duration_s: Seconds during which sources may schedule events.
        drain_s: Extra simulated seconds appended after ``duration_s``.
        calm_rate: Per-tenant arrival rate in the calm state.
        burst_rate: Per-tenant arrival rate in the burst state.
        calm_mean_dwell_s: Mean seconds a tenant stays calm.
        burst_mean_dwell_s: Mean seconds a tenant stays in burst.
        service_time_s: Fixed service time of the shared server.
        probe_interval_s: Sampling interval of the ``in_flight`` probe.
        seed: Base seed; ``None`` leaves every RNG unseeded.

    Returns:
        A ``(server, in_flight_data)`` tuple: the server entity with its
        counters, and the data object returned by ``Probe.on``.
    """
    seeded = seed is not None
    if seeded:
        random.seed(seed)

    def tenant_rng(index):
        # Each tenant gets its own stream, derived from the base seed and the
        # tenant index, so tenants are independent yet reproducible.
        if not seeded:
            return random.Random()
        return random.Random((seed or 0) * 1_000_000 + index)

    cutoff = Instant.from_seconds(duration_s)
    server = ConcurrencyMeasuringServer("Server", service_time_s=service_time_s)

    sources = [
        MMPPSource(
            f"Tenant-{index}",
            target=server,
            calm_rate=calm_rate,
            burst_rate=burst_rate,
            calm_mean_dwell_s=calm_mean_dwell_s,
            burst_mean_dwell_s=burst_mean_dwell_s,
            stop_after=cutoff,
            rng=tenant_rng(index),
        )
        for index in range(num_tenants)
    ]

    probe, samples = Probe.on(server, "in_flight", interval=probe_interval_s)
    sim = Simulation(
        start_time=Instant.Epoch,
        duration=duration_s + drain_s,
        entities=[*sources, server],
        probes=[probe],
    )

    # Seed the event queue with every tenant's first transition and arrival.
    initial_events = [
        ev for src in sources for ev in src.start_events(Instant.Epoch)
    ]
    sim.schedule(initial_events)
    sim.run()
    return server, samples


# Reproduce headline numbers
# python examples/performance/multi_tenant_provisioning.py --comparison --duration 600