Demo: Benchmark Runner — Meça Performance da Prompt API

Visão Geral

Ferramenta de benchmark que roda N prompts sequenciais contra a Prompt API e mede três métricas-chave: time-to-first-token (TTFT), latência total e tokens/segundo. Permite comparar diferentes configurações (temperature, topK) lado a lado com resultados em tabela e barras visuais.

Pra quem: Desenvolvedores que querem entender os limites de performance do modelo on-device antes de colocar em produção.

Técnica principal: performance.now() + streaming via promptStreaming() pra capturar o timestamp do primeiro chunk e estimar o throughput (tokens estimados em ~4 caracteres por token a partir do tamanho da resposta, não contados um a um).

Wireframe

┌──────────────────────────────────────────────────────────────┐
│  ⚡ Performance Benchmark Runner                              │
├──────────────────────────────────────────────────────────────┤
│                                                              │
│  Configuração do Batch                                       │
│  ┌────────────────────────────────────────────────────────┐  │
│  │ Prompts: [textarea com 1 prompt por linha]             │  │
│  │                                                        │  │
│  └────────────────────────────────────────────────────────┘  │
│                                                              │
│  Temperature: [0.0 ▰▰▰▱▱ 2.0]   topK: [1─────40]          │
│  Repetições por prompt: [3]                                  │
│                                                              │
│  [ ▶ Iniciar Benchmark ]  [ ⬜ Parar ]                       │
│                                                              │
│  ┌────────────────────────────────────────────────────────┐  │
│  │ Progresso: ████████░░░░░░░░  5/12 prompts  (42%)      │  │
│  └────────────────────────────────────────────────────────┘  │
│                                                              │
│  ┌────────────────────────────────────────────────────────┐  │
│  │  #  │ Prompt (trunc)  │ TTFT   │ Total  │ Tok/s │ Len │  │
│  │─────┼─────────────────┼────────┼────────┼───────┼─────│  │
│  │  1  │ "Explique qu…"  │ 142ms  │ 1.8s   │ 18.3  │ 33  │  │
│  │  2  │ "Liste 5 fr…"   │ 128ms  │ 2.1s   │ 21.0  │ 44  │  │
│  │  3  │ "Resuma o c…"   │ 155ms  │ 1.5s   │ 16.7  │ 25  │  │
│  └────────────────────────────────────────────────────────┘  │
│                                                              │
│  Resumo                                                      │
│  ┌────────────────────────────────────────────────────────┐  │
│  │  TTFT médio: 142ms │ Latência média: 1.8s │ 18.7 tk/s │  │
│  │  ████████████████░░░░░░░░░░  (barra tok/s relativo)   │  │
│  └────────────────────────────────────────────────────────┘  │
│                                                              │
│  ⚠️ Requer Chrome 138+ com Prompt API habilitada            │
└──────────────────────────────────────────────────────────────┘

HTML

<!--
  Benchmark Runner markup.
  Every id below is looked up by the BenchmarkRunner script; keep them stable.
  Sections marked `hidden` are revealed by the script as the run progresses.
-->
<section class="demo-container" id="benchmark-runner" aria-labelledby="benchmark-title">
  <h2 id="benchmark-title">⚡ Performance Benchmark Runner</h2>

  <!-- Live region: status/error messages are injected by the script, so the
       container must already exist in the DOM for assistive tech to announce them. -->
  <div id="status-bar" class="status" role="status" hidden>
    <span id="status-message"></span>
  </div>

  <!-- Batch configuration -->
  <fieldset class="config-panel">
    <legend>Configuração do Batch</legend>

    <label for="prompts-input">Prompts (1 por linha):</label>
    <textarea
      id="prompts-input"
      rows="5"
      placeholder="Explique o que é recursão em 2 frases&#10;Liste 5 frutas tropicais&#10;Resuma o conceito de API REST"
    ></textarea>

    <div class="config-row">
      <label>
        Temperature: <output id="temp-value" for="temperature">1.0</output>
        <input type="range" id="temperature" min="0" max="2" step="0.1" value="1.0">
      </label>
      <label>
        Top-K: <output id="topk-value" for="topk">8</output>
        <input type="range" id="topk" min="1" max="40" step="1" value="8">
      </label>
      <label>
        Repetições:
        <input type="number" id="repetitions" min="1" max="10" value="3">
      </label>
    </div>
  </fieldset>

  <!-- Controls: both start disabled; the script enables them once the API is available.
       Explicit type="button" keeps them inert if this markup is ever placed inside a form. -->
  <div class="actions">
    <button type="button" id="btn-start" disabled>▶ Iniciar Benchmark</button>
    <button type="button" id="btn-stop" disabled>⬜ Parar</button>
  </div>

  <!-- Progress: the script sets the fill width and the text -->
  <div id="progress-section" hidden>
    <div class="progress-bar">
      <div id="progress-fill"></div>
    </div>
    <span id="progress-text">0/0 prompts (0%)</span>
  </div>

  <!-- Results table: one row is appended per completed run -->
  <div id="results-section" hidden>
    <table id="results-table">
      <caption>Resultados por execução (tokens ~estimados, 4 chars/token)</caption>
      <thead>
        <tr>
          <th scope="col">#</th>
          <th scope="col">Prompt</th>
          <th scope="col">TTFT</th>
          <th scope="col">Total</th>
          <th scope="col">Tok/s</th>
          <th scope="col">Tokens</th>
        </tr>
      </thead>
      <tbody id="results-body"></tbody>
    </table>
  </div>

  <!-- Summary: averages over all completed runs -->
  <div id="summary-section" hidden>
    <h3>Resumo</h3>
    <div class="summary-grid">
      <div class="metric-card">
        <span class="metric-label">TTFT médio</span>
        <span id="avg-ttft" class="metric-value">—</span>
      </div>
      <div class="metric-card">
        <span class="metric-label">Latência média</span>
        <span id="avg-latency" class="metric-value">—</span>
      </div>
      <div class="metric-card">
        <span class="metric-label">Throughput médio</span>
        <span id="avg-throughput" class="metric-value">—</span>
      </div>
    </div>
    <div id="throughput-bar" class="visual-bar">
      <div id="throughput-fill"></div>
    </div>
  </div>

  <p class="requirements">⚠️ Requer Chrome 138+ com Prompt API habilitada</p>
</section>

Código JavaScript

/**
 * Runs a batch of prompts sequentially against the Prompt API (`LanguageModel`)
 * and records, per run, time-to-first-token, total latency and an estimated
 * tokens/second figure. Results are rendered into the results table and, at the
 * end of the run (or when the user stops it), into the summary panel.
 *
 * The class looks up its DOM elements by id in the constructor, so it must be
 * instantiated after the benchmark markup exists in the document.
 */
class BenchmarkRunner {
  constructor() {
    this.session = null;          // current LanguageModel session, or null when idle
    this.abortController = null;  // controller for the run in progress
    this.results = [];            // metrics objects for the completed runs

    // Cached DOM references
    this.promptsInput = document.getElementById("prompts-input");
    this.tempSlider = document.getElementById("temperature");
    this.topkSlider = document.getElementById("topk");
    this.repsInput = document.getElementById("repetitions");
    this.btnStart = document.getElementById("btn-start");
    this.btnStop = document.getElementById("btn-stop");
    this.progressFill = document.getElementById("progress-fill");
    this.progressText = document.getElementById("progress-text");
    this.resultsBody = document.getElementById("results-body");
    this.statusBar = document.getElementById("status-bar");
    this.statusMessage = document.getElementById("status-message");

    // init() is fire-and-forget; report failures (e.g. availability() rejecting)
    // in the status bar instead of leaving an unhandled promise rejection.
    this.init().catch((err) => {
      this.showStatus(`❌ Erro: ${err.message}`, "error");
    });
  }

  /**
   * Checks that the Prompt API exists and a model is usable, then enables the
   * start button and wires the UI events. Leaves the controls disabled (with an
   * error message) when the API or the model is unavailable.
   */
  async init() {
    if (!("LanguageModel" in window)) {
      this.showStatus("❌ Prompt API não disponível. Use Chrome 138+.", "error");
      return;
    }

    const availability = await LanguageModel.availability();
    if (availability === "unavailable") {
      this.showStatus("❌ Modelo não disponível neste dispositivo.", "error");
      return;
    }

    this.btnStart.disabled = false;
    this.bindEvents();

    // Keep the download notice visible; otherwise make sure no stale status remains.
    if (availability === "downloading") {
      this.showStatus("⏳ Baixando modelo...", "loading");
    } else {
      this.hideStatus();
    }
  }

  /** Wires the buttons and mirrors the slider values into their <output> elements. */
  bindEvents() {
    this.btnStart.addEventListener("click", () => this.runBenchmark());
    this.btnStop.addEventListener("click", () => this.stop());
    this.tempSlider.addEventListener("input", (e) => {
      document.getElementById("temp-value").textContent = e.target.value;
    });
    this.topkSlider.addEventListener("input", (e) => {
      document.getElementById("topk-value").textContent = e.target.value;
    });
  }

  /**
   * Creates a LanguageModel session using the current temperature / topK
   * slider values. Download progress is shown in the status bar and cleared
   * once the session is ready.
   *
   * @returns {Promise<object>} the created session
   */
  async createSession() {
    const session = await LanguageModel.create({
      temperature: parseFloat(this.tempSlider.value),
      topK: parseInt(this.topkSlider.value, 10),
      // Lets "Parar" also cancel a pending session creation / model download.
      signal: this.abortController ? this.abortController.signal : undefined,
      monitor: (m) => {
        m.addEventListener("downloadprogress", (e) => {
          const pct = Math.round(e.loaded * 100);
          // Go through showStatus so the (possibly hidden) status bar is revealed.
          this.showStatus(`⏳ Baixando modelo... ${pct}%`, "loading");
        });
      }
    });
    this.hideStatus();
    return session;
  }

  /**
   * Runs every prompt from the textarea (one per line) for the configured
   * number of repetitions, appending one table row per completed run.
   * Shows the summary when the batch finishes or when the user stops it;
   * any other error is reported in the status bar.
   */
  async runBenchmark() {
    const prompts = this.promptsInput.value
      .split("\n")
      .map(l => l.trim())
      .filter(Boolean);

    if (!prompts.length) {
      this.showStatus("⚠️ Adicione pelo menos 1 prompt.", "error");
      return;
    }

    // Typed values can bypass the input's min attribute; never go below 1 run.
    const reps = Math.max(1, parseInt(this.repsInput.value, 10) || 1);
    const totalRuns = prompts.length * reps;

    this.results = [];
    this.abortController = new AbortController();
    this.hideStatus(); // clear any message left over from a previous attempt
    this.setRunning(true);
    this.showProgress(0, totalRuns);

    document.getElementById("results-section").hidden = false;
    document.getElementById("summary-section").hidden = true;
    this.resultsBody.innerHTML = "";

    let runIndex = 0;

    try {
      this.session = await this.createSession();

      for (let rep = 0; rep < reps; rep++) {
        for (const prompt of prompts) {
          if (this.abortController.signal.aborted) throw new DOMException("Aborted", "AbortError");

          const metrics = await this.measurePrompt(prompt);
          runIndex++;

          this.results.push(metrics);
          this.appendRow(runIndex, prompt, metrics);
          this.showProgress(runIndex, totalRuns);
        }

        // Recreate the session between repetitions to avoid context buildup.
        if (rep < reps - 1) {
          this.session.destroy();
          this.session = null; // so `finally` does not touch a destroyed session if create fails
          this.session = await this.createSession();
        }
      }

      this.showSummary();
    } catch (err) {
      if (err.name === "AbortError") {
        // Stopped by the user: summarize whatever completed (no-op when nothing did).
        this.showSummary();
      } else {
        this.showStatus(`❌ Erro: ${err.message}`, "error");
      }
    } finally {
      this.setRunning(false);
      if (this.session) this.session.destroy();
      this.session = null;
    }
  }

  /**
   * Streams a single prompt through the current session and measures it.
   *
   * @param {string} prompt text sent to the model
   * @returns {Promise<{prompt: string, ttft: number, totalLatency: number,
   *   tokensPerSec: number, tokenCount: number, responseLength: number}>}
   *   ttft / totalLatency in whole milliseconds; tokenCount is an estimate
   *   derived from the response length, not a real token count.
   */
  async measurePrompt(prompt) {
    const t0 = performance.now();
    let firstTokenTime = null;
    let fullResponse = "";

    const stream = await this.session.promptStreaming(prompt, {
      signal: this.abortController.signal
    });

    for await (const chunk of stream) {
      if (firstTokenTime === null) {
        firstTokenTime = performance.now();
      }
      fullResponse = this._mergeChunk(fullResponse, chunk);
    }

    const tEnd = performance.now();
    const ttft = firstTokenTime !== null ? firstTokenTime - t0 : 0;
    const totalLatency = tEnd - t0;
    // Estimate: ~4 characters per token (heuristic chosen for Portuguese text).
    const estimatedTokens = Math.ceil(fullResponse.length / 4);
    const tokensPerSec = totalLatency > 0
      ? (estimatedTokens / (totalLatency / 1000))
      : 0;

    return {
      prompt,
      ttft: Math.round(ttft),
      totalLatency: Math.round(totalLatency),
      tokensPerSec: parseFloat(tokensPerSec.toFixed(1)),
      tokenCount: estimatedTokens,
      responseLength: fullResponse.length
    };
  }

  /**
   * Folds one streamed chunk into the response text received so far.
   * Handles both stream shapes: chunks that are deltas (appended) and chunks
   * that repeat the whole text so far (taken as the new text).
   * NOTE(review): which shape a given Chrome version emits should be confirmed
   * against the Prompt API documentation.
   *
   * @param {string} soFar text accumulated from previous chunks
   * @param {string} chunk the chunk just received
   * @returns {string} the updated response text
   */
  _mergeChunk(soFar, chunk) {
    if (soFar && chunk.startsWith(soFar)) return chunk;
    return soFar + chunk;
  }

  /**
   * Appends one result row to the table. Cells are built through DOM
   * properties (textContent / title) rather than an HTML string, so prompt
   * text containing quotes or markup cannot break out of the title attribute.
   *
   * @param {number} index 1-based run number
   * @param {string} prompt full prompt text (shown truncated, full text in the tooltip)
   * @param {object} metrics object returned by measurePrompt
   */
  appendRow(index, prompt, metrics) {
    const truncated = prompt.length > 30 ? prompt.slice(0, 30) + "…" : prompt;
    const cellTexts = [
      String(index),
      truncated,
      `${metrics.ttft}ms`,
      `${(metrics.totalLatency / 1000).toFixed(2)}s`,
      String(metrics.tokensPerSec),
      String(metrics.tokenCount)
    ];

    const tr = document.createElement("tr");
    cellTexts.forEach((text, column) => {
      const td = document.createElement("td");
      td.textContent = text;
      if (column === 1) td.title = prompt; // tooltip carries the untruncated prompt
      tr.appendChild(td);
    });
    this.resultsBody.appendChild(tr);
  }

  /**
   * Renders the averages of all completed runs and reveals the summary panel.
   * Does nothing when there are no results.
   */
  showSummary() {
    if (!this.results.length) return;

    const mean = (values) => values.reduce((sum, v) => sum + v, 0) / values.length;
    const throughputs = this.results.map(r => r.tokensPerSec);

    const avgTtft = Math.round(mean(this.results.map(r => r.ttft)));
    const avgLatency = (mean(this.results.map(r => r.totalLatency)) / 1000).toFixed(2);
    const avgThroughput = mean(throughputs);
    const maxThroughput = Math.max(...throughputs);

    document.getElementById("avg-ttft").textContent = `${avgTtft}ms`;
    document.getElementById("avg-latency").textContent = `${avgLatency}s`;
    document.getElementById("avg-throughput").textContent = `${avgThroughput.toFixed(1)} tok/s`;

    // Visual bar: average throughput relative to the best run, capped at 100%.
    const pct = maxThroughput > 0
      ? Math.min(100, (avgThroughput / maxThroughput) * 100)
      : 0;
    document.getElementById("throughput-fill").style.width = `${pct}%`;

    document.getElementById("summary-section").hidden = false;
  }

  /**
   * Reveals the progress section and updates the bar and its label.
   *
   * @param {number} current runs completed so far
   * @param {number} total runs planned for the batch
   */
  showProgress(current, total) {
    const section = document.getElementById("progress-section");
    section.hidden = false;
    // Guard the division so an empty batch renders 0% instead of NaN%.
    const pct = total > 0 ? Math.round((current / total) * 100) : 0;
    this.progressFill.style.width = `${pct}%`;
    this.progressText.textContent = `${current}/${total} prompts (${pct}%)`;
  }

  /** Requests cancellation of the run in progress, if any. */
  stop() {
    if (this.abortController) this.abortController.abort();
  }

  /**
   * Toggles the start/stop buttons between the idle and running states.
   *
   * @param {boolean} running true while a batch is executing
   */
  setRunning(running) {
    this.btnStart.disabled = running;
    this.btnStop.disabled = !running;
    this.btnStart.textContent = running ? "⏳ Executando..." : "▶ Iniciar Benchmark";
  }

  /**
   * Shows a message in the status bar.
   *
   * @param {string} msg text to display
   * @param {string} type suffix for the `status-<type>` CSS class (e.g. "error", "loading")
   */
  showStatus(msg, type) {
    this.statusBar.hidden = false;
    this.statusBar.className = `status status-${type}`;
    this.statusMessage.textContent = msg;
  }

  /** Hides the status bar. */
  hideStatus() { this.statusBar.hidden = true; }

  /**
   * Escapes text for insertion into HTML, including attribute values
   * (quotes are escaped as well as &, < and >).
   *
   * @param {string} str raw text
   * @returns {string} HTML-safe text
   */
  escapeHtml(str) {
    const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
    const text = str == null ? "" : String(str);
    return text.replace(/[&<>"']/g, (ch) => entities[ch]);
  }
}

// Bootstrap: create the runner once the DOM is parsed. If this script executes
// after DOMContentLoaded has already fired (e.g. loaded async or injected late),
// the listener would never run, so instantiate immediately in that case.
if (document.readyState === "loading") {
  document.addEventListener("DOMContentLoaded", () => new BenchmarkRunner());
} else {
  new BenchmarkRunner();
}

Fluxo UX

Página carrega → verifica LanguageModel no window, habilita controles
Usuário configura → define prompts (1 por linha), ajusta temperature/topK/repetições
Clica “Iniciar Benchmark” → barra de progresso aparece, tabela começa a popular linha a linha
Cada prompt executa → streaming mede TTFT no primeiro chunk e, ao final, estima os tokens pelo tamanho da resposta
Entre repetições → sessão destruída e recriada (isola context window)
Benchmark completo → painel de resumo com médias e barra visual de throughput
“Parar” a qualquer momento → AbortController cancela o prompt atual, exibe resultados parciais

Edge Cases e Tratamento de Erros

Cenário	Tratamento
Prompt API indisponível	Mensagem + desabilita controles
Textarea vazia	Bloqueia execução, aviso inline
Prompt muito longo (excede context)	Erro capturado, pula pro próximo, registra na tabela
Modelo em download	Progress bar do download antes de iniciar
Abort no meio do batch	Exibe resultados parciais + resumo do que completou
Session quota excedida	Recria sessão e retenta uma vez
Navegador trava (prompt pesado)	Timeout de 30s por prompt com fallback
Temperature 0 + topK 1	Funciona normalmente (output determinístico)
Muitas repetições (>5)	Alerta que pode demorar, mas permite
Estimativa de tokens imprecisa	Nota na UI: “~estimado, 4 chars/token”

CSS Essencial

/* Configuration fieldset */
.config-panel {
  border: 1px solid #e5e7eb;
  border-radius: 8px;
  padding: 1.5rem;
  margin-bottom: 1rem;
}

/* Row holding the temperature / top-K / repetitions controls; wraps on narrow screens */
.config-row {
  display: flex;
  gap: 1.5rem;
  flex-wrap: wrap;
  margin-top: 1rem;
}

/* Each control stacks its caption above the input */
.config-row label {
  display: flex;
  flex-direction: column;
  gap: 0.25rem;
  font-size: 0.875rem;
}

/* Progress bar track */
.progress-bar {
  width: 100%;
  height: 12px;
  background: #e5e7eb;
  border-radius: 6px;
  overflow: hidden;
  margin-bottom: 0.5rem;
}

/* Fill elements: the script sets their width inline as a percentage.
   Start at 0 so an unset bar renders empty (a block-level div would otherwise
   default to the full track width). */
#progress-fill, #throughput-fill {
  width: 0;
  height: 100%;
  background: #2563eb;
  border-radius: 6px;
  transition: width 0.3s ease;
}

/* Skip the width animation for users who ask for reduced motion */
@media (prefers-reduced-motion: reduce) {
  #progress-fill, #throughput-fill {
    transition: none;
  }
}

/* Results table */
#results-table {
  width: 100%;
  border-collapse: collapse;
  font-size: 0.875rem;
  margin-top: 1rem;
}

#results-table th,
#results-table td {
  padding: 0.5rem 0.75rem;
  text-align: left;
  border-bottom: 1px solid #e5e7eb;
}

#results-table th {
  background: #f9fafb;
  font-weight: 600;
}

/* Highlight data rows only; the header row keeps its own background */
#results-table tbody tr:hover {
  background: #f3f4f6;
}

/* Summary cards */
.summary-grid {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
  gap: 1rem;
  margin: 1rem 0;
}

.metric-card {
  background: #f9fafb;
  border-radius: 8px;
  padding: 1rem;
  text-align: center;
}

.metric-label {
  display: block;
  font-size: 0.75rem;
  color: #6b7280;
  text-transform: uppercase;
  letter-spacing: 0.05em;
}

.metric-value {
  display: block;
  font-size: 1.5rem;
  font-weight: 700;
  color: #111827;
  margin-top: 0.25rem;
}

/* Track for the relative-throughput bar in the summary */
.visual-bar {
  width: 100%;
  height: 8px;
  background: #e5e7eb;
  border-radius: 4px;
  overflow: hidden;
}

/* Status bar variants; the script applies `status-<type>` */
.status-error { color: #dc2626; }
.status-loading { color: #2563eb; }