{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Day 30 - Quantization, Fusion, Benchmark Harness"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {},
      "source": [
        "import numpy as np, csv\n",
        "from pathlib import Path\n",
        "rng=np.random.default_rng(0)"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "cell_type": "code",
      "metadata": {},
      "source": [
        "# Random fp32 weight matrix (128 x 256) and input batch (4 x 256), drawn in this order from rng.\n",
        "W = rng.normal(size=(128, 256)).astype(np.float32)\n",
        "X = rng.normal(size=(4, 256)).astype(np.float32)\n",
        "def qpc(W):\n",
        "    max_abs=np.max(np.abs(W),axis=1,keepdims=True)\n",
        "    scale=np.maximum(max_abs/127,1e-8).astype(np.float32)\n",
        "    Q=np.round(W/scale).clip(-127,127).astype(np.int8)\n",
        "    return Q,scale.squeeze(1)\n",
        "# Quantize W, then rebuild an fp32 approximation from the int8 codes and per-row scales.\n",
        "Q, scale = qpc(W)\n",
        "W_hat = scale[:, None] * Q.astype(np.float32)\n",
        "\n",
        "# Same linear map with exact and dequantized weights: y[b, o] = sum_d x[b, d] * w[o, d].\n",
        "Y = np.einsum('bd,od->bo', X, W)\n",
        "Yq = np.einsum('bd,od->bo', X, W_hat)\n",
        "\n",
        "# Weight reconstruction error, then fp32 bytes vs. int8 codes plus scales.\n",
        "print(\n",
        "    'mse', np.mean((W - W_hat) ** 2),\n",
        "    'bytes', W.nbytes, Q.nbytes + scale.nbytes,\n",
        ")\n",
        "# Largest absolute deviation in the layer output caused by quantization.\n",
        "print('out diff', np.abs(Y - Yq).max())"
      ],
      "outputs": [],
      "execution_count": null
    },
    {
      "cell_type": "code",
      "metadata": {},
      "source": [
        "# Column order of the benchmark CSV; DictWriter emits fields in this order.\n",
        "fields = [\n",
        "    'setup',\n",
        "    'batch_or_concurrency',\n",
        "    'prompt_tokens',\n",
        "    'output_tokens',\n",
        "    'ttft_ms',\n",
        "    'tok_per_s',\n",
        "    'peak_mem_gb',\n",
        "    'notes',\n",
        "]\n",
        "\n",
        "# Single placeholder row: measurements stay 'TBD' until a real run fills them in.\n",
        "rows = [\n",
        "    {\n",
        "        'setup': 'yours_fp16',\n",
        "        'batch_or_concurrency': 1,\n",
        "        'prompt_tokens': 'TBD',\n",
        "        'output_tokens': 'TBD',\n",
        "        'ttft_ms': 'TBD',\n",
        "        'tok_per_s': 'TBD',\n",
        "        'peak_mem_gb': 'TBD',\n",
        "        'notes': 'baseline',\n",
        "    },\n",
        "]\n",
        "\n",
        "out = Path('day30_benchmark_template.csv')\n",
        "# newline='' leaves line-ending handling to the csv module.\n",
        "with out.open('w', newline='') as f:\n",
        "    writer = csv.DictWriter(f, fieldnames=fields)\n",
        "    writer.writeheader()\n",
        "    writer.writerows(rows)\n",
        "print(out.resolve())"
      ],
      "outputs": [],
      "execution_count": null
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "pygments_lexer": "ipython3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}