From 4e8c4be649a20936d375348761a7ee4f3c9bc57c Mon Sep 17 00:00:00 2001 From: Krishna Ayyalasomayajula Date: Sat, 30 May 2026 17:53:15 -0500 Subject: [PATCH] docs: append sections S7-S8 (render loop, resize) --- docs/01-rainbow-triangle.md | 374 ++++++++++++++++++++++++++++++++++++ 1 file changed, 374 insertions(+) diff --git a/docs/01-rainbow-triangle.md b/docs/01-rainbow-triangle.md index e94a35b..789e024 100644 --- a/docs/01-rainbow-triangle.md +++ b/docs/01-rainbow-triangle.md @@ -885,3 +885,377 @@ stereoscopic (VR) or multi-viewport single-pass rendering. Not used here. **`cache: None`** — no pipeline cache. A pipeline cache stores compiled shader binaries to speed up subsequent pipeline creation. Useful when creating many pipelines dynamically; for a single pipeline, caching has no practical benefit. + +## S7: The Render Loop — Recording and Submitting Commands + +New concept: **command buffers are scripts, not function calls.** You cannot call +GPU operations directly from CPU code. Instead, you record commands into a +[command buffer](concepts/GLOSSARY.md#command-buffer) — a script that the GPU +queue executes asynchronously. Think of it like building an assembly listing: +each recording method appends an instruction. When the script is complete, you +submit it atomically to the [queue](concepts/GLOSSARY.md#queue). The GPU executes +all instructions in parallel, in whatever order it determines is optimal. There +is no `.await` on a draw call. The CPU returns immediately after submission and +continues the next frame while the GPU works in the background. + +> **Key insight #4 — Command buffers are scripts, not function calls:** +> `create_command_encoder()` opens a recording session. `begin_render_pass()` +> starts a scoped drawing block. `render_pass.draw()` appends a draw command. +> `encoder.finish()` seals the script. `queue.submit()` dispatches it. The GPU +> executes it later, in parallel. There is no `.await` on a draw call. 
+ +### The `render(&mut self)` Method Signature + +```rust +fn render(&mut self) { + // ... +} +``` + +This is a **fully synchronous** method. It runs on the winit event loop thread +(triggered by `RedrawRequested`), has no `async` keyword, no `.await`, and takes +no tokio handle. All wgpu recording and submission operations are synchronous +and fast — they only encode instructions and push them to the queue; they do not +wait for GPU completion. The one exception is `device.poll(wgpu::Maintain::Wait)`, which blocks until in-flight GPU work finishes (see Step by Step below). + +### Acquiring a Back Buffer from the Swapchain + +```rust +let status = self.surface.get_current_texture(); +``` + +`get_current_texture()` is how you acquire a back buffer from the +[swapchain](concepts/GLOSSARY.md#swapchain). This is the framebuffer you render +into for this frame. In a triple-buffered swapchain (`PresentMode::Mailbox`), +there are up to two spare back buffers waiting for you. `get_current_texture()` +hands you the next available one. + +In wgpu 29+, this method returns a `CurrentSurfaceTexture` **enum** — not a +`Result`. The swapchain can be in seven distinct states, and every state is an +ordinary variant you must match on, not an `Err` you can propagate with `?`: + +> **Key insight #5 — 7 swapchain states you must handle:** `Success(buf)` — +> render normally. `Suboptimal(buf)` — render, but reconfiguring is advisable. +> `Timeout` — skip frame (GPU late). `Occluded` — skip frame (window behind +> another). `Outdated` — `self.resize(size)` to reconfigure. `Lost` — skip frame +> (display server restarted). `Validation` — skip frame (API misuse; check +> logs). + +WHY `match` on 7 variants: `get_current_texture()` does not return a `Result`. +All 7 states are plain enum variants and the match must be exhaustive. The Rust compiler +enforces this — you cannot miss a variant. 
+ +### The Complete `render` Implementation + +```rust +fn render(&mut self) { + let status = self.surface.get_current_texture(); + + match status { + wgpu::SurfaceStatus::Success(surface_texture) + | wgpu::SurfaceStatus::Suboptimal(surface_texture) => { + // Drive GPU work: shader compilation, memory allocation, fence signaling + if let Err(e) = self.device.poll(wgpu::Maintain::Wait) { + log::error!("Device poll failed: {e}"); + return; + } + + let texture_view = surface_texture.texture.create_view(&Default::default()); + + let mut encoder = self.device.create_command_encoder( + &wgpu::CommandEncoderDescriptor { + label: Some("Main Command Encoder"), + }, + ); + + { + let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("Main Render Pass"), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &texture_view, + depth_slice: None, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color { + r: 0.1, + g: 0.1, + b: 0.1, + a: 1.0, + }), + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + multiview_mask: None, + }); + + render_pass.set_pipeline(&self.pipeline); + render_pass.set_vertex_buffer(0, self.vertex_buffer.slice(..)); + render_pass.draw(0..3, 0..1); + } // render_pass drops here — render pass ends automatically + + self.queue.submit(std::iter::once(encoder.finish())); + surface_texture.present(); + } + + wgpu::SurfaceStatus::Timeout => { + // GPU took too long to finish previous work. Skip this frame. + log::warn!("Surface status: Timeout — skipping frame"); + } + + wgpu::SurfaceStatus::Occluded => { + // Window is fully occluded by another window. Skip rendering. + log::debug!("Surface status: Occluded — skipping frame"); + } + + wgpu::SurfaceStatus::Outdated => { + // Swapchain resolution no longer matches window. Reconfigure. 
+ log::warn!("Surface status: Outdated — resizing"); + if let Some(window) = &self.window { + self.resize(window.inner_size()); + } + } + + wgpu::SurfaceStatus::Lost => { + // Display server restarted or GPU lost. Fatal without re-init. + log::error!("Surface status: Lost — cannot recover without re-creating State"); + } + + wgpu::SurfaceStatus::Validation { source, description } => { + // wgpu validated your descriptor and found it invalid. + log::error!("Surface validation: {source} — {description}"); + } + } +} +``` + +### Step by Step + +**`surface.get_current_texture()`** — Acquires the next available back buffer +from the [swapchain](concepts/GLOSSARY.md#swapchain). The swapchain cycles through +2–3 pre-allocated back buffers. This call returns immediately if a buffer is +available; it does not block on the GPU. + +**`device.poll(wgpu::Maintain::Wait)`** — **Synchronous** call that drives +in-flight GPU work to completion: shader compilation fences, memory allocation, +and queue signaling. Without this, resources accumulate because the device does +not reclaim finished work. Called once per frame. Returns +`Result<(), MaintainError>` — if the device is lost, you recover by +re-creating the device. + +WHY this is synchronous: `poll()` does not spawn a task or use `.await`. It +runs a small internal loop checking Vulkan fence objects until all in-flight +work is done, then returns. On a busy GPU this can take a few milliseconds per +frame — that is normal. + +**`texture.create_view(&Default::default())`** — A [texture view](concepts/GLOSSARY.md#texture-view) +is how wgpu references a texture's memory inside a render pass. The GPU does +not accept raw texture handles in render pass attachments — it requires a view +that describes the mip level range, aspect, and dimension format. +`Default::default()` creates a full-view covering all mip levels and all aspects. + +**`device.create_command_encoder(&desc)`** — Opens a recording session. 
The +[command encoder](concepts/GLOSSARY.md#command-buffer) is where you append +instructions. Think of it as building a function body: you add statements, then +`finish()` closes the function and returns the compiled buffer. + +**`encoder.begin_render_pass(&desc)`** — Starts a scoped drawing block. The +[render pass](concepts/GLOSSARY.md#render-pass) descriptor defines the target +attachments (color, depth, stencil). The returned `RenderPass` is a scoped +guard — when it drops, the render pass ends automatically. + +### Render Pass Color Attachment + +```rust +color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &texture_view, + depth_slice: None, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color { r: 0.1, g: 0.1, b: 0.1, a: 1.0 }), + store: wgpu::StoreOp::Store, + }, +})], +``` + +**`RenderPassColorAttachment` has exactly 4 fields:** + +- **`view: &texture_view`** — the framebuffer we draw into. Must match the + color target format in the [render pipeline](concepts/GLOSSARY.md#render-pipeline). +- **`depth_slice: None`** — only used for 3D texture slices. Not applicable + to 2D rendering. +- **`resolve_target: None`** — only used for MSAA resolve. When multisampling + is active, the render pass writes to a multisampled buffer and resolves into + this target. We have no MSAA, so `None`. +- **`ops`** — [operations](concepts/GLOSSARY.md#render-pass) controlling load + and store behavior. Two sub-fields: + - **`load: LoadOp::Clear(color)`** — before drawing, fill the entire + framebuffer with this color. **This IS your background color.** Dark gray. + `LoadOp::Load` keeps existing pixels (used in UI compositing where you + draw on top of previous content). + - **`store: StoreOp::Store`** — after drawing, keep what was written. The + GPU writes the result back to the texture so the swapchain can present it. 
+ `StoreOp::Discard` throws away the result — used for offscreen renders + where only the depth/stencil result matters. + +**`depth_stencil_attachment: None`** — No depth or stencil buffer. When you +have a depth texture, it goes here. + +**`timestamp_writes: None`** — GPU hardware timestamps for profiling. Not used +in production rendering; requires a query set. + +**`occlusion_query_set: None`** — hardware occlusion queries (count fragments +that pass the depth test). Useful for visibility-based culling. + +**`multiview_mask: None`** — multiview rendering mask for VR / multi-viewport. + +### Binding State and Drawing + +**`render_pass.set_pipeline(&self.pipeline)`** — Tells the GPU which +[render pipeline](concepts/GLOSSARY.md#render-pipeline) to use for subsequent +draw calls. The pipeline encapsulates the shader programs, vertex format, +primitive topology, and output configuration. Must be set before any draw call +in a render pass. Switching pipelines mid-pass is expensive and should be +minimized. + +WHY this is necessary: the GPU hardware does not store pipeline state between +frames. Every render pass starts with no pipeline bound. You must set it every +frame. + +**`render_pass.set_vertex_buffer(0, self.vertex_buffer.slice(..))`** — Binds the +[vertex buffer](concepts/GLOSSARY.md#vertex-buffer) to slot 0. +`buffer.slice(..)` creates a [buffer slice](concepts/GLOSSARY.md#buffer-slice) +covering the full buffer (equivalent to `buffer.slice(0..)`). Slot 0 corresponds +to the first layout in the pipeline's vertex buffer layouts array. If you had +multiple vertex buffers (e.g., separate position and instance buffers), you'd +bind them to slots 0, 1, etc. + +**`render_pass.draw(0..3, 0..1)`** — The draw command. Two `Range` +arguments: +- First range `0..3` — vertex range. Draw vertices 0, 1, 2 (three vertices + forming one triangle). +- Second range `0..1` — instance range. Draw instance 0 (one instance). 
+ +WHY two ranges: the vertex range controls which vertices from the buffer are +read. The instance range controls instanced rendering — the same geometry drawn +multiple times with different instance-data attributes. For a single triangle, +one draw call with `0..1` instances is correct. + +**Render pass scope drop** — When the `render_pass` variable goes out of scope +(the closing `}` in the block), the drop implementation ends the render pass +and performs validation. If you forgot to set the pipeline or bind a required +buffer, wgpu reports the error at drop time, not at draw time. + +**`encoder.finish()`** — Seals the command encoder. Returns the finished +[command buffer](concepts/GLOSSARY.md#command-buffer) ready for submission. +After `finish()`, the encoder cannot be used again. + +**`queue.submit(iter)`** — Dispatches one or more command buffers to the GPU. +Takes an iterator of command buffers. We submit exactly one: the frame's command +buffer. This is a fire-and-forget call — it queues the work and returns +immediately. The GPU executes it asynchronously, in parallel with your next +frame's CPU work. + +**`surface_texture.present()`** — Queues the rendered back buffer for display. +This tells the swapchain: "this buffer is done, show it on screen." **If you +forget this, you render to a buffer nobody sees.** The swapchain cycles the +buffer from "render target" to "front buffer" on the next vsync. + +### Why the Match Arms Differ + +- **`Success` / `Suboptimal`** — both deliver a `SurfaceTexture` you can render + into. The difference: `Suboptimal` means the current swapchain configuration + is not ideal for the GPU (e.g., format mismatch). You render normally but + should consider reconfiguring the surface during idle time. +- **`Timeout`** — the GPU exceeded the wait threshold for a back buffer. Skip + the frame. The GPU will catch up. +- **`Occluded`** — another window fully covers your window. 
Skip rendering entirely — + the display server will not show your output. Saves GPU work. +- **`Outdated`** — the swapchain was created for a resolution that no longer + matches the window. Reconfigure the surface to match. +- **`Lost`** — the GPU or display server has been reset. Without re-creating + the device and surface, you cannot recover. In a real application, you'd + trigger a full re-initialization. +- **`Validation`** — wgpu rejected the surface configuration due to API misuse. + Check logs for the description. This is a programming error, not a runtime + condition. + +Note: `pre_present_notify()` does **not** exist in wgpu 29. Do not call it. The +device polling via `device.poll()` is the only frame synchronization mechanism +you need. + +## S8: Handling Window Resize + +WHY `surface.configure()` on resize: The swapchain allocates back buffers at a +fixed dimension. When the window size changes, the old back buffers no longer +match the window's display surface. Presenting a mismatched-size buffer causes +undefined behavior — the display server clips, stretches, or rejects it. +`surface.configure()` allocates new back buffers matching the new dimensions and +discards the old ones. + +WHY the zero-size guard and `width.max(1)`: On some display servers, minimizing a window briefly +reports `0 × 0` size before restoring, and a zero-dimension surface allocation panics. The `if` guard skips +reconfiguration for such sizes; `.max(1)` is a redundant second safeguard in case the guard is ever removed. + +WHY `std::mem::take(&mut self.config.view_formats)`: The `view_formats` field +of `SurfaceConfiguration` is an owned `Vec<wgpu::TextureFormat>`. When constructing +the new configuration, you move the vector out of the old config rather than +cloning it. `mem::take` replaces the field with `Vec::new()` (zero allocation) +and returns the original vector. This avoids a heap allocation for what is +typically a 1-element vec. 
+ +```rust +fn resize(&mut self, size: winit::dpi::PhysicalSize<u32>) { + if size.width > 0 && size.height > 0 { + let config = wgpu::SurfaceConfiguration { + usage: self.config.usage, + format: self.config.format, + width: size.width.max(1), + height: size.height.max(1), + present_mode: self.config.present_mode, + desired_maximum_frame_latency: self.config.desired_maximum_frame_latency, + alpha_mode: self.config.alpha_mode, + view_formats: std::mem::take(&mut self.config.view_formats), + }; + self.surface.configure(&self.device, &config); + self.config = config; + } +} +``` + +FIELD BY FIELD: + +**`usage` / `format` / `present_mode` / `alpha_mode`** — carried over from the +old config unchanged. These properties are negotiated once at init time +and do not change on resize. + +**`width` / `height`** — the new dimensions, clamped to at least 1. + +**`desired_maximum_frame_latency`** — swapchain back-pressure setting. Kept from +the old config. This value controls how many frames the swapchain buffers +between CPU submission and GPU presentation. A value of 2 (triple buffering) +provides smooth frame pacing under variable CPU/GPU load. See S3 init step 5. + +**`view_formats`** — additional texture formats the surface can create views +with. `std::mem::take()` moves the owned vector from the old config into the +new config. After `take()`, the old config's `view_formats` is an empty `Vec`. +This avoids a `clone()` of the vector. Since the old config is about to be +overwritten by `self.config = config`, the emptied field is irrelevant. + +**`surface.configure(&self.device, &config)`** — takes a reference to the +`Device` and the new `SurfaceConfiguration`. This is not async. It allocates the +new swapchain buffers and replaces the old ones. Any in-flight renders using +old buffers complete normally; the new buffers are available after this call +returns. 
+ +### When `resize` Is Called + +In our `App::window_event` handler (S2), the `WindowEvent::Resized(size)` arm +calls `state.resize(size)`. The resize fires once for every dimension +change. On fast window resizing, you may receive dozens of resize events in +succession. `surface.configure()` is fast enough to handle this — each call +discards old buffers and allocates new ones. The GPU continues processing +in-flight frames with the old buffer dimensions; there is no visual glitch +because the swapchain handles the transition seamlessly.