aboutsummaryrefslogtreecommitdiff
path: root/dxg/src/imm.c
blob: 28baa9983789800025226ae7e76afa38855bca33 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
/* Immediate-mode renderer.

Geometry is given by client code and buffered in an upload-heap buffer stored
in host memory.
When the buffer fills up or the client is done, a draw call is issued. The draw
call reads directly from the buffer in host memory; there is no intermediate
buffer copy.
The renderer double-buffers two host-side buffers so that the client can
continue specifying more data into a second buffer while the contents of the
first buffer are rendered.
If the first buffer is still being rendered while the client loops around, then
the client must wait before issuing further geometry.
Once the render of the first buffer completes, the process starts again,
ping-ponging between the two buffers.*/
#include <dxg/imm.h>
#include <dxg/dxcommon.h>

#include <imm_vs.h> // generated
#include <imm_ps.h> // generated

#define WIN32_LEAN_AND_MEAN
#include <Windows.h> // OutputDebugStringA

#include <stdint.h>
#include <stdlib.h>

// Create a committed buffer resource of `size` bytes in an upload heap.
//
// pDevice: device used to create the resource; must not be NULL.
// size:    width of the buffer in bytes.
//
// Returns the new resource. The caller owns the returned reference and is
// responsible for releasing it. The HRESULT of the creation call is passed to
// TrapIfFailed.
static ID3D12Resource* create_buffer(ID3D12Device* pDevice, size_t size) {
    assert(pDevice);
    const D3D12_HEAP_PROPERTIES props = {
        .Type                 = D3D12_HEAP_TYPE_UPLOAD,
        .CPUPageProperty      = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
        .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
        .CreationNodeMask     = 0,
        .VisibleNodeMask      = 0
    };
    const D3D12_RESOURCE_DESC desc = {
        .Dimension        = D3D12_RESOURCE_DIMENSION_BUFFER,
        .Alignment        = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
        .Width            = size,
        .Height           = 1,
        .DepthOrArraySize = 1,
        .MipLevels        = 1,
        .Format           = DXGI_FORMAT_UNKNOWN,
        .SampleDesc       = {.Count = 1, .Quality = 0},
        .Layout           = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
        .Flags            = D3D12_RESOURCE_FLAG_NONE
    };
    ID3D12Resource* pBuffer = NULL;
    TrapIfFailed(pDevice->lpVtbl->CreateCommittedResource(
        pDevice,
        &props,
        D3D12_HEAP_FLAG_NONE,
        &desc,
        // Resources in an upload heap must be created in the GENERIC_READ
        // state. GENERIC_READ includes VERTEX_AND_CONSTANT_BUFFER, so the
        // buffer can still be bound as a vertex buffer without a barrier.
        D3D12_RESOURCE_STATE_GENERIC_READ,
        NULL,
        &IID_ID3D12Resource,
        (void**)&pBuffer));
    return pBuffer;
}

// Output state used by every draw. Stored by dxg_imm_set_graphics_state() and
// read by dxg_imm_draw().
typedef struct GraphicsState {
    D3D12_VIEWPORT              viewport; // Viewport; its Width/Height also define the scissor rect.
    D3D12_CPU_DESCRIPTOR_HANDLE hBackBufferView; // Render target view bound for the draw.
    D3D12_CPU_DESCRIPTOR_HANDLE hDepthStencilView; // Depth-stencil view bound for the draw.
} GraphicsState;

// Set of per-draw resources. The renderer owns two of these and alternates
// between them on every flush, so that one set can be filled while the other
// one is being consumed by a previously submitted draw.
typedef struct ResourceSet {
    ID3D12Resource* pVertexBuffer; // Upload-heap buffer that receives the vertex data.
    CommandRecorder cmdRec; // Records the draw commands that read pVertexBuffer.
} ResourceSet;

// Immediate-mode renderer state. Allocated by dxg_imm_init() and released by
// dxg_imm_destroy().
typedef struct DxgImm {
    ID3D12Device*        pDevice; // Device given at init. Not released by dxg_imm_destroy().
    ID3D12CommandQueue*  pCmdQueue; // Queue that draws are submitted to. Not released by dxg_imm_destroy().
    ID3D12PipelineState* pPipelineState; // Graphics pipeline created at init.
    ID3D12RootSignature* pRootSignature; // Root signature created at init (no parameters).
    GraphicsState        graphicsState; // Viewport and render targets used by draws.
    ResourceSet          resources[2]; // The two resource sets the renderer alternates between.
    int                  cur; // Index to current resource set. New geometry written here.
    float*               pCurrentBufferData; // Mapped region of current buffer.
    size_t               bufferSizeVerts; // Num verts per buffer.
    ID3D12Fence*         pFence; // Fence signaled on pCmdQueue after each submitted draw.
    HANDLE               fenceEvent; // Event handle passed to dxg_wait() together with pFence.
    uint64_t             fenceValue; // Last value Signaled on pFence; 0 until the first flush.
    size_t               vertsWritten; // Verts written to current buffer.
    bool                 wait; // Whether the next draw should wait. Set by flush, cleared by dxg_imm_wait().
} DxgImm;

// Size in bytes of a single vertex: a position made of three floats.
//
// Declared with (void) so that the function has a real prototype; an empty
// parameter list in C leaves the arguments unchecked.
static inline size_t vertex_size_bytes(void) {
    return 3 * sizeof(float);
}

// Number of bytes taken by `numVerts` tightly packed vertices.
static inline size_t verts_byte_count(size_t numVerts) {
    const size_t bytesPerVert = vertex_size_bytes();
    return bytesPerVert * numVerts;
}

// Remaining capacity of the current buffer, measured in vertices.
static inline size_t dxg_imm_verts_left(const DxgImm* imm) {
    assert(imm);
    const size_t capacity = imm->bufferSizeVerts;
    const size_t used     = imm->vertsWritten;
    // The write cursor can never run past the end of the buffer.
    assert(used <= capacity);
    return capacity - used;
}

// Append `count` vertices to the current mapped buffer.
//
// imm:    renderer; its current buffer must be mapped.
// pVerts: source vertices, 3 floats per vertex, `count` vertices long.
// count:  number of vertices to copy; must fit in the space left.
static void dxg_imm_copy_verts(DxgImm* imm, const float* pVerts, size_t count) {
    assert(imm);
    assert(pVerts);
    assert(imm->pCurrentBufferData);
    assert(count <= dxg_imm_verts_left(imm));
    // vertsWritten counts vertices, but pCurrentBufferData is a float pointer.
    // Indexing it directly with vertsWritten would advance by one float per
    // vertex instead of a whole vertex and overwrite earlier data, so compute
    // the destination as a byte offset.
    uint8_t* const pDst =
        (uint8_t*)imm->pCurrentBufferData + verts_byte_count(imm->vertsWritten);
    memcpy(pDst, pVerts, verts_byte_count(count));
    imm->vertsWritten += count;
}

// Set up the current resource set for drawing: map its vertex buffer so that
// geometry can be written to it and reset its command recorder.
static void dxg_imm_set_up_resource_set(DxgImm* imm) {
    assert(imm);
    ResourceSet* const pResources = &imm->resources[imm->cur];
    // Map() takes a void**; go through a void* temporary instead of passing
    // the address of a float*, which is an incompatible pointer type.
    void* pMapped = NULL;
    TrapIfFailed(pResources->pVertexBuffer->lpVtbl->Map(
        pResources->pVertexBuffer, 0, NULL, &pMapped));
    imm->pCurrentBufferData = pMapped;
    dxg_cmdrec_reset(&pResources->cmdRec);
}

// Move on to the next resource set.
//
// Unmaps the current vertex buffer, switches to the other resource set, resets
// the write cursor and sets the new set up for writing.
//
// Returns the vertex buffer of the resource set that is now current.
static ID3D12Resource* dxg_imm_next_resource_set(DxgImm* imm) {
    assert(imm);
    ResourceSet* const pResources = &imm->resources[imm->cur];
    // Unmap the current buffer.
    // TODO: Do we actually need to do this or can we leave it mapped? If the
    // latter, then we could just map both buffers and let them be.
    pResources->pVertexBuffer->lpVtbl->Unmap(pResources->pVertexBuffer, 0, NULL);
    // Move on to the next resource set.
    imm->cur = (imm->cur + 1) & 1;
    imm->vertsWritten = 0;
    // Set up the new resource set.
    // NOTE(review): this resets the new set's command recorder before
    // dxg_imm_wait() has run for it. Confirm that dxg_cmdrec_reset() is safe
    // while that set's previous submission may still be executing.
    dxg_imm_set_up_resource_set(imm);
    // The function is declared to return a resource; falling off the end
    // without a value would be undefined behaviour for any caller reading it.
    return imm->resources[imm->cur].pVertexBuffer;
}

// Wait for the current buffer to be available for writing.
//
// Must only be called while imm->wait is set; clears the flag before returning.
static void dxg_imm_wait(DxgImm* imm) {
    assert(imm);
    assert(imm->wait);
    // dxg_imm_flush() increments fenceValue, Signals that value for the buffer
    // it has just submitted and only then switches buffers. fenceValue
    // therefore belongs to the other buffer, and the most recent submission of
    // the current buffer was Signaled with fenceValue - 1.
    //
    // The first Signal uses fence value 1. With fenceValue == 1 the current
    // buffer has never been submitted, so there is nothing to wait for.
    if (imm->fenceValue >= 2) {
        // Presumably dxg_wait() blocks until the fence has reached the given
        // value -- confirm against its definition.
        dxg_wait(imm->pFence, imm->fenceEvent, imm->fenceValue - 1);
    }
    imm->wait = false;
}

// Draw the current buffer.
//
// Records a non-indexed triangle-list draw of every vertex written to the
// current buffer into the current command list, closes the list and submits it
// to the command queue. Viewport, scissor and render targets come from the
// stored graphics state; the scissor rect covers the whole viewport extent.
static void dxg_imm_draw(DxgImm* imm) {
    assert(imm);
    ResourceSet* const pResourceSet = &imm->resources[imm->cur];
    ID3D12Resource* const pCurrentBuffer = pResourceSet->pVertexBuffer;
    ID3D12GraphicsCommandList* const pCmdList = pResourceSet->cmdRec.pCmdList;
    const D3D12_VIEWPORT* const pViewport = &imm->graphicsState.viewport;
    // Viewport dimensions are floats while the rect is integral; make the
    // truncation explicit.
    const D3D12_RECT scissor = {
        .left   = 0,
        .top    = 0,
        .right  = (LONG)pViewport->Width,
        .bottom = (LONG)pViewport->Height,
    };
    // The view and the draw call take 32-bit counts.
    assert(verts_byte_count(imm->vertsWritten) <= UINT32_MAX);
    const D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {
        .BufferLocation = pCurrentBuffer->lpVtbl->GetGPUVirtualAddress(pCurrentBuffer),
        .SizeInBytes    = (UINT)verts_byte_count(imm->vertsWritten),
        .StrideInBytes  = (UINT)vertex_size_bytes(),
    };
    pCmdList->lpVtbl->RSSetViewports(pCmdList, 1, pViewport);
    pCmdList->lpVtbl->RSSetScissorRects(pCmdList, 1, &scissor);
    pCmdList->lpVtbl->OMSetRenderTargets(
        pCmdList, 1, &imm->graphicsState.hBackBufferView, FALSE, &imm->graphicsState.hDepthStencilView);
    pCmdList->lpVtbl->SetPipelineState(pCmdList, imm->pPipelineState);
    pCmdList->lpVtbl->SetGraphicsRootSignature(pCmdList, imm->pRootSignature);
    pCmdList->lpVtbl->IASetPrimitiveTopology(pCmdList, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
    pCmdList->lpVtbl->IASetVertexBuffers(pCmdList, 0, 1, &vertexBufferView);
    pCmdList->lpVtbl->DrawInstanced(pCmdList, (UINT)imm->vertsWritten, 1, 0, 0);
    // Close() reports errors hit while recording; do not submit a list that
    // failed to close without noticing.
    TrapIfFailed(pCmdList->lpVtbl->Close(pCmdList));
    ID3D12CommandList* const cmdLists[] = {(ID3D12CommandList*)pCmdList};
    ID3D12CommandQueue* const pCmdQueue = imm->pCmdQueue;
    pCmdQueue->lpVtbl->ExecuteCommandLists(pCmdQueue, 1, cmdLists);
}

// Create an immediate-mode renderer.
//
// pDevice:             device used to create all resources; must not be NULL.
//                      The pointer is stored as-is (no reference is added).
// pCmdQueue:           queue that draws are submitted to; must not be NULL.
//                      The pointer is stored as-is (no reference is added).
// swapChainRtvFormat:  format of the render target the pipeline draws to.
// swapChainSampleDesc: sample description of that render target.
// bufferSizeVerts:     capacity, in vertices, of each of the two vertex
//                      buffers.
//
// Returns the new renderer, or NULL if the renderer could not be allocated or
// a vertex buffer could not be created. Failing D3D12 calls are passed to
// TrapIfFailed. Destroy the renderer with dxg_imm_destroy().
DxgImm* dxg_imm_init(ID3D12Device* pDevice, ID3D12CommandQueue* pCmdQueue, DXGI_FORMAT swapChainRtvFormat, DXGI_SAMPLE_DESC swapChainSampleDesc, size_t bufferSizeVerts) {
    assert(pDevice);
    assert(pCmdQueue);

    DxgImm* imm = calloc(1, sizeof(DxgImm));
    if (!imm) {
        return NULL;
    }

    imm->pDevice         = pDevice;
    imm->pCmdQueue       = pCmdQueue;
    imm->bufferSizeVerts = bufferSizeVerts;
    imm->fenceValue      = 0;

    // TODO: Move this to the application side.
    const D3D_SHADER_MODEL model = D3D_SHADER_MODEL_6_5;
    D3D12_FEATURE_DATA_SHADER_MODEL shaderModel = { model };
    HRESULT result = pDevice->lpVtbl->CheckFeatureSupport(
        pDevice, D3D12_FEATURE_SHADER_MODEL, &shaderModel, sizeof(shaderModel));
    if (FAILED(result) || (shaderModel.HighestShaderModel < model)) {
        DEBUG_PRINT("ERROR: Shader Model 6.5 is not supported!\n");
        // The query itself can succeed while reporting a lower shader model.
        // Trapping on the success code would then be a no-op, so turn that
        // case into a failure code first.
        if (SUCCEEDED(result)) {
            result = E_FAIL;
        }
        TrapIfFailed(result);
    }

    const D3D12_SHADER_BYTECODE vs_bytecode = {
        .pShaderBytecode = imm_vs,
        .BytecodeLength  = sizeof(imm_vs)
    };

    const D3D12_SHADER_BYTECODE ps_bytecode = {
        .pShaderBytecode = imm_ps,
        .BytecodeLength  = sizeof(imm_ps)
    };

    // TODO: Find out how many root parameters to use.
    // Let's do bindless rendering to keep things flexible.
    const D3D12_ROOT_SIGNATURE_DESC rootsig_desc = {
        .NumParameters     = 0,
        .pParameters       = NULL,
        .NumStaticSamplers = 0,
        .pStaticSamplers   = NULL,
        .Flags             = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT
    };

    ID3DBlob* pRootSignature = NULL;
    ID3DBlob* pErrors = NULL;
    result = D3D12SerializeRootSignature(
        &rootsig_desc,
        D3D_ROOT_SIGNATURE_VERSION_1,
        &pRootSignature,
        &pErrors);
    if (FAILED(result)) {
        if (pErrors) {
            DEBUG_PRINT(pErrors->lpVtbl->GetBufferPointer(pErrors));
        }
        TrapIfFailed(result);
    }

    TrapIfFailed(imm->pDevice->lpVtbl->CreateRootSignature(
        imm->pDevice,
        0,
        pRootSignature->lpVtbl->GetBufferPointer(pRootSignature),
        pRootSignature->lpVtbl->GetBufferSize(pRootSignature),
        &IID_ID3D12RootSignature,
        (void**)&imm->pRootSignature));

    // The serialized blobs are only needed to create the root signature;
    // release them so that they are not leaked.
    SafeRelease(pRootSignature);
    SafeRelease(pErrors);

    // One tightly packed float3 position per vertex.
    const D3D12_INPUT_ELEMENT_DESC input_layout[] = {
        { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }
    };
    const D3D12_INPUT_LAYOUT_DESC input_layout_desc = {
        .pInputElementDescs = input_layout,
        .NumElements        = COUNTOF(input_layout)
    };

    // NOTE(review): PointSampling is defined outside this file. SampleMask is
    // a bitmask of enabled samples -- confirm that the value enables the
    // intended samples.
    // NOTE(review): DepthStencilState and DSVFormat are left zero-initialized
    // here although draws bind a depth-stencil view -- confirm this is
    // intended.
    const D3D12_GRAPHICS_PIPELINE_STATE_DESC gpso = {
        .pRootSignature        = imm->pRootSignature,
        .VS                    = vs_bytecode,
        .PS                    = ps_bytecode,
        .BlendState            = CD3DX12_BLEND_DESC_DEFAULT(),
        .SampleMask            = PointSampling,
        .RasterizerState       = CD3DX12_RASTERIZER_DESC_DEFAULT(),
        .InputLayout           = input_layout_desc,
        .PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,
        .NumRenderTargets      = 1,
        .RTVFormats            = {swapChainRtvFormat},
        .SampleDesc            = swapChainSampleDesc
    };
    TrapIfFailed(imm->pDevice->lpVtbl->CreateGraphicsPipelineState(
        imm->pDevice, &gpso, &IID_ID3D12PipelineState, (void**)&imm->pPipelineState));

    const size_t bufferSize = verts_byte_count(bufferSizeVerts);
    for (int i = 0; i < 2; ++i) {
        imm->resources[i].pVertexBuffer = create_buffer(pDevice, bufferSize);
        if (!imm->resources[i].pVertexBuffer) {
            // dxg_imm_destroy() frees the renderer and sets imm to NULL, so
            // bail out here instead of carrying on with a dangling state.
            dxg_imm_destroy(&imm);
            return NULL;
        }
        TrapIfFailed(dxg_cmdrec_init(&imm->resources[i].cmdRec, pDevice));
    }
    imm->cur = 0;
    dxg_imm_set_up_resource_set(imm);

    TrapIfFailed(pDevice->lpVtbl->CreateFence(
        pDevice, imm->fenceValue, D3D12_FENCE_FLAG_NONE, &IID_ID3D12Fence, (void**)&imm->pFence));

    if ((imm->fenceEvent = CreateEvent(NULL, FALSE, FALSE, NULL)) == NULL) {
        TrapIfFailed(HRESULT_FROM_WIN32(GetLastError()));
    }

    return imm;
}

// Destroy a renderer created with dxg_imm_init() and set *ppImm to NULL.
//
// ppImm must not be NULL; *ppImm may be NULL, in which case nothing happens.
// The device and the command queue given at init are not released.
//
// NOTE(review): nothing here waits for submitted draws to finish -- confirm
// that callers make sure the GPU is idle before destroying the renderer.
void dxg_imm_destroy(DxgImm** ppImm) {
    assert(ppImm);
    DxgImm* const pSelf = *ppImm;
    if (!pSelf) {
        return;
    }

    // Per-draw resources of both sets.
    ResourceSet* const pSetsEnd = pSelf->resources + 2;
    for (ResourceSet* pSet = pSelf->resources; pSet != pSetsEnd; ++pSet) {
        SafeRelease(pSet->pVertexBuffer);
        dxg_cmdrec_destroy(&pSet->cmdRec);
    }

    // Pipeline and synchronization objects.
    SafeRelease(pSelf->pRootSignature);
    SafeRelease(pSelf->pPipelineState);
    SafeRelease(pSelf->pFence);
    if (pSelf->fenceEvent) {
        CloseHandle(pSelf->fenceEvent);
    }

    free(pSelf);
    *ppImm = NULL;
}

// Store the viewport and render-target views used by subsequent draws.
//
// imm:               renderer; must not be NULL.
// pViewport:         viewport to copy; must not be NULL.
// hBackBufferView:   render target view; its ptr must be non-zero.
// hDepthStencilView: depth-stencil view; its ptr must be non-zero.
void dxg_imm_set_graphics_state(
        DxgImm* imm,
        const D3D12_VIEWPORT* pViewport,
        D3D12_CPU_DESCRIPTOR_HANDLE hBackBufferView,
        D3D12_CPU_DESCRIPTOR_HANDLE hDepthStencilView) {
    assert(imm);
    assert(pViewport);
    assert(hBackBufferView.ptr);
    assert(hDepthStencilView.ptr);

    // Every field of the state is overwritten; nothing is kept from before.
    GraphicsState* const pState = &imm->graphicsState;
    pState->viewport          = *pViewport;
    pState->hBackBufferView   = hBackBufferView;
    pState->hDepthStencilView = hDepthStencilView;
}

// Submit the geometry buffered so far and switch to the other buffer.
//
// Does nothing if no vertices have been written since the last flush.
void dxg_imm_flush(DxgImm* imm) {
    assert(imm);
    if (imm->vertsWritten == 0) {
        return;
    }
    dxg_imm_draw(imm);
    // Signal the fence so that the current buffer can be reused once the
    // draw has finished.
    ID3D12CommandQueue* pCmdQueue = imm->pCmdQueue;
    imm->fenceValue++;
    // A Signal that fails silently would leave later fence waits on this
    // buffer without anything to wake them up, so check the result.
    TrapIfFailed(pCmdQueue->lpVtbl->Signal(pCmdQueue, imm->pFence, imm->fenceValue));
    // Next draw should Wait for the next buffer. Wait lazily on the next
    // draw to avoid a stall here.
    imm->wait = true;
    dxg_imm_next_resource_set(imm);
}

// Buffer `numTris` triangles for drawing.
//
// imm:     renderer; must not be NULL.
// pVerts:  triangle vertices, 3 vertices per triangle and 3 floats per vertex
//          (9 floats per triangle); must not be NULL.
// numTris: number of triangles to read from pVerts.
//
// Triangles that do not fit in the current buffer are not dropped: the buffer
// is flushed and the remaining triangles continue in the next one. Geometry is
// only guaranteed to be submitted after dxg_imm_flush().
void dxg_imm_draw_triangles(DxgImm* imm, const float* pVerts, size_t numTris) {
    assert(imm);
    assert(pVerts);
    const size_t floatsPerVert = vertex_size_bytes() / sizeof(float);
    while (numTris > 0) {
        if (dxg_imm_verts_left(imm) / 3 == 0) {
            dxg_imm_flush(imm);
        }
        // If we just flushed the previous buffer, then we have to wait on the
        // next one. The wait is done here, and not inside the branch above,
        // because the client code can also flush the buffer.
        if (imm->wait) {
            dxg_imm_wait(imm);
        }
        // Re-evaluate capacity. It must be >0 now.
        const size_t triCapacity = dxg_imm_verts_left(imm) / 3;
        assert(triCapacity > 0);
        if (triCapacity == 0) {
            // Only reachable with asserts disabled and buffers too small to
            // hold a single triangle; give up instead of looping forever.
            return;
        }
        const size_t batchVerts = MIN(triCapacity, numTris) * 3;
        dxg_imm_copy_verts(imm, pVerts, batchVerts);
        pVerts  += batchVerts * floatsPerVert;
        numTris -= batchVerts / 3;
    }
}