From 6c8ae19be66cee247980a48e736a4e05d14de179 Mon Sep 17 00:00:00 2001
From: 3gg <3gg@shellblade.net>
Date: Tue, 2 Dec 2025 16:39:36 -0800
Subject: Immediate-mode renderer, triangle demo, shader compilation in cmake, Agility SDK

---
 dxg/src/dxcommon.c | 195 +++++++++++++++++++++++++
 dxg/src/dxg.c      |   1 -
 dxg/src/imm.c      | 444 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 639 insertions(+), 1 deletion(-)
 create mode 100644 dxg/src/dxcommon.c
 delete mode 100644 dxg/src/dxg.c
 create mode 100644 dxg/src/imm.c

(limited to 'dxg/src')

diff --git a/dxg/src/dxcommon.c b/dxg/src/dxcommon.c
new file mode 100644
index 0000000..ecc9a88
--- /dev/null
+++ b/dxg/src/dxcommon.c
@@ -0,0 +1,195 @@
+// NOTE(review): the header name was stripped when this patch was extracted;
+// <dxg/dxcommon.h> is reconstructed from the file name -- confirm.
+#include <dxg/dxcommon.h>
+
+// Required so that D3D12.dll can find and load D3D12Core.dll and other DLLs
+// from the Agility SDK. The macros come from CMakeLists.txt.
+__declspec(dllexport) extern const UINT D3D12SDKVersion = AGILITY_SDK_VERSION;
+__declspec(dllexport) extern const char* D3D12SDKPath = AGILITY_SDK_INSTALL;
+
+// Build a transition barrier that moves all subresources of pResource from
+// stateBefore to stateAfter.
+D3D12_RESOURCE_BARRIER CD3DX12_RESOURCE_BARRIER_Transition(
+    ID3D12Resource* pResource,
+    D3D12_RESOURCE_STATES stateBefore,
+    D3D12_RESOURCE_STATES stateAfter) {
+  return (D3D12_RESOURCE_BARRIER){
+    .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+    .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+    .Transition.pResource = pResource,
+    .Transition.StateBefore = stateBefore,
+    .Transition.StateAfter = stateAfter,
+    .Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES};
+}
+
+// Default rasterizer state: solid fill, back-face culling, depth clip enabled.
+D3D12_RASTERIZER_DESC CD3DX12_RASTERIZER_DESC_DEFAULT(void) {
+  return (D3D12_RASTERIZER_DESC){
+    .FillMode = D3D12_FILL_MODE_SOLID,
+    .CullMode = D3D12_CULL_MODE_BACK,
+    .FrontCounterClockwise = FALSE,
+    .DepthBias = D3D12_DEFAULT_DEPTH_BIAS,
+    .DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP,
+    .SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
+    .DepthClipEnable = TRUE,
+    .MultisampleEnable = FALSE,
+    .AntialiasedLineEnable = FALSE,
+    .ForcedSampleCount = 0,
+    .ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF};
+}
+
+// Default blend state: blending disabled and all color channels written on
+// every render target.
+D3D12_BLEND_DESC CD3DX12_BLEND_DESC_DEFAULT(void) {
+  const D3D12_RENDER_TARGET_BLEND_DESC defaultRenderTargetBlendDesc = {
+    FALSE, FALSE,
+    D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD,
+    D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD,
+    D3D12_LOGIC_OP_NOOP,
+    D3D12_COLOR_WRITE_ENABLE_ALL,
+  };
+  D3D12_BLEND_DESC desc = {
+    .AlphaToCoverageEnable = FALSE,
+    .IndependentBlendEnable = FALSE,
+  };
+  for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) {
+    desc.RenderTarget[i] = defaultRenderTargetBlendDesc;
+  }
+  return desc;
+}
+
+// Block the calling thread until pFence reaches fenceValue. fenceEvent is
+// the Windows event used to sleep while the fence is still pending.
+void dxg_wait(ID3D12Fence* pFence, HANDLE fenceEvent, UINT64 fenceValue) {
+  assert(pFence);
+  // It is possible that execution has already finished by the time we get
+  // here, so first check the fence's completed value.
+  if (pFence->lpVtbl->GetCompletedValue(pFence) >= fenceValue) {
+    return;
+  }
+  // GPU Signal still pending. Ask for the event to be fired when the fence
+  // reaches the given value, then sleep until it does.
+  TrapIfFailed(pFence->lpVtbl->SetEventOnCompletion(pFence, fenceValue, fenceEvent));
+  if (WaitForSingleObject(fenceEvent, INFINITE) == WAIT_FAILED) {
+    // Do not silently carry on as if the GPU had finished.
+    TrapIfFailed(HRESULT_FROM_WIN32(GetLastError()));
+  }
+}
+
+// -----------------------------------------------------------------------------
+// Command Recorder
+// -----------------------------------------------------------------------------
+
+// Create the command allocator and command list of pRec. The command list is
+// left closed. Returns the first failing HRESULT, or the result of Close().
+HRESULT dxg_cmdrec_init(CommandRecorder* pRec, ID3D12Device* pDevice) {
+  assert(pRec);
+  assert(pDevice);
+
+  const D3D12_COMMAND_LIST_TYPE type = D3D12_COMMAND_LIST_TYPE_DIRECT;
+
+  HRESULT result = pDevice->lpVtbl->CreateCommandAllocator(
+      pDevice, type, &IID_ID3D12CommandAllocator, (void**)&pRec->pCmdAllocator);
+  if (FAILED(result)) {
+    return result;
+  }
+
+  // pCmdList is driven through ID3D12GraphicsCommandList methods (Close and
+  // Reset), so request that interface instead of the base ID3D12CommandList.
+  result = pDevice->lpVtbl->CreateCommandList(
+      pDevice, 0, type, pRec->pCmdAllocator, NULL,
+      &IID_ID3D12GraphicsCommandList, (void**)&pRec->pCmdList);
+  if (FAILED(result)) {
+    return result;
+  }
+
+  // Command lists start open. Close it for API convenience.
+  return pRec->pCmdList->lpVtbl->Close(pRec->pCmdList);
+}
+
+// Release the command list and allocator of pRec.
+void dxg_cmdrec_destroy(CommandRecorder* pRec) {
+  assert(pRec);
+  SafeRelease(pRec->pCmdList);
+  SafeRelease(pRec->pCmdAllocator);
+}
+
+// Reset the allocator and re-open the command list for recording. The GPU
+// must have finished executing the commands previously recorded with pRec.
+HRESULT dxg_cmdrec_reset(CommandRecorder* pRec) {
+  assert(pRec);
+  assert(pRec->pCmdAllocator);
+  assert(pRec->pCmdList);
+  const HRESULT result = pRec->pCmdAllocator->lpVtbl->Reset(pRec->pCmdAllocator);
+  if (FAILED(result)) {
+    return result;
+  }
+  return pRec->pCmdList->lpVtbl->Reset(pRec->pCmdList, pRec->pCmdAllocator, NULL);
+}
+
+// -----------------------------------------------------------------------------
+// Upload Buffer
+// -----------------------------------------------------------------------------
+
+// Create an upload-heap buffer that can hold at least size bytes.
+void dxg_upload_buffer_init(UploadBuffer* pBuf, ID3D12Device* pDevice, size_t size) {
+  assert(pBuf);
+  assert(pDevice);
+  assert(size > 0);
+
+  pBuf->size = size;
+
+  // For the predefined heap types (anything but CUSTOM), the page property
+  // and memory pool must be left UNKNOWN or resource creation fails.
+  const D3D12_HEAP_PROPERTIES props = {
+    .Type = D3D12_HEAP_TYPE_UPLOAD,
+    .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
+    .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
+    .CreationNodeMask = 0,
+    .VisibleNodeMask = 0
+  };
+  // Constant buffer views need sizes that are multiples of 256 bytes. Other
+  // types of buffers do not have this requirement. To make the upload buffer
+  // general, round the width up to the worst case.
+  const UINT64 align = D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT;
+  const UINT64 width = ((UINT64)size + align - 1) / align * align;
+  // Buffers must be described as 1x1x1, single-mip, single-sample, row-major
+  // resources. Alignment 0 selects the default (64KB) resource alignment.
+  const D3D12_RESOURCE_DESC desc = {
+    .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
+    .Alignment = 0,
+    .Width = width,
+    .Height = 1,
+    .DepthOrArraySize = 1,
+    .MipLevels = 1,
+    .Format = DXGI_FORMAT_UNKNOWN,
+    .SampleDesc = {.Count = 1, .Quality = 0},
+    .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+    .Flags = D3D12_RESOURCE_FLAG_NONE
+  };
+  // Upload-heap resources must be created in the GENERIC_READ state.
+  TrapIfFailed(pDevice->lpVtbl->CreateCommittedResource(
+      pDevice,
+      &props,
+      D3D12_HEAP_FLAG_NONE,
+      &desc,
+      D3D12_RESOURCE_STATE_GENERIC_READ,
+      NULL,
+      &IID_ID3D12Resource,
+      (void**)&pBuf->pUploadBuffer));
+}
+
+// Release the buffer created by dxg_upload_buffer_init().
+void dxg_upload_buffer_destroy(UploadBuffer* pBuf, ID3D12Device* pDevice) {
+  assert(pDevice);
+  assert(pBuf);
+  SafeRelease(pBuf->pUploadBuffer);
+}
+
+// TODO: Not implemented yet. Only validates its arguments; no data is copied.
+void dxg_upload_buffer_load(UploadBuffer* pBuf, const void* pData, size_t bytes, ID3D12Resource* pDstBuffer) {
+  assert(pBuf);
+  assert(pData);
+  assert(pDstBuffer);
+  assert(bytes <= pBuf->size);
+}
diff --git a/dxg/src/dxg.c b/dxg/src/dxg.c
deleted file mode 100644
index e985d3d..0000000
--- a/dxg/src/dxg.c
+++ /dev/null
@@ -1 +0,0 @@
-int x = 2;
diff --git a/dxg/src/imm.c b/dxg/src/imm.c
new file mode 100644
index 0000000..28baa99
--- /dev/null
+++ b/dxg/src/imm.c
@@ -0,0 +1,444 @@
+/* Immediate-mode renderer.
+
+Geometry is given by client code and buffered in an upload-heap buffer stored
+in host memory.
+When the buffer fills up or the client is done, a draw call is issued. The draw
+call reads directly from the buffer in host memory; there is no intermediate
+buffer copy.
+The renderer double-buffers two host-side buffers so that the client can
+continue specifying more data into a second buffer while the contents of the
+first buffer are rendered.
+If the first buffer is still being rendered while the client loops around, then
+the client must wait before issuing further geometry.
+Once the render of the first buffer completes, the process starts again,
+ping-ponging between the two buffers.*/
+// NOTE(review): the header names below were stripped when this patch was
+// extracted. They are reconstructed from what this file uses -- confirm
+// against the repository.
+#include <dxg/imm.h>
+#include <dxg/dxcommon.h>
+
+#include <imm_ps.h> // generated
+#include <imm_vs.h> // generated
+
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h> // OutputDebugStringA
+
+#include <stdlib.h> // calloc, free
+#include <string.h> // memcpy
+
+enum {
+  FLOATS_PER_VERTEX = 3,  // Position only: x, y, z.
+  VERTS_PER_TRIANGLE = 3,
+};
+
+// Create an upload-heap buffer of the given size in bytes.
+static ID3D12Resource* create_buffer(ID3D12Device* pDevice, size_t size) {
+  assert(pDevice);
+  const D3D12_HEAP_PROPERTIES props = {
+    .Type = D3D12_HEAP_TYPE_UPLOAD,
+    .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
+    .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
+    .CreationNodeMask = 0,
+    .VisibleNodeMask = 0
+  };
+  const D3D12_RESOURCE_DESC desc = {
+    .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
+    .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
+    .Width = size,
+    .Height = 1,
+    .DepthOrArraySize = 1,
+    .MipLevels = 1,
+    .Format = DXGI_FORMAT_UNKNOWN,
+    .SampleDesc = {.Count = 1, .Quality = 0},
+    .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+    .Flags = D3D12_RESOURCE_FLAG_NONE
+  };
+  ID3D12Resource* pBuffer = NULL;
+  // Upload-heap resources must be created in the GENERIC_READ state, which
+  // already covers the vertex-buffer reads done by the draw.
+  TrapIfFailed(pDevice->lpVtbl->CreateCommittedResource(
+      pDevice,
+      &props,
+      D3D12_HEAP_FLAG_NONE,
+      &desc,
+      D3D12_RESOURCE_STATE_GENERIC_READ,
+      NULL,
+      &IID_ID3D12Resource,
+      (void**)&pBuffer));
+  return pBuffer;
+}
+
+// Render state supplied by the client through dxg_imm_set_graphics_state().
+typedef struct GraphicsState {
+  D3D12_VIEWPORT viewport;
+  D3D12_CPU_DESCRIPTOR_HANDLE hBackBufferView;
+  D3D12_CPU_DESCRIPTOR_HANDLE hDepthStencilView;
+} GraphicsState;
+
+// Set of per-draw resources. The renderer cycles between sets per draw.
+typedef struct ResourceSet {
+  ID3D12Resource* pVertexBuffer;
+  CommandRecorder cmdRec;
+} ResourceSet;
+
+typedef struct DxgImm {
+  ID3D12Device* pDevice;
+  ID3D12CommandQueue* pCmdQueue;
+  ID3D12PipelineState* pPipelineState;
+  ID3D12RootSignature* pRootSignature;
+  GraphicsState graphicsState;
+  ResourceSet resources[2];
+  int cur; // Index to current resource set. New geometry written here.
+  float* pCurrentBufferData; // Mapped region of current buffer.
+  size_t bufferSizeVerts; // Num verts per buffer.
+  ID3D12Fence* pFence;
+  HANDLE fenceEvent;
+  uint64_t fenceValue; // Last value Signaled on the command queue.
+  size_t vertsWritten; // Verts written to current buffer.
+  bool wait; // Whether the next draw should wait.
+} DxgImm;
+
+// Size of one vertex in bytes.
+static inline size_t vertex_size_bytes(void) {
+  return FLOATS_PER_VERTEX * sizeof(float);
+}
+
+// Size of numVerts vertices in bytes.
+static inline size_t verts_byte_count(size_t numVerts) {
+  return numVerts * vertex_size_bytes();
+}
+
+// Number of vertices that still fit in the current buffer.
+static inline size_t dxg_imm_verts_left(const DxgImm* imm) {
+  assert(imm);
+  assert(imm->bufferSizeVerts >= imm->vertsWritten);
+  return imm->bufferSizeVerts - imm->vertsWritten;
+}
+
+// Append count vertices to the current buffer. The caller must make sure
+// that they fit.
+static void dxg_imm_copy_verts(DxgImm* imm, const float* pVerts, size_t count) {
+  assert(imm);
+  assert(pVerts);
+  assert(imm->pCurrentBufferData);
+  assert(count <= dxg_imm_verts_left(imm));
+  // pCurrentBufferData is addressed in floats while vertsWritten counts
+  // vertices, so scale the offset by the number of floats per vertex.
+  float* const pDst =
+      imm->pCurrentBufferData + imm->vertsWritten * FLOATS_PER_VERTEX;
+  memcpy(pDst, pVerts, verts_byte_count(count));
+  imm->vertsWritten += count;
+}
+
+// Map the vertex buffer of the current resource set for writing.
+//
+// The command recorder of the set is deliberately not reset here: the GPU
+// may still be executing its previous draw, and a command allocator must not
+// be reset until then. dxg_imm_draw() resets it after the fence wait.
+static void dxg_imm_set_up_resource_set(DxgImm* imm) {
+  assert(imm);
+  ResourceSet* const pResources = &imm->resources[imm->cur];
+  // An empty read range tells the driver that the CPU only writes.
+  const D3D12_RANGE noRead = {0, 0};
+  // Map through a void* because that is the pointer type Map() fills in.
+  void* pData = NULL;
+  TrapIfFailed(pResources->pVertexBuffer->lpVtbl->Map(
+      pResources->pVertexBuffer, 0, &noRead, &pData));
+  imm->pCurrentBufferData = pData;
+}
+
+// Move on to the next resource set.
+static void dxg_imm_next_resource_set(DxgImm* imm) {
+  assert(imm);
+  ResourceSet* const pResources = &imm->resources[imm->cur];
+  // Unmap the current buffer.
+  // TODO: Do we actually need to do this or can we leave it mapped? If the
+  // latter, then we could just map both buffers and let them be.
+  pResources->pVertexBuffer->lpVtbl->Unmap(pResources->pVertexBuffer, 0, NULL);
+  imm->pCurrentBufferData = NULL;
+  // Move on to the next resource set.
+  imm->cur = (imm->cur + 1) & 1;
+  imm->vertsWritten = 0;
+  // Set up the new resource set.
+  dxg_imm_set_up_resource_set(imm);
+}
+
+// Wait for the current buffer to be available for writing.
+static void dxg_imm_wait(DxgImm* imm) {
+  assert(imm);
+  assert(imm->wait);
+  // fenceValue is the value Signaled by the most recent flush, which drew
+  // the other resource set. With two sets used in alternation, the current
+  // set was drawn by the flush before that one, i.e. fenceValue - 1.
+  // On the first round this is 0, the fence's initial value, and dxg_wait()
+  // returns immediately.
+  if (imm->fenceValue > 0) {
+    dxg_wait(imm->pFence, imm->fenceEvent, imm->fenceValue - 1);
+  }
+  imm->wait = false;
+}
+
+// Draw the current buffer.
+static void dxg_imm_draw(DxgImm* imm) {
+  assert(imm);
+  // Vertices can only be written after the lazy wait has run, so by now the
+  // GPU is done with this resource set and its recorder can be reset.
+  assert(!imm->wait);
+  ResourceSet* const pResourceSet = &imm->resources[imm->cur];
+  TrapIfFailed(dxg_cmdrec_reset(&pResourceSet->cmdRec));
+  ID3D12Resource* const pCurrentBuffer = pResourceSet->pVertexBuffer;
+  ID3D12GraphicsCommandList* const pCmdList = pResourceSet->cmdRec.pCmdList;
+  const D3D12_VIEWPORT* const pViewport = &imm->graphicsState.viewport;
+  // The scissor spans the viewport size; viewport dimensions are floats.
+  const D3D12_RECT scissor = {
+    .bottom = (LONG)pViewport->Height,
+    .left = 0,
+    .right = (LONG)pViewport->Width,
+    .top = 0,
+  };
+  const D3D12_VERTEX_BUFFER_VIEW vertexBufferView = {
+    .BufferLocation = pCurrentBuffer->lpVtbl->GetGPUVirtualAddress(pCurrentBuffer),
+    .SizeInBytes = (UINT)verts_byte_count(imm->vertsWritten),
+    .StrideInBytes = (UINT)vertex_size_bytes(),
+  };
+  pCmdList->lpVtbl->RSSetViewports(pCmdList, 1, pViewport);
+  pCmdList->lpVtbl->RSSetScissorRects(pCmdList, 1, &scissor);
+  pCmdList->lpVtbl->OMSetRenderTargets(
+      pCmdList, 1, &imm->graphicsState.hBackBufferView, false, &imm->graphicsState.hDepthStencilView);
+  pCmdList->lpVtbl->SetPipelineState(pCmdList, imm->pPipelineState);
+  pCmdList->lpVtbl->SetGraphicsRootSignature(pCmdList, imm->pRootSignature);
+  pCmdList->lpVtbl->IASetPrimitiveTopology(pCmdList, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+  pCmdList->lpVtbl->IASetVertexBuffers(pCmdList, 0, 1, &vertexBufferView);
+  pCmdList->lpVtbl->DrawInstanced(pCmdList, (UINT)imm->vertsWritten, 1, 0, 0);
+  // Close() is where recording errors are reported.
+  TrapIfFailed(pCmdList->lpVtbl->Close(pCmdList));
+  ID3D12CommandList* const cmdLists[] = {(ID3D12CommandList*)pCmdList};
+  ID3D12CommandQueue* const pCmdQueue = imm->pCmdQueue;
+  pCmdQueue->lpVtbl->ExecuteCommandLists(pCmdQueue, 1, cmdLists);
+}
+
+// Create an immediate-mode renderer whose two vertex buffers hold
+// bufferSizeVerts vertices each. Returns NULL if memory, root signature
+// serialization or vertex buffer creation fails. Destroy the renderer with
+// dxg_imm_destroy().
+DxgImm* dxg_imm_init(ID3D12Device* pDevice, ID3D12CommandQueue* pCmdQueue, DXGI_FORMAT swapChainRtvFormat, DXGI_SAMPLE_DESC swapChainSampleDesc, size_t bufferSizeVerts) {
+  assert(pDevice);
+  assert(pCmdQueue);
+  // Each buffer must be able to hold at least one triangle.
+  assert(bufferSizeVerts >= VERTS_PER_TRIANGLE);
+
+  DxgImm* imm = calloc(1, sizeof *imm);
+  if (!imm) {
+    return NULL;
+  }
+
+  imm->pDevice = pDevice;
+  imm->pCmdQueue = pCmdQueue;
+  imm->bufferSizeVerts = bufferSizeVerts;
+  imm->fenceValue = 0;
+
+  // TODO: Move this to the application side.
+  const D3D_SHADER_MODEL model = D3D_SHADER_MODEL_6_5;
+  D3D12_FEATURE_DATA_SHADER_MODEL shaderModel = { model };
+  HRESULT result = pDevice->lpVtbl->CheckFeatureSupport(
+      pDevice, D3D12_FEATURE_SHADER_MODEL, &shaderModel, sizeof(shaderModel));
+  if (FAILED(result) || (shaderModel.HighestShaderModel < model)) {
+    DEBUG_PRINT("ERROR: Shader Model 6.5 is not supported!\n");
+    // The query can succeed while reporting a lower shader model, so make
+    // sure that a failure code is what gets trapped.
+    TrapIfFailed(FAILED(result) ? result : E_FAIL);
+  }
+
+  const D3D12_SHADER_BYTECODE vs_bytecode = {
+    .pShaderBytecode = imm_vs,
+    .BytecodeLength = sizeof(imm_vs)
+  };
+
+  const D3D12_SHADER_BYTECODE ps_bytecode = {
+    .pShaderBytecode = imm_ps,
+    .BytecodeLength = sizeof(imm_ps)
+  };
+
+  // TODO: Find out how many root parameters to use.
+  // Let's do bindless rendering to keep things flexible.
+  const D3D12_ROOT_SIGNATURE_DESC rootsig_desc = {
+    .NumParameters = 0,
+    .pParameters = NULL,
+    .NumStaticSamplers = 0,
+    .pStaticSamplers = NULL,
+    .Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT
+  };
+
+  ID3DBlob* pRootSignature = NULL;
+  ID3DBlob* pErrors = NULL;
+  result = D3D12SerializeRootSignature(
+      &rootsig_desc,
+      D3D_ROOT_SIGNATURE_VERSION_1,
+      &pRootSignature,
+      &pErrors);
+  if (FAILED(result)) {
+    if (pErrors) {
+      DEBUG_PRINT(pErrors->lpVtbl->GetBufferPointer(pErrors));
+      pErrors->lpVtbl->Release(pErrors);
+    }
+    TrapIfFailed(result);
+    // There is no blob to create the root signature from.
+    dxg_imm_destroy(&imm);
+    return NULL;
+  }
+
+  TrapIfFailed(imm->pDevice->lpVtbl->CreateRootSignature(
+      imm->pDevice,
+      0,
+      pRootSignature->lpVtbl->GetBufferPointer(pRootSignature),
+      pRootSignature->lpVtbl->GetBufferSize(pRootSignature),
+      &IID_ID3D12RootSignature,
+      (void**)&imm->pRootSignature));
+  // The serialized blobs are not needed once the root signature exists.
+  pRootSignature->lpVtbl->Release(pRootSignature);
+  if (pErrors) {
+    pErrors->lpVtbl->Release(pErrors);
+  }
+
+  const D3D12_INPUT_ELEMENT_DESC input_layout[] = {
+    { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }
+  };
+  const D3D12_INPUT_LAYOUT_DESC input_layout_desc = {
+    .pInputElementDescs = input_layout,
+    .NumElements = COUNTOF(input_layout)
+  };
+
+  // NOTE(review): PointSampling is defined outside this file; confirm that it
+  // enables the samples the swap chain uses.
+  const D3D12_GRAPHICS_PIPELINE_STATE_DESC gpso = {
+    .pRootSignature = imm->pRootSignature,
+    .VS = vs_bytecode,
+    .PS = ps_bytecode,
+    .BlendState = CD3DX12_BLEND_DESC_DEFAULT(),
+    .SampleMask = PointSampling,
+    .RasterizerState = CD3DX12_RASTERIZER_DESC_DEFAULT(),
+    .InputLayout = input_layout_desc,
+    .PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,
+    .NumRenderTargets = 1,
+    .RTVFormats = {swapChainRtvFormat},
+    .SampleDesc = swapChainSampleDesc
+  };
+  TrapIfFailed(imm->pDevice->lpVtbl->CreateGraphicsPipelineState(
+      imm->pDevice, &gpso, &IID_ID3D12PipelineState, (void**)&imm->pPipelineState));
+
+  const size_t bufferSize = verts_byte_count(bufferSizeVerts);
+  for (int i = 0; i < 2; ++i) {
+    imm->resources[i].pVertexBuffer = create_buffer(pDevice, bufferSize);
+    if (!imm->resources[i].pVertexBuffer) {
+      // dxg_imm_destroy() frees and NULLs imm, so stop here.
+      dxg_imm_destroy(&imm);
+      return NULL;
+    }
+    TrapIfFailed(dxg_cmdrec_init(&imm->resources[i].cmdRec, pDevice));
+  }
+  imm->cur = 0;
+  dxg_imm_set_up_resource_set(imm);
+
+  TrapIfFailed(pDevice->lpVtbl->CreateFence(
+      pDevice, imm->fenceValue, D3D12_FENCE_FLAG_NONE, &IID_ID3D12Fence, (void**)&imm->pFence));
+
+  if ((imm->fenceEvent = CreateEvent(NULL, FALSE, FALSE, NULL)) == NULL) {
+    TrapIfFailed(HRESULT_FROM_WIN32(GetLastError()));
+  }
+
+  return imm;
+}
+
+// Destroy the renderer and set *ppImm to NULL. Waits for the GPU to finish
+// the last submitted draw first. A NULL *ppImm is a no-op.
+void dxg_imm_destroy(DxgImm** ppImm) {
+  assert(ppImm);
+  DxgImm* imm = *ppImm;
+  if (!imm) {
+    return;
+  }
+  // The GPU may still be reading the buffers and command lists of the last
+  // flush; they must stay alive until it is done.
+  if (imm->pFence && imm->fenceEvent && (imm->fenceValue > 0)) {
+    dxg_wait(imm->pFence, imm->fenceEvent, imm->fenceValue);
+  }
+  for (int i = 0; i < 2; ++i) {
+    SafeRelease(imm->resources[i].pVertexBuffer);
+    dxg_cmdrec_destroy(&imm->resources[i].cmdRec);
+  }
+  SafeRelease(imm->pRootSignature);
+  SafeRelease(imm->pPipelineState);
+  SafeRelease(imm->pFence);
+  if (imm->fenceEvent != NULL) {
+    CloseHandle(imm->fenceEvent);
+  }
+  free(imm);
+  *ppImm = NULL;
+}
+
+// Set the viewport and the render target and depth-stencil views used by
+// subsequent draws.
+void dxg_imm_set_graphics_state(
+    DxgImm* imm,
+    const D3D12_VIEWPORT* pViewport,
+    D3D12_CPU_DESCRIPTOR_HANDLE hBackBufferView,
+    D3D12_CPU_DESCRIPTOR_HANDLE hDepthStencilView) {
+  assert(imm);
+  assert(pViewport);
+  assert(hBackBufferView.ptr);
+  assert(hDepthStencilView.ptr);
+  imm->graphicsState = (GraphicsState) {
+    .viewport = *pViewport,
+    .hBackBufferView = hBackBufferView,
+    .hDepthStencilView = hDepthStencilView,
+  };
+}
+
+// Draw the vertices buffered so far, if any, and move on to the next buffer.
+void dxg_imm_flush(DxgImm* imm) {
+  assert(imm);
+  if (imm->vertsWritten > 0) {
+    dxg_imm_draw(imm);
+    // Signal the fence so that the current buffer can be reused once the
+    // draw has finished.
+    ID3D12CommandQueue* pCmdQueue = imm->pCmdQueue;
+    imm->fenceValue++;
+    TrapIfFailed(pCmdQueue->lpVtbl->Signal(pCmdQueue, imm->pFence, imm->fenceValue));
+    // Next draw should Wait for the next buffer. Wait lazily on the next
+    // draw to avoid a stall here.
+    imm->wait = true;
+    dxg_imm_next_resource_set(imm);
+  }
+}
+
+// Buffer numTris triangles for drawing. pVerts holds 3 vertices per triangle
+// and 3 floats per vertex. When the current buffer fills up it is flushed
+// and the remaining triangles go into the next one.
+void dxg_imm_draw_triangles(DxgImm* imm, const float* pVerts, size_t numTris) {
+  assert(imm);
+  assert(pVerts);
+  // A buffer that cannot hold a single triangle can never make progress.
+  assert(imm->bufferSizeVerts >= VERTS_PER_TRIANGLE);
+  if (imm->bufferSizeVerts < VERTS_PER_TRIANGLE) {
+    return;
+  }
+  while (numTris > 0) {
+    if (dxg_imm_verts_left(imm) < VERTS_PER_TRIANGLE) {
+      dxg_imm_flush(imm);
+    }
+    // If we just flushed the previous buffer, then we have to wait on the
+    // next one. The wait is done here, and not inside the branch above,
+    // because the client code can also flush the buffer.
+    if (imm->wait) {
+      dxg_imm_wait(imm);
+    }
+    const size_t triCapacity = dxg_imm_verts_left(imm) / VERTS_PER_TRIANGLE;
+    assert(triCapacity > 0);
+    const size_t batchTris = (numTris < triCapacity) ? numTris : triCapacity;
+    const size_t batchVerts = batchTris * VERTS_PER_TRIANGLE;
+    dxg_imm_copy_verts(imm, pVerts, batchVerts);
+    pVerts += batchVerts * FLOATS_PER_VERTEX;
+    numTris -= batchTris;
+  }
+}
-- 
cgit v1.2.3