From 54ed8f0bb0f0859e05b7b406b7e95fa71900ba40 Mon Sep 17 00:00:00 2001 From: Joshie Date: Mon, 16 Dec 2019 03:28:01 +0000 Subject: [PATCH] [d3d9] Implement Direct3D9 Frontend (#1275) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Philip Rebohle Co-authored-by: Robin Kertels Co-authored-by: pchome Co-authored-by: Christopher Egert Co-authored-by: Derek Lesho Co-authored-by: Luis Cáceres Co-authored-by: Nelson Chen Co-authored-by: Edmondo Tommasina Co-authored-by: Riesi Co-authored-by: gbMichelle --- .github/ISSUE_TEMPLATE/bug_report.md | 6 +- LICENSE | 1 + dxvk.conf | 112 +- meson.build | 2 + meson_options.txt | 1 + setup_dxvk.sh | 1 + src/d3d9/d3d9.def | 22 + src/d3d9/d3d9.spec | 20 + src/d3d9/d3d9_adapter.cpp | 804 +++ src/d3d9/d3d9_adapter.h | 112 + src/d3d9/d3d9_buffer.cpp | 114 + src/d3d9/d3d9_buffer.h | 92 + src/d3d9/d3d9_caps.h | 32 + src/d3d9/d3d9_common_buffer.cpp | 124 + src/d3d9/d3d9_common_buffer.h | 202 + src/d3d9/d3d9_common_texture.cpp | 508 ++ src/d3d9/d3d9_common_texture.h | 431 ++ src/d3d9/d3d9_constant_layout.h | 26 + src/d3d9/d3d9_constant_set.h | 47 + src/d3d9/d3d9_cursor.cpp | 42 + src/d3d9/d3d9_cursor.h | 33 + src/d3d9/d3d9_device.cpp | 6530 ++++++++++++++++++ src/d3d9/d3d9_device.h | 1154 ++++ src/d3d9/d3d9_device_child.h | 61 + src/d3d9/d3d9_fixed_function.cpp | 2311 +++++++ src/d3d9/d3d9_fixed_function.h | 247 + src/d3d9/d3d9_format.cpp | 500 ++ src/d3d9/d3d9_format.h | 215 + src/d3d9/d3d9_format_helpers.cpp | 82 + src/d3d9/d3d9_format_helpers.h | 40 + src/d3d9/d3d9_hud.cpp | 36 + src/d3d9/d3d9_hud.h | 31 + src/d3d9/d3d9_include.h | 95 + src/d3d9/d3d9_initializer.cpp | 165 + src/d3d9/d3d9_initializer.h | 62 + src/d3d9/d3d9_interface.cpp | 316 + src/d3d9/d3d9_interface.h | 148 + src/d3d9/d3d9_main.cpp | 86 + src/d3d9/d3d9_monitor.cpp | 175 + src/d3d9/d3d9_monitor.h | 88 + src/d3d9/d3d9_multithread.cpp | 41 + src/d3d9/d3d9_multithread.h | 101 + src/d3d9/d3d9_names.cpp | 230 + 
src/d3d9/d3d9_names.h | 7 + src/d3d9/d3d9_options.cpp | 76 + src/d3d9/d3d9_options.h | 117 + src/d3d9/d3d9_query.cpp | 312 + src/d3d9/d3d9_query.h | 88 + src/d3d9/d3d9_resource.h | 87 + src/d3d9/d3d9_sampler.cpp | 47 + src/d3d9/d3d9_sampler.h | 75 + src/d3d9/d3d9_shader.cpp | 143 + src/d3d9/d3d9_shader.h | 195 + src/d3d9/d3d9_shader_permutations.h | 20 + src/d3d9/d3d9_shader_validator.h | 68 + src/d3d9/d3d9_spec_constants.h | 19 + src/d3d9/d3d9_state.cpp | 26 + src/d3d9/d3d9_state.h | 352 + src/d3d9/d3d9_stateblock.cpp | 519 ++ src/d3d9/d3d9_stateblock.h | 333 + src/d3d9/d3d9_subresource.h | 106 + src/d3d9/d3d9_surface.cpp | 184 + src/d3d9/d3d9_surface.h | 53 + src/d3d9/d3d9_swapchain.cpp | 1216 ++++ src/d3d9/d3d9_swapchain.h | 226 + src/d3d9/d3d9_swvp_emu.cpp | 358 + src/d3d9/d3d9_swvp_emu.h | 36 + src/d3d9/d3d9_texture.cpp | 252 + src/d3d9/d3d9_texture.h | 236 + src/d3d9/d3d9_util.cpp | 414 ++ src/d3d9/d3d9_util.h | 202 + src/d3d9/d3d9_vertex_declaration.cpp | 231 + src/d3d9/d3d9_vertex_declaration.h | 83 + src/d3d9/d3d9_volume.cpp | 110 + src/d3d9/d3d9_volume.h | 39 + src/d3d9/meson.build | 52 + src/d3d9/shaders/d3d9_convert_yuy2_uyvy.comp | 63 + src/d3d9/shaders/d3d9_presenter_frag.frag | 21 + src/d3d9/shaders/d3d9_presenter_vert.vert | 21 + src/d3d9/version.rc | 31 + src/dxso/dxso_analysis.cpp | 47 + src/dxso/dxso_analysis.h | 37 + src/dxso/dxso_code.cpp | 28 + src/dxso/dxso_code.h | 54 + src/dxso/dxso_common.cpp | 26 + src/dxso/dxso_common.h | 88 + src/dxso/dxso_compiler.cpp | 3579 ++++++++++ src/dxso/dxso_compiler.h | 674 ++ src/dxso/dxso_ctab.cpp | 19 + src/dxso/dxso_ctab.h | 32 + src/dxso/dxso_decoder.cpp | 276 + src/dxso/dxso_decoder.h | 271 + src/dxso/dxso_enums.cpp | 101 + src/dxso/dxso_enums.h | 164 + src/dxso/dxso_header.cpp | 24 + src/dxso/dxso_header.h | 31 + src/dxso/dxso_helpers.h | 0 src/dxso/dxso_include.h | 18 + src/dxso/dxso_isgn.h | 39 + src/dxso/dxso_modinfo.h | 17 + src/dxso/dxso_module.cpp | 84 + src/dxso/dxso_module.h | 85 + 
src/dxso/dxso_options.cpp | 42 + src/dxso/dxso_options.h | 42 + src/dxso/dxso_reader.cpp | 26 + src/dxso/dxso_reader.h | 60 + src/dxso/dxso_tables.cpp | 93 + src/dxso/dxso_tables.h | 11 + src/dxso/dxso_util.cpp | 67 + src/dxso/dxso_util.h | 38 + src/dxso/meson.build | 23 + src/meson.build | 7 +- src/util/config/config.cpp | 119 + tests/d3d9/meson.build | 5 + tests/d3d9/test_d3d9_buffer.cpp | 220 + tests/d3d9/test_d3d9_clear.cpp | 173 + tests/d3d9/test_d3d9_triangle.cpp | 407 ++ tests/meson.build | 1 + 118 files changed, 28889 insertions(+), 3 deletions(-) create mode 100644 src/d3d9/d3d9.def create mode 100644 src/d3d9/d3d9.spec create mode 100644 src/d3d9/d3d9_adapter.cpp create mode 100644 src/d3d9/d3d9_adapter.h create mode 100644 src/d3d9/d3d9_buffer.cpp create mode 100644 src/d3d9/d3d9_buffer.h create mode 100644 src/d3d9/d3d9_caps.h create mode 100644 src/d3d9/d3d9_common_buffer.cpp create mode 100644 src/d3d9/d3d9_common_buffer.h create mode 100644 src/d3d9/d3d9_common_texture.cpp create mode 100644 src/d3d9/d3d9_common_texture.h create mode 100644 src/d3d9/d3d9_constant_layout.h create mode 100644 src/d3d9/d3d9_constant_set.h create mode 100644 src/d3d9/d3d9_cursor.cpp create mode 100644 src/d3d9/d3d9_cursor.h create mode 100644 src/d3d9/d3d9_device.cpp create mode 100644 src/d3d9/d3d9_device.h create mode 100644 src/d3d9/d3d9_device_child.h create mode 100644 src/d3d9/d3d9_fixed_function.cpp create mode 100644 src/d3d9/d3d9_fixed_function.h create mode 100644 src/d3d9/d3d9_format.cpp create mode 100644 src/d3d9/d3d9_format.h create mode 100644 src/d3d9/d3d9_format_helpers.cpp create mode 100644 src/d3d9/d3d9_format_helpers.h create mode 100644 src/d3d9/d3d9_hud.cpp create mode 100644 src/d3d9/d3d9_hud.h create mode 100644 src/d3d9/d3d9_include.h create mode 100644 src/d3d9/d3d9_initializer.cpp create mode 100644 src/d3d9/d3d9_initializer.h create mode 100644 src/d3d9/d3d9_interface.cpp create mode 100644 src/d3d9/d3d9_interface.h create mode 100644 
src/d3d9/d3d9_main.cpp create mode 100644 src/d3d9/d3d9_monitor.cpp create mode 100644 src/d3d9/d3d9_monitor.h create mode 100644 src/d3d9/d3d9_multithread.cpp create mode 100644 src/d3d9/d3d9_multithread.h create mode 100644 src/d3d9/d3d9_names.cpp create mode 100644 src/d3d9/d3d9_names.h create mode 100644 src/d3d9/d3d9_options.cpp create mode 100644 src/d3d9/d3d9_options.h create mode 100644 src/d3d9/d3d9_query.cpp create mode 100644 src/d3d9/d3d9_query.h create mode 100644 src/d3d9/d3d9_resource.h create mode 100644 src/d3d9/d3d9_sampler.cpp create mode 100644 src/d3d9/d3d9_sampler.h create mode 100644 src/d3d9/d3d9_shader.cpp create mode 100644 src/d3d9/d3d9_shader.h create mode 100644 src/d3d9/d3d9_shader_permutations.h create mode 100644 src/d3d9/d3d9_shader_validator.h create mode 100644 src/d3d9/d3d9_spec_constants.h create mode 100644 src/d3d9/d3d9_state.cpp create mode 100644 src/d3d9/d3d9_state.h create mode 100644 src/d3d9/d3d9_stateblock.cpp create mode 100644 src/d3d9/d3d9_stateblock.h create mode 100644 src/d3d9/d3d9_subresource.h create mode 100644 src/d3d9/d3d9_surface.cpp create mode 100644 src/d3d9/d3d9_surface.h create mode 100644 src/d3d9/d3d9_swapchain.cpp create mode 100644 src/d3d9/d3d9_swapchain.h create mode 100644 src/d3d9/d3d9_swvp_emu.cpp create mode 100644 src/d3d9/d3d9_swvp_emu.h create mode 100644 src/d3d9/d3d9_texture.cpp create mode 100644 src/d3d9/d3d9_texture.h create mode 100644 src/d3d9/d3d9_util.cpp create mode 100644 src/d3d9/d3d9_util.h create mode 100644 src/d3d9/d3d9_vertex_declaration.cpp create mode 100644 src/d3d9/d3d9_vertex_declaration.h create mode 100644 src/d3d9/d3d9_volume.cpp create mode 100644 src/d3d9/d3d9_volume.h create mode 100644 src/d3d9/meson.build create mode 100644 src/d3d9/shaders/d3d9_convert_yuy2_uyvy.comp create mode 100644 src/d3d9/shaders/d3d9_presenter_frag.frag create mode 100644 src/d3d9/shaders/d3d9_presenter_vert.vert create mode 100644 src/d3d9/version.rc create mode 100644 
src/dxso/dxso_analysis.cpp create mode 100644 src/dxso/dxso_analysis.h create mode 100644 src/dxso/dxso_code.cpp create mode 100644 src/dxso/dxso_code.h create mode 100644 src/dxso/dxso_common.cpp create mode 100644 src/dxso/dxso_common.h create mode 100644 src/dxso/dxso_compiler.cpp create mode 100644 src/dxso/dxso_compiler.h create mode 100644 src/dxso/dxso_ctab.cpp create mode 100644 src/dxso/dxso_ctab.h create mode 100644 src/dxso/dxso_decoder.cpp create mode 100644 src/dxso/dxso_decoder.h create mode 100644 src/dxso/dxso_enums.cpp create mode 100644 src/dxso/dxso_enums.h create mode 100644 src/dxso/dxso_header.cpp create mode 100644 src/dxso/dxso_header.h create mode 100644 src/dxso/dxso_helpers.h create mode 100644 src/dxso/dxso_include.h create mode 100644 src/dxso/dxso_isgn.h create mode 100644 src/dxso/dxso_modinfo.h create mode 100644 src/dxso/dxso_module.cpp create mode 100644 src/dxso/dxso_module.h create mode 100644 src/dxso/dxso_options.cpp create mode 100644 src/dxso/dxso_options.h create mode 100644 src/dxso/dxso_reader.cpp create mode 100644 src/dxso/dxso_reader.h create mode 100644 src/dxso/dxso_tables.cpp create mode 100644 src/dxso/dxso_tables.h create mode 100644 src/dxso/dxso_util.cpp create mode 100644 src/dxso/dxso_util.h create mode 100644 src/dxso/meson.build create mode 100644 tests/d3d9/meson.build create mode 100644 tests/d3d9/test_d3d9_buffer.cpp create mode 100644 tests/d3d9/test_d3d9_clear.cpp create mode 100644 tests/d3d9/test_d3d9_triangle.cpp diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 6421606d3..6e2cc6bac 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -9,7 +9,10 @@ assignees: '' Please describe your issue as accurately as possible. If you run into a problem with a binary release, make sure to test with latest `master` as well. 
-**Important:** When reporting an issue with a specific game or application, such as crashes or rendering issues, please include log files and a D3D11 Apitrace (see https://github.com/apitrace/apitrace) so that the issue can be reproduced. In order to create a trace, run `wine apitrace.exe trace -a dxgi YOURGAME.exe`. DO NOT use DXVK together with apitrace! +**Important:** When reporting an issue with a specific game or application, such as crashes or rendering issues, please include log files and a D3D11/D3D9 Apitrace (see https://github.com/apitrace/apitrace) so that the issue can be reproduced. +In order to create a trace for **D3D11/D3D10**: Run `wine apitrace.exe trace -a dxgi YOURGAME.exe`. +In order to create a trace for **D3D9**: Follow https://github.com/Joshua-Ashton/d9vk/wiki/Making-a-Trace. +DO NOT use DXVK together with apitrace! ### Software information Name of the game, settings used etc. @@ -24,5 +27,6 @@ Name of the game, settings used etc. - Put a link here ### Log files +- d3d9.log: - d3d11.log: - dxgi.log: diff --git a/LICENSE b/LICENSE index e29ded54f..e42c8b48c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,5 @@ Copyright (c) 2017-2019 Philip Rebohle + Copyright (c) 2019 Joshua Ashton zlib/libpng license diff --git a/dxvk.conf b/dxvk.conf index cff1322bb..1abe286b1 100644 --- a/dxvk.conf +++ b/dxvk.conf @@ -6,6 +6,7 @@ # Supported values: True, False # dxgi.deferSurfaceCreation = False +# d3d9.deferSurfaceCreation = False # Enforce a stricter maximum frame latency. Overrides the application @@ -15,6 +16,7 @@ # Supported values : 0 - 16 # dxgi.maxFrameLatency = 0 +# d3d9.maxFrameLatency = 0 # Override PCI vendor and device IDs reported to the application. Can @@ -25,6 +27,9 @@ # dxgi.customDeviceId = 0000 # dxgi.customVendorId = 0000 +# d3d9.customDeviceId = 0000 +# d3d9.customVendorId = 0000 + # Report Nvidia GPUs as AMD GPUs by default. This is enabled by default # to work around issues with NVAPI, but may cause issues in some games. 
@@ -60,7 +65,8 @@ # # Supported values: Any non-negative number -# dxgi.syncInterval = -1 +# dxgi.syncInterval = -1 +# d3d9.presentInterval = -1 # Performs range check on dynamically indexed constant buffers in shaders. @@ -115,6 +121,7 @@ # Supported values: Any number between 0 and 16 # d3d11.samplerAnisotropy = -1 +# d3d9.samplerAnisotropy = -1 # Replaces NaN outputs from fragment shaders with zeroes for floating @@ -179,3 +186,106 @@ # ignored. The syntax is identical. # dxvk.hud = + + +# Reported shader model +# +# The shader model to state that we support in the device +# capabilities that the application queries. +# +# Supported values: +# - 1: Shader Model 1 +# - 2: Shader Model 2 +# - 3: Shader Model 3 + +# d3d9.shaderModel = 3 + + +# Evict Managed on Unlock +# +# Decides whether we should evict managed resources from +# system memory when they are unlocked entirely. +# +# Supported values: +# - True, False: Always enable / disable + +# d3d9.evictManagedOnUnlock = False + + +# DPI Awareness +# +# Decides whether we should call SetProcessDPIAware on device +# creation. Helps avoid upscaling blur in modern Windows on +# Hi-DPI screens/devices. +# +# Supported values: +# - True, False: Always enable / disable + +# d3d9.dpiAware = True + + +# Strict Constant Copies +# +# Decides whether we should always copy defined constants to +# the UBO when relative addressing is used, or only when the +# relative addressing starts at a defined constant. +# +# Supported values: +# - True, False: Always enable / disable + +# d3d9.strictConstantCopies = False + + +# Strict Pow +# +# Decides whether we have an opSelect for handling pow(0,0) = 0, +# otherwise it becomes undefined. +# +# Supported values: +# - True, False: Always enable / disable + +# d3d9.strictPow = True + + +# Lenient Clear +# +# Decides whether or not we fastpath clear anyway if we are close enough to +# clearing a full render target. 
+# +# Supported values: +# - True, False: Always enable / disable + +# d3d9.lenientClear = False + + +# Max available memory +# +# Changes the max initial value used in tracking and GetAvailableTextureMem +# Value in Megabytes +# +# Supported values: +# - Any int32_t + +# d3d9.maxAvailableMemory = 4096 + + +# Force enable/disable floating point quirk emulation +# +# Force toggle anything * 0 emulation +# Tristate +# Supported values: +# - True/False + +# d3d9.floatEmulation = + + +# Enable dialog box mode +# +# Changes the default state of dialog box mode. +# *Disables* exclusive fullscreen when enabled. +# +# Supported values: +# - True, False: Always enable / disable + +# d3d9.enableDialogMode = False + diff --git a/meson.build b/meson.build index fd902d0ec..c12b90651 100644 --- a/meson.build +++ b/meson.build @@ -38,6 +38,7 @@ if dxvk_winelib endif wrc = find_program('wrc') lib_vulkan = declare_dependency(link_args: [ '-lwinevulkan' ]) + lib_d3d9 = declare_dependency(link_args: [ '-ld3d9' ]) lib_d3d11 = declare_dependency(link_args: [ '-ld3d11' ]) lib_dxgi = declare_dependency(link_args: [ '-ldxgi' ]) lib_d3dcompiler_43 = declare_dependency(link_args: [ '-L'+dxvk_library_path, '-ld3dcompiler_43' ]) @@ -63,6 +64,7 @@ else endif lib_vulkan = dxvk_compiler.find_library('vulkan-1', dirs : dxvk_library_path) + lib_d3d9 = dxvk_compiler.find_library('d3d9') lib_d3d11 = dxvk_compiler.find_library('d3d11') lib_dxgi = dxvk_compiler.find_library('dxgi') lib_d3dcompiler_43 = dxvk_compiler.find_library('d3dcompiler_43', dirs : dxvk_library_path) diff --git a/meson_options.txt b/meson_options.txt index b3c235f90..2a4b5b4f4 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1,4 +1,5 @@ option('enable_tests', type : 'boolean', value : false) option('enable_dxgi', type : 'boolean', value : true, description: 'Build DXGI') +option('enable_d3d9', type : 'boolean', value : true, description: 'Build D3D9') option('enable_d3d10', type : 'boolean', value : true, description: 
'Build D3D10') option('enable_d3d11', type : 'boolean', value : true, description: 'Build D3D11') diff --git a/setup_dxvk.sh b/setup_dxvk.sh index 01cebf7f9..2bc4a7c9f 100644 --- a/setup_dxvk.sh +++ b/setup_dxvk.sh @@ -172,6 +172,7 @@ if [ $with_dxgi -ne 0 ] || [ "$action" == "uninstall" ]; then $action dxgi fi +$action d3d9 $action d3d10 $action d3d10_1 $action d3d10core diff --git a/src/d3d9/d3d9.def b/src/d3d9/d3d9.def new file mode 100644 index 000000000..5eb9053d6 --- /dev/null +++ b/src/d3d9/d3d9.def @@ -0,0 +1,22 @@ +LIBRARY D3D9.DLL +EXPORTS + Direct3DShaderValidatorCreate9 @ 24 + + PSGPError @ 25 + PSGPSampleTexture @ 26 + + D3DPERF_BeginEvent @ 27 + D3DPERF_EndEvent @ 28 + D3DPERF_GetStatus @ 29 + D3DPERF_QueryRepeatFrame @ 30 + D3DPERF_SetMarker @ 31 + D3DPERF_SetOptions @ 32 + D3DPERF_SetRegion @ 33 + + DebugSetLevel @ 34 + DebugSetMute @ 35 + + Direct3D9EnableMaximizedWindowedModeShim @ 36 + + Direct3DCreate9 @ 37 + Direct3DCreate9Ex @ 38 diff --git a/src/d3d9/d3d9.spec b/src/d3d9/d3d9.spec new file mode 100644 index 000000000..67dc522d8 --- /dev/null +++ b/src/d3d9/d3d9.spec @@ -0,0 +1,20 @@ +@ stdcall Direct3DShaderValidatorCreate9() + +@ stdcall PSGPError(ptr long long) +@ stdcall PSGPSampleTexture(ptr long ptr long ptr) + +@ stdcall D3DPERF_BeginEvent(long wstr) +@ stdcall D3DPERF_EndEvent() +@ stdcall D3DPERF_GetStatus() +@ stdcall D3DPERF_QueryRepeatFrame() +@ stdcall D3DPERF_SetMarker(long wstr) +@ stdcall D3DPERF_SetOptions(long) +@ stdcall D3DPERF_SetRegion(long wstr) + +@ stdcall DebugSetLevel() +@ stdcall DebugSetMute() + +@ stdcall Direct3D9EnableMaximizedWindowedModeShim(long) + +@ stdcall Direct3DCreate9(long) +@ stdcall Direct3DCreate9Ex(long ptr) \ No newline at end of file diff --git a/src/d3d9/d3d9_adapter.cpp b/src/d3d9/d3d9_adapter.cpp new file mode 100644 index 000000000..c9081f82c --- /dev/null +++ b/src/d3d9/d3d9_adapter.cpp @@ -0,0 +1,804 @@ +#include "d3d9_adapter.h" + +#include "d3d9_interface.h" +#include "d3d9_monitor.h" 
+#include "d3d9_caps.h" +#include "d3d9_util.h" + +#include "../util/util_bit.h" +#include "../util/util_luid.h" +#include "../util/util_ratio.h" + +#include + +namespace dxvk { + + const char* GetDriverDLL(DxvkGpuVendor vendor) { + switch (vendor) { + default: + case DxvkGpuVendor::Nvidia: return "nvd3dum.dll"; + +#if defined(__x86_64__) || defined(_M_X64) + case DxvkGpuVendor::Amd: return "aticfx64.dll"; + case DxvkGpuVendor::Intel: return "igdumd64.dll"; +#else + case DxvkGpuVendor::Amd: return "aticfx32.dll"; + case DxvkGpuVendor::Intel: return "igdumd32.dll"; +#endif + } + } + + + D3D9Adapter::D3D9Adapter( + D3D9InterfaceEx* pParent, + Rc Adapter, + UINT Ordinal) + : m_parent (pParent) + , m_adapter (Adapter) + , m_ordinal (Ordinal) + , m_modeCacheFormat (D3D9Format::Unknown) + , m_d3d9Formats (Adapter, m_parent->GetOptions()) { + m_adapter->logAdapterInfo(); + } + + + HRESULT D3D9Adapter::GetAdapterIdentifier( + DWORD Flags, + D3DADAPTER_IDENTIFIER9* pIdentifier) { + if (unlikely(pIdentifier == nullptr)) + return D3DERR_INVALIDCALL; + + auto& options = m_parent->GetOptions(); + + const auto& props = m_adapter->deviceProperties(); + + ::MONITORINFOEXA monInfo; + monInfo.cbSize = sizeof(monInfo); + + if (!::GetMonitorInfoA(GetDefaultMonitor(), reinterpret_cast(&monInfo))) { + Logger::err("D3D9Adapter::GetAdapterIdentifier: Failed to query monitor info"); + return D3DERR_INVALIDCALL; + } + + GUID guid = bit::cast(m_adapter->devicePropertiesExt().coreDeviceId.deviceUUID); + + uint32_t vendorId = options.customVendorId == -1 ? props.vendorID : uint32_t(options.customVendorId); + uint32_t deviceId = options.customDeviceId == -1 ? props.deviceID : uint32_t(options.customDeviceId); + const char* desc = options.customDeviceDesc.empty() ? 
props.deviceName : options.customDeviceDesc.c_str(); + const char* driver = GetDriverDLL(DxvkGpuVendor(vendorId)); + + std::strncpy(pIdentifier->Description, desc, countof(pIdentifier->Description)); + std::strncpy(pIdentifier->DeviceName, monInfo.szDevice, countof(pIdentifier->DeviceName)); // The GDI device name. Not the actual device name. + std::strncpy(pIdentifier->Driver, driver, countof(pIdentifier->Driver)); // This is the driver's dll. + + pIdentifier->DeviceIdentifier = guid; + pIdentifier->DeviceId = deviceId; + pIdentifier->VendorId = vendorId; + pIdentifier->Revision = 0; + pIdentifier->SubSysId = 0; + pIdentifier->WHQLLevel = m_parent->IsExtended() ? 1 : 0; // This doesn't check with the driver on Direct3D9Ex and is always 1. + pIdentifier->DriverVersion.QuadPart = INT64_MAX; + + return D3D_OK; + } + + + HRESULT D3D9Adapter::CheckDeviceType( + D3DDEVTYPE DevType, + D3D9Format AdapterFormat, + D3D9Format BackBufferFormat, + BOOL bWindowed) { + if (!IsSupportedBackBufferFormat( + AdapterFormat, BackBufferFormat, bWindowed)) + return D3DERR_NOTAVAILABLE; + + return D3D_OK; + } + + + HRESULT D3D9Adapter::CheckDeviceFormat( + D3DDEVTYPE DeviceType, + D3D9Format AdapterFormat, + DWORD Usage, + D3DRESOURCETYPE RType, + D3D9Format CheckFormat) { + if (!IsSupportedAdapterFormat(AdapterFormat)) + return D3DERR_INVALIDCALL; + + if (!IsSupportedDisplayFormat(AdapterFormat, false)) + return D3DERR_NOTAVAILABLE; + + const bool dmap = Usage & D3DUSAGE_DMAP; + const bool rt = Usage & D3DUSAGE_RENDERTARGET; + const bool ds = Usage & D3DUSAGE_DEPTHSTENCIL; + + const bool surface = RType == D3DRTYPE_SURFACE; + const bool texture = RType == D3DRTYPE_TEXTURE; + + const bool twoDimensional = surface || texture; + + const bool srgb = (Usage & (D3DUSAGE_QUERY_SRGBREAD | D3DUSAGE_QUERY_SRGBWRITE)) != 0; + + if (CheckFormat == D3D9Format::INST) + return D3D_OK; + + if (rt && CheckFormat == D3D9Format::A8 && m_parent->GetOptions().disableA8RT) + return D3DERR_NOTAVAILABLE; + + 
if (ds && !IsDepthFormat(CheckFormat)) + return D3DERR_NOTAVAILABLE; + + if (rt && CheckFormat == D3D9Format::NULL_FORMAT && twoDimensional) + return D3D_OK; + + if (rt && CheckFormat == D3D9Format::RESZ && surface) + return D3D_OK; + + if (CheckFormat == D3D9Format::ATOC && surface) + return D3D_OK; + + if (CheckFormat == D3D9Format::NVDB && surface) + return D3D_OK; + + // I really don't want to support this... + if (dmap) + return D3DERR_NOTAVAILABLE; + + auto mapping = m_d3d9Formats.GetFormatMapping(CheckFormat); + if (mapping.FormatColor == VK_FORMAT_UNDEFINED) + return D3DERR_NOTAVAILABLE; + + if (mapping.FormatSrgb == VK_FORMAT_UNDEFINED && srgb) + return D3DERR_NOTAVAILABLE; + + if (RType == D3DRTYPE_VERTEXBUFFER || RType == D3DRTYPE_INDEXBUFFER) + return D3D_OK; + + // Let's actually ask Vulkan now that we got some quirks out the way! + + return CheckDeviceVkFormat(mapping.FormatColor, Usage, RType); + } + + + HRESULT D3D9Adapter::CheckDeviceMultiSampleType( + D3DDEVTYPE DeviceType, + D3D9Format SurfaceFormat, + BOOL Windowed, + D3DMULTISAMPLE_TYPE MultiSampleType, + DWORD* pQualityLevels) { + if (pQualityLevels != nullptr) + *pQualityLevels = 1; + + auto dst = ConvertFormatUnfixed(SurfaceFormat); + if (dst.FormatColor == VK_FORMAT_UNDEFINED) + return D3DERR_NOTAVAILABLE; + + if (MultiSampleType != D3DMULTISAMPLE_NONE + && (SurfaceFormat == D3D9Format::D32_LOCKABLE + || SurfaceFormat == D3D9Format::D32F_LOCKABLE + || SurfaceFormat == D3D9Format::D16_LOCKABLE)) + return D3DERR_NOTAVAILABLE; + + uint32_t sampleCount = std::max(MultiSampleType, 1u); + + // Check if this is a power of two... + if (sampleCount & (sampleCount - 1)) + return D3DERR_NOTAVAILABLE; + + // Therefore... + VkSampleCountFlags sampleFlags = VkSampleCountFlags(sampleCount); + + auto availableFlags = !IsDepthFormat(SurfaceFormat) + ? 
m_adapter->deviceProperties().limits.framebufferColorSampleCounts + : m_adapter->deviceProperties().limits.framebufferDepthSampleCounts; + + if (!(availableFlags & sampleFlags)) + return D3DERR_NOTAVAILABLE; + + if (pQualityLevels != nullptr) { + if (MultiSampleType == D3DMULTISAMPLE_NONMASKABLE) + *pQualityLevels = (32 - bit::lzcnt(availableFlags)); + else + *pQualityLevels = 1; + } + + return D3D_OK; + } + + + HRESULT D3D9Adapter::CheckDepthStencilMatch( + D3DDEVTYPE DeviceType, + D3D9Format AdapterFormat, + D3D9Format RenderTargetFormat, + D3D9Format DepthStencilFormat) { + if (!IsSupportedAdapterFormat(AdapterFormat)) + return D3DERR_NOTAVAILABLE; + + if (!IsDepthFormat(DepthStencilFormat)) + return D3DERR_NOTAVAILABLE; + + auto mapping = ConvertFormatUnfixed(RenderTargetFormat); + if (mapping.FormatColor == VK_FORMAT_UNDEFINED) + return D3DERR_NOTAVAILABLE; + + return D3D_OK; + } + + + HRESULT D3D9Adapter::CheckDeviceFormatConversion( + D3DDEVTYPE DeviceType, + D3D9Format SourceFormat, + D3D9Format TargetFormat) { + bool sourceSupported = IsSupportedBackBufferFormat(SourceFormat, FALSE); + bool targetSupported = TargetFormat == D3D9Format::X1R5G5B5 + || TargetFormat == D3D9Format::A1R5G5B5 + || TargetFormat == D3D9Format::R5G6B5 + // || TargetFormat == D3D9Format::R8G8B8 <-- We don't support R8G8B8 + || TargetFormat == D3D9Format::X8R8G8B8 + || TargetFormat == D3D9Format::A8R8G8B8 + || TargetFormat == D3D9Format::A2R10G10B10 + || TargetFormat == D3D9Format::A16B16G16R16 + || TargetFormat == D3D9Format::A2B10G10R10 + || TargetFormat == D3D9Format::A8B8G8R8 + || TargetFormat == D3D9Format::X8B8G8R8 + || TargetFormat == D3D9Format::A16B16G16R16F + || TargetFormat == D3D9Format::A32B32G32R32F; + + return (sourceSupported && targetSupported) + ? 
D3D_OK + : D3DERR_NOTAVAILABLE; + } + + + HRESULT D3D9Adapter::GetDeviceCaps( + D3DDEVTYPE DeviceType, + D3DCAPS9* pCaps) { + using namespace dxvk::caps; + + if (pCaps == nullptr) + return D3DERR_INVALIDCALL; + + auto& options = m_parent->GetOptions(); + + // TODO: Actually care about what the adapter supports here. + // ^ For Intel and older cards most likely here. + + // Device Type + pCaps->DeviceType = DeviceType; + // Adapter Id + pCaps->AdapterOrdinal = m_ordinal; + // Caps 1 + pCaps->Caps = D3DCAPS_READ_SCANLINE; + // Caps 2 + pCaps->Caps2 = D3DCAPS2_FULLSCREENGAMMA + /* | D3DCAPS2_CANCALIBRATEGAMMA */ + /* | D3DCAPS2_RESERVED */ + /* | D3DCAPS2_CANMANAGERESOURCE */ + | D3DCAPS2_DYNAMICTEXTURES + | D3DCAPS2_CANAUTOGENMIPMAP + /* | D3DCAPS2_CANSHARERESOURCE */; + // Caps 3 + pCaps->Caps3 = D3DCAPS3_ALPHA_FULLSCREEN_FLIP_OR_DISCARD + | D3DCAPS3_LINEAR_TO_SRGB_PRESENTATION + | D3DCAPS3_COPY_TO_VIDMEM + | D3DCAPS3_COPY_TO_SYSTEMMEM + /* | D3DCAPS3_DXVAHD */ + /* | D3DCAPS3_DXVAHD_LIMITED */; + // Presentation Intervals + pCaps->PresentationIntervals = D3DPRESENT_INTERVAL_DEFAULT + | D3DPRESENT_INTERVAL_ONE + | D3DPRESENT_INTERVAL_TWO + | D3DPRESENT_INTERVAL_THREE + | D3DPRESENT_INTERVAL_FOUR + | D3DPRESENT_INTERVAL_IMMEDIATE; + // Cursor + pCaps->CursorCaps = D3DCURSORCAPS_COLOR; // I do not support Cursor yet, but I don't want to say I don't support it for compatibility reasons. 
+ // Dev Caps + pCaps->DevCaps = D3DDEVCAPS_EXECUTESYSTEMMEMORY + | D3DDEVCAPS_EXECUTEVIDEOMEMORY + | D3DDEVCAPS_TLVERTEXSYSTEMMEMORY + | D3DDEVCAPS_TLVERTEXVIDEOMEMORY + /* | D3DDEVCAPS_TEXTURESYSTEMMEMORY */ + | D3DDEVCAPS_TEXTUREVIDEOMEMORY + | D3DDEVCAPS_DRAWPRIMTLVERTEX + | D3DDEVCAPS_CANRENDERAFTERFLIP + | D3DDEVCAPS_TEXTURENONLOCALVIDMEM + | D3DDEVCAPS_DRAWPRIMITIVES2 + /* | D3DDEVCAPS_SEPARATETEXTUREMEMORIES */ + | D3DDEVCAPS_DRAWPRIMITIVES2EX + | D3DDEVCAPS_HWTRANSFORMANDLIGHT + | D3DDEVCAPS_CANBLTSYSTONONLOCAL + | D3DDEVCAPS_HWRASTERIZATION + | D3DDEVCAPS_PUREDEVICE + /* | D3DDEVCAPS_QUINTICRTPATCHES */ + /* | D3DDEVCAPS_RTPATCHES */ + /* | D3DDEVCAPS_RTPATCHHANDLEZERO */ + /* | D3DDEVCAPS_NPATCHES */; + // Primitive Misc. Caps + pCaps->PrimitiveMiscCaps = D3DPMISCCAPS_MASKZ + | D3DPMISCCAPS_CULLNONE + | D3DPMISCCAPS_CULLCW + | D3DPMISCCAPS_CULLCCW + | D3DPMISCCAPS_COLORWRITEENABLE + | D3DPMISCCAPS_CLIPPLANESCALEDPOINTS + /* | D3DPMISCCAPS_CLIPTLVERTS */ + | D3DPMISCCAPS_TSSARGTEMP + | D3DPMISCCAPS_BLENDOP + /* | D3DPMISCCAPS_NULLREFERENCE */ + | D3DPMISCCAPS_INDEPENDENTWRITEMASKS + | D3DPMISCCAPS_PERSTAGECONSTANT + | D3DPMISCCAPS_FOGANDSPECULARALPHA + | D3DPMISCCAPS_SEPARATEALPHABLEND + | D3DPMISCCAPS_MRTINDEPENDENTBITDEPTHS + | D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING + | D3DPMISCCAPS_FOGVERTEXCLAMPED + | D3DPMISCCAPS_POSTBLENDSRGBCONVERT; + // Raster Caps + pCaps->RasterCaps = D3DPRASTERCAPS_DITHER + | D3DPRASTERCAPS_ZTEST + | D3DPRASTERCAPS_FOGVERTEX + | D3DPRASTERCAPS_FOGTABLE + | D3DPRASTERCAPS_MIPMAPLODBIAS + /* | D3DPRASTERCAPS_ZBUFFERLESSHSR */ + | D3DPRASTERCAPS_FOGRANGE + | D3DPRASTERCAPS_ANISOTROPY + /* | D3DPRASTERCAPS_WBUFFER */ + | D3DPRASTERCAPS_WFOG + | D3DPRASTERCAPS_ZFOG + | D3DPRASTERCAPS_COLORPERSPECTIVE + | D3DPRASTERCAPS_SCISSORTEST + | D3DPRASTERCAPS_SLOPESCALEDEPTHBIAS + | D3DPRASTERCAPS_DEPTHBIAS + | D3DPRASTERCAPS_MULTISAMPLE_TOGGLE; // <-- TODO! 
(but difficult in Vk) + // Z Comparison Caps + pCaps->ZCmpCaps = D3DPCMPCAPS_NEVER + | D3DPCMPCAPS_LESS + | D3DPCMPCAPS_EQUAL + | D3DPCMPCAPS_LESSEQUAL + | D3DPCMPCAPS_GREATER + | D3DPCMPCAPS_NOTEQUAL + | D3DPCMPCAPS_GREATEREQUAL + | D3DPCMPCAPS_ALWAYS; + // Source Blend Caps + pCaps->SrcBlendCaps = D3DPBLENDCAPS_ZERO + | D3DPBLENDCAPS_ONE + | D3DPBLENDCAPS_SRCCOLOR + | D3DPBLENDCAPS_INVSRCCOLOR + | D3DPBLENDCAPS_SRCALPHA + | D3DPBLENDCAPS_INVSRCALPHA + | D3DPBLENDCAPS_DESTALPHA + | D3DPBLENDCAPS_INVDESTALPHA + | D3DPBLENDCAPS_DESTCOLOR + | D3DPBLENDCAPS_INVDESTCOLOR + | D3DPBLENDCAPS_SRCALPHASAT + | D3DPBLENDCAPS_BOTHSRCALPHA + | D3DPBLENDCAPS_BOTHINVSRCALPHA + | D3DPBLENDCAPS_BLENDFACTOR + | D3DPBLENDCAPS_INVSRCCOLOR2 + | D3DPBLENDCAPS_SRCCOLOR2; + // Destination Blend Caps + pCaps->DestBlendCaps = pCaps->SrcBlendCaps; + // Alpha Comparison Caps + pCaps->AlphaCmpCaps = pCaps->ZCmpCaps; + // Shade Caps + pCaps->ShadeCaps = D3DPSHADECAPS_COLORGOURAUDRGB + | D3DPSHADECAPS_SPECULARGOURAUDRGB + | D3DPSHADECAPS_ALPHAGOURAUDBLEND + | D3DPSHADECAPS_FOGGOURAUD; + // Texture Caps + pCaps->TextureCaps = D3DPTEXTURECAPS_PERSPECTIVE + /* | D3DPTEXTURECAPS_POW2 */ + | D3DPTEXTURECAPS_ALPHA + /* | D3DPTEXTURECAPS_SQUAREONLY */ + | D3DPTEXTURECAPS_TEXREPEATNOTSCALEDBYSIZE + | D3DPTEXTURECAPS_ALPHAPALETTE + /* | D3DPTEXTURECAPS_NONPOW2CONDITIONAL */ + | D3DPTEXTURECAPS_PROJECTED + | D3DPTEXTURECAPS_CUBEMAP + | D3DPTEXTURECAPS_VOLUMEMAP + | D3DPTEXTURECAPS_MIPMAP + | D3DPTEXTURECAPS_MIPVOLUMEMAP + | D3DPTEXTURECAPS_MIPCUBEMAP + /* | D3DPTEXTURECAPS_CUBEMAP_POW2 */ + /* | D3DPTEXTURECAPS_VOLUMEMAP_POW2 */ + /* | D3DPTEXTURECAPS_NOPROJECTEDBUMPENV */; + // Texture Filter Caps + pCaps->TextureFilterCaps = D3DPTFILTERCAPS_MINFPOINT + | D3DPTFILTERCAPS_MINFLINEAR + | D3DPTFILTERCAPS_MINFANISOTROPIC + /* | D3DPTFILTERCAPS_MINFPYRAMIDALQUAD */ + /* | D3DPTFILTERCAPS_MINFGAUSSIANQUAD */ + | D3DPTFILTERCAPS_MIPFPOINT + | D3DPTFILTERCAPS_MIPFLINEAR + /* | D3DPTFILTERCAPS_CONVOLUTIONMONO */ 
+ | D3DPTFILTERCAPS_MAGFPOINT + | D3DPTFILTERCAPS_MAGFLINEAR + | D3DPTFILTERCAPS_MAGFANISOTROPIC + /* | D3DPTFILTERCAPS_MAGFPYRAMIDALQUAD */ + /* | D3DPTFILTERCAPS_MAGFGAUSSIANQUAD */; + // Cube Texture Filter Caps + pCaps->CubeTextureFilterCaps = pCaps->TextureFilterCaps; + // Volume Texture Filter Caps + pCaps->VolumeTextureFilterCaps = pCaps->TextureFilterCaps; + // Texture Address Caps + pCaps->TextureAddressCaps = D3DPTADDRESSCAPS_WRAP + | D3DPTADDRESSCAPS_MIRROR + | D3DPTADDRESSCAPS_CLAMP + | D3DPTADDRESSCAPS_BORDER + | D3DPTADDRESSCAPS_INDEPENDENTUV + | D3DPTADDRESSCAPS_MIRRORONCE; + // Volume Texture Address Caps + pCaps->VolumeTextureAddressCaps = pCaps->TextureAddressCaps; + // Line Caps + pCaps->LineCaps = D3DLINECAPS_TEXTURE + | D3DLINECAPS_ZTEST + | D3DLINECAPS_BLEND + | D3DLINECAPS_ALPHACMP + | D3DLINECAPS_FOG + | D3DLINECAPS_ANTIALIAS; //<-- Lying about doing AA lines here, we don't *fully* support that. + // Max Texture Width + pCaps->MaxTextureWidth = MaxTextureDimension; + // Max Texture Height + pCaps->MaxTextureHeight = MaxTextureDimension; + // Max Volume Extent + pCaps->MaxVolumeExtent = 8192; + // Max Texture Repeat + pCaps->MaxTextureRepeat = 8192; + // Max Texture Aspect Ratio + pCaps->MaxTextureAspectRatio = 8192; + // Max Anisotropy + pCaps->MaxAnisotropy = 16; + // Max Vertex W + pCaps->MaxVertexW = 1e10f; + // Guard Bands + pCaps->GuardBandLeft = -32768.0f; + pCaps->GuardBandTop = -32768.0f; + pCaps->GuardBandRight = 32768.0f; + pCaps->GuardBandBottom = 32768.0f; + // Extents Adjust + pCaps->ExtentsAdjust = 0.0f; + // Stencil Caps + pCaps->StencilCaps = D3DSTENCILCAPS_KEEP + | D3DSTENCILCAPS_ZERO + | D3DSTENCILCAPS_REPLACE + | D3DSTENCILCAPS_INCRSAT + | D3DSTENCILCAPS_DECRSAT + | D3DSTENCILCAPS_INVERT + | D3DSTENCILCAPS_INCR + | D3DSTENCILCAPS_DECR + | D3DSTENCILCAPS_TWOSIDED; + // FVF Caps + pCaps->FVFCaps = (MaxSimultaneousTextures & D3DFVFCAPS_TEXCOORDCOUNTMASK) + /* | D3DFVFCAPS_DONOTSTRIPELEMENTS */ + | D3DFVFCAPS_PSIZE; + // 
Texture Op Caps + pCaps->TextureOpCaps = D3DTEXOPCAPS_DISABLE + | D3DTEXOPCAPS_SELECTARG1 + | D3DTEXOPCAPS_SELECTARG2 + | D3DTEXOPCAPS_MODULATE + | D3DTEXOPCAPS_MODULATE2X + | D3DTEXOPCAPS_MODULATE4X + | D3DTEXOPCAPS_ADD + | D3DTEXOPCAPS_ADDSIGNED + | D3DTEXOPCAPS_ADDSIGNED2X + | D3DTEXOPCAPS_SUBTRACT + | D3DTEXOPCAPS_ADDSMOOTH + | D3DTEXOPCAPS_BLENDDIFFUSEALPHA + | D3DTEXOPCAPS_BLENDTEXTUREALPHA + | D3DTEXOPCAPS_BLENDFACTORALPHA + | D3DTEXOPCAPS_BLENDTEXTUREALPHAPM + | D3DTEXOPCAPS_BLENDCURRENTALPHA + | D3DTEXOPCAPS_PREMODULATE + | D3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR + | D3DTEXOPCAPS_MODULATECOLOR_ADDALPHA + | D3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR + | D3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA + | D3DTEXOPCAPS_BUMPENVMAP + | D3DTEXOPCAPS_BUMPENVMAPLUMINANCE + | D3DTEXOPCAPS_DOTPRODUCT3 + | D3DTEXOPCAPS_MULTIPLYADD + | D3DTEXOPCAPS_LERP; + // Max Texture Blend Stages + pCaps->MaxTextureBlendStages = MaxTextureBlendStages; + // Max Simultaneous Textures + pCaps->MaxSimultaneousTextures = MaxSimultaneousTextures; + // Vertex Processing Caps + pCaps->VertexProcessingCaps = D3DVTXPCAPS_TEXGEN + | D3DVTXPCAPS_MATERIALSOURCE7 + | D3DVTXPCAPS_DIRECTIONALLIGHTS + | D3DVTXPCAPS_POSITIONALLIGHTS + | D3DVTXPCAPS_LOCALVIEWER + | D3DVTXPCAPS_TWEENING + | D3DVTXPCAPS_TEXGEN_SPHEREMAP + /* | D3DVTXPCAPS_NO_TEXGEN_NONLOCALVIEWER*/; + // Max Active Lights + pCaps->MaxActiveLights = caps::MaxEnabledLights; + // Max User Clip Planes + pCaps->MaxUserClipPlanes = MaxClipPlanes; + // Max Vertex Blend Matrices + pCaps->MaxVertexBlendMatrices = 4; + // Max Vertex Blend Matrix Index + pCaps->MaxVertexBlendMatrixIndex = 8; + // Max Point Size + pCaps->MaxPointSize = 256.0f; + // Max Primitive Count + pCaps->MaxPrimitiveCount = 0x00555555; + // Max Vertex Index + pCaps->MaxVertexIndex = 0x00ffffff; + // Max Streams + pCaps->MaxStreams = MaxStreams; + // Max Stream Stride + pCaps->MaxStreamStride = 508; // bytes + + const uint32_t majorVersion = options.shaderModel; + const uint32_t minorVersion 
= options.shaderModel != 1 ? 0 : 4; + + // Shader Versions + pCaps->VertexShaderVersion = D3DVS_VERSION(majorVersion, minorVersion); + pCaps->PixelShaderVersion = D3DPS_VERSION(majorVersion, minorVersion); + + // Max Vertex Shader Const + pCaps->MaxVertexShaderConst = MaxFloatConstantsVS; + // Max PS1 Value + pCaps->PixelShader1xMaxValue = FLT_MAX; + // Dev Caps 2 + pCaps->DevCaps2 = D3DDEVCAPS2_STREAMOFFSET + /* | D3DDEVCAPS2_DMAPNPATCH */ + /* | D3DDEVCAPS2_ADAPTIVETESSRTPATCH */ + /* | D3DDEVCAPS2_ADAPTIVETESSNPATCH */ + | D3DDEVCAPS2_CAN_STRETCHRECT_FROM_TEXTURES + /* | D3DDEVCAPS2_PRESAMPLEDDMAPNPATCH */ + | D3DDEVCAPS2_VERTEXELEMENTSCANSHARESTREAMOFFSET; + // Max N Patch Tesselation Level + pCaps->MaxNpatchTessellationLevel = 0.0f; + // Reserved for... something + pCaps->Reserved5 = 0; + // Master adapter for us is adapter 0, atm... + pCaps->MasterAdapterOrdinal = 0; + // The group of adapters this one is in + pCaps->AdapterOrdinalInGroup = 0; + // Number of adapters in current group + pCaps->NumberOfAdaptersInGroup = 1; + // Decl Type Caps + pCaps->DeclTypes = D3DDTCAPS_UBYTE4 + | D3DDTCAPS_UBYTE4N + | D3DDTCAPS_SHORT2N + | D3DDTCAPS_SHORT4N + | D3DDTCAPS_USHORT2N + | D3DDTCAPS_USHORT4N + | D3DDTCAPS_UDEC3 + | D3DDTCAPS_DEC3N + | D3DDTCAPS_FLOAT16_2 + | D3DDTCAPS_FLOAT16_4; + // Number of simultaneous RTs + pCaps->NumSimultaneousRTs = MaxSimultaneousRenderTargets; + // Possible StretchRect filters + pCaps->StretchRectFilterCaps = D3DPTFILTERCAPS_MINFPOINT + | D3DPTFILTERCAPS_MINFLINEAR + /* | D3DPTFILTERCAPS_MINFANISOTROPIC */ + /* | D3DPTFILTERCAPS_MINFPYRAMIDALQUAD */ + /* | D3DPTFILTERCAPS_MINFGAUSSIANQUAD */ + /* | D3DPTFILTERCAPS_MIPFPOINT */ + /* | D3DPTFILTERCAPS_MIPFLINEAR */ + /* | D3DPTFILTERCAPS_CONVOLUTIONMONO */ + | D3DPTFILTERCAPS_MAGFPOINT + | D3DPTFILTERCAPS_MAGFLINEAR + /* | D3DPTFILTERCAPS_MAGFANISOTROPIC */ + /* | D3DPTFILTERCAPS_MAGFPYRAMIDALQUAD */ + /* | D3DPTFILTERCAPS_MAGFGAUSSIANQUAD */; + + // Not too bothered about doing these 
longhand + // We should match whatever my AMD hardware reports here + // methinks for the best chance of stuff working. + pCaps->VS20Caps.Caps = 1; + pCaps->VS20Caps.DynamicFlowControlDepth = 24; + pCaps->VS20Caps.NumTemps = 32; + pCaps->VS20Caps.StaticFlowControlDepth = 4; + + pCaps->PS20Caps.Caps = 31; + pCaps->PS20Caps.DynamicFlowControlDepth = 24; + pCaps->PS20Caps.NumTemps = 32; + pCaps->PS20Caps.StaticFlowControlDepth = 4; + + pCaps->PS20Caps.NumInstructionSlots = options.shaderModel >= 2 ? 512 : 256; + + pCaps->VertexTextureFilterCaps = 50332416; + pCaps->MaxVShaderInstructionsExecuted = 4294967295; + pCaps->MaxPShaderInstructionsExecuted = 4294967295; + + pCaps->MaxVertexShader30InstructionSlots = options.shaderModel == 3 ? 32768 : 0; + pCaps->MaxPixelShader30InstructionSlots = options.shaderModel == 3 ? 32768 : 0; + + return D3D_OK; + } + + + HMONITOR D3D9Adapter::GetMonitor() { + return GetDefaultMonitor(); + } + + + UINT D3D9Adapter::GetAdapterModeCountEx(CONST D3DDISPLAYMODEFILTER* pFilter) { + if (pFilter == nullptr) + return 0; + + // We don't offer any interlaced formats here so early out and avoid destroying mode cache. + if (pFilter->ScanLineOrdering == D3DSCANLINEORDERING_INTERLACED) + return 0; + + CacheModes(EnumerateFormat(pFilter->Format)); + return m_modes.size(); + } + + + HRESULT D3D9Adapter::EnumAdapterModesEx( + const D3DDISPLAYMODEFILTER* pFilter, + UINT Mode, + D3DDISPLAYMODEEX* pMode) { + if (pMode == nullptr || pFilter == nullptr) + return D3DERR_INVALIDCALL; + + const D3D9Format format = + EnumerateFormat(pFilter->Format); + + if (FAILED(CheckDeviceFormat( + D3DDEVTYPE_HAL, EnumerateFormat(pFilter->Format), + D3DUSAGE_RENDERTARGET, D3DRTYPE_SURFACE, + EnumerateFormat(pFilter->Format)))) + return D3DERR_INVALIDCALL; + + CacheModes(format); + + // We don't return any scanline orderings that aren't progressive, + // The format filtering is already handled for us by cache modes + // So we can early out here and then just index. 
+ if (pFilter->ScanLineOrdering == D3DSCANLINEORDERING_INTERLACED) + return D3DERR_INVALIDCALL; + + if (Mode >= m_modes.size()) + return D3DERR_INVALIDCALL; + + *pMode = m_modes[Mode]; + + return D3D_OK; + } + + + HRESULT D3D9Adapter::GetAdapterDisplayModeEx( + D3DDISPLAYMODEEX* pMode, + D3DDISPLAYROTATION* pRotation) { + if (pRotation != nullptr) + *pRotation = D3DDISPLAYROTATION_IDENTITY; + + D3DDISPLAYMODEFILTER filter; + filter.Size = sizeof(filter); + filter.Format = D3DFMT_X8R8G8B8; + filter.ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; + + return this->EnumAdapterModesEx(&filter, 0, pMode); + } + + + HRESULT D3D9Adapter::GetAdapterLUID(LUID* pLUID) { + if (pLUID == nullptr) + return D3DERR_INVALIDCALL; + + auto& deviceId = m_adapter->devicePropertiesExt().coreDeviceId; + + if (deviceId.deviceLUIDValid) + *pLUID = bit::cast(deviceId.deviceLUID); + else + *pLUID = dxvk::GetAdapterLUID(m_ordinal); + + return D3D_OK; + } + + + HRESULT D3D9Adapter::CheckDeviceVkFormat( + VkFormat Format, + DWORD Usage, + D3DRESOURCETYPE RType) { + VkFormatFeatureFlags checkFlags = 0; + + if (RType != D3DRTYPE_SURFACE) + checkFlags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + + if (Usage & D3DUSAGE_RENDERTARGET) { + checkFlags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + + if (Usage & D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING) + checkFlags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + } + + if (Usage & D3DUSAGE_DEPTHSTENCIL) + checkFlags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + else + checkFlags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + + VkFormatFeatureFlags checkFlagsMipGen = checkFlags; + + if (Usage & D3DUSAGE_AUTOGENMIPMAP) { + checkFlagsMipGen |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + checkFlagsMipGen |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + } + + VkFormatProperties fmtSupport = m_adapter->formatProperties(Format); + VkFormatFeatureFlags imgFeatures = fmtSupport.optimalTilingFeatures | fmtSupport.linearTilingFeatures; + + if ((imgFeatures & checkFlags) 
!= checkFlags) + return D3DERR_NOTAVAILABLE; + + return ((imgFeatures & checkFlagsMipGen) != checkFlagsMipGen) + ? D3DOK_NOAUTOGEN + : D3D_OK; + } + + + void D3D9Adapter::CacheModes(D3D9Format Format) { + if (!m_modes.empty() && m_modeCacheFormat == Format) + return; // We already cached the modes for this format. No need to do it again. + + ::MONITORINFOEXW monInfo; + monInfo.cbSize = sizeof(monInfo); + + if (!::GetMonitorInfoW(GetDefaultMonitor(), reinterpret_cast(&monInfo))) { + Logger::err("D3D9Adapter::CacheModes: failed to query monitor info"); + return; + } + + m_modes.clear(); + m_modeCacheFormat = Format; + + // Skip unsupported formats + if (!IsSupportedAdapterFormat(Format) || !IsSupportedDisplayFormat(Format, false)) + return; + + auto& options = m_parent->GetOptions(); + + // Walk over all modes that the display supports and + // return those that match the requested format etc. + DEVMODEW devMode = { }; + devMode.dmSize = sizeof(DEVMODEW); + + uint32_t modeIndex = 0; + + const auto forcedRatio = Ratio(options.forceAspectRatio); + + while (::EnumDisplaySettingsW(monInfo.szDevice, modeIndex++, &devMode)) { + // Skip interlaced modes altogether + if (devMode.dmDisplayFlags & DM_INTERLACED) + continue; + + // Skip modes with incompatible formats + if (devMode.dmBitsPerPel != GetMonitorFormatBpp(Format)) + continue; + + if (!forcedRatio.undefined() && Ratio(devMode.dmPelsWidth, devMode.dmPelsHeight) != forcedRatio) + continue; + + D3DDISPLAYMODEEX mode; + mode.Size = sizeof(D3DDISPLAYMODEEX); + mode.Width = devMode.dmPelsWidth; + mode.Height = devMode.dmPelsHeight; + mode.RefreshRate = devMode.dmDisplayFrequency; + mode.Format = static_cast(Format); + mode.ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; + + m_modes.push_back(mode); + } + + // Sort display modes by width, height and refresh rate, + // in that order. Some games rely on correct ordering. 
+ std::sort(m_modes.begin(), m_modes.end(), + [](const D3DDISPLAYMODEEX & a, const D3DDISPLAYMODEEX & b) { + if (a.Width < b.Width) return true; + if (a.Width > b.Width) return false; + + if (a.Height < b.Height) return true; + if (a.Height > b.Height) return false; + + return a.RefreshRate < b.RefreshRate; + }); + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_adapter.h b/src/d3d9/d3d9_adapter.h new file mode 100644 index 000000000..991287aec --- /dev/null +++ b/src/d3d9/d3d9_adapter.h @@ -0,0 +1,112 @@ +#pragma once + +#include "d3d9_include.h" + +#include "d3d9_options.h" +#include "d3d9_format.h" + +#include "../dxvk/dxvk_adapter.h" + +namespace dxvk { + + class D3D9InterfaceEx; + + class D3D9Adapter { + + public: + + D3D9Adapter( + D3D9InterfaceEx* pParent, + Rc Adapter, + UINT Ordinal); + + HRESULT GetAdapterIdentifier( + DWORD Flags, + D3DADAPTER_IDENTIFIER9* pIdentifier); + + HRESULT CheckDeviceType( + D3DDEVTYPE DevType, + D3D9Format AdapterFormat, + D3D9Format BackBufferFormat, + BOOL bWindowed); + + HRESULT CheckDeviceFormat( + D3DDEVTYPE DeviceType, + D3D9Format AdapterFormat, + DWORD Usage, + D3DRESOURCETYPE RType, + D3D9Format CheckFormat); + + HRESULT CheckDeviceMultiSampleType( + D3DDEVTYPE DeviceType, + D3D9Format SurfaceFormat, + BOOL Windowed, + D3DMULTISAMPLE_TYPE MultiSampleType, + DWORD* pQualityLevels); + + HRESULT CheckDepthStencilMatch( + D3DDEVTYPE DeviceType, + D3D9Format AdapterFormat, + D3D9Format RenderTargetFormat, + D3D9Format DepthStencilFormat); + + HRESULT CheckDeviceFormatConversion( + D3DDEVTYPE DeviceType, + D3D9Format SourceFormat, + D3D9Format TargetFormat); + + HRESULT GetDeviceCaps( + D3DDEVTYPE DeviceType, + D3DCAPS9* pCaps); + + HMONITOR GetMonitor(); + + UINT GetAdapterModeCountEx(CONST D3DDISPLAYMODEFILTER* pFilter); + + HRESULT EnumAdapterModesEx( + const D3DDISPLAYMODEFILTER* pFilter, + UINT Mode, + D3DDISPLAYMODEEX* pMode); + + HRESULT GetAdapterDisplayModeEx( + D3DDISPLAYMODEEX* pMode, + 
D3DDISPLAYROTATION* pRotation); + + HRESULT GetAdapterLUID(LUID* pLUID); + + UINT GetOrdinal() { return m_ordinal; } + + Rc GetDXVKAdapter() { return m_adapter; } + + D3D9_VK_FORMAT_MAPPING GetFormatMapping( + D3D9Format Format) const { + return m_d3d9Formats.GetFormatMapping(Format); + } + + DxvkFormatInfo GetUnsupportedFormatInfo( + D3D9Format Format) const { + return m_d3d9Formats.GetUnsupportedFormatInfo(Format); + } + + private: + + HRESULT CheckDeviceVkFormat( + VkFormat Format, + DWORD Usage, + D3DRESOURCETYPE RType); + + void CacheModes(D3D9Format Format); + + D3D9InterfaceEx* m_parent; + + Rc m_adapter; + UINT m_ordinal; + + std::vector m_modes; + D3D9Format m_modeCacheFormat; + + const D3D9VkFormatTable m_d3d9Formats; + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_buffer.cpp b/src/d3d9/d3d9_buffer.cpp new file mode 100644 index 000000000..f917e2d43 --- /dev/null +++ b/src/d3d9/d3d9_buffer.cpp @@ -0,0 +1,114 @@ +#include "d3d9_buffer.h" + +namespace dxvk { + + //////////////////////// + // D3D9VertexBuffer + //////////////////////// + + D3D9VertexBuffer::D3D9VertexBuffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc) + : D3D9VertexBufferBase( pDevice, pDesc ) { } + + + HRESULT STDMETHODCALLTYPE D3D9VertexBuffer::QueryInterface( + REFIID riid, + void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DResource9) + || riid == __uuidof(IDirect3DVertexBuffer9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9VertexBuffer::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + D3DRESOURCETYPE STDMETHODCALLTYPE D3D9VertexBuffer::GetType() { + return D3DRTYPE_VERTEXBUFFER; + } + + + HRESULT STDMETHODCALLTYPE D3D9VertexBuffer::GetDesc( + D3DVERTEXBUFFER_DESC* pDesc) { + if (pDesc == nullptr) + return D3DERR_INVALIDCALL; + + D3D9_BUFFER_DESC desc; + 
m_buffer.GetDesc(&desc); + + pDesc->Format = static_cast(desc.Format); + pDesc->Type = desc.Type; + pDesc->Usage = desc.Usage; + pDesc->Pool = desc.Pool; + pDesc->Size = desc.Size; + pDesc->FVF = desc.FVF; + + return D3D_OK; + } + + + ////////////////////// + // D3D9IndexBuffer + ////////////////////// + + + D3D9IndexBuffer::D3D9IndexBuffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc) + : D3D9IndexBufferBase( pDevice, pDesc ) { } + + + HRESULT STDMETHODCALLTYPE D3D9IndexBuffer::QueryInterface( + REFIID riid, + void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DResource9) + || riid == __uuidof(IDirect3DIndexBuffer9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9IndexBuffer::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + D3DRESOURCETYPE STDMETHODCALLTYPE D3D9IndexBuffer::GetType() { + return D3DRTYPE_INDEXBUFFER; + } + + + HRESULT STDMETHODCALLTYPE D3D9IndexBuffer::GetDesc( + D3DINDEXBUFFER_DESC* pDesc) { + if (pDesc == nullptr) + return D3DERR_INVALIDCALL; + + D3D9_BUFFER_DESC desc; + m_buffer.GetDesc(&desc); + + pDesc->Format = static_cast(desc.Format); + pDesc->Type = desc.Type; + pDesc->Usage = desc.Usage; + pDesc->Pool = desc.Pool; + pDesc->Size = desc.Size; + + return D3D_OK; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_buffer.h b/src/d3d9/d3d9_buffer.h new file mode 100644 index 000000000..0a1582e34 --- /dev/null +++ b/src/d3d9/d3d9_buffer.h @@ -0,0 +1,92 @@ +#pragma once + +#include "d3d9_resource.h" + +#include "d3d9_common_buffer.h" + +namespace dxvk { + + template + class D3D9Buffer : public D3D9Resource { + + public: + + D3D9Buffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc) + : D3D9Resource ( pDevice ) + , m_buffer ( pDevice, pDesc ) { } + + HRESULT STDMETHODCALLTYPE Lock( + UINT OffsetToLock, + UINT SizeToLock, 
+ void** ppbData, + DWORD Flags) final { + return m_buffer.Lock( + OffsetToLock, + SizeToLock, + ppbData, + Flags); + } + + HRESULT STDMETHODCALLTYPE Unlock() final { + return m_buffer.Unlock(); + } + + D3D9CommonBuffer* GetCommonBuffer() { + return &m_buffer; + } + + protected: + + D3D9CommonBuffer m_buffer; + + }; + + + using D3D9VertexBufferBase = D3D9Buffer; + class D3D9VertexBuffer final : public D3D9VertexBufferBase { + + public: + + D3D9VertexBuffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc); + + HRESULT STDMETHODCALLTYPE QueryInterface( + REFIID riid, + void** ppvObject) final; + + D3DRESOURCETYPE STDMETHODCALLTYPE GetType() final; + + HRESULT STDMETHODCALLTYPE GetDesc( + D3DVERTEXBUFFER_DESC* pDesc) final; + + }; + + using D3D9IndexBufferBase = D3D9Buffer; + class D3D9IndexBuffer final : public D3D9IndexBufferBase { + + public: + + D3D9IndexBuffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc); + + HRESULT STDMETHODCALLTYPE QueryInterface( + REFIID riid, + void** ppvObject) final; + + D3DRESOURCETYPE STDMETHODCALLTYPE GetType() final; + + HRESULT STDMETHODCALLTYPE GetDesc( + D3DINDEXBUFFER_DESC* pDesc) final; + + }; + + template + inline D3D9CommonBuffer* GetCommonBuffer(const T& pResource) { + return pResource != nullptr ? 
pResource->GetCommonBuffer() : nullptr; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_caps.h b/src/d3d9/d3d9_caps.h new file mode 100644 index 000000000..0b008757e --- /dev/null +++ b/src/d3d9/d3d9_caps.h @@ -0,0 +1,32 @@ +#pragma once + +#include "d3d9_include.h" + +namespace dxvk::caps { + + constexpr uint32_t MaxClipPlanes = 6; + constexpr uint32_t MaxSamplers = 16; + constexpr uint32_t MaxStreams = 16; + constexpr uint32_t MaxSimultaneousTextures = 8; + constexpr uint32_t MaxTextureBlendStages = MaxSimultaneousTextures; + constexpr uint32_t MaxSimultaneousRenderTargets = D3D_MAX_SIMULTANEOUS_RENDERTARGETS; + + constexpr uint32_t MaxFloatConstantsVS = 256; + constexpr uint32_t MaxFloatConstantsPS = 224; + constexpr uint32_t MaxOtherConstants = 16; + constexpr uint32_t MaxFloatConstantsSoftware = 8192; + constexpr uint32_t MaxOtherConstantsSoftware = 2048; + + constexpr uint32_t InputRegisterCount = 16; + + constexpr uint32_t MaxTextureDimension = 16384; + constexpr uint32_t MaxMipLevels = 15; + constexpr uint32_t MaxSubresources = 15 * 6; + + constexpr uint32_t MaxTransforms = 10 + 256; + + constexpr uint32_t TextureStageCount = MaxSimultaneousTextures; + + constexpr uint32_t MaxEnabledLights = 8; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_common_buffer.cpp b/src/d3d9/d3d9_common_buffer.cpp new file mode 100644 index 000000000..2ab6bb07c --- /dev/null +++ b/src/d3d9/d3d9_common_buffer.cpp @@ -0,0 +1,124 @@ +#include "d3d9_common_buffer.h" + +#include "d3d9_device.h" +#include "d3d9_util.h" + +namespace dxvk { + + D3D9CommonBuffer::D3D9CommonBuffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc) + : m_parent ( pDevice ), m_desc ( *pDesc ) { + m_buffer = CreateBuffer(); + if (GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) + m_stagingBuffer = CreateStagingBuffer(); + + m_sliceHandle = GetMapBuffer()->getSliceHandle(); + } + + + HRESULT D3D9CommonBuffer::Lock( + UINT OffsetToLock, + UINT SizeToLock, + void** 
ppbData, + DWORD Flags) { + return m_parent->LockBuffer( + this, + OffsetToLock, + SizeToLock, + ppbData, + Flags); + } + + + HRESULT D3D9CommonBuffer::Unlock() { + return m_parent->UnlockBuffer(this); + } + + + void D3D9CommonBuffer::GetDesc( + D3D9_BUFFER_DESC* pDesc) { + *pDesc = m_desc; + } + + + HRESULT D3D9CommonBuffer::ValidateBufferProperties(const D3D9_BUFFER_DESC* pDesc) { + if (pDesc->Size == 0) + return D3DERR_INVALIDCALL; + + return D3D_OK; + } + + + Rc D3D9CommonBuffer::CreateBuffer() const { + DxvkBufferCreateInfo info; + info.size = m_desc.Size; + info.usage = 0; + info.stages = 0; + info.access = 0; + + VkMemoryPropertyFlags memoryFlags = 0; + + if (m_desc.Type == D3DRTYPE_VERTEXBUFFER) { + info.usage |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + info.stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + info.access |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + + if (m_parent->SupportsSWVP()) { + info.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + info.stages |= VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; + info.access |= VK_ACCESS_SHADER_WRITE_BIT; + } + } + else if (m_desc.Type == D3DRTYPE_INDEXBUFFER) { + info.usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + info.stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + info.access |= VK_ACCESS_INDEX_READ_BIT; + } + + if (GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_DIRECT) { + info.stages |= VK_PIPELINE_STAGE_HOST_BIT; + info.access |= VK_ACCESS_HOST_WRITE_BIT; + + if (!(m_desc.Usage & D3DUSAGE_WRITEONLY)) + info.access |= VK_ACCESS_HOST_READ_BIT; + + memoryFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + else { + info.stages |= VK_PIPELINE_STAGE_TRANSFER_BIT; + info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; + info.access |= VK_ACCESS_TRANSFER_WRITE_BIT; + + memoryFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + + return m_parent->GetDXVKDevice()->createBuffer(info, memoryFlags); + } + + + Rc D3D9CommonBuffer::CreateStagingBuffer() const { + 
DxvkBufferCreateInfo info; + info.size = m_desc.Size; + info.stages = VK_PIPELINE_STAGE_HOST_BIT + | VK_PIPELINE_STAGE_TRANSFER_BIT; + + info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + + info.access = VK_ACCESS_HOST_WRITE_BIT + | VK_ACCESS_TRANSFER_READ_BIT; + + if (!(m_desc.Usage & D3DUSAGE_WRITEONLY)) + info.access |= VK_ACCESS_HOST_READ_BIT; + + VkMemoryPropertyFlags memoryFlags = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + return m_parent->GetDXVKDevice()->createBuffer(info, memoryFlags); + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_common_buffer.h b/src/d3d9/d3d9_common_buffer.h new file mode 100644 index 000000000..2e343435e --- /dev/null +++ b/src/d3d9/d3d9_common_buffer.h @@ -0,0 +1,202 @@ +#pragma once + +#include "../dxvk/dxvk_device.h" + +#include "d3d9_device_child.h" +#include "d3d9_format.h" + +namespace dxvk { + + /** + * \brief Buffer map mode + */ + enum D3D9_COMMON_BUFFER_MAP_MODE { + D3D9_COMMON_BUFFER_MAP_MODE_BUFFER, + D3D9_COMMON_BUFFER_MAP_MODE_DIRECT + }; + + /** + * \brief Common buffer descriptor + */ + struct D3D9_BUFFER_DESC { + D3DRESOURCETYPE Type; + UINT Size; + DWORD Usage; + D3D9Format Format; + D3DPOOL Pool; + DWORD FVF; + }; + + /** + * \brief The type of buffer you want to use + */ + enum D3D9_COMMON_BUFFER_TYPE { + D3D9_COMMON_BUFFER_TYPE_MAPPING, + D3D9_COMMON_BUFFER_TYPE_STAGING, + D3D9_COMMON_BUFFER_TYPE_REAL + }; + + struct D3D9Range { + D3D9Range() { Clear(); } + + D3D9Range(uint32_t min, uint32_t max) + : min(min), max(max) { } + + bool IsDegenerate() { return min == max; } + + void Conjoin(D3D9Range range) { + if (IsDegenerate()) + *this = range; + else { + min = std::min(range.min, min); + max = std::max(range.max, max); + } + } + + bool Overlaps(D3D9Range range) { + if (IsDegenerate()) + return false; + + return range.max > min && range.min < max; + } + + void Clear() { min = 0; max = 0; } + + uint32_t min = 0; + uint32_t 
max = 0; + }; + + class D3D9CommonBuffer { + static constexpr VkDeviceSize BufferSliceAlignment = 64; + public: + + D3D9CommonBuffer( + D3D9DeviceEx* pDevice, + const D3D9_BUFFER_DESC* pDesc); + + HRESULT Lock( + UINT OffsetToLock, + UINT SizeToLock, + void** ppbData, + DWORD Flags); + + HRESULT Unlock(); + + void GetDesc( + D3D9_BUFFER_DESC* pDesc); + + D3D9_COMMON_BUFFER_MAP_MODE GetMapMode() const { + return (m_desc.Pool == D3DPOOL_DEFAULT && (m_desc.Usage & (D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY))) + ? D3D9_COMMON_BUFFER_MAP_MODE_DIRECT + : D3D9_COMMON_BUFFER_MAP_MODE_BUFFER; + } + + template + Rc GetBuffer() const { + if constexpr (Type == D3D9_COMMON_BUFFER_TYPE_MAPPING) + return GetMapBuffer(); + else if constexpr (Type == D3D9_COMMON_BUFFER_TYPE_STAGING) + return GetStagingBuffer(); + else //if constexpr (Type == D3D9_COMMON_BUFFER_TYPE_REAL) + return GetRealBuffer(); + } + + template + DxvkBufferSlice GetBufferSlice() const { + return GetBufferSlice(0, m_desc.Size); + } + + template + DxvkBufferSlice GetBufferSlice(VkDeviceSize offset) const { + return GetBufferSlice(offset, m_desc.Size - offset); + } + + template + DxvkBufferSlice GetBufferSlice(VkDeviceSize offset, VkDeviceSize length) const { + return DxvkBufferSlice(GetBuffer(), offset, length); + } + + DxvkBufferSliceHandle AllocMapSlice() { + return GetMapBuffer()->allocSlice(); + } + + DxvkBufferSliceHandle DiscardMapSlice() { + m_sliceHandle = GetMapBuffer()->allocSlice(); + return m_sliceHandle; + } + + DxvkBufferSliceHandle GetMappedSlice() const { + return m_sliceHandle; + } + + DWORD GetMapFlags() const { return m_mapFlags; } + + void SetMapFlags(DWORD Flags) { m_mapFlags = Flags; } + + const D3D9_BUFFER_DESC* Desc() const { + return &m_desc; + } + + static HRESULT ValidateBufferProperties(const D3D9_BUFFER_DESC* pDesc); + + D3D9Range& LockRange() { return m_lockRange; } + D3D9Range& DirtyRange() { return m_dirtyRange; } + + bool GetReadLocked() const { return m_readLocked; } + void 
SetReadLocked(bool state) { m_readLocked = state; } + + uint32_t IncrementLockCount() { return ++m_lockCount; } + uint32_t DecrementLockCount() { + if (m_lockCount == 0) + return 0; + + return --m_lockCount; + } + + void MarkUploaded() { m_needsUpload = false; } + void MarkNeedsUpload() { m_needsUpload = true; } + bool NeedsUpload() const { return m_needsUpload; } + + bool MarkLocked() { + bool locked = m_readLocked; + m_readLocked = true; + return locked; + } + + private: + + Rc CreateBuffer() const; + Rc CreateStagingBuffer() const; + + Rc GetMapBuffer() const { + return m_stagingBuffer != nullptr ? m_stagingBuffer : m_buffer; + } + + Rc GetStagingBuffer() const { + return m_stagingBuffer; + } + + Rc GetRealBuffer() const { + return m_buffer; + } + + D3D9DeviceEx* m_parent; + const D3D9_BUFFER_DESC m_desc; + DWORD m_mapFlags; + bool m_readLocked = false; + + Rc m_buffer; + Rc m_stagingBuffer; + + DxvkBufferSliceHandle m_sliceHandle; + + D3D9Range m_lockRange; + D3D9Range m_dirtyRange; + + uint32_t m_lockCount = 0; + + bool m_needsUpload = false; + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_common_texture.cpp b/src/d3d9/d3d9_common_texture.cpp new file mode 100644 index 000000000..ed498b63a --- /dev/null +++ b/src/d3d9/d3d9_common_texture.cpp @@ -0,0 +1,508 @@ +#include "d3d9_common_texture.h" + +#include "d3d9_util.h" +#include "d3d9_device.h" + +#include + +namespace dxvk { + + D3D9CommonTexture::D3D9CommonTexture( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3DRESOURCETYPE ResourceType, + D3D9_VK_FORMAT_MAPPING Mapping) + : m_device(pDevice), m_desc(*pDesc), m_type(ResourceType), m_mapping(Mapping) { + if (m_desc.Format == D3D9Format::Unknown) + m_desc.Format = (m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) + ? 
D3D9Format::D32 + : D3D9Format::X8R8G8B8; + + auto pxSize = m_mapping.VideoFormatInfo.MacroPixelSize; + m_adjustedExtent = VkExtent3D{ m_desc.Width / pxSize.width, m_desc.Height / pxSize.height, m_desc.Depth }; + + m_mapMode = DetermineMapMode(); + m_shadow = DetermineShadowState(); + + if (m_mapMode == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED) { + try { + m_image = CreatePrimaryImage(ResourceType); + } + catch (const DxvkError& e) { + if (m_desc.Usage & D3DUSAGE_AUTOGENMIPMAP) { + m_desc.Usage &= ~D3DUSAGE_AUTOGENMIPMAP; + m_desc.MipLevels = 1; + m_image = CreatePrimaryImage(ResourceType); + } + else + throw e; + } + + CreateInitialViews(); + + if (!IsManaged()) { + m_size = m_image->memSize(); + if (!m_device->ChangeReportedMemory(-m_size)) + throw DxvkError("D3D9: Reporting out of memory from tracking."); + } + } + + if (m_mapMode == D3D9_COMMON_TEXTURE_MAP_MODE_SYSTEMMEM) + CreateBuffers(); + } + + + D3D9CommonTexture::~D3D9CommonTexture() { + if (m_size != 0) + m_device->ChangeReportedMemory(m_size); + } + + + VkImageSubresource D3D9CommonTexture::GetSubresourceFromIndex( + VkImageAspectFlags Aspect, + UINT Subresource) const { + VkImageSubresource result; + result.aspectMask = Aspect; + result.mipLevel = Subresource % m_desc.MipLevels; + result.arrayLayer = Subresource / m_desc.MipLevels; + return result; + } + + + HRESULT D3D9CommonTexture::NormalizeTextureProperties( + D3D9DeviceEx* pDevice, + D3D9_COMMON_TEXTURE_DESC* pDesc, + D3D9_VK_FORMAT_MAPPING* pMapping) { + auto* options = pDevice->GetOptions(); + + ////////////////////// + // Mapping Validation + + *pMapping = pDevice->LookupFormat(pDesc->Format); + + // Handle DisableA8RT hack for The Sims 2 + if (pDesc->Format == D3D9Format::A8 && + (pDesc->Usage & D3DUSAGE_RENDERTARGET) && + options->disableA8RT) + return D3DERR_INVALIDCALL; + + // If the mapping is invalid then lets return invalid + // Some edge cases: + // NULL format does not map to anything, but should succeed + // SCRATCH textures can still be 
made if the device does not support + // the format at all. + + if (!pMapping->IsValid() && pDesc->Format != D3D9Format::NULL_FORMAT) { + auto info = pDevice->UnsupportedFormatInfo(pDesc->Format); + + if (pDesc->Pool != D3DPOOL_SCRATCH || info.elementSize == 0) + return D3DERR_INVALIDCALL; + } + + /////////////////// + // Desc Validation + + if (pDesc->Width == 0 || pDesc->Height == 0 || pDesc->Depth == 0) + return D3DERR_INVALIDCALL; + + if (FAILED(DecodeMultiSampleType(pDesc->MultiSample, pDesc->MultisampleQuality, nullptr))) + return D3DERR_INVALIDCALL; + + // Using MANAGED pool with DYNAMIC usage is illegal + if (IsPoolManaged(pDesc->Pool) && (pDesc->Usage & D3DUSAGE_DYNAMIC)) + return D3DERR_INVALIDCALL; + + // D3DUSAGE_WRITEONLY doesn't apply to textures. + if (pDesc->Usage & D3DUSAGE_WRITEONLY) + return D3DERR_INVALIDCALL; + + // RENDERTARGET and DEPTHSTENCIL must be default pool + constexpr DWORD incompatibleUsages = D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL; + if (pDesc->Pool != D3DPOOL_DEFAULT && (pDesc->Usage & incompatibleUsages)) + return D3DERR_INVALIDCALL; + + // Use the maximum possible mip level count if the supplied + // mip level count is either unspecified (0) or invalid + const uint32_t maxMipLevelCount = + (pDesc->MultiSample <= D3DMULTISAMPLE_NONMASKABLE && !(pDesc->Usage & D3DUSAGE_AUTOGENMIPMAP)) + ? 
util::computeMipLevelCount({ pDesc->Width, pDesc->Height, pDesc->Depth }) + : 1u; + + if (pDesc->MipLevels == 0 || pDesc->MipLevels > maxMipLevelCount) + pDesc->MipLevels = maxMipLevelCount; + + return D3D_OK; + } + + + bool D3D9CommonTexture::CreateBufferSubresource(UINT Subresource) { + if (m_buffers[Subresource] != nullptr) + return false; + + DxvkBufferCreateInfo info; + info.size = GetMipSize(Subresource); + info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT + | VK_BUFFER_USAGE_TRANSFER_DST_BIT + | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + info.stages = VK_PIPELINE_STAGE_TRANSFER_BIT; + info.access = VK_ACCESS_TRANSFER_READ_BIT + | VK_ACCESS_TRANSFER_WRITE_BIT; + + if (m_mapping.VideoFormatInfo.FormatType != D3D9VideoFormat_None) { + info.usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; + info.stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + } + + VkMemoryPropertyFlags memType = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + + if (m_mapMode == D3D9_COMMON_TEXTURE_MAP_MODE_SYSTEMMEM || IsManaged()) + memType |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + m_buffers[Subresource] = m_device->GetDXVKDevice()->createBuffer(info, memType); + m_mappedSlices[Subresource] = m_buffers[Subresource]->getSliceHandle(); + + return true; + } + + + VkDeviceSize D3D9CommonTexture::GetMipSize(UINT Subresource) const { + const UINT MipLevel = Subresource % m_desc.MipLevels; + + const DxvkFormatInfo formatInfo = m_mapping.FormatColor != VK_FORMAT_UNDEFINED + ? 
*imageFormatInfo(m_mapping.FormatColor) + : m_device->UnsupportedFormatInfo(m_desc.Format); + + const VkExtent3D mipExtent = util::computeMipLevelExtent( + m_adjustedExtent, MipLevel); + + const VkExtent3D blockCount = util::computeBlockCount( + mipExtent, formatInfo.blockSize); + + return formatInfo.elementSize + * blockCount.width + * blockCount.height + * blockCount.depth; + } + + + Rc D3D9CommonTexture::CreatePrimaryImage(D3DRESOURCETYPE ResourceType) const { + DxvkImageCreateInfo imageInfo; + imageInfo.type = GetImageTypeFromResourceType(ResourceType); + imageInfo.format = m_mapping.FormatColor; + imageInfo.flags = 0; + imageInfo.sampleCount = VK_SAMPLE_COUNT_1_BIT; + imageInfo.extent.width = m_desc.Width; + imageInfo.extent.height = m_desc.Height; + imageInfo.extent.depth = m_desc.Depth; + imageInfo.numLayers = m_desc.ArraySize; + imageInfo.mipLevels = m_desc.MipLevels; + imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT + | VK_IMAGE_USAGE_TRANSFER_DST_BIT + | VK_IMAGE_USAGE_SAMPLED_BIT; + imageInfo.stages = VK_PIPELINE_STAGE_TRANSFER_BIT + | m_device->GetEnabledShaderStages(); + imageInfo.access = VK_ACCESS_TRANSFER_READ_BIT + | VK_ACCESS_TRANSFER_WRITE_BIT + | VK_ACCESS_SHADER_READ_BIT; + imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + imageInfo.layout = VK_IMAGE_LAYOUT_GENERAL; + + if (m_mapping.VideoFormatInfo.FormatType != D3D9VideoFormat_None) { + imageInfo.usage |= VK_IMAGE_USAGE_STORAGE_BIT; + imageInfo.stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + } + + DecodeMultiSampleType(m_desc.MultiSample, m_desc.MultisampleQuality, &imageInfo.sampleCount); + + // The image must be marked as mutable if it can be reinterpreted + // by a view with a different format. Depth-stencil formats cannot + // be reinterpreted in Vulkan, so we'll ignore those. 
+ auto formatProperties = imageFormatInfo(m_mapping.FormatColor); + + bool isMutable = m_mapping.FormatSrgb != VK_FORMAT_UNDEFINED; + bool isColorFormat = (formatProperties->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; + + if (isMutable && isColorFormat) { + imageInfo.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + + imageInfo.viewFormatCount = 2; + imageInfo.viewFormats = m_mapping.Formats; + } + + if (m_desc.Usage & D3DUSAGE_RENDERTARGET || m_desc.Usage & D3DUSAGE_AUTOGENMIPMAP) { + imageInfo.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + imageInfo.stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + imageInfo.access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT + | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + + if (m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) { + imageInfo.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + imageInfo.stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT + | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + imageInfo.access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT + | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + + if (ResourceType == D3DRTYPE_CUBETEXTURE) + imageInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + + // Some image formats (i.e. the R32G32B32 ones) are + // only supported with linear tiling on most GPUs + if (!CheckImageSupport(&imageInfo, VK_IMAGE_TILING_OPTIMAL)) + imageInfo.tiling = VK_IMAGE_TILING_LINEAR; + + // We must keep LINEAR images in GENERAL layout, but we + // can choose a better layout for the image based on how + // it is going to be used by the game. 
+ if (imageInfo.tiling == VK_IMAGE_TILING_OPTIMAL) + imageInfo.layout = OptimizeLayout(imageInfo.usage); + + // For some formats, we need to enable render target + // capabilities if available, but these should + // in no way affect the default image layout + imageInfo.usage |= EnableMetaCopyUsage(imageInfo.format, imageInfo.tiling); + + // Check if we can actually create the image + if (!CheckImageSupport(&imageInfo, imageInfo.tiling)) { + throw DxvkError(str::format( + "D3D9: Cannot create texture:", + "\n Type: ", std::hex, ResourceType, + "\n Format: ", m_desc.Format, + "\n Extent: ", m_desc.Width, + "x", m_desc.Height, + "x", m_desc.Depth, + "\n Samples: ", m_desc.MultiSample, + "\n Layers: ", m_desc.ArraySize, + "\n Levels: ", m_desc.MipLevels, + "\n Usage: ", std::hex, m_desc.Usage, + "\n Pool: ", std::hex, m_desc.Pool)); + } + + return m_device->GetDXVKDevice()->createImage(imageInfo, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + } + + + Rc D3D9CommonTexture::CreateResolveImage() const { + DxvkImageCreateInfo imageInfo = m_image->info(); + imageInfo.sampleCount = VK_SAMPLE_COUNT_1_BIT; + + return m_device->GetDXVKDevice()->createImage(imageInfo, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + } + + + void D3D9CommonTexture::RecreateSampledView(UINT Lod) { + // This will be a no-op for SYSTEMMEM types given we + // don't expose the cap to allow texturing with them. 
+ if (unlikely(m_mapMode == D3D9_COMMON_TEXTURE_MAP_MODE_SYSTEMMEM)) + return; + + const D3D9_VK_FORMAT_MAPPING formatInfo = m_device->LookupFormat(m_desc.Format); + + m_views.Sample = CreateColorViewPair(formatInfo, AllLayers, VK_IMAGE_USAGE_SAMPLED_BIT, Lod); + } + + + BOOL D3D9CommonTexture::DetermineShadowState() const { + static std::array blacklist = { + D3D9Format::INTZ, D3D9Format::DF16, D3D9Format::DF24 + }; + + return IsDepthFormat(m_desc.Format) + && std::find(blacklist.begin(), blacklist.end(), m_desc.Format) == blacklist.end(); + } + + + BOOL D3D9CommonTexture::CheckImageSupport( + const DxvkImageCreateInfo* pImageInfo, + VkImageTiling Tiling) const { + const Rc adapter = m_device->GetDXVKDevice()->adapter(); + + VkImageFormatProperties formatProps = { }; + + VkResult status = adapter->imageFormatProperties( + pImageInfo->format, pImageInfo->type, Tiling, + pImageInfo->usage, pImageInfo->flags, formatProps); + + if (status != VK_SUCCESS) + return FALSE; + + return (pImageInfo->extent.width <= formatProps.maxExtent.width) + && (pImageInfo->extent.height <= formatProps.maxExtent.height) + && (pImageInfo->extent.depth <= formatProps.maxExtent.depth) + && (pImageInfo->numLayers <= formatProps.maxArrayLayers) + && (pImageInfo->mipLevels <= formatProps.maxMipLevels) + && (pImageInfo->sampleCount & formatProps.sampleCounts); + } + + + VkImageUsageFlags D3D9CommonTexture::EnableMetaCopyUsage( + VkFormat Format, + VkImageTiling Tiling) const { + VkFormatFeatureFlags requestedFeatures = 0; + + if (Format == VK_FORMAT_D16_UNORM || Format == VK_FORMAT_D32_SFLOAT) + requestedFeatures |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + + if (Format == VK_FORMAT_R16_UNORM || Format == VK_FORMAT_R32_SFLOAT) + requestedFeatures |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + + if (requestedFeatures == 0) + return 0; + + // Enable usage flags for all supported and requested features + VkFormatProperties properties = 
m_device->GetDXVKDevice()->adapter()->formatProperties(Format); + + requestedFeatures &= Tiling == VK_IMAGE_TILING_OPTIMAL + ? properties.optimalTilingFeatures + : properties.linearTilingFeatures; + + VkImageUsageFlags requestedUsage = 0; + + if (requestedFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) + requestedUsage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + if (requestedFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) + requestedUsage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + return requestedUsage; + } + + + VkImageType D3D9CommonTexture::GetImageTypeFromResourceType(D3DRESOURCETYPE Type) { + switch (Type) { + case D3DRTYPE_TEXTURE: return VK_IMAGE_TYPE_2D; + case D3DRTYPE_VOLUMETEXTURE: return VK_IMAGE_TYPE_3D; + case D3DRTYPE_CUBETEXTURE: return VK_IMAGE_TYPE_2D; + default: throw DxvkError("D3D9CommonTexture: Unhandled resource type"); + } + } + + + VkImageViewType D3D9CommonTexture::GetImageViewTypeFromResourceType( + D3DRESOURCETYPE Dimension, + UINT Layer) { + switch (Dimension) { + case D3DRTYPE_TEXTURE: return VK_IMAGE_VIEW_TYPE_2D; + case D3DRTYPE_VOLUMETEXTURE: return VK_IMAGE_VIEW_TYPE_3D; + case D3DRTYPE_CUBETEXTURE: return Layer == AllLayers + ? VK_IMAGE_VIEW_TYPE_CUBE + : VK_IMAGE_VIEW_TYPE_2D; + default: throw DxvkError("D3D9CommonTexture: Unhandled resource type"); + } + } + + + VkImageLayout D3D9CommonTexture::OptimizeLayout(VkImageUsageFlags Usage) { + const VkImageUsageFlags usageFlags = Usage; + + // Filter out unnecessary flags. Transfer operations + // are handled by the backend in a transparent manner. 
+ Usage &= ~(VK_IMAGE_USAGE_TRANSFER_DST_BIT + | VK_IMAGE_USAGE_TRANSFER_SRC_BIT); + + // If the image is used only as an attachment, we never + // have to transform the image back to a different layout + if (Usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + if (Usage == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + Usage &= ~(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT + | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); + + // If the image is used for reading but not as a storage + // image, we can optimize the image for texture access + if (Usage == VK_IMAGE_USAGE_SAMPLED_BIT) { + return usageFlags & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT + ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL + : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } + + // Otherwise, we have to stick with the default layout + return VK_IMAGE_LAYOUT_GENERAL; + } + + + Rc D3D9CommonTexture::CreateView( + D3D9_VK_FORMAT_MAPPING FormatInfo, + UINT Layer, + VkImageUsageFlags UsageFlags, + UINT Lod, + BOOL Srgb) { + DxvkImageViewCreateInfo viewInfo; + viewInfo.format = PickSRGB(FormatInfo.FormatColor, FormatInfo.FormatSrgb, Srgb); + viewInfo.aspect = imageFormatInfo(viewInfo.format)->aspectMask; + viewInfo.swizzle = FormatInfo.Swizzle; + viewInfo.usage = UsageFlags; + viewInfo.type = GetImageViewTypeFromResourceType(m_type, Layer); + viewInfo.minLevel = Lod; + viewInfo.numLevels = m_desc.MipLevels - Lod; + viewInfo.minLayer = Layer == AllLayers ? 0 : Layer; + viewInfo.numLayers = Layer == AllLayers ? 
m_desc.ArraySize : 1; + + // Remove the stencil aspect if we are trying to create a regular image + // view of a depth stencil format + if (UsageFlags != VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + viewInfo.aspect &= ~VK_IMAGE_ASPECT_STENCIL_BIT; + + if (UsageFlags == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT || + UsageFlags == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + viewInfo.numLevels = 1; + + // Remove swizzle on depth views. + if (UsageFlags == VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) + viewInfo.swizzle = { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY }; + + // Create the underlying image view object + return m_device->GetDXVKDevice()->createImageView(GetImage(), viewInfo); + } + + + D3D9ColorView D3D9CommonTexture::CreateColorViewPair( + D3D9_VK_FORMAT_MAPPING FormatInfo, + UINT Layer, + VkImageUsageFlags UsageFlags, + UINT Lod) { + D3D9ColorView pair; + pair.Color = CreateView(FormatInfo, Layer, UsageFlags, Lod, FALSE); + + if (FormatInfo.FormatSrgb != VK_FORMAT_UNDEFINED) + pair.Srgb = CreateView(FormatInfo, Layer, UsageFlags, Lod, TRUE); + else + pair.Srgb = pair.Color; + + return pair; + } + + + void D3D9CommonTexture::CreateInitialViews() { + const D3D9_VK_FORMAT_MAPPING formatInfo = m_device->LookupFormat(m_desc.Format); + + m_views.Sample = CreateColorViewPair(formatInfo, AllLayers, VK_IMAGE_USAGE_SAMPLED_BIT, 0); + + for (uint32_t i = 0; i < m_desc.ArraySize; i++) { + for (uint32_t j = 0; j < m_desc.MipLevels; j++) + m_views.SubresourceSample[i][j] = CreateColorViewPair(formatInfo, i, VK_IMAGE_USAGE_SAMPLED_BIT, j); + } + + if (m_desc.Usage & D3DUSAGE_RENDERTARGET) { + for (uint32_t i = 0; i < m_desc.ArraySize; i++) { + for (uint32_t j = 0; j < m_desc.MipLevels; j++) + m_views.SubresourceRenderTarget[i][j] = CreateColorViewPair(formatInfo, i, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, j); + } + } + + if (m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) { + for (uint32_t i = 0; i < 
m_desc.ArraySize; i++) { + for (uint32_t j = 0; j < m_desc.MipLevels; j++) + m_views.SubresourceDepth[i][j] = CreateView(formatInfo, i, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, j, FALSE); + } + } + } + + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_common_texture.h b/src/d3d9/d3d9_common_texture.h new file mode 100644 index 000000000..5f1ed3529 --- /dev/null +++ b/src/d3d9/d3d9_common_texture.h @@ -0,0 +1,431 @@ +#pragma once + +#include "d3d9_format.h" +#include "d3d9_util.h" +#include "d3d9_caps.h" + +#include "../dxvk/dxvk_device.h" + +namespace dxvk { + + class D3D9DeviceEx; + + /** + * \brief Image memory mapping mode + * + * Determines how exactly \c LockBox will + * behave when mapping an image. + */ + enum D3D9_COMMON_TEXTURE_MAP_MODE { + D3D9_COMMON_TEXTURE_MAP_MODE_NONE, ///< No mapping available + D3D9_COMMON_TEXTURE_MAP_MODE_BACKED, ///< Mapped image through buffer + D3D9_COMMON_TEXTURE_MAP_MODE_SYSTEMMEM, ///< Only a buffer - no image + }; + + /** + * \brief Common texture description + * + * Contains all members that can be + * defined for 2D, Cube and 3D textures. + */ + struct D3D9_COMMON_TEXTURE_DESC { + UINT Width; + UINT Height; + UINT Depth; + UINT ArraySize; + UINT MipLevels; + DWORD Usage; + D3D9Format Format; + D3DPOOL Pool; + BOOL Discard; + D3DMULTISAMPLE_TYPE MultiSample; + DWORD MultisampleQuality; + }; + + struct D3D9ColorView { + inline Rc Pick(bool Srgb) const { + return Srgb ? this->Srgb : this->Color; + } + + Rc Color; + Rc Srgb; + }; + + struct D3D9ViewSet { + D3D9ColorView Sample; + + std::array< + std::array, 6> SubresourceSample; + std::array< + std::array, 6> SubresourceRenderTarget; + std::array< + std::array, 15>, 6> SubresourceDepth; + + bool Hazardous = false; + + VkImageLayout GetRTLayout() const { + return SubresourceRenderTarget[0][0].Color != nullptr + && SubresourceRenderTarget[0][0].Color->imageInfo().tiling == VK_IMAGE_TILING_OPTIMAL + && !Hazardous + ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + : VK_IMAGE_LAYOUT_GENERAL; + } + + VkImageLayout GetDepthLayout() const { + return SubresourceDepth[0][0] != nullptr + && SubresourceDepth[0][0]->imageInfo().tiling == VK_IMAGE_TILING_OPTIMAL + ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL + : VK_IMAGE_LAYOUT_GENERAL; + } + }; + + template + using D3D9SubresourceArray = std::array; + + class D3D9CommonTexture { + + public: + + D3D9CommonTexture( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3DRESOURCETYPE ResourceType, + D3D9_VK_FORMAT_MAPPING Mapping); + + ~D3D9CommonTexture(); + + /** + * \brief Device + * \returns The parent device + */ + D3D9DeviceEx* Device() const { + return m_device; + } + + /** + * \brief Texture properties + * + * The returned data can be used to fill in + * \c D3D11_TEXTURE2D_DESC and similar structs. + * \returns Pointer to texture description + */ + const D3D9_COMMON_TEXTURE_DESC* Desc() const { + return &m_desc; + } + + /** + * \brief Vulkan Format + * \returns The Vulkan format of the resource + */ + const D3D9_VK_FORMAT_MAPPING GetFormatMapping() const { + return m_mapping; + } + + /** + * \brief Counts number of subresources + * \returns Number of subresources + */ + UINT CountSubresources() const { + return m_desc.ArraySize * m_desc.MipLevels; + } + + /** + * \brief Map mode + * \returns Map mode + */ + D3D9_COMMON_TEXTURE_MAP_MODE GetMapMode() const { + return m_mapMode; + } + + /** + * \brief The DXVK image + * Note, this will be nullptr if the map mode is D3D9_COMMON_TEXTURE_MAP_MODE_SYSTEMMEM + * \returns The DXVK image + */ + Rc GetImage() const { + return m_image; + } + + /** + * \brief Get a copy of the main image, but with a single sample + * This function will allocate/reuse an image with the same info + * as the main image + * \returns An image with identical info, but 1 sample + */ + Rc GetResolveImage() { + if (unlikely(m_resolveImage == nullptr)) + m_resolveImage = CreateResolveImage(); + + return 
m_resolveImage; + } + + Rc GetBuffer(UINT Subresource) { + return m_buffers[Subresource]; + } + + + DxvkBufferSliceHandle GetMappedSlice(UINT Subresource) { + return m_mappedSlices[Subresource]; + } + + + DxvkBufferSliceHandle DiscardMapSlice(UINT Subresource) { + DxvkBufferSliceHandle handle = m_buffers[Subresource]->allocSlice(); + m_mappedSlices[Subresource] = handle; + return handle; + } + + /** + * \brief Computes subresource from the subresource index + * + * Used by some functions that operate on only + * one subresource, such as \c UpdateSurface. + * \param [in] Aspect The image aspect + * \param [in] Subresource Subresource index + * \returns The Vulkan image subresource + */ + VkImageSubresource GetSubresourceFromIndex( + VkImageAspectFlags Aspect, + UINT Subresource) const; + + /** + * \brief Normalizes and validates texture description + * + * Fills in undefined values and validates the texture + * parameters. Any error returned by this method should + * be forwarded to the application. 
+ * \param [in,out] pDesc Texture description + * \returns \c S_OK if the parameters are valid + */ + static HRESULT NormalizeTextureProperties( + D3D9DeviceEx* pDevice, + D3D9_COMMON_TEXTURE_DESC* pDesc, + D3D9_VK_FORMAT_MAPPING* pMapping); + + /** + * \brief Lock Flags + * Set the lock flags for a given subresource + */ + void SetLockFlags(UINT Subresource, DWORD Flags) { + m_lockFlags[Subresource] = Flags; + } + + /** + * \brief Lock Flags + * \returns The log flags for a given subresource + */ + DWORD GetLockFlags(UINT Subresource) const { + return m_lockFlags[Subresource]; + } + + /** + * \brief Shadow + * \returns Whether the texture is to be depth compared + */ + bool IsShadow() const { + return m_shadow; + } + + /** + * \brief Subresource + * \returns The subresource idx of a given face and mip level + */ + UINT CalcSubresource(UINT Face, UINT MipLevel) const { + return Face * m_desc.MipLevels + MipLevel; + } + + /** + * \brief Creates buffers + * Creates mapping and staging buffers for all subresources + * allocates new buffers if necessary + */ + void CreateBuffers() { + const uint32_t count = CountSubresources(); + for (uint32_t i = 0; i < count; i++) + CreateBufferSubresource(i); + } + + /** + * \brief Creates a buffer + * Creates mapping and staging buffers for a given subresource + * allocates new buffers if necessary + * \returns Whether an allocation happened + */ + bool CreateBufferSubresource(UINT Subresource); + + /** + * \brief Destroys a buffer + * Destroys mapping and staging buffers for a given subresource + */ + void DestroyBufferSubresource(UINT Subresource) { + m_buffers[Subresource] = nullptr; + SetDirty(Subresource, true); + } + + bool IsDynamic() const { + return m_desc.Usage & D3DUSAGE_DYNAMIC; + } + + /** + * \brief Managed + * \returns Whether a resource is managed (pool) or not + */ + bool IsManaged() const { + return IsPoolManaged(m_desc.Pool); + } + + /** + * \brief Render Target + * \returns Whether a resource is a render target 
or not + */ + bool IsRenderTarget() const { + return m_desc.Usage & D3DUSAGE_RENDERTARGET; + } + + /** + * \brief Autogen Mipmap + * \returns Whether the texture is to have automatic mip generation + */ + bool IsAutomaticMip() const { + return m_desc.Usage & D3DUSAGE_AUTOGENMIPMAP; + } + + /** + * \brief Autogen Mipmap + * \returns Whether the texture is to have automatic mip generation + */ + const D3D9ViewSet& GetViews() const { + return m_views; + } + + /** + * \brief Recreate main image view + * Recreates the main view of the sampler w/ a specific LOD. + * SetLOD only works on MANAGED textures so this is A-okay. + */ + void RecreateSampledView(UINT Lod); + + /** + * \brief Extent + * \returns The extent of the top-level mip + */ + VkExtent3D GetExtent() const { + return m_adjustedExtent; + } + + /** + * \brief Mip Extent + * \returns The extent of a mip or subresource + */ + VkExtent3D GetExtentMip(UINT Subresource) const { + UINT MipLevel = Subresource % m_desc.MipLevels; + return util::computeMipLevelExtent(GetExtent(), MipLevel); + } + + bool MarkHazardous() { + return std::exchange(m_views.Hazardous, true); + } + + D3DRESOURCETYPE GetType() { + return m_type; + } + + const D3D9_VK_FORMAT_MAPPING& GetMapping() { return m_mapping; } + + bool MarkLocked(UINT Subresource, bool value) { return std::exchange(m_locked[Subresource], value); } + + bool SetDirty(UINT Subresource, bool value) { return std::exchange(m_dirty[Subresource], value); } + void MarkAllDirty() { for (uint32_t i = 0; i < m_dirty.size(); i++) m_dirty[i] = true; } + + private: + + D3D9DeviceEx* m_device; + D3D9_COMMON_TEXTURE_DESC m_desc; + D3DRESOURCETYPE m_type; + D3D9_COMMON_TEXTURE_MAP_MODE m_mapMode; + + Rc m_image; + Rc m_resolveImage; + D3D9SubresourceArray< + Rc> m_buffers; + D3D9SubresourceArray< + DxvkBufferSliceHandle> m_mappedSlices; + D3D9SubresourceArray m_lockFlags; + + D3D9ViewSet m_views; + + D3D9_VK_FORMAT_MAPPING m_mapping; + + VkExtent3D m_adjustedExtent; + + bool m_shadow; 
//< Depth Compare-ness + + int64_t m_size = 0; + + bool m_systemmemModified = false; + + D3D9SubresourceArray< + bool> m_locked = { }; + + D3D9SubresourceArray< + bool> m_dirty = { }; + + /** + * \brief Mip level + * \returns Size of packed mip level in bytes + */ + VkDeviceSize GetMipSize(UINT Subresource) const; + + Rc CreatePrimaryImage(D3DRESOURCETYPE ResourceType) const; + + Rc CreateResolveImage() const; + + BOOL DetermineShadowState() const; + + BOOL CheckImageSupport( + const DxvkImageCreateInfo* pImageInfo, + VkImageTiling Tiling) const; + + VkImageUsageFlags EnableMetaCopyUsage( + VkFormat Format, + VkImageTiling Tiling) const; + + D3D9_COMMON_TEXTURE_MAP_MODE DetermineMapMode() const { + if (m_desc.Format == D3D9Format::NULL_FORMAT) + return D3D9_COMMON_TEXTURE_MAP_MODE_NONE; + + if (m_desc.Pool == D3DPOOL_SYSTEMMEM || m_desc.Pool == D3DPOOL_SCRATCH) + return D3D9_COMMON_TEXTURE_MAP_MODE_SYSTEMMEM; + + return D3D9_COMMON_TEXTURE_MAP_MODE_BACKED; + } + + static VkImageType GetImageTypeFromResourceType( + D3DRESOURCETYPE Dimension); + + static VkImageViewType GetImageViewTypeFromResourceType( + D3DRESOURCETYPE Dimension, + UINT Layer); + + static VkImageLayout OptimizeLayout( + VkImageUsageFlags Usage); + + static constexpr UINT AllLayers = UINT32_MAX; + + Rc CreateView( + D3D9_VK_FORMAT_MAPPING FormatInfo, + UINT Layer, + VkImageUsageFlags UsageFlags, + UINT Lod, + BOOL Srgb); + + D3D9ColorView CreateColorViewPair( + D3D9_VK_FORMAT_MAPPING FormatInfo, + UINT Layer, + VkImageUsageFlags UsageFlags, + UINT Lod); + + void CreateInitialViews(); + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_constant_layout.h b/src/d3d9/d3d9_constant_layout.h new file mode 100644 index 000000000..ddee0f3b4 --- /dev/null +++ b/src/d3d9/d3d9_constant_layout.h @@ -0,0 +1,26 @@ +#pragma once + +#include + +#include "d3d9_caps.h" + +namespace dxvk { + + struct D3D9ConstantLayout { + uint32_t floatCount; + uint32_t intCount; + uint32_t boolCount; + uint32_t 
bitmaskCount; + + uint32_t floatSize() const { return floatCount * 4 * sizeof(float); } + uint32_t intSize() const { return intCount * 4 * sizeof(int); } + uint32_t bitmaskSize() const { return bitmaskCount * 1 * sizeof(uint32_t); } + + uint32_t floatOffset() const { return 0; } + uint32_t intOffset() const { return floatOffset() + floatSize(); } + uint32_t bitmaskOffset() const { return intOffset() + intSize(); } + + uint32_t totalSize() const { return floatSize() + intSize() + bitmaskSize(); } + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_constant_set.h b/src/d3d9/d3d9_constant_set.h new file mode 100644 index 000000000..c65c1825f --- /dev/null +++ b/src/d3d9/d3d9_constant_set.h @@ -0,0 +1,47 @@ +#pragma once + +#include "d3d9_caps.h" + +#include "../dxvk/dxvk_buffer.h" + +#include "../dxso/dxso_isgn.h" + +#include "../util/util_math.h" +#include "../util/util_vector.h" + +#include + +namespace dxvk { + + enum class D3D9ConstantType { + Float, + Int, + Bool + }; + + // We make an assumption later based on the packing of this struct for copying. 
+ struct D3D9ShaderConstantsVSSoftware { + Vector4 fConsts[caps::MaxFloatConstantsSoftware]; + Vector4i iConsts[caps::MaxOtherConstantsSoftware]; + uint32_t bConsts[caps::MaxOtherConstantsSoftware / 32]; + }; + + struct D3D9ShaderConstantsVSHardware { + Vector4 fConsts[caps::MaxFloatConstantsVS]; + Vector4i iConsts[caps::MaxOtherConstants]; + uint32_t bConsts[1]; + }; + + struct D3D9ShaderConstantsPS { + Vector4 fConsts[caps::MaxFloatConstantsPS]; + Vector4i iConsts[caps::MaxOtherConstants]; + uint32_t bConsts[1]; + }; + + struct D3D9ConstantSets { + Rc buffer; + const DxsoShaderMetaInfo* meta = nullptr; + bool dirty = true; + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_cursor.cpp b/src/d3d9/d3d9_cursor.cpp new file mode 100644 index 000000000..ad3903d07 --- /dev/null +++ b/src/d3d9/d3d9_cursor.cpp @@ -0,0 +1,42 @@ +#include "d3d9_cursor.h" + +#include + +namespace dxvk { + + void D3D9Cursor::UpdateCursor(int X, int Y) { + ::SetCursorPos(X, Y); + } + + + BOOL D3D9Cursor::ShowCursor(BOOL bShow) { + ::SetCursor(bShow ? 
m_hCursor : nullptr); + return std::exchange(m_visible, bShow); + } + + + HRESULT D3D9Cursor::SetHardwareCursor(UINT XHotSpot, UINT YHotSpot, const CursorBitmap& bitmap) { + DWORD mask[32]; + std::memset(mask, ~0, sizeof(mask)); + + ICONINFO info; + info.fIcon = FALSE; + info.xHotspot = XHotSpot; + info.yHotspot = YHotSpot; + info.hbmMask = ::CreateBitmap(HardwareCursorWidth, HardwareCursorHeight, 1, 1, mask); + info.hbmColor = ::CreateBitmap(HardwareCursorWidth, HardwareCursorHeight, 1, 32, &bitmap[0]); + + if (m_hCursor != nullptr) + ::DestroyCursor(m_hCursor); + + m_hCursor = ::CreateIconIndirect(&info); + + ::DeleteObject(info.hbmMask); + ::DeleteObject(info.hbmColor); + + ShowCursor(m_visible); + + return D3D_OK; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_cursor.h b/src/d3d9/d3d9_cursor.h new file mode 100644 index 000000000..32645d26c --- /dev/null +++ b/src/d3d9/d3d9_cursor.h @@ -0,0 +1,33 @@ +#pragma once + +#include "d3d9_include.h" + +namespace dxvk { + + constexpr uint32_t HardwareCursorWidth = 32u; + constexpr uint32_t HardwareCursorHeight = 32u; + constexpr uint32_t HardwareCursorFormatSize = 4u; + constexpr uint32_t HardwareCursorPitch = HardwareCursorWidth * HardwareCursorFormatSize; + + // Format Size of 4 bytes (ARGB) + using CursorBitmap = uint8_t[HardwareCursorHeight * HardwareCursorPitch]; + + class D3D9Cursor { + + public: + + void UpdateCursor(int X, int Y); + + BOOL ShowCursor(BOOL bShow); + + HRESULT SetHardwareCursor(UINT XHotSpot, UINT YHotSpot, const CursorBitmap& bitmap); + + private: + + BOOL m_visible = FALSE; + + HCURSOR m_hCursor = nullptr; + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp new file mode 100644 index 000000000..851fde623 --- /dev/null +++ b/src/d3d9/d3d9_device.cpp @@ -0,0 +1,6530 @@ +#include "d3d9_device.h" + +#include "d3d9_interface.h" +#include "d3d9_swapchain.h" +#include "d3d9_caps.h" +#include "d3d9_util.h" +#include "d3d9_texture.h" 
+#include "d3d9_buffer.h" +#include "d3d9_vertex_declaration.h" +#include "d3d9_shader.h" +#include "d3d9_query.h" +#include "d3d9_stateblock.h" +#include "d3d9_monitor.h" +#include "d3d9_spec_constants.h" +#include "d3d9_names.h" +#include "d3d9_format_helpers.h" + +#include "../dxvk/dxvk_adapter.h" +#include "../dxvk/dxvk_instance.h" + +#include "../util/util_bit.h" +#include "../util/util_math.h" + +#include "d3d9_initializer.h" + +#include +#include +#ifdef MSC_VER +#pragma fenv_access (on) +#endif + +namespace dxvk { + + D3D9DeviceEx::D3D9DeviceEx( + D3D9InterfaceEx* pParent, + D3D9Adapter* pAdapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + D3DPRESENT_PARAMETERS* pPresentationParameters, + D3DDISPLAYMODEEX* pDisplayMode, + Rc dxvkDevice) + : m_adapter ( pAdapter ) + , m_dxvkDevice ( dxvkDevice ) + , m_csThread ( dxvkDevice->createContext() ) + , m_csChunk ( AllocCsChunk() ) + , m_parent ( pParent ) + , m_deviceType ( DeviceType ) + , m_window ( hFocusWindow ) + , m_behaviorFlags ( BehaviorFlags ) + , m_multithread ( BehaviorFlags & D3DCREATE_MULTITHREADED ) + , m_shaderModules ( new D3D9ShaderModuleSet ) + , m_d3d9Options ( dxvkDevice, pParent->GetInstance()->config() ) + , m_dxsoOptions ( m_dxvkDevice, m_d3d9Options ) + , m_isSWVP ( (BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) ? TRUE : FALSE ) { + // If we can SWVP, then we use an extended constant set + // as SWVP has many more slots available than HWVP. 
+ bool canSWVP = CanSWVP(); + DetermineConstantLayouts(canSWVP); + + if (canSWVP) + Logger::info("D3D9DeviceEx: Using extended constant set for software vertex processing."); + + m_initializer = new D3D9Initializer(m_dxvkDevice); + m_converter = new D3D9FormatHelper(m_dxvkDevice); + + EmitCs([ + cDevice = m_dxvkDevice + ] (DxvkContext* ctx) { + ctx->beginRecording(cDevice->createCommandList()); + + DxvkLogicOpState loState; + loState.enableLogicOp = VK_FALSE; + loState.logicOp = VK_LOGIC_OP_CLEAR; + ctx->setLogicOpState(loState); + }); + + CreateConstantBuffers(); + + if (!(BehaviorFlags & D3DCREATE_FPU_PRESERVE)) + SetupFPU(); + + m_availableMemory = DetermineInitialTextureMemory(); + + HRESULT hr = InitialReset(pPresentationParameters, pDisplayMode); + if (FAILED(hr)) + throw DxvkError("D3D9DeviceEx: Initial device reset failed."); + } + + + D3D9DeviceEx::~D3D9DeviceEx() { + Flush(); + SynchronizeCsThread(); + + delete m_initializer; + delete m_converter; + + m_dxvkDevice->waitForIdle(); // Sync Device + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + bool extended = m_parent->IsExtended() + && riid == __uuidof(IDirect3DDevice9Ex); + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DDevice9) + || extended) { + *ppvObject = ref(this); + return S_OK; + } + + // We want to ignore this if the extended device is queried and we weren't made extended. + if (riid == __uuidof(IDirect3DDevice9Ex)) + return E_NOINTERFACE; + + Logger::warn("D3D9DeviceEx::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::TestCooperativeLevel() { + // Equivelant of D3D11/DXGI present tests. We can always present. + return D3D_OK; + } + + + UINT STDMETHODCALLTYPE D3D9DeviceEx::GetAvailableTextureMem() { + // This is not meant to be accurate. 
+ // The values are also wildly incorrect in d3d9... But some games rely + // on this inaccurate value... + + // Clamp to megabyte range, as per spec. + constexpr UINT range = 0xfff00000; + + // Can't have negative memory! + int64_t memory = std::max(m_availableMemory.load(), 0); + + return UINT(memory) & range; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EvictManagedResources() { + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDirect3D(IDirect3D9** ppD3D9) { + if (ppD3D9 == nullptr) + return D3DERR_INVALIDCALL; + + *ppD3D9 = m_parent.ref(); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDeviceCaps(D3DCAPS9* pCaps) { + return m_adapter->GetDeviceCaps(m_deviceType, pCaps); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDisplayMode(UINT iSwapChain, D3DDISPLAYMODE* pMode) { + D3D9DeviceLock lock = LockDevice(); + + if (auto* swapchain = GetInternalSwapchain(iSwapChain)) + return swapchain->GetDisplayMode(pMode); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetCreationParameters(D3DDEVICE_CREATION_PARAMETERS *pParameters) { + if (pParameters == nullptr) + return D3DERR_INVALIDCALL; + + pParameters->AdapterOrdinal = m_adapter->GetOrdinal(); + pParameters->BehaviorFlags = m_behaviorFlags; + pParameters->DeviceType = m_deviceType; + pParameters->hFocusWindow = m_window; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetCursorProperties( + UINT XHotSpot, + UINT YHotSpot, + IDirect3DSurface9* pCursorBitmap) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pCursorBitmap == nullptr)) + return D3DERR_INVALIDCALL; + + auto* cursorTex = GetCommonTexture(pCursorBitmap); + if (unlikely(cursorTex->Desc()->Format != D3D9Format::A8R8G8B8)) + return D3DERR_INVALIDCALL; + + uint32_t inputWidth = cursorTex->Desc()->Width; + uint32_t inputHeight = cursorTex->Desc()->Height; + + // Always use a hardware cursor when windowed. 
+ bool hwCursor = m_presentParams.Windowed; + + // Always use a hardware cursor w/h <= 32 px + hwCursor |= inputWidth <= HardwareCursorWidth + || inputHeight <= HardwareCursorHeight; + + if (hwCursor) { + D3DLOCKED_BOX lockedBox; + HRESULT hr = LockImage(cursorTex, 0, 0, &lockedBox, nullptr, D3DLOCK_READONLY); + if (FAILED(hr)) + return hr; + + const uint8_t* data = reinterpret_cast(lockedBox.pBits); + + // Windows works with a stride of 128, lets respect that. + // Copy data to the bitmap... + CursorBitmap bitmap = { 0 }; + size_t copyPitch = std::min( + HardwareCursorPitch, + inputWidth * inputHeight * HardwareCursorFormatSize); + + for (uint32_t h = 0; h < HardwareCursorHeight; h++) + std::memcpy(&bitmap[h * HardwareCursorPitch], &data[h * lockedBox.RowPitch], copyPitch); + + UnlockImage(cursorTex, 0, 0); + + // Set this as our cursor. + return m_cursor.SetHardwareCursor(XHotSpot, YHotSpot, bitmap); + } + + // Software Cursor... + Logger::warn("D3D9DeviceEx::SetCursorProperties: Software cursor not implemented."); + return D3D_OK; + } + + + void STDMETHODCALLTYPE D3D9DeviceEx::SetCursorPosition(int X, int Y, DWORD Flags) { + D3D9DeviceLock lock = LockDevice(); + + // I was not able to find an instance + // where the cursor update was not immediate. + + // Fullscreen + Windowed seem to have the same + // behaviour here. + + // Hence we ignore the flag D3DCURSOR_IMMEDIATE_UPDATE. 
+ + m_cursor.UpdateCursor(X, Y); + } + + + BOOL STDMETHODCALLTYPE D3D9DeviceEx::ShowCursor(BOOL bShow) { + D3D9DeviceLock lock = LockDevice(); + + return m_cursor.ShowCursor(bShow); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateAdditionalSwapChain( + D3DPRESENT_PARAMETERS* pPresentationParameters, + IDirect3DSwapChain9** ppSwapChain) { + return CreateAdditionalSwapChainEx(pPresentationParameters, nullptr, ppSwapChain); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetSwapChain(UINT iSwapChain, IDirect3DSwapChain9** pSwapChain) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(pSwapChain); + + auto* swapchain = GetInternalSwapchain(iSwapChain); + + if (unlikely(swapchain == nullptr || pSwapChain == nullptr)) + return D3DERR_INVALIDCALL; + + *pSwapChain = static_cast(ref(swapchain)); + + return D3D_OK; + } + + + UINT STDMETHODCALLTYPE D3D9DeviceEx::GetNumberOfSwapChains() { + D3D9DeviceLock lock = LockDevice(); + + return UINT(m_swapchains.size()); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Reset(D3DPRESENT_PARAMETERS* pPresentationParameters) { + D3D9DeviceLock lock = LockDevice(); + + HRESULT hr = ResetSwapChain(pPresentationParameters, nullptr); + if (FAILED(hr)) + return hr; + + hr = ResetState(pPresentationParameters); + if (FAILED(hr)) + return hr; + + Flush(); + SynchronizeCsThread(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Present( + const RECT* pSourceRect, + const RECT* pDestRect, + HWND hDestWindowOverride, + const RGNDATA* pDirtyRegion) { + return PresentEx( + pSourceRect, + pDestRect, + hDestWindowOverride, + pDirtyRegion, + 0); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetBackBuffer( + UINT iSwapChain, + UINT iBackBuffer, + D3DBACKBUFFER_TYPE Type, + IDirect3DSurface9** ppBackBuffer) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppBackBuffer); + + if (auto* swapchain = GetInternalSwapchain(iSwapChain)) + return swapchain->GetBackBuffer(iBackBuffer, Type, ppBackBuffer); + + 
return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRasterStatus(UINT iSwapChain, D3DRASTER_STATUS* pRasterStatus) { + D3D9DeviceLock lock = LockDevice(); + + if (auto* swapchain = GetInternalSwapchain(iSwapChain)) + return swapchain->GetRasterStatus(pRasterStatus); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetDialogBoxMode(BOOL bEnableDialogs) { + D3D9DeviceLock lock = LockDevice(); + + HRESULT hr = GetInternalSwapchain(0)->SetDialogBoxMode(bEnableDialogs); + + if (FAILED(hr)) + Logger::warn("D3D9DeviceEx::SetDialogBoxMode: Setting on swapchain failed."); + + return hr; + } + + + void STDMETHODCALLTYPE D3D9DeviceEx::SetGammaRamp( + UINT iSwapChain, + DWORD Flags, + const D3DGAMMARAMP* pRamp) { + D3D9DeviceLock lock = LockDevice(); + + if (auto* swapchain = GetInternalSwapchain(iSwapChain)) + swapchain->SetGammaRamp(Flags, pRamp); + } + + + void STDMETHODCALLTYPE D3D9DeviceEx::GetGammaRamp(UINT iSwapChain, D3DGAMMARAMP* pRamp) { + D3D9DeviceLock lock = LockDevice(); + + if (auto* swapchain = GetInternalSwapchain(iSwapChain)) + swapchain->GetGammaRamp(pRamp); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateTexture( + UINT Width, + UINT Height, + UINT Levels, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DTexture9** ppTexture, + HANDLE* pSharedHandle) { + InitReturnPtr(ppTexture); + + if (unlikely(ppTexture == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = Width; + desc.Height = Height; + desc.Depth = 1; + desc.ArraySize = 1; + desc.MipLevels = Levels; + desc.Usage = Usage; + desc.Format = EnumerateFormat(Format); + desc.Pool = Pool; + desc.Discard = FALSE; + desc.MultiSample = D3DMULTISAMPLE_NONE; + desc.MultisampleQuality = 0; + + D3D9_VK_FORMAT_MAPPING mapping; + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc, &mapping))) + return D3DERR_INVALIDCALL; + + try { + const Com texture = new D3D9Texture2D(this, &desc, 
mapping); + + void* initialData = nullptr; + + if (Pool == D3DPOOL_SYSTEMMEM && Levels == 1 && pSharedHandle != nullptr) + initialData = *(reinterpret_cast(pSharedHandle)); + else // This must be a shared resource. + InitReturnPtr(pSharedHandle); + + m_initializer->InitTexture(texture->GetCommonTexture(), initialData); + *ppTexture = texture.ref(); + + bool mipSuccess = (Usage & D3DUSAGE_AUTOGENMIPMAP) == (texture->GetCommonTexture()->Desc()->Usage & D3DUSAGE_AUTOGENMIPMAP); + return mipSuccess ? D3D_OK : D3DOK_NOAUTOGEN; + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_OUTOFVIDEOMEMORY; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVolumeTexture( + UINT Width, + UINT Height, + UINT Depth, + UINT Levels, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DVolumeTexture9** ppVolumeTexture, + HANDLE* pSharedHandle) { + InitReturnPtr(ppVolumeTexture); + InitReturnPtr(pSharedHandle); + + if (unlikely(ppVolumeTexture == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = Width; + desc.Height = Height; + desc.Depth = Depth; + desc.ArraySize = 1; + desc.MipLevels = Levels; + desc.Usage = Usage; + desc.Format = EnumerateFormat(Format); + desc.Pool = Pool; + desc.Discard = FALSE; + desc.MultiSample = D3DMULTISAMPLE_NONE; + desc.MultisampleQuality = 0; + + D3D9_VK_FORMAT_MAPPING mapping; + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc, &mapping))) + return D3DERR_INVALIDCALL; + + try { + const Com texture = new D3D9Texture3D(this, &desc, mapping); + m_initializer->InitTexture(texture->GetCommonTexture()); + *ppVolumeTexture = texture.ref(); + + bool mipSuccess = (Usage & D3DUSAGE_AUTOGENMIPMAP) == (texture->GetCommonTexture()->Desc()->Usage & D3DUSAGE_AUTOGENMIPMAP); + return mipSuccess ? 
D3D_OK : D3DOK_NOAUTOGEN; + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_OUTOFVIDEOMEMORY; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateCubeTexture( + UINT EdgeLength, + UINT Levels, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DCubeTexture9** ppCubeTexture, + HANDLE* pSharedHandle) { + InitReturnPtr(ppCubeTexture); + InitReturnPtr(pSharedHandle); + + if (unlikely(ppCubeTexture == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = EdgeLength; + desc.Height = EdgeLength; + desc.Depth = 1; + desc.ArraySize = 6; // A cube has 6 faces, wowwie! + desc.MipLevels = Levels; + desc.Usage = Usage; + desc.Format = EnumerateFormat(Format); + desc.Pool = Pool; + desc.Discard = FALSE; + desc.MultiSample = D3DMULTISAMPLE_NONE; + desc.MultisampleQuality = 0; + + D3D9_VK_FORMAT_MAPPING mapping; + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc, &mapping))) + return D3DERR_INVALIDCALL; + + try { + const Com texture = new D3D9TextureCube(this, &desc, mapping); + m_initializer->InitTexture(texture->GetCommonTexture()); + *ppCubeTexture = texture.ref(); + + bool mipSuccess = (Usage & D3DUSAGE_AUTOGENMIPMAP) == (texture->GetCommonTexture()->Desc()->Usage & D3DUSAGE_AUTOGENMIPMAP); + return mipSuccess ? 
D3D_OK : D3DOK_NOAUTOGEN; + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_OUTOFVIDEOMEMORY; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexBuffer( + UINT Length, + DWORD Usage, + DWORD FVF, + D3DPOOL Pool, + IDirect3DVertexBuffer9** ppVertexBuffer, + HANDLE* pSharedHandle) { + InitReturnPtr(ppVertexBuffer); + + if (unlikely(ppVertexBuffer == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_BUFFER_DESC desc; + desc.Format = D3D9Format::VERTEXDATA; + desc.FVF = FVF; + desc.Pool = Pool; + desc.Size = Length; + desc.Type = D3DRTYPE_VERTEXBUFFER; + desc.Usage = Usage; + + if (FAILED(D3D9CommonBuffer::ValidateBufferProperties(&desc))) + return D3DERR_INVALIDCALL; + + try { + const Com buffer = new D3D9VertexBuffer(this, &desc); + m_initializer->InitBuffer(buffer->GetCommonBuffer()); + *ppVertexBuffer = buffer.ref(); + return D3D_OK; + } + catch (const DxvkError & e) { + Logger::err(e.message()); + return D3DERR_INVALIDCALL; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateIndexBuffer( + UINT Length, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DIndexBuffer9** ppIndexBuffer, + HANDLE* pSharedHandle) { + InitReturnPtr(ppIndexBuffer); + + if (unlikely(ppIndexBuffer == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_BUFFER_DESC desc; + desc.Format = EnumerateFormat(Format); + desc.Pool = Pool; + desc.Size = Length; + desc.Type = D3DRTYPE_INDEXBUFFER; + desc.Usage = Usage; + + if (FAILED(D3D9CommonBuffer::ValidateBufferProperties(&desc))) + return D3DERR_INVALIDCALL; + + try { + const Com buffer = new D3D9IndexBuffer(this, &desc); + m_initializer->InitBuffer(buffer->GetCommonBuffer()); + *ppIndexBuffer = buffer.ref(); + return D3D_OK; + } + catch (const DxvkError & e) { + Logger::err(e.message()); + return D3DERR_INVALIDCALL; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateRenderTarget( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD 
MultisampleQuality, + BOOL Lockable, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle) { + return CreateRenderTargetEx( + Width, + Height, + Format, + MultiSample, + MultisampleQuality, + Lockable, + ppSurface, + pSharedHandle, + 0); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateDepthStencilSurface( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Discard, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle) { + return CreateDepthStencilSurfaceEx( + Width, + Height, + Format, + MultiSample, + MultisampleQuality, + Discard, + ppSurface, + pSharedHandle, + 0); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::UpdateSurface( + IDirect3DSurface9* pSourceSurface, + const RECT* pSourceRect, + IDirect3DSurface9* pDestinationSurface, + const POINT* pDestPoint) { + D3D9DeviceLock lock = LockDevice(); + + D3D9Surface* src = static_cast(pSourceSurface); + D3D9Surface* dst = static_cast(pDestinationSurface); + + if (unlikely(src == nullptr || dst == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9CommonTexture* srcTextureInfo = src->GetCommonTexture(); + D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture(); + + if (unlikely(srcTextureInfo->Desc()->Pool != D3DPOOL_SYSTEMMEM || dstTextureInfo->Desc()->Pool != D3DPOOL_DEFAULT)) + return D3DERR_INVALIDCALL; + + if (unlikely(srcTextureInfo->Desc()->Format != dstTextureInfo->Desc()->Format)) + return D3DERR_INVALIDCALL; + + const DxvkFormatInfo* formatInfo = imageFormatInfo(dstTextureInfo->GetFormatMapping().FormatColor); + + VkOffset3D srcBlockOffset = { 0u, 0u, 0u }; + VkOffset3D dstOffset = { 0u, 0u, 0u }; + + VkExtent3D copyExtent = srcTextureInfo->GetExtentMip(src->GetSubresource()); + + if (pSourceRect != nullptr) { + srcBlockOffset = { pSourceRect->left / int32_t(formatInfo->blockSize.width), + pSourceRect->top / int32_t(formatInfo->blockSize.height), + 0u }; + + copyExtent = { alignDown(uint32_t(pSourceRect->right - 
pSourceRect->left), formatInfo->blockSize.width), + alignDown(uint32_t(pSourceRect->bottom - pSourceRect->top), formatInfo->blockSize.height), + 1u }; + } + + if (pDestPoint != nullptr) { + dstOffset = { alignDown(pDestPoint->x, formatInfo->blockSize.width), + alignDown(pDestPoint->y, formatInfo->blockSize.height), + 0u }; + } + + const auto dstSubresource = vk::makeSubresourceLayers( + dstTextureInfo->GetSubresourceFromIndex(VK_IMAGE_ASPECT_COLOR_BIT, dst->GetSubresource())); + + Rc srcBuffer = srcTextureInfo->GetBuffer(src->GetSubresource()); + Rc dstImage = dstTextureInfo->GetImage(); + + VkExtent3D levelExtent = srcTextureInfo->GetExtentMip(src->GetSubresource()); + VkExtent3D blockCount = util::computeBlockCount(levelExtent, formatInfo->blockSize); + + VkDeviceSize srcByteOffset = srcBlockOffset.y * formatInfo->elementSize * blockCount.width + + srcBlockOffset.x * formatInfo->elementSize; + + VkExtent2D fullSrcExtent = VkExtent2D{ blockCount.width * formatInfo->blockSize.width, + blockCount.height * formatInfo->blockSize.height }; + + EmitCs([ + cDstImage = std::move(dstImage), + cSrcBuffer = std::move(srcBuffer), + cDstLayers = dstSubresource, + cDstOffset = dstOffset, + cSrcOffset = srcByteOffset, + cCopyExtent = copyExtent, + cSrcExtent = fullSrcExtent + ] (DxvkContext* ctx) { + ctx->copyBufferToImage( + cDstImage, cDstLayers, cDstOffset, cCopyExtent, + cSrcBuffer, cSrcOffset, + cSrcExtent); + }); + + dstTextureInfo->SetDirty(dst->GetSubresource(), true); + + if (dstTextureInfo->IsAutomaticMip()) + GenerateMips(dstTextureInfo); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::UpdateTexture( + IDirect3DBaseTexture9* pSourceTexture, + IDirect3DBaseTexture9* pDestinationTexture) { + D3D9DeviceLock lock = LockDevice(); + + if (!pDestinationTexture || !pSourceTexture) + return D3DERR_INVALIDCALL; + + D3D9CommonTexture* dstTexInfo = GetCommonTexture(pDestinationTexture); + D3D9CommonTexture* srcTexInfo = GetCommonTexture(pSourceTexture); + + 
if (unlikely(srcTexInfo->Desc()->Pool != D3DPOOL_SYSTEMMEM || dstTexInfo->Desc()->Pool != D3DPOOL_DEFAULT)) + return D3DERR_INVALIDCALL; + + const Rc dstImage = dstTexInfo->GetImage(); + + uint32_t mipLevels = std::min(srcTexInfo->Desc()->MipLevels, dstTexInfo->Desc()->MipLevels); + uint32_t arraySlices = std::min(srcTexInfo->Desc()->ArraySize, dstTexInfo->Desc()->ArraySize); + for (uint32_t a = 0; a < arraySlices; a++) { + for (uint32_t m = 0; m < mipLevels; m++) { + Rc srcBuffer = srcTexInfo->GetBuffer(srcTexInfo->CalcSubresource(a, m)); + + VkImageSubresourceLayers dstLayers = { VK_IMAGE_ASPECT_COLOR_BIT, m, a, 1 }; + + VkExtent3D extent = dstImage->mipLevelExtent(m); + + EmitCs([ + cDstImage = dstImage, + cSrcBuffer = srcBuffer, + cDstLayers = dstLayers, + cExtent = extent + ] (DxvkContext* ctx) { + ctx->copyBufferToImage( + cDstImage, cDstLayers, + VkOffset3D{ 0, 0, 0 }, cExtent, + cSrcBuffer, 0, { 0u, 0u }); + }); + } + } + + dstTexInfo->MarkAllDirty(); + + pDestinationTexture->GenerateMipSubLevels(); + + FlushImplicit(false); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderTargetData( + IDirect3DSurface9* pRenderTarget, + IDirect3DSurface9* pDestSurface) { + D3D9DeviceLock lock = LockDevice(); + + D3D9Surface* src = static_cast(pRenderTarget); + D3D9Surface* dst = static_cast(pDestSurface); + + if (unlikely(src == nullptr || dst == nullptr)) + return D3DERR_INVALIDCALL; + + if (pRenderTarget == pDestSurface) + return D3D_OK; + + D3D9CommonTexture* dstTexInfo = GetCommonTexture(dst); + D3D9CommonTexture* srcTexInfo = GetCommonTexture(src); + + if (srcTexInfo->Desc()->Format != dstTexInfo->Desc()->Format) + return D3DERR_INVALIDCALL; + + if (dstTexInfo->Desc()->Pool == D3DPOOL_DEFAULT) + return this->StretchRect(pRenderTarget, nullptr, pDestSurface, nullptr, D3DTEXF_NONE); + + Rc dstBuffer = dstTexInfo->GetBuffer(dst->GetSubresource()); + + Rc srcImage = srcTexInfo->GetImage(); + const DxvkFormatInfo* srcFormatInfo = 
imageFormatInfo(srcImage->info().format); + + const VkImageSubresource srcSubresource = srcTexInfo->GetSubresourceFromIndex(srcFormatInfo->aspectMask, src->GetSubresource()); + VkImageSubresourceLayers srcSubresourceLayers = { + srcSubresource.aspectMask, + srcSubresource.mipLevel, + srcSubresource.arrayLayer, 1 }; + + VkExtent3D srcExtent = srcTexInfo->GetExtentMip(src->GetMipLevel()); + + EmitCs([ + cBuffer = dstBuffer, + cImage = srcImage, + cSubresources = srcSubresourceLayers, + cLevelExtent = srcExtent + ] (DxvkContext* ctx) { + ctx->copyImageToBuffer( + cBuffer, 0, VkExtent2D { 0u, 0u }, + cImage, cSubresources, VkOffset3D { 0, 0, 0 }, + cLevelExtent); + }); + + // We need to force a wait here + // as some applications depend on + // DO_NOT_WAIT not applying after + // this has happened. + // (this is a blocking call) + WaitForResource(dstBuffer, 0); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetFrontBufferData(UINT iSwapChain, IDirect3DSurface9* pDestSurface) { + D3D9DeviceLock lock = LockDevice(); + + if (auto* swapchain = GetInternalSwapchain(iSwapChain)) + return swapchain->GetFrontBufferData(pDestSurface); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::StretchRect( + IDirect3DSurface9* pSourceSurface, + const RECT* pSourceRect, + IDirect3DSurface9* pDestSurface, + const RECT* pDestRect, + D3DTEXTUREFILTERTYPE Filter) { + D3D9DeviceLock lock = LockDevice(); + + D3D9Surface* dst = static_cast(pDestSurface); + D3D9Surface* src = static_cast(pSourceSurface); + + if (unlikely(src == nullptr || dst == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(src == dst)) + return D3DERR_INVALIDCALL; + + bool fastPath = true; + + D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture(); + D3D9CommonTexture* srcTextureInfo = src->GetCommonTexture(); + + Rc dstImage = dstTextureInfo->GetImage(); + Rc srcImage = srcTextureInfo->GetImage(); + + const DxvkFormatInfo* dstFormatInfo = 
imageFormatInfo(dstImage->info().format); + const DxvkFormatInfo* srcFormatInfo = imageFormatInfo(srcImage->info().format); + + const VkImageSubresource dstSubresource = dstTextureInfo->GetSubresourceFromIndex(dstFormatInfo->aspectMask, dst->GetSubresource()); + const VkImageSubresource srcSubresource = srcTextureInfo->GetSubresourceFromIndex(srcFormatInfo->aspectMask, src->GetSubresource()); + + VkExtent3D srcExtent = srcImage->mipLevelExtent(srcSubresource.mipLevel); + VkExtent3D dstExtent = dstImage->mipLevelExtent(dstSubresource.mipLevel); + + D3D9Format srcFormat = srcTextureInfo->Desc()->Format; + D3D9Format dstFormat = dstTextureInfo->Desc()->Format; + + // We may only fast path copy non identicals one way! + // We don't know what garbage could be in the X8 data. + bool similar = (srcFormat == dstFormat) + || (srcFormat == D3D9Format::A8B8G8R8 && dstFormat == D3D9Format::X8B8G8R8) + || (srcFormat == D3D9Format::A8R8G8B8 && dstFormat == D3D9Format::X8R8G8B8) + || (srcFormat == D3D9Format::A1R5G5B5 && dstFormat == D3D9Format::X1R5G5B5) + || (srcFormat == D3D9Format::A4R4G4B4 && dstFormat == D3D9Format::X4R4G4B4); + + // Copies are only supported on similar formats. + fastPath &= similar; + + // Copies are only supported if the sample count matches, + // otherwise we need to resolve. + bool needsResolve = srcImage->info().sampleCount != VK_SAMPLE_COUNT_1_BIT; + bool fbBlit = dstImage->info().sampleCount != VK_SAMPLE_COUNT_1_BIT; + fastPath &= !fbBlit; + + // Copies would only work if we are block aligned. 
+ if (pSourceRect != nullptr) { + fastPath &= (pSourceRect->left % srcFormatInfo->blockSize.width == 0); + fastPath &= (pSourceRect->right % srcFormatInfo->blockSize.width == 0); + fastPath &= (pSourceRect->top % srcFormatInfo->blockSize.height == 0); + fastPath &= (pSourceRect->bottom % srcFormatInfo->blockSize.height == 0); + } + + if (pDestRect != nullptr) { + fastPath &= (pDestRect->left % dstFormatInfo->blockSize.width == 0); + fastPath &= (pDestRect->top % dstFormatInfo->blockSize.height == 0); + } + + VkImageSubresourceLayers dstSubresourceLayers = { + dstSubresource.aspectMask, + dstSubresource.mipLevel, + dstSubresource.arrayLayer, 1 }; + + VkImageSubresourceLayers srcSubresourceLayers = { + srcSubresource.aspectMask, + srcSubresource.mipLevel, + srcSubresource.arrayLayer, 1 }; + + VkImageBlit blitInfo; + blitInfo.dstSubresource = dstSubresourceLayers; + blitInfo.srcSubresource = srcSubresourceLayers; + + blitInfo.dstOffsets[0] = pDestRect != nullptr + ? VkOffset3D{ int32_t(pDestRect->left), int32_t(pDestRect->top), 0 } + : VkOffset3D{ 0, 0, 0 }; + + blitInfo.dstOffsets[1] = pDestRect != nullptr + ? VkOffset3D{ int32_t(pDestRect->right), int32_t(pDestRect->bottom), 1 } + : VkOffset3D{ int32_t(dstExtent.width), int32_t(dstExtent.height), 1 }; + + blitInfo.srcOffsets[0] = pSourceRect != nullptr + ? VkOffset3D{ int32_t(pSourceRect->left), int32_t(pSourceRect->top), 0 } + : VkOffset3D{ 0, 0, 0 }; + + blitInfo.srcOffsets[1] = pSourceRect != nullptr + ? 
VkOffset3D{ int32_t(pSourceRect->right), int32_t(pSourceRect->bottom), 1 } + : VkOffset3D{ int32_t(srcExtent.width), int32_t(srcExtent.height), 1 }; + + VkExtent3D srcCopyExtent = + { uint32_t(blitInfo.srcOffsets[1].x - blitInfo.srcOffsets[0].x), + uint32_t(blitInfo.srcOffsets[1].y - blitInfo.srcOffsets[0].y), + uint32_t(blitInfo.srcOffsets[1].z - blitInfo.srcOffsets[0].z) }; + + VkExtent3D dstCopyExtent = + { uint32_t(blitInfo.dstOffsets[1].x - blitInfo.dstOffsets[0].x), + uint32_t(blitInfo.dstOffsets[1].y - blitInfo.dstOffsets[0].y), + uint32_t(blitInfo.dstOffsets[1].z - blitInfo.dstOffsets[0].z) }; + + // Copies would only work if the extents match. (ie. no stretching) + bool stretch = srcCopyExtent != dstCopyExtent; + fastPath &= !stretch; + + if (fastPath) { + if (needsResolve) { + VkImageResolve region; + region.srcSubresource = blitInfo.srcSubresource; + region.srcOffset = blitInfo.srcOffsets[0]; + region.dstSubresource = blitInfo.dstSubresource; + region.dstOffset = blitInfo.dstOffsets[0]; + region.extent = srcCopyExtent; + + EmitCs([ + cDstImage = dstImage, + cSrcImage = srcImage, + cRegion = region + ] (DxvkContext* ctx) { + ctx->resolveImage( + cDstImage, cSrcImage, cRegion, + VK_FORMAT_UNDEFINED); + }); + } else { + EmitCs([ + cDstImage = dstImage, + cSrcImage = srcImage, + cDstLayers = blitInfo.dstSubresource, + cSrcLayers = blitInfo.srcSubresource, + cDstOffset = blitInfo.dstOffsets[0], + cSrcOffset = blitInfo.srcOffsets[0], + cExtent = srcCopyExtent + ] (DxvkContext* ctx) { + ctx->copyImage( + cDstImage, cDstLayers, cDstOffset, + cSrcImage, cSrcLayers, cSrcOffset, + cExtent); + }); + } + } + else { + if (needsResolve) { + auto resolveSrc = srcTextureInfo->GetResolveImage(); + + VkImageResolve region; + region.srcSubresource = blitInfo.srcSubresource; + region.srcOffset = blitInfo.srcOffsets[0]; + region.dstSubresource = blitInfo.srcSubresource; + region.dstOffset = blitInfo.srcOffsets[0]; + region.extent = srcCopyExtent; + + EmitCs([ + cDstImage = 
resolveSrc, + cSrcImage = srcImage, + cRegion = region + ] (DxvkContext* ctx) { + ctx->resolveImage( + cDstImage, cSrcImage, cRegion, + VK_FORMAT_UNDEFINED); + }); + + srcImage = resolveSrc; + } + + EmitCs([ + cDstImage = dstImage, + cDstMap = dstTextureInfo->GetMapping().Swizzle, + cSrcImage = srcImage, + cSrcMap = srcTextureInfo->GetMapping().Swizzle, + cBlitInfo = blitInfo, + cFilter = stretch ? DecodeFilter(Filter) : VK_FILTER_NEAREST + ] (DxvkContext* ctx) { + ctx->blitImage( + cDstImage, + cDstMap, + cSrcImage, + cSrcMap, + cBlitInfo, + cFilter); + }); + } + + dstTextureInfo->SetDirty(dst->GetSubresource(), true); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ColorFill( + IDirect3DSurface9* pSurface, + const RECT* pRect, + D3DCOLOR Color) { + D3D9DeviceLock lock = LockDevice(); + + D3D9Surface* dst = static_cast(pSurface); + + if (unlikely(dst == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture(); + + VkExtent3D mipExtent = dstTextureInfo->GetExtentMip(dst->GetSubresource()); + + VkOffset3D offset = VkOffset3D{ 0u, 0u, 0u }; + VkExtent3D extent = mipExtent; + + bool isFullExtent = true; + if (pRect != nullptr) { + ConvertRect(*pRect, offset, extent); + + isFullExtent = offset == VkOffset3D{ 0u, 0u, 0u } + && extent == mipExtent; + } + + Rc imageView = dst->GetImageView(false); + Rc renderTargetView = dst->GetRenderTargetView(false); + + VkClearValue clearValue; + DecodeD3DCOLOR(Color, clearValue.color.float32); + + // Fast path for games that may use this as an + // alternative to Clear on render targets. 
+ if (isFullExtent && renderTargetView != nullptr) { + EmitCs([ + cImageView = renderTargetView, + cClearValue = clearValue + ] (DxvkContext* ctx) { + ctx->clearRenderTarget( + cImageView, + VK_IMAGE_ASPECT_COLOR_BIT, + cClearValue); + }); + } else { + EmitCs([ + cImageView = imageView, + cOffset = offset, + cExtent = extent, + cClearValue = clearValue + ] (DxvkContext* ctx) { + ctx->clearImageView( + cImageView, + cOffset, cExtent, + VK_IMAGE_ASPECT_COLOR_BIT, + cClearValue); + }); + } + + dstTextureInfo->SetDirty(dst->GetSubresource(), true); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateOffscreenPlainSurface( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle) { + return CreateOffscreenPlainSurfaceEx( + Width, Height, + Format, Pool, + ppSurface, pSharedHandle, + 0); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetRenderTarget( + DWORD RenderTargetIndex, + IDirect3DSurface9* pRenderTarget) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(RenderTargetIndex >= caps::MaxSimultaneousRenderTargets + || (pRenderTarget == nullptr && RenderTargetIndex == 0))) + return D3DERR_INVALIDCALL; + + D3D9Surface* rt = static_cast(pRenderTarget); + + if (unlikely(rt && !(rt->GetCommonTexture()->Desc()->Usage & D3DUSAGE_RENDERTARGET))) + return D3DERR_INVALIDCALL; + + if (m_state.renderTargets[RenderTargetIndex] == rt) + return D3D_OK; + + // Do a strong flush if the first render target is changed. + FlushImplicit(RenderTargetIndex == 0 ? 
TRUE : FALSE); + m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); + + m_state.renderTargets[RenderTargetIndex] = rt; + + UpdateActiveRTs(RenderTargetIndex); + + uint32_t originalAlphaSwizzleRTs = m_alphaSwizzleRTs; + + m_alphaSwizzleRTs &= ~(1 << RenderTargetIndex); + + if (rt != nullptr && rt->GetCommonTexture()->GetMapping().Swizzle.a == VK_COMPONENT_SWIZZLE_ONE) + m_alphaSwizzleRTs |= 1 << RenderTargetIndex; + + if (originalAlphaSwizzleRTs != m_alphaSwizzleRTs) + m_flags.set(D3D9DeviceFlag::DirtyBlendState); + + if (RenderTargetIndex == 0) { + const auto* desc = m_state.renderTargets[0]->GetCommonTexture()->Desc(); + + bool validSampleMask = desc->MultiSample > D3DMULTISAMPLE_NONMASKABLE; + + if (validSampleMask != m_flags.test(D3D9DeviceFlag::ValidSampleMask)) { + m_flags.clr(D3D9DeviceFlag::ValidSampleMask); + if (validSampleMask) + m_flags.set(D3D9DeviceFlag::ValidSampleMask); + + m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState); + } + + D3DVIEWPORT9 viewport; + viewport.X = 0; + viewport.Y = 0; + viewport.Width = desc->Width; + viewport.Height = desc->Height; + viewport.MinZ = 0.0f; + viewport.MaxZ = 1.0f; + m_state.viewport = viewport; + + RECT scissorRect; + scissorRect.left = 0; + scissorRect.top = 0; + scissorRect.right = desc->Width; + scissorRect.bottom = desc->Height; + m_state.scissorRect = scissorRect; + + m_flags.set(D3D9DeviceFlag::DirtyViewportScissor); + m_flags.set(D3D9DeviceFlag::DirtyFFViewport); + m_flags.set(D3D9DeviceFlag::DirtyPointScale); + } + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderTarget( + DWORD RenderTargetIndex, + IDirect3DSurface9** ppRenderTarget) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppRenderTarget); + + if (unlikely(ppRenderTarget == nullptr || RenderTargetIndex > caps::MaxSimultaneousRenderTargets)) + return D3DERR_INVALIDCALL; + + if (m_state.renderTargets[RenderTargetIndex] == nullptr) + return D3DERR_NOTFOUND; + + *ppRenderTarget = 
m_state.renderTargets[RenderTargetIndex].ref(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetDepthStencilSurface(IDirect3DSurface9* pNewZStencil) { + D3D9DeviceLock lock = LockDevice(); + + D3D9Surface* ds = static_cast(pNewZStencil); + + if (unlikely(ds && !(ds->GetCommonTexture()->Desc()->Usage & D3DUSAGE_DEPTHSTENCIL))) + return D3DERR_INVALIDCALL; + + if (m_state.depthStencil == ds) + return D3D_OK; + + FlushImplicit(FALSE); + m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); + + m_state.depthStencil = ds; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDepthStencilSurface(IDirect3DSurface9** ppZStencilSurface) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppZStencilSurface); + + if (unlikely(ppZStencilSurface == nullptr)) + return D3DERR_INVALIDCALL; + + if (m_state.depthStencil == nullptr) + return D3DERR_NOTFOUND; + + *ppZStencilSurface = m_state.depthStencil.ref(); + + return D3D_OK; + } + + // The Begin/EndScene functions actually do nothing. + // Some games don't even call them. 
+ + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::BeginScene() { + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EndScene() { + FlushImplicit(true); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Clear( + DWORD Count, + const D3DRECT* pRects, + DWORD Flags, + D3DCOLOR Color, + float Z, + DWORD Stencil) { + D3D9DeviceLock lock = LockDevice(); + + const auto& vp = m_state.viewport; + const auto& sc = m_state.scissorRect; + + bool srgb = m_state.renderStates[D3DRS_SRGBWRITEENABLE]; + bool scissor = m_state.renderStates[D3DRS_SCISSORTESTENABLE]; + + VkOffset3D offset = { int32_t(vp.X), int32_t(vp.Y), 0 }; + VkExtent3D extent = { vp.Width, vp.Height, 1u }; + + if (scissor) { + offset.x = std::max (offset.x, sc.left); + offset.y = std::max (offset.y, sc.top); + + extent.width = std::min(extent.width, sc.right - offset.x); + extent.height = std::min(extent.height, sc.bottom - offset.y); + } + + // This becomes pretty unreadable in one singular if statement... + if (Count) { + // If pRects is null, or our first rect encompasses the viewport: + if (!pRects) + Count = 0; + else if (pRects[0].x1 <= offset.x && pRects[0].y1 <= offset.y + && pRects[0].x2 >= offset.x + int32_t(extent.width) && pRects[0].y2 >= offset.y + int32_t(extent.height)) + Count = 0; + } + + // Here, Count of 0 will denote whether or not to care about user rects. + + auto* rt0Desc = m_state.renderTargets[0]->GetCommonTexture()->Desc(); + + VkClearValue clearValueDepth; + clearValueDepth.depthStencil.depth = Z; + clearValueDepth.depthStencil.stencil = Stencil; + + VkClearValue clearValueColor; + DecodeD3DCOLOR(Color, clearValueColor.color.float32); + + auto dsv = m_state.depthStencil != nullptr ? 
m_state.depthStencil->GetDepthStencilView() : nullptr; + VkImageAspectFlags depthAspectMask = 0; + if (dsv != nullptr) { + if (Flags & D3DCLEAR_ZBUFFER) + depthAspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; + + if (Flags & D3DCLEAR_STENCIL) + depthAspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; + + depthAspectMask &= imageFormatInfo(dsv->info().format)->aspectMask; + } + + auto ClearImageView = [this]( + bool fullClear, + VkOffset3D offset, + VkExtent3D extent, + Rc imageView, + VkImageAspectFlags aspectMask, + VkClearValue clearValue) { + if (fullClear) { + EmitCs([ + cClearValue = clearValue, + cAspectMask = aspectMask, + cImageView = imageView + ] (DxvkContext* ctx) { + ctx->clearRenderTarget( + cImageView, + cAspectMask, + cClearValue); + }); + } + else { + EmitCs([ + cClearValue = clearValue, + cAspectMask = aspectMask, + cImageView = imageView, + cOffset = offset, + cExtent = extent + ] (DxvkContext* ctx) { + ctx->clearImageView( + cImageView, + cOffset, cExtent, + cAspectMask, + cClearValue); + }); + } + }; + + auto ClearViewRect = [&]( + bool fullClear, + VkOffset3D offset, + VkExtent3D extent) { + // Clear depth if we need to. + if (depthAspectMask != 0) + ClearImageView(fullClear, offset, extent, dsv, depthAspectMask, clearValueDepth); + + // Clear render targets if we need to. + if (Flags & D3DCLEAR_TARGET) { + for (auto rt : m_state.renderTargets) { + auto rtv = rt != nullptr ? rt->GetRenderTargetView(srgb) : nullptr; + + if (unlikely(rtv != nullptr)) + ClearImageView(fullClear, offset, extent, rtv, VK_IMAGE_ASPECT_COLOR_BIT, clearValueColor); + } + } + }; + + // A Hat in Time and other UE3 games only gets partial clears here + // because of an oversized rt height due to their weird alignment... + // This works around that. + uint32_t alignment = m_d3d9Options.lenientClear ? 
8 : 1; + + bool extentMatches = align(extent.width, alignment) == align(rt0Desc->Width, alignment) + && align(extent.height, alignment) == align(rt0Desc->Height, alignment); + + bool rtSizeMatchesClearSize = offset.x == 0 && offset.y == 0 && extentMatches; + + if (likely(!Count && rtSizeMatchesClearSize)) { + // Fast path w/ ClearRenderTarget for when + // our viewport and stencils match the RT size + ClearViewRect(true, offset, extent); + } + else if (!Count) { + // Clear our viewport & scissor minified region in this rendertarget. + ClearViewRect(false, offset, extent); + } + else { + // Clear the application provided rects. + for (uint32_t i = 0; i < Count; i++) { + VkOffset3D rectOffset = { + std::max(pRects[i].x1, offset.x), + std::max(pRects[i].y1, offset.y), + 0 + }; + + VkExtent3D rectExtent = { + std::min(pRects[i].x2, offset.x + extent.width) - rectOffset.x, + std::min(pRects[i].y2, offset.y + extent.height) - rectOffset.y, + 1u + }; + + ClearViewRect(false, rectOffset, rectExtent); + } + } + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTransform(D3DTRANSFORMSTATETYPE State, const D3DMATRIX* pMatrix) { + return SetStateTransform(GetTransformIndex(State), pMatrix); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTransform(D3DTRANSFORMSTATETYPE State, D3DMATRIX* pMatrix) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pMatrix == nullptr)) + return D3DERR_INVALIDCALL; + + *pMatrix = bit::cast(m_state.transforms[GetTransformIndex(State)]); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::MultiplyTransform(D3DTRANSFORMSTATETYPE TransformState, const D3DMATRIX* pMatrix) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(ShouldRecord())) + return m_recorder->MultiplyStateTransform(TransformState, pMatrix); + + uint32_t idx = GetTransformIndex(TransformState); + + m_state.transforms[idx] = ConvertMatrix(pMatrix) * m_state.transforms[idx]; + + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + + 
return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetViewport(const D3DVIEWPORT9* pViewport) { + D3D9DeviceLock lock = LockDevice(); + + D3DVIEWPORT9 viewport; + if (pViewport == nullptr) { + auto rtv = m_state.renderTargets[0]->GetRenderTargetView(false); + + viewport.X = 0; + viewport.Y = 0; + viewport.Width = rtv->image()->info().extent.width; + viewport.Height = rtv->image()->info().extent.height; + viewport.MinZ = 0.0f; + viewport.MaxZ = 1.0f; + } + else + viewport = *pViewport; + + if (unlikely(ShouldRecord())) + return m_recorder->SetViewport(&viewport); + + m_state.viewport = viewport; + + m_flags.set(D3D9DeviceFlag::DirtyViewportScissor); + m_flags.set(D3D9DeviceFlag::DirtyFFViewport); + m_flags.set(D3D9DeviceFlag::DirtyPointScale); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetViewport(D3DVIEWPORT9* pViewport) { + D3D9DeviceLock lock = LockDevice(); + + if (pViewport == nullptr) + return D3DERR_INVALIDCALL; + + *pViewport = m_state.viewport; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetMaterial(const D3DMATERIAL9* pMaterial) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pMaterial == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(ShouldRecord())) + return m_recorder->SetMaterial(pMaterial); + + m_state.material = *pMaterial; + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetMaterial(D3DMATERIAL9* pMaterial) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pMaterial == nullptr)) + return D3DERR_INVALIDCALL; + + *pMaterial = m_state.material; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetLight(DWORD Index, const D3DLIGHT9* pLight) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pLight == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(ShouldRecord())) { + Logger::warn("D3D9DeviceEx::SetLight: State block not implemented."); + return D3D_OK; + } + + if 
(Index >= m_state.lights.size()) + m_state.lights.resize(Index + 1); + + m_state.lights[Index] = *pLight; + + if (m_state.IsLightEnabled(Index)) + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + + return D3D_OK; + } + + + /* Returns the light stored at Index. Fails with D3DERR_INVALIDCALL when pLight is null, Index is past the end of m_state.lights, or the slot holds no value. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetLight(DWORD Index, D3DLIGHT9* pLight) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pLight == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(Index >= m_state.lights.size() || !m_state.lights[Index])) + return D3DERR_INVALIDCALL; + + *pLight = m_state.lights[Index].value(); + + return D3D_OK; + } + + + /* Enables or disables the light at Index. A slot that was never set is first filled with DefaultLight. Enabling replaces the first UINT32_MAX entry in m_state.enabledLightIndices with Index; disabling replaces the entry equal to Index with UINT32_MAX. If no matching entry is found nothing changes and D3D_OK is still returned. NOTE(review): unlike the neighbouring setters this does not consult ShouldRecord() - confirm that is intended. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::LightEnable(DWORD Index, BOOL Enable) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(Index >= m_state.lights.size())) + m_state.lights.resize(Index + 1); + + if (unlikely(!m_state.lights[Index])) + m_state.lights[Index] = DefaultLight; + + if (m_state.IsLightEnabled(Index) == !!Enable) + return D3D_OK; + + uint32_t searchIndex = UINT32_MAX; + uint32_t setIndex = Index; + + if (!Enable) + std::swap(searchIndex, setIndex); + + for (auto& idx : m_state.enabledLightIndices) { + if (idx == searchIndex) { + idx = setIndex; + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + break; + } + } + + return D3D_OK; + } + + + /* Reports whether the light at Index is enabled; an enabled light is reported as 128 rather than 1. Fails with D3DERR_INVALIDCALL for a null pEnable or an unset or out-of-range light. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetLightEnable(DWORD Index, BOOL* pEnable) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pEnable == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(Index >= m_state.lights.size() || !m_state.lights[Index])) + return D3DERR_INVALIDCALL; + + *pEnable = m_state.IsLightEnabled(Index) ? 128 : 0; // Weird quirk but OK.
+ + return D3D_OK; + } + + + /* Stores the four plane coefficients for clip plane Index. DirtyClipPlanes is only set when a coefficient actually changed and the plane's bit is set in D3DRS_CLIPPLANEENABLE. When ShouldRecord() is true the call is forwarded to m_recorder instead. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetClipPlane(DWORD Index, const float* pPlane) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(Index >= caps::MaxClipPlanes || !pPlane)) + return D3DERR_INVALIDCALL; + + if (unlikely(ShouldRecord())) + return m_recorder->SetClipPlane(Index, pPlane); + + bool dirty = false; + + for (uint32_t i = 0; i < 4; i++) { + dirty |= m_state.clipPlanes[Index].coeff[i] != pPlane[i]; + m_state.clipPlanes[Index].coeff[i] = pPlane[i]; + } + + bool enabled = m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1u << Index); + dirty &= enabled; + + if (dirty) + m_flags.set(D3D9DeviceFlag::DirtyClipPlanes); + + return D3D_OK; + } + + + /* Copies the four stored coefficients of clip plane Index into pPlane; D3DERR_INVALIDCALL for a null pointer or an index at or above caps::MaxClipPlanes. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetClipPlane(DWORD Index, float* pPlane) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(Index >= caps::MaxClipPlanes || !pPlane)) + return D3DERR_INVALIDCALL; + + for (uint32_t i = 0; i < 4; i++) + pPlane[i] = m_state.clipPlanes[Index].coeff[i]; + + return D3D_OK; + } + + + /* Sets one render state. Out-of-range state indices are silently accepted. When ShouldRecord() is true the call is forwarded to m_recorder. Otherwise, if the value changed, vendor-specific magic values written to D3DRS_POINTSIZE and D3DRS_ADAPTIVETESS_Y are intercepted first, then the value is stored and the switch below dirties or rebinds whatever depends on that state. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) { + D3D9DeviceLock lock = LockDevice(); + + // D3D9 only allows reading for values 0 and 7-255 so we don't need to do anything but return OK + if (unlikely(State > 255 || (State < D3DRS_ZENABLE && State != 0))) { + return D3D_OK; + } + + if (unlikely(ShouldRecord())) + return m_recorder->SetRenderState(State, Value); + + auto& states = m_state.renderStates; + + bool changed = states[State] != Value; + + if (likely(changed)) { + const bool oldATOC = IsAlphaToCoverageEnabled(); + const bool oldNVDB = states[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB); + const bool oldAlphaTest = IsAlphaTestEnabled(); + + // AMD's driver hack for ATOC and RESZ + if (unlikely(State == D3DRS_POINTSIZE)) { + // ATOC + constexpr uint32_t AlphaToCoverageEnable = uint32_t(D3D9Format::A2M1); + constexpr uint32_t AlphaToCoverageDisable = uint32_t(D3D9Format::A2M0); + + if (Value ==
AlphaToCoverageEnable + || Value == AlphaToCoverageDisable) { + m_amdATOC = Value == AlphaToCoverageEnable; + + bool newATOC = IsAlphaToCoverageEnabled(); + bool newAlphaTest = IsAlphaTestEnabled(); + + if (oldATOC != newATOC) + m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState); + + if (oldAlphaTest != newAlphaTest) + m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState); + + return D3D_OK; + } + + // RESZ + constexpr uint32_t RESZ = 0x7fa05000; + if (Value == RESZ) { + ResolveZ(); + return D3D_OK; + } + } + + // NV's driver hack for ATOC. + if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) { + constexpr uint32_t AlphaToCoverageEnable = uint32_t(D3D9Format::ATOC); + constexpr uint32_t AlphaToCoverageDisable = 0; + + if (Value == AlphaToCoverageEnable + || Value == AlphaToCoverageDisable) { + m_nvATOC = Value == AlphaToCoverageEnable; + + bool newATOC = IsAlphaToCoverageEnabled(); + /* Must be sampled with IsAlphaTestEnabled(): oldAlphaTest above came from that same query, so comparing it against the alpha-to-coverage state would set or skip DirtyAlphaTestState incorrectly. This mirrors the AMD path above. */ bool newAlphaTest = IsAlphaTestEnabled(); + + if (oldATOC != newATOC) + m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState); + + if (oldAlphaTest != newAlphaTest) + m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState); + + return D3D_OK; + } + + if (Value == uint32_t(D3D9Format::COPM)) { + // UE3 calls this MinimalNVIDIADriverShaderOptimization + Logger::info("D3D9DeviceEx::SetRenderState: MinimalNVIDIADriverShaderOptimization is unsupported"); + return D3D_OK; + } + } + + /* None of the vendor hacks consumed the value: store it, then dirty or rebind whatever depends on this state. */ states[State] = Value; + + switch (State) { + case D3DRS_SEPARATEALPHABLENDENABLE: + case D3DRS_ALPHABLENDENABLE: + case D3DRS_BLENDOP: + case D3DRS_BLENDOPALPHA: + case D3DRS_DESTBLEND: + case D3DRS_DESTBLENDALPHA: + case D3DRS_SRCBLEND: + case D3DRS_SRCBLENDALPHA: + m_flags.set(D3D9DeviceFlag::DirtyBlendState); + break; + + case D3DRS_COLORWRITEENABLE: + UpdateActiveRTs(0); + m_flags.set(D3D9DeviceFlag::DirtyBlendState); + break; + case D3DRS_COLORWRITEENABLE1: + UpdateActiveRTs(1); + m_flags.set(D3D9DeviceFlag::DirtyBlendState); + break; + case D3DRS_COLORWRITEENABLE2: + UpdateActiveRTs(2); +
m_flags.set(D3D9DeviceFlag::DirtyBlendState); + break; + case D3DRS_COLORWRITEENABLE3: + UpdateActiveRTs(3); + m_flags.set(D3D9DeviceFlag::DirtyBlendState); + break; + + /* Toggling alpha test can change both the alpha-to-coverage and alpha-test results, so both are re-queried and compared with the values sampled before the store. */ case D3DRS_ALPHATESTENABLE: { + bool newATOC = IsAlphaToCoverageEnabled(); + bool newAlphaTest = IsAlphaTestEnabled(); + + if (oldATOC != newATOC) + m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState); + + if (oldAlphaTest != newAlphaTest) + m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState); + + break; + } + + case D3DRS_ALPHAFUNC: + m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState); + break; + + case D3DRS_BLENDFACTOR: + BindBlendFactor(); + break; + + /* The multisample state is only dirtied when the ValidSampleMask flag is currently set. */ case D3DRS_MULTISAMPLEMASK: + if (m_flags.test(D3D9DeviceFlag::ValidSampleMask)) + m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState); + break; + + /* Every depth and stencil state below shares one dirty flag. */ case D3DRS_ZENABLE: + case D3DRS_ZFUNC: + case D3DRS_TWOSIDEDSTENCILMODE: + case D3DRS_ZWRITEENABLE: + case D3DRS_STENCILENABLE: + case D3DRS_STENCILFAIL: + case D3DRS_STENCILZFAIL: + case D3DRS_STENCILPASS: + case D3DRS_STENCILFUNC: + case D3DRS_CCW_STENCILFAIL: + case D3DRS_CCW_STENCILZFAIL: + case D3DRS_CCW_STENCILPASS: + case D3DRS_CCW_STENCILFUNC: + case D3DRS_STENCILMASK: + case D3DRS_STENCILWRITEMASK: + m_flags.set(D3D9DeviceFlag::DirtyDepthStencilState); + break; + + case D3DRS_STENCILREF: + BindDepthStencilRefrence(); + break; + + case D3DRS_SCISSORTESTENABLE: + m_flags.set(D3D9DeviceFlag::DirtyViewportScissor); + break; + + case D3DRS_SRGBWRITEENABLE: + m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); + break; + + case D3DRS_DEPTHBIAS: + case D3DRS_SLOPESCALEDEPTHBIAS: + case D3DRS_CULLMODE: + case D3DRS_FILLMODE: + m_flags.set(D3D9DeviceFlag::DirtyRasterizerState); + break; + + case D3DRS_CLIPPLANEENABLE: + m_flags.set(D3D9DeviceFlag::DirtyClipPlanes); + break; + + case D3DRS_ALPHAREF: + UpdatePushConstant(); + break; + + case D3DRS_TEXTUREFACTOR: + m_flags.set(D3D9DeviceFlag::DirtyFFPixelData); + break; + + case D3DRS_DIFFUSEMATERIALSOURCE: + case D3DRS_AMBIENTMATERIALSOURCE: + case
D3DRS_SPECULARMATERIALSOURCE: + case D3DRS_EMISSIVEMATERIALSOURCE: + case D3DRS_COLORVERTEX: + case D3DRS_LIGHTING: + case D3DRS_NORMALIZENORMALS: + case D3DRS_LOCALVIEWER: + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + break; + + case D3DRS_AMBIENT: + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + break; + + case D3DRS_SPECULARENABLE: + m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); + break; + + /* Fog states are tracked with separate dirty flags: mode/enable, color, scale, end and density. */ case D3DRS_FOGENABLE: + case D3DRS_FOGVERTEXMODE: + case D3DRS_FOGTABLEMODE: + m_flags.set(D3D9DeviceFlag::DirtyFogState); + break; + + case D3DRS_RANGEFOGENABLE: + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + break; + + case D3DRS_FOGCOLOR: + m_flags.set(D3D9DeviceFlag::DirtyFogColor); + break; + + case D3DRS_FOGSTART: + m_flags.set(D3D9DeviceFlag::DirtyFogScale); + break; + + /* The fog end value dirties both the scale and the end flags. */ case D3DRS_FOGEND: + m_flags.set(D3D9DeviceFlag::DirtyFogScale); + m_flags.set(D3D9DeviceFlag::DirtyFogEnd); + break; + + case D3DRS_FOGDENSITY: + m_flags.set(D3D9DeviceFlag::DirtyFogDensity); + break; + + /* The three point size states each trigger the same push constant update call. */ case D3DRS_POINTSIZE: + UpdatePushConstant(); + break; + + case D3DRS_POINTSIZE_MIN: + UpdatePushConstant(); + break; + + case D3DRS_POINTSIZE_MAX: + UpdatePushConstant(); + break; + + case D3DRS_POINTSCALE_A: + case D3DRS_POINTSCALE_B: + case D3DRS_POINTSCALE_C: + m_flags.set(D3D9DeviceFlag::DirtyPointScale); + break; + + case D3DRS_POINTSCALEENABLE: + case D3DRS_POINTSPRITEENABLE: + // Nothing to do here! + // This is handled in UpdatePointMode.
+ break; + + /* A shade mode change rebinds the current pixel shader with a freshly computed permutation (when one is set) and dirties the fixed-function pixel shader. */ case D3DRS_SHADEMODE: + if (m_state.pixelShader != nullptr) { + BindShader( + GetCommonShader(m_state.pixelShader), + GetPixelShaderPermutation()); + } + + m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); + break; + + case D3DRS_TWEENFACTOR: + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + break; + + case D3DRS_VERTEXBLEND: + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + break; + + case D3DRS_INDEXEDVERTEXBLENDENABLE: + if (CanSWVP() && Value) + m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend); + + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + break; + + /* Depth bounds are dirtied when ADAPTIVETESS_X holds the NVDB value now or held it before the store. Otherwise control falls through into the default case and the state is reported as unhandled. NOTE(review): the fall-through is unannotated - confirm it is intentional. */ case D3DRS_ADAPTIVETESS_X: + case D3DRS_ADAPTIVETESS_Z: + case D3DRS_ADAPTIVETESS_W: + if (states[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB) || oldNVDB) { + m_flags.set(D3D9DeviceFlag::DirtyDepthBounds); + break; + } + + /* Each unhandled state is warned about once; the table is indexed by State, which the range check at the top of the function limits to 0..255. */ default: + static bool s_errorShown[256]; + + if (!std::exchange(s_errorShown[State], true)) + Logger::warn(str::format("D3D9DeviceEx::SetRenderState: Unhandled render state ", State)); + break; + } + } + + return D3D_OK; + } + + + /* Reads one render state. Null pValue or an out-of-range State yields D3DERR_INVALIDCALL. States below D3DRS_ZENABLE or above D3DRS_BLENDOPALPHA read back as 0; all others return the stored value. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderState(D3DRENDERSTATETYPE State, DWORD* pValue) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pValue == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(State > 255 || (State < D3DRS_ZENABLE && State != 0))) { + return D3DERR_INVALIDCALL; + } + + if (State < D3DRS_ZENABLE || State > D3DRS_BLENDOPALPHA) + *pValue = 0; + else + *pValue = m_state.renderStates[State]; + + return D3D_OK; + } + + + /* Creates a state block of the requested type and returns a new reference through ppSB. A DxvkError thrown during construction is logged and mapped to D3DERR_INVALIDCALL. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateStateBlock( + D3DSTATEBLOCKTYPE Type, + IDirect3DStateBlock9** ppSB) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppSB); + + if (unlikely(ppSB == nullptr)) + return D3DERR_INVALIDCALL; + + try { + const Com sb = new D3D9StateBlock(this, ConvertStateBlockType(Type)); + *ppSB = sb.ref(); + return D3D_OK; + } + catch (const DxvkError & e) { + Logger::err(e.message()); + return D3DERR_INVALIDCALL; + } + } + + + HRESULT
STDMETHODCALLTYPE D3D9DeviceEx::BeginStateBlock() { /* Starts recording into a new state block of type None; fails with D3DERR_INVALIDCALL when a recorder already exists. */ + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(m_recorder != nullptr)) + return D3DERR_INVALIDCALL; + + m_recorder = new D3D9StateBlock(this, D3D9StateBlockType::None); + + return D3D_OK; + } + + + /* Stops recording: hands a reference to the current recorder to the caller through ppSB and clears m_recorder. Fails when ppSB is null or no recorder exists. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EndStateBlock(IDirect3DStateBlock9** ppSB) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppSB); + + if (unlikely(ppSB == nullptr || m_recorder == nullptr)) + return D3DERR_INVALIDCALL; + + *ppSB = m_recorder.ref(); + m_recorder = nullptr; + + return D3D_OK; + } + + + /* Stub: logs a warning and reports success without using pClipStatus. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetClipStatus(const D3DCLIPSTATUS9* pClipStatus) { + Logger::warn("D3D9DeviceEx::SetClipStatus: Stub"); + return D3D_OK; + } + + + /* Stub: logs a warning and reports success; pClipStatus is not written. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetClipStatus(D3DCLIPSTATUS9* pClipStatus) { + Logger::warn("D3D9DeviceEx::GetClipStatus: Stub"); + return D3D_OK; + } + + + /* Returns a new reference to the texture bound at Stage. An invalid sampler index yields D3D_OK with *ppTexture left null. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTexture(DWORD Stage, IDirect3DBaseTexture9** ppTexture) { + D3D9DeviceLock lock = LockDevice(); + + if (ppTexture == nullptr) + return D3DERR_INVALIDCALL; + + *ppTexture = nullptr; + + if (unlikely(InvalidSampler(Stage))) + return D3D_OK; + + DWORD stateSampler = RemapSamplerState(Stage); + + *ppTexture = ref(m_state.textures[stateSampler]); + + return D3D_OK; + } + + + /* Binds pTexture to Stage after remapping the index through RemapSamplerState; an invalid sampler index is ignored with D3D_OK. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTexture(DWORD Stage, IDirect3DBaseTexture9* pTexture) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(InvalidSampler(Stage))) + return D3D_OK; + + DWORD stateSampler = RemapSamplerState(Stage); + + return SetStateTexture(stateSampler, pTexture); + } + + + /* Reads one texture stage state; *pValue is zeroed before the Stage and Type range checks. NOTE(review): unlike SetTextureStageState this takes no device lock - confirm that is intended. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTextureStageState( + DWORD Stage, + D3DTEXTURESTAGESTATETYPE Type, + DWORD* pValue) { + if (unlikely(pValue == nullptr)) + return D3DERR_INVALIDCALL; + + *pValue = 0; + + if (unlikely(Stage >= caps::TextureStageCount)) + return D3DERR_INVALIDCALL; + + if (unlikely(Type >= TextureStageStateCount)) + return
D3DERR_INVALIDCALL; + + *pValue = m_state.textureStages[Stage][Type]; + + return D3D_OK; + } + + /* Writes one texture stage state. After the range checks and the ShouldRecord() forward, a changed value first updates the per-stage projection bit (for TEXTURETRANSFORMFLAGS), then dirties either the shared pixel shader data (bump environment states) or the fixed-function pixel and/or vertex shader, and is finally stored. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTextureStageState( + DWORD Stage, + D3DTEXTURESTAGESTATETYPE Type, + DWORD Value) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(Stage >= caps::TextureStageCount)) + return D3DERR_INVALIDCALL; + + if (unlikely(Type >= TextureStageStateCount)) + return D3DERR_INVALIDCALL; + + if (unlikely(ShouldRecord())) + return m_recorder->SetTextureStageState(Stage, Type, Value); + + if (likely(m_state.textureStages[Stage][Type] != Value)) { + if (Type == D3DTSS_TEXTURETRANSFORMFLAGS) { + m_projectionBitfield &= ~(1 << Stage); + if (Value & D3DTTFF_PROJECTED) + m_projectionBitfield |= 1 << Stage; + } + + if ((Type >= D3DTSS_BUMPENVMAT00 && Type <= D3DTSS_BUMPENVMAT11) + || (Type == D3DTSS_BUMPENVLSCALE || Type == D3DTSS_BUMPENVLOFFSET)) + m_flags.set(D3D9DeviceFlag::DirtySharedPixelShaderData); + else if (Type == D3DTSS_TEXTURETRANSFORMFLAGS) { + // This state affects both!
+ m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + } + else if (Type != D3DTSS_TEXCOORDINDEX) + m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); + else + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + m_state.textureStages[Stage][Type] = Value; + } + + return D3D_OK; + } + + + /* Reads one sampler state; an invalid sampler index yields D3D_OK with *pValue == 0. NOTE(review): Type is used as an index without a range check - confirm callers cannot pass an out-of-range value. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetSamplerState( + DWORD Sampler, + D3DSAMPLERSTATETYPE Type, + DWORD* pValue) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pValue == nullptr)) + return D3DERR_INVALIDCALL; + + *pValue = 0; + + if (unlikely(InvalidSampler(Sampler))) + return D3D_OK; + + Sampler = RemapSamplerState(Sampler); + + *pValue = m_state.samplerStates[Sampler][Type]; + + return D3D_OK; + } + + + /* Writes one sampler state through SetStateSamplerState after remapping the sampler index; an invalid sampler index is ignored with D3D_OK. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetSamplerState( + DWORD Sampler, + D3DSAMPLERSTATETYPE Type, + DWORD Value) { + D3D9DeviceLock lock = LockDevice(); + if (unlikely(InvalidSampler(Sampler))) + return D3D_OK; + + uint32_t stateSampler = RemapSamplerState(Sampler); + + return SetStateSamplerState(stateSampler, Type, Value); + } + + + /* Always succeeds and reports a single pass when pNumPasses is provided. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ValidateDevice(DWORD* pNumPasses) { + if (pNumPasses != nullptr) + *pNumPasses = 1; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPaletteEntries(UINT PaletteNumber, const PALETTEENTRY* pEntries) { + // This succeeds even though we don't advertise support. + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPaletteEntries(UINT PaletteNumber, PALETTEENTRY* pEntries) { + // Don't advertise support for this... + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetCurrentTexturePalette(UINT PaletteNumber) { + // This succeeds even though we don't advertise support. + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetCurrentTexturePalette(UINT *PaletteNumber) { + // Don't advertise support for this...
+ return D3DERR_INVALIDCALL; + } + + + /* Stores the scissor rectangle and dirties the viewport/scissor state, or forwards to m_recorder when ShouldRecord() is true. Null input is rejected. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetScissorRect(const RECT* pRect) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pRect == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(ShouldRecord())) + return m_recorder->SetScissorRect(pRect); + + m_state.scissorRect = *pRect; + + m_flags.set(D3D9DeviceFlag::DirtyViewportScissor); + + return D3D_OK; + } + + + /* Copies the stored scissor rectangle into pRect; D3DERR_INVALIDCALL when pRect is null. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetScissorRect(RECT* pRect) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pRect == nullptr)) + return D3DERR_INVALIDCALL; + + *pRect = m_state.scissorRect; + + return D3D_OK; + } + + + /* Records the software vertex processing flag; requesting it while CanSWVP() is false fails with D3DERR_INVALIDCALL. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetSoftwareVertexProcessing(BOOL bSoftware) { + auto lock = LockDevice(); + + if (bSoftware && !CanSWVP()) + return D3DERR_INVALIDCALL; + + m_isSWVP = bSoftware; + + return D3D_OK; + } + + + /* Returns the flag stored by SetSoftwareVertexProcessing. */ BOOL STDMETHODCALLTYPE D3D9DeviceEx::GetSoftwareVertexProcessing() { + auto lock = LockDevice(); + + return m_isSWVP; + } + + + /* Accepts and ignores the requested segment count. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetNPatchMode(float nSegments) { + return D3D_OK; + } + + + /* Always reports 0.0f. */ float STDMETHODCALLTYPE D3D9DeviceEx::GetNPatchMode() { + return 0.0f; + } + + + /* Non-indexed draw from the bound streams. State is prepared on the calling thread; the primitive type, counts and instance count are captured by value into the lambda passed to EmitCs, which computes the draw info and issues the draw on the DxvkContext. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawPrimitive( + D3DPRIMITIVETYPE PrimitiveType, + UINT StartVertex, + UINT PrimitiveCount) { + D3D9DeviceLock lock = LockDevice(); + + PrepareDraw(PrimitiveType); + + EmitCs([this, + cPrimType = PrimitiveType, + cPrimCount = PrimitiveCount, + cStartVertex = StartVertex, + cInstanceCount = GetInstanceCount() + ](DxvkContext* ctx) { + auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount); + + ApplyPrimitiveType(ctx, cPrimType); + + ctx->draw( + drawInfo.vertexCount, drawInfo.instanceCount, + cStartVertex, 0); + }); + + return D3D_OK; + } + + + /* Indexed draw from the bound streams and index buffer. MinVertexIndex and NumVertices are not used by the visible body. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawIndexedPrimitive( + D3DPRIMITIVETYPE PrimitiveType, + INT BaseVertexIndex, + UINT MinVertexIndex, + UINT NumVertices, + UINT StartIndex, + UINT
PrimitiveCount) { + D3D9DeviceLock lock = LockDevice(); + + PrepareDraw(PrimitiveType); + + EmitCs([this, + cPrimType = PrimitiveType, + cPrimCount = PrimitiveCount, + cStartIndex = StartIndex, + cBaseVertexIndex = BaseVertexIndex, + cInstanceCount = GetInstanceCount() + ](DxvkContext* ctx) { + auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount); + + ApplyPrimitiveType(ctx, cPrimType); + + ctx->drawIndexed( + drawInfo.vertexCount, drawInfo.instanceCount, + cStartIndex, + cBaseVertexIndex, 0); + }); + + return D3D_OK; + } + + + /* Draws from caller-provided vertex memory. The vertices are copied into a slice obtained from AllocUpBuffer (size = vertex count times stride), which the emitted lambda binds as vertex buffer 0 before drawing. UpDirtiedVertices is set afterwards because stream 0 no longer matches the application's binding. The first GenerateDrawInfo call is only used for the vertex count; its instance count argument is 0. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawPrimitiveUP( + D3DPRIMITIVETYPE PrimitiveType, + UINT PrimitiveCount, + const void* pVertexStreamZeroData, + UINT VertexStreamZeroStride) { + D3D9DeviceLock lock = LockDevice(); + + PrepareDraw(PrimitiveType, true); + + auto drawInfo = GenerateDrawInfo(PrimitiveType, PrimitiveCount, 0); + + const uint32_t upSize = drawInfo.vertexCount * VertexStreamZeroStride; + + auto upSlice = AllocUpBuffer(upSize); + std::memcpy(upSlice.mapPtr, pVertexStreamZeroData, upSize); + + EmitCs([this, + cBufferSlice = std::move(upSlice.slice), + cPrimType = PrimitiveType, + cPrimCount = PrimitiveCount, + cInstanceCount = GetInstanceCount(), + cStride = VertexStreamZeroStride + ](DxvkContext* ctx) { + auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount); + + ApplyPrimitiveType(ctx, cPrimType); + + ctx->bindVertexBuffer(0, cBufferSlice, cStride); + ctx->draw( + drawInfo.vertexCount, drawInfo.instanceCount, + 0, 0); + }); + + m_flags.set(D3D9DeviceFlag::UpDirtiedVertices); + + return D3D_OK; + } + + + /* Indexed draw from caller-provided vertex and index memory; both are packed into one AllocUpBuffer slice, vertices first. NOTE(review): the input pointers are passed to memcpy without null checks - confirm callers guarantee them. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawIndexedPrimitiveUP( + D3DPRIMITIVETYPE PrimitiveType, + UINT MinVertexIndex, + UINT NumVertices, + UINT PrimitiveCount, + const void* pIndexData, + D3DFORMAT IndexDataFormat, + const void* pVertexStreamZeroData, + UINT VertexStreamZeroStride) { + D3D9DeviceLock lock = LockDevice(); + + PrepareDraw(PrimitiveType, true); + + auto drawInfo =
GenerateDrawInfo(PrimitiveType, PrimitiveCount, 0); + + /* The vertex range copied starts at the beginning of the caller's data and ends after the last referenced vertex (MinVertexIndex + NumVertices). */ const uint32_t vertexSize = (MinVertexIndex + NumVertices) * VertexStreamZeroStride; + + const uint32_t indexSize = IndexDataFormat == D3DFMT_INDEX16 ? 2 : 4; + const uint32_t indicesSize = drawInfo.vertexCount * indexSize; + + const uint32_t upSize = vertexSize + indicesSize; + + auto upSlice = AllocUpBuffer(upSize); + uint8_t* data = reinterpret_cast(upSlice.mapPtr); + + std::memcpy(data, pVertexStreamZeroData, vertexSize); + std::memcpy(data + vertexSize, pIndexData, indicesSize); + + EmitCs([this, + cVertexSize = vertexSize, + cBufferSlice = std::move(upSlice.slice), + cPrimType = PrimitiveType, + cPrimCount = PrimitiveCount, + cStride = VertexStreamZeroStride, + cInstanceCount = GetInstanceCount(), + cIndexType = DecodeIndexType( + static_cast(IndexDataFormat)) + ](DxvkContext* ctx) { + auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount); + + ApplyPrimitiveType(ctx, cPrimType); + + /* The first cVertexSize bytes of the slice hold vertices; the remainder holds indices. */ ctx->bindVertexBuffer(0, cBufferSlice.subSlice(0, cVertexSize), cStride); + ctx->bindIndexBuffer(cBufferSlice.subSlice(cVertexSize, cBufferSlice.length() - cVertexSize), cIndexType); + ctx->drawIndexed( + drawInfo.vertexCount, drawInfo.instanceCount, + 0, + 0, 0); + }); + + m_flags.set(D3D9DeviceFlag::UpDirtiedVertices); + m_flags.set(D3D9DeviceFlag::UpDirtiedIndices); + + return D3D_OK; + } + + + /* Runs the vertex pipeline over VertexCount vertices and writes the results into pDestBuffer starting at DestIndex. When SupportsSWVP() is false the call logs an error once and returns D3D_OK without processing anything. The Flags parameter is not used by the visible body. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ProcessVertices( + UINT SrcStartIndex, + UINT DestIndex, + UINT VertexCount, + IDirect3DVertexBuffer9* pDestBuffer, + IDirect3DVertexDeclaration9* pVertexDecl, + DWORD Flags) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pDestBuffer == nullptr || pVertexDecl == nullptr)) + return D3DERR_INVALIDCALL; + + if (!SupportsSWVP()) { + static bool s_errorShown = false; + + if (!std::exchange(s_errorShown, true)) + Logger::err("D3D9DeviceEx::ProcessVertices: SWVP emu unsupported (vertexPipelineStoresAndAtomics)"); + + return D3D_OK; + } + + D3D9CommonBuffer* dst =
static_cast(pDestBuffer)->GetCommonBuffer(); + D3D9VertexDecl* decl = static_cast (pVertexDecl); + + PrepareDraw(D3DPT_FORCE_DWORD, false); + + /* Fallback: build or look up a declaration from the destination buffer's FVF. NOTE(review): a null pVertexDecl is rejected at the top of this function, so this branch looks unreachable as written - confirm which behaviour is intended. */ if (decl == nullptr) { + DWORD FVF = dst->Desc()->FVF; + + auto iter = m_fvfTable.find(FVF); + + if (iter == m_fvfTable.end()) { + decl = new D3D9VertexDecl(this, FVF); + m_fvfTable.insert(std::make_pair(FVF, decl)); + } + else + decl = iter->second.ptr(); + } + + uint32_t offset = DestIndex * decl->GetSize(); + + auto slice = dst->GetBufferSlice(); + slice = slice.subSlice(offset, slice.length() - offset); + + /* The emitted lambda draws the source vertices as a point list with the SWVP emulation shader bound to the geometry stage and the destination slice bound as a resource buffer; both bindings are cleared again after the draw. */ EmitCs([this, + cDecl = ref(decl), + cVertexCount = VertexCount, + cStartIndex = SrcStartIndex, + cInstanceCount = GetInstanceCount(), + cBufferSlice = slice, + cIndexed = m_state.indices != nullptr + ](DxvkContext* ctx) { + Rc shader = m_swvpEmulator.GetShaderModule(this, cDecl); + + auto drawInfo = GenerateDrawInfo(D3DPT_POINTLIST, cVertexCount, cInstanceCount); + + if (drawInfo.instanceCount != 1) { + drawInfo.instanceCount = 1; + + Logger::warn("D3D9DeviceEx::ProcessVertices: instancing unsupported"); + } + + ApplyPrimitiveType(ctx, D3DPT_POINTLIST); + + ctx->bindShader(VK_SHADER_STAGE_GEOMETRY_BIT, shader); + ctx->bindResourceBuffer(getSWVPBufferSlot(), cBufferSlice); + ctx->draw( + drawInfo.vertexCount, drawInfo.instanceCount, + cStartIndex, 0); + ctx->bindResourceBuffer(getSWVPBufferSlot(), DxvkBufferSlice()); + ctx->bindShader(VK_SHADER_STAGE_GEOMETRY_BIT, nullptr); + }); + + /* NOTE(review): as written here the copy source and destination are the same expression; template arguments selecting different buffers may have been lost from this text - verify against the original file. */ if (dst->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) { + uint32_t copySize = VertexCount * decl->GetSize(); + + EmitCs([ + cSrcBuffer = dst->GetBuffer(), + cDstBuffer = dst->GetBuffer(), + cOffset = offset, + cCopySize = copySize + ](DxvkContext* ctx) { + ctx->copyBuffer(cDstBuffer, cOffset, cSrcBuffer, cOffset, cCopySize); + }); + } + + dst->SetReadLocked(true); + + return D3D_OK; + } + + + /* Creates a vertex declaration from an element array terminated by an element whose Stream is 0xFF; the terminator itself is not counted. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexDeclaration( + const D3DVERTEXELEMENT9* pVertexElements, + IDirect3DVertexDeclaration9**
ppDecl) { + InitReturnPtr(ppDecl); + + if (unlikely(ppDecl == nullptr || pVertexElements == nullptr)) + return D3DERR_INVALIDCALL; + + /* Walk to the terminating element to count the declaration entries. */ const D3DVERTEXELEMENT9* counter = pVertexElements; + while (counter->Stream != 0xFF) + counter++; + + const uint32_t declCount = uint32_t(counter - pVertexElements); + + try { + const Com decl = new D3D9VertexDecl(this, pVertexElements, declCount); + *ppDecl = decl.ref(); + return D3D_OK; + } + catch (const DxvkError & e) { + Logger::err(e.message()); + return D3DERR_INVALIDCALL; + } + } + + + /* Binds a vertex declaration (null is accepted). The fixed-function vertex shader is dirtied when either the old or the new declaration is null, or when they differ in the HasPositionT, HasColor0 or HasColor1 flags or in the texcoord mask. The input layout is dirtied on every actual change. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexDeclaration(IDirect3DVertexDeclaration9* pDecl) { + D3D9DeviceLock lock = LockDevice(); + + D3D9VertexDecl* decl = static_cast(pDecl); + + if (unlikely(ShouldRecord())) + return m_recorder->SetVertexDeclaration(decl); + + if (decl == m_state.vertexDecl.ptr()) + return D3D_OK; + + bool dirtyFFShader = decl == nullptr || m_state.vertexDecl == nullptr; + if (!dirtyFFShader) + dirtyFFShader |= decl->TestFlag(D3D9VertexDeclFlag::HasPositionT) != m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT) + || decl->TestFlag(D3D9VertexDeclFlag::HasColor0) != m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor0) + || decl->TestFlag(D3D9VertexDeclFlag::HasColor1) != m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor1) + || decl->GetTexcoordMask() != m_state.vertexDecl->GetTexcoordMask(); + + if (dirtyFFShader) + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + + m_state.vertexDecl = decl; + + m_flags.set(D3D9DeviceFlag::DirtyInputLayout); + + return D3D_OK; + } + + + /* Returns a new reference to the bound declaration, or leaves *ppDecl as set by InitReturnPtr when none is bound. NOTE(review): a null ppDecl returns D3D_OK here, whereas sibling getters return D3DERR_INVALIDCALL - confirm that is intended. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexDeclaration(IDirect3DVertexDeclaration9** ppDecl) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppDecl); + + if (ppDecl == nullptr) + return D3D_OK; + + if (m_state.vertexDecl == nullptr) + return D3D_OK; + + *ppDecl = m_state.vertexDecl.ref(); + + return D3D_OK; + } + + + /* Binds the vertex declaration that corresponds to an FVF code, creating and caching it in m_fvfTable on first use. An FVF of 0 is ignored. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetFVF(DWORD FVF) { + D3D9DeviceLock lock =
LockDevice(); + + if (FVF == 0) + return D3D_OK; + + D3D9VertexDecl* decl = nullptr; + + auto iter = m_fvfTable.find(FVF); + + if (iter == m_fvfTable.end()) { + decl = new D3D9VertexDecl(this, FVF); + m_fvfTable.insert(std::make_pair(FVF, decl)); + } + else + decl = iter->second.ptr(); + + return this->SetVertexDeclaration(decl); + } + + + /* Reports the FVF of the bound vertex declaration, or 0 when none is bound. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetFVF(DWORD* pFVF) { + D3D9DeviceLock lock = LockDevice(); + + if (pFVF == nullptr) + return D3DERR_INVALIDCALL; + + *pFVF = m_state.vertexDecl != nullptr + ? m_state.vertexDecl->GetFVF() + : 0; + + return D3D_OK; + } + + + /* Builds a shader module from pFunction for the vertex stage and wraps it in a D3D9VertexShader. Any failure of CreateShaderModule is reported as D3DERR_INVALIDCALL, and *ppShader is left untouched in that case. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexShader( + const DWORD* pFunction, + IDirect3DVertexShader9** ppShader) { + // CreateVertexShader does not init the + // return ptr unlike CreatePixelShader + + if (unlikely(ppShader == nullptr)) + return D3DERR_INVALIDCALL; + + DxsoModuleInfo moduleInfo; + moduleInfo.options = m_dxsoOptions; + + D3D9CommonShader module; + + if (FAILED(this->CreateShaderModule(&module, + VK_SHADER_STAGE_VERTEX_BIT, + pFunction, + &moduleInfo))) + return D3DERR_INVALIDCALL; + + *ppShader = ref(new D3D9VertexShader(this, module)); + + return D3D_OK; + } + + + /* Binds a vertex shader (null is accepted). The vertex constant set is marked dirty when the old or new shader needs constant copies, when there was no previous shader, or - further below - when the new shader's highest used float, int or bool constant index exceeds the old shader's. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShader(IDirect3DVertexShader9* pShader) { + D3D9DeviceLock lock = LockDevice(); + + D3D9VertexShader* shader = static_cast(pShader); + + if (unlikely(ShouldRecord())) + return m_recorder->SetVertexShader(shader); + + if (shader == m_state.vertexShader.ptr()) + return D3D_OK; + + auto* oldShader = GetCommonShader(m_state.vertexShader); + auto* newShader = GetCommonShader(shader); + + bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies; + bool newCopies = newShader && newShader->GetMeta().needsConstantCopies; + + m_consts[DxsoProgramTypes::VertexShader].dirty |= oldCopies || newCopies || !oldShader; + m_consts[DxsoProgramTypes::VertexShader].meta = newShader ?
&newShader->GetMeta() : nullptr; + + if (newShader && oldShader) { + m_consts[DxsoProgramTypes::VertexShader].dirty + |= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF + || newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI + || newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB; + } + + m_state.vertexShader = shader; + + /* A non-null shader is bound immediately with the current permutation; the fixed-function vertex shader is flagged dirty and the DirtyProgVertexShader flag is cleared. */ if (shader != nullptr) { + m_flags.clr(D3D9DeviceFlag::DirtyProgVertexShader); + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + + BindShader( + GetCommonShader(shader), + GetVertexShaderPermutation()); + } + + m_flags.set(D3D9DeviceFlag::DirtyInputLayout); + + return D3D_OK; + } + + + /* Returns a new reference to the bound vertex shader through ppShader. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShader(IDirect3DVertexShader9** ppShader) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppShader); + + if (unlikely(ppShader == nullptr)) + return D3DERR_INVALIDCALL; + + *ppShader = m_state.vertexShader.ref(); + + return D3D_OK; + } + + + /* The vertex shader constant setters and getters below take the device lock and forward to the SetShaderConstants and GetShaderConstants templates, selecting the VertexShader program type and the matching constant type. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount) { + D3D9DeviceLock lock = LockDevice(); + + return SetShaderConstants< + DxsoProgramTypes::VertexShader, + D3D9ConstantType::Float>( + StartRegister, + pConstantData, + Vector4fCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantF( + UINT StartRegister, + float* pConstantData, + UINT Vector4fCount) { + D3D9DeviceLock lock = LockDevice(); + + return GetShaderConstants< + DxsoProgramTypes::VertexShader, + D3D9ConstantType::Float>( + StartRegister, + pConstantData, + Vector4fCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount) { + D3D9DeviceLock lock = LockDevice(); + + return SetShaderConstants< + DxsoProgramTypes::VertexShader, + D3D9ConstantType::Int>( + StartRegister, + pConstantData, + Vector4iCount); + } + + + HRESULT
STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantI( + UINT StartRegister, + int* pConstantData, + UINT Vector4iCount) { + D3D9DeviceLock lock = LockDevice(); + + return GetShaderConstants< + DxsoProgramTypes::VertexShader, + D3D9ConstantType::Int>( + StartRegister, + pConstantData, + Vector4iCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount) { + D3D9DeviceLock lock = LockDevice(); + + return SetShaderConstants< + DxsoProgramTypes::VertexShader, + D3D9ConstantType::Bool>( + StartRegister, + pConstantData, + BoolCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantB( + UINT StartRegister, + BOOL* pConstantData, + UINT BoolCount) { + D3D9DeviceLock lock = LockDevice(); + + return GetShaderConstants< + DxsoProgramTypes::VertexShader, + D3D9ConstantType::Bool>( + StartRegister, + pConstantData, + BoolCount); + } + + + /* Binds a vertex buffer, byte offset and stride to a stream. The stored values are always updated, but BindVertexBuffer is only called when the buffer, offset or stride actually changed. When ShouldRecord() is true the call is forwarded to m_recorder instead. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetStreamSource( + UINT StreamNumber, + IDirect3DVertexBuffer9* pStreamData, + UINT OffsetInBytes, + UINT Stride) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(StreamNumber >= caps::MaxStreams)) + return D3DERR_INVALIDCALL; + + D3D9VertexBuffer* buffer = static_cast(pStreamData); + + if (unlikely(ShouldRecord())) + return m_recorder->SetStreamSource( + StreamNumber, + buffer, + OffsetInBytes, + Stride); + + auto& vbo = m_state.vertexBuffers[StreamNumber]; + bool needsUpdate = vbo.vertexBuffer != buffer; + + if (needsUpdate) + vbo.vertexBuffer = buffer; + + needsUpdate |= vbo.offset != OffsetInBytes + || vbo.stride != Stride; + + vbo.offset = OffsetInBytes; + vbo.stride = Stride; + + if (needsUpdate) + BindVertexBuffer(StreamNumber, buffer, OffsetInBytes, Stride); + + return D3D_OK; + } + + + /* Returns the buffer (as a new reference), offset and stride bound to a stream. The non-null outputs are zeroed before the arguments are validated. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetStreamSource( + UINT StreamNumber, + IDirect3DVertexBuffer9** ppStreamData, + UINT* pOffsetInBytes, + UINT* pStride) { + D3D9DeviceLock lock
= LockDevice(); + + InitReturnPtr(ppStreamData); + + if (unlikely(pOffsetInBytes != nullptr)) + *pOffsetInBytes = 0; + + if (unlikely(pStride != nullptr)) + *pStride = 0; + + if (unlikely(ppStreamData == nullptr || pOffsetInBytes == nullptr || pStride == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(StreamNumber >= caps::MaxStreams)) + return D3DERR_INVALIDCALL; + + const auto& vbo = m_state.vertexBuffers[StreamNumber]; + + *ppStreamData = vbo.vertexBuffer.ref(); + *pOffsetInBytes = vbo.offset; + *pStride = vbo.stride; + + return D3D_OK; + } + + + /* Sets the frequency setting of a stream. Rejected with D3DERR_INVALIDCALL: a stream index at or above caps::MaxStreams, instance data on stream 0, both the indexed and instance flags together, and a setting of 0. A changed setting updates the stream's bit in m_instancedData and dirties the input layout. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetStreamSourceFreq(UINT StreamNumber, UINT Setting) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(StreamNumber >= caps::MaxStreams)) + return D3DERR_INVALIDCALL; + + const bool indexed = Setting & D3DSTREAMSOURCE_INDEXEDDATA; + const bool instanced = Setting & D3DSTREAMSOURCE_INSTANCEDATA; + + if (unlikely(StreamNumber == 0 && instanced)) + return D3DERR_INVALIDCALL; + + if (unlikely(instanced && indexed)) + return D3DERR_INVALIDCALL; + + if (unlikely(Setting == 0)) + return D3DERR_INVALIDCALL; + + if (unlikely(ShouldRecord())) + return m_recorder->SetStreamSourceFreq(StreamNumber, Setting); + + if (m_state.streamFreq[StreamNumber] == Setting) + return D3D_OK; + + m_state.streamFreq[StreamNumber] = Setting; + + if (instanced) + m_instancedData |= 1u << StreamNumber; + else + m_instancedData &= ~(1u << StreamNumber); + + m_flags.set(D3D9DeviceFlag::DirtyInputLayout); + + return D3D_OK; + } + + + /* Reads back the stored frequency setting of a stream. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetStreamSourceFreq(UINT StreamNumber, UINT* pSetting) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(StreamNumber >= caps::MaxStreams)) + return D3DERR_INVALIDCALL; + + if (unlikely(pSetting == nullptr)) + return D3DERR_INVALIDCALL; + + *pSetting = m_state.streamFreq[StreamNumber]; + + return D3D_OK; + } + + + /* Binds an index buffer (null is accepted); BindIndices is only called when the buffer actually changes. */ HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetIndices(IDirect3DIndexBuffer9* pIndexData) { + D3D9DeviceLock lock =
LockDevice(); + + D3D9IndexBuffer* buffer = static_cast(pIndexData); + + if (unlikely(ShouldRecord())) + return m_recorder->SetIndices(buffer); + + if (buffer == m_state.indices.ptr()) + return D3D_OK; + + m_state.indices = buffer; + + BindIndices(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetIndices(IDirect3DIndexBuffer9** ppIndexData) { + D3D9DeviceLock lock = LockDevice(); + InitReturnPtr(ppIndexData); + + if (unlikely(ppIndexData == nullptr)) + return D3DERR_INVALIDCALL; + + *ppIndexData = m_state.indices.ref(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreatePixelShader( + const DWORD* pFunction, + IDirect3DPixelShader9** ppShader) { + InitReturnPtr(ppShader); + + if (unlikely(ppShader == nullptr)) + return D3DERR_INVALIDCALL; + + DxsoModuleInfo moduleInfo; + moduleInfo.options = m_dxsoOptions; + + D3D9CommonShader module; + + if (FAILED(this->CreateShaderModule(&module, + VK_SHADER_STAGE_FRAGMENT_BIT, + pFunction, + &moduleInfo))) + return D3DERR_INVALIDCALL; + + *ppShader = ref(new D3D9PixelShader(this, module)); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShader(IDirect3DPixelShader9* pShader) { + D3D9DeviceLock lock = LockDevice(); + + D3D9PixelShader* shader = static_cast(pShader); + + if (unlikely(ShouldRecord())) + return m_recorder->SetPixelShader(shader); + + if (shader == m_state.pixelShader.ptr()) + return D3D_OK; + + auto* oldShader = GetCommonShader(m_state.pixelShader); + auto* newShader = GetCommonShader(shader); + + bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies; + bool newCopies = newShader && newShader->GetMeta().needsConstantCopies; + + m_consts[DxsoProgramTypes::PixelShader].dirty |= oldCopies || newCopies || !oldShader; + m_consts[DxsoProgramTypes::PixelShader].meta = newShader ? 
&newShader->GetMeta() : nullptr; + + if (newShader && oldShader) { + m_consts[DxsoProgramTypes::PixelShader].dirty + |= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF + || newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI + || newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB; + } + + m_state.pixelShader = shader; + + if (shader != nullptr) { + m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); + + BindShader( + GetCommonShader(shader), + GetPixelShaderPermutation()); + } + + UpdateActiveHazards(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShader(IDirect3DPixelShader9** ppShader) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppShader); + + if (unlikely(ppShader == nullptr)) + return D3DERR_INVALIDCALL; + + *ppShader = m_state.pixelShader.ref(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount) { + D3D9DeviceLock lock = LockDevice(); + + return SetShaderConstants < + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Float>( + StartRegister, + pConstantData, + Vector4fCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantF( + UINT StartRegister, + float* pConstantData, + UINT Vector4fCount) { + D3D9DeviceLock lock = LockDevice(); + + return GetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Float>( + StartRegister, + pConstantData, + Vector4fCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount) { + D3D9DeviceLock lock = LockDevice(); + + return SetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Int>( + StartRegister, + pConstantData, + Vector4iCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantI( + UINT StartRegister, + int* 
pConstantData, + UINT Vector4iCount) { + D3D9DeviceLock lock = LockDevice(); + + return GetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Int>( + StartRegister, + pConstantData, + Vector4iCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount) { + D3D9DeviceLock lock = LockDevice(); + + return SetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Bool>( + StartRegister, + pConstantData, + BoolCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantB( + UINT StartRegister, + BOOL* pConstantData, + UINT BoolCount) { + D3D9DeviceLock lock = LockDevice(); + + return GetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Bool>( + StartRegister, + pConstantData, + BoolCount); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawRectPatch( + UINT Handle, + const float* pNumSegs, + const D3DRECTPATCH_INFO* pRectPatchInfo) { + static bool s_errorShown = false; + + if (!std::exchange(s_errorShown, true)) + Logger::warn("D3D9DeviceEx::DrawRectPatch: Stub"); + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawTriPatch( + UINT Handle, + const float* pNumSegs, + const D3DTRIPATCH_INFO* pTriPatchInfo) { + static bool s_errorShown = false; + + if (!std::exchange(s_errorShown, true)) + Logger::warn("D3D9DeviceEx::DrawTriPatch: Stub"); + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DeletePatch(UINT Handle) { + static bool s_errorShown = false; + + if (!std::exchange(s_errorShown, true)) + Logger::warn("D3D9DeviceEx::DeletePatch: Stub"); + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateQuery(D3DQUERYTYPE Type, IDirect3DQuery9** ppQuery) { + InitReturnPtr(ppQuery); + + HRESULT hr = D3D9Query::QuerySupported(Type); + + if (ppQuery == nullptr || hr != D3D_OK) + return hr; + + try { + *ppQuery = ref(new 
D3D9Query(this, Type)); + return D3D_OK; + } + catch (const DxvkError & e) { + Logger::err(e.message()); + return D3DERR_INVALIDCALL; + } + } + + + // Ex Methods + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetConvolutionMonoKernel( + UINT width, + UINT height, + float* rows, + float* columns) { + // We don't advertise support for this. + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ComposeRects( + IDirect3DSurface9* pSrc, + IDirect3DSurface9* pDst, + IDirect3DVertexBuffer9* pSrcRectDescs, + UINT NumRects, + IDirect3DVertexBuffer9* pDstRectDescs, + D3DCOMPOSERECTSOP Operation, + int Xoffset, + int Yoffset) { + Logger::warn("D3D9DeviceEx::ComposeRects: Stub"); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetGPUThreadPriority(INT* pPriority) { + Logger::warn("D3D9DeviceEx::GetGPUThreadPriority: Stub"); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetGPUThreadPriority(INT Priority) { + Logger::warn("D3D9DeviceEx::SetGPUThreadPriority: Stub"); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::WaitForVBlank(UINT iSwapChain) { + D3D9DeviceLock lock = LockDevice(); + + if (auto* swapchain = GetInternalSwapchain(iSwapChain)) + return swapchain->WaitForVBlank(); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CheckResourceResidency(IDirect3DResource9** pResourceArray, UINT32 NumResources) { + Logger::warn("D3D9DeviceEx::CheckResourceResidency: Stub"); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetMaximumFrameLatency(UINT MaxLatency) { + D3D9DeviceLock lock = LockDevice(); + + if (MaxLatency == 0) + MaxLatency = DefaultFrameLatency; + + if (MaxLatency > MaxFrameLatency) + MaxLatency = MaxFrameLatency; + + m_frameLatency = MaxLatency; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetMaximumFrameLatency(UINT* pMaxLatency) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pMaxLatency == nullptr)) + 
return D3DERR_INVALIDCALL; + + *pMaxLatency = m_frameLatency; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CheckDeviceState(HWND hDestinationWindow) { + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::PresentEx( + const RECT* pSourceRect, + const RECT* pDestRect, + HWND hDestWindowOverride, + const RGNDATA* pDirtyRegion, + DWORD dwFlags) { + D3D9DeviceLock lock = LockDevice(); + + return GetInternalSwapchain(0)->Present( + pSourceRect, + pDestRect, + hDestWindowOverride, + pDirtyRegion, + dwFlags); + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateRenderTargetEx( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Lockable, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle, + DWORD Usage) { + InitReturnPtr(ppSurface); + InitReturnPtr(pSharedHandle); + + if (unlikely(ppSurface == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = Width; + desc.Height = Height; + desc.Depth = 1; + desc.ArraySize = 1; + desc.MipLevels = 1; + desc.Usage = Usage | D3DUSAGE_RENDERTARGET; + desc.Format = EnumerateFormat(Format); + desc.Pool = D3DPOOL_DEFAULT; + desc.Discard = FALSE; + desc.MultiSample = MultiSample; + desc.MultisampleQuality = MultisampleQuality; + + D3D9_VK_FORMAT_MAPPING mapping; + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc, &mapping))) + return D3DERR_INVALIDCALL; + + try { + const Com surface = new D3D9Surface(this, &desc, mapping); + m_initializer->InitTexture(surface->GetCommonTexture()); + *ppSurface = surface.ref(); + return D3D_OK; + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_OUTOFVIDEOMEMORY; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateOffscreenPlainSurfaceEx( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle, + DWORD Usage) { + InitReturnPtr(ppSurface); + 
InitReturnPtr(pSharedHandle); + + if (unlikely(ppSurface == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = Width; + desc.Height = Height; + desc.Depth = 1; + desc.ArraySize = 1; + desc.MipLevels = 1; + desc.Usage = Usage; + desc.Format = EnumerateFormat(Format); + desc.Pool = Pool; + desc.Discard = FALSE; + desc.MultiSample = D3DMULTISAMPLE_NONE; + desc.MultisampleQuality = 0; + + D3D9_VK_FORMAT_MAPPING mapping; + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc, &mapping))) + return D3DERR_INVALIDCALL; + + try { + const Com surface = new D3D9Surface(this, &desc, mapping); + m_initializer->InitTexture(surface->GetCommonTexture()); + *ppSurface = surface.ref(); + return D3D_OK; + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_OUTOFVIDEOMEMORY; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateDepthStencilSurfaceEx( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Discard, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle, + DWORD Usage) { + InitReturnPtr(ppSurface); + InitReturnPtr(pSharedHandle); + + if (unlikely(ppSurface == nullptr)) + return D3DERR_INVALIDCALL; + + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = Width; + desc.Height = Height; + desc.Depth = 1; + desc.ArraySize = 1; + desc.MipLevels = 1; + desc.Usage = Usage | D3DUSAGE_DEPTHSTENCIL; + desc.Format = EnumerateFormat(Format); + desc.Pool = D3DPOOL_DEFAULT; + desc.Discard = Discard; + desc.MultiSample = MultiSample; + desc.MultisampleQuality = MultisampleQuality; + + D3D9_VK_FORMAT_MAPPING mapping; + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc, &mapping))) + return D3DERR_INVALIDCALL; + + try { + const Com surface = new D3D9Surface(this, &desc, mapping); + m_initializer->InitTexture(surface->GetCommonTexture()); + *ppSurface = surface.ref(); + return D3D_OK; + } + catch (const DxvkError& e) { + 
Logger::err(e.message()); + return D3DERR_OUTOFVIDEOMEMORY; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ResetEx( + D3DPRESENT_PARAMETERS* pPresentationParameters, + D3DDISPLAYMODEEX* pFullscreenDisplayMode) { + D3D9DeviceLock lock = LockDevice(); + + HRESULT hr = ResetSwapChain(pPresentationParameters, pFullscreenDisplayMode); + if (FAILED(hr)) + return hr; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDisplayModeEx( + UINT iSwapChain, + D3DDISPLAYMODEEX* pMode, + D3DDISPLAYROTATION* pRotation) { + D3D9DeviceLock lock = LockDevice(); + + if (auto* swapchain = GetInternalSwapchain(iSwapChain)) + return swapchain->GetDisplayModeEx(pMode, pRotation); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateAdditionalSwapChainEx( + D3DPRESENT_PARAMETERS* pPresentationParameters, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode, + IDirect3DSwapChain9** ppSwapChain) { + D3D9DeviceLock lock = LockDevice(); + + InitReturnPtr(ppSwapChain); + + if (ppSwapChain == nullptr || pPresentationParameters == nullptr) + return D3DERR_INVALIDCALL; + + for (uint32_t i = 0; i < m_swapchains.size(); i++) + GetInternalSwapchain(i)->Invalidate(pPresentationParameters->hDeviceWindow); + + try { + auto* swapchain = new D3D9SwapChainEx(this, pPresentationParameters, pFullscreenDisplayMode); + *ppSwapChain = ref(swapchain); + + m_swapchains.push_back(swapchain); + swapchain->AddRefPrivate(); + } + catch (const DxvkError & e) { + Logger::err(e.message()); + return D3DERR_NOTAVAILABLE; + } + + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::SetStateSamplerState( + DWORD StateSampler, + D3DSAMPLERSTATETYPE Type, + DWORD Value) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(ShouldRecord())) + return m_recorder->SetStateSamplerState(StateSampler, Type, Value); + + auto& state = m_state.samplerStates; + + bool changed = state[StateSampler][Type] != Value; + + if (likely(changed)) { + state[StateSampler][Type] = Value; + + if 
(Type == D3DSAMP_ADDRESSU + || Type == D3DSAMP_ADDRESSV + || Type == D3DSAMP_ADDRESSW + || Type == D3DSAMP_MAGFILTER + || Type == D3DSAMP_MINFILTER + || Type == D3DSAMP_MIPFILTER + || Type == D3DSAMP_MAXANISOTROPY + || Type == D3DSAMP_MIPMAPLODBIAS + || Type == D3DSAMP_MAXMIPLEVEL + || Type == D3DSAMP_BORDERCOLOR) + m_dirtySamplerStates |= 1u << StateSampler; + else if (Type == D3DSAMP_SRGBTEXTURE) + BindTexture(StateSampler); + } + + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(ShouldRecord())) + return m_recorder->SetStateTexture(StateSampler, pTexture); + + if (m_state.textures[StateSampler] == pTexture) + return D3D_OK; + + // We need to check our ops and disable respective stages. + // Given we have transition from a null resource to + // a valid resource or vice versa. + if (pTexture == nullptr || m_state.textures[StateSampler] == nullptr) + m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); + + TextureChangePrivate(m_state.textures[StateSampler], pTexture); + + BindTexture(StateSampler); + + // We only care about PS samplers + if (likely(StateSampler <= caps::MaxSamplers)) + UpdateActiveRTTextures(StateSampler); + + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(ShouldRecord())) + return m_recorder->SetStateTransform(idx, pMatrix); + + m_state.transforms[idx] = ConvertMatrix(pMatrix); + + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + + if (idx == GetTransformIndex(D3DTS_VIEW) || idx >= GetTransformIndex(D3DTS_WORLD)) + m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend); + + return D3D_OK; + } + + + bool D3D9DeviceEx::IsExtended() { + return m_parent->IsExtended(); + } + + + bool D3D9DeviceEx::SupportsSWVP() { + return m_dxvkDevice->features().core.features.vertexPipelineStoresAndAtomics; + } + + + HWND 
D3D9DeviceEx::GetWindow() { + return m_window; + } + + + DxvkDeviceFeatures D3D9DeviceEx::GetDeviceFeatures(const Rc& adapter) { + DxvkDeviceFeatures supported = adapter->features(); + DxvkDeviceFeatures enabled = {}; + + // Geometry shaders are used for some meta ops + enabled.core.features.geometryShader = VK_TRUE; + enabled.core.features.robustBufferAccess = VK_TRUE; + + enabled.extMemoryPriority.memoryPriority = supported.extMemoryPriority.memoryPriority; + + enabled.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation = supported.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation; + + enabled.extVertexAttributeDivisor.vertexAttributeInstanceRateDivisor = supported.extVertexAttributeDivisor.vertexAttributeInstanceRateDivisor; + enabled.extVertexAttributeDivisor.vertexAttributeInstanceRateZeroDivisor = supported.extVertexAttributeDivisor.vertexAttributeInstanceRateZeroDivisor; + + // ProcessVertices + enabled.core.features.vertexPipelineStoresAndAtomics = supported.core.features.vertexPipelineStoresAndAtomics; + + // DXVK Meta + enabled.core.features.shaderStorageImageWriteWithoutFormat = VK_TRUE; + enabled.core.features.shaderStorageImageExtendedFormats = VK_TRUE; + + enabled.core.features.imageCubeArray = VK_TRUE; + + // SM1 level hardware + enabled.core.features.depthClamp = VK_TRUE; + enabled.core.features.depthBiasClamp = VK_TRUE; + enabled.core.features.fillModeNonSolid = VK_TRUE; + enabled.core.features.pipelineStatisticsQuery = supported.core.features.pipelineStatisticsQuery; + enabled.core.features.sampleRateShading = VK_TRUE; + enabled.core.features.samplerAnisotropy = VK_TRUE; + enabled.core.features.shaderClipDistance = VK_TRUE; + enabled.core.features.shaderCullDistance = VK_TRUE; + + // Ensure we support real BC formats and unofficial vendor ones. 
+ enabled.core.features.textureCompressionBC = VK_TRUE; + + enabled.extDepthClipEnable.depthClipEnable = supported.extDepthClipEnable.depthClipEnable; + enabled.extHostQueryReset.hostQueryReset = supported.extHostQueryReset.hostQueryReset; + + // SM2 level hardware + enabled.core.features.occlusionQueryPrecise = VK_TRUE; + + // SM3 level hardware + enabled.core.features.multiViewport = VK_TRUE; + enabled.core.features.independentBlend = VK_TRUE; + + // D3D10 level hardware supports this in D3D9 native. + enabled.core.features.fullDrawIndexUint32 = VK_TRUE; + + return enabled; + } + + + void D3D9DeviceEx::DetermineConstantLayouts(bool canSWVP) { + m_vsLayout.floatCount = canSWVP ? uint32_t(m_d3d9Options.swvpFloatCount) : caps::MaxFloatConstantsVS; + m_vsLayout.intCount = canSWVP ? uint32_t(m_d3d9Options.swvpIntCount) : caps::MaxOtherConstants; + m_vsLayout.boolCount = canSWVP ? uint32_t(m_d3d9Options.swvpBoolCount) : caps::MaxOtherConstants; + m_vsLayout.bitmaskCount = align(m_vsLayout.boolCount, 32) / 32; + + m_psLayout.floatCount = caps::MaxFloatConstantsPS; + m_psLayout.intCount = caps::MaxOtherConstants; + m_psLayout.boolCount = caps::MaxOtherConstants; + m_psLayout.bitmaskCount = align(m_psLayout.boolCount, 32) / 32; + } + + + D3D9UPBufferSlice D3D9DeviceEx::AllocUpBuffer(VkDeviceSize size) { + constexpr VkDeviceSize DefaultSize = 1 << 20; + + constexpr VkMemoryPropertyFlags memoryFlags + = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + + if (size <= DefaultSize) { + if (unlikely(!m_upBuffer.slice.defined())) { + DxvkBufferCreateInfo info; + info.size = DefaultSize; + info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT + | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + info.access = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT + | VK_ACCESS_INDEX_READ_BIT; + info.stages = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + + m_upBuffer.slice = DxvkBufferSlice(m_dxvkDevice->createBuffer(info, memoryFlags)); + m_upBuffer.mapPtr = 
m_upBuffer.slice.mapPtr(0); + } else if (unlikely(m_upBuffer.slice.length() < size)) { + auto physSlice = m_upBuffer.slice.buffer()->allocSlice(); + + m_upBuffer.slice = DxvkBufferSlice(m_upBuffer.slice.buffer()); + m_upBuffer.mapPtr = physSlice.mapPtr; + + EmitCs([ + cBuffer = m_upBuffer.slice.buffer(), + cSlice = physSlice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cSlice); + }); + } + + D3D9UPBufferSlice result; + result.slice = m_upBuffer.slice.subSlice(0, size); + result.mapPtr = reinterpret_cast(m_upBuffer.mapPtr) + m_upBuffer.slice.offset(); + + VkDeviceSize adjust = align(size, CACHE_LINE_SIZE); + m_upBuffer.slice = m_upBuffer.slice.subSlice(adjust, m_upBuffer.slice.length() - adjust); + return result; + } else { + // Create a temporary buffer for very large allocations + DxvkBufferCreateInfo info; + info.size = size; + info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT + | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + info.access = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT + | VK_ACCESS_INDEX_READ_BIT; + info.stages = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + + D3D9UPBufferSlice result; + result.slice = DxvkBufferSlice(m_dxvkDevice->createBuffer(info, memoryFlags)); + result.mapPtr = result.slice.mapPtr(0); + return result; + } + } + + + D3D9SwapChainEx* D3D9DeviceEx::GetInternalSwapchain(UINT index) { + if (unlikely(index >= m_swapchains.size())) + return nullptr; + + return m_swapchains[index].ptr(); + } + + + bool D3D9DeviceEx::ShouldRecord() { + return m_recorder != nullptr && !m_recorder->IsApplying(); + } + + + D3D9_VK_FORMAT_MAPPING D3D9DeviceEx::LookupFormat( + D3D9Format Format) const { + return m_adapter->GetFormatMapping(Format); + } + + DxvkFormatInfo D3D9DeviceEx::UnsupportedFormatInfo( + D3D9Format Format) const { + return m_adapter->GetUnsupportedFormatInfo(Format); + } + + bool D3D9DeviceEx::WaitForResource( + const Rc& Resource, + DWORD MapFlags) { + // Wait for the any pending D3D9 command to be executed + // on the CS thread so that we can 
determine whether the + // resource is currently in use or not. + + // Determine access type to wait for based on map mode + DxvkAccess access = (MapFlags & D3DLOCK_READONLY) + ? DxvkAccess::Write + : DxvkAccess::Read; + + if (!Resource->isInUse(access)) + SynchronizeCsThread(); + + if (Resource->isInUse(access)) { + if (MapFlags & D3DLOCK_DONOTWAIT) { + // We don't have to wait, but misbehaving games may + // still try to spin on `Map` until the resource is + // idle, so we should flush pending commands + FlushImplicit(FALSE); + return false; + } + else { + // Make sure pending commands using the resource get + // executed on the the GPU if we have to wait for it + Flush(); + SynchronizeCsThread(); + + while (Resource->isInUse(access)) + dxvk::this_thread::yield(); + } + } + + return true; + } + + + uint32_t D3D9DeviceEx::CalcImageLockOffset( + uint32_t SlicePitch, + uint32_t RowPitch, + const DxvkFormatInfo* FormatInfo, + const D3DBOX* pBox) { + if (pBox == nullptr) + return 0; + + std::array offsets = { pBox->Front, pBox->Top, pBox->Left }; + + uint32_t elementSize = 1; + + if (FormatInfo != nullptr) { + elementSize = FormatInfo->elementSize; + + offsets[0] = offsets[0] / FormatInfo->blockSize.depth; + offsets[1] = offsets[1] / FormatInfo->blockSize.height; + offsets[2] = offsets[2] / FormatInfo->blockSize.width; + } + + return offsets[0] * SlicePitch + + offsets[1] * RowPitch + + offsets[2] * elementSize; + } + + + HRESULT D3D9DeviceEx::LockImage( + D3D9CommonTexture* pResource, + UINT Face, + UINT MipLevel, + D3DLOCKED_BOX* pLockedBox, + const D3DBOX* pBox, + DWORD Flags) { + D3D9DeviceLock lock = LockDevice(); + + UINT Subresource = pResource->CalcSubresource(Face, MipLevel); + + // Don't allow multiple lockings. 
+ if (unlikely(pResource->MarkLocked(Subresource, true))) + return D3DERR_INVALIDCALL; + + if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_READONLY)) == (D3DLOCK_DISCARD | D3DLOCK_READONLY))) + return D3DERR_INVALIDCALL; + + if (unlikely(!m_d3d9Options.allowLockFlagReadonly)) + Flags &= ~D3DLOCK_READONLY; + + if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE))) + Flags &= ~D3DLOCK_DISCARD; + + auto& desc = *(pResource->Desc()); + + bool alloced = pResource->CreateBufferSubresource(Subresource); + + const Rc mappedBuffer = pResource->GetBuffer(Subresource); + + auto formatInfo = imageFormatInfo(pResource->GetFormatMapping().FormatColor); + auto subresource = pResource->GetSubresourceFromIndex( + formatInfo->aspectMask, Subresource); + + VkExtent3D levelExtent = pResource->GetExtentMip(MipLevel); + VkExtent3D blockCount = util::computeBlockCount(levelExtent, formatInfo->blockSize); + + const bool systemmem = desc.Pool == D3DPOOL_SYSTEMMEM; + const bool managed = IsPoolManaged(desc.Pool); + const bool scratch = desc.Pool == D3DPOOL_SCRATCH; + + bool fullResource = pBox == nullptr; + if (unlikely(!fullResource)) { + VkOffset3D lockOffset; + VkExtent3D lockExtent; + + ConvertBox(*pBox, lockOffset, lockExtent); + + fullResource = lockOffset == VkOffset3D{ 0, 0, 0 } + && lockExtent.width >= levelExtent.width + && lockExtent.height >= levelExtent.height + && lockExtent.depth >= levelExtent.depth; + } + + // If we are not locking the entire image + // a partial discard is meant to occur. + // We can't really implement that, so just ignore discard + // if we are not locking the full resource + + // DISCARD is also ignored for MANAGED and SYSTEMEM. + // DISCARD is not ignored for non-DYNAMIC unlike what the docs say. 
+ + if (!fullResource || desc.Pool != D3DPOOL_DEFAULT) + Flags &= ~D3DLOCK_DISCARD; + + if (desc.Usage & D3DUSAGE_WRITEONLY) + Flags &= ~D3DLOCK_READONLY; + + pResource->SetLockFlags(Subresource, Flags); + + DxvkBufferSliceHandle physSlice; + + if (Flags & D3DLOCK_DISCARD) { + // We do not have to preserve the contents of the + // buffer if the entire image gets discarded. + physSlice = pResource->DiscardMapSlice(Subresource); + + EmitCs([ + cImageBuffer = mappedBuffer, + cBufferSlice = physSlice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cImageBuffer, cBufferSlice); + }); + } + else if (managed || scratch || systemmem) { + // Managed and scratch resources + // are meant to be able to provide readback without waiting. + // We always keep a copy of them in system memory for this reason. + // No need to wait as its not in use. + physSlice = pResource->GetMappedSlice(Subresource); + + // We do not need to wait for the resource in the event the + // calling app promises not to overwrite data that is in use + // or is reading. Remember! This will only trigger for MANAGED resources + // that cannot get affected by GPU, therefore readonly is A-OK for NOT waiting. + const bool readOnly = Flags & D3DLOCK_READONLY; + const bool skipWait = (readOnly && managed) || scratch || (readOnly && systemmem); + + if (alloced) + std::memset(physSlice.mapPtr, 0, physSlice.length); + else if (!skipWait) { + if (!WaitForResource(mappedBuffer, Flags)) + return D3DERR_WASSTILLDRAWING; + } + } + else { + bool renderable = desc.Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL | D3DUSAGE_AUTOGENMIPMAP); + + // If we are dirty, then we need to copy -> buffer + // We are also always dirty if we are a render target, + // a depth stencil, or auto generate mipmaps. + bool dirty = pResource->SetDirty(Subresource, false) || renderable; + + if (unlikely(dirty)) { + Rc resourceImage = pResource->GetImage(); + + Rc mappedImage = resourceImage->info().sampleCount != 1 + ? 
pResource->GetResolveImage() + : std::move(resourceImage); + + // When using any map mode which requires the image contents + // to be preserved, and if the GPU has write access to the + // image, copy the current image contents into the buffer. + auto subresourceLayers = vk::makeSubresourceLayers(subresource); + + // We need to resolve this, some games + // lock MSAA render targets even though + // that's entirely illegal and they explicitly + // tell us that they do NOT want to lock them... + if (resourceImage != nullptr) { + EmitCs([ + cMainImage = resourceImage, + cResolveImage = mappedImage, + cSubresource = subresourceLayers + ] (DxvkContext* ctx) { + VkImageResolve region; + region.srcSubresource = cSubresource; + region.srcOffset = VkOffset3D { 0, 0, 0 }; + region.dstSubresource = cSubresource; + region.dstOffset = VkOffset3D { 0, 0, 0 }; + region.extent = cMainImage->mipLevelExtent(cSubresource.mipLevel); + + if (cSubresource.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + ctx->resolveImage( + cResolveImage, cMainImage, region, + cMainImage->info().format); + } + else { + ctx->resolveDepthStencilImage( + cResolveImage, cMainImage, region, + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR, + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR); + } + }); + } + + VkFormat packedFormat = GetPackedDepthStencilFormat(desc.Format); + + EmitCs([ + cImageBuffer = mappedBuffer, + cImage = std::move(mappedImage), + cSubresources = subresourceLayers, + cLevelExtent = levelExtent, + cPackedFormat = packedFormat + ] (DxvkContext* ctx) { + if (cSubresources.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + ctx->copyImageToBuffer( + cImageBuffer, 0, VkExtent2D { 0u, 0u }, + cImage, cSubresources, VkOffset3D { 0, 0, 0 }, + cLevelExtent); + } else { + ctx->copyDepthStencilImageToPackedBuffer( + cImageBuffer, 0, cImage, cSubresources, + VkOffset2D { 0, 0 }, + VkExtent2D { cLevelExtent.width, cLevelExtent.height }, + cPackedFormat); + } + }); + } 
+ + physSlice = mappedBuffer->getSliceHandle(); + + // If we are a new alloc, and we weren't dirty + // that means that we are a newly initialized + // texture, and hence can just memset -> 0 and + // avoid a wait here. + if (alloced && !dirty) + std::memset(physSlice.mapPtr, 0, physSlice.length); + else { + if (!WaitForResource(mappedBuffer, Flags)) + return D3DERR_WASSTILLDRAWING; + } + } + + const bool atiHack = desc.Format == D3D9Format::ATI1 || desc.Format == D3D9Format::ATI2; + // Set up map pointer. + if (atiHack) { + // We need to lie here. The game is expected to use this info and do a workaround. + // It's stupid. I know. + pLockedBox->RowPitch = std::max(desc.Width >> MipLevel, 1u); + pLockedBox->SlicePitch = pLockedBox->RowPitch * std::max(desc.Height >> MipLevel, 1u); + } + else { + // Data is tightly packed within the mapped buffer. + pLockedBox->RowPitch = formatInfo->elementSize * blockCount.width; + pLockedBox->SlicePitch = formatInfo->elementSize * blockCount.width * blockCount.height; + } + + const uint32_t offset = CalcImageLockOffset( + pLockedBox->SlicePitch, + pLockedBox->RowPitch, + (!atiHack) ? formatInfo : nullptr, + pBox); + + uint8_t* data = reinterpret_cast(physSlice.mapPtr); + data += offset; + pLockedBox->pBits = data; + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::UnlockImage( + D3D9CommonTexture* pResource, + UINT Face, + UINT MipLevel) { + D3D9DeviceLock lock = LockDevice(); + + UINT Subresource = pResource->CalcSubresource(Face, MipLevel); + + // We weren't locked anyway! + if (unlikely(!pResource->MarkLocked(Subresource, false))) + return D3DERR_INVALIDCALL; + + // Do we have a pending copy? 
+ if (!(pResource->GetLockFlags(Subresource) & D3DLOCK_READONLY)) { + // Only flush buffer -> image if we actually have an image + if (pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED) + this->FlushImage(pResource, Subresource); + } + + if (pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED + && (!pResource->IsDynamic()) + && (!pResource->IsManaged() || m_d3d9Options.evictManagedOnUnlock)) + pResource->DestroyBufferSubresource(Subresource); + + if (pResource->IsAutomaticMip()) + GenerateMips(pResource); + + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::FlushImage( + D3D9CommonTexture* pResource, + UINT Subresource) { + const Rc image = pResource->GetImage(); + + // Now that data has been written into the buffer, + // we need to copy its contents into the image + const Rc copyBuffer = pResource->GetBuffer(Subresource); + + auto formatInfo = imageFormatInfo(image->info().format); + auto subresource = pResource->GetSubresourceFromIndex( + formatInfo->aspectMask, Subresource); + + VkExtent3D levelExtent = image + ->mipLevelExtent(subresource.mipLevel); + + VkImageSubresourceLayers subresourceLayers = { + subresource.aspectMask, + subresource.mipLevel, + subresource.arrayLayer, 1 }; + + auto videoFormat = pResource->GetFormatMapping().VideoFormatInfo; + + if (likely(videoFormat.FormatType == D3D9VideoFormat_None)) { + EmitCs([ + cSrcBuffer = copyBuffer, + cDstImage = image, + cDstLayers = subresourceLayers, + cDstLevelExtent = levelExtent + ] (DxvkContext* ctx) { + ctx->copyBufferToImage(cDstImage, cDstLayers, + VkOffset3D{ 0, 0, 0 }, cDstLevelExtent, + cSrcBuffer, 0, { 0u, 0u }); + }); + } + else { + m_converter->ConvertVideoFormat( + videoFormat, + image, subresourceLayers, + copyBuffer); + } + + return D3D_OK; + } + + + void D3D9DeviceEx::GenerateMips( + D3D9CommonTexture* pResource) { + EmitCs([ + cImageView = pResource->GetViews().Sample.Color + ] (DxvkContext* ctx) { + ctx->generateMipmaps(cImageView); + }); + } + + + HRESULT 
D3D9DeviceEx::LockBuffer( + D3D9CommonBuffer* pResource, + UINT OffsetToLock, + UINT SizeToLock, + void** ppbData, + DWORD Flags) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(ppbData == nullptr)) + return D3DERR_INVALIDCALL; + + if (!m_d3d9Options.allowLockFlagReadonly) + Flags &= ~D3DLOCK_READONLY; + + auto& desc = *pResource->Desc(); + + // Ignore DISCARD if NOOVERWRITE is set + if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE))) + Flags &= ~D3DLOCK_DISCARD; + + // Ignore DISCARD and NOOVERWRITE if the buffer is not DEFAULT pool (tests + Halo 2) + // The docs say DISCARD and NOOVERWRITE are ignored if the buffer is not DYNAMIC + // but tests say otherwise! + if (desc.Pool != D3DPOOL_DEFAULT) + Flags &= ~(D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE); + + // Ignore READONLY if we are a WRITEONLY resource. + if (desc.Usage & D3DUSAGE_WRITEONLY) + Flags &= ~D3DLOCK_READONLY; + + // Ignore DONOTWAIT if we are DYNAMIC + // Yes... D3D9 is a good API. + if (desc.Usage & D3DUSAGE_DYNAMIC) + Flags &= ~D3DLOCK_DONOTWAIT; + + // We only bounds check for MANAGED. + // (TODO: Apparently this is meant to happen for DYNAMIC too but I am not sure + // how that works given it is meant to be a DIRECT access..?) + + // D3D9 does not do region tracking for READONLY locks + // But lets also account for whether we get readback from ProcessVertices + const bool quickRead = ((Flags & D3DLOCK_READONLY) && !pResource->GetReadLocked()); + const bool boundsCheck = IsPoolManaged(desc.Pool) && !quickRead; + + if (boundsCheck) { + // We can only respect this for these cases -- otherwise R/W OOB still get copied on native + // and some stupid games depend on that. + const bool respectUserBounds = !(Flags & D3DLOCK_DISCARD) && + SizeToLock != 0; + + // If we don't respect the bounds, encompass it all in our tests/checks + // These values may be out of range and don't get clamped. + uint32_t offset = respectUserBounds ? 
OffsetToLock : 0; + uint32_t size = respectUserBounds ? SizeToLock : desc.Size; + + pResource->LockRange().Conjoin(D3D9Range(offset, offset + size)); + } + + Rc mappingBuffer = pResource->GetBuffer(); + + DxvkBufferSliceHandle physSlice; + + if (Flags & D3DLOCK_DISCARD) { + // Allocate a new backing slice for the buffer and set + // it as the 'new' mapped slice. This assumes that the + // only way to invalidate a buffer is by mapping it. + physSlice = pResource->DiscardMapSlice(); + + EmitCs([ + cBuffer = std::move(mappingBuffer), + cBufferSlice = physSlice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cBufferSlice); + }); + } + else { + // NOOVERWRITE promises that they will not write in a currently used area. + // Therefore we can skip waiting for these two cases. + // We can also skip waiting if there is not dirty range overlap, if we are one of those resources. + + // If we are respecting the bounds ie. (MANAGED) we can test overlap + // of our bounds, otherwise we just ignore this and go for it all the time. + const bool skipWait = (Flags & D3DLOCK_NOOVERWRITE) || + quickRead || + (boundsCheck && !pResource->DirtyRange().Overlaps(pResource->LockRange())); + + if (!skipWait) { + if (!(Flags & D3DLOCK_DONOTWAIT)) { + pResource->SetReadLocked(false); + pResource->DirtyRange().Clear(); + } + + if (!WaitForResource(mappingBuffer, Flags)) + return D3DERR_WASSTILLDRAWING; + } + + // Use map pointer from previous map operation. This + // way we don't have to synchronize with the CS thread + // if the map mode is D3DLOCK_NOOVERWRITE. + physSlice = pResource->GetMappedSlice(); + } + + uint8_t* data = reinterpret_cast(physSlice.mapPtr); + // The offset/size is not clamped to or affected by the desc size. + data += OffsetToLock; + + *ppbData = reinterpret_cast(data); + + DWORD oldFlags = pResource->GetMapFlags(); + + // We need to remove the READONLY flags from the map flags + // if there was ever a non-readonly upload. 
+ if (!(Flags & D3DLOCK_READONLY)) { + oldFlags &= ~D3DLOCK_READONLY; + + if (pResource->Desc()->Pool != D3DPOOL_DEFAULT) + pResource->MarkNeedsUpload(); + } + + pResource->SetMapFlags(Flags | oldFlags); + pResource->IncrementLockCount(); + + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::FlushBuffer( + D3D9CommonBuffer* pResource) { + auto dstBuffer = pResource->GetBufferSlice(); + auto srcBuffer = pResource->GetBufferSlice(); + + EmitCs([ + cDstSlice = dstBuffer, + cSrcSlice = srcBuffer + ] (DxvkContext* ctx) { + ctx->copyBuffer( + cDstSlice.buffer(), + cDstSlice.offset(), + cSrcSlice.buffer(), + cSrcSlice.offset(), + cSrcSlice.length()); + }); + + pResource->DirtyRange().Conjoin(pResource->LockRange()); + pResource->LockRange().Clear(); + pResource->MarkUploaded(); + + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::UnlockBuffer( + D3D9CommonBuffer* pResource) { + D3D9DeviceLock lock = LockDevice(); + + if (pResource->DecrementLockCount() != 0) + return D3D_OK; + + if (pResource->GetMapMode() != D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) + return D3D_OK; + + if (pResource->GetMapFlags() & D3DLOCK_READONLY) + return D3D_OK; + + pResource->SetMapFlags(0); + + if (pResource->Desc()->Pool != D3DPOOL_DEFAULT) + return D3D_OK; + + FlushImplicit(FALSE); + + FlushBuffer(pResource); + + return D3D_OK; + } + + + void D3D9DeviceEx::EmitCsChunk(DxvkCsChunkRef&& chunk) { + m_csThread.dispatchChunk(std::move(chunk)); + m_csIsBusy = true; + } + + + void D3D9DeviceEx::FlushImplicit(BOOL StrongHint) { + // Flush only if the GPU is about to go idle, in + // order to keep the number of submissions low. + uint32_t pending = m_dxvkDevice->pendingSubmissions(); + + if (StrongHint || pending <= MaxPendingSubmits) { + auto now = dxvk::high_resolution_clock::now(); + + uint32_t delay = MinFlushIntervalUs + + IncFlushIntervalUs * pending; + + // Prevent flushing too often in short intervals. 
+ if (now - m_lastFlush >= std::chrono::microseconds(delay)) + Flush(); + } + } + + + void D3D9DeviceEx::SynchronizeCsThread() { + D3D9DeviceLock lock = LockDevice(); + + // Dispatch current chunk so that all commands + // recorded prior to this function will be run + FlushCsChunk(); + + if (m_csThread.isBusy()) + m_csThread.synchronize(); + } + + + void D3D9DeviceEx::SetupFPU() { + // Should match d3d9 float behaviour. + +#if defined(_MSC_VER) + // For MSVC we can use these cross arch and platform funcs to set the FPU. + // This will work on any platform, x86, x64, ARM, etc. + + // Clear exceptions. + _clearfp(); + + // Disable exceptions + _controlfp(_MCW_EM, _MCW_EM); + +#ifndef _WIN64 + // Use 24 bit precision + _controlfp(_PC_24, _MCW_PC); +#endif + + // Round to nearest + _controlfp(_RC_NEAR, _MCW_RC); +#elif (defined(__GNUC__) || defined(__MINGW32__)) && (defined(__i386__) || defined(__x86_64__) || defined(__ia64)) + // For GCC/MinGW we can use inline asm to set it. + // This only works for x86 and x64 processors however. + + uint16_t control; + + // Get current control word. + __asm__ __volatile__("fnstcw %0" : "=m" (*&control)); + + // Clear existing settings. + control &= 0xF0C0; + + // Disable exceptions + // Use 24 bit precision + // Round to nearest + control |= 0x003F; + + // Set new control word. 
+ __asm__ __volatile__("fldcw %0" : : "m" (*&control)); +#else + Logger::warn("D3D9DeviceEx::SetupFPU: not supported on this arch."); +#endif + } + + + int64_t D3D9DeviceEx::DetermineInitialTextureMemory() { + auto memoryProp = m_adapter->GetDXVKAdapter()->memoryProperties(); + + VkDeviceSize availableTextureMemory = 0; + + for (uint32_t i = 0; i < memoryProp.memoryHeapCount; i++) { + VkMemoryHeap& heap = memoryProp.memoryHeaps[i]; + + if (heap.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) + availableTextureMemory += memoryProp.memoryHeaps[i].size; + } + + constexpr VkDeviceSize Megabytes = 1024 * 1024; + + // The value returned is a 32-bit value, so we need to clamp it. + VkDeviceSize maxMemory = (VkDeviceSize(m_d3d9Options.maxAvailableMemory) * Megabytes) - 1; + availableTextureMemory = std::min(availableTextureMemory, maxMemory); + + return int64_t(availableTextureMemory); + } + + + void D3D9DeviceEx::CreateConstantBuffers() { + DxvkBufferCreateInfo info; + info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + info.access = VK_ACCESS_UNIFORM_READ_BIT; + + VkMemoryPropertyFlags memoryFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + + info.stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; + info.size = m_vsLayout.totalSize(); + m_consts[DxsoProgramTypes::VertexShader].buffer = m_dxvkDevice->createBuffer(info, memoryFlags); + + info.stages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + info.size = m_psLayout.totalSize(); + m_consts[DxsoProgramTypes::PixelShader].buffer = m_dxvkDevice->createBuffer(info, memoryFlags); + + info.stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; + info.size = caps::MaxClipPlanes * sizeof(D3D9ClipPlane); + m_vsClipPlanes = m_dxvkDevice->createBuffer(info, memoryFlags); + + info.stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; + info.size = sizeof(D3D9FixedFunctionVS); + m_vsFixedFunction = m_dxvkDevice->createBuffer(info, memoryFlags); + + info.stages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + info.size = 
// Copies the shader constants that are in use from the software-side
// constant set Src into the mapped constant buffer memory at pData.
//
// pData   Destination memory; it is accessed through the members
//         fConsts, iConsts and bConsts after the cast below.
// Src     Source constant set providing fConsts, iConsts and bConsts.
// Shader  Not referenced in this body.
//
// NOTE(review): in this copy the template parameter list after
// 'template' and the target type of the reinterpret_cast are absent.
// The body relies on a ShaderStage value and on the SoftwareLayoutType
// and ShaderType names, which presumably come from that parameter
// list. Confirm against the declaration in the header.
template inline void D3D9DeviceEx::UploadHardwareConstantSet(void* pData, const SoftwareLayoutType& Src, const ShaderType& Shader) {
  const D3D9ConstantSets& constSet = m_consts[ShaderStage];

  auto* dst = reinterpret_cast(pData);

  // Each group is skipped entirely when its count in constSet.meta is
  // zero; otherwise that many Vector4 / Vector4i entries are copied.
  if (constSet.meta->maxConstIndexF)
    std::memcpy(dst->fConsts, Src.fConsts, constSet.meta->maxConstIndexF * sizeof(Vector4));
  if (constSet.meta->maxConstIndexI)
    std::memcpy(dst->iConsts, Src.iConsts, constSet.meta->maxConstIndexI * sizeof(Vector4i));
  // Only element 0 of the bool constants is copied on this path.
  if (constSet.meta->maxConstIndexB)
    dst->bConsts[0] = Src.bConsts[0];
}
shaderConsts = GetCommonShader(Shader)->GetConstants(); + + for (const auto& constant : shaderConsts) + data[constant.uboIdx] = *reinterpret_cast(constant.float32); + } + } + + + template + void D3D9DeviceEx::UploadConstants() { + if constexpr (ShaderStage == DxsoProgramTypes::VertexShader) + return UploadConstantSet(m_state.vsConsts, m_vsLayout, m_state.vertexShader); + else + return UploadConstantSet (m_state.psConsts, m_psLayout, m_state.pixelShader); + } + + + void D3D9DeviceEx::UpdateClipPlanes() { + m_flags.clr(D3D9DeviceFlag::DirtyClipPlanes); + + auto slice = m_vsClipPlanes->allocSlice(); + auto dst = reinterpret_cast(slice.mapPtr); + + for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) { + dst[i] = (m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1 << i)) + ? m_state.clipPlanes[i] + : D3D9ClipPlane(); + } + + EmitCs([ + cBuffer = m_vsClipPlanes, + cSlice = slice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cSlice); + }); + } + + + template + void D3D9DeviceEx::UpdatePushConstant(const void* pData) { + struct ConstantData { uint8_t Data[Length]; }; + + auto* constData = reinterpret_cast(pData); + + EmitCs([ + cData = *constData + ](DxvkContext* ctx) { + ctx->pushConstants(Offset, Length, &cData); + }); + } + + + template + void D3D9DeviceEx::UpdatePushConstant() { + auto& rs = m_state.renderStates; + + if constexpr (Item == D3D9RenderStateItem::AlphaRef) { + float alpha = float(rs[D3DRS_ALPHAREF]) / 255.0f; + UpdatePushConstant(&alpha); + } + else if constexpr (Item == D3D9RenderStateItem::FogColor) { + Vector4 color; + DecodeD3DCOLOR(D3DCOLOR(rs[D3DRS_FOGCOLOR]), color.data); + UpdatePushConstant(&color); + } + else if constexpr (Item == D3D9RenderStateItem::FogDensity) { + float density = bit::cast(rs[D3DRS_FOGDENSITY]); + UpdatePushConstant(&density); + } + else if constexpr (Item == D3D9RenderStateItem::FogEnd) { + float end = bit::cast(rs[D3DRS_FOGEND]); + UpdatePushConstant(&end); + } + else if constexpr (Item == 
D3D9RenderStateItem::FogScale) { + float end = bit::cast(rs[D3DRS_FOGEND]); + float start = bit::cast(rs[D3DRS_FOGSTART]); + + float scale = 1.0f / (end - start); + if (!std::isfinite(scale)) + scale = 0.0f; + + UpdatePushConstant(&scale); + } + else if constexpr (Item == D3D9RenderStateItem::PointSize) { + UpdatePushConstant(&rs[D3DRS_POINTSIZE]); + } + else if constexpr (Item == D3D9RenderStateItem::PointSizeMin) { + UpdatePushConstant(&rs[D3DRS_POINTSIZE_MIN]); + } + else if constexpr (Item == D3D9RenderStateItem::PointSizeMax) { + UpdatePushConstant(&rs[D3DRS_POINTSIZE_MAX]); + } + else if constexpr (Item == D3D9RenderStateItem::PointScaleA) { + float scale = bit::cast(rs[D3DRS_POINTSCALE_A]); + scale /= float(m_state.viewport.Height * m_state.viewport.Height); + + UpdatePushConstant(&scale); + } + else if constexpr (Item == D3D9RenderStateItem::PointScaleB) { + float scale = bit::cast(rs[D3DRS_POINTSCALE_B]); + scale /= float(m_state.viewport.Height * m_state.viewport.Height); + + UpdatePushConstant(&scale); + } + else if constexpr (Item == D3D9RenderStateItem::PointScaleC) { + float scale = bit::cast(rs[D3DRS_POINTSCALE_C]); + scale /= float(m_state.viewport.Height * m_state.viewport.Height); + + UpdatePushConstant(&scale); + } + else + Logger::warn("D3D9: Invalid push constant set to update."); + } + + + + void D3D9DeviceEx::Flush() { + D3D9DeviceLock lock = LockDevice(); + + m_initializer->Flush(); + + if (m_csIsBusy || !m_csChunk->empty()) { + // Add commands to flush the threaded + // context, then flush the command list + EmitCs([](DxvkContext* ctx) { + ctx->flushCommandList(); + }); + + FlushCsChunk(); + + // Reset flush timer used for implicit flushes + m_lastFlush = dxvk::high_resolution_clock::now(); + m_csIsBusy = false; + } + } + + + inline D3D9ShaderMasks D3D9DeviceEx::GetShaderMasks() { + const auto* shader = GetCommonShader(m_state.pixelShader); + + if (likely(shader != nullptr)) + return shader->GetShaderMask(); + + // TODO: What fixed function 
textures are in use? + // Currently we are making all 8 of them as in use here. + + // The RT output is always 0 for fixed function. + return D3D9ShaderMasks{ 0b1111111, 0b1 }; + } + + + inline void D3D9DeviceEx::UpdateActiveRTs(uint32_t index) { + const uint32_t bit = 1 << index; + + m_activeRTs &= ~bit; + + if (m_state.renderTargets[index] != nullptr && + m_state.renderTargets[index]->GetBaseTexture() != nullptr && + m_state.renderStates[ColorWriteIndex(index)]) + m_activeRTs |= bit; + + UpdateActiveHazards(); + } + + + inline void D3D9DeviceEx::UpdateActiveRTTextures(uint32_t index) { + const uint32_t bit = 1 << index; + + m_activeRTTextures &= ~bit; + + auto tex = GetCommonTexture(m_state.textures[index]); + if (tex != nullptr && tex->IsRenderTarget()) + m_activeRTTextures |= bit; + + UpdateActiveHazards(); + } + + + inline void D3D9DeviceEx::UpdateActiveHazards() { + auto masks = GetShaderMasks(); + masks.rtMask &= m_activeRTs; + masks.samplerMask &= m_activeRTTextures; + + m_activeHazards = 0; + for (uint32_t rt = masks.rtMask; rt; rt &= rt - 1) { + for (uint32_t sampler = masks.samplerMask; sampler; sampler &= sampler - 1) { + IDirect3DBaseTexture9* rtBase = m_state.renderTargets[bit::tzcnt(rt)]->GetBaseTexture(); + IDirect3DBaseTexture9* texBase = m_state.textures[bit::tzcnt(sampler)]; + + if (likely(rtBase != texBase)) + continue; + + m_activeHazards |= 1 << bit::tzcnt(rt); + } + } + } + + + void D3D9DeviceEx::MarkRenderHazards() { + for (uint32_t rt = m_activeHazards; rt; rt &= rt - 1) { + // Guaranteed to not be nullptr... 
+ auto tex = m_state.renderTargets[bit::tzcnt(rt)]->GetCommonTexture(); + if (unlikely(!tex->MarkHazardous())) { + TransitionImage(tex, VK_IMAGE_LAYOUT_GENERAL); + m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); + } + } + } + + + template + void D3D9DeviceEx::UpdatePointMode() { + if constexpr (!Points) { + m_lastPointMode = 0; + + EmitCs([](DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PointMode, 0); + }); + } + else { + auto& rs = m_state.renderStates; + + const bool scale = rs[D3DRS_POINTSCALEENABLE] && !UseProgrammableVS(); + const bool sprite = rs[D3DRS_POINTSPRITEENABLE]; + + const uint32_t scaleBit = scale ? 1u : 0u; + const uint32_t spriteBit = sprite ? 2u : 0u; + + uint32_t mode = scaleBit | spriteBit; + + if (rs[D3DRS_POINTSCALEENABLE] && m_flags.test(D3D9DeviceFlag::DirtyPointScale)) { + m_flags.clr(D3D9DeviceFlag::DirtyPointScale); + + UpdatePushConstant(); + UpdatePushConstant(); + UpdatePushConstant(); + } + + if (unlikely(mode != m_lastPointMode)) { + EmitCs([cMode = mode] (DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PointMode, cMode); + }); + + m_lastPointMode = mode; + } + } + } + + + void D3D9DeviceEx::UpdateFog() { + auto& rs = m_state.renderStates; + + bool fogEnabled = rs[D3DRS_FOGENABLE]; + + bool pixelFog = rs[D3DRS_FOGTABLEMODE] != D3DFOG_NONE && fogEnabled; + bool vertexFog = rs[D3DRS_FOGVERTEXMODE] != D3DFOG_NONE && fogEnabled && !pixelFog; + + auto UpdateFogConstants = [&](D3DFOGMODE FogMode) { + if (m_flags.test(D3D9DeviceFlag::DirtyFogColor)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogColor); + UpdatePushConstant(); + } + + if (FogMode == D3DFOG_LINEAR) { + if (m_flags.test(D3D9DeviceFlag::DirtyFogScale)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogScale); + UpdatePushConstant(); + } + + if (m_flags.test(D3D9DeviceFlag::DirtyFogEnd)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogEnd); + UpdatePushConstant(); + } + } + else if (FogMode == D3DFOG_EXP 
|| FogMode == D3DFOG_EXP2) { + if (m_flags.test(D3D9DeviceFlag::DirtyFogDensity)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogDensity); + UpdatePushConstant(); + } + } + }; + + if (vertexFog) { + D3DFOGMODE mode = D3DFOGMODE(rs[D3DRS_FOGVERTEXMODE]); + + UpdateFogConstants(mode); + + if (m_flags.test(D3D9DeviceFlag::DirtyFogState)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogState); + + EmitCs([cMode = mode] (DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::FogEnabled, true); + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexFogMode, cMode); + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelFogMode, D3DFOG_NONE); + }); + } + } + else if (pixelFog) { + D3DFOGMODE mode = D3DFOGMODE(rs[D3DRS_FOGTABLEMODE]); + + UpdateFogConstants(mode); + + if (m_flags.test(D3D9DeviceFlag::DirtyFogState)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogState); + + EmitCs([cMode = mode] (DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::FogEnabled, true); + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexFogMode, D3DFOG_NONE); + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelFogMode, cMode); + }); + } + } + else { + if (fogEnabled) + UpdateFogConstants(D3DFOG_NONE); + + if (m_flags.test(D3D9DeviceFlag::DirtyFogState)) { + m_flags.clr(D3D9DeviceFlag::DirtyFogState); + + EmitCs([cEnabled = fogEnabled] (DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::FogEnabled, cEnabled); + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexFogMode, D3DFOG_NONE); + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelFogMode, D3DFOG_NONE); + }); + } + } + } + + + void D3D9DeviceEx::BindFramebuffer() { + m_flags.clr(D3D9DeviceFlag::DirtyFramebuffer); + + DxvkRenderTargets attachments; + + bool srgb = 
m_state.renderStates[D3DRS_SRGBWRITEENABLE]; + + // D3D9 doesn't have the concept of a framebuffer object, + // so we'll just create a new one every time the render + // target bindings are updated. Set up the attachments. + VkSampleCountFlagBits sampleCount = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM; + + for (UINT i = 0; i < m_state.renderTargets.size(); i++) { + if (m_state.renderTargets[i] != nullptr && !m_state.renderTargets[i]->IsNull()) { + const DxvkImageCreateInfo& rtImageInfo = m_state.renderTargets[i]->GetCommonTexture()->GetImage()->info(); + + if (likely(sampleCount == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM)) + sampleCount = rtImageInfo.sampleCount; + else if (unlikely(sampleCount != rtImageInfo.sampleCount)) + continue; + + attachments.color[i] = { + m_state.renderTargets[i]->GetRenderTargetView(srgb), + m_state.renderTargets[i]->GetRenderTargetLayout() }; + } + } + + if (m_state.depthStencil != nullptr) { + const DxvkImageCreateInfo& dsImageInfo = m_state.depthStencil->GetCommonTexture()->GetImage()->info(); + + if (likely(sampleCount == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM || sampleCount == dsImageInfo.sampleCount)) { + attachments.depth = { + m_state.depthStencil->GetDepthStencilView(), + m_state.depthStencil->GetDepthLayout() }; + } + } + + // Create and bind the framebuffer object to the context + EmitCs([ + cAttachments = std::move(attachments) + ] (DxvkContext* ctx) { + ctx->bindRenderTargets(cAttachments); + }); + } + + + void D3D9DeviceEx::BindViewportAndScissor() { + m_flags.clr(D3D9DeviceFlag::DirtyViewportScissor); + + VkViewport viewport; + VkRect2D scissor; + + // D3D9's coordinate system has its origin in the bottom left, + // but the viewport coordinates are aligned to the top-left + // corner so we can get away with flipping the viewport. + const D3DVIEWPORT9& vp = m_state.viewport; + + // Correctness Factor for 1/2 texel offset + float cf = 0.5f; + + // HACK: UE3 bug re. 
tonemapper + shadow sampling being red:- + // We need to bias this, except when it's + // NOT powers of two in order to make + // imprecision biased towards infinity. + if ((vp.Width & (vp.Width - 1)) == 0 + && (vp.Height & (vp.Height - 1)) == 0) + cf -= 1.0f / 128.0f; + + viewport = VkViewport{ + float(vp.X) + cf, float(vp.Height + vp.Y) + cf, + float(vp.Width), -float(vp.Height), + vp.MinZ, vp.MaxZ, + }; + + // Scissor rectangles. Vulkan does not provide an easy way + // to disable the scissor test, so we'll have to set scissor + // rects that are at least as large as the framebuffer. + bool enableScissorTest = m_state.renderStates[D3DRS_SCISSORTESTENABLE]; + + if (enableScissorTest) { + RECT sr = m_state.scissorRect; + + VkOffset2D srPosA; + srPosA.x = std::max(0, sr.left); + srPosA.y = std::max(0, sr.top); + + VkOffset2D srPosB; + srPosB.x = std::max(srPosA.x, sr.right); + srPosB.y = std::max(srPosA.y, sr.bottom); + + VkExtent2D srSize; + srSize.width = uint32_t(srPosB.x - srPosA.x); + srSize.height = uint32_t(srPosB.y - srPosA.y); + + scissor = VkRect2D{ srPosA, srSize }; + } + else { + scissor = VkRect2D{ + VkOffset2D { int32_t(vp.X), int32_t(vp.Y) }, + VkExtent2D { vp.Width, vp.Height }}; + } + + EmitCs([ + cViewport = viewport, + cScissor = scissor + ] (DxvkContext* ctx) { + ctx->setViewports( + 1, + &cViewport, + &cScissor); + }); + } + + + void D3D9DeviceEx::BindMultiSampleState() { + m_flags.clr(D3D9DeviceFlag::DirtyMultiSampleState); + + DxvkMultisampleState msState; + msState.sampleMask = m_flags.test(D3D9DeviceFlag::ValidSampleMask) + ? 
m_state.renderStates[D3DRS_MULTISAMPLEMASK] + : 0xffffffff; + msState.enableAlphaToCoverage = IsAlphaToCoverageEnabled(); + + EmitCs([ + cState = msState + ] (DxvkContext* ctx) { + ctx->setMultisampleState(cState); + }); + } + + + void D3D9DeviceEx::BindBlendState() { + m_flags.clr(D3D9DeviceFlag::DirtyBlendState); + + auto& state = m_state.renderStates; + + bool separateAlpha = state[D3DRS_SEPARATEALPHABLENDENABLE]; + + DxvkBlendMode mode; + mode.enableBlending = state[D3DRS_ALPHABLENDENABLE] != FALSE; + + D3D9BlendState color, alpha; + + color.Src = D3DBLEND(state[D3DRS_SRCBLEND]); + color.Dst = D3DBLEND(state[D3DRS_DESTBLEND]); + color.Op = D3DBLENDOP(state[D3DRS_BLENDOP]); + FixupBlendState(color); + + if (separateAlpha) { + alpha.Src = D3DBLEND(state[D3DRS_SRCBLENDALPHA]); + alpha.Dst = D3DBLEND(state[D3DRS_DESTBLENDALPHA]); + alpha.Op = D3DBLENDOP(state[D3DRS_BLENDOPALPHA]); + FixupBlendState(alpha); + } + else + alpha = color; + + mode.colorSrcFactor = DecodeBlendFactor(color.Src, false); + mode.colorDstFactor = DecodeBlendFactor(color.Dst, false); + mode.colorBlendOp = DecodeBlendOp (color.Op); + + mode.alphaSrcFactor = DecodeBlendFactor(alpha.Src, true); + mode.alphaDstFactor = DecodeBlendFactor(alpha.Dst, true); + mode.alphaBlendOp = DecodeBlendOp (alpha.Op); + + mode.writeMask = state[ColorWriteIndex(0)]; + + std::array extraWriteMasks; + for (uint32_t i = 0; i < 3; i++) + extraWriteMasks[i] = state[ColorWriteIndex(i + 1)]; + + EmitCs([ + cMode = mode, + cWriteMasks = extraWriteMasks, + cAlphaMasks = m_alphaSwizzleRTs + ](DxvkContext* ctx) { + for (uint32_t i = 0; i < 4; i++) { + DxvkBlendMode mode = cMode; + if (i != 0) + mode.writeMask = cWriteMasks[i - 1]; + + const bool alphaSwizzle = cAlphaMasks & (1 << i); + + auto NormalizeFactor = [alphaSwizzle](VkBlendFactor Factor) { + if (alphaSwizzle) { + if (Factor == VK_BLEND_FACTOR_DST_ALPHA) + return VK_BLEND_FACTOR_ONE; + else if (Factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA) + return 
VK_BLEND_FACTOR_ZERO; + } + + return Factor; + }; + + mode.colorSrcFactor = NormalizeFactor(mode.colorSrcFactor); + mode.colorDstFactor = NormalizeFactor(mode.colorDstFactor); + mode.alphaSrcFactor = NormalizeFactor(mode.alphaSrcFactor); + mode.alphaDstFactor = NormalizeFactor(mode.alphaDstFactor); + + ctx->setBlendMode(i, mode); + } + }); + } + + + void D3D9DeviceEx::BindBlendFactor() { + DxvkBlendConstants blendConstants; + DecodeD3DCOLOR( + D3DCOLOR(m_state.renderStates[D3DRS_BLENDFACTOR]), + reinterpret_cast(&blendConstants)); + + EmitCs([ + cBlendConstants = blendConstants + ](DxvkContext* ctx) { + ctx->setBlendConstants(cBlendConstants); + }); + } + + + void D3D9DeviceEx::BindDepthStencilState() { + m_flags.clr(D3D9DeviceFlag::DirtyDepthStencilState); + + auto& rs = m_state.renderStates; + + bool stencil = rs[D3DRS_STENCILENABLE]; + bool twoSidedStencil = stencil && rs[D3DRS_TWOSIDEDSTENCILMODE]; + + DxvkDepthStencilState state; + state.enableDepthTest = rs[D3DRS_ZENABLE] != FALSE; + state.enableDepthWrite = rs[D3DRS_ZWRITEENABLE] != FALSE; + state.enableStencilTest = stencil; + state.depthCompareOp = DecodeCompareOp(D3DCMPFUNC(rs[D3DRS_ZFUNC])); + + if (stencil) { + state.stencilOpFront.failOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_STENCILFAIL])); + state.stencilOpFront.passOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_STENCILPASS])); + state.stencilOpFront.depthFailOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_STENCILZFAIL])); + state.stencilOpFront.compareOp = DecodeCompareOp(D3DCMPFUNC (rs[D3DRS_STENCILFUNC])); + state.stencilOpFront.compareMask = uint32_t(rs[D3DRS_STENCILMASK]); + state.stencilOpFront.writeMask = uint32_t(rs[D3DRS_STENCILWRITEMASK]); + state.stencilOpFront.reference = 0; + } + else + state.stencilOpFront = VkStencilOpState(); + + if (twoSidedStencil) { + state.stencilOpBack.failOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_CCW_STENCILFAIL])); + state.stencilOpBack.passOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_CCW_STENCILPASS])); + 
state.stencilOpBack.depthFailOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_CCW_STENCILZFAIL])); + state.stencilOpBack.compareOp = DecodeCompareOp(D3DCMPFUNC (rs[D3DRS_CCW_STENCILFUNC])); + state.stencilOpBack.compareMask = state.stencilOpFront.compareMask; + state.stencilOpBack.writeMask = state.stencilOpFront.writeMask; + state.stencilOpBack.reference = 0; + } + else + state.stencilOpBack = state.stencilOpFront; + + EmitCs([ + cState = state + ](DxvkContext* ctx) { + ctx->setDepthStencilState(cState); + }); + } + + + void D3D9DeviceEx::BindRasterizerState() { + m_flags.clr(D3D9DeviceFlag::DirtyRasterizerState); + + // TODO: Can we get a specific non-magic number in Vulkan for this based on device/adapter? + constexpr float DepthBiasFactor = float(1 << 23); + + auto& rs = m_state.renderStates; + + float depthBias = bit::cast(rs[D3DRS_DEPTHBIAS]) * DepthBiasFactor; + float slopeScaledDepthBias = bit::cast(rs[D3DRS_SLOPESCALEDEPTHBIAS]); + + DxvkRasterizerState state; + state.cullMode = DecodeCullMode(D3DCULL(rs[D3DRS_CULLMODE])); + state.depthBiasEnable = depthBias != 0.0f || slopeScaledDepthBias != 0.0f; + state.depthClipEnable = true; + state.frontFace = VK_FRONT_FACE_CLOCKWISE; + state.polygonMode = DecodeFillMode(D3DFILLMODE(rs[D3DRS_FILLMODE])); + state.sampleCount = 0; + + DxvkDepthBias biases; + biases.depthBiasConstant = depthBias; + biases.depthBiasSlope = slopeScaledDepthBias; + biases.depthBiasClamp = 0.0f; + + EmitCs([ + cState = state, + cBiases = biases + ](DxvkContext* ctx) { + ctx->setRasterizerState(cState); + ctx->setDepthBias(cBiases); + }); + } + + + void D3D9DeviceEx::BindAlphaTestState() { + m_flags.clr(D3D9DeviceFlag::DirtyAlphaTestState); + + auto& rs = m_state.renderStates; + + VkCompareOp alphaOp = IsAlphaTestEnabled() + ? 
DecodeCompareOp(D3DCMPFUNC(rs[D3DRS_ALPHAFUNC])) + : VK_COMPARE_OP_ALWAYS; + + EmitCs([cAlphaOp = alphaOp] (DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::AlphaTestEnable, cAlphaOp != VK_COMPARE_OP_ALWAYS); + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::AlphaCompareOp, cAlphaOp); + }); + } + + + void D3D9DeviceEx::BindDepthStencilRefrence() { + auto& rs = m_state.renderStates; + + uint32_t ref = uint32_t(rs[D3DRS_STENCILREF]); + + EmitCs([cRef = ref] (DxvkContext* ctx) { + ctx->setStencilReference(cRef); + }); + } + + + void D3D9DeviceEx::BindSampler(DWORD Sampler) { + auto& state = m_state.samplerStates[Sampler]; + + D3D9SamplerKey key; + key.AddressU = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSU]); + key.AddressV = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSV]); + key.AddressW = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSW]); + key.MagFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MAGFILTER]); + key.MinFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MINFILTER]); + key.MipFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MIPFILTER]); + key.MaxAnisotropy = state[D3DSAMP_MAXANISOTROPY]; + key.MipmapLodBias = bit::cast(state[D3DSAMP_MIPMAPLODBIAS]); + key.MaxMipLevel = state[D3DSAMP_MAXMIPLEVEL]; + DecodeD3DCOLOR(D3DCOLOR(state[D3DSAMP_BORDERCOLOR]), key.BorderColor); + + if (m_d3d9Options.samplerAnisotropy != -1) { + if (key.MagFilter == D3DTEXF_LINEAR) + key.MagFilter = D3DTEXF_ANISOTROPIC; + + if (key.MinFilter == D3DTEXF_LINEAR) + key.MinFilter = D3DTEXF_ANISOTROPIC; + + key.MaxAnisotropy = m_d3d9Options.samplerAnisotropy; + } + + NormalizeSamplerKey(key); + + auto samplerInfo = RemapStateSamplerShader(Sampler); + + const uint32_t colorSlot = computeResourceSlotId( + samplerInfo.first, DxsoBindingType::ColorImage, + samplerInfo.second); + + const uint32_t depthSlot = computeResourceSlotId( + samplerInfo.first, DxsoBindingType::DepthImage, + samplerInfo.second); + + EmitCs([this, + cColorSlot = colorSlot, + 
cDepthSlot = depthSlot, + cKey = key + ] (DxvkContext* ctx) { + auto pair = m_samplers.find(cKey); + if (pair != m_samplers.end()) { + ctx->bindResourceSampler(cColorSlot, pair->second.color); + ctx->bindResourceSampler(cDepthSlot, pair->second.depth); + return; + } + + auto mipFilter = DecodeMipFilter(cKey.MipFilter); + + DxvkSamplerCreateInfo colorInfo; + colorInfo.addressModeU = DecodeAddressMode(cKey.AddressU); + colorInfo.addressModeV = DecodeAddressMode(cKey.AddressV); + colorInfo.addressModeW = DecodeAddressMode(cKey.AddressW); + colorInfo.compareToDepth = VK_FALSE; + colorInfo.compareOp = VK_COMPARE_OP_NEVER; + colorInfo.magFilter = DecodeFilter(cKey.MagFilter); + colorInfo.minFilter = DecodeFilter(cKey.MinFilter); + colorInfo.mipmapMode = mipFilter.MipFilter; + colorInfo.maxAnisotropy = float(cKey.MaxAnisotropy); + colorInfo.useAnisotropy = IsAnisotropic(cKey.MinFilter) + || IsAnisotropic(cKey.MagFilter); + colorInfo.mipmapLodBias = cKey.MipmapLodBias; + colorInfo.mipmapLodMin = mipFilter.MipsEnabled ? float(cKey.MaxMipLevel) : 0; + colorInfo.mipmapLodMax = mipFilter.MipsEnabled ? FLT_MAX : 0; + colorInfo.usePixelCoord = VK_FALSE; + for (uint32_t i = 0; i < 4; i++) + colorInfo.borderColor.float32[i] = cKey.BorderColor[i]; + + // HACK: Let's get OPAQUE_WHITE border color over + // TRANSPARENT_BLACK if the border RGB is white. + if (colorInfo.borderColor.float32[0] == 1.0f + && colorInfo.borderColor.float32[1] == 1.0f + && colorInfo.borderColor.float32[2] == 1.0f) { + // Then set the alpha to 1. 
+ colorInfo.borderColor.float32[3] = 1.0f; + } + + DxvkSamplerCreateInfo depthInfo = colorInfo; + depthInfo.compareToDepth = VK_TRUE; + depthInfo.compareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + depthInfo.magFilter = VK_FILTER_LINEAR; + depthInfo.minFilter = VK_FILTER_LINEAR; + + try { + D3D9SamplerPair pair; + + pair.color = m_dxvkDevice->createSampler(colorInfo); + pair.depth = m_dxvkDevice->createSampler(depthInfo); + + m_samplerCount++; + + m_samplers.insert(std::make_pair(cKey, pair)); + ctx->bindResourceSampler(cColorSlot, pair.color); + ctx->bindResourceSampler(cDepthSlot, pair.depth); + } + catch (const DxvkError& e) { + Logger::err(e.message()); + } + }); + } + + + void D3D9DeviceEx::BindTexture(DWORD StateSampler) { + auto shaderSampler = RemapStateSamplerShader(StateSampler); + + uint32_t colorSlot = computeResourceSlotId(shaderSampler.first, + DxsoBindingType::ColorImage, uint32_t(shaderSampler.second)); + + uint32_t depthSlot = computeResourceSlotId(shaderSampler.first, + DxsoBindingType::DepthImage, uint32_t(shaderSampler.second)); + + const bool srgb = + m_state.samplerStates[StateSampler][D3DSAMP_SRGBTEXTURE]; + + D3D9CommonTexture* commonTex = + GetCommonTexture(m_state.textures[StateSampler]); + + // For all our pixel shader textures + if (likely(StateSampler < 16)) { + const uint32_t offset = StateSampler * 2; + const uint32_t textureType = commonTex != nullptr + ? 
uint32_t(commonTex->GetType() - D3DRTYPE_TEXTURE) + : 0; + const uint32_t textureBitMask = 0b11u << offset; + const uint32_t textureBits = textureType << offset; + + m_samplerTypeBitfield &= ~textureBitMask; + m_samplerTypeBitfield |= textureBits; + } + + if (commonTex == nullptr) { + EmitCs([ + cColorSlot = colorSlot, + cDepthSlot = depthSlot + ](DxvkContext* ctx) { + ctx->bindResourceView(cColorSlot, nullptr, nullptr); + ctx->bindResourceView(cDepthSlot, nullptr, nullptr); + }); + return; + } + + EmitCs([ + cColorSlot = colorSlot, + cDepthSlot = depthSlot, + cDepth = commonTex->IsShadow(), + cImageView = commonTex->GetViews().Sample.Pick(srgb) + ](DxvkContext* ctx) { + ctx->bindResourceView(cColorSlot, !cDepth ? cImageView : nullptr, nullptr); + ctx->bindResourceView(cDepthSlot, cDepth ? cImageView : nullptr, nullptr); + }); + } + + + void D3D9DeviceEx::UndirtySamplers() { + for (uint32_t dirty = m_dirtySamplerStates; dirty; dirty &= dirty - 1) + BindSampler(bit::tzcnt(dirty)); + + m_dirtySamplerStates = 0; + } + + + void D3D9DeviceEx::MarkSamplersDirty() { + m_dirtySamplerStates = 0x001fffff; // 21 bits. + } + + + D3D9DrawInfo D3D9DeviceEx::GenerateDrawInfo( + D3DPRIMITIVETYPE PrimitiveType, + UINT PrimitiveCount, + UINT InstanceCount) { + D3D9DrawInfo drawInfo; + drawInfo.vertexCount = GetVertexCount(PrimitiveType, PrimitiveCount); + drawInfo.instanceCount = m_iaState.streamsInstanced & m_iaState.streamsUsed + ? 
InstanceCount + : 1u; + return drawInfo; + } + + + uint32_t D3D9DeviceEx::GetInstanceCount() const { + return std::max(m_state.streamFreq[0] & 0x7FFFFFu, 1u); + } + + + void D3D9DeviceEx::PrepareDraw(D3DPRIMITIVETYPE PrimitiveType, bool up) { + if (unlikely(m_activeHazards != 0)) { + EmitCs([](DxvkContext* ctx) { + ctx->emitRenderTargetReadbackBarrier(); + }); + + if (m_d3d9Options.generalHazards) + MarkRenderHazards(); + } + + for (uint32_t i = 0; i < caps::MaxStreams; i++) { + auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer); + if (vbo != nullptr && vbo->NeedsUpload()) + FlushBuffer(vbo); + } + + auto* ibo = GetCommonBuffer(m_state.indices); + if (ibo != nullptr && ibo->NeedsUpload()) + FlushBuffer(ibo); + + UpdateFog(); + + if (m_flags.test(D3D9DeviceFlag::DirtyFramebuffer)) + BindFramebuffer(); + + if (m_flags.test(D3D9DeviceFlag::DirtyViewportScissor)) + BindViewportAndScissor(); + + if (m_dirtySamplerStates) + UndirtySamplers(); + + if (m_flags.test(D3D9DeviceFlag::DirtyBlendState)) + BindBlendState(); + + if (m_flags.test(D3D9DeviceFlag::DirtyDepthStencilState)) + BindDepthStencilState(); + + if (m_flags.test(D3D9DeviceFlag::DirtyRasterizerState)) + BindRasterizerState(); + + if (m_flags.test(D3D9DeviceFlag::DirtyMultiSampleState)) + BindMultiSampleState(); + + if (m_flags.test(D3D9DeviceFlag::DirtyAlphaTestState)) + BindAlphaTestState(); + + if (m_flags.test(D3D9DeviceFlag::DirtyClipPlanes)) + UpdateClipPlanes(); + + if (PrimitiveType == D3DPT_POINTLIST) + UpdatePointMode(); + else if (m_lastPointMode != 0) + UpdatePointMode(); + + if (!up && m_flags.test(D3D9DeviceFlag::UpDirtiedVertices)) { + m_flags.clr(D3D9DeviceFlag::UpDirtiedVertices); + if (m_state.vertexBuffers[0].vertexBuffer != nullptr) + BindVertexBuffer(0, + m_state.vertexBuffers[0].vertexBuffer.ptr(), + m_state.vertexBuffers[0].offset, + m_state.vertexBuffers[0].stride); + } + + if (!up && m_flags.test(D3D9DeviceFlag::UpDirtiedIndices)) { + 
m_flags.clr(D3D9DeviceFlag::UpDirtiedIndices); + BindIndices(); + } + + if (likely(UseProgrammableVS())) { + if (unlikely(m_flags.test(D3D9DeviceFlag::DirtyProgVertexShader))) { + m_flags.set(D3D9DeviceFlag::DirtyInputLayout); + + BindShader( + GetCommonShader(m_state.vertexShader), + GetVertexShaderPermutation()); + } + UploadConstants(); + } + else + UpdateFixedFunctionVS(); + + if (m_flags.test(D3D9DeviceFlag::DirtyInputLayout)) + BindInputLayout(); + + auto UpdateSamplerTypes = [&](uint32_t types, uint32_t projections) { + if (m_lastSamplerTypeBitfield != types) + UpdateSamplerSpecConsant(types); + + if (m_lastProjectionBitfield != projections) + UpdateProjectionSpecConstant(projections); + }; + + if (likely(UseProgrammablePS())) { + UploadConstants(); + + if (GetCommonShader(m_state.pixelShader)->GetInfo().majorVersion() >= 2) + UpdateSamplerTypes(0u, 0u); + else + UpdateSamplerTypes(m_samplerTypeBitfield, m_projectionBitfield); // For implicit samplers... + } + else { + UpdateSamplerTypes(0u, 0u); + + UpdateFixedFunctionPS(); + } + + if (m_flags.test(D3D9DeviceFlag::DirtySharedPixelShaderData)) { + m_flags.clr(D3D9DeviceFlag::DirtySharedPixelShaderData); + + DxvkBufferSliceHandle slice = m_psShared->allocSlice(); + + EmitCs([ + cBuffer = m_psShared, + cSlice = slice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cSlice); + }); + + D3D9SharedPS* data = reinterpret_cast(slice.mapPtr); + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + DecodeD3DCOLOR(D3DCOLOR(m_state.textureStages[i][D3DTSS_CONSTANT]), data->Stages[i].Constant); + + // Flip major-ness so we can get away with a nice easy + // dot in the shader without complex access + data->Stages[i].BumpEnvMat[0][0] = bit::cast(m_state.textureStages[i][D3DTSS_BUMPENVMAT00]); + data->Stages[i].BumpEnvMat[1][0] = bit::cast(m_state.textureStages[i][D3DTSS_BUMPENVMAT01]); + data->Stages[i].BumpEnvMat[0][1] = bit::cast(m_state.textureStages[i][D3DTSS_BUMPENVMAT10]); + 
data->Stages[i].BumpEnvMat[1][1] = bit::cast(m_state.textureStages[i][D3DTSS_BUMPENVMAT11]); + + data->Stages[i].BumpEnvLScale = bit::cast(m_state.textureStages[i][D3DTSS_BUMPENVLSCALE]); + data->Stages[i].BumpEnvLOffset = bit::cast(m_state.textureStages[i][D3DTSS_BUMPENVLOFFSET]); + } + } + + if (m_flags.test(D3D9DeviceFlag::DirtyDepthBounds)) { + m_flags.clr(D3D9DeviceFlag::DirtyDepthBounds); + + DxvkDepthBounds db; + db.enableDepthBounds = (m_state.renderStates[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB)); + db.minDepthBounds = bit::cast(m_state.renderStates[D3DRS_ADAPTIVETESS_Z]); + db.maxDepthBounds = bit::cast(m_state.renderStates[D3DRS_ADAPTIVETESS_W]); + + EmitCs([ + cDepthBounds = db + ] (DxvkContext* ctx) { + ctx->setDepthBounds(cDepthBounds); + }); + } + } + + + template + void D3D9DeviceEx::BindShader( + const D3D9CommonShader* pShaderModule, + D3D9ShaderPermutation Permutation) { + EmitCs([ + cShader = pShaderModule->GetShader(Permutation) + ] (DxvkContext* ctx) { + ctx->bindShader(GetShaderStage(ShaderStage), cShader); + }); + } + + + void D3D9DeviceEx::BindInputLayout() { + m_flags.clr(D3D9DeviceFlag::DirtyInputLayout); + + if (m_state.vertexDecl == nullptr) { + EmitCs([&cIaState = m_iaState] (DxvkContext* ctx) { + cIaState.streamsUsed = 0; + ctx->setInputLayout(0, nullptr, 0, nullptr); + }); + } + else { + std::array streamFreq; + + for (uint32_t i = 0; i < caps::MaxStreams; i++) + streamFreq[i] = m_state.streamFreq[i]; + + Com vertexDecl = m_state.vertexDecl; + Com vertexShader; + + if (UseProgrammableVS()) + vertexShader = m_state.vertexShader; + + EmitCs([ + &cIaState = m_iaState, + cVertexDecl = std::move(vertexDecl), + cVertexShader = std::move(vertexShader), + cStreamsInstanced = m_instancedData, + cStreamFreq = streamFreq + ] (DxvkContext* ctx) { + cIaState.streamsInstanced = cStreamsInstanced; + cIaState.streamsUsed = 0; + + const auto& elements = cVertexDecl->GetElements(); + + std::array attrList; + std::array bindList; + + 
uint32_t attrMask = 0; + uint32_t bindMask = 0; + + const auto& isgn = cVertexShader != nullptr + ? GetCommonShader(cVertexShader)->GetIsgn() + : GetFixedFunctionIsgn(); + + for (uint32_t i = 0; i < isgn.elemCount; i++) { + const auto& decl = isgn.elems[i]; + + DxvkVertexAttribute attrib; + attrib.location = i; + attrib.binding = NullStreamIdx; + attrib.format = VK_FORMAT_R32G32B32A32_SFLOAT; + attrib.offset = 0; + + for (const auto& element : elements) { + DxsoSemantic elementSemantic = { static_cast(element.Usage), element.UsageIndex }; + if (elementSemantic.usage == DxsoUsage::PositionT) + elementSemantic.usage = DxsoUsage::Position; + + if (elementSemantic == decl.semantic) { + attrib.binding = uint32_t(element.Stream); + attrib.format = DecodeDecltype(D3DDECLTYPE(element.Type)); + attrib.offset = element.Offset; + + cIaState.streamsUsed |= 1u << attrib.binding; + break; + } + } + + attrList[i] = attrib; + + DxvkVertexBinding binding; + binding.binding = attrib.binding; + + uint32_t instanceData = cStreamFreq[binding.binding % caps::MaxStreams]; + if (instanceData & D3DSTREAMSOURCE_INSTANCEDATA) { + binding.fetchRate = instanceData & 0x7FFFFF; // Remove instance packed-in flags in the data. + binding.inputRate = VK_VERTEX_INPUT_RATE_INSTANCE; + } + else { + binding.fetchRate = 0; + binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + } + + // Check if the binding was already defined. 
+ bool bindingDefined = false; + + for (uint32_t j = 0; j < i; j++) { + uint32_t bindingId = attrList.at(j).binding; + + if (binding.binding == bindingId) { + bindingDefined = true; + } + } + + if (!bindingDefined) + bindList.at(binding.binding) = binding; + + attrMask |= 1u << i; + bindMask |= 1u << binding.binding; + } + + // Compact the attribute and binding lists to filter + // out attributes and bindings not used by the shader + uint32_t attrCount = CompactSparseList(attrList.data(), attrMask); + uint32_t bindCount = CompactSparseList(bindList.data(), bindMask); + + ctx->setInputLayout( + attrCount, attrList.data(), + bindCount, bindList.data()); + }); + } + } + + + void D3D9DeviceEx::BindVertexBuffer( + UINT Slot, + D3D9VertexBuffer* pBuffer, + UINT Offset, + UINT Stride) { + EmitCs([ + cSlotId = Slot, + cBufferSlice = pBuffer != nullptr ? + pBuffer->GetCommonBuffer()->GetBufferSlice(Offset) + : DxvkBufferSlice(), + cStride = pBuffer != nullptr ? Stride : 0 + ] (DxvkContext* ctx) { + ctx->bindVertexBuffer(cSlotId, cBufferSlice, cStride); + }); + } + + void D3D9DeviceEx::BindIndices() { + D3D9CommonBuffer* buffer = GetCommonBuffer(m_state.indices); + + D3D9Format format = buffer != nullptr + ? buffer->Desc()->Format + : D3D9Format::INDEX32; + + const VkIndexType indexType = DecodeIndexType(format); + + EmitCs([ + cBufferSlice = buffer != nullptr ? buffer->GetBufferSlice() : DxvkBufferSlice(), + cIndexType = indexType + ](DxvkContext* ctx) { + ctx->bindIndexBuffer(cBufferSlice, cIndexType); + }); + } + + + void D3D9DeviceEx::Begin(D3D9Query* pQuery) { + D3D9DeviceLock lock = LockDevice(); + + EmitCs([cQuery = Com(pQuery)](DxvkContext* ctx) { + cQuery->Begin(ctx); + }); + } + + + void D3D9DeviceEx::End(D3D9Query* pQuery) { + D3D9DeviceLock lock = LockDevice(); + + if (unlikely(pQuery->IsEvent())) { + pQuery->NotifyEnd(); + pQuery->IsStalling() + ? 
Flush() + : FlushImplicit(TRUE); + } + + EmitCs([cQuery = Com(pQuery)](DxvkContext* ctx) { + cQuery->End(ctx); + }); + } + + + void D3D9DeviceEx::SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { + m_state.vsConsts.bConsts[idx] &= ~mask; + m_state.vsConsts.bConsts[idx] |= bits & mask; + + m_consts[DxsoProgramTypes::VertexShader].dirty = true; + } + + + void D3D9DeviceEx::SetPixelBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { + m_state.psConsts.bConsts[idx] &= ~mask; + m_state.psConsts.bConsts[idx] |= bits & mask; + + m_consts[DxsoProgramTypes::PixelShader].dirty = true; + } + + + HRESULT D3D9DeviceEx::CreateShaderModule( + D3D9CommonShader* pShaderModule, + VkShaderStageFlagBits ShaderStage, + const DWORD* pShaderBytecode, + const DxsoModuleInfo* pModuleInfo) { + try { + *pShaderModule = m_shaderModules->GetShaderModule(this, + ShaderStage, pModuleInfo, pShaderBytecode); + + return D3D_OK; + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_INVALIDCALL; + } + } + + + template < + DxsoProgramType ProgramType, + D3D9ConstantType ConstantType, + typename T> + HRESULT D3D9DeviceEx::SetShaderConstants( + UINT StartRegister, + const T* pConstantData, + UINT Count) { + const uint32_t regCountHardware = DetermineHardwareRegCount(); + constexpr uint32_t regCountSoftware = DetermineSoftwareRegCount(); + + if (unlikely(StartRegister + Count > regCountSoftware)) + return D3DERR_INVALIDCALL; + + Count = UINT( + std::max( + std::clamp(Count + StartRegister, 0, regCountHardware) - INT(StartRegister), + 0)); + + if (unlikely(Count == 0)) + return D3D_OK; + + if (unlikely(pConstantData == nullptr)) + return D3DERR_INVALIDCALL; + + if (unlikely(ShouldRecord())) + return m_recorder->SetShaderConstants( + StartRegister, + pConstantData, + Count); + + auto DetermineMaxCount = [&](const auto& shader) { + if (unlikely(shader == nullptr)) + return 0u; + + const auto& meta = GetCommonShader(shader)->GetMeta(); + + if constexpr 
(ConstantType == D3D9ConstantType::Float) + return meta.maxConstIndexF; + else if constexpr (ConstantType == D3D9ConstantType::Int) + return meta.maxConstIndexI; + else + return meta.maxConstIndexB; + }; + + uint32_t maxCount = ProgramType == DxsoProgramTypes::VertexShader + ? DetermineMaxCount(m_state.vertexShader) + : DetermineMaxCount(m_state.pixelShader); + + m_consts[ProgramType].dirty |= StartRegister < maxCount; + + UpdateStateConstants( + &m_state, + StartRegister, + pConstantData, + Count, + m_d3d9Options.d3d9FloatEmulation); + + return D3D_OK; + } + + + void D3D9DeviceEx::UpdateFixedFunctionVS() { + // Shader... + bool hasPositionT = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT) : false; + bool hasBlendWeight = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasBlendWeight) : false; + bool hasBlendIndices = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasBlendIndices) : false; + + bool indexedVertexBlend = hasBlendIndices && m_state.renderStates[D3DRS_INDEXEDVERTEXBLENDENABLE]; + + D3D9FF_VertexBlendMode vertexBlendMode = D3D9FF_VertexBlendMode_Disabled; + + if (m_state.renderStates[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE && !hasPositionT) { + vertexBlendMode = m_state.renderStates[D3DRS_VERTEXBLEND] == D3DVBF_TWEENING + ? 
D3D9FF_VertexBlendMode_Tween + : D3D9FF_VertexBlendMode_Normal; + + if (m_state.renderStates[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS) { + if (!hasBlendWeight) + vertexBlendMode = D3D9FF_VertexBlendMode_Disabled; + } + else if (!indexedVertexBlend) + vertexBlendMode = D3D9FF_VertexBlendMode_Disabled; + } + + if (unlikely(hasPositionT && m_state.vertexShader != nullptr && !m_flags.test(D3D9DeviceFlag::DirtyProgVertexShader))) { + m_flags.set(D3D9DeviceFlag::DirtyInputLayout); + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + m_flags.set(D3D9DeviceFlag::DirtyProgVertexShader); + } + + if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexShader)) { + m_flags.clr(D3D9DeviceFlag::DirtyFFVertexShader); + + D3D9FFShaderKeyVS key; + key.Data.Contents.HasPositionT = hasPositionT; + key.Data.Contents.HasColor0 = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor0) : false; + key.Data.Contents.HasColor1 = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor1) : false; + key.Data.Contents.HasPointSize = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPointSize) : false; + key.Data.Contents.HasFog = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasFog) : false; + + bool lighting = m_state.renderStates[D3DRS_LIGHTING] != 0 && !key.Data.Contents.HasPositionT; + bool colorVertex = m_state.renderStates[D3DRS_COLORVERTEX] != 0; + uint32_t mask = (lighting && colorVertex) + ? (key.Data.Contents.HasColor0 ? D3DMCS_COLOR1 : D3DMCS_MATERIAL) + | (key.Data.Contents.HasColor1 ? 
D3DMCS_COLOR2 : D3DMCS_MATERIAL) + : 0; + + key.Data.Contents.UseLighting = lighting; + key.Data.Contents.NormalizeNormals = m_state.renderStates[D3DRS_NORMALIZENORMALS]; + key.Data.Contents.LocalViewer = m_state.renderStates[D3DRS_LOCALVIEWER] && lighting; + + key.Data.Contents.RangeFog = m_state.renderStates[D3DRS_RANGEFOGENABLE]; + + key.Data.Contents.DiffuseSource = m_state.renderStates[D3DRS_DIFFUSEMATERIALSOURCE] & mask; + key.Data.Contents.AmbientSource = m_state.renderStates[D3DRS_AMBIENTMATERIALSOURCE] & mask; + key.Data.Contents.SpecularSource = m_state.renderStates[D3DRS_SPECULARMATERIALSOURCE] & mask; + key.Data.Contents.EmissiveSource = m_state.renderStates[D3DRS_EMISSIVEMATERIALSOURCE] & mask; + + uint32_t lightCount = 0; + + if (key.Data.Contents.UseLighting) { + for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) { + if (m_state.enabledLightIndices[i] != UINT32_MAX) + lightCount++; + } + } + + key.Data.Contents.LightCount = lightCount; + + for (uint32_t i = 0; i < caps::MaxTextureBlendStages; i++) { + uint32_t transformFlags = m_state.textureStages[i][D3DTSS_TEXTURETRANSFORMFLAGS] & ~(D3DTTFF_PROJECTED); + uint32_t index = m_state.textureStages[i][D3DTSS_TEXCOORDINDEX]; + uint32_t indexFlags = (index & TCIMask) >> TCIOffset; + + transformFlags &= 0b111; + index &= 0b111; + + key.Data.Contents.TransformFlags |= transformFlags << (i * 3); + key.Data.Contents.TexcoordFlags |= indexFlags << (i * 3); + key.Data.Contents.TexcoordIndices |= index << (i * 3); + } + + key.Data.Contents.TexcoordDeclMask = m_state.vertexDecl != nullptr ? 
m_state.vertexDecl->GetTexcoordMask() : 0; + + key.Data.Contents.VertexBlendMode = uint32_t(vertexBlendMode); + + if (vertexBlendMode == D3D9FF_VertexBlendMode_Normal) { + key.Data.Contents.VertexBlendIndexed = indexedVertexBlend; + key.Data.Contents.VertexBlendCount = m_state.renderStates[D3DRS_VERTEXBLEND] & 0xff; + } + + EmitCs([ + this, + cKey = key, + &cShaders = m_ffModules + ](DxvkContext* ctx) { + auto shader = cShaders.GetShaderModule(this, cKey); + ctx->bindShader(VK_SHADER_STAGE_VERTEX_BIT, shader.GetShader()); + }); + } + + if (hasPositionT && (m_flags.test(D3D9DeviceFlag::DirtyFFViewport) || m_ffZTest != IsZTestEnabled())) { + m_flags.clr(D3D9DeviceFlag::DirtyFFViewport); + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + + const auto& vp = m_state.viewport; + // For us to account for the Vulkan viewport rules + // when translating Window Coords -> Real Coords: + // We need to negate the inverse extent we multiply by, + // this follows through to the offset when that gets + // timesed by it. + // The 1.0f additional offset however does not, + // so we account for that there manually. + + m_ffZTest = IsZTestEnabled(); + + float zMin = m_ffZTest ? vp.MinZ : 0.0f; + float zMax = m_ffZTest ? vp.MaxZ : 0.0f; + float zExtent = zMax - zMin; + zExtent = zExtent != 0.0f + ? 1.0f / zExtent + : 0.0f; + + m_viewportInfo.inverseExtent = Vector4( + 2.0f / float(vp.Width), + -2.0f / float(vp.Height), + zExtent, + 1.0f); + + m_viewportInfo.inverseOffset = Vector4( + -float(vp.X), -float(vp.Y), + -zMin, 0.0f); + + m_viewportInfo.inverseOffset = m_viewportInfo.inverseOffset * m_viewportInfo.inverseExtent; + + m_viewportInfo.inverseOffset = m_viewportInfo.inverseOffset + Vector4(-1.0f, 1.0f, 0.0f, 0.0f); + } + + // Constants... 
+ if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexData)) { + m_flags.clr(D3D9DeviceFlag::DirtyFFVertexData); + + DxvkBufferSliceHandle slice = m_vsFixedFunction->allocSlice(); + + EmitCs([ + cBuffer = m_vsFixedFunction, + cSlice = slice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cSlice); + }); + + auto WorldView = m_state.transforms[GetTransformIndex(D3DTS_VIEW)] * m_state.transforms[GetTransformIndex(D3DTS_WORLD)]; + auto NormalMatrix = inverse(WorldView); + + D3D9FixedFunctionVS* data = reinterpret_cast(slice.mapPtr); + data->WorldView = WorldView; + data->NormalMatrix = NormalMatrix; + data->Projection = m_state.transforms[GetTransformIndex(D3DTS_PROJECTION)]; + + for (uint32_t i = 0; i < data->TexcoordMatrices.size(); i++) + data->TexcoordMatrices[i] = m_state.transforms[GetTransformIndex(D3DTS_TEXTURE0) + i]; + + data->ViewportInfo = m_viewportInfo; + + DecodeD3DCOLOR(m_state.renderStates[D3DRS_AMBIENT], data->GlobalAmbient.data); + + uint32_t lightIdx = 0; + for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) { + auto idx = m_state.enabledLightIndices[i]; + if (idx == UINT32_MAX) + continue; + + data->Lights[lightIdx++] = D3D9Light(m_state.lights[idx].value(), m_state.transforms[GetTransformIndex(D3DTS_VIEW)]); + } + + data->Material = m_state.material; + data->TweenFactor = bit::cast(m_state.renderStates[D3DRS_TWEENFACTOR]); + } + + if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexBlend) && vertexBlendMode == D3D9FF_VertexBlendMode_Normal) { + m_flags.clr(D3D9DeviceFlag::DirtyFFVertexBlend); + + DxvkBufferSliceHandle slice = m_vsVertexBlend->allocSlice(); + + EmitCs([ + cBuffer = m_vsVertexBlend, + cSlice = slice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cSlice); + }); + + auto UploadVertexBlendData = [&](auto data) { + for (uint32_t i = 0; i < countof(data->WorldView); i++) + data->WorldView[i] = m_state.transforms[GetTransformIndex(D3DTS_VIEW)] * m_state.transforms[GetTransformIndex(D3DTS_WORLDMATRIX(i))]; + }; + + (m_isSWVP && 
indexedVertexBlend) + ? UploadVertexBlendData(reinterpret_cast(slice.mapPtr)) + : UploadVertexBlendData(reinterpret_cast(slice.mapPtr)); + } + } + + + void D3D9DeviceEx::UpdateFixedFunctionPS() { + // Shader... + if (m_flags.test(D3D9DeviceFlag::DirtyFFPixelShader)) { + m_flags.clr(D3D9DeviceFlag::DirtyFFPixelShader); + + // Used args for a given operation. + auto ArgsMask = [](DWORD Op) { + switch (Op) { + case D3DTOP_DISABLE: + return 0b0u; // No Args + case D3DTOP_SELECTARG1: + case D3DTOP_PREMODULATE: + return 0b10u; // Arg 1 + case D3DTOP_SELECTARG2: + return 0b100u; // Arg 2 + case D3DTOP_MULTIPLYADD: + case D3DTOP_LERP: + return 0b111u; // Arg 0, 1, 2 + default: + return 0b110u; // Arg 1, 2 + } + }; + + D3D9FFShaderKeyFS key; + + uint32_t idx; + for (idx = 0; idx < caps::TextureStageCount; idx++) { + auto& stage = key.Stages[idx].Contents; + auto& data = m_state.textureStages[idx]; + + // Subsequent stages do not occur if this is true. + if (data[D3DTSS_COLOROP] == D3DTOP_DISABLE) + break; + + // If the stage is invalid (ie. no texture bound), + // this and all subsequent stages get disabled. 
+ if (m_state.textures[idx] == nullptr) { + if (((data[D3DTSS_COLORARG0] & D3DTA_SELECTMASK) == D3DTA_TEXTURE && (ArgsMask(data[D3DTSS_COLOROP]) & (1 << 0u))) + || ((data[D3DTSS_COLORARG1] & D3DTA_SELECTMASK) == D3DTA_TEXTURE && (ArgsMask(data[D3DTSS_COLOROP]) & (1 << 1u))) + || ((data[D3DTSS_COLORARG2] & D3DTA_SELECTMASK) == D3DTA_TEXTURE && (ArgsMask(data[D3DTSS_COLOROP]) & (1 << 2u)))) + break; + } + + stage.ColorOp = data[D3DTSS_COLOROP]; + stage.AlphaOp = data[D3DTSS_ALPHAOP]; + + stage.ColorArg0 = data[D3DTSS_COLORARG0]; + stage.ColorArg1 = data[D3DTSS_COLORARG1]; + stage.ColorArg2 = data[D3DTSS_COLORARG2]; + + stage.AlphaArg0 = data[D3DTSS_ALPHAARG0]; + stage.AlphaArg1 = data[D3DTSS_ALPHAARG1]; + stage.AlphaArg2 = data[D3DTSS_ALPHAARG2]; + + const uint32_t samplerOffset = idx * 2; + stage.Type = (m_samplerTypeBitfield >> samplerOffset) & 0xffu; + stage.ResultIsTemp = data[D3DTSS_RESULTARG] == D3DTA_TEMP; + + uint32_t ttff = data[D3DTSS_TEXTURETRANSFORMFLAGS]; + uint32_t count = ttff & ~D3DTTFF_PROJECTED; + + stage.Projected = (ttff & D3DTTFF_PROJECTED) ? 1 : 0; + stage.ProjectedCount = (ttff & D3DTTFF_PROJECTED) ? count : 0; + } + + auto& stage0 = key.Stages[0].Contents; + + if (stage0.ResultIsTemp && + stage0.ColorOp != D3DTOP_DISABLE && + stage0.AlphaOp == D3DTOP_DISABLE) { + stage0.AlphaOp = D3DTOP_SELECTARG1; + stage0.AlphaArg1 = D3DTA_DIFFUSE; + } + + stage0.GlobalSpecularEnable = m_state.renderStates[D3DRS_SPECULARENABLE]; + stage0.GlobalFlatShade = m_state.renderStates[D3DRS_SHADEMODE] == D3DSHADE_FLAT; + + // The last stage *always* writes to current. 
+ if (idx >= 1) + key.Stages[idx - 1].Contents.ResultIsTemp = false; + + EmitCs([ + this, + cKey = key, + &cShaders = m_ffModules + ](DxvkContext* ctx) { + auto shader = cShaders.GetShaderModule(this, cKey); + ctx->bindShader(VK_SHADER_STAGE_FRAGMENT_BIT, shader.GetShader()); + }); + } + + // Constants + + if (m_flags.test(D3D9DeviceFlag::DirtyFFPixelData)) { + m_flags.clr(D3D9DeviceFlag::DirtyFFPixelData); + + DxvkBufferSliceHandle slice = m_psFixedFunction->allocSlice(); + + EmitCs([ + cBuffer = m_psFixedFunction, + cSlice = slice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cSlice); + }); + + auto& rs = m_state.renderStates; + + D3D9FixedFunctionPS* data = reinterpret_cast(slice.mapPtr); + DecodeD3DCOLOR((D3DCOLOR)rs[D3DRS_TEXTUREFACTOR], data->textureFactor.data); + } + } + + + bool D3D9DeviceEx::UseProgrammableVS() { + return m_state.vertexShader != nullptr + && m_state.vertexDecl != nullptr + && !m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT); + } + + + bool D3D9DeviceEx::UseProgrammablePS() { + return m_state.pixelShader != nullptr; + } + + + void D3D9DeviceEx::UpdateSamplerSpecConsant(uint32_t value) { + EmitCs([cBitfield = value](DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::SamplerType, cBitfield); + }); + + m_lastSamplerTypeBitfield = value; + } + + + void D3D9DeviceEx::UpdateProjectionSpecConstant(uint32_t value) { + EmitCs([cBitfield = value](DxvkContext* ctx) { + ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::ProjectionType, cBitfield); + }); + + m_lastProjectionBitfield = value; + } + + + void D3D9DeviceEx::ApplyPrimitiveType( + DxvkContext* pContext, + D3DPRIMITIVETYPE PrimType) { + if (m_iaState.primitiveType != PrimType) { + m_iaState.primitiveType = PrimType; + + auto iaState = DecodeInputAssemblyState(PrimType); + pContext->setInputAssemblyState(iaState); + } + } + + + void D3D9DeviceEx::ResolveZ() { + D3D9Surface* src = 
m_state.depthStencil.ptr(); + IDirect3DBaseTexture9* dst = m_state.textures[0]; + + if (unlikely(!src || !dst)) + return; + + D3D9CommonTexture* srcTextureInfo = GetCommonTexture(src); + D3D9CommonTexture* dstTextureInfo = GetCommonTexture(dst); + + const D3D9_COMMON_TEXTURE_DESC* srcDesc = srcTextureInfo->Desc(); + const D3D9_COMMON_TEXTURE_DESC* dstDesc = dstTextureInfo->Desc(); + + VkSampleCountFlagBits dstSampleCount; + DecodeMultiSampleType(dstDesc->MultiSample, dstDesc->MultisampleQuality, &dstSampleCount); + + if (unlikely(dstSampleCount != VK_SAMPLE_COUNT_1_BIT)) { + Logger::warn("D3D9DeviceEx::ResolveZ: dstSampleCount != 1. Discarding."); + return; + } + + const D3D9_VK_FORMAT_MAPPING srcFormatInfo = LookupFormat(srcDesc->Format); + const D3D9_VK_FORMAT_MAPPING dstFormatInfo = LookupFormat(dstDesc->Format); + + auto srcVulkanFormatInfo = imageFormatInfo(srcFormatInfo.FormatColor); + auto dstVulkanFormatInfo = imageFormatInfo(dstFormatInfo.FormatColor); + + const VkImageSubresource dstSubresource = + dstTextureInfo->GetSubresourceFromIndex( + dstVulkanFormatInfo->aspectMask, 0); + + const VkImageSubresource srcSubresource = + srcTextureInfo->GetSubresourceFromIndex( + srcVulkanFormatInfo->aspectMask, src->GetSubresource()); + + const VkImageSubresourceLayers dstSubresourceLayers = { + dstSubresource.aspectMask, + dstSubresource.mipLevel, + dstSubresource.arrayLayer, 1 }; + + const VkImageSubresourceLayers srcSubresourceLayers = { + srcSubresource.aspectMask, + srcSubresource.mipLevel, + srcSubresource.arrayLayer, 1 }; + + VkSampleCountFlagBits srcSampleCount; + DecodeMultiSampleType(srcDesc->MultiSample, srcDesc->MultisampleQuality, &srcSampleCount); + + if (srcSampleCount == VK_SAMPLE_COUNT_1_BIT) { + EmitCs([ + cDstImage = dstTextureInfo->GetImage(), + cSrcImage = srcTextureInfo->GetImage(), + cDstLayers = dstSubresourceLayers, + cSrcLayers = srcSubresourceLayers + ] (DxvkContext* ctx) { + ctx->copyImage( + cDstImage, cDstLayers, VkOffset3D { 0, 0, 0 }, + 
cSrcImage, cSrcLayers, VkOffset3D { 0, 0, 0 }, + cDstImage->mipLevelExtent(cDstLayers.mipLevel)); + }); + } else { + EmitCs([ + cDstImage = dstTextureInfo->GetImage(), + cSrcImage = srcTextureInfo->GetImage(), + cDstSubres = dstSubresourceLayers, + cSrcSubres = srcSubresourceLayers + ] (DxvkContext* ctx) { + // We should resolve using the first sample according to + // http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Advanced-DX9-Capabilities-for-ATI-Radeon-Cards_v2.pdf + // "The resolve operation copies the depth value from the *first sample only* into the resolved depth stencil texture." + constexpr auto resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR; + + VkImageResolve region; + region.srcSubresource = cSrcSubres; + region.srcOffset = VkOffset3D { 0, 0, 0 }; + region.dstSubresource = cDstSubres; + region.dstOffset = VkOffset3D { 0, 0, 0 }; + region.extent = cDstImage->mipLevelExtent(cDstSubres.mipLevel); + + ctx->resolveDepthStencilImage(cDstImage, cSrcImage, region, resolveMode, resolveMode); + }); + } + + dstTextureInfo->MarkAllDirty(); + } + + + void D3D9DeviceEx::TransitionImage(D3D9CommonTexture* pResource, VkImageLayout NewLayout) { + EmitCs([ + cImage = pResource->GetImage(), + cNewLayout = NewLayout + ] (DxvkContext* ctx) { + ctx->changeImageLayout( + cImage, cNewLayout); + }); + } + + + void D3D9DeviceEx::TransformImage( + D3D9CommonTexture* pResource, + const VkImageSubresourceRange* pSubresources, + VkImageLayout OldLayout, + VkImageLayout NewLayout) { + EmitCs([ + cImage = pResource->GetImage(), + cSubresources = *pSubresources, + cOldLayout = OldLayout, + cNewLayout = NewLayout + ] (DxvkContext* ctx) { + ctx->transformImage( + cImage, cSubresources, + cOldLayout, cNewLayout); + }); + } + + + HRESULT D3D9DeviceEx::ResetState(D3DPRESENT_PARAMETERS* pPresentationParameters) { + if (!pPresentationParameters->EnableAutoDepthStencil) + SetDepthStencilSurface(nullptr); + + for (uint32_t i = 1; i < caps::MaxSimultaneousRenderTargets; i++) + 
SetRenderTarget(0, nullptr); + + auto& rs = m_state.renderStates; + + rs[D3DRS_SEPARATEALPHABLENDENABLE] = FALSE; + rs[D3DRS_ALPHABLENDENABLE] = FALSE; + rs[D3DRS_BLENDOP] = D3DBLENDOP_ADD; + rs[D3DRS_BLENDOPALPHA] = D3DBLENDOP_ADD; + rs[D3DRS_DESTBLEND] = D3DBLEND_ZERO; + rs[D3DRS_DESTBLENDALPHA] = D3DBLEND_ZERO; + rs[D3DRS_COLORWRITEENABLE] = 0x0000000f; + rs[D3DRS_COLORWRITEENABLE1] = 0x0000000f; + rs[D3DRS_COLORWRITEENABLE2] = 0x0000000f; + rs[D3DRS_COLORWRITEENABLE3] = 0x0000000f; + rs[D3DRS_SRCBLEND] = D3DBLEND_ONE; + rs[D3DRS_SRCBLENDALPHA] = D3DBLEND_ONE; + BindBlendState(); + + rs[D3DRS_BLENDFACTOR] = 0xffffffff; + BindBlendFactor(); + + rs[D3DRS_ZENABLE] = pPresentationParameters->EnableAutoDepthStencil + ? D3DZB_TRUE + : D3DZB_FALSE; + rs[D3DRS_ZFUNC] = D3DCMP_LESSEQUAL; + rs[D3DRS_TWOSIDEDSTENCILMODE] = FALSE; + rs[D3DRS_ZWRITEENABLE] = TRUE; + rs[D3DRS_STENCILENABLE] = FALSE; + rs[D3DRS_STENCILFAIL] = D3DSTENCILOP_KEEP; + rs[D3DRS_STENCILZFAIL] = D3DSTENCILOP_KEEP; + rs[D3DRS_STENCILPASS] = D3DSTENCILOP_KEEP; + rs[D3DRS_STENCILFUNC] = D3DCMP_ALWAYS; + rs[D3DRS_CCW_STENCILFAIL] = D3DSTENCILOP_KEEP; + rs[D3DRS_CCW_STENCILZFAIL] = D3DSTENCILOP_KEEP; + rs[D3DRS_CCW_STENCILPASS] = D3DSTENCILOP_KEEP; + rs[D3DRS_CCW_STENCILFUNC] = D3DCMP_ALWAYS; + rs[D3DRS_STENCILMASK] = 0xFFFFFFFF; + rs[D3DRS_STENCILWRITEMASK] = 0xFFFFFFFF; + BindDepthStencilState(); + + rs[D3DRS_STENCILREF] = 0; + BindDepthStencilRefrence(); + + rs[D3DRS_FILLMODE] = D3DFILL_SOLID; + rs[D3DRS_CULLMODE] = D3DCULL_CCW; + rs[D3DRS_DEPTHBIAS] = bit::cast(0.0f); + rs[D3DRS_SLOPESCALEDEPTHBIAS] = bit::cast(0.0f); + BindRasterizerState(); + + rs[D3DRS_SCISSORTESTENABLE] = FALSE; + + rs[D3DRS_ALPHATESTENABLE] = FALSE; + rs[D3DRS_ALPHAFUNC] = D3DCMP_ALWAYS; + BindAlphaTestState(); + rs[D3DRS_ALPHAREF] = 0; + UpdatePushConstant(); + + rs[D3DRS_MULTISAMPLEMASK] = 0xffffffff; + BindMultiSampleState(); + + rs[D3DRS_TEXTUREFACTOR] = 0xffffffff; + m_flags.set(D3D9DeviceFlag::DirtyFFPixelData); + + 
rs[D3DRS_DIFFUSEMATERIALSOURCE] = D3DMCS_COLOR1; + rs[D3DRS_SPECULARMATERIALSOURCE] = D3DMCS_COLOR2; + rs[D3DRS_AMBIENTMATERIALSOURCE] = D3DMCS_MATERIAL; + rs[D3DRS_EMISSIVEMATERIALSOURCE] = D3DMCS_MATERIAL; + rs[D3DRS_LIGHTING] = TRUE; + rs[D3DRS_COLORVERTEX] = TRUE; + rs[D3DRS_LOCALVIEWER] = TRUE; + rs[D3DRS_RANGEFOGENABLE] = FALSE; + rs[D3DRS_NORMALIZENORMALS] = FALSE; + m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); + + // PS + rs[D3DRS_SPECULARENABLE] = FALSE; + + rs[D3DRS_AMBIENT] = 0; + m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); + + rs[D3DRS_FOGENABLE] = FALSE; + rs[D3DRS_FOGCOLOR] = 0; + rs[D3DRS_FOGTABLEMODE] = D3DFOG_NONE; + rs[D3DRS_FOGSTART] = bit::cast(0.0f); + rs[D3DRS_FOGEND] = bit::cast(1.0f); + rs[D3DRS_FOGDENSITY] = bit::cast(1.0f); + rs[D3DRS_FOGVERTEXMODE] = D3DFOG_NONE; + m_flags.set(D3D9DeviceFlag::DirtyFogColor); + m_flags.set(D3D9DeviceFlag::DirtyFogDensity); + m_flags.set(D3D9DeviceFlag::DirtyFogEnd); + m_flags.set(D3D9DeviceFlag::DirtyFogScale); + m_flags.set(D3D9DeviceFlag::DirtyFogState); + + rs[D3DRS_CLIPPLANEENABLE] = 0; + m_flags.set(D3D9DeviceFlag::DirtyClipPlanes); + + rs[D3DRS_POINTSPRITEENABLE] = FALSE; + rs[D3DRS_POINTSCALEENABLE] = FALSE; + rs[D3DRS_POINTSCALE_A] = bit::cast(1.0f); + rs[D3DRS_POINTSCALE_B] = bit::cast(0.0f); + rs[D3DRS_POINTSCALE_C] = bit::cast(0.0f); + rs[D3DRS_POINTSIZE] = bit::cast(1.0f); + rs[D3DRS_POINTSIZE_MIN] = bit::cast(1.0f); + rs[D3DRS_POINTSIZE_MAX] = bit::cast(64.0f); + UpdatePushConstant(); + UpdatePushConstant(); + UpdatePushConstant(); + m_flags.set(D3D9DeviceFlag::DirtyPointScale); + UpdatePointMode(); + + rs[D3DRS_SRGBWRITEENABLE] = 0; + + rs[D3DRS_SHADEMODE] = D3DSHADE_GOURAUD; + + rs[D3DRS_VERTEXBLEND] = D3DVBF_DISABLE; + rs[D3DRS_INDEXEDVERTEXBLENDENABLE] = FALSE; + rs[D3DRS_TWEENFACTOR] = bit::cast(0.0f); + m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend); + + // Render States not implemented beyond this point. 
+ rs[D3DRS_LASTPIXEL] = TRUE; + rs[D3DRS_DITHERENABLE] = FALSE; + rs[D3DRS_WRAP0] = 0; + rs[D3DRS_WRAP1] = 0; + rs[D3DRS_WRAP2] = 0; + rs[D3DRS_WRAP3] = 0; + rs[D3DRS_WRAP4] = 0; + rs[D3DRS_WRAP5] = 0; + rs[D3DRS_WRAP6] = 0; + rs[D3DRS_WRAP7] = 0; + rs[D3DRS_CLIPPING] = TRUE; + rs[D3DRS_MULTISAMPLEANTIALIAS] = TRUE; + rs[D3DRS_PATCHEDGESTYLE] = D3DPATCHEDGE_DISCRETE; + rs[D3DRS_DEBUGMONITORTOKEN] = D3DDMT_ENABLE; + rs[D3DRS_POSITIONDEGREE] = D3DDEGREE_CUBIC; + rs[D3DRS_NORMALDEGREE] = D3DDEGREE_LINEAR; + rs[D3DRS_ANTIALIASEDLINEENABLE] = FALSE; + rs[D3DRS_MINTESSELLATIONLEVEL] = bit::cast(1.0f); + rs[D3DRS_MAXTESSELLATIONLEVEL] = bit::cast(1.0f); + rs[D3DRS_ADAPTIVETESS_X] = bit::cast(0.0f); + rs[D3DRS_ADAPTIVETESS_Y] = bit::cast(0.0f); + rs[D3DRS_ADAPTIVETESS_Z] = bit::cast(1.0f); + rs[D3DRS_ADAPTIVETESS_W] = bit::cast(0.0f); + rs[D3DRS_ENABLEADAPTIVETESSELLATION] = FALSE; + rs[D3DRS_WRAP8] = 0; + rs[D3DRS_WRAP9] = 0; + rs[D3DRS_WRAP10] = 0; + rs[D3DRS_WRAP11] = 0; + rs[D3DRS_WRAP12] = 0; + rs[D3DRS_WRAP13] = 0; + rs[D3DRS_WRAP14] = 0; + rs[D3DRS_WRAP15] = 0; + // End Unimplemented Render States + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + auto& stage = m_state.textureStages[i]; + + stage[D3DTSS_COLOROP] = i == 0 ? D3DTOP_MODULATE : D3DTOP_DISABLE; + stage[D3DTSS_COLORARG1] = D3DTA_TEXTURE; + stage[D3DTSS_COLORARG2] = D3DTA_CURRENT; + stage[D3DTSS_ALPHAOP] = i == 0 ? 
D3DTOP_SELECTARG1 : D3DTOP_DISABLE; + stage[D3DTSS_ALPHAARG1] = D3DTA_TEXTURE; + stage[D3DTSS_ALPHAARG2] = D3DTA_CURRENT; + stage[D3DTSS_BUMPENVMAT00] = bit::cast(0.0f); + stage[D3DTSS_BUMPENVMAT01] = bit::cast(0.0f); + stage[D3DTSS_BUMPENVMAT10] = bit::cast(0.0f); + stage[D3DTSS_BUMPENVMAT11] = bit::cast(0.0f); + stage[D3DTSS_TEXCOORDINDEX] = i; + stage[D3DTSS_BUMPENVLSCALE] = bit::cast(0.0f); + stage[D3DTSS_BUMPENVLOFFSET] = bit::cast(0.0f); + stage[D3DTSS_TEXTURETRANSFORMFLAGS] = D3DTTFF_DISABLE; + stage[D3DTSS_COLORARG0] = D3DTA_CURRENT; + stage[D3DTSS_ALPHAARG0] = D3DTA_CURRENT; + stage[D3DTSS_RESULTARG] = D3DTA_CURRENT; + stage[D3DTSS_CONSTANT] = 0x00000000; + } + m_flags.set(D3D9DeviceFlag::DirtySharedPixelShaderData); + m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); + + for (uint32_t i = 0; i < caps::MaxStreams; i++) + m_state.streamFreq[i] = 1; + + for (uint32_t i = 0; i < m_state.textures.size(); i++) { + TextureChangePrivate(m_state.textures[i], nullptr); + + DWORD sampler = i; + auto samplerInfo = RemapStateSamplerShader(sampler); + uint32_t slot = computeResourceSlotId(samplerInfo.first, DxsoBindingType::ColorImage, uint32_t(samplerInfo.second)); + + EmitCs([ + cSlot = slot + ](DxvkContext* ctx) { + ctx->bindResourceView(cSlot, nullptr, nullptr); + }); + } + + auto& ss = m_state.samplerStates; + for (uint32_t i = 0; i < ss.size(); i++) { + auto& state = ss[i]; + state[D3DSAMP_ADDRESSU] = D3DTADDRESS_WRAP; + state[D3DSAMP_ADDRESSV] = D3DTADDRESS_WRAP; + state[D3DSAMP_ADDRESSU] = D3DTADDRESS_WRAP; + state[D3DSAMP_ADDRESSW] = D3DTADDRESS_WRAP; + state[D3DSAMP_BORDERCOLOR] = 0x00000000; + state[D3DSAMP_MAGFILTER] = D3DTEXF_POINT; + state[D3DSAMP_MINFILTER] = D3DTEXF_POINT; + state[D3DSAMP_MIPFILTER] = D3DTEXF_NONE; + state[D3DSAMP_MIPMAPLODBIAS] = bit::cast(0.0f); + state[D3DSAMP_MAXMIPLEVEL] = 0; + state[D3DSAMP_MAXANISOTROPY] = 1; + state[D3DSAMP_SRGBTEXTURE] = 0; + state[D3DSAMP_ELEMENTINDEX] = 0; + state[D3DSAMP_DMAPOFFSET] = 0; + + BindSampler(i); + 
} + + m_dirtySamplerStates = 0; + + for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) { + float plane[4] = { 0, 0, 0, 0 }; + SetClipPlane(i, plane); + } + + // We should do this... + m_flags.set(D3D9DeviceFlag::DirtyInputLayout); + + UpdateSamplerSpecConsant(0u); + + return D3D_OK; + } + + + HRESULT D3D9DeviceEx::ResetSwapChain(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode) { + D3D9Format backBufferFmt = EnumerateFormat(pPresentationParameters->BackBufferFormat); + + Logger::info(str::format( + "D3D9DeviceEx::ResetSwapChain:\n", + " Requested Presentation Parameters\n", + " - Width: ", pPresentationParameters->BackBufferWidth, "\n", + " - Height: ", pPresentationParameters->BackBufferHeight, "\n", + " - Format: ", backBufferFmt, "\n" + " - Auto Depth Stencil: ", pPresentationParameters->EnableAutoDepthStencil ? "true" : "false", "\n", + " ^ Format: ", EnumerateFormat(pPresentationParameters->AutoDepthStencilFormat), "\n", + " - Windowed: ", pPresentationParameters->Windowed ? 
"true" : "false", "\n")); + + if (backBufferFmt != D3D9Format::Unknown) { + if (!IsSupportedBackBufferFormat( + backBufferFmt, + pPresentationParameters->Windowed)) { + Logger::err("D3D9DeviceEx::ResetSwapChain: Unsupported backbuffer format."); + return D3DERR_NOTAVAILABLE; + } + } + + if (pPresentationParameters->EnableAutoDepthStencil) { + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = pPresentationParameters->BackBufferWidth; + desc.Height = pPresentationParameters->BackBufferHeight; + desc.Depth = 1; + desc.ArraySize = 1; + desc.MipLevels = 1; + desc.Usage = D3DUSAGE_DEPTHSTENCIL; + desc.Format = EnumerateFormat(pPresentationParameters->AutoDepthStencilFormat); + desc.Pool = D3DPOOL_DEFAULT; + desc.Discard = FALSE; + desc.MultiSample = pPresentationParameters->MultiSampleType; + desc.MultisampleQuality = pPresentationParameters->MultiSampleQuality; + + D3D9_VK_FORMAT_MAPPING mapping; + if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc, &mapping))) + return D3DERR_NOTAVAILABLE; + + m_autoDepthStencil = new D3D9Surface(this, &desc, mapping); + m_initializer->InitTexture(m_autoDepthStencil->GetCommonTexture()); + SetDepthStencilSurface(m_autoDepthStencil.ptr()); + } + + if (auto* implicitSwapchain = GetInternalSwapchain(0)) + implicitSwapchain->Reset(pPresentationParameters, pFullscreenDisplayMode); + else + m_swapchains.emplace_back(new D3D9SwapChainEx(this, pPresentationParameters, pFullscreenDisplayMode)); + + SetRenderTarget(0, GetInternalSwapchain(0)->GetBackBuffer(0)); + + // Force this if we end up binding the same RT to make scissor change go into effect. 
+    BindViewportAndScissor();
+
+    return D3D_OK;
+  }
+
+  // First-time setup: runs ResetSwapChain, then ResetState, then Flush() and SynchronizeCsThread(); returns the first failing HRESULT, else D3D_OK.
+  HRESULT D3D9DeviceEx::InitialReset(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode) {
+    HRESULT hr = ResetSwapChain(pPresentationParameters, pFullscreenDisplayMode);
+    if (FAILED(hr))
+      return hr;
+
+    hr = ResetState(pPresentationParameters);
+    if (FAILED(hr))
+      return hr;
+
+    Flush();
+    SynchronizeCsThread();
+
+    return D3D_OK;
+  }
+
+
+}
diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h
new file mode 100644
index 000000000..d3bdec4c1
--- /dev/null
+++ b/src/d3d9/d3d9_device.h
@@ -0,0 +1,1154 @@
+#pragma once
+
+#include "../dxvk/dxvk_device.h"
+#include "../dxvk/dxvk_cs.h"
+
+#include "d3d9_include.h"
+#include "d3d9_cursor.h"
+#include "d3d9_format.h"
+#include "d3d9_multithread.h"
+#include "d3d9_adapter.h"
+#include "d3d9_constant_set.h"
+
+#include "d3d9_state.h"
+
+#include "d3d9_options.h"
+
+#include "../dxso/dxso_module.h"
+#include "../dxso/dxso_util.h"
+#include "../dxso/dxso_options.h"
+#include "../dxso/dxso_modinfo.h"
+
+#include "d3d9_sampler.h"
+#include "d3d9_fixed_function.h"
+#include "d3d9_swvp_emu.h"
+
+#include "d3d9_shader_permutations.h"
+
+#include
+#include
+#include
+
+namespace dxvk {
+
+  class D3D9InterfaceEx;
+  class D3D9SwapChainEx;
+  class D3D9CommonTexture;
+  class D3D9CommonBuffer;
+  class D3D9CommonShader;
+  class D3D9ShaderModuleSet;
+  class D3D9Initializer;
+  class D3D9Query;
+  class D3D9StateBlock;
+  class D3D9FormatHelper;
+  // Device state flags; enumerators are implicitly numbered from 0 in declaration order.
+  enum class D3D9DeviceFlag : uint32_t {
+    DirtyFramebuffer,
+    DirtyClipPlanes,
+    DirtyDepthStencilState,
+    DirtyBlendState,
+    DirtyRasterizerState,
+    DirtyAlphaTestState,
+    DirtyInputLayout,
+    DirtyViewportScissor,
+    DirtyMultiSampleState,
+
+    DirtyFogState,
+    DirtyFogColor,
+    DirtyFogDensity,
+    DirtyFogScale,
+    DirtyFogEnd,
+
+    DirtyFFVertexData,
+    DirtyFFVertexBlend,
+    DirtyFFVertexShader,
+    DirtyFFPixelShader,
+    DirtyFFViewport,
+    DirtyFFPixelData,
+    DirtyProgVertexShader,
+
DirtySharedPixelShaderData, + UpDirtiedVertices, + UpDirtiedIndices, + ValidSampleMask, + DirtyDepthBounds, + DirtyPointScale, + }; + + using D3D9DeviceFlags = Flags; + + struct D3D9DrawInfo { + uint32_t vertexCount; + uint32_t instanceCount; + }; + + struct D3D9SamplerPair { + Rc color; + Rc depth; + }; + + struct D3D9UPBufferSlice { + DxvkBufferSlice slice = {}; + void* mapPtr = nullptr; + }; + + class D3D9DeviceEx final : public ComObjectClamp { + constexpr static uint32_t DefaultFrameLatency = 3; + constexpr static uint32_t MaxFrameLatency = 20; + + constexpr static uint32_t MinFlushIntervalUs = 750; + constexpr static uint32_t IncFlushIntervalUs = 250; + constexpr static uint32_t MaxPendingSubmits = 6; + + constexpr static uint32_t NullStreamIdx = caps::MaxStreams; + + friend class D3D9SwapChainEx; + public: + + D3D9DeviceEx( + D3D9InterfaceEx* pParent, + D3D9Adapter* pAdapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + D3DPRESENT_PARAMETERS* pPresentationParameters, + D3DDISPLAYMODEEX* pDisplayMode, + Rc dxvkDevice); + + ~D3D9DeviceEx(); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + HRESULT STDMETHODCALLTYPE TestCooperativeLevel(); + + UINT STDMETHODCALLTYPE GetAvailableTextureMem(); + + HRESULT STDMETHODCALLTYPE EvictManagedResources(); + + HRESULT STDMETHODCALLTYPE GetDirect3D(IDirect3D9** ppD3D9); + + HRESULT STDMETHODCALLTYPE GetDeviceCaps(D3DCAPS9* pCaps); + + HRESULT STDMETHODCALLTYPE GetDisplayMode(UINT iSwapChain, D3DDISPLAYMODE* pMode); + + HRESULT STDMETHODCALLTYPE GetCreationParameters(D3DDEVICE_CREATION_PARAMETERS *pParameters); + + HRESULT STDMETHODCALLTYPE SetCursorProperties( + UINT XHotSpot, + UINT YHotSpot, + IDirect3DSurface9* pCursorBitmap); + + void STDMETHODCALLTYPE SetCursorPosition(int X, int Y, DWORD Flags); + + BOOL STDMETHODCALLTYPE ShowCursor(BOOL bShow); + + HRESULT STDMETHODCALLTYPE CreateAdditionalSwapChain( + D3DPRESENT_PARAMETERS* pPresentationParameters, + 
IDirect3DSwapChain9** ppSwapChain); + + HRESULT STDMETHODCALLTYPE GetSwapChain(UINT iSwapChain, IDirect3DSwapChain9** pSwapChain); + + UINT STDMETHODCALLTYPE GetNumberOfSwapChains(); + + HRESULT STDMETHODCALLTYPE Reset(D3DPRESENT_PARAMETERS* pPresentationParameters); + + HRESULT STDMETHODCALLTYPE Present( + const RECT* pSourceRect, + const RECT* pDestRect, HWND hDestWindowOverride, + const RGNDATA* pDirtyRegion); + + HRESULT STDMETHODCALLTYPE GetBackBuffer( + UINT iSwapChain, + UINT iBackBuffer, + D3DBACKBUFFER_TYPE Type, + IDirect3DSurface9** ppBackBuffer); + + HRESULT STDMETHODCALLTYPE GetRasterStatus(UINT iSwapChain, D3DRASTER_STATUS* pRasterStatus); + + HRESULT STDMETHODCALLTYPE SetDialogBoxMode(BOOL bEnableDialogs); + + void STDMETHODCALLTYPE SetGammaRamp( + UINT iSwapChain, + DWORD Flags, + const D3DGAMMARAMP* pRamp); + + void STDMETHODCALLTYPE GetGammaRamp(UINT iSwapChain, D3DGAMMARAMP* pRamp); + + HRESULT STDMETHODCALLTYPE CreateTexture( + UINT Width, + UINT Height, + UINT Levels, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DTexture9** ppTexture, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE CreateVolumeTexture( + UINT Width, + UINT Height, + UINT Depth, + UINT Levels, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DVolumeTexture9** ppVolumeTexture, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE CreateCubeTexture( + UINT EdgeLength, + UINT Levels, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DCubeTexture9** ppCubeTexture, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE CreateVertexBuffer( + UINT Length, + DWORD Usage, + DWORD FVF, + D3DPOOL Pool, + IDirect3DVertexBuffer9** ppVertexBuffer, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE CreateIndexBuffer( + UINT Length, + DWORD Usage, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DIndexBuffer9** ppIndexBuffer, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE CreateRenderTarget( + UINT Width, + UINT Height, + D3DFORMAT 
Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Lockable, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE CreateDepthStencilSurface( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Discard, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE UpdateSurface( + IDirect3DSurface9* pSourceSurface, + const RECT* pSourceRect, + IDirect3DSurface9* pDestinationSurface, + const POINT* pDestPoint); + + HRESULT STDMETHODCALLTYPE UpdateTexture( + IDirect3DBaseTexture9* pSourceTexture, + IDirect3DBaseTexture9* pDestinationTexture); + + HRESULT STDMETHODCALLTYPE GetRenderTargetData( + IDirect3DSurface9* pRenderTarget, + IDirect3DSurface9* pDestSurface); + + HRESULT STDMETHODCALLTYPE GetFrontBufferData(UINT iSwapChain, IDirect3DSurface9* pDestSurface); + + HRESULT STDMETHODCALLTYPE StretchRect( + IDirect3DSurface9* pSourceSurface, + const RECT* pSourceRect, + IDirect3DSurface9* pDestSurface, + const RECT* pDestRect, + D3DTEXTUREFILTERTYPE Filter); + + HRESULT STDMETHODCALLTYPE ColorFill( + IDirect3DSurface9* pSurface, + const RECT* pRect, + D3DCOLOR Color); + + HRESULT STDMETHODCALLTYPE CreateOffscreenPlainSurface( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle); + + HRESULT STDMETHODCALLTYPE SetRenderTarget( + DWORD RenderTargetIndex, + IDirect3DSurface9* pRenderTarget); + + HRESULT STDMETHODCALLTYPE GetRenderTarget( + DWORD RenderTargetIndex, + IDirect3DSurface9** ppRenderTarget); + + HRESULT STDMETHODCALLTYPE SetDepthStencilSurface(IDirect3DSurface9* pNewZStencil); + + HRESULT STDMETHODCALLTYPE GetDepthStencilSurface(IDirect3DSurface9** ppZStencilSurface); + + HRESULT STDMETHODCALLTYPE BeginScene(); + + HRESULT STDMETHODCALLTYPE EndScene(); + + HRESULT STDMETHODCALLTYPE Clear( + DWORD Count, + const D3DRECT* pRects, + DWORD 
Flags, + D3DCOLOR Color, + float Z, + DWORD Stencil); + + HRESULT STDMETHODCALLTYPE SetTransform(D3DTRANSFORMSTATETYPE State, const D3DMATRIX* pMatrix); + + HRESULT STDMETHODCALLTYPE GetTransform(D3DTRANSFORMSTATETYPE State, D3DMATRIX* pMatrix); + + HRESULT STDMETHODCALLTYPE MultiplyTransform(D3DTRANSFORMSTATETYPE TransformState, const D3DMATRIX* pMatrix); + + HRESULT STDMETHODCALLTYPE SetViewport(const D3DVIEWPORT9* pViewport); + + HRESULT STDMETHODCALLTYPE GetViewport(D3DVIEWPORT9* pViewport); + + HRESULT STDMETHODCALLTYPE SetMaterial(const D3DMATERIAL9* pMaterial); + + HRESULT STDMETHODCALLTYPE GetMaterial(D3DMATERIAL9* pMaterial); + + HRESULT STDMETHODCALLTYPE SetLight(DWORD Index, const D3DLIGHT9* pLight); + + HRESULT STDMETHODCALLTYPE GetLight(DWORD Index, D3DLIGHT9* pLight); + + HRESULT STDMETHODCALLTYPE LightEnable(DWORD Index, BOOL Enable); + + HRESULT STDMETHODCALLTYPE GetLightEnable(DWORD Index, BOOL* pEnable); + + HRESULT STDMETHODCALLTYPE SetClipPlane(DWORD Index, const float* pPlane); + + HRESULT STDMETHODCALLTYPE GetClipPlane(DWORD Index, float* pPlane); + + HRESULT STDMETHODCALLTYPE SetRenderState(D3DRENDERSTATETYPE State, DWORD Value); + + HRESULT STDMETHODCALLTYPE GetRenderState(D3DRENDERSTATETYPE State, DWORD* pValue); + + HRESULT STDMETHODCALLTYPE CreateStateBlock( + D3DSTATEBLOCKTYPE Type, + IDirect3DStateBlock9** ppSB); + + HRESULT STDMETHODCALLTYPE BeginStateBlock(); + + HRESULT STDMETHODCALLTYPE EndStateBlock(IDirect3DStateBlock9** ppSB); + + HRESULT STDMETHODCALLTYPE SetClipStatus(const D3DCLIPSTATUS9* pClipStatus); + + HRESULT STDMETHODCALLTYPE GetClipStatus(D3DCLIPSTATUS9* pClipStatus); + + HRESULT STDMETHODCALLTYPE GetTexture(DWORD Stage, IDirect3DBaseTexture9** ppTexture); + + HRESULT STDMETHODCALLTYPE SetTexture(DWORD Stage, IDirect3DBaseTexture9* pTexture); + + HRESULT STDMETHODCALLTYPE GetTextureStageState( + DWORD Stage, + D3DTEXTURESTAGESTATETYPE Type, + DWORD* pValue); + + HRESULT STDMETHODCALLTYPE SetTextureStageState( + DWORD 
Stage, + D3DTEXTURESTAGESTATETYPE Type, + DWORD Value); + + HRESULT STDMETHODCALLTYPE GetSamplerState( + DWORD Sampler, + D3DSAMPLERSTATETYPE Type, + DWORD* pValue); + + HRESULT STDMETHODCALLTYPE SetSamplerState( + DWORD Sampler, + D3DSAMPLERSTATETYPE Type, + DWORD Value); + + HRESULT STDMETHODCALLTYPE ValidateDevice(DWORD* pNumPasses); + + HRESULT STDMETHODCALLTYPE SetPaletteEntries(UINT PaletteNumber, const PALETTEENTRY* pEntries); + + HRESULT STDMETHODCALLTYPE GetPaletteEntries(UINT PaletteNumber, PALETTEENTRY* pEntries); + + HRESULT STDMETHODCALLTYPE SetCurrentTexturePalette(UINT PaletteNumber); + + HRESULT STDMETHODCALLTYPE GetCurrentTexturePalette(UINT *PaletteNumber); + + HRESULT STDMETHODCALLTYPE SetScissorRect(const RECT* pRect); + + HRESULT STDMETHODCALLTYPE GetScissorRect(RECT* pRect); + + HRESULT STDMETHODCALLTYPE SetSoftwareVertexProcessing(BOOL bSoftware); + + BOOL STDMETHODCALLTYPE GetSoftwareVertexProcessing(); + + HRESULT STDMETHODCALLTYPE SetNPatchMode(float nSegments); + + float STDMETHODCALLTYPE GetNPatchMode(); + + HRESULT STDMETHODCALLTYPE DrawPrimitive( + D3DPRIMITIVETYPE PrimitiveType, + UINT StartVertex, + UINT PrimitiveCount); + + HRESULT STDMETHODCALLTYPE DrawIndexedPrimitive( + D3DPRIMITIVETYPE PrimitiveType, + INT BaseVertexIndex, + UINT MinVertexIndex, + UINT NumVertices, + UINT StartIndex, + UINT PrimitiveCount); + + HRESULT STDMETHODCALLTYPE DrawPrimitiveUP( + D3DPRIMITIVETYPE PrimitiveType, + UINT PrimitiveCount, + const void* pVertexStreamZeroData, + UINT VertexStreamZeroStride); + + HRESULT STDMETHODCALLTYPE DrawIndexedPrimitiveUP( + D3DPRIMITIVETYPE PrimitiveType, + UINT MinVertexIndex, + UINT NumVertices, + UINT PrimitiveCount, + const void* pIndexData, + D3DFORMAT IndexDataFormat, + const void* pVertexStreamZeroData, + UINT VertexStreamZeroStride); + + HRESULT STDMETHODCALLTYPE ProcessVertices( + UINT SrcStartIndex, + UINT DestIndex, + UINT VertexCount, + IDirect3DVertexBuffer9* pDestBuffer, + IDirect3DVertexDeclaration9* 
pVertexDecl, + DWORD Flags); + + HRESULT STDMETHODCALLTYPE CreateVertexDeclaration( + const D3DVERTEXELEMENT9* pVertexElements, + IDirect3DVertexDeclaration9** ppDecl); + + HRESULT STDMETHODCALLTYPE SetVertexDeclaration(IDirect3DVertexDeclaration9* pDecl); + + HRESULT STDMETHODCALLTYPE GetVertexDeclaration(IDirect3DVertexDeclaration9** ppDecl); + + HRESULT STDMETHODCALLTYPE SetFVF(DWORD FVF); + + HRESULT STDMETHODCALLTYPE GetFVF(DWORD* pFVF); + + HRESULT STDMETHODCALLTYPE CreateVertexShader( + const DWORD* pFunction, + IDirect3DVertexShader9** ppShader); + + HRESULT STDMETHODCALLTYPE SetVertexShader(IDirect3DVertexShader9* pShader); + + HRESULT STDMETHODCALLTYPE GetVertexShader(IDirect3DVertexShader9** ppShader); + + HRESULT STDMETHODCALLTYPE SetVertexShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount); + + HRESULT STDMETHODCALLTYPE GetVertexShaderConstantF( + UINT StartRegister, + float* pConstantData, + UINT Vector4fCount); + + HRESULT STDMETHODCALLTYPE SetVertexShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount); + + HRESULT STDMETHODCALLTYPE GetVertexShaderConstantI( + UINT StartRegister, + int* pConstantData, + UINT Vector4iCount); + + HRESULT STDMETHODCALLTYPE SetVertexShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount); + + HRESULT STDMETHODCALLTYPE GetVertexShaderConstantB( + UINT StartRegister, + BOOL* pConstantData, + UINT BoolCount); + + HRESULT STDMETHODCALLTYPE SetStreamSource( + UINT StreamNumber, + IDirect3DVertexBuffer9* pStreamData, + UINT OffsetInBytes, + UINT Stride); + + HRESULT STDMETHODCALLTYPE GetStreamSource( + UINT StreamNumber, + IDirect3DVertexBuffer9** ppStreamData, + UINT* pOffsetInBytes, + UINT* pStride); + + HRESULT STDMETHODCALLTYPE SetStreamSourceFreq(UINT StreamNumber, UINT Setting); + + HRESULT STDMETHODCALLTYPE GetStreamSourceFreq(UINT StreamNumber, UINT* pSetting); + + HRESULT STDMETHODCALLTYPE 
SetIndices(IDirect3DIndexBuffer9* pIndexData); + + HRESULT STDMETHODCALLTYPE GetIndices(IDirect3DIndexBuffer9** ppIndexData); + + HRESULT STDMETHODCALLTYPE CreatePixelShader( + const DWORD* pFunction, + IDirect3DPixelShader9** ppShader); + + HRESULT STDMETHODCALLTYPE SetPixelShader(IDirect3DPixelShader9* pShader); + + HRESULT STDMETHODCALLTYPE GetPixelShader(IDirect3DPixelShader9** ppShader); + + HRESULT STDMETHODCALLTYPE SetPixelShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount); + + HRESULT STDMETHODCALLTYPE GetPixelShaderConstantF( + UINT StartRegister, + float* pConstantData, + UINT Vector4fCount); + + HRESULT STDMETHODCALLTYPE SetPixelShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount); + + HRESULT STDMETHODCALLTYPE GetPixelShaderConstantI( + UINT StartRegister, + int* pConstantData, + UINT Vector4iCount); + + HRESULT STDMETHODCALLTYPE SetPixelShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount); + + HRESULT STDMETHODCALLTYPE GetPixelShaderConstantB( + UINT StartRegister, + BOOL* pConstantData, + UINT BoolCount); + + HRESULT STDMETHODCALLTYPE DrawRectPatch( + UINT Handle, + const float* pNumSegs, + const D3DRECTPATCH_INFO* pRectPatchInfo); + + HRESULT STDMETHODCALLTYPE DrawTriPatch( + UINT Handle, + const float* pNumSegs, + const D3DTRIPATCH_INFO* pTriPatchInfo); + + HRESULT STDMETHODCALLTYPE DeletePatch(UINT Handle); + + HRESULT STDMETHODCALLTYPE CreateQuery(D3DQUERYTYPE Type, IDirect3DQuery9** ppQuery); + + // Ex Methods + + HRESULT STDMETHODCALLTYPE SetConvolutionMonoKernel( + UINT width, + UINT height, + float* rows, + float* columns); + + HRESULT STDMETHODCALLTYPE ComposeRects( + IDirect3DSurface9* pSrc, + IDirect3DSurface9* pDst, + IDirect3DVertexBuffer9* pSrcRectDescs, + UINT NumRects, + IDirect3DVertexBuffer9* pDstRectDescs, + D3DCOMPOSERECTSOP Operation, + int Xoffset, + int Yoffset); + + HRESULT STDMETHODCALLTYPE GetGPUThreadPriority(INT* 
pPriority); + + HRESULT STDMETHODCALLTYPE SetGPUThreadPriority(INT Priority); + + HRESULT STDMETHODCALLTYPE WaitForVBlank(UINT iSwapChain); + + HRESULT STDMETHODCALLTYPE CheckResourceResidency(IDirect3DResource9** pResourceArray, UINT32 NumResources); + + HRESULT STDMETHODCALLTYPE SetMaximumFrameLatency(UINT MaxLatency); + + HRESULT STDMETHODCALLTYPE GetMaximumFrameLatency(UINT* pMaxLatency); + + HRESULT STDMETHODCALLTYPE CheckDeviceState(HWND hDestinationWindow); + + HRESULT STDMETHODCALLTYPE PresentEx( + const RECT* pSourceRect, + const RECT* pDestRect, + HWND hDestWindowOverride, + const RGNDATA* pDirtyRegion, + DWORD dwFlags); + + HRESULT STDMETHODCALLTYPE CreateRenderTargetEx( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Lockable, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle, + DWORD Usage); + + HRESULT STDMETHODCALLTYPE CreateOffscreenPlainSurfaceEx( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DPOOL Pool, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle, + DWORD Usage); + + HRESULT STDMETHODCALLTYPE CreateDepthStencilSurfaceEx( + UINT Width, + UINT Height, + D3DFORMAT Format, + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + BOOL Discard, + IDirect3DSurface9** ppSurface, + HANDLE* pSharedHandle, + DWORD Usage); + + HRESULT STDMETHODCALLTYPE ResetEx( + D3DPRESENT_PARAMETERS* pPresentationParameters, + D3DDISPLAYMODEEX* pFullscreenDisplayMode); + + HRESULT STDMETHODCALLTYPE GetDisplayModeEx( + UINT iSwapChain, + D3DDISPLAYMODEEX* pMode, + D3DDISPLAYROTATION* pRotation); + + HRESULT STDMETHODCALLTYPE CreateAdditionalSwapChainEx( + D3DPRESENT_PARAMETERS* pPresentationParameters, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode, + IDirect3DSwapChain9** ppSwapChain); + + HRESULT SetStateSamplerState( + DWORD StateSampler, + D3DSAMPLERSTATETYPE Type, + DWORD Value); + + HRESULT SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture); + + 
HRESULT SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix);
+    // Returns the shader pipeline stage mask reported by the DXVK device.
+    VkPipelineStageFlags GetEnabledShaderStages() const {
+      return m_dxvkDevice->getShaderPipelineStages();
+    }
+
+    static DxvkDeviceFeatures GetDeviceFeatures(const Rc& adapter);
+
+    bool SupportsSWVP();
+
+    bool IsExtended();
+
+    HWND GetWindow();
+
+    Rc GetDXVKDevice() {
+      return m_dxvkDevice;
+    }
+
+    D3D9_VK_FORMAT_MAPPING LookupFormat(
+      D3D9Format Format) const;
+
+    DxvkFormatInfo UnsupportedFormatInfo(
+      D3D9Format Format) const;
+
+    bool WaitForResource(
+      const Rc& Resource,
+      DWORD MapFlags);
+
+    /**
+     * \brief Locks a subresource of an image
+     *
+     * \param [in] pResource,Face,Mip The texture, and the face and mip level selecting the subresource to lock
+     * \param [out] pLockedBox The returned locked box of the image, containing data ptr and strides
+     * \param [in] pBox The region of the subresource to lock. This offsets the returned data ptr
+     * \param [in] Flags The D3DLOCK_* flags to lock the image with
+     * \returns \c D3D_OK if the parameters are valid or D3DERR_INVALIDCALL if it fails.
+     */
+    HRESULT LockImage(
+      D3D9CommonTexture* pResource,
+      UINT Face,
+      UINT Mip,
+      D3DLOCKED_BOX* pLockedBox,
+      const D3DBOX* pBox,
+      DWORD Flags);
+
+    uint32_t CalcImageLockOffset(
+      uint32_t SlicePitch,
+      uint32_t RowPitch,
+      const DxvkFormatInfo* FormatInfo,
+      const D3DBOX* pBox);
+
+    /**
+     * \brief Unlocks a subresource of an image
+     *
+     * Passthrough to device unlock.
+     * \param [in] pResource,Face,MipLevel The texture, and the face and mip level selecting the subresource to unlock
+     * \returns \c D3D_OK if the parameters are valid or D3DERR_INVALIDCALL if it fails.
+     */
+    HRESULT UnlockImage(
+      D3D9CommonTexture* pResource,
+      UINT Face,
+      UINT MipLevel);
+
+    HRESULT FlushImage(
+      D3D9CommonTexture* pResource,
+      UINT Subresource);
+
+    void GenerateMips(
+      D3D9CommonTexture* pResource);
+
+    HRESULT LockBuffer(
+      D3D9CommonBuffer* pResource,
+      UINT OffsetToLock,
+      UINT SizeToLock,
+      void** ppbData,
+      DWORD Flags);
+
+    HRESULT FlushBuffer(
+      D3D9CommonBuffer* pResource);
+
+    HRESULT UnlockBuffer(
+      D3D9CommonBuffer* pResource);
+
+    void SetupFPU();
+
+    int64_t DetermineInitialTextureMemory();
+
+    void CreateConstantBuffers();
+
+    void SynchronizeCsThread();
+
+    void Flush();
+
+    D3D9ShaderMasks GetShaderMasks();
+
+    void UpdateActiveRTs(uint32_t index);
+
+    void UpdateActiveRTTextures(uint32_t index);
+
+    void UpdateActiveHazards();
+
+    void MarkRenderHazards();
+
+    template
+    void UpdatePointMode();
+
+    void UpdateFog();
+
+    void BindFramebuffer();
+
+    void BindViewportAndScissor();
+    // True when m_amdATOC is set, or when m_nvATOC is set while D3DRS_ALPHATESTENABLE is non-zero.
+    inline bool IsAlphaToCoverageEnabled() {
+      const bool alphaTest = m_state.renderStates[D3DRS_ALPHATESTENABLE] != 0;
+
+      return m_amdATOC || (m_nvATOC && alphaTest);
+    }
+    // Alpha testing counts as enabled only while alpha-to-coverage is not active.
+    inline bool IsAlphaTestEnabled() {
+      return m_state.renderStates[D3DRS_ALPHATESTENABLE] && !IsAlphaToCoverageEnabled();
+    }
+    // Depth testing needs both a non-zero D3DRS_ZENABLE and a bound depth-stencil surface.
+    inline bool IsZTestEnabled() {
+      return m_state.renderStates[D3DRS_ZENABLE] && m_state.depthStencil != nullptr;
+    }
+
+    void BindMultiSampleState();
+
+    void BindBlendState();
+
+    void BindBlendFactor();
+
+    void BindDepthStencilState();
+
+    void BindDepthStencilRefrence();
+
+    void BindRasterizerState();
+
+    void BindAlphaTestState();
+
+    template
+    inline void UploadHardwareConstantSet(void* pData, const SoftwareLayoutType& Src, const ShaderType& Shader);
+
+    template
+    inline void UploadSoftwareConstantSet(void* pData, const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader);
+
+    template
+    inline void UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader);
+
+
template + void UploadConstants(); + + void UpdateClipPlanes(); + + template + void UpdatePushConstant(const void* pData); + + template + void UpdatePushConstant(); + + void BindSampler(DWORD Sampler); + + void BindTexture(DWORD SamplerSampler); + + void UndirtySamplers(); + + void MarkSamplersDirty(); + + D3D9DrawInfo GenerateDrawInfo( + D3DPRIMITIVETYPE PrimitiveType, + UINT PrimitiveCount, + UINT InstanceCount); + + uint32_t GetInstanceCount() const; + + void PrepareDraw(D3DPRIMITIVETYPE PrimitiveType, bool up = false); + + template + void BindShader( + const D3D9CommonShader* pShaderModule, + D3D9ShaderPermutation Permutation); + + void BindInputLayout(); + + void BindVertexBuffer( + UINT Slot, + D3D9VertexBuffer* pBuffer, + UINT Offset, + UINT Stride); + + void BindIndices(); + + D3D9DeviceLock LockDevice() { + return m_multithread.AcquireLock(); + } + + const D3D9Options* GetOptions() const { + return &m_d3d9Options; + } + + Direct3DState9* GetRawState() { + return &m_state; + } + + void Begin(D3D9Query* pQuery); + void End(D3D9Query* pQuery); + + void SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits); + void SetPixelBoolBitfield (uint32_t idx, uint32_t mask, uint32_t bits); + + void FlushImplicit(BOOL StrongHint); + + bool ChangeReportedMemory(int64_t delta) { + if (IsExtended()) + return true; + + m_availableMemory += delta; + + return !m_d3d9Options.memoryTrackTest || m_availableMemory > 0; + } + + void ResolveZ(); + + void TransitionImage(D3D9CommonTexture* pResource, VkImageLayout NewLayout); + + void TransformImage( + D3D9CommonTexture* pResource, + const VkImageSubresourceRange* pSubresources, + VkImageLayout OldLayout, + VkImageLayout NewLayout); + + const D3D9ConstantLayout& GetVertexConstantLayout() { return m_vsLayout; } + const D3D9ConstantLayout& GetPixelConstantLayout() { return m_psLayout; } + + HRESULT ResetState(D3DPRESENT_PARAMETERS* pPresentationParameters); + HRESULT ResetSwapChain(D3DPRESENT_PARAMETERS* 
pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode); + + HRESULT InitialReset(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode); + + UINT GetSamplerCount() const { + return m_samplerCount.load(); + } + + private: + + D3D9DeviceFlags m_flags; + uint32_t m_dirtySamplerStates = 0; + + D3D9Adapter* m_adapter; + Rc m_dxvkDevice; + + Rc m_updateBuffer; + DxvkCsChunkPool m_csChunkPool; + dxvk::high_resolution_clock::time_point m_lastFlush + = dxvk::high_resolution_clock::now(); + DxvkCsThread m_csThread; + bool m_csIsBusy = false; + + uint32_t m_frameLatency = DefaultFrameLatency; + + D3D9Initializer* m_initializer = nullptr; + D3D9FormatHelper* m_converter = nullptr; + + DxvkCsChunkRef m_csChunk; + + D3D9FFShaderModuleSet m_ffModules; + D3D9SWVPEmulator m_swvpEmulator; + + DxvkCsChunkRef AllocCsChunk() { + DxvkCsChunk* chunk = m_csChunkPool.allocChunk(DxvkCsChunkFlag::SingleUse); + return DxvkCsChunkRef(chunk, &m_csChunkPool); + } + + template + void EmitCs(Cmd&& command) { + if (unlikely(!m_csChunk->push(command))) { + EmitCsChunk(std::move(m_csChunk)); + + m_csChunk = AllocCsChunk(); + m_csChunk->push(command); + } + } + + void EmitCsChunk(DxvkCsChunkRef&& chunk); + + void FlushCsChunk() { + if (likely(!m_csChunk->empty())) { + EmitCsChunk(std::move(m_csChunk)); + m_csChunk = AllocCsChunk(); + } + } + + bool CanSWVP() { + return m_behaviorFlags & (D3DCREATE_MIXED_VERTEXPROCESSING | D3DCREATE_SOFTWARE_VERTEXPROCESSING); + } + + inline constexpr D3D9ShaderPermutation GetVertexShaderPermutation() { + return D3D9ShaderPermutations::None; + } + + inline D3D9ShaderPermutation GetPixelShaderPermutation() { + if (unlikely(m_state.renderStates[D3DRS_SHADEMODE] == D3DSHADE_FLAT)) + return D3D9ShaderPermutations::FlatShade; + + return D3D9ShaderPermutations::None; + } + + Com m_parent; + D3DDEVTYPE m_deviceType; + HWND m_window; + + DWORD m_behaviorFlags; + Direct3DState9 m_state; + Com m_recorder; + D3D9Multithread 
m_multithread; + + Rc m_shaderModules; + + D3D9ConstantSets m_consts[DxsoProgramTypes::Count]; + + Rc m_vsClipPlanes; + + Rc m_vsFixedFunction; + Rc m_vsVertexBlend; + Rc m_psFixedFunction; + Rc m_psShared; + + D3D9UPBufferSlice m_upBuffer; + + const D3D9Options m_d3d9Options; + const DxsoOptions m_dxsoOptions; + + BOOL m_isSWVP; + + D3DPRESENT_PARAMETERS m_presentParams; + + D3D9Cursor m_cursor; + + Com m_autoDepthStencil; + + std::vector< + Com> m_swapchains; + + std::unordered_map< + D3D9SamplerKey, + D3D9SamplerPair, + D3D9SamplerKeyHash, + D3D9SamplerKeyEq> m_samplers; + + std::unordered_map< + DWORD, + Com> m_fvfTable; + + D3D9InputAssemblyState m_iaState; + + uint32_t m_instancedData = 0; + uint32_t m_lastSamplerTypeBitfield = 0; + uint32_t m_samplerTypeBitfield = 0; + uint32_t m_lastProjectionBitfield = 0; + uint32_t m_projectionBitfield = 0; + + uint32_t m_lastPointMode = 0; + + uint32_t m_activeRTs = 0; + uint32_t m_activeRTTextures = 0; + uint32_t m_activeHazards = 0; + uint32_t m_alphaSwizzleRTs = 0; + + D3D9ViewportInfo m_viewportInfo; + + std::atomic m_availableMemory = 0; + std::atomic m_samplerCount = 0; + + bool m_amdATOC = false; + bool m_nvATOC = false; + bool m_ffZTest = false; + + D3D9ConstantLayout m_vsLayout; + D3D9ConstantLayout m_psLayout; + + void DetermineConstantLayouts(bool canSWVP); + + D3D9UPBufferSlice AllocUpBuffer(VkDeviceSize size); + + D3D9SwapChainEx* GetInternalSwapchain(UINT index); + + bool ShouldRecord(); + + HRESULT CreateShaderModule( + D3D9CommonShader* pShaderModule, + VkShaderStageFlagBits ShaderStage, + const DWORD* pShaderBytecode, + const DxsoModuleInfo* pModuleInfo); + + // So we don't do OOB. + template + inline static constexpr uint32_t DetermineSoftwareRegCount() { + constexpr bool isVS = ProgramType == DxsoProgramType::VertexShader; + + switch (ConstantType) { + default: + case D3D9ConstantType::Float: return isVS ? 
caps::MaxFloatConstantsSoftware : caps::MaxFloatConstantsPS; + case D3D9ConstantType::Int: return isVS ? caps::MaxOtherConstantsSoftware : caps::MaxOtherConstants; + case D3D9ConstantType::Bool: return isVS ? caps::MaxOtherConstantsSoftware : caps::MaxOtherConstants; + } + } + + // So we don't copy more than we need. + template + inline uint32_t DetermineHardwareRegCount() const { + const auto& layout = ProgramType == DxsoProgramType::VertexShader + ? m_vsLayout : m_psLayout; + + switch (ConstantType) { + default: + case D3D9ConstantType::Float: return layout.floatCount; + case D3D9ConstantType::Int: return layout.intCount; + case D3D9ConstantType::Bool: return layout.boolCount; + } + } + + inline uint32_t GetFrameLatency() { + return m_frameLatency; + } + + template < + DxsoProgramType ProgramType, + D3D9ConstantType ConstantType, + typename T> + HRESULT SetShaderConstants( + UINT StartRegister, + const T* pConstantData, + UINT Count); + + template < + DxsoProgramType ProgramType, + D3D9ConstantType ConstantType, + typename T> + HRESULT GetShaderConstants( + UINT StartRegister, + T* pConstantData, + UINT Count) { + auto GetHelper = [&] (const auto& set) { + const uint32_t regCountHardware = DetermineHardwareRegCount(); + constexpr uint32_t regCountSoftware = DetermineSoftwareRegCount(); + + if (StartRegister + Count > regCountSoftware) + return D3DERR_INVALIDCALL; + + Count = UINT( + std::max( + std::clamp(Count + StartRegister, 0, regCountHardware) - INT(StartRegister), + 0)); + + if (Count == 0) + return D3D_OK; + + if (pConstantData == nullptr) + return D3DERR_INVALIDCALL; + + if constexpr (ConstantType == D3D9ConstantType::Float) { + auto begin = &set.fConsts[StartRegister]; + auto end = &begin[Count]; + + std::copy(begin, end, reinterpret_cast(pConstantData)); + } + else if constexpr (ConstantType == D3D9ConstantType::Int) { + auto begin = &set.iConsts[StartRegister]; + auto end = &begin[Count]; + + std::copy(begin, end, reinterpret_cast(pConstantData)); + } 
+ else { + for (uint32_t i = 0; i < Count; i++) { + const uint32_t constantIdx = StartRegister + i; + const uint32_t arrayIdx = constantIdx / 32; + const uint32_t bitIdx = constantIdx % 32; + + const uint32_t bit = (1u << bitIdx); + + bool constValue = set.bConsts[arrayIdx] & bit; + pConstantData[i] = constValue ? TRUE : FALSE; + } + } + + return D3D_OK; + }; + + return ProgramType == DxsoProgramTypes::VertexShader + ? GetHelper(m_state.vsConsts) + : GetHelper(m_state.psConsts); + } + + void UpdateFixedFunctionVS(); + + void UpdateFixedFunctionPS(); + + void ApplyPrimitiveType( + DxvkContext* pContext, + D3DPRIMITIVETYPE PrimType); + + bool UseProgrammableVS(); + + bool UseProgrammablePS(); + + void UpdateSamplerSpecConsant(uint32_t value); + + void UpdateProjectionSpecConstant(uint32_t value); + + }; + +} diff --git a/src/d3d9/d3d9_device_child.h b/src/d3d9/d3d9_device_child.h new file mode 100644 index 000000000..433a269ac --- /dev/null +++ b/src/d3d9/d3d9_device_child.h @@ -0,0 +1,61 @@ +#pragma once + +#include "d3d9_include.h" + +namespace dxvk { + + class D3D9DeviceEx; + + template + class D3D9DeviceChild : public ComObjectClamp { + + public: + + D3D9DeviceChild(D3D9DeviceEx* pDevice) + : m_parent( pDevice ) { } + + ULONG STDMETHODCALLTYPE AddRef() { + uint32_t refCount = this->m_refCount++; + if (unlikely(!refCount)) { + this->AddRefPrivate(); + GetDevice()->AddRef(); + } + + return refCount + 1; + } + + ULONG STDMETHODCALLTYPE Release() { + uint32_t refCount = --this->m_refCount; + if (unlikely(!refCount)) { + auto* pDevice = GetDevice(); + this->ReleasePrivate(); + pDevice->Release(); + } + return refCount; + } + + HRESULT STDMETHODCALLTYPE GetDevice(IDirect3DDevice9** ppDevice) { + InitReturnPtr(ppDevice); + + if (ppDevice == nullptr) + return D3DERR_INVALIDCALL; + + *ppDevice = ref(GetDevice()); + return D3D_OK; + } + + IDirect3DDevice9Ex* GetDevice() { + return reinterpret_cast(m_parent); + } + + D3D9DeviceEx* GetParent() { + return m_parent; + } + + 
protected: + + D3D9DeviceEx* m_parent; + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_fixed_function.cpp b/src/d3d9/d3d9_fixed_function.cpp new file mode 100644 index 000000000..e0679c99c --- /dev/null +++ b/src/d3d9/d3d9_fixed_function.cpp @@ -0,0 +1,2311 @@ +#include "d3d9_fixed_function.h" + +#include "d3d9_device.h" +#include "d3d9_spec_constants.h" + +#include "../dxvk/dxvk_hash.h" +#include "../dxvk/dxvk_spec_const.h" + +#include "../spirv/spirv_module.h" + +#include + +namespace dxvk { + + D3D9FixedFunctionOptions::D3D9FixedFunctionOptions(const D3D9Options* options) { + invariantPosition = options->invariantPosition; + } + + uint32_t DoFixedFunctionFog(SpirvModule& spvModule, const D3D9FogContext& fogCtx) { + uint32_t floatType = spvModule.defFloatType(32); + uint32_t uint32Type = spvModule.defIntType(32, 0); + uint32_t vec3Type = spvModule.defVectorType(floatType, 3); + uint32_t vec4Type = spvModule.defVectorType(floatType, 4); + uint32_t floatPtr = spvModule.defPointerType(floatType, spv::StorageClassPushConstant); + uint32_t vec3Ptr = spvModule.defPointerType(vec3Type, spv::StorageClassPushConstant); + + uint32_t fogColorMember = spvModule.constu32(uint32_t(D3D9RenderStateItem::FogColor)); + uint32_t fogColor = spvModule.opLoad(vec3Type, + spvModule.opAccessChain(vec3Ptr, fogCtx.RenderState, 1, &fogColorMember)); + + uint32_t fogScaleMember = spvModule.constu32(uint32_t(D3D9RenderStateItem::FogScale)); + uint32_t fogScale = spvModule.opLoad(floatType, + spvModule.opAccessChain(floatPtr, fogCtx.RenderState, 1, &fogScaleMember)); + + uint32_t fogEndMember = spvModule.constu32(uint32_t(D3D9RenderStateItem::FogEnd)); + uint32_t fogEnd = spvModule.opLoad(floatType, + spvModule.opAccessChain(floatPtr, fogCtx.RenderState, 1, &fogEndMember)); + + uint32_t fogDensityMember = spvModule.constu32(uint32_t(D3D9RenderStateItem::FogDensity)); + uint32_t fogDensity = spvModule.opLoad(floatType, + spvModule.opAccessChain(floatPtr, fogCtx.RenderState, 
1, &fogDensityMember)); + + uint32_t fogMode = spvModule.specConst32(uint32Type, 0); + + if (!fogCtx.IsPixel) { + spvModule.setDebugName(fogMode, "vertex_fog_mode"); + spvModule.decorateSpecId(fogMode, getSpecId(D3D9SpecConstantId::VertexFogMode)); + } + else { + spvModule.setDebugName(fogMode, "pixel_fog_mode"); + spvModule.decorateSpecId(fogMode, getSpecId(D3D9SpecConstantId::PixelFogMode)); + } + + uint32_t fogEnabled = spvModule.specConstBool(false); + spvModule.setDebugName(fogEnabled, "fog_enabled"); + spvModule.decorateSpecId(fogEnabled, getSpecId(D3D9SpecConstantId::FogEnabled)); + + uint32_t doFog = spvModule.allocateId(); + uint32_t skipFog = spvModule.allocateId(); + + uint32_t returnType = fogCtx.IsPixel ? vec4Type : floatType; + uint32_t returnTypePtr = spvModule.defPointerType(returnType, spv::StorageClassPrivate); + uint32_t returnValuePtr = spvModule.newVar(returnTypePtr, spv::StorageClassPrivate); + spvModule.opStore(returnValuePtr, fogCtx.IsPixel ? fogCtx.oColor : spvModule.constf32(0.0f)); + + // Actually do the fog now we have all the vars in-place. + + spvModule.opSelectionMerge(skipFog, spv::SelectionControlMaskNone); + spvModule.opBranchConditional(fogEnabled, doFog, skipFog); + + spvModule.opLabel(doFog); + + uint32_t wIndex = 3; + uint32_t zIndex = 2; + + uint32_t w = spvModule.opCompositeExtract(floatType, fogCtx.vPos, 1, &wIndex); + uint32_t z = spvModule.opCompositeExtract(floatType, fogCtx.vPos, 1, &zIndex); + + uint32_t depth = 0; + if (fogCtx.IsPixel) + depth = spvModule.opFMul(floatType, z, spvModule.opFDiv(floatType, spvModule.constf32(1.0f), w)); + else { + if (fogCtx.RangeFog) { + std::array indices = { 0, 1, 2 }; + uint32_t pos3 = spvModule.opVectorShuffle(vec3Type, fogCtx.vPos, fogCtx.vPos, indices.size(), indices.data()); + depth = spvModule.opLength(floatType, pos3); + } + else + depth = fogCtx.HasFogInput + ? 
fogCtx.vFog + : spvModule.opFAbs(floatType, z); + } + + uint32_t applyFogFactor = spvModule.allocateId(); + + std::array fogVariables; + + std::array fogCaseLabels = { { + { uint32_t(D3DFOG_NONE), spvModule.allocateId() }, + { uint32_t(D3DFOG_EXP), spvModule.allocateId() }, + { uint32_t(D3DFOG_EXP2), spvModule.allocateId() }, + { uint32_t(D3DFOG_LINEAR), spvModule.allocateId() }, + } }; + + + spvModule.opSelectionMerge(applyFogFactor, spv::SelectionControlMaskNone); + spvModule.opSwitch(fogMode, + fogCaseLabels[D3DFOG_NONE].labelId, + fogCaseLabels.size(), + fogCaseLabels.data()); + + for (uint32_t i = 0; i < fogCaseLabels.size(); i++) { + spvModule.opLabel(fogCaseLabels[i].labelId); + + fogVariables[i].labelId = fogCaseLabels[i].labelId; + fogVariables[i].varId = [&] { + auto mode = D3DFOGMODE(fogCaseLabels[i].literal); + switch (mode) { + default: + // vFog + case D3DFOG_NONE: { + return fogCtx.IsPixel + ? fogCtx.vFog + : spvModule.constf32(1.0f); + } + + // (end - d) / (end - start) + case D3DFOG_LINEAR: { + uint32_t fogFactor = spvModule.opFSub(floatType, fogEnd, depth); + fogFactor = spvModule.opFMul(floatType, fogFactor, fogScale); + fogFactor = spvModule.opFClamp(floatType, fogFactor, spvModule.constf32(0.0f), spvModule.constf32(1.0f)); + return fogFactor; + } + + // 1 / (e^[d * density])^2 + case D3DFOG_EXP2: + // 1 / (e^[d * density]) + case D3DFOG_EXP: { + uint32_t fogFactor = spvModule.opFMul(floatType, depth, fogDensity); + + if (mode == D3DFOG_EXP2) + fogFactor = spvModule.opFMul(floatType, fogFactor, fogFactor); + + // Provides the rcp. 
+ fogFactor = spvModule.opFNegate(floatType, fogFactor); + fogFactor = spvModule.opExp(floatType, fogFactor); + return fogFactor; + } + } + }(); + + spvModule.opBranch(applyFogFactor); + } + + spvModule.opLabel(applyFogFactor); + + uint32_t fogFactor = spvModule.opPhi(floatType, + fogVariables.size(), + fogVariables.data()); + + uint32_t fogRetValue = 0; + + // Return the new color if we are doing this in PS + // or just the fog factor for oFog in VS + if (fogCtx.IsPixel) { + std::array indices = { 0, 1, 2, 6 }; + + uint32_t color = fogCtx.oColor; + + uint32_t color3 = spvModule.opVectorShuffle(vec3Type, color, color, 3, indices.data()); + + std::array fogFacIndices = { fogFactor, fogFactor, fogFactor }; + uint32_t fogFact3 = spvModule.opCompositeConstruct(vec3Type, fogFacIndices.size(), fogFacIndices.data()); + + uint32_t lerpedFrog = spvModule.opFMix(vec3Type, fogColor, color3, fogFact3); + + fogRetValue = spvModule.opVectorShuffle(vec4Type, lerpedFrog, color, indices.size(), indices.data()); + } + else + fogRetValue = fogFactor; + + spvModule.opStore(returnValuePtr, fogRetValue); + + spvModule.opBranch(skipFog); + + spvModule.opLabel(skipFog); + + return spvModule.opLoad(returnType, returnValuePtr); + } + + + uint32_t SetupRenderStateBlock(SpirvModule& spvModule) { + uint32_t floatType = spvModule.defFloatType(32); + uint32_t vec3Type = spvModule.defVectorType(floatType, 3); + + std::array rsMembers = {{ + vec3Type, + floatType, + floatType, + floatType, + floatType, + + floatType, + floatType, + floatType, + floatType, + floatType, + floatType, + }}; + + uint32_t rsStruct = spvModule.defStructTypeUnique(rsMembers.size(), rsMembers.data()); + uint32_t rsBlock = spvModule.newVar( + spvModule.defPointerType(rsStruct, spv::StorageClassPushConstant), + spv::StorageClassPushConstant); + + spvModule.setDebugName (rsStruct, "render_state_t"); + spvModule.decorate (rsStruct, spv::DecorationBlock); + spvModule.setDebugMemberName (rsStruct, 0, "fog_color"); + 
spvModule.memberDecorateOffset (rsStruct, 0, offsetof(D3D9RenderStateInfo, fogColor)); + spvModule.setDebugMemberName (rsStruct, 1, "fog_scale"); + spvModule.memberDecorateOffset (rsStruct, 1, offsetof(D3D9RenderStateInfo, fogScale)); + spvModule.setDebugMemberName (rsStruct, 2, "fog_end"); + spvModule.memberDecorateOffset (rsStruct, 2, offsetof(D3D9RenderStateInfo, fogEnd)); + spvModule.setDebugMemberName (rsStruct, 3, "fog_density"); + spvModule.memberDecorateOffset (rsStruct, 3, offsetof(D3D9RenderStateInfo, fogDensity)); + spvModule.setDebugMemberName (rsStruct, 4, "alpha_ref"); + spvModule.memberDecorateOffset (rsStruct, 4, offsetof(D3D9RenderStateInfo, alphaRef)); + spvModule.setDebugMemberName (rsStruct, 5, "point_size"); + spvModule.memberDecorateOffset (rsStruct, 5, offsetof(D3D9RenderStateInfo, pointSize)); + spvModule.setDebugMemberName (rsStruct, 6, "point_size_min"); + spvModule.memberDecorateOffset (rsStruct, 6, offsetof(D3D9RenderStateInfo, pointSizeMin)); + spvModule.setDebugMemberName (rsStruct, 7, "point_size_max"); + spvModule.memberDecorateOffset (rsStruct, 7, offsetof(D3D9RenderStateInfo, pointSizeMax)); + spvModule.setDebugMemberName (rsStruct, 8, "point_scale_a"); + spvModule.memberDecorateOffset (rsStruct, 8, offsetof(D3D9RenderStateInfo, pointScaleA)); + spvModule.setDebugMemberName (rsStruct, 9, "point_scale_b"); + spvModule.memberDecorateOffset (rsStruct, 9, offsetof(D3D9RenderStateInfo, pointScaleB)); + spvModule.setDebugMemberName (rsStruct, 10, "point_scale_c"); + spvModule.memberDecorateOffset (rsStruct, 10, offsetof(D3D9RenderStateInfo, pointScaleC)); + + spvModule.setDebugName (rsBlock, "render_state"); + + return rsBlock; + } + + + D3D9PointSizeInfoVS GetPointSizeInfoVS(SpirvModule& spvModule, uint32_t vPos, uint32_t vtx, uint32_t perVertPointSize, uint32_t rsBlock) { + uint32_t floatType = spvModule.defFloatType(32); + uint32_t floatPtr = spvModule.defPointerType(floatType, spv::StorageClassPushConstant); + uint32_t vec3Type = 
spvModule.defVectorType(floatType, 3); + uint32_t vec4Type = spvModule.defVectorType(floatType, 4); + uint32_t uint32Type = spvModule.defIntType(32, 0); + uint32_t boolType = spvModule.defBoolType(); + + auto LoadFloat = [&](D3D9RenderStateItem item) { + uint32_t index = spvModule.constu32(uint32_t(item)); + return spvModule.opLoad(floatType, spvModule.opAccessChain(floatPtr, rsBlock, 1, &index)); + }; + + uint32_t pointMode = spvModule.specConst32(uint32Type, 0); + spvModule.setDebugName(pointMode, "point_mode"); + spvModule.decorateSpecId(pointMode, getSpecId(D3D9SpecConstantId::PointMode)); + + uint32_t scaleBit = spvModule.opBitFieldUExtract(uint32Type, pointMode, spvModule.consti32(0), spvModule.consti32(1)); + uint32_t isScale = spvModule.opIEqual(boolType, scaleBit, spvModule.constu32(1)); + + uint32_t regularValue = perVertPointSize != 0 ? perVertPointSize : LoadFloat(D3D9RenderStateItem::PointSize); + + uint32_t scaleC = LoadFloat(D3D9RenderStateItem::PointScaleC); + uint32_t scaleB = LoadFloat(D3D9RenderStateItem::PointScaleB); + uint32_t scaleA = LoadFloat(D3D9RenderStateItem::PointScaleA); + + std::array indices = { 0, 1, 2, 3 }; + + uint32_t vtx3; + if (vPos != 0) { + vPos = spvModule.opLoad(vec4Type, vPos); + + uint32_t rhw = spvModule.opCompositeExtract(floatType, vPos, 1, &indices[3]); + rhw = spvModule.opFDiv(floatType, spvModule.constf32(1.0f), rhw); + uint32_t pos3 = spvModule.opVectorShuffle(vec3Type, vPos, vPos, 3, indices.data()); + vtx3 = spvModule.opVectorTimesScalar(vec3Type, pos3, rhw); + } + else { + vtx3 = spvModule.opVectorShuffle(vec3Type, vtx, vtx, 3, indices.data()); + } + + uint32_t DeSqr = spvModule.opDot (floatType, vtx3, vtx3); + uint32_t De = spvModule.opSqrt(floatType, DeSqr); + uint32_t scaleValue = spvModule.opFMul(floatType, scaleC, DeSqr); + scaleValue = spvModule.opFFma(floatType, scaleB, De, scaleValue); + scaleValue = spvModule.opFAdd(floatType, scaleA, scaleValue); + scaleValue = spvModule.opSqrt(floatType, scaleValue); 
+ scaleValue = spvModule.opFDiv(floatType, regularValue, scaleValue); + + uint32_t value = spvModule.opSelect(floatType, isScale, scaleValue, regularValue); + + uint32_t min = LoadFloat(D3D9RenderStateItem::PointSizeMin); + uint32_t max = LoadFloat(D3D9RenderStateItem::PointSizeMax); + + D3D9PointSizeInfoVS info; + info.defaultValue = value; + info.min = min; + info.max = max; + + return info; + } + + + D3D9PointSizeInfoPS GetPointSizeInfoPS(SpirvModule& spvModule, uint32_t rsBlock) { + uint32_t uint32Type = spvModule.defIntType(32, 0); + uint32_t boolType = spvModule.defBoolType(); + uint32_t boolVec4 = spvModule.defVectorType(boolType, 4); + + uint32_t pointMode = spvModule.specConst32(uint32Type, 0); + spvModule.setDebugName(pointMode, "point_mode"); + spvModule.decorateSpecId(pointMode, getSpecId(D3D9SpecConstantId::PointMode)); + + uint32_t spriteBit = spvModule.opBitFieldUExtract(uint32Type, pointMode, spvModule.consti32(1), spvModule.consti32(1)); + uint32_t isSprite = spvModule.opIEqual(boolType, spriteBit, spvModule.constu32(1)); + + std::array isSpriteIndices; + for (uint32_t i = 0; i < isSpriteIndices.size(); i++) + isSpriteIndices[i] = isSprite; + + isSprite = spvModule.opCompositeConstruct(boolVec4, isSpriteIndices.size(), isSpriteIndices.data()); + + D3D9PointSizeInfoPS info; + info.isSprite = isSprite; + + return info; + } + + + uint32_t GetPointCoord(SpirvModule& spvModule, std::vector& entryPointInterfaces) { + uint32_t floatType = spvModule.defFloatType(32); + uint32_t vec2Type = spvModule.defVectorType(floatType, 2); + uint32_t vec4Type = spvModule.defVectorType(floatType, 4); + uint32_t vec2Ptr = spvModule.defPointerType(vec2Type, spv::StorageClassInput); + + uint32_t pointCoordPtr = spvModule.newVar(vec2Ptr, spv::StorageClassInput); + + spvModule.decorateBuiltIn(pointCoordPtr, spv::BuiltInPointCoord); + entryPointInterfaces.push_back(pointCoordPtr); + + uint32_t pointCoord = spvModule.opLoad(vec2Type, pointCoordPtr); + + std::array indices = { 
0, 1, 2, 3 }; + + std::array pointCoordIndices = { + spvModule.opCompositeExtract(floatType, pointCoord, 1, &indices[0]), + spvModule.opCompositeExtract(floatType, pointCoord, 1, &indices[1]), + spvModule.constf32(0.0f), + spvModule.constf32(0.0f) + }; + + return spvModule.opCompositeConstruct(vec4Type, pointCoordIndices.size(), pointCoordIndices.data()); + } + + + uint32_t GetSharedConstants(SpirvModule& spvModule) { + uint32_t float_t = spvModule.defFloatType(32); + uint32_t vec2_t = spvModule.defVectorType(float_t, 2); + uint32_t vec4_t = spvModule.defVectorType(float_t, 4); + + std::array stageMembers = { + vec4_t, + + vec2_t, + vec2_t, + + float_t, + float_t, + }; + + std::array members; + + for (auto& member : members) + member = stageMembers; + + const uint32_t structType = + spvModule.defStructType(members.size() * stageMembers.size(), members[0].data()); + + spvModule.decorateBlock(structType); + + uint32_t offset = 0; + for (uint32_t stage = 0; stage < caps::TextureStageCount; stage++) { + spvModule.memberDecorateOffset(structType, stage * D3D9SharedPSStages_Count + D3D9SharedPSStages_Constant, offset); + offset += sizeof(float) * 4; + + spvModule.memberDecorateOffset(structType, stage * D3D9SharedPSStages_Count + D3D9SharedPSStages_BumpEnvMat0, offset); + offset += sizeof(float) * 2; + + spvModule.memberDecorateOffset(structType, stage * D3D9SharedPSStages_Count + D3D9SharedPSStages_BumpEnvMat1, offset); + offset += sizeof(float) * 2; + + spvModule.memberDecorateOffset(structType, stage * D3D9SharedPSStages_Count + D3D9SharedPSStages_BumpEnvLScale, offset); + offset += sizeof(float); + + spvModule.memberDecorateOffset(structType, stage * D3D9SharedPSStages_Count + D3D9SharedPSStages_BumpEnvLOffset, offset); + offset += sizeof(float); + + // Padding... 
+ offset += sizeof(float) * 2; + } + + uint32_t sharedState = spvModule.newVar( + spvModule.defPointerType(structType, spv::StorageClassUniform), + spv::StorageClassUniform); + + spvModule.setDebugName(sharedState, "D3D9SharedPS"); + + return sharedState; + } + + + enum class D3D9FFVSMembers { + WorldViewMatrix, + NormalMatrix, + ProjMatrix, + + Texcoord0, + Texcoord1, + Texcoord2, + Texcoord3, + Texcoord4, + Texcoord5, + Texcoord6, + Texcoord7, + + InverseOffset, + InverseExtent, + + GlobalAmbient, + + Light0, + Light1, + Light2, + Light3, + Light4, + Light5, + Light6, + Light7, + + MaterialDiffuse, + MaterialAmbient, + MaterialSpecular, + MaterialEmissive, + MaterialPower, + + TweenFactor, + + MemberCount + }; + + struct D3D9FFVertexData { + uint32_t constantBuffer; + uint32_t vertexBlendData; + uint32_t lightType; + + struct { + uint32_t worldview; + uint32_t normal; + uint32_t proj; + + uint32_t texcoord[8]; + + uint32_t invOffset; + uint32_t invExtent; + + uint32_t globalAmbient; + + uint32_t materialDiffuse; + uint32_t materialSpecular; + uint32_t materialAmbient; + uint32_t materialEmissive; + uint32_t materialPower; + uint32_t tweenFactor; + } constants; + + struct { + uint32_t POSITION; + uint32_t POSITION1; + uint32_t POINTSIZE; + uint32_t NORMAL; + uint32_t NORMAL1; + uint32_t TEXCOORD[8]; + uint32_t COLOR[2]; + uint32_t FOG; + + uint32_t BLENDWEIGHT; + uint32_t BLENDINDICES; + } in; + + struct { + uint32_t POSITION; + uint32_t POINTSIZE; + uint32_t NORMAL; + uint32_t TEXCOORD[8]; + uint32_t COLOR[2]; + uint32_t FOG; + } out; + }; + + enum D3D9FFPSMembers { + TextureFactor = 0, + + MemberCount + }; + + struct D3D9FFPixelData { + uint32_t constantBuffer; + uint32_t sharedState; + + struct { + uint32_t textureFactor; + } constants; + + struct { + uint32_t TEXCOORD[8]; + uint32_t COLOR[2]; + uint32_t FOG; + uint32_t POS; + } in; + + struct { + uint32_t texcoordCnt; + uint32_t typeId; + uint32_t varId; + uint32_t bound; + } samplers[8]; + + struct { + 
uint32_t COLOR; + } out; + }; + + class D3D9FFShaderCompiler { + + public: + + D3D9FFShaderCompiler( + Rc Device, + const D3D9FFShaderKeyVS& Key, + const std::string& Name, + D3D9FixedFunctionOptions Options); + + D3D9FFShaderCompiler( + Rc Device, + const D3D9FFShaderKeyFS& Key, + const std::string& Name, + D3D9FixedFunctionOptions Options); + + Rc compile(); + + DxsoIsgn isgn() { return m_isgn; } + + private: + + // Returns value for inputs + // Returns ptr for outputs + uint32_t declareIO(bool input, DxsoSemantic semantic, spv::BuiltIn builtin = spv::BuiltInMax); + + void compileVS(); + + void setupRenderStateInfo(); + + void emitLightTypeDecl(); + + void emitBaseBufferDecl(); + + void emitVertexBlendDecl(); + + void setupVS(); + + void compilePS(); + + void setupPS(); + + void emitPsSharedConstants(); + + void alphaTestPS(); + + bool isVS() { return m_programType == DxsoProgramType::VertexShader; } + bool isPS() { return !isVS(); } + + std::string m_filename; + + SpirvModule m_module; + std::vector + m_resourceSlots; + DxvkInterfaceSlots m_interfaceSlots; + std::vector m_entryPointInterfaces; + + DxsoProgramType m_programType; + D3D9FFShaderKeyVS m_vsKey; + D3D9FFShaderKeyFS m_fsKey; + + D3D9FFVertexData m_vs = { }; + D3D9FFPixelData m_ps = { }; + + DxsoIsgn m_isgn; + DxsoIsgn m_osgn; + + uint32_t m_floatType; + uint32_t m_uint32Type; + uint32_t m_vec4Type; + uint32_t m_vec3Type; + uint32_t m_mat3Type; + uint32_t m_mat4Type; + + uint32_t m_entryPointId; + + uint32_t m_rsBlock; + uint32_t m_mainFuncLabel; + + D3D9FixedFunctionOptions m_options; + }; + + D3D9FFShaderCompiler::D3D9FFShaderCompiler( + Rc Device, + const D3D9FFShaderKeyVS& Key, + const std::string& Name, + D3D9FixedFunctionOptions Options) + : m_options (Options) { + m_programType = DxsoProgramTypes::VertexShader; + m_vsKey = Key; + m_filename = Name; + } + + + D3D9FFShaderCompiler::D3D9FFShaderCompiler( + Rc Device, + const D3D9FFShaderKeyFS& Key, + const std::string& Name, + 
D3D9FixedFunctionOptions Options) + : m_options(Options) { + m_programType = DxsoProgramTypes::PixelShader; + m_fsKey = Key; + m_filename = Name; + } + + + Rc D3D9FFShaderCompiler::compile() { + m_floatType = m_module.defFloatType(32); + m_uint32Type = m_module.defIntType(32, 0); + m_vec4Type = m_module.defVectorType(m_floatType, 4); + m_vec3Type = m_module.defVectorType(m_floatType, 3); + m_mat3Type = m_module.defMatrixType(m_vec3Type, 3); + m_mat4Type = m_module.defMatrixType(m_vec4Type, 4); + + m_entryPointId = m_module.allocateId(); + + // Set the shader name so that we recognize it in renderdoc + m_module.setDebugSource( + spv::SourceLanguageUnknown, 0, + m_module.addDebugString(m_filename.c_str()), + nullptr); + + // Set the memory model. This is the same for all shaders. + m_module.setMemoryModel( + spv::AddressingModelLogical, + spv::MemoryModelGLSL450); + + m_module.enableCapability(spv::CapabilityShader); + m_module.enableCapability(spv::CapabilityImageQuery); + + m_module.functionBegin( + m_module.defVoidType(), m_entryPointId, m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr), + spv::FunctionControlMaskNone); + m_module.setDebugName(m_entryPointId, "main"); + + m_mainFuncLabel = m_module.allocateId(); + m_module.opLabel(m_mainFuncLabel); + + if (isVS()) + compileVS(); + else + compilePS(); + + m_module.opReturn(); + m_module.functionEnd(); + + // Declare the entry point, we now have all the + // information we need, including the interfaces + m_module.addEntryPoint(m_entryPointId, + isVS() ? spv::ExecutionModelVertex : spv::ExecutionModelFragment, "main", + m_entryPointInterfaces.size(), + m_entryPointInterfaces.data()); + + DxvkShaderOptions shaderOptions = { }; + + DxvkShaderConstData constData = { }; + + // Create the shader module object + return new DxvkShader( + isVS() ? 
VK_SHADER_STAGE_VERTEX_BIT : VK_SHADER_STAGE_FRAGMENT_BIT, + m_resourceSlots.size(), + m_resourceSlots.data(), + m_interfaceSlots, + m_module.compile(), + shaderOptions, + std::move(constData)); + } + + + uint32_t D3D9FFShaderCompiler::declareIO(bool input, DxsoSemantic semantic, spv::BuiltIn builtin) { + // Declare in ISGN and do linkage + auto& sgn = input + ? m_isgn : m_osgn; + + uint32_t& slots = input + ? m_interfaceSlots.inputSlots + : m_interfaceSlots.outputSlots; + + uint32_t i = sgn.elemCount++; + + uint32_t slot = i; + + if (builtin == spv::BuiltInMax) { + if (input != isVS()) { + slot = RegisterLinkerSlot(semantic); // Requires linkage... + } + + slots |= 1u << slot; + } + + auto& elem = sgn.elems[i]; + elem.slot = slot; + elem.semantic = semantic; + + // Declare variable + spv::StorageClass storageClass = input ? + spv::StorageClassInput : spv::StorageClassOutput; + + const bool scalar = semantic.usage == DxsoUsage::Fog || semantic.usage == DxsoUsage::PointSize; + uint32_t type = scalar ? m_floatType : m_vec4Type; + + uint32_t ptrType = m_module.defPointerType(type, storageClass); + + uint32_t ptr = m_module.newVar(ptrType, storageClass); + + if (builtin == spv::BuiltInMax) + m_module.decorateLocation(ptr, slot); + else + m_module.decorateBuiltIn(ptr, builtin); + + bool diffuseOrSpec = semantic == DxsoSemantic{ DxsoUsage::Color, 0 } + || semantic == DxsoSemantic{ DxsoUsage::Color, 1 }; + + if (diffuseOrSpec && m_fsKey.Stages[0].Contents.GlobalFlatShade) + m_module.decorate(ptr, spv::DecorationFlat); + + std::string name = str::format(input ? 
"in_" : "out_", semantic.usage, semantic.usageIndex); + m_module.setDebugName(ptr, name.c_str()); + + m_entryPointInterfaces.push_back(ptr); + + if (input) + return m_module.opLoad(type, ptr); + + return ptr; + } + + /* compileVS(): emits the body of the fixed-function vertex shader. After setupVS() has declared constants and I/O, it transforms position and normal (tweening, weighted vertex blending or a single worldview matrix; the HasPositionT path only rescales the incoming position), generates the per-stage texture coordinates, evaluates lighting when UseLighting is set, and finally writes fog and point size. All choices are driven by the bits in m_vsKey.Data.Contents. */ + void D3D9FFShaderCompiler::compileVS() { + setupVS(); + + std::array indices = { 0, 1, 2, 3 }; + /* Shuffling with the first three of these indices extracts .xyz from a vec4. */ + uint32_t gl_Position = m_vs.in.POSITION; + uint32_t vtx = m_vs.in.POSITION; + uint32_t normal = m_module.opVectorShuffle(m_vec3Type, m_vs.in.NORMAL, m_vs.in.NORMAL, 3, indices.data()); + /* Tweening: interpolate between the first and second position/normal inputs by tweenFactor. NOTE(review): vtx and vtx1 are declared by declareIO as vec4 and tweenFactor is loaded as a scalar float, yet opFMix is given m_vec3Type for the position as well - confirm this is intended. */ + if (m_vsKey.Data.Contents.VertexBlendMode == D3D9FF_VertexBlendMode_Tween) { + uint32_t vtx1 = m_vs.in.POSITION1; + uint32_t normal1 = m_module.opVectorShuffle(m_vec3Type, m_vs.in.NORMAL1, m_vs.in.NORMAL1, 3, indices.data()); + + vtx = m_module.opFMix(m_vec3Type, vtx, vtx1, m_vs.constants.tweenFactor); + normal = m_module.opFMix(m_vec3Type, normal, normal1, m_vs.constants.tweenFactor); + } + + const uint32_t wIndex = 3; + + if (!m_vsKey.Data.Contents.HasPositionT) { + if (m_vsKey.Data.Contents.VertexBlendMode == D3D9FF_VertexBlendMode_Normal) { + uint32_t blendWeightRemaining = m_module.constf32(1); + uint32_t vtxSum = m_module.constvec4f32(0, 0, 0, 0); + uint32_t nrmSum = m_module.constvec3f32(0, 0, 0); + /* VertexBlendCount + 1 matrices are blended; the last one receives whatever weight is left after the explicit blend weights have been subtracted from 1. */ + for (uint32_t i = 0; i <= m_vsKey.Data.Contents.VertexBlendCount; i++) { + std::array arrayIndices; + + if (m_vsKey.Data.Contents.VertexBlendIndexed) { + uint32_t index = m_module.opCompositeExtract(m_floatType, m_vs.in.BLENDINDICES, 1, &i); + index = m_module.opConvertFtoU(m_uint32Type, m_module.opRound(m_floatType, index)); + + arrayIndices = { m_module.constu32(0), index }; + } + else + arrayIndices = { m_module.constu32(0), m_module.constu32(i) }; + + uint32_t worldview = m_module.opLoad(m_mat4Type, + m_module.opAccessChain( + m_module.defPointerType(m_mat4Type, spv::StorageClassUniform), m_vs.vertexBlendData, arrayIndices.size(), arrayIndices.data())); + + uint32_t nrmMtx = worldview; + + std::array mtxIndices; + for (uint32_t i = 0; i < 3; i++) { +
mtxIndices[i] = m_module.opCompositeExtract(m_vec4Type, nrmMtx, 1, &i); + mtxIndices[i] = m_module.opVectorShuffle(m_vec3Type, mtxIndices[i], mtxIndices[i], 3, indices.data()); + } + nrmMtx = m_module.opCompositeConstruct(m_mat3Type, mtxIndices.size(), mtxIndices.data()); + + uint32_t vtxResult = m_module.opVectorTimesMatrix(m_vec4Type, vtx, worldview); + uint32_t nrmResult = m_module.opVectorTimesMatrix(m_vec3Type, normal, nrmMtx); + + uint32_t weight; + if (i != m_vsKey.Data.Contents.VertexBlendCount) { + weight = m_module.opCompositeExtract(m_floatType, m_vs.in.BLENDWEIGHT, 1, &i); + blendWeightRemaining = m_module.opFSub(m_floatType, blendWeightRemaining, weight); + } + else + weight = blendWeightRemaining; + + vtxResult = m_module.opVectorTimesScalar(m_vec4Type, vtxResult, weight); + nrmResult = m_module.opVectorTimesScalar(m_vec3Type, nrmResult, weight); + + vtxSum = m_module.opFAdd(m_vec4Type, vtxSum, vtxResult); + nrmSum = m_module.opFAdd(m_vec3Type, nrmSum, nrmResult); + } + + vtx = vtxSum; + normal = nrmSum; + } + else { + vtx = m_module.opVectorTimesMatrix(m_vec4Type, vtx, m_vs.constants.worldview); + + uint32_t nrmMtx = m_vs.constants.normal; + + std::array mtxIndices; + for (uint32_t i = 0; i < 3; i++) { + mtxIndices[i] = m_module.opCompositeExtract(m_vec4Type, nrmMtx, 1, &i); + mtxIndices[i] = m_module.opVectorShuffle(m_vec3Type, mtxIndices[i], mtxIndices[i], 3, indices.data()); + } + nrmMtx = m_module.opCompositeConstruct(m_mat3Type, mtxIndices.size(), mtxIndices.data()); + + normal = m_module.opMatrixTimesVector(m_vec3Type, nrmMtx, normal); + } + + // Some games rely no normals not being normal. 
+ if (m_vsKey.Data.Contents.NormalizeNormals) { + uint32_t bool_t = m_module.defBoolType(); + uint32_t bool3_t = m_module.defVectorType(bool_t, 3); + + uint32_t isZeroNormal = m_module.opAll(bool_t, m_module.opFOrdEqual(bool3_t, normal, m_module.constvec3f32(0.0f, 0.0f, 0.0f))); + + std::array members = { isZeroNormal, isZeroNormal, isZeroNormal }; + uint32_t isZeroNormal3 = m_module.opCompositeConstruct(bool3_t, members.size(), members.data()); + + normal = m_module.opNormalize(m_vec3Type, normal); + normal = m_module.opSelect(m_vec3Type, isZeroNormal3, m_module.constvec3f32(0.0f, 0.0f, 0.0f), normal); + } + + gl_Position = m_module.opVectorTimesMatrix(m_vec4Type, vtx, m_vs.constants.proj); + } else { + gl_Position = m_module.opFMul(m_vec4Type, gl_Position, m_vs.constants.invExtent); + gl_Position = m_module.opFAdd(m_vec4Type, gl_Position, m_vs.constants.invOffset); + + // We still need to account for perspective correction here... + + // gl_Position.w = 1.0f / gl_Position.w + // gl_Position.xyz *= gl_Position.w; + + uint32_t bool_t = m_module.defBoolType(); + + uint32_t w = m_module.opCompositeExtract (m_floatType, gl_Position, 1, &wIndex); // w = gl_Position.w + uint32_t is0 = m_module.opFOrdEqual (bool_t, w, m_module.constf32(0)); // is0 = w == 0 + uint32_t rhw = m_module.opFDiv (m_floatType, m_module.constf32(1.0f), w); // rhw = 1.0f / w + rhw = m_module.opSelect (m_floatType, is0, m_module.constf32(1.0), rhw); // rhw = w == 0 ? 
1.0 : rhw + gl_Position = m_module.opVectorTimesScalar(m_vec4Type, gl_Position, rhw); // gl_Position.xyz *= rhw + gl_Position = m_module.opCompositeInsert (m_vec4Type, rhw, gl_Position, 1, &wIndex); // gl_Position.w = rhw + } + + m_module.opStore(m_vs.out.POSITION, gl_Position); + + std::array outNrmIndices; + for (uint32_t i = 0; i < 3; i++) + outNrmIndices[i] = m_module.opCompositeExtract(m_floatType, normal, 1, &i); + outNrmIndices[3] = m_module.constf32(1.0f); + + uint32_t outNrm = m_module.opCompositeConstruct(m_vec4Type, outNrmIndices.size(), outNrmIndices.data()); + + m_module.opStore(m_vs.out.NORMAL, outNrm); + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + uint32_t inputIndex = (m_vsKey.Data.Contents.TexcoordIndices >> (i * 3)) & 0b111; + uint32_t inputFlags = (m_vsKey.Data.Contents.TexcoordFlags >> (i * 3)) & 0b111; + + uint32_t transformed; + + const uint32_t wIndex = 3; + + uint32_t flags = (m_vsKey.Data.Contents.TransformFlags >> (i * 3)) & 0b111; + uint32_t count = flags; + switch (inputFlags) { + default: + case (D3DTSS_TCI_PASSTHRU >> TCIOffset): + transformed = m_vs.in.TEXCOORD[inputIndex & 0xFF]; + break; + + case (D3DTSS_TCI_CAMERASPACENORMAL >> TCIOffset): + transformed = outNrm; + count = 4; + break; + + case (D3DTSS_TCI_CAMERASPACEPOSITION >> TCIOffset): + transformed = m_module.opCompositeInsert(m_vec4Type, m_module.constf32(1.0f), vtx, 1, &wIndex); + count = 4; + break; + + case (D3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR >> TCIOffset): { + uint32_t vtx3 = m_module.opVectorShuffle(m_vec3Type, vtx, vtx, 3, indices.data()); + vtx3 = m_module.opNormalize(m_vec3Type, vtx3); + + uint32_t reflection = m_module.opReflect(m_vec3Type, vtx3, normal); + + std::array transformIndices; + for (uint32_t i = 0; i < 3; i++) + transformIndices[i] = m_module.opCompositeExtract(m_floatType, reflection, 1, &i); + transformIndices[3] = m_module.constf32(1.0f); + + transformed = m_module.opCompositeConstruct(m_vec4Type, transformIndices.size(), 
transformIndices.data()); + count = 4; + break; + } + + case (D3DTSS_TCI_SPHEREMAP >> TCIOffset): { + uint32_t vtx3 = m_module.opVectorShuffle(m_vec3Type, vtx, vtx, 3, indices.data()); + vtx3 = m_module.opNormalize(m_vec3Type, vtx3); + + uint32_t reflection = m_module.opReflect(m_vec3Type, vtx3, normal); + uint32_t m = m_module.opFAdd(m_vec3Type, reflection, m_module.constvec3f32(0, 0, 1)); + m = m_module.opLength(m_floatType, m); + m = m_module.opFMul(m_floatType, m, m_module.constf32(2.0f)); + + std::array transformIndices; + for (uint32_t i = 0; i < 2; i++) { + transformIndices[i] = m_module.opCompositeExtract(m_floatType, reflection, 1, &i); + transformIndices[i] = m_module.opFDiv(m_floatType, transformIndices[i], m); + transformIndices[i] = m_module.opFAdd(m_floatType, transformIndices[i], m_module.constf32(0.5f)); + } + + transformIndices[2] = m_module.constf32(0.0f); + transformIndices[3] = m_module.constf32(1.0f); + + transformed = m_module.opCompositeConstruct(m_vec4Type, transformIndices.size(), transformIndices.data()); + count = 4; + break; + } + } + + uint32_t type = flags; + if (type != D3DTTFF_DISABLE) { + if (!m_vsKey.Data.Contents.HasPositionT) { + for (uint32_t j = count; j < 4; j++) { + // If we're outside the component count of the vertex decl for this texcoord then we pad with zeroes. + // Otherwise, pad with ones. + + // Very weird quirk in order to get texcoord transforms to work like they do in native. + // In future, maybe we could sort this out properly by chopping matrices of different sizes, but thats + // a project for another day. + uint32_t texcoordCount = (m_vsKey.Data.Contents.TexcoordDeclMask >> (3 * inputIndex)) & 0x7; + uint32_t value = j > texcoordCount ? 
m_module.constf32(0) : m_module.constf32(1); + transformed = m_module.opCompositeInsert(m_vec4Type, value, transformed, 1, &j); + } + + transformed = m_module.opVectorTimesMatrix(m_vec4Type, transformed, m_vs.constants.texcoord[i]); + } + + // Pad the unused section of it with the value for projection. + uint32_t lastIdx = count - 1; + uint32_t projValue = m_module.opCompositeExtract(m_floatType, transformed, 1, &lastIdx); + + for (uint32_t j = count; j < 4; j++) + transformed = m_module.opCompositeInsert(m_vec4Type, projValue, transformed, 1, &j); + } + + m_module.opStore(m_vs.out.TEXCOORD[i], transformed); + } + + if (m_vsKey.Data.Contents.UseLighting) { + auto PickSource = [&](uint32_t Source, uint32_t Material) { + if (Source == D3DMCS_MATERIAL) + return Material; + else if (Source == D3DMCS_COLOR1) + return m_vs.in.COLOR[0]; + else + return m_vs.in.COLOR[1]; + }; + + uint32_t diffuseValue = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f); + uint32_t specularValue = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f); + uint32_t ambientValue = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f); + + for (uint32_t i = 0; i < m_vsKey.Data.Contents.LightCount; i++) { + uint32_t light_ptr_t = m_module.defPointerType(m_vs.lightType, spv::StorageClassUniform); + + uint32_t indexVal = m_module.constu32(uint32_t(D3D9FFVSMembers::Light0) + i); + uint32_t lightPtr = m_module.opAccessChain(light_ptr_t, m_vs.constantBuffer, 1, &indexVal); + + auto LoadLightItem = [&](uint32_t type, uint32_t idx) { + uint32_t typePtr = m_module.defPointerType(type, spv::StorageClassUniform); + + idx = m_module.constu32(idx); + + return m_module.opLoad(type, + m_module.opAccessChain(typePtr, lightPtr, 1, &idx)); + }; + + uint32_t diffuse = LoadLightItem(m_vec4Type, 0); + uint32_t specular = LoadLightItem(m_vec4Type, 1); + uint32_t ambient = LoadLightItem(m_vec4Type, 2); + uint32_t position = LoadLightItem(m_vec4Type, 3); + uint32_t direction = LoadLightItem(m_vec4Type, 4); + uint32_t type = 
LoadLightItem(m_uint32Type, 5); + uint32_t range = LoadLightItem(m_floatType, 6); + uint32_t falloff = LoadLightItem(m_floatType, 7); + uint32_t atten0 = LoadLightItem(m_floatType, 8); + uint32_t atten1 = LoadLightItem(m_floatType, 9); + uint32_t atten2 = LoadLightItem(m_floatType, 10); + uint32_t theta = LoadLightItem(m_floatType, 11); + uint32_t phi = LoadLightItem(m_floatType, 12); + + uint32_t bool_t = m_module.defBoolType(); + uint32_t bool3_t = m_module.defVectorType(bool_t, 3); + + uint32_t isSpot = m_module.opIEqual(bool_t, type, m_module.constu32(D3DLIGHT_SPOT)); + uint32_t isDirectional = m_module.opIEqual(bool_t, type, m_module.constu32(D3DLIGHT_DIRECTIONAL)); + + std::array members = { isDirectional, isDirectional, isDirectional }; + + uint32_t isDirectional3 = m_module.opCompositeConstruct(bool3_t, members.size(), members.data()); + + uint32_t vtx3 = m_module.opVectorShuffle(m_vec3Type, vtx, vtx, 3, indices.data()); + position = m_module.opVectorShuffle(m_vec3Type, position, position, 3, indices.data()); + direction = m_module.opVectorShuffle(m_vec3Type, direction, direction, 3, indices.data()); + + uint32_t delta = m_module.opFSub(m_vec3Type, position, vtx3); + uint32_t d = m_module.opLength(m_floatType, delta); + uint32_t hitDir = m_module.opFNegate(m_vec3Type, direction); + hitDir = m_module.opSelect(m_vec3Type, isDirectional3, hitDir, delta); + hitDir = m_module.opNormalize(m_vec3Type, hitDir); + + uint32_t atten = m_module.opFFma (m_floatType, d, atten2, atten1); + atten = m_module.opFFma (m_floatType, d, atten, atten0); + atten = m_module.opFDiv (m_floatType, m_module.constf32(1.0f), atten); + atten = m_module.opNMin (m_floatType, atten, m_module.constf32(FLT_MAX)); + + atten = m_module.opSelect(m_floatType, m_module.opFOrdGreaterThan(bool_t, d, range), m_module.constf32(0.0f), atten); + atten = m_module.opSelect(m_floatType, isDirectional, m_module.constf32(1.0f), atten); + + // Spot Lighting + { + uint32_t rho = m_module.opDot (m_floatType, 
m_module.opFNegate(m_vec3Type, hitDir), direction); + uint32_t spotAtten = m_module.opFSub(m_floatType, rho, phi); + spotAtten = m_module.opFDiv(m_floatType, spotAtten, m_module.opFSub(m_floatType, theta, phi)); + spotAtten = m_module.opPow (m_floatType, spotAtten, falloff); + + uint32_t insideThetaAndPhi = m_module.opFOrdLessThanEqual(bool_t, rho, theta); + uint32_t insidePhi = m_module.opFOrdGreaterThan(bool_t, rho, phi); + spotAtten = m_module.opSelect(m_floatType, insidePhi, spotAtten, m_module.constf32(0.0f)); + spotAtten = m_module.opSelect(m_floatType, insideThetaAndPhi, spotAtten, m_module.constf32(1.0f)); + spotAtten = m_module.opFClamp(m_floatType, spotAtten, m_module.constf32(0.0f), m_module.constf32(1.0f)); + + spotAtten = m_module.opFMul(m_floatType, atten, spotAtten); + atten = m_module.opSelect(m_floatType, isSpot, spotAtten, atten); + } + + + uint32_t hitDot = m_module.opDot(m_floatType, normal, hitDir); + hitDot = m_module.opFClamp(m_floatType, hitDot, m_module.constf32(0.0f), m_module.constf32(1.0f)); + + uint32_t diffuseness = m_module.opFMul(m_floatType, hitDot, atten); + + uint32_t mid; + if (m_vsKey.Data.Contents.LocalViewer) { + mid = m_module.opNormalize(m_vec3Type, vtx3); + mid = m_module.opFSub(m_vec3Type, hitDir, mid); + } + else + mid = m_module.opFSub(m_vec3Type, hitDir, m_module.constvec3f32(0.0f, 0.0f, 1.0f)); + + mid = m_module.opNormalize(m_vec3Type, mid); + + uint32_t midDot = m_module.opDot(m_floatType, normal, mid); + midDot = m_module.opFClamp(m_floatType, midDot, m_module.constf32(0.0f), m_module.constf32(1.0f)); + uint32_t doSpec = m_module.opFOrdGreaterThan(bool_t, midDot, m_module.constf32(0.0f)); + uint32_t specularness = m_module.opPow(m_floatType, midDot, m_vs.constants.materialPower); + specularness = m_module.opFMul(m_floatType, specularness, atten); + specularness = m_module.opSelect(m_floatType, doSpec, specularness, m_module.constf32(0.0f)); + + uint32_t lightAmbient = m_module.opVectorTimesScalar(m_vec4Type, 
ambient, atten); + uint32_t lightDiffuse = m_module.opVectorTimesScalar(m_vec4Type, diffuse, diffuseness); + uint32_t lightSpecular = m_module.opVectorTimesScalar(m_vec4Type, specular, specularness); + + ambientValue = m_module.opFAdd(m_vec4Type, ambientValue, lightAmbient); + diffuseValue = m_module.opFAdd(m_vec4Type, diffuseValue, lightDiffuse); + specularValue = m_module.opFAdd(m_vec4Type, specularValue, lightSpecular); + } + + uint32_t mat_diffuse = PickSource(m_vsKey.Data.Contents.DiffuseSource, m_vs.constants.materialDiffuse); + uint32_t mat_ambient = PickSource(m_vsKey.Data.Contents.AmbientSource, m_vs.constants.materialAmbient); + uint32_t mat_emissive = PickSource(m_vsKey.Data.Contents.EmissiveSource, m_vs.constants.materialEmissive); + uint32_t mat_specular = PickSource(m_vsKey.Data.Contents.SpecularSource, m_vs.constants.materialSpecular); + + std::array alphaSwizzle = {0, 1, 2, 7}; + uint32_t finalColor0 = m_module.opFFma(m_vec4Type, mat_ambient, m_vs.constants.globalAmbient, mat_emissive); + finalColor0 = m_module.opFFma(m_vec4Type, mat_ambient, ambientValue, finalColor0); + finalColor0 = m_module.opFFma(m_vec4Type, mat_diffuse, diffuseValue, finalColor0); + finalColor0 = m_module.opVectorShuffle(m_vec4Type, finalColor0, mat_diffuse, alphaSwizzle.size(), alphaSwizzle.data()); + + uint32_t finalColor1 = m_module.opFMul(m_vec4Type, mat_specular, specularValue); + + // Saturate + finalColor0 = m_module.opFClamp(m_vec4Type, finalColor0, + m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f), + m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f)); + + finalColor1 = m_module.opFClamp(m_vec4Type, finalColor1, + m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f), + m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f)); + + m_module.opStore(m_vs.out.COLOR[0], finalColor0); + m_module.opStore(m_vs.out.COLOR[1], finalColor1); + } + else { + m_module.opStore(m_vs.out.COLOR[0], m_vs.in.COLOR[0]); + m_module.opStore(m_vs.out.COLOR[1], m_vs.in.COLOR[1]); + } + + D3D9FogContext fogCtx; + 
fogCtx.IsPixel = false; + fogCtx.RangeFog = m_vsKey.Data.Contents.RangeFog; + fogCtx.RenderState = m_rsBlock; + fogCtx.vPos = vtx; + fogCtx.HasFogInput = m_vsKey.Data.Contents.HasFog; + fogCtx.vFog = m_vs.in.FOG; + fogCtx.oColor = 0; + m_module.opStore(m_vs.out.FOG, DoFixedFunctionFog(m_module, fogCtx)); + + auto pointInfo = GetPointSizeInfoVS(m_module, 0, vtx, m_vs.in.POINTSIZE, m_rsBlock); + + uint32_t pointSize = m_module.opFClamp(m_floatType, pointInfo.defaultValue, pointInfo.min, pointInfo.max); + m_module.opStore(m_vs.out.POINTSIZE, pointSize); + } + + /* setupRenderStateInfo(): creates the render-state block through SetupRenderStateBlock() and records which push-constant range this stage uses. The pixel stage covers offset 0 up to offsetof(D3D9RenderStateInfo, pointSize); any other stage starts at pointSize and spans six floats. NOTE(review): the literal 6 presumably matches the members from pointSize to the end of D3D9RenderStateInfo - confirm against the struct. */ + void D3D9FFShaderCompiler::setupRenderStateInfo() { + m_rsBlock = SetupRenderStateBlock(m_module); + + if (m_programType == DxsoProgramType::PixelShader) { + m_interfaceSlots.pushConstOffset = 0; + m_interfaceSlots.pushConstSize = offsetof(D3D9RenderStateInfo, pointSize); + } + else { + m_interfaceSlots.pushConstOffset = offsetof(D3D9RenderStateInfo, pointSize); + m_interfaceSlots.pushConstSize = sizeof(float) * 6; + } + } + + + void D3D9FFShaderCompiler::emitLightTypeDecl() { + std::array light_members = { + m_vec4Type, // Diffuse + m_vec4Type, // Specular + m_vec4Type, // Ambient + m_vec4Type, // Position + m_vec4Type, // Direction + m_uint32Type, // Type + m_floatType, // Range + m_floatType, // Falloff + m_floatType, // Attenuation0 + m_floatType, // Attenuation1 + m_floatType, // Attenuation2 + m_floatType, // Theta + m_floatType, // Phi + }; + + m_vs.lightType = + m_module.defStructType(light_members.size(), light_members.data()); + + m_module.setDebugName(m_vs.lightType, "light_t"); + + uint32_t offset = 0; + + m_module.memberDecorateOffset(m_vs.lightType, 0, offset); offset += 4 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 0, "Diffuse"); + m_module.memberDecorateOffset(m_vs.lightType, 1, offset); offset += 4 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 1, "Specular"); + m_module.memberDecorateOffset(m_vs.lightType, 2, offset); offset += 4 * sizeof(float); +
m_module.setDebugMemberName (m_vs.lightType, 2, "Ambient"); + /* emitLightTypeDecl, remaining members: every member of the light struct gets a byte offset and a debug name. Members 0-4 are vec4 (16 bytes each) and members 5-12 are 4-byte scalars, so the decorated layout adds up to 112 bytes. NOTE(review): emitBaseBufferDecl advances by sizeof(D3D9Light) per light - confirm that struct has the same layout. */ + m_module.memberDecorateOffset(m_vs.lightType, 3, offset); offset += 4 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 3, "Position"); + m_module.memberDecorateOffset(m_vs.lightType, 4, offset); offset += 4 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 4, "Direction"); + + m_module.memberDecorateOffset(m_vs.lightType, 5, offset); offset += 1 * sizeof(uint32_t); + m_module.setDebugMemberName (m_vs.lightType, 5, "Type"); + + m_module.memberDecorateOffset(m_vs.lightType, 6, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 6, "Range"); + m_module.memberDecorateOffset(m_vs.lightType, 7, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 7, "Falloff"); + + m_module.memberDecorateOffset(m_vs.lightType, 8, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 8, "Attenuation0"); + m_module.memberDecorateOffset(m_vs.lightType, 9, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 9, "Attenuation1"); + m_module.memberDecorateOffset(m_vs.lightType, 10, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 10, "Attenuation2"); + + m_module.memberDecorateOffset(m_vs.lightType, 11, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 11, "Theta"); + m_module.memberDecorateOffset(m_vs.lightType, 12, offset); offset += 1 * sizeof(float); + m_module.setDebugMemberName (m_vs.lightType, 12, "Phi"); + } + + + void D3D9FFShaderCompiler::emitBaseBufferDecl() { + // Constant Buffer for VS.
+ /* emitBaseBufferDecl: member types of the vertex-stage constant block; the members are addressed through D3D9FFVSMembers indices below and in setupVS(). */ std::array members = { + m_mat4Type, // WorldView + m_mat4Type, // Normal + m_mat4Type, // Proj + + m_mat4Type, // Texture0 + m_mat4Type, // Texture1 + m_mat4Type, // Texture2 + m_mat4Type, // Texture3 + m_mat4Type, // Texture4 + m_mat4Type, // Texture5 + m_mat4Type, // Texture6 + m_mat4Type, // Texture7 + + m_vec4Type, // Inverse Offset + m_vec4Type, // Inverse Extent + + m_vec4Type, // Global Ambient + + m_vs.lightType, // Light0 + m_vs.lightType, // Light1 + m_vs.lightType, // Light2 + m_vs.lightType, // Light3 + m_vs.lightType, // Light4 + m_vs.lightType, // Light5 + m_vs.lightType, // Light6 + m_vs.lightType, // Light7 + + m_vec4Type, // Material Diffuse + m_vec4Type, // Material Ambient + m_vec4Type, // Material Specular + m_vec4Type, // Material Emissive + m_floatType, // Material Power + + m_floatType, // Tween Factor + }; + + const uint32_t structType = + m_module.defStructType(members.size(), members.data()); + + m_module.decorateBlock(structType); + + uint32_t offset = 0; + /* Offsets are assigned sequentially. Every member before InverseOffset is a row-major mat4 with a 16-byte matrix stride. */ + for (uint32_t i = 0; i < uint32_t(D3D9FFVSMembers::InverseOffset); i++) { + m_module.memberDecorateOffset(structType, i, offset); + offset += sizeof(Matrix4); + m_module.memberDecorateMatrixStride(structType, i, 16); + m_module.memberDecorate(structType, i, spv::DecorationRowMajor); + } + /* vec4 members from InverseOffset up to, but not including, Light0. */ + for (uint32_t i = uint32_t(D3D9FFVSMembers::InverseOffset); i < uint32_t(D3D9FFVSMembers::Light0); i++) { + m_module.memberDecorateOffset(structType, i, offset); + offset += sizeof(Vector4); + } + /* One light struct per enabled-light slot; the member list above declares eight, so caps::MaxEnabledLights is presumably 8 - confirm. */ + for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) { + m_module.memberDecorateOffset(structType, uint32_t(D3D9FFVSMembers::Light0) + i, offset); + offset += sizeof(D3D9Light); + } + + for (uint32_t i = uint32_t(D3D9FFVSMembers::MaterialDiffuse); i < uint32_t(D3D9FFVSMembers::MaterialPower); i++) { + m_module.memberDecorateOffset(structType, i, offset); + offset += sizeof(Vector4); + } + + m_module.memberDecorateOffset(structType, uint32_t(D3D9FFVSMembers::MaterialPower), offset); + offset +=
sizeof(float); + + m_module.memberDecorateOffset(structType, uint32_t(D3D9FFVSMembers::TweenFactor), offset); + offset += sizeof(float); + + m_module.setDebugName(structType, "D3D9FixedFunctionVS"); + uint32_t member = 0; + m_module.setDebugMemberName(structType, member++, "WorldView"); + m_module.setDebugMemberName(structType, member++, "Normal"); + m_module.setDebugMemberName(structType, member++, "Projection"); + + m_module.setDebugMemberName(structType, member++, "TexcoordTransform0"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform1"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform2"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform3"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform4"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform5"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform6"); + m_module.setDebugMemberName(structType, member++, "TexcoordTransform7"); + + m_module.setDebugMemberName(structType, member++, "ViewportInfo_InverseOffset"); + m_module.setDebugMemberName(structType, member++, "ViewportInfo_InverseExtent"); + + m_module.setDebugMemberName(structType, member++, "GlobalAmbient"); + + m_module.setDebugMemberName(structType, member++, "Light0"); + m_module.setDebugMemberName(structType, member++, "Light1"); + m_module.setDebugMemberName(structType, member++, "Light2"); + m_module.setDebugMemberName(structType, member++, "Light3"); + m_module.setDebugMemberName(structType, member++, "Light4"); + m_module.setDebugMemberName(structType, member++, "Light5"); + m_module.setDebugMemberName(structType, member++, "Light6"); + m_module.setDebugMemberName(structType, member++, "Light7"); + + m_module.setDebugMemberName(structType, member++, "Material_Diffuse"); + m_module.setDebugMemberName(structType, member++, "Material_Ambient"); + m_module.setDebugMemberName(structType, member++, "Material_Specular"); + 
m_module.setDebugMemberName(structType, member++, "Material_Emissive"); + m_module.setDebugMemberName(structType, member++, "Material_Power"); + + m_module.setDebugMemberName(structType, member++, "TweenFactor"); + + m_vs.constantBuffer = m_module.newVar( + m_module.defPointerType(structType, spv::StorageClassUniform), + spv::StorageClassUniform); + + m_module.setDebugName(m_vs.constantBuffer, "consts"); + + const uint32_t bindingId = computeResourceSlotId( + DxsoProgramType::VertexShader, DxsoBindingType::ConstantBuffer, + DxsoConstantBuffers::VSFixedFunction); + + m_module.decorateDescriptorSet(m_vs.constantBuffer, 0); + m_module.decorateBinding(m_vs.constantBuffer, bindingId); + + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + resource.access = VK_ACCESS_UNIFORM_READ_BIT; + m_resourceSlots.push_back(resource); + } + + /* emitVertexBlendDecl(): declares the buffer that holds the vertex-blend matrices. The type is a BufferBlock struct whose only member, "WorldViewArray", is a runtime array of row-major mat4 with an array stride of sizeof(Matrix4). The variable is bound in descriptor set 0 at the slot computed for DxsoConstantBuffers::VSVertexBlendData, decorated NonWritable, and registered in m_resourceSlots as a storage buffer. */ + void D3D9FFShaderCompiler::emitVertexBlendDecl() { + const uint32_t arrayType = m_module.defRuntimeArrayTypeUnique(m_mat4Type); + m_module.decorateArrayStride(arrayType, sizeof(Matrix4)); + + const uint32_t structType = m_module.defStructTypeUnique(1, &arrayType); + + m_module.memberDecorateMatrixStride(structType, 0, 16); + m_module.memberDecorate(structType, 0, spv::DecorationRowMajor); + + m_module.decorate(structType, spv::DecorationBufferBlock); + + m_module.memberDecorateOffset(structType, 0, 0); + + m_module.setDebugName(structType, "D3D9FF_VertexBlendData"); + m_module.setDebugMemberName(structType, 0, "WorldViewArray"); + + m_vs.vertexBlendData = m_module.newVar( + m_module.defPointerType(structType, spv::StorageClassUniform), + spv::StorageClassUniform); + + m_module.setDebugName(m_vs.vertexBlendData, "VertexBlendData"); + + const uint32_t bindingId = computeResourceSlotId( + DxsoProgramType::VertexShader, DxsoBindingType::ConstantBuffer, + DxsoConstantBuffers::VSVertexBlendData); + +
m_module.decorateDescriptorSet(m_vs.vertexBlendData, 0); + m_module.decorateBinding(m_vs.vertexBlendData, bindingId); + + m_module.decorate(m_vs.vertexBlendData, spv::DecorationNonWritable); + + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + resource.access = VK_ACCESS_SHADER_READ_BIT; + m_resourceSlots.push_back(resource); + } + + + void D3D9FFShaderCompiler::setupVS() { + setupRenderStateInfo(); + + // VS Caps + m_module.enableCapability(spv::CapabilityClipDistance); + m_module.enableCapability(spv::CapabilityDrawParameters); + + m_module.enableExtension("SPV_KHR_shader_draw_parameters"); + + emitLightTypeDecl(); + emitBaseBufferDecl(); + + if (m_vsKey.Data.Contents.VertexBlendMode == D3D9FF_VertexBlendMode_Normal) + emitVertexBlendDecl(); + + // Load constants + auto LoadConstant = [&](uint32_t type, uint32_t idx) { + uint32_t offset = m_module.constu32(idx); + uint32_t typePtr = m_module.defPointerType(type, spv::StorageClassUniform); + + return m_module.opLoad(type, + m_module.opAccessChain(typePtr, m_vs.constantBuffer, 1, &offset)); + }; + + m_vs.constants.worldview = LoadConstant(m_mat4Type, uint32_t(D3D9FFVSMembers::WorldViewMatrix)); + m_vs.constants.normal = LoadConstant(m_mat4Type, uint32_t(D3D9FFVSMembers::NormalMatrix)); + m_vs.constants.proj = LoadConstant(m_mat4Type, uint32_t(D3D9FFVSMembers::ProjMatrix)); + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) + m_vs.constants.texcoord[i] = LoadConstant(m_mat4Type, uint32_t(D3D9FFVSMembers::Texcoord0) + i); + + m_vs.constants.invOffset = LoadConstant(m_vec4Type, uint32_t(D3D9FFVSMembers::InverseOffset)); + m_vs.constants.invExtent = LoadConstant(m_vec4Type, uint32_t(D3D9FFVSMembers::InverseExtent)); + + m_vs.constants.globalAmbient = LoadConstant(m_vec4Type, uint32_t(D3D9FFVSMembers::GlobalAmbient)); + + m_vs.constants.materialDiffuse = LoadConstant(m_vec4Type, 
uint32_t(D3D9FFVSMembers::MaterialDiffuse)); + m_vs.constants.materialAmbient = LoadConstant(m_vec4Type, uint32_t(D3D9FFVSMembers::MaterialAmbient)); + m_vs.constants.materialSpecular = LoadConstant(m_vec4Type, uint32_t(D3D9FFVSMembers::MaterialSpecular)); + m_vs.constants.materialEmissive = LoadConstant(m_vec4Type, uint32_t(D3D9FFVSMembers::MaterialEmissive)); + m_vs.constants.materialPower = LoadConstant(m_floatType, uint32_t(D3D9FFVSMembers::MaterialPower)); + m_vs.constants.tweenFactor = LoadConstant(m_floatType, uint32_t(D3D9FFVSMembers::TweenFactor)); + + // Do IO + m_vs.in.POSITION = declareIO(true, DxsoSemantic{ DxsoUsage::Position, 0 }); + m_vs.in.NORMAL = declareIO(true, DxsoSemantic{ DxsoUsage::Normal, 0 }); + + if (m_vsKey.Data.Contents.VertexBlendMode == D3D9FF_VertexBlendMode_Tween) { + m_vs.in.POSITION1 = declareIO(true, DxsoSemantic{ DxsoUsage::Position, 1 }); + m_vs.in.NORMAL1 = declareIO(true, DxsoSemantic{ DxsoUsage::Normal, 1 }); + } + else { + m_isgn.elemCount++; + m_isgn.elemCount++; + } + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) + m_vs.in.TEXCOORD[i] = declareIO(true, DxsoSemantic{ DxsoUsage::Texcoord, i }); + + if (m_vsKey.Data.Contents.HasColor0) + m_vs.in.COLOR[0] = declareIO(true, DxsoSemantic{ DxsoUsage::Color, 0 }); + else { + m_vs.in.COLOR[0] = m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f); + m_isgn.elemCount++; + } + + if (m_vsKey.Data.Contents.HasColor1) + m_vs.in.COLOR[1] = declareIO(true, DxsoSemantic{ DxsoUsage::Color, 1 }); + else { + m_vs.in.COLOR[1] = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f); + m_isgn.elemCount++; + } + + if (m_vsKey.Data.Contents.HasPointSize) + m_vs.in.FOG = declareIO(true, DxsoSemantic{ DxsoUsage::Fog, 0 }); + else + m_isgn.elemCount++; + + if (m_vsKey.Data.Contents.HasPointSize) + m_vs.in.POINTSIZE = declareIO(true, DxsoSemantic{ DxsoUsage::PointSize, 0 }); + else + m_isgn.elemCount++; + + if (m_vsKey.Data.Contents.VertexBlendMode == D3D9FF_VertexBlendMode_Normal) { + 
m_vs.in.BLENDWEIGHT = declareIO(true, DxsoSemantic{ DxsoUsage::BlendWeight, 0 }); + m_vs.in.BLENDINDICES = declareIO(true, DxsoSemantic{ DxsoUsage::BlendIndices, 0 }); + } + else { + m_isgn.elemCount++; + m_isgn.elemCount++; + } + + // Declare Outputs + m_vs.out.POSITION = declareIO(false, DxsoSemantic{ DxsoUsage::Position, 0 }, spv::BuiltInPosition); + if (m_options.invariantPosition) + m_module.decorate(m_vs.out.POSITION, spv::DecorationInvariant); + + m_vs.out.POINTSIZE = declareIO(false, DxsoSemantic{ DxsoUsage::PointSize, 0 }, spv::BuiltInPointSize); + + m_vs.out.NORMAL = declareIO(false, DxsoSemantic{ DxsoUsage::Normal, 0 }); + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) + m_vs.out.TEXCOORD[i] = declareIO(false, DxsoSemantic{ DxsoUsage::Texcoord, i }); + + m_vs.out.COLOR[0] = declareIO(false, DxsoSemantic{ DxsoUsage::Color, 0 }); + m_vs.out.COLOR[1] = declareIO(false, DxsoSemantic{ DxsoUsage::Color, 1 }); + + m_vs.out.FOG = declareIO(false, DxsoSemantic{ DxsoUsage::Fog, 0 }); + } + + + void D3D9FFShaderCompiler::compilePS() { + setupPS(); + + uint32_t diffuse = m_ps.in.COLOR[0]; + uint32_t specular = m_ps.in.COLOR[1]; + + // Current starts of as equal to diffuse. 
+ uint32_t current = diffuse; + // Temp starts off as equal to vec4(0) + uint32_t temp = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f); + + uint32_t texture = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 1.0f); + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + const auto& stage = m_fsKey.Stages[i].Contents; + + bool processedTexture = false; + + auto GetTexture = [&]() { + if (!processedTexture) { + SpirvImageOperands imageOperands; + uint32_t imageVarId = m_module.opLoad(m_ps.samplers[i].typeId, m_ps.samplers[i].varId); + + uint32_t texcoordCnt = m_ps.samplers[i].texcoordCnt; + + // Add one for the texcoord count + // if we need to include the divider + if (m_fsKey.Stages[i].Contents.Projected) + texcoordCnt++; + + std::array indices = { 0, 1, 2, 3 }; + + uint32_t texcoord = m_ps.in.TEXCOORD[i]; + uint32_t texcoord_t = m_module.defVectorType(m_floatType, texcoordCnt); + texcoord = m_module.opVectorShuffle(texcoord_t, + texcoord, texcoord, texcoordCnt, indices.data()); + + uint32_t projIdx = m_fsKey.Stages[i].Contents.ProjectedCount; + if (projIdx == 0) + projIdx = texcoordCnt; + else + projIdx--; + + if (m_fsKey.Stages[i].Contents.Projected) { + uint32_t projValue = m_module.opCompositeExtract(m_floatType, m_ps.in.TEXCOORD[i], 1, &projIdx); + uint32_t insertIdx = texcoordCnt - 1; + texcoord = m_module.opCompositeInsert(texcoord_t, projValue, texcoord, 1, &insertIdx); + } + + if (m_fsKey.Stages[i].Contents.Projected) + texture = m_module.opImageSampleProjImplicitLod(m_vec4Type, imageVarId, texcoord, imageOperands); + else + texture = m_module.opImageSampleImplicitLod(m_vec4Type, imageVarId, texcoord, imageOperands); + } + + processedTexture = true; + + return texture; + }; + + auto ScalarReplicate = [&](uint32_t reg) { + std::array replicant = { reg, reg, reg, reg }; + return m_module.opCompositeConstruct(m_vec4Type, replicant.size(), replicant.data()); + }; + + auto AlphaReplicate = [&](uint32_t reg) { + uint32_t alphaComponentId = 3; + uint32_t alpha = 
m_module.opCompositeExtract(m_floatType, reg, 1, &alphaComponentId); + + return ScalarReplicate(alpha); + }; + + auto Complement = [&](uint32_t reg) { + return m_module.opFSub(m_vec4Type, + m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f), + reg); + }; + + auto Saturate = [&](uint32_t reg) { + return m_module.opFClamp(m_vec4Type, reg, + m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f), + m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f)); + }; + + auto GetArg = [&] (uint32_t arg) { + uint32_t reg = m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f); + + switch (arg & D3DTA_SELECTMASK) { + case D3DTA_CONSTANT: { + uint32_t offset = m_module.constu32(D3D9SharedPSStages_Count * i + D3D9SharedPSStages_Constant); + uint32_t ptr = m_module.opAccessChain(m_module.defPointerType(m_vec4Type, spv::StorageClassUniform), + m_ps.sharedState, 1, &offset); + + reg = m_module.opLoad(m_vec4Type, ptr); + break; + } + case D3DTA_CURRENT: + reg = current; + break; + case D3DTA_DIFFUSE: + reg = diffuse; + break; + case D3DTA_SPECULAR: + reg = specular; + break; + case D3DTA_TEMP: + reg = temp; + break; + case D3DTA_TEXTURE: + reg = GetTexture(); + break; + case D3DTA_TFACTOR: + reg = m_ps.constants.textureFactor; + break; + default: + break; + } + + // reg = 1 - reg + if (arg & D3DTA_COMPLEMENT) + reg = Complement(reg); + + // reg = reg.wwww + if (arg & D3DTA_ALPHAREPLICATE) + reg = AlphaReplicate(reg); + + return reg; + }; + + auto DoOp = [&](D3DTEXTUREOP op, uint32_t dst, std::array arg) { + switch (op) { + case D3DTOP_SELECTARG1: + dst = arg[1]; + break; + + case D3DTOP_SELECTARG2: + dst = arg[2]; + break; + + case D3DTOP_MODULATE4X: + dst = m_module.opFMul(m_vec4Type, arg[1], arg[2]); + dst = m_module.opVectorTimesScalar(m_vec4Type, dst, m_module.constf32(4.0f)); + dst = Saturate(dst); + break; + + case D3DTOP_MODULATE2X: + dst = m_module.opFMul(m_vec4Type, arg[1], arg[2]); + dst = m_module.opVectorTimesScalar(m_vec4Type, dst, m_module.constf32(2.0f)); + dst = Saturate(dst); + break; + + case 
D3DTOP_MODULATE: + dst = m_module.opFMul(m_vec4Type, arg[1], arg[2]); + break; + + case D3DTOP_ADDSIGNED2X: + arg[2] = m_module.opFSub(m_vec4Type, arg[2], + m_module.constvec4f32(0.5f, 0.5f, 0.5f, 0.5f)); + + dst = m_module.opFAdd(m_vec4Type, arg[1], arg[2]); + dst = m_module.opVectorTimesScalar(m_vec4Type, dst, m_module.constf32(2.0f)); + dst = Saturate(dst); + break; + + case D3DTOP_ADDSIGNED: + arg[2] = m_module.opFSub(m_vec4Type, arg[2], + m_module.constvec4f32(0.5f, 0.5f, 0.5f, 0.5f)); + + dst = m_module.opFAdd(m_vec4Type, arg[1], arg[2]); + dst = Saturate(dst); + break; + + case D3DTOP_ADD: + dst = m_module.opFAdd(m_vec4Type, arg[1], arg[2]); + dst = Saturate(dst); + break; + + case D3DTOP_SUBTRACT: + dst = m_module.opFSub(m_vec4Type, arg[1], arg[2]); + dst = Saturate(dst); + break; + + case D3DTOP_ADDSMOOTH: + dst = m_module.opFFma(m_vec4Type, Complement(arg[1]), arg[2], arg[1]); + dst = Saturate(dst); + break; + + case D3DTOP_BLENDDIFFUSEALPHA: + dst = m_module.opFMix(m_vec4Type, arg[2], arg[1], AlphaReplicate(diffuse)); + break; + + case D3DTOP_BLENDTEXTUREALPHA: + dst = m_module.opFMix(m_vec4Type, arg[2], arg[1], AlphaReplicate(GetTexture())); + break; + + case D3DTOP_BLENDFACTORALPHA: + dst = m_module.opFMix(m_vec4Type, arg[2], arg[1], AlphaReplicate(m_ps.constants.textureFactor)); + break; + + case D3DTOP_BLENDTEXTUREALPHAPM: + Logger::warn("D3DTOP_BLENDTEXTUREALPHAPM: not implemented"); + break; + + case D3DTOP_BLENDCURRENTALPHA: + dst = m_module.opFMix(m_vec4Type, arg[2], arg[1], AlphaReplicate(current)); + break; + + case D3DTOP_PREMODULATE: + Logger::warn("D3DTOP_PREMODULATE: not implemented"); + break; + + case D3DTOP_MODULATEALPHA_ADDCOLOR: + dst = m_module.opFFma(m_vec4Type, AlphaReplicate(arg[1]), arg[2], arg[1]); + dst = Saturate(dst); + break; + + case D3DTOP_MODULATECOLOR_ADDALPHA: + dst = m_module.opFFma(m_vec4Type, arg[1], arg[2], AlphaReplicate(arg[1])); + dst = Saturate(dst); + break; + + case D3DTOP_MODULATEINVALPHA_ADDCOLOR: + dst = 
m_module.opFFma(m_vec4Type, Complement(AlphaReplicate(arg[1])), arg[2], arg[1]); + dst = Saturate(dst); + break; + + case D3DTOP_MODULATEINVCOLOR_ADDALPHA: + dst = m_module.opFFma(m_vec4Type, Complement(arg[1]), arg[2], AlphaReplicate(arg[1])); + dst = Saturate(dst); + break; + + case D3DTOP_BUMPENVMAP: + Logger::warn("D3DTOP_BUMPENVMAP: not implemented"); + break; + + case D3DTOP_BUMPENVMAPLUMINANCE: + Logger::warn("D3DTOP_BUMPENVMAPLUMINANCE: not implemented"); + break; + + case D3DTOP_DOTPRODUCT3: { + // Get vec3 of arg1 & 2 + uint32_t vec3Type = m_module.defVectorType(m_floatType, 3); + std::array indices = { 0, 1, 2 }; + arg[1] = m_module.opVectorShuffle(vec3Type, arg[1], arg[1], indices.size(), indices.data()); + arg[2] = m_module.opVectorShuffle(vec3Type, arg[2], arg[2], indices.size(), indices.data()); + + // Bias according to spec. + arg[1] = m_module.opFSub(vec3Type, arg[1], m_module.constvec3f32(0.5f, 0.5f, 0.5f)); + arg[2] = m_module.opFSub(vec3Type, arg[2], m_module.constvec3f32(0.5f, 0.5f, 0.5f)); + + // Do the dotting! + dst = m_module.opDot(m_floatType, arg[1], arg[2]); + + // Multiply by 4 and replicate -> vec4 + dst = m_module.opFMul(m_floatType, dst, m_module.constf32(4.0f)); + dst = ScalarReplicate(dst); + + // Saturate + dst = Saturate(dst); + + break; + } + + case D3DTOP_MULTIPLYADD: + dst = m_module.opFFma(m_vec4Type, arg[1], arg[2], arg[0]); + dst = Saturate(dst); + break; + + case D3DTOP_LERP: + dst = m_module.opFMix(m_vec4Type, arg[2], arg[1], arg[0]); + break; + + default: + Logger::warn("Unhandled texture op!"); + break; + } + + return dst; + }; + + uint32_t& dst = stage.ResultIsTemp ? temp : current; + + D3DTEXTUREOP colorOp = (D3DTEXTUREOP)stage.ColorOp; + + // This cancels all subsequent stages. 
+ if (colorOp == D3DTOP_DISABLE) + break; + + std::array colorArgs = { + stage.ColorArg0, + stage.ColorArg1, + stage.ColorArg2}; + + D3DTEXTUREOP alphaOp = (D3DTEXTUREOP)stage.AlphaOp; + std::array alphaArgs = { + stage.AlphaArg0, + stage.AlphaArg1, + stage.AlphaArg2}; + + auto ProcessArgs = [&](auto op, auto& args) { + for (uint32_t& arg : args) + arg = GetArg(arg); + }; + + // Fast path if alpha/color path is identical. + // D3DTOP_DOTPRODUCT3 also has special quirky behaviour here. + const bool fastPath = colorOp == alphaOp && colorArgs == alphaArgs; + if (fastPath || colorOp == D3DTOP_DOTPRODUCT3) { + if (colorOp != D3DTOP_DISABLE) { + ProcessArgs(colorOp, colorArgs); + dst = DoOp(colorOp, dst, colorArgs); + } + } + else { + std::array indices = { 0, 1, 2, 4 + 3 }; + + uint32_t colorResult = dst; + uint32_t alphaResult = dst; + if (colorOp != D3DTOP_DISABLE) { + ProcessArgs(colorOp, colorArgs); + colorResult = DoOp(colorOp, dst, colorArgs); + } + + if (alphaOp != D3DTOP_DISABLE) { + ProcessArgs(alphaOp, alphaArgs); + alphaResult = DoOp(alphaOp, dst, alphaArgs); + } + + // src0.x, src0.y, src0.z src1.w + if (colorResult != dst) + dst = m_module.opVectorShuffle(m_vec4Type, colorResult, dst, indices.size(), indices.data()); + + // src0.x, src0.y, src0.z src1.w + // But we flip src0, src1 to be inverse of color. 
+ if (alphaResult != dst) + dst = m_module.opVectorShuffle(m_vec4Type, dst, alphaResult, indices.size(), indices.data()); + } + } + + if (m_fsKey.Stages[0].Contents.GlobalSpecularEnable) { + uint32_t specular = m_module.opFMul(m_vec4Type, m_ps.in.COLOR[1], m_module.constvec4f32(1.0f, 1.0f, 1.0f, 0.0f)); + + current = m_module.opFAdd(m_vec4Type, current, specular); + } + + D3D9FogContext fogCtx; + fogCtx.IsPixel = true; + fogCtx.RangeFog = false; + fogCtx.RenderState = m_rsBlock; + fogCtx.vPos = m_ps.in.POS; + fogCtx.vFog = m_ps.in.FOG; + fogCtx.oColor = current; + current = DoFixedFunctionFog(m_module, fogCtx); + + m_module.opStore(m_ps.out.COLOR, current); + + alphaTestPS(); + } + + void D3D9FFShaderCompiler::setupPS() { + setupRenderStateInfo(); + + // PS Caps + m_module.enableCapability(spv::CapabilityDerivativeControl); + + m_module.setExecutionMode(m_entryPointId, + spv::ExecutionModeOriginUpperLeft); + + uint32_t pointCoord = GetPointCoord(m_module, m_entryPointInterfaces); + auto pointInfo = GetPointSizeInfoPS(m_module, m_rsBlock); + + // We need to replace TEXCOORD inputs with gl_PointCoord + // if D3DRS_POINTSPRITEENABLE is set. + for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + m_ps.in.TEXCOORD[i] = declareIO(true, DxsoSemantic{ DxsoUsage::Texcoord, i }); + m_ps.in.TEXCOORD[i] = m_module.opSelect(m_vec4Type, pointInfo.isSprite, pointCoord, m_ps.in.TEXCOORD[i]); + } + + m_ps.in.COLOR[0] = declareIO(true, DxsoSemantic{ DxsoUsage::Color, 0 }); + m_ps.in.COLOR[1] = declareIO(true, DxsoSemantic{ DxsoUsage::Color, 1 }); + + m_ps.in.FOG = declareIO(true, DxsoSemantic{ DxsoUsage::Fog, 0 }); + m_ps.in.POS = declareIO(true, DxsoSemantic{ DxsoUsage::Position, 0 }, spv::BuiltInFragCoord); + + m_ps.out.COLOR = declareIO(false, DxsoSemantic{ DxsoUsage::Color, 0 }); + + // Constant Buffer for PS. 
+ std::array members = { + m_vec4Type // Texture Factor + }; + + const uint32_t structType = + m_module.defStructType(members.size(), members.data()); + + m_module.decorateBlock(structType); + uint32_t offset = 0; + + for (uint32_t i = 0; i < uint32_t(D3D9FFPSMembers::MemberCount); i++) { + m_module.memberDecorateOffset(structType, i, offset); + offset += sizeof(Vector4); + } + + m_module.setDebugName(structType, "D3D9FixedFunctionPS"); + m_module.setDebugMemberName(structType, 0, "textureFactor"); + + m_ps.constantBuffer = m_module.newVar( + m_module.defPointerType(structType, spv::StorageClassUniform), + spv::StorageClassUniform); + + m_module.setDebugName(m_ps.constantBuffer, "consts"); + + const uint32_t bindingId = computeResourceSlotId( + DxsoProgramType::PixelShader, DxsoBindingType::ConstantBuffer, + DxsoConstantBuffers::PSFixedFunction); + + m_module.decorateDescriptorSet(m_ps.constantBuffer, 0); + m_module.decorateBinding(m_ps.constantBuffer, bindingId); + + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + resource.access = VK_ACCESS_UNIFORM_READ_BIT; + m_resourceSlots.push_back(resource); + + // Load constants + auto LoadConstant = [&](uint32_t type, uint32_t idx) { + uint32_t offset = m_module.constu32(idx); + uint32_t typePtr = m_module.defPointerType(type, spv::StorageClassUniform); + + return m_module.opLoad(type, + m_module.opAccessChain(typePtr, m_ps.constantBuffer, 1, &offset)); + }; + + m_ps.constants.textureFactor = LoadConstant(m_vec4Type, uint32_t(D3D9FFPSMembers::TextureFactor)); + + // Samplers + for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + auto& sampler = m_ps.samplers[i]; + D3DRESOURCETYPE type = D3DRESOURCETYPE(m_fsKey.Stages[i].Contents.Type + D3DRTYPE_TEXTURE); + + spv::Dim dimensionality; + VkImageViewType viewType; + + switch (type) { + default: + case D3DRTYPE_TEXTURE: + dimensionality = spv::Dim2D; + 
sampler.texcoordCnt = 2; + viewType = VK_IMAGE_VIEW_TYPE_2D; + break; + case D3DRTYPE_CUBETEXTURE: + dimensionality = spv::DimCube; + sampler.texcoordCnt = 3; + viewType = VK_IMAGE_VIEW_TYPE_CUBE; + break; + case D3DRTYPE_VOLUMETEXTURE: + dimensionality = spv::Dim3D; + sampler.texcoordCnt = 3; + viewType = VK_IMAGE_VIEW_TYPE_3D; + break; + } + + sampler.typeId = m_module.defImageType( + m_module.defFloatType(32), + dimensionality, 0, 0, 0, 1, + spv::ImageFormatUnknown); + + sampler.typeId = m_module.defSampledImageType(sampler.typeId); + + sampler.varId = m_module.newVar( + m_module.defPointerType( + sampler.typeId, spv::StorageClassUniformConstant), + spv::StorageClassUniformConstant); + + std::string name = str::format("s", i); + m_module.setDebugName(sampler.varId, name.c_str()); + + const uint32_t bindingId = computeResourceSlotId(DxsoProgramType::PixelShader, + DxsoBindingType::ColorImage, i); + + m_module.decorateDescriptorSet(sampler.varId, 0); + m_module.decorateBinding(sampler.varId, bindingId); + + // Store descriptor info for the shader interface + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + resource.view = viewType; + resource.access = VK_ACCESS_SHADER_READ_BIT; + m_resourceSlots.push_back(resource); + } + + emitPsSharedConstants(); + } + + + void D3D9FFShaderCompiler::emitPsSharedConstants() { + m_ps.sharedState = GetSharedConstants(m_module); + + const uint32_t bindingId = computeResourceSlotId( + m_programType, DxsoBindingType::ConstantBuffer, + PSShared); + + m_module.decorateDescriptorSet(m_ps.sharedState, 0); + m_module.decorateBinding(m_ps.sharedState, bindingId); + + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + resource.access = VK_ACCESS_UNIFORM_READ_BIT; + m_resourceSlots.push_back(resource); + } + + + void D3D9FFShaderCompiler::alphaTestPS() { + // Alpha 
testing + uint32_t boolType = m_module.defBoolType(); + uint32_t floatPtr = m_module.defPointerType(m_floatType, spv::StorageClassPushConstant); + + // Declare spec constants for render states + uint32_t alphaTestId = m_module.specConstBool(false); + uint32_t alphaFuncId = m_module.specConst32(m_module.defIntType(32, 0), uint32_t(VK_COMPARE_OP_ALWAYS)); + + m_module.setDebugName(alphaTestId, "alpha_test"); + m_module.decorateSpecId(alphaTestId, getSpecId(D3D9SpecConstantId::AlphaTestEnable)); + + m_module.setDebugName(alphaFuncId, "alpha_func"); + m_module.decorateSpecId(alphaFuncId, getSpecId(D3D9SpecConstantId::AlphaCompareOp)); + + // Implement alpha test + auto oC0 = m_ps.out.COLOR; + // Labels for the alpha test + std::array atestCaseLabels = { { + { uint32_t(VK_COMPARE_OP_NEVER), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_LESS), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_EQUAL), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_LESS_OR_EQUAL), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_GREATER), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_NOT_EQUAL), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_GREATER_OR_EQUAL), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_ALWAYS), m_module.allocateId() }, + } }; + + uint32_t atestBeginLabel = m_module.allocateId(); + uint32_t atestTestLabel = m_module.allocateId(); + uint32_t atestDiscardLabel = m_module.allocateId(); + uint32_t atestKeepLabel = m_module.allocateId(); + uint32_t atestSkipLabel = m_module.allocateId(); + + // if (alpha_test) { ... 
} + m_module.opSelectionMerge(atestSkipLabel, spv::SelectionControlMaskNone); + m_module.opBranchConditional(alphaTestId, atestBeginLabel, atestSkipLabel); + m_module.opLabel(atestBeginLabel); + + // Load alpha component + uint32_t alphaComponentId = 3; + uint32_t alphaId = m_module.opCompositeExtract(m_floatType, + m_module.opLoad(m_vec4Type, oC0), + 1, &alphaComponentId); + + // Load alpha reference + uint32_t alphaRefMember = m_module.constu32(uint32_t(D3D9RenderStateItem::AlphaRef)); + uint32_t alphaRefId = m_module.opLoad(m_floatType, + m_module.opAccessChain(floatPtr, m_rsBlock, 1, &alphaRefMember)); + + // switch (alpha_func) { ... } + m_module.opSelectionMerge(atestTestLabel, spv::SelectionControlMaskNone); + m_module.opSwitch(alphaFuncId, + atestCaseLabels[uint32_t(VK_COMPARE_OP_ALWAYS)].labelId, + atestCaseLabels.size(), + atestCaseLabels.data()); + + std::array atestVariables; + + for (uint32_t i = 0; i < atestCaseLabels.size(); i++) { + m_module.opLabel(atestCaseLabels[i].labelId); + + atestVariables[i].labelId = atestCaseLabels[i].labelId; + atestVariables[i].varId = [&] { + switch (VkCompareOp(atestCaseLabels[i].literal)) { + case VK_COMPARE_OP_NEVER: return m_module.constBool(false); + case VK_COMPARE_OP_LESS: return m_module.opFOrdLessThan(boolType, alphaId, alphaRefId); + case VK_COMPARE_OP_EQUAL: return m_module.opFOrdEqual(boolType, alphaId, alphaRefId); + case VK_COMPARE_OP_LESS_OR_EQUAL: return m_module.opFOrdLessThanEqual(boolType, alphaId, alphaRefId); + case VK_COMPARE_OP_GREATER: return m_module.opFOrdGreaterThan(boolType, alphaId, alphaRefId); + case VK_COMPARE_OP_NOT_EQUAL: return m_module.opFOrdNotEqual(boolType, alphaId, alphaRefId); + case VK_COMPARE_OP_GREATER_OR_EQUAL: return m_module.opFOrdGreaterThanEqual(boolType, alphaId, alphaRefId); + default: + case VK_COMPARE_OP_ALWAYS: return m_module.constBool(true); + } + }(); + + m_module.opBranch(atestTestLabel); + } + + // end switch + m_module.opLabel(atestTestLabel); + + uint32_t 
atestResult = m_module.opPhi(boolType, + atestVariables.size(), + atestVariables.data()); + + // The fragment is discarded when the selected comparison fails. + uint32_t atestDiscard = m_module.opLogicalNot(boolType, atestResult); + + // if (do_discard) { ... } + m_module.opSelectionMerge(atestKeepLabel, spv::SelectionControlMaskNone); + m_module.opBranchConditional(atestDiscard, atestDiscardLabel, atestKeepLabel); + + m_module.opLabel(atestDiscardLabel); + m_module.opKill(); + + // end if (do_discard) + m_module.opLabel(atestKeepLabel); + m_module.opBranch(atestSkipLabel); + + // end if (alpha_test) + m_module.opLabel(atestSkipLabel); + } + + + D3D9FFShader::D3D9FFShader( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyVS& Key) { + Sha1Hash hash = Sha1Hash::compute(&Key, sizeof(Key)); + DxvkShaderKey shaderKey = { VK_SHADER_STAGE_VERTEX_BIT, hash }; + + std::string name = str::format("FF_", shaderKey.toString()); + + D3D9FFShaderCompiler compiler( + pDevice->GetDXVKDevice(), + Key, name, + pDevice->GetOptions()); + + m_shader = compiler.compile(); + m_isgn = compiler.isgn(); + + Dump(Key, name); + + m_shader->setShaderKey(shaderKey); + pDevice->GetDXVKDevice()->registerShader(m_shader); + } + + + D3D9FFShader::D3D9FFShader( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyFS& Key) { + Sha1Hash hash = Sha1Hash::compute(&Key, sizeof(Key)); + DxvkShaderKey shaderKey = { VK_SHADER_STAGE_FRAGMENT_BIT, hash }; + + std::string name = str::format("FF_", shaderKey.toString()); + + D3D9FFShaderCompiler compiler( + pDevice->GetDXVKDevice(), + Key, name, + pDevice->GetOptions()); + + m_shader = compiler.compile(); + m_isgn = compiler.isgn(); + + Dump(Key, name); + + m_shader->setShaderKey(shaderKey); + pDevice->GetDXVKDevice()->registerShader(m_shader); + } + + // Writes the compiled shader to <DXVK_SHADER_DUMP_PATH>/<Name>.spv when that environment variable is set. + template <typename T> + void D3D9FFShader::Dump(const T& Key, const std::string& Name) { + const std::string dumpPath = env::getEnvVar("DXVK_SHADER_DUMP_PATH"); + + if (dumpPath.size() != 0) { + std::ofstream dumpStream( + 
str::format(dumpPath, "/", Name, ".spv"), + std::ios_base::binary | std::ios_base::trunc); + + m_shader->dump(dumpStream); + } + } + + + D3D9FFShader D3D9FFShaderModuleSet::GetShaderModule( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyVS& ShaderKey) { + // Use the shader's unique key for the lookup + auto entry = m_vsModules.find(ShaderKey); + if (entry != m_vsModules.end()) + return entry->second; + + D3D9FFShader shader( + pDevice, ShaderKey); + + m_vsModules.insert({ShaderKey, shader}); + + return shader; + } + + + D3D9FFShader D3D9FFShaderModuleSet::GetShaderModule( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyFS& ShaderKey) { + // Use the shader's unique key for the lookup + auto entry = m_fsModules.find(ShaderKey); + if (entry != m_fsModules.end()) + return entry->second; + + D3D9FFShader shader( + pDevice, ShaderKey); + + m_fsModules.insert({ShaderKey, shader}); + + return shader; + } + + + // Combines the hashes of all packed 32-bit words of the vertex shader key. + size_t D3D9FFShaderKeyHash::operator () (const D3D9FFShaderKeyVS& key) const { + DxvkHashState state; + + std::hash<uint32_t> uint32hash; + + for (uint32_t i = 0; i < countof(key.Data.Primitive); i++) + state.add(uint32hash(key.Data.Primitive[i])); + + return state; + } + + + // Combines the hashes of all packed 32-bit words of every texture stage in the pixel shader key. + size_t D3D9FFShaderKeyHash::operator () (const D3D9FFShaderKeyFS& key) const { + DxvkHashState state; + + std::hash<uint32_t> uint32hash; + + for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + for (uint32_t j = 0; j < countof(key.Stages[i].Primitive); j++) + state.add(uint32hash(key.Stages[i].Primitive[j])); + } + + return state; + } + + + bool operator == (const D3D9FFShaderKeyVS& a, const D3D9FFShaderKeyVS& b) { + return std::memcmp(&a, &b, sizeof(D3D9FFShaderKeyVS)) == 0; + } + + + bool operator == (const D3D9FFShaderKeyFS& a, const D3D9FFShaderKeyFS& b) { + return std::memcmp(&a, &b, sizeof(D3D9FFShaderKeyFS)) == 0; + } + + + bool operator != (const D3D9FFShaderKeyVS& a, const D3D9FFShaderKeyVS& b) { + return !(a == b); + } + + + bool operator != (const D3D9FFShaderKeyFS& a, const D3D9FFShaderKeyFS& b) 
{ + return !(a == b); + } + + + bool D3D9FFShaderKeyEq::operator () (const D3D9FFShaderKeyVS& a, const D3D9FFShaderKeyVS& b) const { + return a == b; + } + + + bool D3D9FFShaderKeyEq::operator () (const D3D9FFShaderKeyFS& a, const D3D9FFShaderKeyFS& b) const { + return a == b; + } + + + static inline DxsoIsgn CreateFixedFunctionIsgn() { + DxsoIsgn ffIsgn; + + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Position, 0 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Normal, 0 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Position, 1 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Normal, 1 }; + for (uint32_t i = 0; i < 8; i++) + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Texcoord, i }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Color, 0 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Color, 1 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::Fog, 0 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::PointSize, 0 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::BlendWeight, 0 }; + ffIsgn.elems[ffIsgn.elemCount++].semantic = DxsoSemantic{ DxsoUsage::BlendIndices, 0 }; + + return ffIsgn; + } + + + DxsoIsgn g_ffIsgn = CreateFixedFunctionIsgn(); + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_fixed_function.h b/src/d3d9/d3d9_fixed_function.h new file mode 100644 index 000000000..378685d38 --- /dev/null +++ b/src/d3d9/d3d9_fixed_function.h @@ -0,0 +1,247 @@ +#pragma once + +#include "d3d9_include.h" + +#include "d3d9_caps.h" + +#include "../dxvk/dxvk_shader.h" + +#include "../dxso/dxso_isgn.h" + +#include +#include + +namespace dxvk { + + class D3D9DeviceEx; + class SpirvModule; + + struct D3D9Options; + + struct D3D9FogContext { + // General inputs... 
+ bool IsPixel; + bool RangeFog; + uint32_t RenderState; + uint32_t vPos; + uint32_t vFog; + + uint32_t oColor; + + bool HasFogInput; + }; + + struct D3D9FixedFunctionOptions { + D3D9FixedFunctionOptions(const D3D9Options* options); + + bool invariantPosition; + }; + + // Returns new oFog if VS + // Returns new oColor if PS + uint32_t DoFixedFunctionFog(SpirvModule& spvModule, const D3D9FogContext& fogCtx); + + // Returns a render state block + uint32_t SetupRenderStateBlock(SpirvModule& spvModule); + + struct D3D9PointSizeInfoVS { + uint32_t defaultValue; + uint32_t min; + uint32_t max; + }; + + // Default point size and point scale magic! + D3D9PointSizeInfoVS GetPointSizeInfoVS(SpirvModule& spvModule, uint32_t vPos, uint32_t vtx, uint32_t perVertPointSize, uint32_t rsBlock); + + struct D3D9PointSizeInfoPS { + uint32_t isSprite; + }; + + D3D9PointSizeInfoPS GetPointSizeInfoPS(SpirvModule& spvModule, uint32_t rsBlock); + + uint32_t GetPointCoord(SpirvModule& spvModule, std::vector& entryPointInterfaces); + + uint32_t GetSharedConstants(SpirvModule& spvModule); + + constexpr uint32_t TCIOffset = 16; + constexpr uint32_t TCIMask = 0b111 << TCIOffset; + + enum D3D9FF_VertexBlendMode { + D3D9FF_VertexBlendMode_Disabled, + D3D9FF_VertexBlendMode_Normal, + D3D9FF_VertexBlendMode_Tween, + }; + + struct D3D9FFShaderKeyVSData { + union { + struct { + uint32_t TexcoordIndices : 24; + + uint32_t HasPositionT : 1; + + uint32_t HasColor0 : 1; // Diffuse + uint32_t HasColor1 : 1; // Specular + + uint32_t HasPointSize : 1; + + uint32_t UseLighting : 1; + + uint32_t NormalizeNormals : 1; + uint32_t LocalViewer : 1; + uint32_t RangeFog : 1; + + uint32_t TexcoordFlags : 24; + + uint32_t DiffuseSource : 2; + uint32_t AmbientSource : 2; + uint32_t SpecularSource : 2; + uint32_t EmissiveSource : 2; + + uint32_t TransformFlags : 24; + + uint32_t LightCount : 4; + + uint32_t TexcoordDeclMask : 24; + uint32_t HasFog : 1; + + uint32_t VertexBlendMode : 2; + uint32_t VertexBlendIndexed : 
1; + uint32_t VertexBlendCount : 3; + } Contents; + + uint32_t Primitive[4]; + }; + }; + + struct D3D9FFShaderKeyVS { + D3D9FFShaderKeyVS() { + // memcmp safety + std::memset(&Data, 0, sizeof(Data)); + } + + D3D9FFShaderKeyVSData Data; + }; + + constexpr uint32_t TextureArgCount = 3; + + struct D3D9FFShaderStage { + union { + struct { + uint32_t ColorOp : 5; + uint32_t ColorArg0 : 6; + uint32_t ColorArg1 : 6; + uint32_t ColorArg2 : 6; + + uint32_t AlphaOp : 5; + uint32_t AlphaArg0 : 6; + uint32_t AlphaArg1 : 6; + uint32_t AlphaArg2 : 6; + + uint32_t Type : 2; + uint32_t ResultIsTemp : 1; + uint32_t Projected : 1; + + uint32_t ProjectedCount : 3; + + // Included in here, read from Stage 0 for packing reasons + // Affects all stages. + uint32_t GlobalSpecularEnable : 1; + uint32_t GlobalFlatShade : 1; + } Contents; + + uint32_t Primitive[2]; + }; + }; + + struct D3D9FFShaderKeyFS { + D3D9FFShaderKeyFS() { + // memcmp safety + std::memset(Stages, 0, sizeof(Stages)); + + // Normalize this. DISABLE != 0. 
+ for (uint32_t i = 0; i < caps::TextureStageCount; i++) { + Stages[i].Contents.ColorOp = D3DTOP_DISABLE; + Stages[i].Contents.AlphaOp = D3DTOP_DISABLE; + } + } + + D3D9FFShaderStage Stages[caps::TextureStageCount]; + }; + + struct D3D9FFShaderKeyHash { + size_t operator () (const D3D9FFShaderKeyVS& key) const; + size_t operator () (const D3D9FFShaderKeyFS& key) const; + }; + + bool operator == (const D3D9FFShaderKeyVS& a, const D3D9FFShaderKeyVS& b); + bool operator != (const D3D9FFShaderKeyVS& a, const D3D9FFShaderKeyVS& b); + bool operator == (const D3D9FFShaderKeyFS& a, const D3D9FFShaderKeyFS& b); + bool operator != (const D3D9FFShaderKeyFS& a, const D3D9FFShaderKeyFS& b); + + struct D3D9FFShaderKeyEq { + bool operator () (const D3D9FFShaderKeyVS& a, const D3D9FFShaderKeyVS& b) const; + bool operator () (const D3D9FFShaderKeyFS& a, const D3D9FFShaderKeyFS& b) const; + }; + + class D3D9FFShader { + + public: + + D3D9FFShader( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyVS& Key); + + D3D9FFShader( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyFS& Key); + + template + void Dump(const T& Key, const std::string& Name); + + Rc GetShader() const { + return m_shader; + } + + private: + + Rc m_shader; + + DxsoIsgn m_isgn; + + }; + + + class D3D9FFShaderModuleSet : public RcObject { + + public: + + D3D9FFShader GetShaderModule( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyVS& ShaderKey); + + D3D9FFShader GetShaderModule( + D3D9DeviceEx* pDevice, + const D3D9FFShaderKeyFS& ShaderKey); + + private: + + std::unordered_map< + D3D9FFShaderKeyVS, + D3D9FFShader, + D3D9FFShaderKeyHash, D3D9FFShaderKeyEq> m_vsModules; + + std::unordered_map< + D3D9FFShaderKeyFS, + D3D9FFShader, + D3D9FFShaderKeyHash, D3D9FFShaderKeyEq> m_fsModules; + + }; + + + inline const DxsoIsgn& GetFixedFunctionIsgn() { + extern DxsoIsgn g_ffIsgn; + + return g_ffIsgn; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_format.cpp b/src/d3d9/d3d9_format.cpp new file mode 100644 index 
000000000..81270bf42 --- /dev/null +++ b/src/d3d9/d3d9_format.cpp @@ -0,0 +1,500 @@ +#include "d3d9_format.h" + +namespace dxvk { + + // It is also worth noting that the msb/lsb-ness is flipped between VK and D3D9. + D3D9_VK_FORMAT_MAPPING ConvertFormatUnfixed(D3D9Format Format) { + switch (Format) { + case D3D9Format::Unknown: return {}; + + case D3D9Format::R8G8B8: return {}; // Unsupported + + case D3D9Format::A8R8G8B8: return { + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SRGB, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::X8R8G8B8: return { + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SRGB, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::R5G6B5: return { + VK_FORMAT_R5G6B5_UNORM_PACK16, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT}; + + case D3D9Format::X1R5G5B5: return { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::A1R5G5B5: return { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::A4R4G4B4: return { + VK_FORMAT_R4G4B4A4_UNORM_PACK16, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, + VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R }}; + + case D3D9Format::R3G3B2: return {}; // Unsupported + + case D3D9Format::A8: return { + VK_FORMAT_R8_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, + VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_R }}; + + case D3D9Format::A8R3G3B2: return {}; // Unsupported + + case D3D9Format::X4R4G4B4: return { + VK_FORMAT_R4G4B4A4_UNORM_PACK16, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, + 
VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::A2B10G10R10: return { + VK_FORMAT_A2B10G10R10_UNORM_PACK32, // The A2 is out of place here. This should be investigated. + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::A8B8G8R8: return { + VK_FORMAT_R8G8B8A8_UNORM, + VK_FORMAT_R8G8B8A8_SRGB, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::X8B8G8R8: return { + VK_FORMAT_R8G8B8A8_UNORM, + VK_FORMAT_R8G8B8A8_SRGB, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::G16R16: return { + VK_FORMAT_R16G16_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::A2R10G10B10: return { + VK_FORMAT_A2R10G10B10_UNORM_PACK32, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::A16B16G16R16: return { + VK_FORMAT_R16G16B16A16_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::A8P8: return {}; // Unsupported + + case D3D9Format::P8: return {}; // Unsupported + + case D3D9Format::L8: return { + VK_FORMAT_R8_UNORM, + VK_FORMAT_R8_SRGB, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::A8L8: return { + VK_FORMAT_R8G8_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }}; + + case D3D9Format::A4L4: return { + VK_FORMAT_R4G4_UNORM_PACK8, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }}; + + case D3D9Format::V8U8: return { + VK_FORMAT_R8G8_SNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, 
VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::L6V5U5: return {}; // Unsupported + + case D3D9Format::X8L8V8U8: return {}; // Unsupported + + case D3D9Format::Q8W8V8U8: return { + VK_FORMAT_R8G8B8A8_SNORM, + VK_FORMAT_R8G8B8A8_SRGB, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::V16U16: return { + VK_FORMAT_R16G16_SNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::A2W10V10U10: return {}; // Unsupported + + case D3D9Format::UYVY: return { + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY }, + { D3D9VideoFormat_UYVY, { 2u, 1u } } + }; + + case D3D9Format::R8G8_B8G8: return { + VK_FORMAT_G8B8G8R8_422_UNORM, // This format may have been _SCALED in DX9. + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::YUY2: return { + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY }, + { D3D9VideoFormat_YUY2, { 2u, 1u } } + }; + + case D3D9Format::G8R8_G8B8: return { + VK_FORMAT_B8G8R8G8_422_UNORM, // This format may have been _SCALED in DX9. 
+ VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::DXT1: return { + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::DXT2: return { + VK_FORMAT_BC2_UNORM_BLOCK, + VK_FORMAT_BC2_SRGB_BLOCK, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::DXT3: return { + VK_FORMAT_BC2_UNORM_BLOCK, + VK_FORMAT_BC2_SRGB_BLOCK, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::DXT4: return { + VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC3_SRGB_BLOCK, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::DXT5: return { + VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC3_SRGB_BLOCK, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::D16_LOCKABLE: return { + VK_FORMAT_D16_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT }; + + case D3D9Format::D32: return { + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT }; + + case D3D9Format::D15S1: return {}; // Unsupported (everywhere) + + case D3D9Format::D24S8: return { + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT }; + + case D3D9Format::D24X8: return { + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT }; + + case D3D9Format::D24X4S4: return {}; // Unsupported (everywhere) + + case D3D9Format::D16: return { + VK_FORMAT_D16_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT }; + + case D3D9Format::D32F_LOCKABLE: return { + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT }; + + case D3D9Format::D24FS8: return { + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT }; + + case D3D9Format::D32_LOCKABLE: return { + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT }; + + case D3D9Format::S8_LOCKABLE: return { + VK_FORMAT_S8_UINT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_STENCIL_BIT }; + + case D3D9Format::L16: return { + 
VK_FORMAT_R16_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::VERTEXDATA: return { + VK_FORMAT_R8_UINT, + VK_FORMAT_UNDEFINED, + 0 }; + + case D3D9Format::INDEX16: return { + VK_FORMAT_R16_UINT, + VK_FORMAT_UNDEFINED, + 0 }; + + case D3D9Format::INDEX32: return { + VK_FORMAT_R32_UINT, + VK_FORMAT_UNDEFINED, + 0 }; + + case D3D9Format::Q16W16V16U16: return { + VK_FORMAT_R16G16B16A16_SNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::MULTI2_ARGB8: return {}; // Unsupported + + case D3D9Format::R16F: return { + VK_FORMAT_R16_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::G16R16F: return { + VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::A16B16G16R16F: return { + VK_FORMAT_R16G16B16A16_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::R32F: return { + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::G32R32F: return { + VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::A32B32G32R32F: return { + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::CxV8U8: return {}; // Unsupported + + case D3D9Format::A1: return {}; // Unsupported + + case D3D9Format::A2B10G10R10_XR_BIAS: return { + 
VK_FORMAT_A2B10G10R10_SNORM_PACK32, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::BINARYBUFFER: return { + VK_FORMAT_R8_UINT, + VK_FORMAT_UNDEFINED, + 0 }; + + case D3D9Format::ATI1: return { + VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, + VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::ATI2: return { + VK_FORMAT_BC5_UNORM_BLOCK, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_COLOR_BIT, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::INST: return {}; // Driver hack, handled elsewhere + + case D3D9Format::DF24: return { + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, + VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::DF16: return { + VK_FORMAT_D16_UNORM, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, + VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE }}; + + case D3D9Format::NULL_FORMAT: return {}; // Driver hack, handled elsewhere + + case D3D9Format::GET4: return {}; // Unsupported + + case D3D9Format::GET1: return {}; // Unsupported + + case D3D9Format::NVDB: return {}; // Driver hack, handled elsewhere + + case D3D9Format::A2M1: return {}; // Driver hack, handled elsewhere + + case D3D9Format::A2M0: return {}; // Driver hack, handled elsewhere + + case D3D9Format::ATOC: return {}; // Driver hack, handled elsewhere + + case D3D9Format::INTZ: return { + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_UNDEFINED, + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }}; + + case D3D9Format::RAWZ: return {}; // Unsupported + + default: + Logger::warn(str::format("ConvertFormat: 
Unknown format encountered: ", Format)); + return {}; // Unsupported + } + } + + D3D9VkFormatTable::D3D9VkFormatTable( + const Rc& adapter, + const D3D9Options& options) { + m_dfSupport = options.supportDFFormats; + m_x4r4g4b4Support = options.supportX4R4G4B4; + m_d32supportFinal = options.supportD32; + + // AMD do not support 24-bit depth buffers on Vulkan, + // so we have to fall back to a 32-bit depth format. + m_d24s8Support = CheckImageFormatSupport(adapter, VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT); + + // NVIDIA do not support 16-bit depth buffers with stencil on Vulkan, + // so we have to fall back to a 32-bit depth format. + m_d16s8Support = CheckImageFormatSupport(adapter, VK_FORMAT_D16_UNORM_S8_UINT, + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT); + + if (!m_d24s8Support) + Logger::warn("D3D9: VK_FORMAT_D24_UNORM_S8_UINT -> VK_FORMAT_D32_SFLOAT_S8_UINT"); + + if (!m_d16s8Support) { + if (m_d24s8Support) + Logger::warn("D3D9: VK_FORMAT_D16_UNORM_S8_UINT -> VK_FORMAT_D24_UNORM_S8_UINT"); + else + Logger::warn("D3D9: VK_FORMAT_D16_UNORM_S8_UINT -> VK_FORMAT_D32_SFLOAT_S8_UINT"); + } + } + + D3D9_VK_FORMAT_MAPPING D3D9VkFormatTable::GetFormatMapping( + D3D9Format Format) const { + D3D9_VK_FORMAT_MAPPING mapping = ConvertFormatUnfixed(Format); + + if (Format == D3D9Format::X4R4G4B4 && !m_x4r4g4b4Support) + return D3D9_VK_FORMAT_MAPPING(); + + if (Format == D3D9Format::DF16 && !m_dfSupport) + return D3D9_VK_FORMAT_MAPPING(); + + if (Format == D3D9Format::DF24 && !m_dfSupport) + return D3D9_VK_FORMAT_MAPPING(); + + if (Format == D3D9Format::D32 && !m_d32supportFinal) + return D3D9_VK_FORMAT_MAPPING(); + + if (!m_d24s8Support && mapping.FormatColor == VK_FORMAT_D24_UNORM_S8_UINT) + mapping.FormatColor = VK_FORMAT_D32_SFLOAT_S8_UINT; + + if (!m_d16s8Support && mapping.FormatColor == VK_FORMAT_D16_UNORM_S8_UINT) + mapping.FormatColor = 
m_d24s8Support ? VK_FORMAT_D24_UNORM_S8_UINT : VK_FORMAT_D32_SFLOAT_S8_UINT; + + return mapping; + } + + + DxvkFormatInfo D3D9VkFormatTable::GetUnsupportedFormatInfo( + D3D9Format Format) const { + switch (Format) { + case D3D9Format::R8G8B8: + return { 3, VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::R3G3B2: + return { 1, VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::A8R3G3B2: + return { 2, VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::A8P8: + return { 2, VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::P8: + return { 1, VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::L6V5U5: + return { 2, VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::X8L8V8U8: + return { 4, VK_IMAGE_ASPECT_COLOR_BIT }; + + case D3D9Format::A2W10V10U10: + return { 4, VK_IMAGE_ASPECT_COLOR_BIT }; + + // MULTI2_ARGB8 -> Don't have a clue what this is. + + case D3D9Format::CxV8U8: + return { 2, VK_IMAGE_ASPECT_COLOR_BIT }; + + // A1 -> Doesn't map nicely here cause it's not byte aligned. + // Gonna just pretend that doesn't exist until something + // depends on that. 
+ + default: + return {}; + } + } + + + bool D3D9VkFormatTable::CheckImageFormatSupport( + const Rc& Adapter, + VkFormat Format, + VkFormatFeatureFlags Features) const { + VkFormatProperties supported = Adapter->formatProperties(Format); + + return (supported.linearTilingFeatures & Features) == Features + || (supported.optimalTilingFeatures & Features) == Features; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_format.h b/src/d3d9/d3d9_format.h new file mode 100644 index 000000000..8f63ae6ee --- /dev/null +++ b/src/d3d9/d3d9_format.h @@ -0,0 +1,215 @@ +#pragma once + +#include "d3d9_include.h" +#include "d3d9_options.h" + +#include "../dxvk/dxvk_adapter.h" +#include "../dxvk/dxvk_format.h" + +#include + +namespace dxvk { + + enum class D3D9Format : uint32_t { + Unknown = 0, + + R8G8B8 = 20, + A8R8G8B8 = 21, + X8R8G8B8 = 22, + R5G6B5 = 23, + X1R5G5B5 = 24, + A1R5G5B5 = 25, + A4R4G4B4 = 26, + R3G3B2 = 27, + A8 = 28, + A8R3G3B2 = 29, + X4R4G4B4 = 30, + A2B10G10R10 = 31, + A8B8G8R8 = 32, + X8B8G8R8 = 33, + G16R16 = 34, + A2R10G10B10 = 35, + A16B16G16R16 = 36, + A8P8 = 40, + P8 = 41, + L8 = 50, + A8L8 = 51, + A4L4 = 52, + V8U8 = 60, + L6V5U5 = 61, + X8L8V8U8 = 62, + Q8W8V8U8 = 63, + V16U16 = 64, + A2W10V10U10 = 67, + UYVY = MAKEFOURCC('U', 'Y', 'V', 'Y'), + R8G8_B8G8 = MAKEFOURCC('R', 'G', 'B', 'G'), + YUY2 = MAKEFOURCC('Y', 'U', 'Y', '2'), + G8R8_G8B8 = MAKEFOURCC('G', 'R', 'G', 'B'), + DXT1 = MAKEFOURCC('D', 'X', 'T', '1'), + DXT2 = MAKEFOURCC('D', 'X', 'T', '2'), + DXT3 = MAKEFOURCC('D', 'X', 'T', '3'), + DXT4 = MAKEFOURCC('D', 'X', 'T', '4'), + DXT5 = MAKEFOURCC('D', 'X', 'T', '5'), + D16_LOCKABLE = 70, + D32 = 71, + D15S1 = 73, + D24S8 = 75, + D24X8 = 77, + D24X4S4 = 79, + D16 = 80, + D32F_LOCKABLE = 82, + D24FS8 = 83, + D32_LOCKABLE = 84, + S8_LOCKABLE = 85, + L16 = 81, + VERTEXDATA = 100, + INDEX16 = 101, + INDEX32 = 102, + Q16W16V16U16 = 110, + MULTI2_ARGB8 = MAKEFOURCC('M', 'E', 'T', '1'), + R16F = 111, + G16R16F = 112, + A16B16G16R16F = 113, + 
R32F = 114, + G32R32F = 115, + A32B32G32R32F = 116, + CxV8U8 = 117, + A1 = 118, + A2B10G10R10_XR_BIAS = 119, + BINARYBUFFER = 199, + + // Driver Hacks / Unofficial Formats + ATI1 = MAKEFOURCC('A', 'T', 'I', '1'), + ATI2 = MAKEFOURCC('A', 'T', 'I', '2'), + INST = MAKEFOURCC('I', 'N', 'S', 'T'), + DF24 = MAKEFOURCC('D', 'F', '2', '4'), + DF16 = MAKEFOURCC('D', 'F', '1', '6'), + NULL_FORMAT = MAKEFOURCC('N', 'U', 'L', 'L'), + GET4 = MAKEFOURCC('G', 'E', 'T', '4'), + GET1 = MAKEFOURCC('G', 'E', 'T', '1'), + NVDB = MAKEFOURCC('N', 'V', 'D', 'B'), + A2M1 = MAKEFOURCC('A', '2', 'M', '1'), + A2M0 = MAKEFOURCC('A', '2', 'M', '0'), + ATOC = MAKEFOURCC('A', 'T', 'O', 'C'), + INTZ = MAKEFOURCC('I', 'N', 'T', 'Z'), + RAWZ = MAKEFOURCC('R', 'A', 'W', 'Z'), + RESZ = MAKEFOURCC('R', 'E', 'S', 'Z'), + + NV11 = MAKEFOURCC('N', 'V', '1', '1'), + NV12 = MAKEFOURCC('N', 'V', '1', '2'), + P010 = MAKEFOURCC('P', '0', '1', '0'), // Same as NV12 but 10 bit + P016 = MAKEFOURCC('P', '0', '1', '6'), // Same as NV12 but 16 bit + Y210 = MAKEFOURCC('Y', '2', '1', '0'), + Y216 = MAKEFOURCC('Y', '2', '1', '6'), + Y410 = MAKEFOURCC('Y', '4', '1', '0'), + AYUV = MAKEFOURCC('A', 'Y', 'U', 'V'), + YV12 = MAKEFOURCC('Y', 'V', '1', '2'), + OPAQUE_420 = MAKEFOURCC('4', '2', '0', 'O'), + + // Not supported but exist + AI44 = MAKEFOURCC('A', 'I', '4', '4'), + IA44 = MAKEFOURCC('I', 'A', '4', '4'), + R2VB = MAKEFOURCC('R', '2', 'V', 'B'), + COPM = MAKEFOURCC('C', 'O', 'P', 'M'), + SSAA = MAKEFOURCC('S', 'S', 'A', 'A'), + AL16 = MAKEFOURCC('A', 'L', '1', '6'), + R16 = MAKEFOURCC(' ', 'R', '1', '6'), + + EXT1 = MAKEFOURCC('E', 'X', 'T', '1'), + FXT1 = MAKEFOURCC('F', 'X', 'T', '1'), + GXT1 = MAKEFOURCC('G', 'X', 'T', '1'), + HXT1 = MAKEFOURCC('H', 'X', 'T', '1'), + }; + + inline D3D9Format EnumerateFormat(D3DFORMAT format) { + return static_cast(format); + } + + std::ostream& operator << (std::ostream& os, D3D9Format format); + + enum D3D9VideoFormat : uint32_t { + D3D9VideoFormat_None = 0, + 
D3D9VideoFormat_YUY2 = 1, + D3D9VideoFormat_UYVY, + D3D9VideoFormat_Count + }; + + struct D3D9_VIDEO_FORMAT_INFO { + D3D9VideoFormat FormatType = D3D9VideoFormat_None; + VkExtent2D MacroPixelSize = { 1u, 1u }; + }; + + /** + * \brief Format mapping + * + * Maps a D3D9 format to a set of Vulkan formats. + */ + struct D3D9_VK_FORMAT_MAPPING { + union { + struct { + VkFormat FormatColor; ///< Corresponding color format + VkFormat FormatSrgb; ///< Corresponding sRGB format + }; + VkFormat Formats[2]; ///< Array view of { FormatColor, FormatSrgb } + }; + VkImageAspectFlags Aspect = 0; ///< Defined aspects for the color format + VkComponentMapping Swizzle = { ///< Color component swizzle + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY }; + D3D9_VIDEO_FORMAT_INFO VideoFormatInfo = { }; + + bool IsValid() const { return FormatColor != VK_FORMAT_UNDEFINED; } ///< True if a color format is assigned + }; + + D3D9_VK_FORMAT_MAPPING ConvertFormatUnfixed(D3D9Format Format); + + /** + * \brief Format table + * + * Initializes a format table for a specific + * device and provides methods to look up + * formats. + */ + class D3D9VkFormatTable { + + public: + + D3D9VkFormatTable( + const Rc& adapter, + const D3D9Options& options); + + /** + * \brief Retrieves info for a given D3D9 format + * + * \param [in] Format The D3D9 format to look up + * \returns Format info with device fallbacks applied, + * or an invalid mapping if the format is disabled + */ + D3D9_VK_FORMAT_MAPPING GetFormatMapping( + D3D9Format Format) const; + + /** + * \brief Retrieves format info for unsupported + * formats.
+ * + * \param [in] Format The D3D9 format to look up + */ + DxvkFormatInfo GetUnsupportedFormatInfo( + D3D9Format Format) const; + + private: + + bool CheckImageFormatSupport( + const Rc& Adapter, + VkFormat Format, + VkFormatFeatureFlags Features) const; + + bool m_d24s8Support; + bool m_d16s8Support; + + bool m_dfSupport; + bool m_x4r4g4b4Support; + bool m_d32supportFinal; + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_format_helpers.cpp b/src/d3d9/d3d9_format_helpers.cpp new file mode 100644 index 000000000..67d0dac89 --- /dev/null +++ b/src/d3d9/d3d9_format_helpers.cpp @@ -0,0 +1,82 @@ +#include "d3d9_format_helpers.h" + +#include + +namespace dxvk { + + D3D9FormatHelper::D3D9FormatHelper(const Rc& device) + : m_device(device), m_context(m_device->createContext()) { + m_context->beginRecording( + m_device->createCommandList()); + + InitShaders(); + } + + + void D3D9FormatHelper::ConvertVideoFormat( + D3D9_VIDEO_FORMAT_INFO videoFormat, + const Rc& dstImage, + VkImageSubresourceLayers dstSubresource, + const Rc& srcBuffer) { + DxvkImageViewCreateInfo imageViewInfo; + imageViewInfo.type = VK_IMAGE_VIEW_TYPE_2D; + imageViewInfo.format = dstImage->info().format; + imageViewInfo.usage = VK_IMAGE_USAGE_STORAGE_BIT; + imageViewInfo.aspect = dstSubresource.aspectMask; + imageViewInfo.minLevel = dstSubresource.mipLevel; + imageViewInfo.numLevels = 1; + imageViewInfo.minLayer = dstSubresource.baseArrayLayer; + imageViewInfo.numLayers = dstSubresource.layerCount; + auto tmpImageView = m_device->createImageView(dstImage, imageViewInfo); + + VkExtent3D imageExtent = dstImage->mipLevelExtent(dstSubresource.mipLevel); + imageExtent = VkExtent3D{ imageExtent.width / videoFormat.MacroPixelSize.width, + imageExtent.height / videoFormat.MacroPixelSize.height, + 1 }; + + DxvkBufferViewCreateInfo bufferViewInfo; + bufferViewInfo.format = VK_FORMAT_R32_UINT; + bufferViewInfo.rangeOffset = 0; + bufferViewInfo.rangeLength = srcBuffer->info().size; + auto 
tmpBufferView = m_device->createBufferView(srcBuffer, bufferViewInfo); + + if (videoFormat.FormatType == D3D9VideoFormat_UYVY + || videoFormat.FormatType == D3D9VideoFormat_YUY2) { + m_context->setSpecConstant(VK_PIPELINE_BIND_POINT_COMPUTE, 0, videoFormat.FormatType == D3D9VideoFormat_UYVY); + } + + m_context->bindResourceView(BindingIds::Image, tmpImageView, nullptr); + m_context->bindResourceView(BindingIds::Buffer, nullptr, tmpBufferView); + m_context->bindShader(VK_SHADER_STAGE_COMPUTE_BIT, m_shaders[videoFormat.FormatType]); + m_context->pushConstants(0, sizeof(VkExtent2D), &imageExtent); + m_context->dispatch( + (imageExtent.width + 7) / 8, /* ceil(width / 8); the former "/ 8 + % 8" form over-dispatched */ + (imageExtent.height + 7) / 8, /* ceil(height / 8) */ + 1); + + // Reset the spec constants used... + m_context->setSpecConstant(VK_PIPELINE_BIND_POINT_COMPUTE, 0, 0); + + m_context->flushCommandList(); + } + + + void D3D9FormatHelper::InitShaders() { + m_shaders[D3D9VideoFormat_YUY2] = InitShader(d3d9_convert_yuy2_uyvy); + m_shaders[D3D9VideoFormat_UYVY] = m_shaders[D3D9VideoFormat_YUY2]; + } + + + Rc D3D9FormatHelper::InitShader(SpirvCodeBuffer code) { + const std::array resourceSlots = { { + { BindingIds::Image, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_IMAGE_VIEW_TYPE_2D }, + { BindingIds::Buffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_IMAGE_VIEW_TYPE_1D }, + } }; + + return m_device->createShader( + VK_SHADER_STAGE_COMPUTE_BIT, + resourceSlots.size(), resourceSlots.data(), + { 0u, 0u, 0u, sizeof(VkExtent2D) }, code); + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_format_helpers.h b/src/d3d9/d3d9_format_helpers.h new file mode 100644 index 000000000..6a3bfdb95 --- /dev/null +++ b/src/d3d9/d3d9_format_helpers.h @@ -0,0 +1,40 @@ +#pragma once + +#include "d3d9_include.h" +#include "d3d9_format.h" +#include "../dxvk/dxvk_device.h" +#include "../dxvk/dxvk_context.h" + +namespace dxvk { + + class D3D9FormatHelper { + + public: + + D3D9FormatHelper(const Rc& device); + + void
ConvertVideoFormat( + D3D9_VIDEO_FORMAT_INFO videoFormat, + const Rc& dstImage, + VkImageSubresourceLayers dstSubresource, + const Rc& srcBuffer); + + private: + + enum BindingIds : uint32_t { + Image = 0, + Buffer = 1, + }; + + void InitShaders(); + + Rc InitShader(SpirvCodeBuffer code); + + Rc m_device; + Rc m_context; + + std::array, D3D9VideoFormat_Count> m_shaders; + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_hud.cpp b/src/d3d9/d3d9_hud.cpp new file mode 100644 index 000000000..e37a5a0e3 --- /dev/null +++ b/src/d3d9/d3d9_hud.cpp @@ -0,0 +1,36 @@ +#include "d3d9_hud.h" + +namespace dxvk::hud { + + HudSamplerCount::HudSamplerCount(D3D9DeviceEx* device) + : m_device (device) + , m_samplerCount ("0"){ + + } + + + void HudSamplerCount::update(dxvk::high_resolution_clock::time_point time) { + m_samplerCount = str::format(m_device->GetSamplerCount()); + } + + + HudPos HudSamplerCount::render( + HudRenderer& renderer, + HudPos position) { + position.y += 16.0f; + + renderer.drawText(16.0f, + { position.x, position.y }, + { 0.0f, 1.0f, 0.75f, 1.0f }, + "Samplers:"); + + renderer.drawText(16.0f, + { position.x + 120.0f, position.y }, + { 1.0f, 1.0f, 1.0f, 1.0f }, + m_samplerCount); + + position.y += 8.0f; + return position; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_hud.h b/src/d3d9/d3d9_hud.h new file mode 100644 index 000000000..b500e48b6 --- /dev/null +++ b/src/d3d9/d3d9_hud.h @@ -0,0 +1,31 @@ +#pragma once + +#include "d3d9_device.h" +#include "../dxvk/hud/dxvk_hud_item.h" + +namespace dxvk::hud { + + /** + * \brief HUD item to display the device's sampler count + */ + class HudSamplerCount : public HudItem { + + public: + + HudSamplerCount(D3D9DeviceEx* device); + + void update(dxvk::high_resolution_clock::time_point time); + + HudPos render( + HudRenderer& renderer, + HudPos position); + + private: + + D3D9DeviceEx* m_device; + + std::string m_samplerCount; + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_include.h
b/src/d3d9/d3d9_include.h new file mode 100644 index 000000000..ca174da62 --- /dev/null +++ b/src/d3d9/d3d9_include.h @@ -0,0 +1,95 @@ +#pragma once + +#ifndef _MSC_VER +#ifdef _WIN32_WINNT +#undef _WIN32_WINNT +#endif +#define _WIN32_WINNT 0x0A00 +#endif + +#include +#include + +//for some reason we need to specify __declspec(dllexport) for MinGW +#if defined(__WINE__) +#define DLLEXPORT __attribute__((visibility("default"))) +#elif defined(_MSC_VER) +#define DLLEXPORT +#else +#define DLLEXPORT __declspec(dllexport) +#endif + + +#include "../util/com/com_guid.h" +#include "../util/com/com_object.h" +#include "../util/com/com_pointer.h" + +#include "../util/log/log.h" +#include "../util/log/log_debug.h" + +#include "../util/rc/util_rc.h" +#include "../util/rc/util_rc_ptr.h" + +#include "../util/util_env.h" +#include "../util/util_enum.h" +#include "../util/util_error.h" +#include "../util/util_flags.h" +#include "../util/util_likely.h" +#include "../util/util_math.h" +#include "../util/util_string.h" +#include "../util/util_misc.h" + +// Missed definitions in Wine/MinGW. + +#ifndef D3DPRESENT_BACK_BUFFERS_MAX_EX +#define D3DPRESENT_BACK_BUFFERS_MAX_EX 30 +#endif + +#ifndef D3DSI_OPCODE_MASK +#define D3DSI_OPCODE_MASK 0x0000FFFF +#endif + +#ifndef D3DSP_TEXTURETYPE_MASK +#define D3DSP_TEXTURETYPE_MASK 0x78000000 +#endif + +#ifndef D3DUSAGE_AUTOGENMIPMAP +#define D3DUSAGE_AUTOGENMIPMAP 0x00000400L +#endif + +#ifndef D3DSP_DCL_USAGE_MASK +#define D3DSP_DCL_USAGE_MASK 0x0000000f +#endif + +#ifndef D3DSP_OPCODESPECIFICCONTROL_MASK +#define D3DSP_OPCODESPECIFICCONTROL_MASK 0x00ff0000 +#endif + +#ifndef D3DSP_OPCODESPECIFICCONTROL_SHIFT +#define D3DSP_OPCODESPECIFICCONTROL_SHIFT 16 +#endif + +#ifndef D3DCURSOR_IMMEDIATE_UPDATE +#define D3DCURSOR_IMMEDIATE_UPDATE 0x00000001L +#endif + +#ifndef D3DPRESENT_FORCEIMMEDIATE +#define D3DPRESENT_FORCEIMMEDIATE 0x00000100L +#endif + +// MinGW headers are broken. Who'dve guessed? 
+#ifndef _MSC_VER +typedef struct _D3DDEVINFO_RESOURCEMANAGER +{ + char dummy; +} D3DDEVINFO_RESOURCEMANAGER, * LPD3DDEVINFO_RESOURCEMANAGER; + +#ifndef __WINE__ +extern "C" WINUSERAPI WINBOOL WINAPI SetProcessDPIAware(VOID); +#endif +#endif + +// This is the managed pool on D3D9Ex, it's just hidden! +#define D3DPOOL_MANAGED_EX D3DPOOL(6) + +using D3D9VertexElements = std::vector; diff --git a/src/d3d9/d3d9_initializer.cpp b/src/d3d9/d3d9_initializer.cpp new file mode 100644 index 000000000..19d3e656c --- /dev/null +++ b/src/d3d9/d3d9_initializer.cpp @@ -0,0 +1,165 @@ +#include + +#include "d3d9_initializer.h" + +namespace dxvk { + + D3D9Initializer::D3D9Initializer( + const Rc& Device) + : m_device(Device), m_context(m_device->createContext()) { + m_context->beginRecording( + m_device->createCommandList()); + } + + + D3D9Initializer::~D3D9Initializer() { + + } + + + void D3D9Initializer::Flush() { + std::lock_guard lock(m_mutex); + + if (m_transferCommands != 0) + FlushInternal(); + } + + + void D3D9Initializer::InitBuffer( + D3D9CommonBuffer* pBuffer) { + VkMemoryPropertyFlags memFlags = pBuffer->GetBuffer()->memFlags(); + + (memFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + ? InitHostVisibleBuffer(pBuffer->GetBufferSlice()) + : InitDeviceLocalBuffer(pBuffer->GetBufferSlice()); + + if (pBuffer->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) + InitHostVisibleBuffer(pBuffer->GetBufferSlice()); + } + + + void D3D9Initializer::InitTexture( + D3D9CommonTexture* pTexture, + void* pInitialData) { + if (pTexture->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_NONE) + return; + + (pTexture->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED) + ? 
InitDeviceLocalTexture(pTexture) + : InitHostVisibleTexture(pTexture, pInitialData); + } + + + void D3D9Initializer::InitDeviceLocalBuffer( + DxvkBufferSlice Slice) { + std::lock_guard lock(m_mutex); + + m_transferCommands += 1; + + m_context->clearBuffer( + Slice.buffer(), + Slice.offset(), + Slice.length(), + 0u); + + FlushImplicit(); + } + + + void D3D9Initializer::InitHostVisibleBuffer( + DxvkBufferSlice Slice) { + // If the buffer is mapped, we can write data directly + // to the mapped memory region instead of doing it on + // the GPU. Same goes for zero-initialization. + std::memset( + Slice.mapPtr(0), 0, + Slice.length()); + } + + + void D3D9Initializer::InitDeviceLocalTexture( + D3D9CommonTexture* pTexture) { + std::lock_guard lock(m_mutex); + + auto InitImage = [&](Rc image) { + if (image == nullptr) + return; + + auto formatInfo = imageFormatInfo(image->info().format); + + m_transferCommands += 1; + + // While the Microsoft docs state that resource contents are + // undefined if no initial data is provided, some applications + // expect a resource to be pre-cleared. Block-compressed images + // are cleared through a dedicated path, all others directly.
+ VkImageSubresourceRange subresources; + subresources.aspectMask = formatInfo->aspectMask; + subresources.baseMipLevel = 0; + subresources.levelCount = image->info().mipLevels; + subresources.baseArrayLayer = 0; + subresources.layerCount = image->info().numLayers; + + if (formatInfo->flags.test(DxvkFormatFlag::BlockCompressed)) { + m_context->clearCompressedColorImage(image, subresources); + } else { + if (subresources.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) { + VkClearColorValue value = { }; + + m_context->clearColorImage( + image, value, subresources); + } else { + VkClearDepthStencilValue value; + value.depth = 0.0f; + value.stencil = 0; + + m_context->clearDepthStencilImage( + image, value, subresources); + } + } + }; + + InitImage(pTexture->GetImage()); + + FlushImplicit(); + } + + + void D3D9Initializer::InitHostVisibleTexture( + D3D9CommonTexture* pTexture, + void* pInitialData) { + // If the buffer is mapped, we can write data directly + // to the mapped memory region instead of doing it on + // the GPU. Same goes for zero-initialization. 
+ for (uint32_t i = 0; i < pTexture->CountSubresources(); i++) { + DxvkBufferSliceHandle mapSlice = pTexture->GetBuffer(i)->getSliceHandle(); + + if (pInitialData != nullptr) { + std::memcpy( + mapSlice.mapPtr, + pInitialData, + mapSlice.length); + } else { + std::memset( + mapSlice.mapPtr, 0, + mapSlice.length); + } + } + } + + + void D3D9Initializer::FlushImplicit() { + if (m_transferCommands > MaxTransferCommands + || m_transferMemory > MaxTransferMemory) + FlushInternal(); + } + + + void D3D9Initializer::FlushInternal() { + m_context->flushCommandList(); + + m_transferCommands = 0; + m_transferMemory = 0; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_initializer.h b/src/d3d9/d3d9_initializer.h new file mode 100644 index 000000000..bf7d6085b --- /dev/null +++ b/src/d3d9/d3d9_initializer.h @@ -0,0 +1,62 @@ +#pragma once + +#include "d3d9_common_buffer.h" +#include "d3d9_common_texture.h" + +namespace dxvk { + + /** + * \brief Resource initialization context + * + * Manages a context which is used for resource + * initialization. This includes + * zero-initialization for buffers and images. 
+ */ + class D3D9Initializer { + constexpr static size_t MaxTransferMemory = 32 * 1024 * 1024; + constexpr static size_t MaxTransferCommands = 512; + public: + + D3D9Initializer( + const Rc& Device); + + ~D3D9Initializer(); + + void Flush(); + + void InitBuffer( + D3D9CommonBuffer* pBuffer); + + void InitTexture( + D3D9CommonTexture* pTexture, + void* pInitialData = nullptr); + + private: + + std::mutex m_mutex; + + Rc m_device; + Rc m_context; + + size_t m_transferCommands = 0; + size_t m_transferMemory = 0; + + void InitDeviceLocalBuffer( + DxvkBufferSlice Slice); + + void InitHostVisibleBuffer( + DxvkBufferSlice Slice); + + void InitDeviceLocalTexture( + D3D9CommonTexture* pTexture); + + void InitHostVisibleTexture( + D3D9CommonTexture* pTexture, + void* pInitialData); + + void FlushImplicit(); + void FlushInternal(); + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_interface.cpp b/src/d3d9/d3d9_interface.cpp new file mode 100644 index 000000000..e13ea04e3 --- /dev/null +++ b/src/d3d9/d3d9_interface.cpp @@ -0,0 +1,316 @@ +#include "d3d9_interface.h" + +#include "d3d9_monitor.h" +#include "d3d9_caps.h" +#include "d3d9_device.h" + +#include + +namespace dxvk { + + D3D9InterfaceEx::D3D9InterfaceEx(bool bExtended) + : m_instance ( new DxvkInstance() ) + , m_extended ( bExtended ) + , m_d3d9Options ( nullptr, m_instance->config() ) { + m_adapters.reserve(m_instance->adapterCount()); + for (uint32_t i = 0; i < m_instance->adapterCount(); i++) + m_adapters.emplace_back(this, m_instance->enumAdapters(i), i); + + if (m_d3d9Options.dpiAware) { + Logger::info("Process set as DPI aware"); + SetProcessDPIAware(); + } + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3D9) + || (m_extended && riid == __uuidof(IDirect3D9Ex))) { + *ppvObject = ref(this); + return S_OK; + } + 
+ Logger::warn("D3D9InterfaceEx::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::RegisterSoftwareDevice(void* pInitializeFunction) { + Logger::warn("D3D9InterfaceEx::RegisterSoftwareDevice: Stub"); + return D3D_OK; + } + + + UINT STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterCount() { + return UINT(m_adapters.size()); + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterIdentifier( + UINT Adapter, + DWORD Flags, + D3DADAPTER_IDENTIFIER9* pIdentifier) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->GetAdapterIdentifier(Flags, pIdentifier); + + return D3DERR_INVALIDCALL; + } + + + UINT STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterModeCount(UINT Adapter, D3DFORMAT Format) { + D3DDISPLAYMODEFILTER filter; + filter.Size = sizeof(D3DDISPLAYMODEFILTER); + filter.Format = Format; + filter.ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; + + return this->GetAdapterModeCountEx(Adapter, &filter); + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterDisplayMode(UINT Adapter, D3DDISPLAYMODE* pMode) { + constexpr D3DFORMAT format = D3DFMT_X8R8G8B8; + const UINT mode = GetAdapterModeCount(Adapter, format) - 1; + + return this->EnumAdapterModes(Adapter, format, mode, pMode); + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CheckDeviceType( + UINT Adapter, + D3DDEVTYPE DevType, + D3DFORMAT AdapterFormat, + D3DFORMAT BackBufferFormat, + BOOL bWindowed) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->CheckDeviceType( + DevType, EnumerateFormat(AdapterFormat), + EnumerateFormat(BackBufferFormat), bWindowed); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CheckDeviceFormat( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT AdapterFormat, + DWORD Usage, + D3DRESOURCETYPE RType, + D3DFORMAT CheckFormat) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->CheckDeviceFormat( + 
DeviceType, EnumerateFormat(AdapterFormat), + Usage, RType, + EnumerateFormat(CheckFormat)); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CheckDeviceMultiSampleType( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT SurfaceFormat, + BOOL Windowed, + D3DMULTISAMPLE_TYPE MultiSampleType, + DWORD* pQualityLevels) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->CheckDeviceMultiSampleType( + DeviceType, EnumerateFormat(SurfaceFormat), + Windowed, MultiSampleType, + pQualityLevels); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CheckDepthStencilMatch( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT AdapterFormat, + D3DFORMAT RenderTargetFormat, + D3DFORMAT DepthStencilFormat) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->CheckDepthStencilMatch( + DeviceType, EnumerateFormat(AdapterFormat), + EnumerateFormat(RenderTargetFormat), + EnumerateFormat(DepthStencilFormat)); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CheckDeviceFormatConversion( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT SourceFormat, + D3DFORMAT TargetFormat) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->CheckDeviceFormatConversion( + DeviceType, EnumerateFormat(SourceFormat), + EnumerateFormat(TargetFormat)); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::GetDeviceCaps( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DCAPS9* pCaps) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->GetDeviceCaps( + DeviceType, pCaps); + + return D3DERR_INVALIDCALL; + } + + + HMONITOR STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterMonitor(UINT Adapter) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->GetMonitor(); + + return nullptr; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CreateDevice( + UINT Adapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + 
D3DPRESENT_PARAMETERS* pPresentationParameters, + IDirect3DDevice9** ppReturnedDeviceInterface) { + return this->CreateDeviceEx( + Adapter, + DeviceType, + hFocusWindow, + BehaviorFlags, + pPresentationParameters, + nullptr, // <-- pFullscreenDisplayMode + reinterpret_cast(ppReturnedDeviceInterface)); + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::EnumAdapterModes( + UINT Adapter, + D3DFORMAT Format, + UINT Mode, + D3DDISPLAYMODE* pMode) { + if (pMode == nullptr) + return D3DERR_INVALIDCALL; + + D3DDISPLAYMODEFILTER filter; + filter.Format = Format; + filter.ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; + filter.Size = sizeof(D3DDISPLAYMODEFILTER); + + D3DDISPLAYMODEEX modeEx; + HRESULT hr = this->EnumAdapterModesEx(Adapter, &filter, Mode, &modeEx); + + if (FAILED(hr)) + return hr; + + pMode->Width = modeEx.Width; + pMode->Height = modeEx.Height; + pMode->RefreshRate = modeEx.RefreshRate; + pMode->Format = modeEx.Format; + + return D3D_OK; + } + + + // Ex Methods + + + UINT STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterModeCountEx(UINT Adapter, CONST D3DDISPLAYMODEFILTER* pFilter) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->GetAdapterModeCountEx(pFilter); + + return 0; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::EnumAdapterModesEx( + UINT Adapter, + const D3DDISPLAYMODEFILTER* pFilter, + UINT Mode, + D3DDISPLAYMODEEX* pMode) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->EnumAdapterModesEx(pFilter, Mode, pMode); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterDisplayModeEx( + UINT Adapter, + D3DDISPLAYMODEEX* pMode, + D3DDISPLAYROTATION* pRotation) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->GetAdapterDisplayModeEx(pMode, pRotation); + + return D3DERR_INVALIDCALL; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::CreateDeviceEx( + UINT Adapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + D3DPRESENT_PARAMETERS* 
pPresentationParameters, + D3DDISPLAYMODEEX* pFullscreenDisplayMode, + IDirect3DDevice9Ex** ppReturnedDeviceInterface) { + InitReturnPtr(ppReturnedDeviceInterface); + + if (ppReturnedDeviceInterface == nullptr + || pPresentationParameters == nullptr) + return D3DERR_INVALIDCALL; + + auto* adapter = GetAdapter(Adapter); + + if (adapter == nullptr) + return D3DERR_INVALIDCALL; + + auto dxvkAdapter = adapter->GetDXVKAdapter(); + + std::string clientApi = str::format("D3D9", m_extended ? "Ex" : ""); + + try { + auto dxvkDevice = dxvkAdapter->createDevice(m_instance, clientApi, D3D9DeviceEx::GetDeviceFeatures(dxvkAdapter)); + + *ppReturnedDeviceInterface = ref(new D3D9DeviceEx( + this, + adapter, + DeviceType, + hFocusWindow, + BehaviorFlags, + pPresentationParameters, + pFullscreenDisplayMode, + dxvkDevice)); + } + catch (const DxvkError& e) { + Logger::err(e.message()); + return D3DERR_NOTAVAILABLE; + } + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9InterfaceEx::GetAdapterLUID(UINT Adapter, LUID* pLUID) { + if (auto* adapter = GetAdapter(Adapter)) + return adapter->GetAdapterLUID(pLUID); + + return D3DERR_INVALIDCALL; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_interface.h b/src/d3d9/d3d9_interface.h new file mode 100644 index 000000000..85cc5a036 --- /dev/null +++ b/src/d3d9/d3d9_interface.h @@ -0,0 +1,148 @@ +#pragma once + +#include "d3d9_adapter.h" + +#include "../dxvk/dxvk_instance.h" + +namespace dxvk { + + /** + * \brief D3D9 interface implementation + * + * Implements the IDirect3DDevice9Ex interfaces + * which provides the way to get adapters and create other objects such as \ref IDirect3DDevice9Ex. + * similar to \ref DxgiFactory but for D3D9. 
+ */ + class D3D9InterfaceEx final : public ComObjectClamp { + + public: + + D3D9InterfaceEx(bool bExtended); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + HRESULT STDMETHODCALLTYPE RegisterSoftwareDevice(void* pInitializeFunction); + + UINT STDMETHODCALLTYPE GetAdapterCount(); + + HRESULT STDMETHODCALLTYPE GetAdapterIdentifier( + UINT Adapter, + DWORD Flags, + D3DADAPTER_IDENTIFIER9* pIdentifier); + + UINT STDMETHODCALLTYPE GetAdapterModeCount(UINT Adapter, D3DFORMAT Format); + + HRESULT STDMETHODCALLTYPE GetAdapterDisplayMode(UINT Adapter, D3DDISPLAYMODE* pMode); + + HRESULT STDMETHODCALLTYPE CheckDeviceType( + UINT Adapter, + D3DDEVTYPE DevType, + D3DFORMAT AdapterFormat, + D3DFORMAT BackBufferFormat, + BOOL bWindowed); + + HRESULT STDMETHODCALLTYPE CheckDeviceFormat( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT AdapterFormat, + DWORD Usage, + D3DRESOURCETYPE RType, + D3DFORMAT CheckFormat); + + HRESULT STDMETHODCALLTYPE CheckDeviceMultiSampleType( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT SurfaceFormat, + BOOL Windowed, + D3DMULTISAMPLE_TYPE MultiSampleType, + DWORD* pQualityLevels); + + HRESULT STDMETHODCALLTYPE CheckDepthStencilMatch( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT AdapterFormat, + D3DFORMAT RenderTargetFormat, + D3DFORMAT DepthStencilFormat); + + HRESULT STDMETHODCALLTYPE CheckDeviceFormatConversion( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT SourceFormat, + D3DFORMAT TargetFormat); + + HRESULT STDMETHODCALLTYPE GetDeviceCaps( + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DCAPS9* pCaps); + + HMONITOR STDMETHODCALLTYPE GetAdapterMonitor(UINT Adapter); + + HRESULT STDMETHODCALLTYPE CreateDevice( + UINT Adapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + D3DPRESENT_PARAMETERS* pPresentationParameters, + IDirect3DDevice9** ppReturnedDeviceInterface); + + HRESULT STDMETHODCALLTYPE EnumAdapterModes( + UINT Adapter, + D3DFORMAT Format, + UINT Mode, + 
D3DDISPLAYMODE* pMode); + + // Ex Methods + + UINT STDMETHODCALLTYPE GetAdapterModeCountEx(UINT Adapter, CONST D3DDISPLAYMODEFILTER* pFilter); + + HRESULT STDMETHODCALLTYPE EnumAdapterModesEx( + UINT Adapter, + const D3DDISPLAYMODEFILTER* pFilter, + UINT Mode, + D3DDISPLAYMODEEX* pMode); + + HRESULT STDMETHODCALLTYPE GetAdapterDisplayModeEx( + UINT Adapter, + D3DDISPLAYMODEEX* pMode, + D3DDISPLAYROTATION* pRotation); + + HRESULT STDMETHODCALLTYPE CreateDeviceEx( + UINT Adapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + D3DPRESENT_PARAMETERS* pPresentationParameters, + D3DDISPLAYMODEEX* pFullscreenDisplayMode, + IDirect3DDevice9Ex** ppReturnedDeviceInterface); + + HRESULT STDMETHODCALLTYPE GetAdapterLUID(UINT Adapter, LUID* pLUID); + + const D3D9Options& GetOptions() { return m_d3d9Options; } + + D3D9Adapter* GetAdapter(UINT Ordinal) { + return Ordinal < m_adapters.size() + ? &m_adapters[Ordinal] + : nullptr; + } + + bool IsExtended() { return m_extended; } + + Rc GetInstance() { return m_instance; } + + private: + + void CacheModes(D3D9Format Format); + + static const char* GetDriverDllName(DxvkGpuVendor vendor); + + Rc m_instance; + + bool m_extended; + + D3D9Options m_d3d9Options; + + std::vector m_adapters; + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_main.cpp b/src/d3d9/d3d9_main.cpp new file mode 100644 index 000000000..dfca9c1d8 --- /dev/null +++ b/src/d3d9/d3d9_main.cpp @@ -0,0 +1,86 @@ +#include "../dxvk/dxvk_instance.h" + +#include "d3d9_interface.h" +#include "d3d9_shader_validator.h" + +class D3DFE_PROCESSVERTICES; +using PSGPERRORID = UINT; + +namespace dxvk { + Logger Logger::s_instance("d3d9.log"); + + HRESULT CreateD3D9( + bool Extended, + IDirect3D9Ex** ppDirect3D9Ex) { + if (!ppDirect3D9Ex) + return D3DERR_INVALIDCALL; + + *ppDirect3D9Ex = ref(new D3D9InterfaceEx( Extended )); + return D3D_OK; + } +} + +extern "C" { + + DLLEXPORT IDirect3D9* __stdcall Direct3DCreate9(UINT nSDKVersion) { + 
  // Creates the extended (Ex) interface object and
  // returns the result of dxvk::CreateD3D9 unchanged.
  DLLEXPORT HRESULT __stdcall Direct3DCreate9Ex(UINT nSDKVersion, IDirect3D9Ex** ppDirect3D9Ex) {
    return dxvk::CreateD3D9(true, ppDirect3D9Ex);
  }

  // The D3DPERF_* exports below are stubs: they ignore
  // their arguments and return 0 / FALSE or nothing.

  DLLEXPORT int __stdcall D3DPERF_BeginEvent(D3DCOLOR col, LPCWSTR wszName) {
    return 0;
  }

  DLLEXPORT int __stdcall D3DPERF_EndEvent(void) {
    return 0;
  }

  DLLEXPORT void __stdcall D3DPERF_SetMarker(D3DCOLOR col, LPCWSTR wszName) {
  }

  DLLEXPORT void __stdcall D3DPERF_SetRegion(D3DCOLOR col, LPCWSTR wszName) {
  }

  DLLEXPORT BOOL __stdcall D3DPERF_QueryRepeatFrame(void) {
    return FALSE;
  }

  DLLEXPORT void __stdcall D3DPERF_SetOptions(DWORD dwOptions) {
  }

  DLLEXPORT DWORD __stdcall D3DPERF_GetStatus(void) {
    return 0;
  }


  // Debug exports. Both are no-op stubs.

  DLLEXPORT void __stdcall DebugSetMute(void) {
  }

  DLLEXPORT int __stdcall DebugSetLevel(void) {
    return 0;
  }

  // Processor Specific Geometry Pipeline
  // for P3 SIMD/AMD 3DNow.
  // Both entry points are empty stubs.

  DLLEXPORT void __stdcall PSGPError(D3DFE_PROCESSVERTICES* a, PSGPERRORID b, UINT c) {
  }

  DLLEXPORT void __stdcall PSGPSampleTexture(D3DFE_PROCESSVERTICES* a, UINT b, float(*const c)[4], UINT d, float(*const e)[4]) {
  }

  // Allocates a new shader validator object and
  // returns it through ref().
  DLLEXPORT dxvk::D3D9ShaderValidator* __stdcall Direct3DShaderValidatorCreate9(void) {
    return ref(new dxvk::D3D9ShaderValidator());
  }

  // Stub: ignores its argument and returns 0.
  DLLEXPORT int __stdcall Direct3D9EnableMaximizedWindowedModeShim(UINT a) {
    return 0;
  }
+ case D3D9Format::A2R10G10B10: + return 32; + + case D3D9Format::A1R5G5B5: + case D3D9Format::X1R5G5B5: + case D3D9Format::R5G6B5: + return 16; + + default: + Logger::warn(str::format( + "GetMonitorFormatBpp: Unknown format: ", + Format)); + return 32; + } + } + + + bool IsSupportedAdapterFormat( + D3D9Format Format) { + return Format == D3D9Format::A2R10G10B10 + || Format == D3D9Format::X8R8G8B8 + || Format == D3D9Format::A8R8G8B8 + || Format == D3D9Format::X1R5G5B5 + || Format == D3D9Format::A1R5G5B5 + || Format == D3D9Format::R5G6B5; + } + + + bool IsSupportedDisplayFormat( + D3D9Format Format, + BOOL Windowed) { + return (Format == D3D9Format::A2R10G10B10 && !Windowed) + || Format == D3D9Format::X8R8G8B8 + || Format == D3D9Format::X1R5G5B5 + || Format == D3D9Format::R5G6B5; + } + + + bool IsSupportedBackBufferFormat( + D3D9Format AdapterFormat, + D3D9Format BackBufferFormat, + BOOL Windowed) { + if (!IsSupportedAdapterFormat(AdapterFormat)) + return false; + + if (AdapterFormat == D3D9Format::A2R10G10B10 && Windowed) + return false; + + return AdapterFormat == BackBufferFormat + || (AdapterFormat == D3D9Format::X8R8G8B8 && BackBufferFormat == D3D9Format::A8R8G8B8) + || (AdapterFormat == D3D9Format::X1R5G5B5 && BackBufferFormat == D3D9Format::A1R5G5B5); + } + + + bool IsSupportedBackBufferFormat( + D3D9Format BackBufferFormat, + BOOL Windowed) { + return (BackBufferFormat == D3D9Format::A2R10G10B10 && !Windowed) + || BackBufferFormat == D3D9Format::A8R8G8B8 + || BackBufferFormat == D3D9Format::X8R8G8B8 + || BackBufferFormat == D3D9Format::A1R5G5B5 + || BackBufferFormat == D3D9Format::X1R5G5B5 + || BackBufferFormat == D3D9Format::R5G6B5; + } + + + HMONITOR GetDefaultMonitor() { + return ::MonitorFromPoint({ 0, 0 }, MONITOR_DEFAULTTOPRIMARY); + } + + + HRESULT SetMonitorDisplayMode( + HMONITOR hMonitor, + const D3DDISPLAYMODEEX* pMode) { + ::MONITORINFOEXW monInfo; + monInfo.cbSize = sizeof(monInfo); + + if (!::GetMonitorInfoW(hMonitor, 
reinterpret_cast(&monInfo))) { + Logger::err("D3D9: Failed to query monitor info"); + return E_FAIL; + } + + DEVMODEW devMode = { }; + devMode.dmSize = sizeof(devMode); + devMode.dmFields = DM_PELSWIDTH | DM_PELSHEIGHT | DM_BITSPERPEL; + devMode.dmPelsWidth = pMode->Width; + devMode.dmPelsHeight = pMode->Height; + devMode.dmBitsPerPel = GetMonitorFormatBpp(EnumerateFormat(pMode->Format)); + + if (pMode->RefreshRate != 0) { + devMode.dmFields |= DM_DISPLAYFREQUENCY; + devMode.dmDisplayFrequency = pMode->RefreshRate; + } + + Logger::info(str::format("D3D9: Setting display mode: ", + devMode.dmPelsWidth, "x", devMode.dmPelsHeight, "@", + devMode.dmDisplayFrequency)); + + LONG status = ::ChangeDisplaySettingsExW( + monInfo.szDevice, &devMode, nullptr, CDS_FULLSCREEN, nullptr); + + if (status != DISP_CHANGE_SUCCESSFUL) { + // Try again but without setting the frequency. + devMode.dmFields &= ~DM_DISPLAYFREQUENCY; + devMode.dmDisplayFrequency = 0; + status = ::ChangeDisplaySettingsExW( + monInfo.szDevice, &devMode, nullptr, CDS_FULLSCREEN, nullptr); + } + + return status == DISP_CHANGE_SUCCESSFUL ? 
D3D_OK : D3DERR_NOTAVAILABLE; + } + + + void GetWindowClientSize( + HWND hWnd, + UINT* pWidth, + UINT* pHeight) { + RECT rect = { }; + ::GetClientRect(hWnd, &rect); + + if (pWidth) + *pWidth = rect.right - rect.left; + + if (pHeight) + *pHeight = rect.bottom - rect.top; + } + + + void GetMonitorClientSize( + HMONITOR hMonitor, + UINT* pWidth, + UINT* pHeight) { + ::MONITORINFOEXW monInfo; + monInfo.cbSize = sizeof(monInfo); + + if (!::GetMonitorInfoW(hMonitor, reinterpret_cast(&monInfo))) { + Logger::err("D3D9: Failed to query monitor info"); + return; + } + + auto rect = monInfo.rcMonitor; + + if (pWidth) + *pWidth = rect.right - rect.left; + + if (pHeight) + *pHeight = rect.bottom - rect.top; + } + + + void GetMonitorRect( + HMONITOR hMonitor, + RECT* pRect) { + ::MONITORINFOEXW monInfo; + monInfo.cbSize = sizeof(monInfo); + + if (!::GetMonitorInfoW(hMonitor, reinterpret_cast(&monInfo))) { + Logger::err("D3D9: Failed to query monitor info"); + return; + } + + *pRect = monInfo.rcMonitor; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_monitor.h b/src/d3d9/d3d9_monitor.h new file mode 100644 index 000000000..480f275d2 --- /dev/null +++ b/src/d3d9/d3d9_monitor.h @@ -0,0 +1,88 @@ +#pragma once + +#include "d3d9_include.h" + +#include "d3d9_format.h" + +namespace dxvk { + + /** + * \brief Queries bits per pixel for a format + * + * The format must be a valid swap chain format. + * \param [in] Format The D3D9 format to query + * \returns Bits per pixel for this format + */ + uint32_t GetMonitorFormatBpp( + D3D9Format Format); + + /** + * \brief Returns if a format is supported for a backbuffer/swapchain. + * + * \param [in] Format The D3D9 format to query + * \returns If it is supported as a swapchain/backbuffer format. 
+ */ + bool IsSupportedAdapterFormat( + D3D9Format Format); + + bool IsSupportedDisplayFormat( + D3D9Format Format, + BOOL Windowed); + + bool IsSupportedBackBufferFormat( + D3D9Format AdapterFormat, + D3D9Format BackBufferFormat, + BOOL Windowed); + + bool IsSupportedBackBufferFormat( + D3D9Format BackBufferFormat, + BOOL Windowed); + + HMONITOR GetDefaultMonitor(); + + /** + * \brief Sets monitor display mode + * + * \param [in] hMonitor Monitor handle + * \param [in] pMode Display mode properties + * \returns S_OK on success + */ + HRESULT SetMonitorDisplayMode( + HMONITOR hMonitor, + const D3DDISPLAYMODEEX* pMode); + + /** + * \brief Queries window client size + * + * \param [in] hWnd Window to query + * \param [out] pWidth Client width + * \param [out] pHeight Client height + */ + void GetWindowClientSize( + HWND hWnd, + UINT* pWidth, + UINT* pHeight); + + /** + * \brief Queries monitor size + * + * \param [in] hMonitor Monitor to query + * \param [out] pWidth Client width + * \param [out] pHeight Client height + */ + void GetMonitorClientSize( + HMONITOR hMonitor, + UINT* pWidth, + UINT* pHeight); + + /** + * \brief Queries monitor rect + * + * \param [in] hMonitor Monitor to query + * \param [out] pRect The rect to return + */ + void GetMonitorRect( + HMONITOR hMonitor, + RECT* pRect); + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_multithread.cpp b/src/d3d9/d3d9_multithread.cpp new file mode 100644 index 000000000..3089e47f1 --- /dev/null +++ b/src/d3d9/d3d9_multithread.cpp @@ -0,0 +1,41 @@ +#include "d3d9_device.h" + +namespace dxvk { + + void D3D9DeviceMutex::lock() { + while (!try_lock()) + dxvk::this_thread::yield(); + } + + + void D3D9DeviceMutex::unlock() { + if (likely(m_counter == 0)) + m_owner.store(0, std::memory_order_release); + else + m_counter -= 1; + } + + + bool D3D9DeviceMutex::try_lock() { + uint32_t threadId = GetCurrentThreadId(); + uint32_t expected = 0; + + bool status = m_owner.compare_exchange_weak( + expected, threadId, 
std::memory_order_acquire); + + if (status) + return true; + + if (expected != threadId) + return false; + + m_counter += 1; + return true; + } + + + D3D9Multithread::D3D9Multithread( + BOOL Protected) + : m_protected( Protected ) { } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_multithread.h b/src/d3d9/d3d9_multithread.h new file mode 100644 index 000000000..47f8999f8 --- /dev/null +++ b/src/d3d9/d3d9_multithread.h @@ -0,0 +1,101 @@ +#pragma once + +#include "d3d9_include.h" + +namespace dxvk { + + /** + * \brief Device mutex + * + * Effectively implements a recursive spinlock + * which is used to lock the D3D9 device. + */ + class D3D9DeviceMutex { + + public: + + void lock(); + + void unlock(); + + bool try_lock(); + + private: + + std::atomic m_owner = { 0u }; + uint32_t m_counter = { 0u }; + + }; + + + /** + * \brief Device lock + * + * Lightweight RAII wrapper that implements + * a subset of the functionality provided by + * \c std::unique_lock, with the goal of being + * cheaper to construct and destroy. + */ + class D3D9DeviceLock { + + public: + + D3D9DeviceLock() + : m_mutex(nullptr) { } + + D3D9DeviceLock(D3D9DeviceMutex& mutex) + : m_mutex(&mutex) { + mutex.lock(); + } + + D3D9DeviceLock(D3D9DeviceLock&& other) + : m_mutex(other.m_mutex) { + other.m_mutex = nullptr; + } + + D3D9DeviceLock& operator = (D3D9DeviceLock&& other) { + if (m_mutex) + m_mutex->unlock(); + + m_mutex = other.m_mutex; + other.m_mutex = nullptr; + return *this; + } + + ~D3D9DeviceLock() { + if (m_mutex != nullptr) + m_mutex->unlock(); + } + + private: + + D3D9DeviceMutex* m_mutex; + + }; + + + /** + * \brief D3D9 context lock + */ + class D3D9Multithread { + + public: + + D3D9Multithread( + BOOL Protected); + + D3D9DeviceLock AcquireLock() { + return m_protected + ? 
  // Stream output for D3D9Format values.
  // NOTE(review): ENUM_NAME and ENUM_DEFAULT are macros defined
  // outside this chunk; presumably each ENUM_NAME expands to a
  // case that writes the enumerator's name and ENUM_DEFAULT
  // handles unlisted values - confirm against their definition.
  std::ostream& operator << (std::ostream& os, D3D9Format e) {
    switch (e) {
      ENUM_NAME(D3D9Format::Unknown);

      ENUM_NAME(D3D9Format::R8G8B8);
      ENUM_NAME(D3D9Format::A8R8G8B8);
      ENUM_NAME(D3D9Format::X8R8G8B8);
      ENUM_NAME(D3D9Format::R5G6B5);
      ENUM_NAME(D3D9Format::X1R5G5B5);
      ENUM_NAME(D3D9Format::A1R5G5B5);
      ENUM_NAME(D3D9Format::A4R4G4B4);
      ENUM_NAME(D3D9Format::R3G3B2);
      ENUM_NAME(D3D9Format::A8);
      ENUM_NAME(D3D9Format::A8R3G3B2);
      ENUM_NAME(D3D9Format::X4R4G4B4);
      ENUM_NAME(D3D9Format::A2B10G10R10);
      ENUM_NAME(D3D9Format::A8B8G8R8);
      ENUM_NAME(D3D9Format::X8B8G8R8);
      ENUM_NAME(D3D9Format::G16R16);
      ENUM_NAME(D3D9Format::A2R10G10B10);
      ENUM_NAME(D3D9Format::A16B16G16R16);
      ENUM_NAME(D3D9Format::A8P8);
      ENUM_NAME(D3D9Format::P8);
      ENUM_NAME(D3D9Format::L8);
      ENUM_NAME(D3D9Format::A8L8);
      ENUM_NAME(D3D9Format::A4L4);
      ENUM_NAME(D3D9Format::V8U8);
      ENUM_NAME(D3D9Format::L6V5U5);
      ENUM_NAME(D3D9Format::X8L8V8U8);
      ENUM_NAME(D3D9Format::Q8W8V8U8);
      ENUM_NAME(D3D9Format::V16U16);
      ENUM_NAME(D3D9Format::A2W10V10U10);
      ENUM_NAME(D3D9Format::UYVY);
      ENUM_NAME(D3D9Format::R8G8_B8G8);
      ENUM_NAME(D3D9Format::YUY2);
      ENUM_NAME(D3D9Format::G8R8_G8B8);
      ENUM_NAME(D3D9Format::DXT1);
      ENUM_NAME(D3D9Format::DXT2);
      ENUM_NAME(D3D9Format::DXT3);
      ENUM_NAME(D3D9Format::DXT4);
      ENUM_NAME(D3D9Format::DXT5);
      ENUM_NAME(D3D9Format::D16_LOCKABLE);
      ENUM_NAME(D3D9Format::D32);
      ENUM_NAME(D3D9Format::D15S1);
      ENUM_NAME(D3D9Format::D24S8);
      ENUM_NAME(D3D9Format::D24X8);
      ENUM_NAME(D3D9Format::D24X4S4);
      ENUM_NAME(D3D9Format::D16);
      ENUM_NAME(D3D9Format::D32F_LOCKABLE);
      ENUM_NAME(D3D9Format::D24FS8);
      ENUM_NAME(D3D9Format::D32_LOCKABLE);
      ENUM_NAME(D3D9Format::S8_LOCKABLE);
      ENUM_NAME(D3D9Format::L16);
      ENUM_NAME(D3D9Format::VERTEXDATA);
      ENUM_NAME(D3D9Format::INDEX16);
      ENUM_NAME(D3D9Format::INDEX32);
      ENUM_NAME(D3D9Format::Q16W16V16U16);
      ENUM_NAME(D3D9Format::MULTI2_ARGB8);
      ENUM_NAME(D3D9Format::R16F);
      ENUM_NAME(D3D9Format::G16R16F);
      ENUM_NAME(D3D9Format::A16B16G16R16F);
      ENUM_NAME(D3D9Format::R32F);
      ENUM_NAME(D3D9Format::G32R32F);
      ENUM_NAME(D3D9Format::A32B32G32R32F);
      ENUM_NAME(D3D9Format::CxV8U8);
      ENUM_NAME(D3D9Format::A1);
      ENUM_NAME(D3D9Format::A2B10G10R10_XR_BIAS);
      ENUM_NAME(D3D9Format::BINARYBUFFER);

      // Driver Hacks / Unofficial Formats
      ENUM_NAME(D3D9Format::ATI1);
      ENUM_NAME(D3D9Format::ATI2);
      ENUM_NAME(D3D9Format::INST);
      ENUM_NAME(D3D9Format::DF24);
      ENUM_NAME(D3D9Format::DF16);
      ENUM_NAME(D3D9Format::NULL_FORMAT);
      ENUM_NAME(D3D9Format::GET4);
      ENUM_NAME(D3D9Format::GET1);
      ENUM_NAME(D3D9Format::NVDB);
      ENUM_NAME(D3D9Format::A2M1);
      ENUM_NAME(D3D9Format::A2M0);
      ENUM_NAME(D3D9Format::ATOC);
      ENUM_NAME(D3D9Format::INTZ);
      ENUM_NAME(D3D9Format::RAWZ);
      ENUM_NAME(D3D9Format::RESZ);

      ENUM_NAME(D3D9Format::NV11);
      ENUM_NAME(D3D9Format::NV12);
      ENUM_NAME(D3D9Format::P010);
      ENUM_NAME(D3D9Format::P016);
      ENUM_NAME(D3D9Format::Y210);
      ENUM_NAME(D3D9Format::Y216);
      ENUM_NAME(D3D9Format::Y410);
      ENUM_NAME(D3D9Format::AYUV);
      ENUM_NAME(D3D9Format::YV12);
      ENUM_NAME(D3D9Format::OPAQUE_420);

      ENUM_NAME(D3D9Format::AI44);
      ENUM_NAME(D3D9Format::IA44);
      ENUM_NAME(D3D9Format::R2VB);
      ENUM_NAME(D3D9Format::COPM);
      ENUM_NAME(D3D9Format::SSAA);
      ENUM_NAME(D3D9Format::AL16);
      ENUM_NAME(D3D9Format::R16);

      ENUM_NAME(D3D9Format::EXT1);
      ENUM_NAME(D3D9Format::FXT1);
      ENUM_NAME(D3D9Format::GXT1);
      ENUM_NAME(D3D9Format::HXT1);

      ENUM_DEFAULT(e);
    }
  }
+ ENUM_NAME(D3DRS_FILLMODE); + ENUM_NAME(D3DRS_SHADEMODE); + ENUM_NAME(D3DRS_ZWRITEENABLE); + ENUM_NAME(D3DRS_ALPHATESTENABLE); + ENUM_NAME(D3DRS_LASTPIXEL); + ENUM_NAME(D3DRS_SRCBLEND); + ENUM_NAME(D3DRS_DESTBLEND); + ENUM_NAME(D3DRS_CULLMODE); + ENUM_NAME(D3DRS_ZFUNC); + ENUM_NAME(D3DRS_ALPHAREF); + ENUM_NAME(D3DRS_ALPHAFUNC); + ENUM_NAME(D3DRS_DITHERENABLE); + ENUM_NAME(D3DRS_ALPHABLENDENABLE); + ENUM_NAME(D3DRS_FOGENABLE); + ENUM_NAME(D3DRS_SPECULARENABLE); + ENUM_NAME(D3DRS_FOGCOLOR); + ENUM_NAME(D3DRS_FOGTABLEMODE); + ENUM_NAME(D3DRS_FOGSTART); + ENUM_NAME(D3DRS_FOGEND); + ENUM_NAME(D3DRS_FOGDENSITY); + ENUM_NAME(D3DRS_RANGEFOGENABLE); + ENUM_NAME(D3DRS_STENCILENABLE); + ENUM_NAME(D3DRS_STENCILFAIL); + ENUM_NAME(D3DRS_STENCILZFAIL); + ENUM_NAME(D3DRS_STENCILPASS); + ENUM_NAME(D3DRS_STENCILFUNC); + ENUM_NAME(D3DRS_STENCILREF); + ENUM_NAME(D3DRS_STENCILMASK); + ENUM_NAME(D3DRS_STENCILWRITEMASK); + ENUM_NAME(D3DRS_TEXTUREFACTOR); + ENUM_NAME(D3DRS_WRAP0); + ENUM_NAME(D3DRS_WRAP1); + ENUM_NAME(D3DRS_WRAP2); + ENUM_NAME(D3DRS_WRAP3); + ENUM_NAME(D3DRS_WRAP4); + ENUM_NAME(D3DRS_WRAP5); + ENUM_NAME(D3DRS_WRAP6); + ENUM_NAME(D3DRS_WRAP7); + ENUM_NAME(D3DRS_CLIPPING); + ENUM_NAME(D3DRS_LIGHTING); + ENUM_NAME(D3DRS_AMBIENT); + ENUM_NAME(D3DRS_FOGVERTEXMODE); + ENUM_NAME(D3DRS_COLORVERTEX); + ENUM_NAME(D3DRS_LOCALVIEWER); + ENUM_NAME(D3DRS_NORMALIZENORMALS); + ENUM_NAME(D3DRS_DIFFUSEMATERIALSOURCE); + ENUM_NAME(D3DRS_SPECULARMATERIALSOURCE); + ENUM_NAME(D3DRS_AMBIENTMATERIALSOURCE); + ENUM_NAME(D3DRS_EMISSIVEMATERIALSOURCE); + ENUM_NAME(D3DRS_VERTEXBLEND); + ENUM_NAME(D3DRS_CLIPPLANEENABLE); + ENUM_NAME(D3DRS_POINTSIZE); + ENUM_NAME(D3DRS_POINTSIZE_MIN); + ENUM_NAME(D3DRS_POINTSPRITEENABLE); + ENUM_NAME(D3DRS_POINTSCALEENABLE); + ENUM_NAME(D3DRS_POINTSCALE_A); + ENUM_NAME(D3DRS_POINTSCALE_B); + ENUM_NAME(D3DRS_POINTSCALE_C); + ENUM_NAME(D3DRS_MULTISAMPLEANTIALIAS); + ENUM_NAME(D3DRS_MULTISAMPLEMASK); + ENUM_NAME(D3DRS_PATCHEDGESTYLE); + 
ENUM_NAME(D3DRS_DEBUGMONITORTOKEN); + ENUM_NAME(D3DRS_POINTSIZE_MAX); + ENUM_NAME(D3DRS_INDEXEDVERTEXBLENDENABLE); + ENUM_NAME(D3DRS_COLORWRITEENABLE); + ENUM_NAME(D3DRS_TWEENFACTOR); + ENUM_NAME(D3DRS_BLENDOP); + ENUM_NAME(D3DRS_POSITIONDEGREE); + ENUM_NAME(D3DRS_NORMALDEGREE); + ENUM_NAME(D3DRS_SCISSORTESTENABLE); + ENUM_NAME(D3DRS_SLOPESCALEDEPTHBIAS); + ENUM_NAME(D3DRS_ANTIALIASEDLINEENABLE); + ENUM_NAME(D3DRS_MINTESSELLATIONLEVEL); + ENUM_NAME(D3DRS_MAXTESSELLATIONLEVEL); + ENUM_NAME(D3DRS_ADAPTIVETESS_X); + ENUM_NAME(D3DRS_ADAPTIVETESS_Y); + ENUM_NAME(D3DRS_ADAPTIVETESS_Z); + ENUM_NAME(D3DRS_ADAPTIVETESS_W); + ENUM_NAME(D3DRS_ENABLEADAPTIVETESSELLATION); + ENUM_NAME(D3DRS_TWOSIDEDSTENCILMODE); + ENUM_NAME(D3DRS_CCW_STENCILFAIL); + ENUM_NAME(D3DRS_CCW_STENCILZFAIL); + ENUM_NAME(D3DRS_CCW_STENCILPASS); + ENUM_NAME(D3DRS_CCW_STENCILFUNC); + ENUM_NAME(D3DRS_COLORWRITEENABLE1); + ENUM_NAME(D3DRS_COLORWRITEENABLE2); + ENUM_NAME(D3DRS_COLORWRITEENABLE3); + ENUM_NAME(D3DRS_BLENDFACTOR); + ENUM_NAME(D3DRS_SRGBWRITEENABLE); + ENUM_NAME(D3DRS_DEPTHBIAS); + ENUM_NAME(D3DRS_WRAP8); + ENUM_NAME(D3DRS_WRAP9); + ENUM_NAME(D3DRS_WRAP10); + ENUM_NAME(D3DRS_WRAP11); + ENUM_NAME(D3DRS_WRAP12); + ENUM_NAME(D3DRS_WRAP13); + ENUM_NAME(D3DRS_WRAP14); + ENUM_NAME(D3DRS_WRAP15); + ENUM_NAME(D3DRS_SEPARATEALPHABLENDENABLE); + ENUM_NAME(D3DRS_SRCBLENDALPHA); + ENUM_NAME(D3DRS_DESTBLENDALPHA); + ENUM_NAME(D3DRS_BLENDOPALPHA); + + ENUM_DEFAULT(e); + } + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_names.h b/src/d3d9/d3d9_names.h new file mode 100644 index 000000000..82d9c4790 --- /dev/null +++ b/src/d3d9/d3d9_names.h @@ -0,0 +1,7 @@ +#include "d3d9_include.h" + +namespace dxvk { + + std::ostream& operator << (std::ostream& os, D3DRENDERSTATETYPE e); + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_options.cpp b/src/d3d9/d3d9_options.cpp new file mode 100644 index 000000000..0b7371a12 --- /dev/null +++ b/src/d3d9/d3d9_options.cpp @@ -0,0 +1,76 @@ +#include 
"d3d9_options.h" + +#include "d3d9_caps.h" + +namespace dxvk { + + static int32_t parsePciId(const std::string& str) { + if (str.size() != 4) + return -1; + + int32_t id = 0; + + for (size_t i = 0; i < str.size(); i++) { + id *= 16; + + if (str[i] >= '0' && str[i] <= '9') + id += str[i] - '0'; + else if (str[i] >= 'A' && str[i] <= 'F') + id += str[i] - 'A' + 10; + else if (str[i] >= 'a' && str[i] <= 'f') + id += str[i] - 'a' + 10; + else + return -1; + } + + return id; + } + + + D3D9Options::D3D9Options(const Rc& device, const Config& config) { + const Rc adapter = device != nullptr ? device->adapter() : nullptr; + + // Fetch these as a string representing a hexadecimal number and parse it. + this->customVendorId = parsePciId(config.getOption("d3d9.customVendorId")); + this->customDeviceId = parsePciId(config.getOption("d3d9.customDeviceId")); + this->customDeviceDesc = config.getOption("d3d9.customDeviceDesc"); + + const int32_t vendorId = this->customDeviceId != -1 ? this->customDeviceId : (adapter != nullptr ? 
adapter->deviceProperties().vendorID : 0); + + this->maxFrameLatency = config.getOption ("d3d9.maxFrameLatency", 0); + this->presentInterval = config.getOption ("d3d9.presentInterval", -1); + this->shaderModel = config.getOption ("d3d9.shaderModel", 3); + this->evictManagedOnUnlock = config.getOption ("d3d9.evictManagedOnUnlock", false); + this->dpiAware = config.getOption ("d3d9.dpiAware", true); + this->allowLockFlagReadonly = config.getOption ("d3d9.allowLockFlagReadonly", true); + this->strictConstantCopies = config.getOption ("d3d9.strictConstantCopies", false); + this->strictPow = config.getOption ("d3d9.strictPow", true); + this->lenientClear = config.getOption ("d3d9.lenientClear", false); + this->numBackBuffers = config.getOption ("d3d9.numBackBuffers", 0); + this->deferSurfaceCreation = config.getOption ("d3d9.deferSurfaceCreation", false); + this->samplerAnisotropy = config.getOption ("d3d9.samplerAnisotropy", -1); + this->maxAvailableMemory = config.getOption ("d3d9.maxAvailableMemory", 4096); + this->supportDFFormats = config.getOption ("d3d9.supportDFFormats", true); + this->supportX4R4G4B4 = config.getOption ("d3d9.supportX4R4G4B4", true); + this->supportD32 = config.getOption ("d3d9.supportD32", true); + this->swvpFloatCount = config.getOption ("d3d9.swvpFloatCount", caps::MaxFloatConstantsSoftware); + this->swvpIntCount = config.getOption ("d3d9.swvpIntCount", caps::MaxOtherConstantsSoftware); + this->swvpBoolCount = config.getOption ("d3d9.swvpBoolCount", caps::MaxOtherConstantsSoftware); + this->disableA8RT = config.getOption ("d3d9.disableA8RT", false); + this->invariantPosition = config.getOption ("d3d9.invariantPosition", false); + this->memoryTrackTest = config.getOption ("d3d9.memoryTrackTest", false); + this->supportVCache = config.getOption ("d3d9.supportVCache", vendorId == 0x10de); + this->enableDialogMode = config.getOption ("d3d9.enableDialogMode", false); + + this->forceAspectRatio = config.getOption("d3d9.forceAspectRatio", ""); + + 
// If we are not Nvidia, enable general hazards. + this->generalHazards = adapter == nullptr || !adapter->matchesDriver(DxvkGpuVendor::Nvidia, VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, 0, 0); + applyTristate(this->generalHazards, config.getOption("d3d9.generalHazards", Tristate::Auto)); + + this->d3d9FloatEmulation = true; // <-- Future Extension? + + applyTristate(this->d3d9FloatEmulation, config.getOption("d3d9.floatEmulation", Tristate::Auto)); + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_options.h b/src/d3d9/d3d9_options.h new file mode 100644 index 000000000..7e044ddf0 --- /dev/null +++ b/src/d3d9/d3d9_options.h @@ -0,0 +1,117 @@ +#pragma once + +#include "../util/config/config.h" +#include "../dxvk/dxvk_device.h" + +#include "d3d9_include.h" + +namespace dxvk { + + struct D3D9Options { + + D3D9Options(const Rc& device, const Config& config); + + /// Override PCI vendor and device IDs reported to the + /// application. This may make apps think they are running + /// on a different GPU than they do and behave differently. + int32_t customVendorId; + int32_t customDeviceId; + std::string customDeviceDesc; + + /// Present interval. Overrides the value + /// in D3DPRESENT_PARAMS used in swapchain present. + int32_t presentInterval; + + /// Override maximum frame latency if the app specifies + /// a higher value. May help with frame timing issues. + int32_t maxFrameLatency; + + /// Set the max shader model the device can support in the caps. + int32_t shaderModel; + + /// Whether or not managed resources should stay in memory until unlock, or until manually evicted. + bool evictManagedOnUnlock; + + /// Whether or not to set the process as DPI aware in Windows when the API interface is created. + bool dpiAware; + + /// Handle D3DLOCK_READONLY properly. + /// + /// Risen 1 writes to buffers mapped with readonly. + bool allowLockFlagReadonly; + + /// True: Copy our constant set into UBO if we are relative indexing ever. 
+ /// False: Copy our constant set into UBO if we are relative indexing at the start of a defined constant + /// Why?: In theory, FXC should never generate code where this would be an issue. + bool strictConstantCopies; + + /// Whether or not we should care about pow(0, 0) = 1 + bool strictPow; + + /// Whether or not to do a fast path clear if we're close enough to the whole render target. + bool lenientClear; + + /// Back buffer count for the Vulkan swap chain. + /// Overrides buffer count in present parameters. + int32_t numBackBuffers; + + /// Defer surface creation + bool deferSurfaceCreation; + + /// Whether to transition to general + /// for rendering hazards + bool generalHazards; + + /// Anisotropic filter override + /// + /// Enforces anisotropic filtering with the + /// given anisotropy value for all samplers. + int32_t samplerAnisotropy; + + /// Max available memory override + /// + /// Changes the max initial value used in + /// tracking and GetAvailableTextureMem + uint32_t maxAvailableMemory; + + /// D3D9 Floating Point Emulation (anything * 0 = 0) + bool d3d9FloatEmulation; + + /// Support the DF16 & DF24 texture format + bool supportDFFormats; + + /// Support X4R4G4B4 + bool supportX4R4G4B4; + + /// Support D32 + bool supportD32; + + /// SWVP Constant Limits + int32_t swvpFloatCount; + int32_t swvpIntCount; + int32_t swvpBoolCount; + + /// Disable D3DFMT_A8 for render targets. + /// Specifically to work around a game + /// bug in The Sims 2 that happens on native too! + bool disableA8RT; + + /// Work around a NV driver quirk + /// Fixes flickering/z-fighting in some games. + bool invariantPosition; + + /// Whether or not to respect memory tracking for + /// failing resource allocation. + bool memoryTrackTest; + + /// Support VCACHE query + bool supportVCache; + + /// Forced aspect ratio, disable other modes + std::string forceAspectRatio; + + /// Enable dialog mode (ie. 
no exclusive fullscreen) + bool enableDialogMode; + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_query.cpp b/src/d3d9/d3d9_query.cpp new file mode 100644 index 000000000..301295ecc --- /dev/null +++ b/src/d3d9/d3d9_query.cpp @@ -0,0 +1,312 @@ +#include "d3d9_query.h" + +#include "d3d9_device.h" + +namespace dxvk { + + D3D9Query::D3D9Query( + D3D9DeviceEx* pDevice, + D3DQUERYTYPE QueryType) + : D3D9DeviceChild(pDevice) + , m_queryType (QueryType) + , m_state (D3D9_VK_QUERY_INITIAL) { + Rc dxvkDevice = m_parent->GetDXVKDevice(); + + switch (m_queryType) { + case D3DQUERYTYPE_VCACHE: + if (!pDevice->GetOptions()->supportVCache) + throw DxvkError(str::format("D3D9Query: Unsupported query type ", m_queryType, " (from d3d9.supportVCache)")); + break; + + case D3DQUERYTYPE_EVENT: + m_event[0] = dxvkDevice->createGpuEvent(); + break; + + case D3DQUERYTYPE_OCCLUSION: + m_query[0] = dxvkDevice->createGpuQuery( + VK_QUERY_TYPE_OCCLUSION, + VK_QUERY_CONTROL_PRECISE_BIT, 0); + break; + + case D3DQUERYTYPE_TIMESTAMP: + m_query[0] = dxvkDevice->createGpuQuery( + VK_QUERY_TYPE_TIMESTAMP, 0, 0); + break; + + case D3DQUERYTYPE_TIMESTAMPDISJOINT: + for (uint32_t i = 0; i < 2; i++) { + m_query[i] = dxvkDevice->createGpuQuery( + VK_QUERY_TYPE_TIMESTAMP, 0, 0); + } + break; + + case D3DQUERYTYPE_TIMESTAMPFREQ: + break; + + case D3DQUERYTYPE_VERTEXSTATS: + m_query[0] = dxvkDevice->createGpuQuery( + VK_QUERY_TYPE_PIPELINE_STATISTICS, 0, 0); + break; + + default: + throw DxvkError(str::format("D3D9Query: Unsupported query type ", m_queryType)); + } + } + + + HRESULT STDMETHODCALLTYPE D3D9Query::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DQuery9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9Query::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + 
D3DQUERYTYPE STDMETHODCALLTYPE D3D9Query::GetType() { + return m_queryType; + } + + + DWORD STDMETHODCALLTYPE D3D9Query::GetDataSize() { + switch (m_queryType) { + case D3DQUERYTYPE_VCACHE: return sizeof(D3DDEVINFO_VCACHE); + case D3DQUERYTYPE_RESOURCEMANAGER: return sizeof(D3DDEVINFO_RESOURCEMANAGER); + case D3DQUERYTYPE_VERTEXSTATS: return sizeof(D3DDEVINFO_D3DVERTEXSTATS); + case D3DQUERYTYPE_EVENT: return sizeof(BOOL); + case D3DQUERYTYPE_OCCLUSION: return sizeof(DWORD); + case D3DQUERYTYPE_TIMESTAMP: return sizeof(UINT64); + case D3DQUERYTYPE_TIMESTAMPDISJOINT: return sizeof(BOOL); + case D3DQUERYTYPE_TIMESTAMPFREQ: return sizeof(UINT64); + case D3DQUERYTYPE_PIPELINETIMINGS: return sizeof(D3DDEVINFO_D3D9PIPELINETIMINGS); + case D3DQUERYTYPE_INTERFACETIMINGS: return sizeof(D3DDEVINFO_D3D9INTERFACETIMINGS); + case D3DQUERYTYPE_VERTEXTIMINGS: return sizeof(D3DDEVINFO_D3D9STAGETIMINGS); + case D3DQUERYTYPE_PIXELTIMINGS: return sizeof(D3DDEVINFO_D3D9PIPELINETIMINGS); + case D3DQUERYTYPE_BANDWIDTHTIMINGS: return sizeof(D3DDEVINFO_D3D9BANDWIDTHTIMINGS); + case D3DQUERYTYPE_CACHEUTILIZATION: return sizeof(D3DDEVINFO_D3D9CACHEUTILIZATION); + default: return 0; + } + } + + + HRESULT STDMETHODCALLTYPE D3D9Query::Issue(DWORD dwIssueFlags) { + // Note: No need to submit to CS if we don't do anything! 
+ + if (dwIssueFlags == D3DISSUE_BEGIN) { + if (QueryBeginnable(m_queryType)) { + if (m_state == D3D9_VK_QUERY_BEGUN && QueryEndable(m_queryType)) + m_parent->End(this); + + m_parent->Begin(this); + + m_state = D3D9_VK_QUERY_BEGUN; + } + } + else { + if (QueryEndable(m_queryType)) { + if (m_state != D3D9_VK_QUERY_BEGUN && QueryBeginnable(m_queryType)) + m_parent->Begin(this); + + m_resetCtr.fetch_add(1, std::memory_order_acquire); + + m_parent->End(this); + + } + m_state = D3D9_VK_QUERY_ENDED; + } + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9Query::GetData(void* pData, DWORD dwSize, DWORD dwGetDataFlags) { + HRESULT hr = this->GetQueryData(pData, dwSize); + + bool flush = dwGetDataFlags & D3DGETDATA_FLUSH; + + // If we get S_FALSE and it's not from the fact + // they didn't call end, do some flushy stuff... + if (flush && hr == S_FALSE && m_state != D3D9_VK_QUERY_BEGUN) { + this->NotifyStall(); + m_parent->FlushImplicit(FALSE); + } + + return hr; + } + + + HRESULT D3D9Query::GetQueryData(void* pData, DWORD dwSize) { + // Let the game know that calling end might be a good idea... + if (m_state == D3D9_VK_QUERY_BEGUN) + return S_FALSE; + + if (unlikely(!pData && dwSize)) + return D3DERR_INVALIDCALL; + + // The game forgot to even issue the query! + // Let's do it for them... + // This will issue both the begin, and the end. + if (m_state == D3D9_VK_QUERY_INITIAL) + this->Issue(D3DISSUE_END); + + if (m_resetCtr != 0u) + return S_FALSE; + + if (m_queryType == D3DQUERYTYPE_EVENT) { + DxvkGpuEventStatus status = m_event[0]->test(); + + if (status == DxvkGpuEventStatus::Invalid) + return D3DERR_INVALIDCALL; + + bool signaled = status == DxvkGpuEventStatus::Signaled; + + if (pData != nullptr) + *static_cast(pData) = signaled; + + return signaled ? 
D3D_OK : S_FALSE; + } + else { + std::array queryData = { }; + + for (uint32_t i = 0; i < MaxGpuQueries && m_query[i] != nullptr; i++) { + DxvkGpuQueryStatus status = m_query[i]->getData(queryData[i]); + + if (status == DxvkGpuQueryStatus::Invalid + || status == DxvkGpuQueryStatus::Failed) + return D3DERR_INVALIDCALL; + + if (status == DxvkGpuQueryStatus::Pending) + return S_FALSE; + } + + if (pData == nullptr) + return D3D_OK; + + auto* data = static_cast(pData); + + switch (m_queryType) { + case D3DQUERYTYPE_VCACHE: + // Don't know what the hell any of this means. + // Nor do I care. This just makes games work. + data->VCache.Pattern = MAKEFOURCC('H', 'C', 'A', 'C'); + data->VCache.OptMethod = 1; + data->VCache.CacheSize = 24; + data->VCache.MagicNumber = 20; + return D3D_OK; + + case D3DQUERYTYPE_OCCLUSION: + data->Occlusion = DWORD(queryData[0].occlusion.samplesPassed); + return D3D_OK; + + case D3DQUERYTYPE_TIMESTAMP: + data->Timestamp = queryData[0].timestamp.time; + return D3D_OK; + + case D3DQUERYTYPE_TIMESTAMPDISJOINT: + data->TimestampDisjoint = queryData[0].timestamp.time < queryData[1].timestamp.time; + return D3D_OK; + + case D3DQUERYTYPE_TIMESTAMPFREQ: + data->TimestampFreq = GetTimestampQueryFrequency(); + return D3D_OK; + + case D3DQUERYTYPE_VERTEXSTATS: + data->VertexStats.NumRenderedTriangles = queryData[0].statistic.iaPrimitives; + data->VertexStats.NumExtraClippingTriangles = queryData[0].statistic.clipPrimitives; + return D3D_OK; + + default: + return D3D_OK; + } + } + } + + + UINT64 D3D9Query::GetTimestampQueryFrequency() const { + Rc device = m_parent->GetDXVKDevice(); + Rc adapter = device->adapter(); + + VkPhysicalDeviceLimits limits = adapter->deviceProperties().limits; + return uint64_t(1'000'000'000.0f / limits.timestampPeriod); + } + + + void D3D9Query::Begin(DxvkContext* ctx) { + switch (m_queryType) { + case D3DQUERYTYPE_OCCLUSION: + case D3DQUERYTYPE_VERTEXSTATS: + ctx->beginQuery(m_query[0]); + break; + + case 
D3DQUERYTYPE_TIMESTAMPDISJOINT: + ctx->writeTimestamp(m_query[1]); + break; + + default: break; + } + } + + + void D3D9Query::End(DxvkContext* ctx) { + switch (m_queryType) { + case D3DQUERYTYPE_TIMESTAMP: + case D3DQUERYTYPE_TIMESTAMPDISJOINT: + ctx->writeTimestamp(m_query[0]); + break; + + case D3DQUERYTYPE_VERTEXSTATS: + case D3DQUERYTYPE_OCCLUSION: + ctx->endQuery(m_query[0]); + break; + + case D3DQUERYTYPE_EVENT: + ctx->signalGpuEvent(m_event[0]); + break; + + default: break; + } + + m_resetCtr.fetch_sub(1, std::memory_order_release); + } + + + bool D3D9Query::QueryBeginnable(D3DQUERYTYPE QueryType) { + return QueryType == D3DQUERYTYPE_OCCLUSION + || QueryType == D3DQUERYTYPE_VERTEXSTATS + || QueryType == D3DQUERYTYPE_TIMESTAMPDISJOINT; + } + + + bool D3D9Query::QueryEndable(D3DQUERYTYPE QueryType) { + return QueryBeginnable(QueryType) + || QueryType == D3DQUERYTYPE_TIMESTAMP + || QueryType == D3DQUERYTYPE_EVENT; + } + + + HRESULT D3D9Query::QuerySupported(D3DQUERYTYPE QueryType) { + switch (QueryType) { + case D3DQUERYTYPE_VCACHE: + case D3DQUERYTYPE_EVENT: + case D3DQUERYTYPE_OCCLUSION: + case D3DQUERYTYPE_TIMESTAMP: + case D3DQUERYTYPE_TIMESTAMPDISJOINT: + case D3DQUERYTYPE_TIMESTAMPFREQ: + case D3DQUERYTYPE_VERTEXSTATS: + return D3D_OK; + + default: + return D3DERR_NOTAVAILABLE; + } + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_query.h b/src/d3d9/d3d9_query.h new file mode 100644 index 000000000..b53d55316 --- /dev/null +++ b/src/d3d9/d3d9_query.h @@ -0,0 +1,88 @@ +#pragma once + +#include "d3d9_device_child.h" + +#include "../dxvk/dxvk_context.h" + +namespace dxvk { + + enum D3D9_VK_QUERY_STATE : uint32_t { + D3D9_VK_QUERY_INITIAL, + D3D9_VK_QUERY_BEGUN, + D3D9_VK_QUERY_ENDED, + }; + + union D3D9_QUERY_DATA { + D3DDEVINFO_VCACHE VCache; + DWORD Occlusion; + UINT64 Timestamp; + BOOL TimestampDisjoint; + UINT64 TimestampFreq; + D3DDEVINFO_D3DVERTEXSTATS VertexStats; + }; + + class D3D9Query : public D3D9DeviceChild { + constexpr static 
uint32_t MaxGpuQueries = 2; + constexpr static uint32_t MaxGpuEvents = 1; + public: + + D3D9Query( + D3D9DeviceEx* pDevice, + D3DQUERYTYPE QueryType); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + D3DQUERYTYPE STDMETHODCALLTYPE GetType() final; + + DWORD STDMETHODCALLTYPE GetDataSize() final; + + HRESULT STDMETHODCALLTYPE Issue(DWORD dwIssueFlags) final; + + HRESULT STDMETHODCALLTYPE GetData(void* pData, DWORD dwSize, DWORD dwGetDataFlags) final; + + HRESULT GetQueryData(void* pData, DWORD dwSize); + + void Begin(DxvkContext* ctx); + void End(DxvkContext* ctx); + + static bool QueryBeginnable(D3DQUERYTYPE QueryType); + static bool QueryEndable(D3DQUERYTYPE QueryType); + + static HRESULT QuerySupported(D3DQUERYTYPE QueryType); + + bool IsEvent() const { + return m_queryType == D3DQUERYTYPE_EVENT; + } + + bool IsStalling() const { + return m_stallFlag; + } + + void NotifyEnd() { + m_stallMask <<= 1; + } + + void NotifyStall() { + m_stallMask |= 1; + m_stallFlag |= bit::popcnt(m_stallMask) >= 16; + } + + private: + + D3DQUERYTYPE m_queryType; + + D3D9_VK_QUERY_STATE m_state; + + std::array, MaxGpuQueries> m_query; + std::array, MaxGpuEvents> m_event; + + uint32_t m_stallMask = 0; + bool m_stallFlag = false; + + std::atomic m_resetCtr = { 0u }; + + UINT64 GetTimestampQueryFrequency() const; + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_resource.h b/src/d3d9/d3d9_resource.h new file mode 100644 index 000000000..caa9f5046 --- /dev/null +++ b/src/d3d9/d3d9_resource.h @@ -0,0 +1,87 @@ +#pragma once + +#include "d3d9_device_child.h" + +#include "../util/com/com_private_data.h" + +namespace dxvk { + + template + class D3D9Resource : public D3D9DeviceChild { + + public: + + D3D9Resource(D3D9DeviceEx* pDevice) + : D3D9DeviceChild(pDevice) + , m_priority ( 0 ) { } + + HRESULT STDMETHODCALLTYPE SetPrivateData( + REFGUID refguid, + const void* pData, + DWORD SizeOfData, + DWORD Flags) final { + HRESULT hr; + if (Flags & 
D3DSPD_IUNKNOWN) { + IUnknown* unknown = + const_cast( + reinterpret_cast(pData)); + hr = m_privateData.setInterface( + refguid, unknown); + } + else + hr = m_privateData.setData( + refguid, SizeOfData, pData); + + if (FAILED(hr)) + return D3DERR_INVALIDCALL; + + return D3D_OK; + } + + HRESULT STDMETHODCALLTYPE GetPrivateData( + REFGUID refguid, + void* pData, + DWORD* pSizeOfData) final { + HRESULT hr = m_privateData.getData( + refguid, reinterpret_cast(pSizeOfData), pData); + + if (FAILED(hr)) + return D3DERR_INVALIDCALL; + + return D3D_OK; + } + + HRESULT STDMETHODCALLTYPE FreePrivateData(REFGUID refguid) final { + HRESULT hr = m_privateData.setData(refguid, 0, nullptr); + + if (FAILED(hr)) + return D3DERR_INVALIDCALL; + + return D3D_OK; + } + + DWORD STDMETHODCALLTYPE SetPriority(DWORD PriorityNew) { + DWORD oldPriority = m_priority; + m_priority = PriorityNew; + return oldPriority; + } + + DWORD STDMETHODCALLTYPE GetPriority() { + return m_priority; + } + + void STDMETHODCALLTYPE PreLoad() { + } + + + protected: + + DWORD m_priority; + + private: + + ComPrivateData m_privateData; + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_sampler.cpp b/src/d3d9/d3d9_sampler.cpp new file mode 100644 index 000000000..79d23af90 --- /dev/null +++ b/src/d3d9/d3d9_sampler.cpp @@ -0,0 +1,47 @@ +#include "d3d9_sampler.h" + +namespace dxvk { + + size_t D3D9SamplerKeyHash::operator () (const D3D9SamplerKey& key) const { + DxvkHashState state; + + std::hash dhash; + std::hash tahash; + std::hash tfhash; + std::hash fhash; + + state.add(tahash(key.AddressU)); + state.add(tahash(key.AddressV)); + state.add(tahash(key.AddressW)); + state.add(tfhash(key.MagFilter)); + state.add(tfhash(key.MinFilter)); + state.add(tfhash(key.MipFilter)); + state.add(dhash (key.MaxAnisotropy)); + state.add(fhash (key.MipmapLodBias)); + state.add(dhash (key.MaxMipLevel)); + state.add(fhash (key.BorderColor[0])); + state.add(fhash (key.BorderColor[1])); + state.add(fhash 
(key.BorderColor[2])); + state.add(fhash (key.BorderColor[3])); + + return state; + } + + + bool D3D9SamplerKeyEq::operator () (const D3D9SamplerKey& a, const D3D9SamplerKey& b) const { + return a.AddressU == b.AddressU + && a.AddressV == b.AddressV + && a.AddressW == b.AddressW + && a.MagFilter == b.MagFilter + && a.MinFilter == b.MinFilter + && a.MipFilter == b.MipFilter + && a.MaxAnisotropy == b.MaxAnisotropy + && a.MipmapLodBias == b.MipmapLodBias + && a.MaxMipLevel == b.MaxMipLevel + && a.BorderColor[0] == b.BorderColor[0] + && a.BorderColor[1] == b.BorderColor[1] + && a.BorderColor[2] == b.BorderColor[2] + && a.BorderColor[3] == b.BorderColor[3]; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_sampler.h b/src/d3d9/d3d9_sampler.h new file mode 100644 index 000000000..c5652a0f1 --- /dev/null +++ b/src/d3d9/d3d9_sampler.h @@ -0,0 +1,75 @@ +#pragma once + +#include "d3d9_include.h" + +#include "d3d9_util.h" + +#include "../dxvk/dxvk_hash.h" + +#include "../util/util_math.h" + +namespace dxvk { + + struct D3D9SamplerKey { + D3DTEXTUREADDRESS AddressU; + D3DTEXTUREADDRESS AddressV; + D3DTEXTUREADDRESS AddressW; + D3DTEXTUREFILTERTYPE MagFilter; + D3DTEXTUREFILTERTYPE MinFilter; + D3DTEXTUREFILTERTYPE MipFilter; + DWORD MaxAnisotropy; + float MipmapLodBias; + DWORD MaxMipLevel; + float BorderColor[4]; + }; + + struct D3D9SamplerKeyHash { + size_t operator () (const D3D9SamplerKey& key) const; + }; + + struct D3D9SamplerKeyEq { + bool operator () (const D3D9SamplerKey& a, const D3D9SamplerKey& b) const; + }; + + inline void NormalizeSamplerKey(D3D9SamplerKey& key) { + key.AddressU = std::clamp(key.AddressU, D3DTADDRESS_WRAP, D3DTADDRESS_MIRRORONCE); + key.AddressV = std::clamp(key.AddressV, D3DTADDRESS_WRAP, D3DTADDRESS_MIRRORONCE); + key.AddressW = std::clamp(key.AddressW, D3DTADDRESS_WRAP, D3DTADDRESS_MIRRORONCE); + + key.MagFilter = std::clamp(key.MagFilter, D3DTEXF_NONE, D3DTEXF_ANISOTROPIC); + key.MinFilter = std::clamp(key.MinFilter, 
D3DTEXF_NONE, D3DTEXF_ANISOTROPIC); + key.MipFilter = std::clamp(key.MipFilter, D3DTEXF_NONE, D3DTEXF_ANISOTROPIC); + + key.MaxAnisotropy = std::clamp(key.MaxAnisotropy, 0, 16); + + if (key.MipFilter == D3DTEXF_NONE) { + // May as well try and keep slots down. + key.MipmapLodBias = 0; + } + else { + // Games also pass NAN/INF here, this accounts for that. + if (unlikely(std::isnan(key.MipmapLodBias))) + key.MipmapLodBias = 0.0f; + + // Clamp between -15.0f and 15.0f, matching mip limits of d3d9. + key.MipmapLodBias = std::clamp(key.MipmapLodBias, -15.0f, 15.0f); + + // Round to the nearest .5 + // Fixes sampler leaks in UE3 games w/ mip streaming + // eg. Borderlands 2 + key.MipmapLodBias = std::round(key.MipmapLodBias * 2.0f) / 2.0f; + } + + if (key.AddressU != D3DTADDRESS_BORDER + && key.AddressV != D3DTADDRESS_BORDER + && key.AddressW != D3DTADDRESS_BORDER) { + for (auto& val : key.BorderColor) + val = 0.0f; + } + else { + for (auto& val : key.BorderColor) + val = val >= 0.5f ? 1.0f : 0.0f; + } + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_shader.cpp b/src/d3d9/d3d9_shader.cpp new file mode 100644 index 000000000..a3e1cf6bd --- /dev/null +++ b/src/d3d9/d3d9_shader.cpp @@ -0,0 +1,143 @@ +#include "d3d9_shader.h" + +#include "d3d9_device.h" +#include "d3d9_util.h" + +namespace dxvk { + + D3D9CommonShader::D3D9CommonShader() {} + + D3D9CommonShader::D3D9CommonShader( + D3D9DeviceEx* pDevice, + VkShaderStageFlagBits ShaderStage, + const Sha1Hash* pHash, + const DxsoModuleInfo* pDxsoModuleInfo, + const void* pShaderBytecode, + const DxsoAnalysisInfo& AnalysisInfo, + DxsoModule* pModule) { + const uint32_t bytecodeLength = AnalysisInfo.bytecodeByteLength; + m_bytecode.resize(bytecodeLength); + std::memcpy(m_bytecode.data(), pShaderBytecode, bytecodeLength); + + DxvkShaderKey shaderKey = { ShaderStage, *pHash }; + + const std::string name = shaderKey.toString(); + Logger::debug(str::format("Compiling shader ", name)); + + // If requested by the user, 
dump both the raw DXBC + // shader and the compiled SPIR-V module to a file. + const std::string dumpPath = env::getEnvVar("DXVK_SHADER_DUMP_PATH"); + + if (dumpPath.size() != 0) { + DxsoReader reader( + reinterpret_cast(pShaderBytecode)); + + reader.store(std::ofstream(str::format(dumpPath, "/", name, ".dxso"), + std::ios_base::binary | std::ios_base::trunc), bytecodeLength); + + char comment[2048]; + Com blob; + HRESULT hr = DisassembleShader( + pShaderBytecode, + TRUE, + comment, + &blob); + + if (SUCCEEDED(hr)) { + std::ofstream disassembledOut(str::format(dumpPath, "/", name, ".dxso.dis"), std::ios_base::binary | std::ios_base::trunc); + disassembledOut.write( + reinterpret_cast(blob->GetBufferPointer()), + blob->GetBufferSize()); + } + } + + // Decide whether we need to create a pass-through + // geometry shader for vertex shader stream output + + const D3D9ConstantLayout& constantLayout = ShaderStage == VK_SHADER_STAGE_VERTEX_BIT + ? pDevice->GetVertexConstantLayout() + : pDevice->GetPixelConstantLayout(); + + m_shaders = pModule->compile(*pDxsoModuleInfo, name, AnalysisInfo, constantLayout); + m_isgn = pModule->isgn(); + m_usedSamplers = pModule->usedSamplers(); + m_usedRTs = pModule->usedRTs(); + + m_info = pModule->info(); + m_meta = pModule->meta(); + m_constants = pModule->constants(); + + m_shaders[0]->setShaderKey(shaderKey); + + if (m_shaders[1] != nullptr) { + // Lets lie about the shader key type for the state cache. 
+ m_shaders[1]->setShaderKey({ VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, *pHash }); + } + + if (dumpPath.size() != 0) { + std::ofstream dumpStream( + str::format(dumpPath, "/", name, ".spv"), + std::ios_base::binary | std::ios_base::trunc); + + m_shaders[0]->dump(dumpStream); + } + + pDevice->GetDXVKDevice()->registerShader(m_shaders[0]); + + if (m_shaders[1] != nullptr) + pDevice->GetDXVKDevice()->registerShader(m_shaders[1]); + } + + + D3D9CommonShader D3D9ShaderModuleSet::GetShaderModule( + D3D9DeviceEx* pDevice, + VkShaderStageFlagBits ShaderStage, + const DxsoModuleInfo* pDxbcModuleInfo, + const void* pShaderBytecode) { + DxsoReader reader( + reinterpret_cast(pShaderBytecode)); + + DxsoModule module(reader); + + if (module.info().majorVersion() > pDxbcModuleInfo->options.shaderModel) + throw DxvkError("GetShaderModule: Out of range of supported shader model"); + + if (module.info().shaderStage() != ShaderStage) + throw DxvkError("GetShaderModule: Bytecode does not match shader stage"); + + DxsoAnalysisInfo info = module.analyze(); + + Sha1Hash hash = Sha1Hash::compute( + pShaderBytecode, info.bytecodeByteLength); + + DxvkShaderKey lookupKey = DxvkShaderKey(ShaderStage, hash); + + // Use the shader's unique key for the lookup + { std::unique_lock lock(m_mutex); + + auto entry = m_modules.find(lookupKey); + if (entry != m_modules.end()) + return entry->second; + } + + // This shader has not been compiled yet, so we have to create a + // new module. This takes a while, so we won't lock the structure. + D3D9CommonShader commonShader( + pDevice, ShaderStage, &hash, + pDxbcModuleInfo, pShaderBytecode, + info, &module); + + // Insert the new module into the lookup table. If another thread + // has compiled the same shader in the meantime, we should return + // that object instead and discard the newly created module. 
+ { std::unique_lock lock(m_mutex); + + auto status = m_modules.insert({ lookupKey, commonShader }); + if (!status.second) + return status.first->second; + } + + return commonShader; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_shader.h b/src/d3d9/d3d9_shader.h new file mode 100644 index 000000000..829e93b9a --- /dev/null +++ b/src/d3d9/d3d9_shader.h @@ -0,0 +1,195 @@ +#pragma once + +#include "d3d9_resource.h" +#include "../dxso/dxso_module.h" +#include "d3d9_shader_permutations.h" +#include "d3d9_util.h" + +#include + +namespace dxvk { + + + /** + * \brief Common shader object + * + * Stores the compiled SPIR-V shader and the SHA-1 + * hash of the original DXBC shader, which can be + * used to identify the shader. + */ + class D3D9CommonShader { + + public: + + D3D9CommonShader(); + + D3D9CommonShader( + D3D9DeviceEx* pDevice, + VkShaderStageFlagBits ShaderStage, + const Sha1Hash* pHash, + const DxsoModuleInfo* pDxbcModuleInfo, + const void* pShaderBytecode, + const DxsoAnalysisInfo& AnalysisInfo, + DxsoModule* pModule); + + + Rc GetShader(D3D9ShaderPermutation Permutation) const { + return m_shaders[Permutation]; + } + + std::string GetName() const { + return m_shaders[D3D9ShaderPermutations::None]->debugName(); + } + + const std::vector& GetBytecode() const { + return m_bytecode; + } + + const DxsoIsgn& GetIsgn() const { + return m_isgn; + } + + const DxsoShaderMetaInfo& GetMeta() const { return m_meta; } + const DxsoDefinedConstants& GetConstants() const { return m_constants; } + + D3D9ShaderMasks GetShaderMask() const { return D3D9ShaderMasks{ m_usedSamplers, m_usedRTs }; } + + const DxsoProgramInfo& GetInfo() const { return m_info; } + + private: + + DxsoIsgn m_isgn; + uint32_t m_usedSamplers; + uint32_t m_usedRTs; + + DxsoProgramInfo m_info; + DxsoShaderMetaInfo m_meta; + DxsoDefinedConstants m_constants; + + DxsoPermutations m_shaders; + + std::vector m_bytecode; + + }; + + /** + * \brief Common shader interface + * + * Implements 
methods for all D3D11*Shader + * interfaces and stores the actual shader + * module object. + */ + template + class D3D9Shader : public D3D9DeviceChild { + + public: + + D3D9Shader( + D3D9DeviceEx* pDevice, + const D3D9CommonShader& CommonShader) + : D3D9DeviceChild( pDevice ) + , m_shader ( CommonShader ) { } + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(Base)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9Shader::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + HRESULT STDMETHODCALLTYPE GetFunction(void* pOut, UINT* pSizeOfData) { + if (pSizeOfData == nullptr) + return D3DERR_INVALIDCALL; + + const auto& bytecode = m_shader.GetBytecode(); + + if (pOut == nullptr) { + *pSizeOfData = bytecode.size(); + return D3D_OK; + } + + size_t copyAmount = std::min(size_t(*pSizeOfData), bytecode.size()); + std::memcpy(pOut, bytecode.data(), copyAmount); + + return D3D_OK; + } + + const D3D9CommonShader* GetCommonShader() const { + return &m_shader; + } + + private: + + D3D9CommonShader m_shader; + + }; + + // Needs their own classes and not usings for forward decl. + + class D3D9VertexShader final : public D3D9Shader { + + public: + + D3D9VertexShader( + D3D9DeviceEx* pDevice, + const D3D9CommonShader& CommonShader) + : D3D9Shader( pDevice, CommonShader ) { } + + }; + + class D3D9PixelShader final : public D3D9Shader { + + public: + + D3D9PixelShader( + D3D9DeviceEx* pDevice, + const D3D9CommonShader& CommonShader) + : D3D9Shader( pDevice, CommonShader ) { } + + }; + + /** + * \brief Shader module set + * + * Some applications may compile the same shader multiple + * times, so we should cache the resulting shader modules + * and reuse them rather than creating new ones. This + * class is thread-safe. 
+ */ + class D3D9ShaderModuleSet : public RcObject { + + public: + + D3D9CommonShader GetShaderModule( + D3D9DeviceEx* pDevice, + VkShaderStageFlagBits ShaderStage, + const DxsoModuleInfo* pDxbcModuleInfo, + const void* pShaderBytecode); + + private: + + std::mutex m_mutex; + + std::unordered_map< + DxvkShaderKey, + D3D9CommonShader, + DxvkHash, DxvkEq> m_modules; + + }; + + template + const D3D9CommonShader* GetCommonShader(const T& pShader) { + return pShader != nullptr ? pShader->GetCommonShader() : nullptr; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_shader_permutations.h b/src/d3d9/d3d9_shader_permutations.h new file mode 100644 index 000000000..cf2301d29 --- /dev/null +++ b/src/d3d9/d3d9_shader_permutations.h @@ -0,0 +1,20 @@ +#pragma once + +#include "d3d9_include.h" + +namespace dxvk { + + class DxvkShader; + + namespace D3D9ShaderPermutations { + enum D3D9ShaderPermutation { + None, + FlatShade, + Count + }; + } + using D3D9ShaderPermutation = D3D9ShaderPermutations::D3D9ShaderPermutation; + + using DxsoPermutations = std::array, D3D9ShaderPermutations::Count>; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_shader_validator.h b/src/d3d9/d3d9_shader_validator.h new file mode 100644 index 000000000..8e433027c --- /dev/null +++ b/src/d3d9/d3d9_shader_validator.h @@ -0,0 +1,68 @@ +#pragma once + +#include "d3d9_include.h" + +namespace dxvk { + + class IDirect3DShaderValidator9 : public IUnknown { + + public: + + virtual HRESULT STDMETHODCALLTYPE Begin( + void* pCallback, + void* pUserParam, + DWORD Unknown) = 0; + + virtual HRESULT STDMETHODCALLTYPE Instruction( + const char* pUnknown1, + UINT Unknown2, + const DWORD* pInstruction, + DWORD InstructionLength) = 0; + + virtual HRESULT STDMETHODCALLTYPE End() = 0; + + }; + + class D3D9ShaderValidator final : public ComObjectClamp { + + public: + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + 
*ppvObject = ref(this); + return S_OK; + } + + + HRESULT STDMETHODCALLTYPE Begin( + void* pCallback, + void* pUserParam, + DWORD Unknown) { + Logger::debug("D3D9ShaderValidator::Begin: Stub"); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE Instruction( + const char* pUnknown1, + UINT Unknown2, + const DWORD* pInstruction, + DWORD InstructionLength) { + Logger::debug("D3D9ShaderValidator::Instruction: Stub"); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE End() { + Logger::debug("D3D9ShaderValidator::End: Stub"); + + return D3D_OK; + } + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_spec_constants.h b/src/d3d9/d3d9_spec_constants.h new file mode 100644 index 000000000..cdd04a631 --- /dev/null +++ b/src/d3d9/d3d9_spec_constants.h @@ -0,0 +1,19 @@ +#pragma once + +#include + +namespace dxvk { + + enum D3D9SpecConstantId : uint32_t { + AlphaTestEnable = 0, + AlphaCompareOp = 1, + SamplerType = 2, + FogEnabled = 3, + VertexFogMode = 4, + PixelFogMode = 5, + + PointMode = 6, + ProjectionType = 7, + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_state.cpp b/src/d3d9/d3d9_state.cpp new file mode 100644 index 000000000..c0ee06ecb --- /dev/null +++ b/src/d3d9/d3d9_state.cpp @@ -0,0 +1,26 @@ +#include "d3d9_state.h" + +#include "d3d9_texture.h" + +namespace dxvk { + + D3D9CapturableState::D3D9CapturableState() { + for (uint32_t i = 0; i < textures.size(); i++) + textures[i] = nullptr; + + for (uint32_t i = 0; i < clipPlanes.size(); i++) + clipPlanes[i] = D3D9ClipPlane(); + + for (uint32_t i = 0; i < streamFreq.size(); i++) + streamFreq[i] = 1; + + for (uint32_t i = 0; i < enabledLightIndices.size(); i++) + enabledLightIndices[i] = UINT32_MAX; + } + + D3D9CapturableState::~D3D9CapturableState() { + for (uint32_t i = 0; i < textures.size(); i++) + TextureChangePrivate(textures[i], nullptr); + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_state.h b/src/d3d9/d3d9_state.h new file mode 100644 index 
000000000..df2516eb6 --- /dev/null +++ b/src/d3d9/d3d9_state.h @@ -0,0 +1,352 @@ +#pragma once + +#include "d3d9_caps.h" +#include "d3d9_constant_set.h" +#include "../dxso/dxso_common.h" +#include "../util/util_matrix.h" + +#include "d3d9_surface.h" +#include "d3d9_shader.h" +#include "d3d9_vertex_declaration.h" +#include "d3d9_buffer.h" + +#include +#include +#include + +namespace dxvk { + + static constexpr uint32_t RenderStateCount = 256; + static constexpr uint32_t SamplerStateCount = D3DSAMP_DMAPOFFSET + 1; + static constexpr uint32_t SamplerCount = 21; + static constexpr uint32_t TextureStageStateCount = D3DTSS_CONSTANT + 1; + + namespace hacks::PointSize { + static constexpr DWORD AlphaToCoverageDisabled = MAKEFOURCC('A', '2', 'M', '0'); + static constexpr DWORD AlphaToCoverageEnabled = MAKEFOURCC('A', '2', 'M', '1'); + } + + struct D3D9ClipPlane { + float coeff[4]; + }; + + struct D3D9RenderStateInfo { + std::array fogColor = { }; + float fogScale = 0.0f; + float fogEnd = 1.0f; + float fogDensity = 1.0f; + + float alphaRef = 0.0f; + + float pointSize = 1.0f; + float pointSizeMin = 1.0f; + float pointSizeMax = 64.0f; + float pointScaleA = 1.0f; + float pointScaleB = 0.0f; + float pointScaleC = 0.0f; + }; + + enum class D3D9RenderStateItem { + FogColor = 0, + FogScale = 1, + FogEnd, + FogDensity, + AlphaRef, + + PointSize, + PointSizeMin, + PointSizeMax, + PointScaleA, + PointScaleB, + PointScaleC, + + Count + }; + + + // This is needed in fixed function for POSITION_T support. + // These are constants we need to * and add to move + // Window Coords -> Real Coords w/ respect to the viewport. 
+ struct D3D9ViewportInfo { + Vector4 inverseOffset; + Vector4 inverseExtent; + }; + + struct D3D9Light { + D3D9Light(const D3DLIGHT9& light, Matrix4 viewMtx) { + Diffuse = Vector4(light.Diffuse.r, light.Diffuse.g, light.Diffuse.b, light.Diffuse.a); + Specular = Vector4(light.Specular.r, light.Specular.g, light.Specular.b, light.Specular.a); + Ambient = Vector4(light.Ambient.r, light.Ambient.g, light.Ambient.b, light.Ambient.a); + + Position = viewMtx * Vector4(light.Position.x, light.Position.y, light.Position.z, 1.0f); + Direction = Vector4(light.Direction.x, light.Direction.y, light.Direction.z, 0.0f); + Direction = normalize(viewMtx * Direction); + + Type = light.Type; + Range = light.Range; + Falloff = light.Falloff; + Attenuation0 = light.Attenuation0; + Attenuation1 = light.Attenuation1; + Attenuation2 = light.Attenuation2; + Theta = cosf(light.Theta / 2.0f); + Phi = cosf(light.Phi / 2.0f); + } + + Vector4 Diffuse; + Vector4 Specular; + Vector4 Ambient; + + Vector4 Position; + Vector4 Direction; + + D3DLIGHTTYPE Type; + float Range; + float Falloff; + float Attenuation0; + float Attenuation1; + float Attenuation2; + float Theta; + float Phi; + }; + + + struct D3D9FixedFunctionVS { + Matrix4 WorldView; + Matrix4 NormalMatrix; + Matrix4 Projection; + + std::array TexcoordMatrices; + + D3D9ViewportInfo ViewportInfo; + + Vector4 GlobalAmbient; + std::array Lights; + D3DMATERIAL9 Material; + float TweenFactor; + }; + + + struct D3D9FixedFunctionVertexBlendDataHW { + Matrix4 WorldView[8]; + }; + + + struct D3D9FixedFunctionVertexBlendDataSW { + Matrix4 WorldView[256]; + }; + + + struct D3D9FixedFunctionPS { + Vector4 textureFactor; + }; + + enum D3D9SharedPSStages { + D3D9SharedPSStages_Constant, + D3D9SharedPSStages_BumpEnvMat0, + D3D9SharedPSStages_BumpEnvMat1, + D3D9SharedPSStages_BumpEnvLScale, + D3D9SharedPSStages_BumpEnvLOffset, + D3D9SharedPSStages_Count, + }; + + struct D3D9SharedPS { + struct Stage { + float Constant[4]; + float BumpEnvMat[2][2]; + float 
BumpEnvLScale; + float BumpEnvLOffset; + float Padding[2]; + } Stages[8]; + }; + + struct D3D9VBO { + Com vertexBuffer; + + UINT offset = 0; + UINT stride = 0; + }; + + constexpr D3DLIGHT9 DefaultLight = { + D3DLIGHT_DIRECTIONAL, // Type + {1.0f, 1.0f, 1.0f, 1.0f}, // Diffuse + {0.0f, 0.0f, 0.0f, 0.0f}, // Specular + {0.0f, 0.0f, 0.0f, 0.0f}, // Ambient + {0.0f, 0.0f, 0.0f}, // Position + {0.0f, 0.0f, 0.0f}, // Direction + 0.0f, // Range + 0.0f, // Falloff + 0.0f, 0.0f, 0.0f, // Attenuations [constant, linear, quadratic] + 0.0f, // Theta + 0.0f // Phi + }; + + struct D3D9CapturableState { + D3D9CapturableState(); + + ~D3D9CapturableState(); + + Com vertexDecl; + Com indices; + + std::array renderStates = { 0 }; + + std::array< + std::array, + SamplerCount> samplerStates; + + std::array vertexBuffers; + + std::array< + IDirect3DBaseTexture9*, + SamplerCount> textures; + + Com vertexShader; + Com pixelShader; + + D3DVIEWPORT9 viewport; + RECT scissorRect; + + std::array< + D3D9ClipPlane, + caps::MaxClipPlanes> clipPlanes; + + std::array< + std::array, + caps::TextureStageCount> textureStages; + + D3D9ShaderConstantsVSSoftware vsConsts; + D3D9ShaderConstantsPS psConsts; + + std::array streamFreq; + + std::array transforms; + + D3DMATERIAL9 material = D3DMATERIAL9(); + + std::vector> lights; + std::array enabledLightIndices; + + bool IsLightEnabled(DWORD Index) { + const auto& indices = enabledLightIndices; + return std::find(indices.begin(), indices.end(), Index) != indices.end(); + } + }; + + template < + DxsoProgramType ProgramType, + D3D9ConstantType ConstantType, + typename T> + HRESULT UpdateStateConstants( + D3D9CapturableState* pState, + UINT StartRegister, + const T* pConstantData, + UINT Count, + bool FloatEmu) { + auto UpdateHelper = [&] (auto& set) { + if constexpr (ConstantType == D3D9ConstantType::Float) { + auto begin = reinterpret_cast(pConstantData); + auto end = begin + Count; + + if (!FloatEmu) + std::copy(begin, end, &set.fConsts[StartRegister]); + 
else + std::transform(begin, end, &set.fConsts[StartRegister], replaceNaN); + } + else if constexpr (ConstantType == D3D9ConstantType::Int) { + auto begin = reinterpret_cast(pConstantData); + auto end = begin + Count; + + std::copy(begin, end, &set.iConsts[StartRegister]); + } + else { + for (uint32_t i = 0; i < Count; i++) { + const uint32_t constantIdx = StartRegister + i; + const uint32_t arrayIdx = constantIdx / 32; + const uint32_t bitIdx = constantIdx % 32; + + const uint32_t bit = 1u << bitIdx; + + set.bConsts[arrayIdx] &= ~bit; + if (pConstantData[i]) + set.bConsts[arrayIdx] |= bit; + } + } + + return D3D_OK; + }; + + return ProgramType == DxsoProgramTypes::VertexShader + ? UpdateHelper(pState->vsConsts) + : UpdateHelper(pState->psConsts); + } + + enum class D3D9CapturedStateFlag : uint32_t { + VertexDecl, + Indices, + RenderStates, + SamplerStates, + VertexBuffers, + Textures, + VertexShader, + PixelShader, + Viewport, + ScissorRect, + ClipPlanes, + VsConstants, + PsConstants, + StreamFreq, + Transforms, + TextureStages, + Material + }; + + using D3D9CapturedStateFlags = Flags; + + struct D3D9StateCaptures { + D3D9CapturedStateFlags flags; + + std::bitset renderStates; + + std::bitset samplers; + std::array< + std::bitset, + SamplerCount> samplerStates; + + std::bitset vertexBuffers; + std::bitset textures; + std::bitset clipPlanes; + std::bitset streamFreq; + std::bitset transforms; + std::bitset textureStages; + std::array< + std::bitset, + caps::TextureStageCount> textureStageStates; + + struct { + std::bitset fConsts; + std::bitset iConsts; + std::bitset bConsts; + } vsConsts; + + struct { + std::bitset fConsts; + std::bitset iConsts; + std::bitset bConsts; + } psConsts; + }; + + struct Direct3DState9 : public D3D9CapturableState { + + std::array, caps::MaxSimultaneousRenderTargets> renderTargets; + Com depthStencil; + + }; + + + struct D3D9InputAssemblyState { + D3DPRIMITIVETYPE primitiveType = D3DPRIMITIVETYPE(0); + uint32_t streamsInstanced = 0; + 
uint32_t streamsUsed = 0; + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_stateblock.cpp b/src/d3d9/d3d9_stateblock.cpp new file mode 100644 index 000000000..597b1bcce --- /dev/null +++ b/src/d3d9/d3d9_stateblock.cpp @@ -0,0 +1,519 @@ +#include "d3d9_stateblock.h" + +#include "d3d9_device.h" +#include "d3d9_vertex_declaration.h" +#include "d3d9_buffer.h" +#include "d3d9_shader.h" +#include "d3d9_texture.h" + +#include "d3d9_util.h" + +namespace dxvk { + + D3D9StateBlock::D3D9StateBlock(D3D9DeviceEx* pDevice, D3D9StateBlockType Type) + : D3D9StateBlockBase(pDevice) + , m_deviceState (pDevice->GetRawState()) { + CaptureType(Type); + } + + + HRESULT STDMETHODCALLTYPE D3D9StateBlock::QueryInterface( + REFIID riid, + void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DStateBlock9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9StateBlock::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + HRESULT STDMETHODCALLTYPE D3D9StateBlock::Capture() { + ApplyOrCapture(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9StateBlock::Apply() { + m_applying = true; + ApplyOrCapture(); + m_applying = false; + + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetVertexDeclaration(D3D9VertexDecl* pDecl) { + m_state.vertexDecl = pDecl; + + m_captures.flags.set(D3D9CapturedStateFlag::VertexDecl); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetIndices(D3D9IndexBuffer* pIndexData) { + m_state.indices = pIndexData; + + m_captures.flags.set(D3D9CapturedStateFlag::Indices); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) { + m_state.renderStates[State] = Value; + + m_captures.flags.set(D3D9CapturedStateFlag::RenderStates); + m_captures.renderStates[State] = true; + return D3D_OK; + } + + + HRESULT 
D3D9StateBlock::SetStateSamplerState( + DWORD StateSampler, + D3DSAMPLERSTATETYPE Type, + DWORD Value) { + m_state.samplerStates[StateSampler][Type] = Value; + + m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates); + m_captures.samplers[StateSampler] = true; + m_captures.samplerStates[StateSampler][Type] = true; + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetStreamSource( + UINT StreamNumber, + D3D9VertexBuffer* pStreamData, + UINT OffsetInBytes, + UINT Stride) { + m_state.vertexBuffers[StreamNumber].vertexBuffer = pStreamData; + + m_state.vertexBuffers[StreamNumber].offset = OffsetInBytes; + m_state.vertexBuffers[StreamNumber].stride = Stride; + + m_captures.flags.set(D3D9CapturedStateFlag::VertexBuffers); + m_captures.vertexBuffers[StreamNumber] = true; + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetStreamSourceFreq(UINT StreamNumber, UINT Setting) { + m_state.streamFreq[StreamNumber] = Setting; + + m_captures.flags.set(D3D9CapturedStateFlag::StreamFreq); + m_captures.streamFreq[StreamNumber] = true; + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture) { + TextureChangePrivate(m_state.textures[StateSampler], pTexture); + + m_captures.flags.set(D3D9CapturedStateFlag::Textures); + m_captures.textures[StateSampler] = true; + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetVertexShader(D3D9VertexShader* pShader) { + m_state.vertexShader = pShader; + + m_captures.flags.set(D3D9CapturedStateFlag::VertexShader); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetPixelShader(D3D9PixelShader* pShader) { + m_state.pixelShader = pShader; + + m_captures.flags.set(D3D9CapturedStateFlag::PixelShader); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetMaterial(const D3DMATERIAL9* pMaterial) { + m_state.material = *pMaterial; + + m_captures.flags.set(D3D9CapturedStateFlag::Material); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetStateTransform(uint32_t idx, const D3DMATRIX* 
pMatrix) { + m_state.transforms[idx] = ConvertMatrix(pMatrix); + + m_captures.flags.set(D3D9CapturedStateFlag::Transforms); + m_captures.transforms.set(idx); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetTextureStageState( + DWORD Stage, + D3DTEXTURESTAGESTATETYPE Type, + DWORD Value) { + m_state.textureStages[Stage][Type] = Value; + + m_captures.flags.set(D3D9CapturedStateFlag::TextureStages); + m_captures.textureStages[Stage] = true; + m_captures.textureStageStates[Stage][Type] = true; + return D3D_OK; + } + + + HRESULT D3D9StateBlock::MultiplyStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) { + m_state.transforms[idx] = ConvertMatrix(pMatrix) * m_state.transforms[idx]; + + m_captures.flags.set(D3D9CapturedStateFlag::Transforms); + m_captures.transforms.set(idx); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetViewport(const D3DVIEWPORT9* pViewport) { + m_state.viewport = *pViewport; + + m_captures.flags.set(D3D9CapturedStateFlag::Viewport); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetScissorRect(const RECT* pRect) { + m_state.scissorRect = *pRect; + + m_captures.flags.set(D3D9CapturedStateFlag::ScissorRect); + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetClipPlane(DWORD Index, const float* pPlane) { + for (uint32_t i = 0; i < 4; i++) + m_state.clipPlanes[Index].coeff[i] = pPlane[i]; + + m_captures.flags.set(D3D9CapturedStateFlag::ClipPlanes); + m_captures.clipPlanes[Index] = true; + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetVertexShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount) { + return SetShaderConstants< + DxsoProgramTypes::VertexShader, + D3D9ConstantType::Float>( + StartRegister, + pConstantData, + Vector4fCount); + } + + + HRESULT D3D9StateBlock::SetVertexShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount) { + return SetShaderConstants< + DxsoProgramTypes::VertexShader, + D3D9ConstantType::Int>( + StartRegister, + pConstantData, + 
Vector4iCount); + } + + + HRESULT D3D9StateBlock::SetVertexShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount) { + return SetShaderConstants< + DxsoProgramTypes::VertexShader, + D3D9ConstantType::Bool>( + StartRegister, + pConstantData, + BoolCount); + } + + + HRESULT D3D9StateBlock::SetPixelShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount) { + return SetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Float>( + StartRegister, + pConstantData, + Vector4fCount); + } + + + HRESULT D3D9StateBlock::SetPixelShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount) { + return SetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Int>( + StartRegister, + pConstantData, + Vector4iCount); + } + + + HRESULT D3D9StateBlock::SetPixelShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount) { + return SetShaderConstants< + DxsoProgramTypes::PixelShader, + D3D9ConstantType::Bool>( + StartRegister, + pConstantData, + BoolCount); + } + + + HRESULT D3D9StateBlock::SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { + m_state.vsConsts.bConsts[idx] &= ~mask; + m_state.vsConsts.bConsts[idx] |= bits & mask; + return D3D_OK; + } + + + HRESULT D3D9StateBlock::SetPixelBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { + m_state.psConsts.bConsts[idx] &= ~mask; + m_state.psConsts.bConsts[idx] |= bits & mask; + return D3D_OK; + } + + + void D3D9StateBlock::CapturePixelRenderStates() { + m_captures.flags.set(D3D9CapturedStateFlag::RenderStates); + + m_captures.renderStates[D3DRS_ZENABLE] = true; + m_captures.renderStates[D3DRS_FILLMODE] = true; + m_captures.renderStates[D3DRS_SHADEMODE] = true; + m_captures.renderStates[D3DRS_ZWRITEENABLE] = true; + m_captures.renderStates[D3DRS_ALPHATESTENABLE] = true; + m_captures.renderStates[D3DRS_LASTPIXEL] = true; + m_captures.renderStates[D3DRS_SRCBLEND] = 
true; + m_captures.renderStates[D3DRS_DESTBLEND] = true; + m_captures.renderStates[D3DRS_ZFUNC] = true; + m_captures.renderStates[D3DRS_ALPHAREF] = true; + m_captures.renderStates[D3DRS_ALPHAFUNC] = true; + m_captures.renderStates[D3DRS_DITHERENABLE] = true; + m_captures.renderStates[D3DRS_FOGSTART] = true; + m_captures.renderStates[D3DRS_FOGEND] = true; + m_captures.renderStates[D3DRS_FOGDENSITY] = true; + m_captures.renderStates[D3DRS_ALPHABLENDENABLE] = true; + m_captures.renderStates[D3DRS_DEPTHBIAS] = true; + m_captures.renderStates[D3DRS_STENCILENABLE] = true; + m_captures.renderStates[D3DRS_STENCILFAIL] = true; + m_captures.renderStates[D3DRS_STENCILZFAIL] = true; + m_captures.renderStates[D3DRS_STENCILPASS] = true; + m_captures.renderStates[D3DRS_STENCILFUNC] = true; + m_captures.renderStates[D3DRS_STENCILREF] = true; + m_captures.renderStates[D3DRS_STENCILMASK] = true; + m_captures.renderStates[D3DRS_STENCILWRITEMASK] = true; + m_captures.renderStates[D3DRS_TEXTUREFACTOR] = true; + m_captures.renderStates[D3DRS_WRAP0] = true; + m_captures.renderStates[D3DRS_WRAP1] = true; + m_captures.renderStates[D3DRS_WRAP2] = true; + m_captures.renderStates[D3DRS_WRAP3] = true; + m_captures.renderStates[D3DRS_WRAP4] = true; + m_captures.renderStates[D3DRS_WRAP5] = true; + m_captures.renderStates[D3DRS_WRAP6] = true; + m_captures.renderStates[D3DRS_WRAP7] = true; + m_captures.renderStates[D3DRS_WRAP8] = true; + m_captures.renderStates[D3DRS_WRAP9] = true; + m_captures.renderStates[D3DRS_WRAP10] = true; + m_captures.renderStates[D3DRS_WRAP11] = true; + m_captures.renderStates[D3DRS_WRAP12] = true; + m_captures.renderStates[D3DRS_WRAP13] = true; + m_captures.renderStates[D3DRS_WRAP14] = true; + m_captures.renderStates[D3DRS_WRAP15] = true; + m_captures.renderStates[D3DRS_COLORWRITEENABLE] = true; + m_captures.renderStates[D3DRS_BLENDOP] = true; + m_captures.renderStates[D3DRS_SCISSORTESTENABLE] = true; + m_captures.renderStates[D3DRS_SLOPESCALEDEPTHBIAS] = true; + 
m_captures.renderStates[D3DRS_ANTIALIASEDLINEENABLE] = true; + m_captures.renderStates[D3DRS_TWOSIDEDSTENCILMODE] = true; + m_captures.renderStates[D3DRS_CCW_STENCILFAIL] = true; + m_captures.renderStates[D3DRS_CCW_STENCILZFAIL] = true; + m_captures.renderStates[D3DRS_CCW_STENCILPASS] = true; + m_captures.renderStates[D3DRS_CCW_STENCILFUNC] = true; + m_captures.renderStates[D3DRS_COLORWRITEENABLE1] = true; + m_captures.renderStates[D3DRS_COLORWRITEENABLE2] = true; + m_captures.renderStates[D3DRS_COLORWRITEENABLE3] = true; + m_captures.renderStates[D3DRS_BLENDFACTOR] = true; + m_captures.renderStates[D3DRS_SRGBWRITEENABLE] = true; + m_captures.renderStates[D3DRS_SEPARATEALPHABLENDENABLE] = true; + m_captures.renderStates[D3DRS_SRCBLENDALPHA] = true; + m_captures.renderStates[D3DRS_DESTBLENDALPHA] = true; + m_captures.renderStates[D3DRS_BLENDOPALPHA] = true; + } + + + void D3D9StateBlock::CapturePixelSamplerStates() { + m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates); + + for (uint32_t i = 0; i < 17; i++) { + m_captures.samplers[i] = true; + + m_captures.samplerStates[i][D3DSAMP_ADDRESSU] = true; + m_captures.samplerStates[i][D3DSAMP_ADDRESSV] = true; + m_captures.samplerStates[i][D3DSAMP_ADDRESSW] = true; + m_captures.samplerStates[i][D3DSAMP_BORDERCOLOR] = true; + m_captures.samplerStates[i][D3DSAMP_MAGFILTER] = true; + m_captures.samplerStates[i][D3DSAMP_MINFILTER] = true; + m_captures.samplerStates[i][D3DSAMP_MIPFILTER] = true; + m_captures.samplerStates[i][D3DSAMP_MIPMAPLODBIAS] = true; + m_captures.samplerStates[i][D3DSAMP_MAXMIPLEVEL] = true; + m_captures.samplerStates[i][D3DSAMP_MAXANISOTROPY] = true; + m_captures.samplerStates[i][D3DSAMP_SRGBTEXTURE] = true; + m_captures.samplerStates[i][D3DSAMP_ELEMENTINDEX] = true; + } + } + + + void D3D9StateBlock::CapturePixelShaderStates() { + m_captures.flags.set(D3D9CapturedStateFlag::PixelShader); + m_captures.flags.set(D3D9CapturedStateFlag::PsConstants); + + m_captures.psConsts.fConsts.flip(); + 
m_captures.psConsts.iConsts.flip(); + m_captures.psConsts.bConsts.flip(); + } + + + void D3D9StateBlock::CaptureVertexRenderStates() { + m_captures.flags.set(D3D9CapturedStateFlag::RenderStates); + + m_captures.renderStates[D3DRS_CULLMODE] = true; + m_captures.renderStates[D3DRS_FOGENABLE] = true; + m_captures.renderStates[D3DRS_FOGCOLOR] = true; + m_captures.renderStates[D3DRS_FOGTABLEMODE] = true; + m_captures.renderStates[D3DRS_FOGSTART] = true; + m_captures.renderStates[D3DRS_FOGEND] = true; + m_captures.renderStates[D3DRS_FOGDENSITY] = true; + m_captures.renderStates[D3DRS_RANGEFOGENABLE] = true; + m_captures.renderStates[D3DRS_AMBIENT] = true; + m_captures.renderStates[D3DRS_COLORVERTEX] = true; + m_captures.renderStates[D3DRS_FOGVERTEXMODE] = true; + m_captures.renderStates[D3DRS_CLIPPING] = true; + m_captures.renderStates[D3DRS_LIGHTING] = true; + m_captures.renderStates[D3DRS_LOCALVIEWER] = true; + m_captures.renderStates[D3DRS_EMISSIVEMATERIALSOURCE] = true; + m_captures.renderStates[D3DRS_AMBIENTMATERIALSOURCE] = true; + m_captures.renderStates[D3DRS_DIFFUSEMATERIALSOURCE] = true; + m_captures.renderStates[D3DRS_SPECULARMATERIALSOURCE] = true; + m_captures.renderStates[D3DRS_VERTEXBLEND] = true; + m_captures.renderStates[D3DRS_CLIPPLANEENABLE] = true; + m_captures.renderStates[D3DRS_POINTSIZE] = true; + m_captures.renderStates[D3DRS_POINTSIZE_MIN] = true; + m_captures.renderStates[D3DRS_POINTSPRITEENABLE] = true; + m_captures.renderStates[D3DRS_POINTSCALEENABLE] = true; + m_captures.renderStates[D3DRS_POINTSCALE_A] = true; + m_captures.renderStates[D3DRS_POINTSCALE_B] = true; + m_captures.renderStates[D3DRS_POINTSCALE_C] = true; + m_captures.renderStates[D3DRS_MULTISAMPLEANTIALIAS] = true; + m_captures.renderStates[D3DRS_MULTISAMPLEMASK] = true; + m_captures.renderStates[D3DRS_PATCHEDGESTYLE] = true; + m_captures.renderStates[D3DRS_POINTSIZE_MAX] = true; + m_captures.renderStates[D3DRS_INDEXEDVERTEXBLENDENABLE] = true; + 
m_captures.renderStates[D3DRS_TWEENFACTOR] = true; + m_captures.renderStates[D3DRS_POSITIONDEGREE] = true; + m_captures.renderStates[D3DRS_NORMALDEGREE] = true; + m_captures.renderStates[D3DRS_MINTESSELLATIONLEVEL] = true; + m_captures.renderStates[D3DRS_MAXTESSELLATIONLEVEL] = true; + m_captures.renderStates[D3DRS_ADAPTIVETESS_X] = true; + m_captures.renderStates[D3DRS_ADAPTIVETESS_Y] = true; + m_captures.renderStates[D3DRS_ADAPTIVETESS_Z] = true; + m_captures.renderStates[D3DRS_ADAPTIVETESS_W] = true; + m_captures.renderStates[D3DRS_ENABLEADAPTIVETESSELLATION] = true; + m_captures.renderStates[D3DRS_NORMALIZENORMALS] = true; + m_captures.renderStates[D3DRS_SPECULARENABLE] = true; + m_captures.renderStates[D3DRS_SHADEMODE] = true; + } + + + void D3D9StateBlock::CaptureVertexSamplerStates() { + m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates); + + for (uint32_t i = 17; i < SamplerCount; i++) { + m_captures.samplers[i] = true; + m_captures.samplerStates[i][D3DSAMP_DMAPOFFSET] = true; + } + } + + + void D3D9StateBlock::CaptureVertexShaderStates() { + m_captures.flags.set(D3D9CapturedStateFlag::VertexShader); + m_captures.flags.set(D3D9CapturedStateFlag::VsConstants); + + m_captures.vsConsts.fConsts.flip(); + m_captures.vsConsts.iConsts.flip(); + m_captures.vsConsts.bConsts.flip(); + } + + + void D3D9StateBlock::CaptureType(D3D9StateBlockType Type) { + if (Type == D3D9StateBlockType::PixelState || Type == D3D9StateBlockType::All) { + CapturePixelRenderStates(); + CapturePixelSamplerStates(); + CapturePixelShaderStates(); + + m_captures.flags.set(D3D9CapturedStateFlag::TextureStages); + m_captures.textureStages.flip(); + for (auto& stage : m_captures.textureStageStates) + stage.flip(); + } + + if (Type == D3D9StateBlockType::VertexState || Type == D3D9StateBlockType::All) { + CaptureVertexRenderStates(); + CaptureVertexSamplerStates(); + CaptureVertexShaderStates(); + + m_captures.flags.set(D3D9CapturedStateFlag::VertexDecl); + 
m_captures.flags.set(D3D9CapturedStateFlag::StreamFreq); + + for (uint32_t i = 0; i < caps::MaxStreams; i++) + m_captures.streamFreq[i] = true; + } + + if (Type == D3D9StateBlockType::All) { + m_captures.flags.set(D3D9CapturedStateFlag::Textures); + m_captures.textures.flip(); + + m_captures.flags.set(D3D9CapturedStateFlag::VertexBuffers); + m_captures.vertexBuffers.flip(); + + m_captures.flags.set(D3D9CapturedStateFlag::Indices); + m_captures.flags.set(D3D9CapturedStateFlag::Viewport); + m_captures.flags.set(D3D9CapturedStateFlag::ScissorRect); + + m_captures.flags.set(D3D9CapturedStateFlag::ClipPlanes); + m_captures.clipPlanes.flip(); + + m_captures.flags.set(D3D9CapturedStateFlag::Transforms); + m_captures.transforms.flip(); + + m_captures.flags.set(D3D9CapturedStateFlag::Material); + } + + if (Type != D3D9StateBlockType::None) + this->Capture(); + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_stateblock.h b/src/d3d9/d3d9_stateblock.h new file mode 100644 index 000000000..897cc47e5 --- /dev/null +++ b/src/d3d9/d3d9_stateblock.h @@ -0,0 +1,333 @@ +#pragma once + +#include "d3d9_device_child.h" +#include "d3d9_device.h" +#include "d3d9_state.h" + +namespace dxvk { + + enum class D3D9StateBlockType :uint32_t { + None, + VertexState, + PixelState, + All + }; + + inline D3D9StateBlockType ConvertStateBlockType(D3DSTATEBLOCKTYPE type) { + switch (type) { + case D3DSBT_PIXELSTATE: return D3D9StateBlockType::PixelState; + case D3DSBT_VERTEXSTATE: return D3D9StateBlockType::VertexState; + default: + case D3DSBT_ALL: return D3D9StateBlockType::All; + } + } + + using D3D9StateBlockBase = D3D9DeviceChild; + class D3D9StateBlock : public D3D9StateBlockBase { + + public: + + D3D9StateBlock(D3D9DeviceEx* pDevice, D3D9StateBlockType Type); + + HRESULT STDMETHODCALLTYPE QueryInterface( + REFIID riid, + void** ppvObject) final; + + HRESULT STDMETHODCALLTYPE Capture() final; + HRESULT STDMETHODCALLTYPE Apply() final; + + HRESULT SetVertexDeclaration(D3D9VertexDecl* 
pDecl); + + HRESULT SetIndices(D3D9IndexBuffer* pIndexData); + + HRESULT SetRenderState(D3DRENDERSTATETYPE State, DWORD Value); + + HRESULT SetStateSamplerState( + DWORD StateSampler, + D3DSAMPLERSTATETYPE Type, + DWORD Value); + + HRESULT SetStreamSource( + UINT StreamNumber, + D3D9VertexBuffer* pStreamData, + UINT OffsetInBytes, + UINT Stride); + + HRESULT SetStreamSourceFreq(UINT StreamNumber, UINT Setting); + + HRESULT SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture); + + HRESULT SetVertexShader(D3D9VertexShader* pShader); + + HRESULT SetPixelShader(D3D9PixelShader* pShader); + + HRESULT SetMaterial(const D3DMATERIAL9* pMaterial); + + HRESULT SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix); + + HRESULT SetTextureStageState( + DWORD Stage, + D3DTEXTURESTAGESTATETYPE Type, + DWORD Value); + + HRESULT MultiplyStateTransform(uint32_t idx, const D3DMATRIX* pMatrix); + + HRESULT SetViewport(const D3DVIEWPORT9* pViewport); + + HRESULT SetScissorRect(const RECT* pRect); + + HRESULT SetClipPlane(DWORD Index, const float* pPlane); + + + HRESULT SetVertexShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount); + + HRESULT SetVertexShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount); + + HRESULT SetVertexShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount); + + + HRESULT SetPixelShaderConstantF( + UINT StartRegister, + const float* pConstantData, + UINT Vector4fCount); + + HRESULT SetPixelShaderConstantI( + UINT StartRegister, + const int* pConstantData, + UINT Vector4iCount); + + HRESULT SetPixelShaderConstantB( + UINT StartRegister, + const BOOL* pConstantData, + UINT BoolCount); + + enum class D3D9StateFunction { + Apply, + Capture + }; + + template + void ApplyOrCapture(Dst* dst, const Src* src) { + if (m_captures.flags.test(D3D9CapturedStateFlag::VertexDecl)) + dst->SetVertexDeclaration(src->vertexDecl.ptr()); + + if 
(m_captures.flags.test(D3D9CapturedStateFlag::StreamFreq)) { + for (uint32_t i = 0; i < caps::MaxStreams; i++) { + if (m_captures.streamFreq[i]) + dst->SetStreamSourceFreq(i, src->streamFreq[i]); + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::Indices)) + dst->SetIndices(src->indices.ptr()); + + if (m_captures.flags.test(D3D9CapturedStateFlag::RenderStates)) { + for (uint32_t i = 0; i < m_captures.renderStates.size(); i++) { + if (m_captures.renderStates[i]) + dst->SetRenderState(D3DRENDERSTATETYPE(i), src->renderStates[i]); + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::SamplerStates)) { + for (uint32_t i = 0; i < m_captures.samplerStates.size(); i++) { + if (m_captures.samplers[i]) { + for (uint32_t j = 0; j < m_captures.samplerStates[i].size(); j++) { + if (m_captures.samplerStates[i][j]) + dst->SetStateSamplerState(i, D3DSAMPLERSTATETYPE(j), src->samplerStates[i][j]); + } + } + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::VertexBuffers)) { + for (uint32_t i = 0; i < m_captures.vertexBuffers.size(); i++) { + if (m_captures.vertexBuffers[i]) { + const auto& vbo = src->vertexBuffers[i]; + dst->SetStreamSource( + i, + vbo.vertexBuffer.ptr(), + vbo.offset, + vbo.stride); + } + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::Material)) + dst->SetMaterial(&src->material); + + if (m_captures.flags.test(D3D9CapturedStateFlag::Textures)) { + for (uint32_t i = 0; i < m_captures.textures.size(); i++) { + if (m_captures.textures[i]) + dst->SetStateTexture(i, src->textures[i]); + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::VertexShader)) + dst->SetVertexShader(src->vertexShader.ptr()); + + if (m_captures.flags.test(D3D9CapturedStateFlag::PixelShader)) + dst->SetPixelShader(src->pixelShader.ptr()); + + if (m_captures.flags.test(D3D9CapturedStateFlag::Transforms)) { + for (uint32_t i = 0; i < m_captures.transforms.size(); i++) { + if (m_captures.transforms[i]) + dst->SetStateTransform(i, 
reinterpret_cast(&src->transforms[i])); + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::TextureStages)) { + for (uint32_t i = 0; i < m_captures.textureStages.size(); i++) { + if (m_captures.textureStages[i]) { + for (uint32_t j = 0; j < m_captures.textureStageStates[i].size(); j++) { + if (m_captures.textureStageStates[i][j]) + dst->SetTextureStageState(i, (D3DTEXTURESTAGESTATETYPE)j, src->textureStages[i][j]); + } + } + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::Viewport)) + dst->SetViewport(&src->viewport); + + if (m_captures.flags.test(D3D9CapturedStateFlag::ScissorRect)) + dst->SetScissorRect(&src->scissorRect); + + if (m_captures.flags.test(D3D9CapturedStateFlag::ClipPlanes)) { + for (uint32_t i = 0; i < m_captures.clipPlanes.size(); i++) { + if (m_captures.clipPlanes[i]) + dst->SetClipPlane(i, src->clipPlanes[i].coeff); + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::VsConstants)) { + for (uint32_t i = 0; i < m_captures.vsConsts.fConsts.size(); i++) { + if (m_captures.vsConsts.fConsts[i]) + dst->SetVertexShaderConstantF(i, (float*)&src->vsConsts.fConsts[i], 1); + } + + for (uint32_t i = 0; i < m_captures.vsConsts.iConsts.size(); i++) { + if (m_captures.vsConsts.iConsts[i]) + dst->SetVertexShaderConstantI(i, (int*)&src->vsConsts.iConsts[i], 1); + } + + const uint32_t bitfieldCount = m_parent->GetVertexConstantLayout().bitmaskCount; + for (uint32_t i = 0; i < bitfieldCount; i++) { + uint32_t boolMask = 0; + for (uint32_t j = 0; j < 32; j++) { + if (m_captures.vsConsts.bConsts[i * 32 + j]) + boolMask |= 1u << j; + } + + dst->SetVertexBoolBitfield(i, boolMask, src->vsConsts.bConsts[i]); + } + } + + if (m_captures.flags.test(D3D9CapturedStateFlag::PsConstants)) { + for (uint32_t i = 0; i < m_captures.psConsts.fConsts.size(); i++) { + if (m_captures.psConsts.fConsts[i]) + dst->SetPixelShaderConstantF(i, (float*)&src->psConsts.fConsts[i], 1); + } + + for (uint32_t i = 0; i < m_captures.psConsts.iConsts.size(); i++) { + if 
(m_captures.psConsts.iConsts[i]) + dst->SetPixelShaderConstantI(i, (int*)&src->psConsts.iConsts[i], 1); + } + + uint32_t boolMask = 0; + for (uint32_t i = 0; i < m_captures.psConsts.bConsts.size(); i++) { + if (m_captures.psConsts.bConsts[i]) + boolMask |= 1u << i; + } + + dst->SetPixelBoolBitfield(0, boolMask, src->psConsts.bConsts[0]); + } + } + + template + void ApplyOrCapture() { + if constexpr (Func == D3D9StateFunction::Apply) + ApplyOrCapture(m_parent, &m_state); + else if constexpr (Func == D3D9StateFunction::Capture) + ApplyOrCapture(this, m_deviceState); + } + + template < + DxsoProgramType ProgramType, + D3D9ConstantType ConstantType, + typename T> + HRESULT SetShaderConstants( + UINT StartRegister, + const T* pConstantData, + UINT Count) { + auto SetHelper = [&](auto& setCaptures) { + if constexpr (ProgramType == DxsoProgramTypes::VertexShader) + m_captures.flags.set(D3D9CapturedStateFlag::VsConstants); + else + m_captures.flags.set(D3D9CapturedStateFlag::PsConstants); + + for (uint32_t i = 0; i < Count; i++) { + uint32_t reg = StartRegister + i; + if constexpr (ConstantType == D3D9ConstantType::Float) + setCaptures.fConsts[reg] = true; + else if constexpr (ConstantType == D3D9ConstantType::Int) + setCaptures.iConsts[reg] = true; + else if constexpr (ConstantType == D3D9ConstantType::Bool) + setCaptures.bConsts[reg] = true; + } + + UpdateStateConstants< + ProgramType, + ConstantType, + T>( + &m_state, + StartRegister, + pConstantData, + Count, + false); + + return D3D_OK; + }; + + return ProgramType == DxsoProgramTypes::VertexShader + ? 
SetHelper(m_captures.vsConsts) + : SetHelper(m_captures.psConsts); + } + + HRESULT SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits); + HRESULT SetPixelBoolBitfield (uint32_t idx, uint32_t mask, uint32_t bits); + + inline bool IsApplying() { + return m_applying; + } + + private: + + void CapturePixelRenderStates(); + void CapturePixelSamplerStates(); + void CapturePixelShaderStates(); + + void CaptureVertexRenderStates(); + void CaptureVertexSamplerStates(); + void CaptureVertexShaderStates(); + + void CaptureType(D3D9StateBlockType State); + + D3D9CapturableState m_state; + D3D9StateCaptures m_captures; + + D3D9CapturableState* m_deviceState; + + bool m_applying = false; + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_subresource.h b/src/d3d9/d3d9_subresource.h new file mode 100644 index 000000000..c7a1611e1 --- /dev/null +++ b/src/d3d9/d3d9_subresource.h @@ -0,0 +1,106 @@ +#pragma once + +#include "d3d9_resource.h" +#include "d3d9_common_texture.h" + +namespace dxvk { + + template + class D3D9Subresource : public D3D9Resource { + + public: + + D3D9Subresource( + D3D9DeviceEx* pDevice, + D3D9CommonTexture* pTexture, + UINT Face, + UINT MipLevel, + IDirect3DBaseTexture9* pContainer) + : D3D9Resource ( pDevice ) + , m_container ( pContainer ) + , m_texture ( pTexture ) + , m_face ( Face ) + , m_mipLevel ( MipLevel ) { } + + ~D3D9Subresource() { + // We own the texture! 
+ if (m_container == nullptr) + delete m_texture; + } + + ULONG STDMETHODCALLTYPE AddRef() final { + if (m_container != nullptr) + return m_container->AddRef(); + + return D3D9Resource::AddRef(); + } + + ULONG STDMETHODCALLTYPE Release() final { + if (m_container != nullptr) + return m_container->Release(); + + return D3D9Resource::Release(); + } + + HRESULT STDMETHODCALLTYPE GetContainer(REFIID riid, void** ppContainer) final { + if (m_container != nullptr) + return m_container->QueryInterface(riid, ppContainer); + + return this->GetDevice()->QueryInterface(riid, ppContainer); + } + + D3D9CommonTexture* GetCommonTexture() { + return m_texture; + } + + UINT GetFace() const { + return m_face; + } + + UINT GetMipLevel() const { + return m_mipLevel; + } + + UINT GetSubresource() const { + return m_texture->CalcSubresource(m_face, m_mipLevel); + } + + Rc GetImageView(bool Srgb) { + return m_texture->GetViews().SubresourceSample[m_face][m_mipLevel].Pick(Srgb); + } + + Rc GetRenderTargetView(bool Srgb) { + return m_texture->GetViews().SubresourceRenderTarget[m_face][m_mipLevel].Pick(Srgb); + } + + VkImageLayout GetRenderTargetLayout() { + return m_texture->GetViews().GetRTLayout(); + } + + Rc GetDepthStencilView() { + return m_texture->GetViews().SubresourceDepth[m_face][m_mipLevel]; + } + + VkImageLayout GetDepthLayout() { + return m_texture->GetViews().GetDepthLayout(); + } + + bool IsNull() { + return m_texture->Desc()->Format == D3D9Format::NULL_FORMAT; + } + + IDirect3DBaseTexture9* GetBaseTexture() { + return m_container; + } + + protected: + + IDirect3DBaseTexture9* m_container; + + D3D9CommonTexture* m_texture; + UINT m_face; + UINT m_mipLevel; + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_surface.cpp b/src/d3d9/d3d9_surface.cpp new file mode 100644 index 000000000..86472cb71 --- /dev/null +++ b/src/d3d9/d3d9_surface.cpp @@ -0,0 +1,184 @@ +#include "d3d9_surface.h" +#include "d3d9_texture.h" + +#include "d3d9_device.h" + +namespace dxvk { + + 
// Creates a standalone surface that allocates its own backing texture
+  // and has no parent container (face 0, mip level 0).
+  D3D9Surface::D3D9Surface(
+          D3D9DeviceEx*             pDevice,
+    const D3D9_COMMON_TEXTURE_DESC* pDesc,
+          D3D9_VK_FORMAT_MAPPING    Mapping)
+    : D3D9SurfaceBase(
+        pDevice,
+        new D3D9CommonTexture( pDevice, pDesc, D3DRTYPE_TEXTURE, Mapping ),
+        0, 0,
+        nullptr) { }
+
+  // Creates a surface that refers to one face / mip level of an existing
+  // texture; pContainer is the texture object the surface belongs to.
+  D3D9Surface::D3D9Surface(
+          D3D9DeviceEx*             pDevice,
+          D3D9CommonTexture*        pTexture,
+          UINT                      Face,
+          UINT                      MipLevel,
+          IDirect3DBaseTexture9*    pContainer)
+    : D3D9SurfaceBase(
+        pDevice,
+        pTexture,
+        Face, MipLevel,
+        pContainer) { }
+
+  // Takes a private reference. If the surface has a container, the
+  // reference is taken on the container instead of on the surface.
+  void D3D9Surface::AddRefPrivate() {
+    IDirect3DBaseTexture9* pContainer = this->m_container;
+
+    if (pContainer != nullptr) {
+      // Any container that is not a 2D texture is treated as a cube texture.
+      D3DRESOURCETYPE type = pContainer->GetType();
+      if (type == D3DRTYPE_TEXTURE)
+        reinterpret_cast<D3D9Texture2D*>  (pContainer)->AddRefPrivate();
+      else //if (type == D3DRTYPE_CUBETEXTURE)
+        reinterpret_cast<D3D9TextureCube*>(pContainer)->AddRefPrivate();
+
+      return;
+    }
+
+    D3D9SurfaceBase::AddRefPrivate();
+  }
+
+  // Releases a private reference; mirrors AddRefPrivate and forwards
+  // to the container when there is one.
+  void D3D9Surface::ReleasePrivate() {
+    IDirect3DBaseTexture9* pContainer = this->m_container;
+
+    if (pContainer != nullptr) {
+      // Any container that is not a 2D texture is treated as a cube texture.
+      D3DRESOURCETYPE type = pContainer->GetType();
+      if (type == D3DRTYPE_TEXTURE)
+        reinterpret_cast<D3D9Texture2D*>  (pContainer)->ReleasePrivate();
+      else //if (type == D3DRTYPE_CUBETEXTURE)
+        reinterpret_cast<D3D9TextureCube*>(pContainer)->ReleasePrivate();
+
+      return;
+    }
+
+    D3D9SurfaceBase::ReleasePrivate();
+  }
+
+  // Supports IUnknown, IDirect3DResource9 and IDirect3DSurface9.
+  // Returns E_POINTER for a null output pointer and E_NOINTERFACE
+  // (after logging the IID) for anything else.
+  HRESULT STDMETHODCALLTYPE D3D9Surface::QueryInterface(REFIID riid, void** ppvObject) {
+    if (ppvObject == nullptr)
+      return E_POINTER;
+
+    *ppvObject = nullptr;
+
+    if (riid == __uuidof(IUnknown)
+     || riid == __uuidof(IDirect3DResource9)
+     || riid == __uuidof(IDirect3DSurface9)) {
+      *ppvObject = ref(this);
+      return S_OK;
+    }
+
+    Logger::warn("D3D9Surface::QueryInterface: Unknown interface query");
+    Logger::warn(str::format(riid));
+    return E_NOINTERFACE;
+  }
+
+  D3DRESOURCETYPE STDMETHODCALLTYPE D3D9Surface::GetType() {
+    return D3DRTYPE_SURFACE;
+  }
+
+  // Fills pDesc from the backing texture's description. Width and height
+  // are those of this surface's mip level, clamped to at least 1.
+  HRESULT STDMETHODCALLTYPE D3D9Surface::GetDesc(D3DSURFACE_DESC *pDesc) {
+    if (pDesc == nullptr)
+      return D3DERR_INVALIDCALL;
+
+    auto& desc = *(m_texture->Desc());
+
+    pDesc->Format             = static_cast<D3DFORMAT>(desc.Format);
+    pDesc->Type               = D3DRTYPE_SURFACE;
+    pDesc->Usage              = desc.Usage;
+    pDesc->Pool               = desc.Pool;
+
+    pDesc->MultiSampleType    = desc.MultiSample;
+    pDesc->MultiSampleQuality = desc.MultisampleQuality;
+    pDesc->Width              = std::max(1u, desc.Width >> m_mipLevel);
+    pDesc->Height             = std::max(1u, desc.Height >> m_mipLevel);
+
+    return D3D_OK;
+  }
+
+  // Locks this surface's subresource through the device and reports the
+  // mapped pointer and row pitch. pRect may be null to lock everything.
+  // Returns D3DERR_INVALIDCALL if pLockedRect is null, otherwise the
+  // result of the device's LockImage call.
+  HRESULT STDMETHODCALLTYPE D3D9Surface::LockRect(D3DLOCKED_RECT* pLockedRect, CONST RECT* pRect, DWORD Flags) {
+    // The result is written through pLockedRect, so it must be valid.
+    if (pLockedRect == nullptr)
+      return D3DERR_INVALIDCALL;
+
+    // Express the 2D rectangle as a box that is one slice deep.
+    D3DBOX box;
+    if (pRect != nullptr) {
+      box.Left   = pRect->left;
+      box.Right  = pRect->right;
+      box.Top    = pRect->top;
+      box.Bottom = pRect->bottom;
+      box.Front  = 0;
+      box.Back   = 1;
+    }
+
+    D3DLOCKED_BOX lockedBox;
+
+    HRESULT hr = m_parent->LockImage(
+      m_texture,
+      m_face, m_mipLevel,
+      &lockedBox,
+      pRect != nullptr ? &box : nullptr,
+      Flags);
+
+    // lockedBox starts out uninitialized, so it is only copied to the
+    // caller once the lock is known to have succeeded.
+    if (FAILED(hr))
+      return hr;
+
+    pLockedRect->pBits = lockedBox.pBits;
+    pLockedRect->Pitch = lockedBox.RowPitch;
+
+    return hr;
+  }
+
+  HRESULT STDMETHODCALLTYPE D3D9Surface::UnlockRect() {
+    return m_parent->UnlockImage(
+      m_texture,
+      m_face, m_mipLevel);
+  }
+
+  // Locks the surface and wraps the mapped memory in a GDI device context.
+  // The surface stays locked until ReleaseDC is called with the returned DC.
+  // Returns D3DERR_INVALIDCALL if phDC is null or no DC could be created,
+  // or the LockRect error if locking fails.
+  HRESULT STDMETHODCALLTYPE D3D9Surface::GetDC(HDC *phDC) {
+    if (phDC == nullptr)
+      return D3DERR_INVALIDCALL;
+
+    const D3D9_COMMON_TEXTURE_DESC& desc = *m_texture->Desc();
+
+    D3DLOCKED_RECT lockedRect;
+    HRESULT hr = LockRect(&lockedRect, nullptr, 0);
+    if (FAILED(hr))
+      return hr;
+
+    D3DKMT_CREATEDCFROMMEMORY createInfo;
+    // In...
+    createInfo.pMemory     = lockedRect.pBits;
+    createInfo.Format      = static_cast<D3DFORMAT>(desc.Format);
+    createInfo.Width       = desc.Width;
+    createInfo.Height      = desc.Height;
+    createInfo.Pitch       = lockedRect.Pitch;
+    createInfo.hDeviceDc   = CreateCompatibleDC(NULL);
+    createInfo.pColorTable = nullptr;
+
+    // Out...
+    createInfo.hBitmap     = nullptr;
+    createInfo.hDc         = nullptr;
+
+    D3DKMTCreateDCFromMemory(&createInfo);
+    DeleteDC(createInfo.hDeviceDc);
+
+    // hDc was cleared before the call, so a null handle here means no DC
+    // was created. Undo the lock instead of reporting success with a
+    // null DC and a surface that stays locked.
+    if (createInfo.hDc == nullptr) {
+      UnlockRect();
+      return D3DERR_INVALIDCALL;
+    }
+
+    // These should now be set...
+    m_dcDesc.hDC     = createInfo.hDc;
+    m_dcDesc.hBitmap = createInfo.hBitmap;
+
+    *phDC = m_dcDesc.hDC;
+    return D3D_OK;
+  }
+
+  // Destroys the DC handed out by GetDC and unlocks the surface.
+  // Returns D3DERR_INVALIDCALL if hDC is not the outstanding DC.
+  HRESULT STDMETHODCALLTYPE D3D9Surface::ReleaseDC(HDC hDC) {
+    if (m_dcDesc.hDC == nullptr || m_dcDesc.hDC != hDC)
+      return D3DERR_INVALIDCALL;
+
+    D3DKMTDestroyDCFromMemory(&m_dcDesc);
+
+    // Forget the handles so that a repeated ReleaseDC is rejected
+    // instead of destroying the DC and unlocking a second time.
+    m_dcDesc = D3D9GDIDesc();
+
+    HRESULT hr = UnlockRect();
+    if (FAILED(hr))
+      return hr;
+
+    return D3D_OK;
+  }
+
+}
diff --git a/src/d3d9/d3d9_surface.h b/src/d3d9/d3d9_surface.h
new file mode 100644
index 000000000..27b57bcff
--- /dev/null
+++ b/src/d3d9/d3d9_surface.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include "d3d9_subresource.h"
+
+#include "d3d9_common_texture.h"
+
+#include "../util/util_gdi.h"
+
+namespace dxvk {
+
+  // DC and bitmap handles of the GDI device context created by GetDC.
+  using D3D9GDIDesc     = D3DKMT_DESTROYDCFROMMEMORY;
+
+  using D3D9SurfaceBase = D3D9Subresource<IDirect3DSurface9>;
+
+  // IDirect3DSurface9 implementation. A surface either owns its backing
+  // texture (first constructor) or refers to one face / mip level of a
+  // texture that belongs to a container (second constructor).
+  class D3D9Surface final : public D3D9SurfaceBase {
+
+  public:
+
+    D3D9Surface(
+            D3D9DeviceEx*             pDevice,
+      const D3D9_COMMON_TEXTURE_DESC* pDesc,
+            D3D9_VK_FORMAT_MAPPING    Mapping);
+
+    D3D9Surface(
+            D3D9DeviceEx*             pDevice,
+            D3D9CommonTexture*        pTexture,
+            UINT                      Face,
+            UINT                      MipLevel,
+            IDirect3DBaseTexture9*    pContainer);
+
+    void AddRefPrivate();
+
+    void ReleasePrivate();
+
+    HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject);
+
+    D3DRESOURCETYPE STDMETHODCALLTYPE GetType() final;
+
+    HRESULT STDMETHODCALLTYPE GetDesc(D3DSURFACE_DESC *pDesc) final;
+
+    HRESULT STDMETHODCALLTYPE LockRect(D3DLOCKED_RECT* pLockedRect, CONST RECT* pRect, DWORD Flags) final;
+
+    HRESULT STDMETHODCALLTYPE UnlockRect() final;
+
+    HRESULT STDMETHODCALLTYPE GetDC(HDC *phDC) final;
+
+    HRESULT STDMETHODCALLTYPE ReleaseDC(HDC hDC) final;
+
+  private:
+
+    // Zero-initialized so that ReleaseDC can tell "no DC outstanding"
+    // apart from a real handle before GetDC has ever been called.
+    D3D9GDIDesc m_dcDesc = { };
+
+  };
+}
\ No newline at end of file
diff --git a/src/d3d9/d3d9_swapchain.cpp b/src/d3d9/d3d9_swapchain.cpp
new file mode 100644
index 000000000..466b1e240
--- /dev/null
+++ b/src/d3d9/d3d9_swapchain.cpp
@@ -0,0 +1,1216 @@
+#include "d3d9_swapchain.h"
+#include "d3d9_surface.h"
+#include "d3d9_monitor.h"
+
+#include "d3d9_hud.h"
+
+#include <d3d9_presenter_frag.h>
+#include <d3d9_presenter_vert.h>
+
+namespace dxvk {
+
+  // Clamps x to [0, 1] and scales it to the 16-bit range used by gamma ramps.
+  static uint16_t MapGammaControlPoint(float x) {
+    if (x < 0.0f) x = 0.0f;
+    if (x > 1.0f) x = 1.0f;
+    return uint16_t(65535.0f * x);
+  }
+
+
+  // Push-constant data for the present draw: scale and offset of the
+  // source rectangle relative to the back buffer extent.
+  struct D3D9PresentInfo {
+    float scale[2];
+    float offset[2];
+  };
+
+
+  // Normalizes the present parameters, creates the back buffer, HUD and
+  // presentation state, and enters fullscreen if requested. The presenter
+  // itself is only created here when the deferSurfaceCreation option is
+  // off. Throws DxvkError if the initial fullscreen switch fails.
+  D3D9SwapChainEx::D3D9SwapChainEx(
+          D3D9DeviceEx*          pDevice,
+          D3DPRESENT_PARAMETERS* pPresentParams,
+    const D3DDISPLAYMODEEX*      pFullscreenDisplayMode)
+    : D3D9SwapChainExBase(pDevice)
+    , m_device            (pDevice->GetDXVKDevice())
+    , m_context           (m_device->createContext())
+    , m_frameLatencyCap   (pDevice->GetOptions()->maxFrameLatency)
+    , m_frameLatencySignal(new sync::Fence(m_frameId))
+    , m_dialog            (pDevice->GetOptions()->enableDialogMode) {
+    UpdateMonitorInfo();
+
+    this->NormalizePresentParameters(pPresentParams);
+    m_presentParams = *pPresentParams;
+    m_window = m_presentParams.hDeviceWindow;
+
+    UpdatePresentRegion(nullptr, nullptr);
+    if (!pDevice->GetOptions()->deferSurfaceCreation)
+      CreatePresenter();
+
+    CreateBackBuffer();
+    CreateHud();
+
+    InitRenderState();
+    InitSamplers();
+    InitShaders();
+    InitRamp();
+
+    // Apply initial window mode and fullscreen state
+    if (!m_presentParams.Windowed && FAILED(EnterFullscreenMode(pPresentParams, pFullscreenDisplayMode)))
+      throw DxvkError("D3D9: Failed to set initial fullscreen state");
+  }
+
+
+  // Restores the display mode and waits for the pending present
+  // and for the device to go idle.
+  D3D9SwapChainEx::~D3D9SwapChainEx() {
+    RestoreDisplayMode(m_monitor);
+
+    m_device->waitForSubmission(&m_presentStatus);
+    m_device->waitForIdle();
+  }
+
+
+  // Supports IUnknown and IDirect3DSwapChain9, plus IDirect3DSwapChain9Ex
+  // when the parent reports IsExtended(). Returns E_POINTER for a null
+  // output pointer and E_NOINTERFACE (after logging the IID) otherwise.
+  HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::QueryInterface(REFIID riid, void** ppvObject) {
+    if (ppvObject == nullptr)
+      return E_POINTER;
+
+    *ppvObject = nullptr;
+
+    if (riid == __uuidof(IUnknown)
+     || riid == __uuidof(IDirect3DSwapChain9)
+     || (GetParent()->IsExtended() && riid == __uuidof(IDirect3DSwapChain9Ex))) {
+      *ppvObject = ref(this);
+      return S_OK;
+    }
+
+    Logger::warn("D3D9SwapChainEx::QueryInterface: Unknown interface query");
+    Logger::warn(str::format(riid));
+    return E_NOINTERFACE;
+  }
+
+
+  // Presents the back buffer. The presenter and/or the swap chain are
+  // (re)created first when the target window, the dialog mode, vsync or
+  // the present region changed. Returns D3D_OK, also when no swap chain
+  // is available, and D3DERR_DEVICEREMOVED if a DxvkError is thrown.
+  HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::Present(
+    const RECT*    pSourceRect,
+    const RECT*    pDestRect,
+          HWND     hDestWindowOverride,
+    const RGNDATA* pDirtyRegion,
+          DWORD    dwFlags) {
+    auto lock = m_parent->LockDevice();
+
+    uint32_t presentInterval = m_presentParams.PresentationInterval;
+
+    // This is not true directly in d3d9 due to timing differences that don't matter for us.
+    // For our purposes...
+    // D3DPRESENT_INTERVAL_DEFAULT (0) == D3DPRESENT_INTERVAL_ONE (1) which means VSYNC.
+    presentInterval = std::max(presentInterval, 1u);
+
+    if (presentInterval == D3DPRESENT_INTERVAL_IMMEDIATE || (dwFlags & D3DPRESENT_FORCEIMMEDIATE))
+      presentInterval = 0;
+
+    auto options = m_parent->GetOptions();
+
+    // A non-negative configured interval overrides the application's choice.
+    if (options->presentInterval >= 0)
+      presentInterval = options->presentInterval;
+
+    bool vsync  = presentInterval != 0;
+
+    HWND window = m_presentParams.hDeviceWindow;
+    if (hDestWindowOverride != nullptr)
+      window    = hDestWindowOverride;
+
+    bool recreate = false;
+    recreate   |= m_presenter == nullptr;
+    recreate   |= window != m_window;
+    recreate   |= m_dialogChanged;
+
+    m_window    = window;
+
+    m_dirty    |= vsync != m_vsync;
+    m_dirty    |= UpdatePresentRegion(pSourceRect, pDestRect);
+    m_dirty    |= recreate;
+    // m_presenter is null if surface creation was deferred or after
+    // Invalidate(). recreate already covers that case, so only an
+    // existing presenter is asked whether it has a swap chain.
+    m_dirty    |= m_presenter != nullptr
+               && !m_presenter->hasSwapChain();
+    m_vsync     = vsync;
+
+    m_dialogChanged = false;
+
+    try {
+      if (recreate)
+        CreatePresenter();
+
+      if (std::exchange(m_dirty, false))
+        RecreateSwapChain(vsync);
+
+      // We aren't going to device loss simply because
+      // 99% of D3D9 games don't handle this properly and
+      // just end up crashing (like with alt-tab loss)
+      if (!m_presenter->hasSwapChain())
+        return D3D_OK;
+
+      PresentImage(presentInterval);
+      return D3D_OK;
+    } catch (const DxvkError& e) {
+      Logger::err(e.message());
+      return D3DERR_DEVICEREMOVED;
+    }
+  }
+
+
+  // Not implemented: logs a warning and reports success
+  // without writing anything to pDestSurface.
+  HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetFrontBufferData(IDirect3DSurface9* pDestSurface) {
+    Logger::warn("D3D9SwapChainEx::GetFrontBufferData: Stub");
+    return D3D_OK;
+  }
+
+ + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetBackBuffer( + UINT iBackBuffer, + D3DBACKBUFFER_TYPE Type, + IDirect3DSurface9** ppBackBuffer) { + InitReturnPtr(ppBackBuffer); + + if (ppBackBuffer == nullptr) + return D3DERR_INVALIDCALL; + + if (iBackBuffer > 0) { + Logger::err("D3D9: GetBackBuffer: iBackBuffer > 0 not supported"); + return D3DERR_INVALIDCALL; + } + + *ppBackBuffer = m_backBuffer.ref(); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetRasterStatus(D3DRASTER_STATUS* pRasterStatus) { + // We could use D3DKMTGetScanLine but Wine doesn't implement that. + // So... we lie here and make some stuff up + // enough that it makes games work. + + // Assume there's 20 lines in a vBlank. + constexpr uint32_t vBlankLineCount = 20; + + if (pRasterStatus == nullptr) + return D3DERR_INVALIDCALL; + + D3DDISPLAYMODEEX mode; + mode.Size = sizeof(mode); + if (FAILED(this->GetDisplayModeEx(&mode, nullptr))) + return D3DERR_INVALIDCALL; + + uint32_t scanLineCount = mode.Height + vBlankLineCount; + + auto nowUs = std::chrono::time_point_cast( + dxvk::high_resolution_clock::now()) + .time_since_epoch(); + + auto frametimeUs = std::chrono::microseconds(1000000u / mode.RefreshRate); + auto scanLineUs = frametimeUs / scanLineCount; + + pRasterStatus->ScanLine = (nowUs % frametimeUs) / scanLineUs; + pRasterStatus->InVBlank = pRasterStatus->ScanLine >= mode.Height; + + if (pRasterStatus->InVBlank) + pRasterStatus->ScanLine = 0; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetDisplayMode(D3DDISPLAYMODE* pMode) { + if (pMode == nullptr) + return D3DERR_INVALIDCALL; + + *pMode = D3DDISPLAYMODE(); + + D3DDISPLAYMODEEX mode; + mode.Size = sizeof(mode); + HRESULT hr = this->GetDisplayModeEx(&mode, nullptr); + + if (FAILED(hr)) + return hr; + + pMode->Width = mode.Width; + pMode->Height = mode.Height; + pMode->Format = mode.Format; + pMode->RefreshRate = mode.RefreshRate; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE 
D3D9SwapChainEx::GetPresentParameters(D3DPRESENT_PARAMETERS* pPresentationParameters) { + if (pPresentationParameters == nullptr) + return D3DERR_INVALIDCALL; + + *pPresentationParameters = m_presentParams; + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetLastPresentCount(UINT* pLastPresentCount) { + Logger::warn("D3D9SwapChainEx::GetLastPresentCount: Stub"); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetPresentStats(D3DPRESENTSTATS* pPresentationStatistics) { + Logger::warn("D3D9SwapChainEx::GetPresentStats: Stub"); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9SwapChainEx::GetDisplayModeEx(D3DDISPLAYMODEEX* pMode, D3DDISPLAYROTATION* pRotation) { + if (pMode == nullptr && pRotation == nullptr) + return D3DERR_INVALIDCALL; + + if (pRotation != nullptr) + *pRotation = D3DDISPLAYROTATION_IDENTITY; + + if (pMode != nullptr) { + DEVMODEW devMode = DEVMODEW(); + devMode.dmSize = sizeof(devMode); + + if (!::EnumDisplaySettingsW(m_monInfo.szDevice, ENUM_CURRENT_SETTINGS, &devMode)) { + Logger::err("D3D9SwapChainEx::GetDisplayModeEx: Failed to enum display settings"); + return D3DERR_INVALIDCALL; + } + + pMode->Size = sizeof(D3DDISPLAYMODEEX); + pMode->Width = devMode.dmPelsWidth; + pMode->Height = devMode.dmPelsHeight; + pMode->RefreshRate = devMode.dmDisplayFrequency; + pMode->Format = D3DFMT_X8R8G8B8; + pMode->ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; + } + + return D3D_OK; + } + + + void D3D9SwapChainEx::Reset( + D3DPRESENT_PARAMETERS* pPresentParams, + D3DDISPLAYMODEEX* pFullscreenDisplayMode) { + auto lock = m_parent->LockDevice(); + + this->NormalizePresentParameters(pPresentParams); + + m_dirty |= m_presentParams.BackBufferFormat != pPresentParams->BackBufferFormat + || m_presentParams.BackBufferWidth != pPresentParams->BackBufferWidth + || m_presentParams.BackBufferHeight != pPresentParams->BackBufferHeight + || m_presentParams.BackBufferCount != pPresentParams->BackBufferCount; + + bool 
changeFullscreen = m_presentParams.Windowed != pPresentParams->Windowed; + + if (pPresentParams->Windowed) { + if (changeFullscreen) + this->LeaveFullscreenMode(); + + // Adjust window position and size + RECT newRect = { 0, 0, 0, 0 }; + RECT oldRect = { 0, 0, 0, 0 }; + + ::GetWindowRect(m_window, &oldRect); + ::SetRect(&newRect, 0, 0, pPresentParams->BackBufferWidth, pPresentParams->BackBufferHeight); + ::AdjustWindowRectEx(&newRect, + ::GetWindowLongW(m_window, GWL_STYLE), FALSE, + ::GetWindowLongW(m_window, GWL_EXSTYLE)); + ::SetRect(&newRect, 0, 0, newRect.right - newRect.left, newRect.bottom - newRect.top); + ::OffsetRect(&newRect, oldRect.left, oldRect.top); + ::MoveWindow(m_window, newRect.left, newRect.top, + newRect.right - newRect.left, newRect.bottom - newRect.top, TRUE); + } + else { + if (changeFullscreen) + this->EnterFullscreenMode(pPresentParams, pFullscreenDisplayMode); + else + ChangeDisplayMode(pPresentParams, pFullscreenDisplayMode); + + // Move the window so that it covers the entire output + RECT rect; + GetMonitorRect(GetDefaultMonitor(), &rect); + + ::SetWindowPos(m_window, HWND_TOPMOST, + rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, + SWP_FRAMECHANGED | SWP_SHOWWINDOW | SWP_NOACTIVATE); + } + + m_presentParams = *pPresentParams; + + if (changeFullscreen) + SetGammaRamp(0, &m_ramp); + + UpdatePresentRegion(nullptr, nullptr); + CreateBackBuffer(); + + // If we would fail to go into dialog box mode with + // the new mode, let's escape from dialog mode. 
+ HRESULT hr = SetDialogBoxMode(m_dialog); + + if (FAILED(hr)) + SetDialogBoxMode(false); + } + + + HRESULT D3D9SwapChainEx::WaitForVBlank() { + Logger::warn("D3D9SwapChainEx::WaitForVBlank: Stub"); + return D3D_OK; + } + + + void D3D9SwapChainEx::SetGammaRamp( + DWORD Flags, + const D3DGAMMARAMP* pRamp) { + if (unlikely(pRamp == nullptr)) + return; + + m_ramp = *pRamp; + + bool isIdentity = true; + + std::array cp; + + for (uint32_t i = 0; i < NumControlPoints; i++) { + uint16_t identity = MapGammaControlPoint(float(i) / float(NumControlPoints - 1)); + + cp[i].R = pRamp->red[i]; + cp[i].G = pRamp->green[i]; + cp[i].B = pRamp->blue[i]; + cp[i].A = 0; + + isIdentity &= cp[i].R == identity + && cp[i].G == identity + && cp[i].B == identity; + } + + if (isIdentity || m_presentParams.Windowed) + DestroyGammaTexture(); + else + CreateGammaTexture(NumControlPoints, cp.data()); + } + + + void D3D9SwapChainEx::GetGammaRamp(D3DGAMMARAMP* pRamp) { + if (likely(pRamp != nullptr)) + *pRamp = m_ramp; + } + + + void D3D9SwapChainEx::Invalidate(HWND hWindow) { + if (hWindow == nullptr) + hWindow = m_parent->GetWindow(); + + if (m_presentParams.hDeviceWindow == hWindow) + m_presenter = nullptr; + } + + + HRESULT D3D9SwapChainEx::SetDialogBoxMode(bool bEnableDialogs) { + if (bEnableDialogs) { + if (m_presentParams.BackBufferFormat != D3DFMT_X1R5G5B5 && + m_presentParams.BackBufferFormat != D3DFMT_R5G6B5 && + m_presentParams.BackBufferFormat != D3DFMT_X8R8G8B8) + return D3DERR_INVALIDCALL; + + if (m_presentParams.SwapEffect == D3DSWAPEFFECT_DISCARD) + return D3DERR_INVALIDCALL; + + if (m_presentParams.Flags & D3DPRESENTFLAG_LOCKABLE_BACKBUFFER) + return D3DERR_INVALIDCALL; + } + + m_dialogChanged = m_dialog != bEnableDialogs; + m_dialog = bEnableDialogs; + + return D3D_OK; + } + + + D3D9Surface* D3D9SwapChainEx::GetBackBuffer(UINT iBackBuffer) { + return m_backBuffer.ptr(); + } + + + void D3D9SwapChainEx::NormalizePresentParameters(D3DPRESENT_PARAMETERS* pPresentParams) { + if 
(pPresentParams->hDeviceWindow == nullptr) + pPresentParams->hDeviceWindow = m_parent->GetWindow(); + + pPresentParams->BackBufferCount = std::max(pPresentParams->BackBufferCount, 1u); + + if (pPresentParams->Windowed) { + GetWindowClientSize(pPresentParams->hDeviceWindow, + pPresentParams->BackBufferWidth ? nullptr : &pPresentParams->BackBufferWidth, + pPresentParams->BackBufferHeight ? nullptr : &pPresentParams->BackBufferHeight); + } + else { + GetMonitorClientSize(GetDefaultMonitor(), + pPresentParams->BackBufferWidth ? nullptr : &pPresentParams->BackBufferWidth, + pPresentParams->BackBufferHeight ? nullptr : &pPresentParams->BackBufferHeight); + } + + if (pPresentParams->BackBufferFormat == D3DFMT_UNKNOWN) + pPresentParams->BackBufferFormat = D3DFMT_X8R8G8B8; + + if (env::getEnvVar("DXVK_FORCE_WINDOWED") == "1") + pPresentParams->Windowed = TRUE; + } + + + void D3D9SwapChainEx::PresentImage(UINT SyncInterval) { + m_parent->Flush(); + + // Wait for the sync event so that we respect the maximum frame latency + uint64_t frameId = ++m_frameId; + m_frameLatencySignal->wait(frameId - GetActualFrameLatency()); + + for (uint32_t i = 0; i < SyncInterval || i < 1; i++) { + SynchronizePresent(); + + m_context->beginRecording( + m_device->createCommandList()); + + // Resolve back buffer if it is multisampled. We + // only have to do it only for the first frame. 
+ if (m_swapImageResolve != nullptr && i == 0) { + VkImageSubresourceLayers resolveSubresource; + resolveSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + resolveSubresource.mipLevel = 0; + resolveSubresource.baseArrayLayer = 0; + resolveSubresource.layerCount = 1; + + VkImageResolve resolveRegion; + resolveRegion.srcSubresource = resolveSubresource; + resolveRegion.srcOffset = VkOffset3D { 0, 0, 0 }; + resolveRegion.dstSubresource = resolveSubresource; + resolveRegion.dstOffset = VkOffset3D { 0, 0, 0 }; + resolveRegion.extent = m_swapImage->info().extent; + + m_context->resolveImage( + m_swapImageResolve, m_swapImage, + resolveRegion, VK_FORMAT_UNDEFINED); + } + + // Presentation semaphores and WSI swap chain image + vk::PresenterInfo info = m_presenter->info(); + vk::PresenterSync sync = m_presenter->getSyncSemaphores(); + + uint32_t imageIndex = 0; + + VkResult status = m_presenter->acquireNextImage( + sync.acquire, VK_NULL_HANDLE, imageIndex); + + while (status != VK_SUCCESS && status != VK_SUBOPTIMAL_KHR) { + RecreateSwapChain(m_vsync); + + info = m_presenter->info(); + sync = m_presenter->getSyncSemaphores(); + + status = m_presenter->acquireNextImage( + sync.acquire, VK_NULL_HANDLE, imageIndex); + } + + // Use an appropriate texture filter depending on whether + // the back buffer size matches the swap image size + m_context->bindShader(VK_SHADER_STAGE_VERTEX_BIT, m_vertShader); + m_context->bindShader(VK_SHADER_STAGE_FRAGMENT_BIT, m_fragShader); + + DxvkRenderTargets renderTargets; + renderTargets.color[0].view = m_imageViews.at(imageIndex); + renderTargets.color[0].layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + m_context->bindRenderTargets(renderTargets); + + VkViewport viewport; + viewport.x = float(m_dstRect.left); + viewport.y = float(m_dstRect.top); + viewport.width = float(m_dstRect.right - m_dstRect.left); + viewport.height = float(m_dstRect.bottom - m_dstRect.top); + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + + VkRect2D 
scissor; + scissor.offset.x = 0; + scissor.offset.y = 0; + scissor.extent.width = m_dstRect.right - m_dstRect.left; + scissor.extent.height = m_dstRect.bottom - m_dstRect.top; + + m_context->setViewports(1, &viewport, &scissor); + + D3D9PresentInfo presentInfoConsts; + presentInfoConsts.scale[0] = float(m_srcRect.right - m_srcRect.left) / float(m_swapImage->info().extent.width); + presentInfoConsts.scale[1] = float(m_srcRect.bottom - m_srcRect.top) / float(m_swapImage->info().extent.height); + + presentInfoConsts.offset[0] = float(m_srcRect.left) / float(m_swapImage->info().extent.width); + presentInfoConsts.offset[1] = float(m_srcRect.top) / float(m_swapImage->info().extent.height); + + m_context->pushConstants(0, sizeof(D3D9PresentInfo), &presentInfoConsts); + + m_context->setRasterizerState(m_rsState); + m_context->setMultisampleState(m_msState); + m_context->setDepthStencilState(m_dsState); + m_context->setLogicOpState(m_loState); + m_context->setBlendMode(0, m_blendMode); + + m_context->setInputAssemblyState(m_iaState); + m_context->setInputLayout(0, nullptr, 0, nullptr); + + m_context->bindResourceSampler(BindingIds::Image, m_samplerFitting); + m_context->bindResourceSampler(BindingIds::Gamma, m_gammaSampler); + + m_context->bindResourceView(BindingIds::Image, m_swapImageView, nullptr); + m_context->bindResourceView(BindingIds::Gamma, m_gammaTextureView, nullptr); + + m_context->draw(3, 1, 0, 0); + + if (m_hud != nullptr) + m_hud->render(m_context, info.format, info.imageExtent); + + if (i + 1 >= SyncInterval) + m_context->signal(m_frameLatencySignal, frameId); + + SubmitPresent(sync, i); + } + } + + + void D3D9SwapChainEx::SubmitPresent(const vk::PresenterSync& Sync, uint32_t FrameId) { + // Present from CS thread so that we don't + // have to synchronize with it first. 
+ m_presentStatus.result = VK_NOT_READY; + + m_parent->EmitCs([this, + cFrameId = FrameId, + cSync = Sync, + cHud = m_hud, + cCommandList = m_context->endRecording() + ] (DxvkContext* ctx) { + m_device->submitCommandList(cCommandList, + cSync.acquire, cSync.present); + + if (cHud != nullptr && !cFrameId) + cHud->update(); + + m_device->presentImage(m_presenter, + cSync.present, &m_presentStatus); + }); + + m_parent->FlushCsChunk(); + } + + + void D3D9SwapChainEx::SynchronizePresent() { + // Recreate swap chain if the previous present call failed + VkResult status = m_device->waitForSubmission(&m_presentStatus); + + if (status != VK_SUCCESS) + RecreateSwapChain(m_vsync); + } + + + void D3D9SwapChainEx::RecreateSwapChain(BOOL Vsync) { + // Ensure that we can safely destroy the swap chain + m_device->waitForSubmission(&m_presentStatus); + m_device->waitForIdle(); + + m_presentStatus.result = VK_SUCCESS; + + vk::PresenterDesc presenterDesc; + presenterDesc.imageExtent = GetPresentExtent(); + presenterDesc.imageCount = PickImageCount(m_presentParams.BackBufferCount + 1); + presenterDesc.numFormats = PickFormats(EnumerateFormat(m_presentParams.BackBufferFormat), presenterDesc.formats); + presenterDesc.numPresentModes = PickPresentModes(Vsync, presenterDesc.presentModes); + presenterDesc.fullScreenExclusive = PickFullscreenMode(); + + if (m_presenter->recreateSwapChain(presenterDesc) != VK_SUCCESS) + throw DxvkError("D3D9SwapChainEx: Failed to recreate swap chain"); + + CreateRenderTargetViews(); + } + + + void D3D9SwapChainEx::CreatePresenter() { + DxvkDeviceQueue graphicsQueue = m_device->queues().graphics; + + vk::PresenterDevice presenterDevice; + presenterDevice.queueFamily = graphicsQueue.queueFamily; + presenterDevice.queue = graphicsQueue.queueHandle; + presenterDevice.adapter = m_device->adapter()->handle(); + + vk::PresenterDesc presenterDesc; + presenterDesc.imageExtent = GetPresentExtent(); + presenterDesc.imageCount = 
PickImageCount(m_presentParams.BackBufferCount + 1); + presenterDesc.numFormats = PickFormats(EnumerateFormat(m_presentParams.BackBufferFormat), presenterDesc.formats); + presenterDesc.numPresentModes = PickPresentModes(false, presenterDesc.presentModes); + presenterDesc.fullScreenExclusive = PickFullscreenMode(); + + m_presenter = new vk::Presenter(m_window, + m_device->adapter()->vki(), + m_device->vkd(), + presenterDevice, + presenterDesc); + + CreateRenderTargetViews(); + } + + + void D3D9SwapChainEx::CreateRenderTargetViews() { + vk::PresenterInfo info = m_presenter->info(); + + m_imageViews.clear(); + m_imageViews.resize(info.imageCount); + + DxvkImageCreateInfo imageInfo; + imageInfo.type = VK_IMAGE_TYPE_2D; + imageInfo.format = info.format.format; + imageInfo.flags = 0; + imageInfo.sampleCount = VK_SAMPLE_COUNT_1_BIT; + imageInfo.extent = { info.imageExtent.width, info.imageExtent.height, 1 }; + imageInfo.numLayers = 1; + imageInfo.mipLevels = 1; + imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + imageInfo.stages = 0; + imageInfo.access = 0; + imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + imageInfo.layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + + DxvkImageViewCreateInfo viewInfo; + viewInfo.type = VK_IMAGE_VIEW_TYPE_2D; + viewInfo.format = info.format.format; + viewInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + viewInfo.aspect = VK_IMAGE_ASPECT_COLOR_BIT; + viewInfo.minLevel = 0; + viewInfo.numLevels = 1; + viewInfo.minLayer = 0; + viewInfo.numLayers = 1; + + for (uint32_t i = 0; i < info.imageCount; i++) { + VkImage imageHandle = m_presenter->getImage(i).image; + + Rc image = new DxvkImage( + m_device->vkd(), imageInfo, imageHandle); + + m_imageViews[i] = new DxvkImageView( + m_device->vkd(), image, viewInfo); + } + } + + + void D3D9SwapChainEx::CreateBackBuffer() { + // Explicitly destroy current swap image before + // creating a new one to free up resources + m_swapImage = nullptr; + m_swapImageResolve = nullptr; + m_swapImageView = nullptr; + 
m_backBuffer = nullptr; + + // Create new back buffer + D3D9_COMMON_TEXTURE_DESC desc; + desc.Width = std::max(m_presentParams.BackBufferWidth, 1u); + desc.Height = std::max(m_presentParams.BackBufferHeight, 1u); + desc.Depth = 1; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = EnumerateFormat(m_presentParams.BackBufferFormat); + desc.MultiSample = m_presentParams.MultiSampleType; + desc.MultisampleQuality = m_presentParams.MultiSampleQuality; + desc.Pool = D3DPOOL_DEFAULT; + desc.Usage = D3DUSAGE_RENDERTARGET; + desc.Discard = FALSE; + + auto mapping = m_parent->LookupFormat(desc.Format); + + m_backBuffer = new D3D9Surface(m_parent, &desc, mapping); + + m_swapImage = m_backBuffer->GetCommonTexture()->GetImage(); + + // If the image is multisampled, we need to create + // another image which we'll use as a resolve target + if (m_swapImage->info().sampleCount != VK_SAMPLE_COUNT_1_BIT) { + DxvkImageCreateInfo resolveInfo; + resolveInfo.type = VK_IMAGE_TYPE_2D; + resolveInfo.format = m_swapImage->info().format; + resolveInfo.flags = 0; + resolveInfo.sampleCount = VK_SAMPLE_COUNT_1_BIT; + resolveInfo.extent = m_swapImage->info().extent; + resolveInfo.numLayers = 1; + resolveInfo.mipLevels = 1; + resolveInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT + | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT + | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + resolveInfo.stages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT + | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT + | VK_PIPELINE_STAGE_TRANSFER_BIT; + resolveInfo.access = VK_ACCESS_SHADER_READ_BIT + | VK_ACCESS_TRANSFER_WRITE_BIT + | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT + | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + resolveInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + resolveInfo.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + m_swapImageResolve = m_device->createImage( + resolveInfo, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + } + + // Create an image view that allows the + // image to be bound as a shader resource. 
+ DxvkImageViewCreateInfo viewInfo; + viewInfo.type = VK_IMAGE_VIEW_TYPE_2D; + viewInfo.format = m_swapImage->info().format; + viewInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + viewInfo.aspect = VK_IMAGE_ASPECT_COLOR_BIT; + viewInfo.minLevel = 0; + viewInfo.numLevels = 1; + viewInfo.minLayer = 0; + viewInfo.numLayers = 1; + + m_swapImageView = m_device->createImageView( + m_swapImageResolve != nullptr + ? m_swapImageResolve + : m_swapImage, + viewInfo); + + // Initialize the image so that we can use it. Clearing + // to black prevents garbled output for the first frame. + VkImageSubresourceRange subresources; + subresources.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subresources.baseMipLevel = 0; + subresources.levelCount = 1; + subresources.baseArrayLayer = 0; + subresources.layerCount = 1; + + VkClearColorValue clearColor; + clearColor.float32[0] = 0.0f; + clearColor.float32[1] = 0.0f; + clearColor.float32[2] = 0.0f; + clearColor.float32[3] = 0.0f; + + m_context->beginRecording( + m_device->createCommandList()); + + m_context->clearColorImage( + m_swapImage, clearColor, subresources); + + m_device->submitCommandList( + m_context->endRecording(), + VK_NULL_HANDLE, + VK_NULL_HANDLE); + } + + + void D3D9SwapChainEx::CreateGammaTexture( + UINT NumControlPoints, + const D3D9_VK_GAMMA_CP* pControlPoints) { + if (m_gammaTexture == nullptr + || m_gammaTexture->info().extent.width != NumControlPoints) { + DxvkImageCreateInfo imgInfo; + imgInfo.type = VK_IMAGE_TYPE_1D; + imgInfo.format = VK_FORMAT_R16G16B16A16_UNORM; + imgInfo.flags = 0; + imgInfo.sampleCount = VK_SAMPLE_COUNT_1_BIT; + imgInfo.extent = { NumControlPoints, 1, 1 }; + imgInfo.numLayers = 1; + imgInfo.mipLevels = 1; + imgInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT + | VK_IMAGE_USAGE_SAMPLED_BIT; + imgInfo.stages = VK_PIPELINE_STAGE_TRANSFER_BIT + | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + imgInfo.access = VK_ACCESS_TRANSFER_WRITE_BIT + | VK_ACCESS_SHADER_READ_BIT; + imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + 
imgInfo.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + m_gammaTexture = m_device->createImage( + imgInfo, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + DxvkImageViewCreateInfo viewInfo; + viewInfo.type = VK_IMAGE_VIEW_TYPE_1D; + viewInfo.format = VK_FORMAT_R16G16B16A16_UNORM; + viewInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + viewInfo.aspect = VK_IMAGE_ASPECT_COLOR_BIT; + viewInfo.minLevel = 0; + viewInfo.numLevels = 1; + viewInfo.minLayer = 0; + viewInfo.numLayers = 1; + + m_gammaTextureView = m_device->createImageView(m_gammaTexture, viewInfo); + } + + m_context->beginRecording( + m_device->createCommandList()); + + m_context->updateImage(m_gammaTexture, + VkImageSubresourceLayers { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 }, + VkOffset3D { 0, 0, 0 }, + VkExtent3D { NumControlPoints, 1, 1 }, + pControlPoints, 0, 0); + + m_device->submitCommandList( + m_context->endRecording(), + VK_NULL_HANDLE, + VK_NULL_HANDLE); + } + + + void D3D9SwapChainEx::DestroyGammaTexture() { + m_gammaTexture = nullptr; + m_gammaTextureView = nullptr; + } + + + void D3D9SwapChainEx::CreateHud() { + m_hud = hud::Hud::createHud(m_device); + + if (m_hud != nullptr) + m_hud->addItem("samplers", m_parent); + } + + + void D3D9SwapChainEx::InitRenderState() { + m_iaState.primitiveTopology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + m_iaState.primitiveRestart = VK_FALSE; + m_iaState.patchVertexCount = 0; + + m_rsState.polygonMode = VK_POLYGON_MODE_FILL; + m_rsState.cullMode = VK_CULL_MODE_BACK_BIT; + m_rsState.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + m_rsState.depthClipEnable = VK_FALSE; + m_rsState.depthBiasEnable = VK_FALSE; + m_rsState.sampleCount = VK_SAMPLE_COUNT_1_BIT; + + m_msState.sampleMask = 0xffffffff; + m_msState.enableAlphaToCoverage = VK_FALSE; + + VkStencilOpState stencilOp; + stencilOp.failOp = VK_STENCIL_OP_KEEP; + stencilOp.passOp = VK_STENCIL_OP_KEEP; + stencilOp.depthFailOp = VK_STENCIL_OP_KEEP; + stencilOp.compareOp = VK_COMPARE_OP_ALWAYS; + stencilOp.compareMask = 
0xFFFFFFFF; + stencilOp.writeMask = 0xFFFFFFFF; + stencilOp.reference = 0; + + m_dsState.enableDepthTest = VK_FALSE; + m_dsState.enableDepthWrite = VK_FALSE; + m_dsState.enableStencilTest = VK_FALSE; + m_dsState.depthCompareOp = VK_COMPARE_OP_ALWAYS; + m_dsState.stencilOpFront = stencilOp; + m_dsState.stencilOpBack = stencilOp; + + m_loState.enableLogicOp = VK_FALSE; + m_loState.logicOp = VK_LOGIC_OP_NO_OP; + + m_blendMode.enableBlending = VK_FALSE; + m_blendMode.colorSrcFactor = VK_BLEND_FACTOR_ONE; + m_blendMode.colorDstFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + m_blendMode.colorBlendOp = VK_BLEND_OP_ADD; + m_blendMode.alphaSrcFactor = VK_BLEND_FACTOR_ONE; + m_blendMode.alphaDstFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + m_blendMode.alphaBlendOp = VK_BLEND_OP_ADD; + m_blendMode.writeMask = VK_COLOR_COMPONENT_R_BIT + | VK_COLOR_COMPONENT_G_BIT + | VK_COLOR_COMPONENT_B_BIT + | VK_COLOR_COMPONENT_A_BIT; + } + + + void D3D9SwapChainEx::InitSamplers() { + DxvkSamplerCreateInfo samplerInfo; + samplerInfo.magFilter = VK_FILTER_NEAREST; + samplerInfo.minFilter = VK_FILTER_NEAREST; + samplerInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + samplerInfo.mipmapLodBias = 0.0f; + samplerInfo.mipmapLodMin = 0.0f; + samplerInfo.mipmapLodMax = 0.0f; + samplerInfo.useAnisotropy = VK_FALSE; + samplerInfo.maxAnisotropy = 1.0f; + samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + samplerInfo.compareToDepth = VK_FALSE; + samplerInfo.compareOp = VK_COMPARE_OP_ALWAYS; + samplerInfo.borderColor = VkClearColorValue(); + samplerInfo.usePixelCoord = VK_FALSE; + m_samplerFitting = m_device->createSampler(samplerInfo); + + samplerInfo.magFilter = VK_FILTER_LINEAR; + samplerInfo.minFilter = VK_FILTER_LINEAR; + m_samplerScaling = m_device->createSampler(samplerInfo); + + samplerInfo.addressModeU = 
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + m_gammaSampler = m_device->createSampler(samplerInfo); + } + + + void D3D9SwapChainEx::InitShaders() { + const SpirvCodeBuffer vsCode(d3d9_presenter_vert); + const SpirvCodeBuffer fsCode(d3d9_presenter_frag); + + const std::array<DxvkResourceSlot, 2> fsResourceSlots = {{ + { BindingIds::Image, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_IMAGE_VIEW_TYPE_2D }, + { BindingIds::Gamma, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_IMAGE_VIEW_TYPE_1D }, + }}; + + m_vertShader = m_device->createShader( + VK_SHADER_STAGE_VERTEX_BIT, + 0, nullptr, + { 0u, 1u, + 0u, sizeof(D3D9PresentInfo) }, + vsCode); + + m_fragShader = m_device->createShader( + VK_SHADER_STAGE_FRAGMENT_BIT, + fsResourceSlots.size(), + fsResourceSlots.data(), + { 1u, 1u }, fsCode); + } + + + void D3D9SwapChainEx::InitRamp() { + for (uint32_t i = 0; i < NumControlPoints; i++) { + DWORD identity = DWORD(MapGammaControlPoint(float(i) / float(NumControlPoints - 1))); + + m_ramp.red[i] = identity; + m_ramp.green[i] = identity; + m_ramp.blue[i] = identity; + } + } + + + + uint32_t D3D9SwapChainEx::GetActualFrameLatency() { + uint32_t maxFrameLatency = m_parent->GetFrameLatency(); + + if (m_frameLatencyCap) + maxFrameLatency = std::min(maxFrameLatency, m_frameLatencyCap); + + maxFrameLatency = std::min(maxFrameLatency, m_presentParams.BackBufferCount + 1); + return maxFrameLatency; + } + + + /** + * \brief Lists candidate surface formats for a back buffer format + * + * Writes the Vulkan surface formats that may be used for + * \c Format to \c pDstFormats, in the order of the cases + * below. Every case writes only its own candidates. A format + * without a dedicated case logs a warning and receives the + * 8-bit RGBA candidates. + * \param [in] Format D3D9 back buffer format + * \param [out] pDstFormats Receives the candidates. Must + * have room for at least three entries. + * \returns Number of entries written + */ + uint32_t D3D9SwapChainEx::PickFormats( + D3D9Format Format, + VkSurfaceFormatKHR* pDstFormats) { + uint32_t n = 0; + + switch (Format) { + default: + Logger::warn(str::format("D3D9SwapChainEx: Unexpected format: ", Format)); + /* Deliberate fall-through: unknown formats use the 8-bit RGBA candidates */ + + case D3D9Format::A8R8G8B8: + case D3D9Format::X8R8G8B8: + case D3D9Format::A8B8G8R8: + case D3D9Format::X8B8G8R8: { + pDstFormats[n++] = { VK_FORMAT_R8G8B8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + pDstFormats[n++] = { 
VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + } break; + + case D3D9Format::A2R10G10B10: + case D3D9Format::A2B10G10R10: { + pDstFormats[n++] = { VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + pDstFormats[n++] = { VK_FORMAT_A2R10G10B10_UNORM_PACK32, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + } break; + + case D3D9Format::X1R5G5B5: + case D3D9Format::A1R5G5B5: { + pDstFormats[n++] = { VK_FORMAT_B5G5R5A1_UNORM_PACK16, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + pDstFormats[n++] = { VK_FORMAT_R5G5B5A1_UNORM_PACK16, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + pDstFormats[n++] = { VK_FORMAT_A1R5G5B5_UNORM_PACK16, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + } break; /* must not fall through into the R5G6B5 candidates */ + + case D3D9Format::R5G6B5: { + pDstFormats[n++] = { VK_FORMAT_B5G6R5_UNORM_PACK16, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + pDstFormats[n++] = { VK_FORMAT_R5G6B5_UNORM_PACK16, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }; + } break; + } + + return n; + } + + + uint32_t D3D9SwapChainEx::PickPresentModes( + BOOL Vsync, + VkPresentModeKHR* pDstModes) { + uint32_t n = 0; + + if (Vsync) { + pDstModes[n++] = VK_PRESENT_MODE_FIFO_KHR; + } else { + pDstModes[n++] = VK_PRESENT_MODE_IMMEDIATE_KHR; + pDstModes[n++] = VK_PRESENT_MODE_MAILBOX_KHR; + pDstModes[n++] = VK_PRESENT_MODE_FIFO_RELAXED_KHR; + } + + return n; + } + + + uint32_t D3D9SwapChainEx::PickImageCount( + UINT Preferred) { + int32_t option = m_parent->GetOptions()->numBackBuffers; + return option > 0 ? uint32_t(option) : uint32_t(Preferred); + } + + HRESULT D3D9SwapChainEx::EnterFullscreenMode( + D3DPRESENT_PARAMETERS* pPresentParams, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode) { + // Find a display mode that matches what we need + ::GetWindowRect(m_window, &m_windowState.rect); + + if (FAILED(ChangeDisplayMode(pPresentParams, pFullscreenDisplayMode))) { + Logger::err("D3D9: EnterFullscreenMode: Failed to change display mode"); + return D3DERR_INVALIDCALL; + } + + // Change the window flags to remove the decoration etc. 
+ LONG style = ::GetWindowLongW(m_window, GWL_STYLE); + LONG exstyle = ::GetWindowLongW(m_window, GWL_EXSTYLE); + + m_windowState.style = style; + m_windowState.exstyle = exstyle; + + style &= ~WS_OVERLAPPEDWINDOW; + exstyle &= ~WS_EX_OVERLAPPEDWINDOW; + + ::SetWindowLongW(m_window, GWL_STYLE, style); + ::SetWindowLongW(m_window, GWL_EXSTYLE, exstyle); + + // Move the window so that it covers the entire output + RECT rect; + GetMonitorRect(GetDefaultMonitor(), &rect); + + ::SetWindowPos(m_window, HWND_TOPMOST, + rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, + SWP_FRAMECHANGED | SWP_SHOWWINDOW | SWP_NOACTIVATE); + + m_monitor = GetDefaultMonitor(); + + return D3D_OK; + } + + + HRESULT D3D9SwapChainEx::LeaveFullscreenMode() { + if (!IsWindow(m_window)) + return D3DERR_INVALIDCALL; + + if (FAILED(RestoreDisplayMode(m_monitor))) + Logger::warn("D3D9: LeaveFullscreenMode: Failed to restore display mode"); + + m_monitor = nullptr; + + // Only restore the window style if the application hasn't + // changed them. This is in line with what native D3D9 does. 
+ LONG curStyle = ::GetWindowLongW(m_window, GWL_STYLE) & ~WS_VISIBLE; + LONG curExstyle = ::GetWindowLongW(m_window, GWL_EXSTYLE) & ~WS_EX_TOPMOST; + + if (curStyle == (m_windowState.style & ~(WS_VISIBLE | WS_OVERLAPPEDWINDOW)) + && curExstyle == (m_windowState.exstyle & ~(WS_EX_TOPMOST | WS_EX_OVERLAPPEDWINDOW))) { + ::SetWindowLongW(m_window, GWL_STYLE, m_windowState.style); + ::SetWindowLongW(m_window, GWL_EXSTYLE, m_windowState.exstyle); + } + + // Restore window position and apply the style + const RECT rect = m_windowState.rect; + + ::SetWindowPos(m_window, 0, + rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, + SWP_FRAMECHANGED | SWP_NOZORDER | SWP_NOACTIVATE); + + return D3D_OK; + } + + + HRESULT D3D9SwapChainEx::ChangeDisplayMode( + D3DPRESENT_PARAMETERS* pPresentParams, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode) { + D3DDISPLAYMODEEX mode; + + if (pFullscreenDisplayMode == nullptr) { + mode.Width = pPresentParams->BackBufferWidth; + mode.Height = pPresentParams->BackBufferHeight; + mode.Format = pPresentParams->BackBufferFormat; + mode.RefreshRate = pPresentParams->FullScreen_RefreshRateInHz; + mode.ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; + mode.Size = sizeof(D3DDISPLAYMODEEX); + } + + return SetMonitorDisplayMode(GetDefaultMonitor(), pFullscreenDisplayMode == nullptr ? 
&mode : pFullscreenDisplayMode); + } + + /** + * \brief Restores the display mode stored in the registry + * + * Queries the mode with ENUM_REGISTRY_SETTINGS for the device + * named in m_monInfo, logs it, and applies it through + * SetMonitorDisplayMode. The format is always reported as + * D3DFMT_X8R8G8B8 and the scanline ordering as progressive. + * \param [in] hMonitor Monitor handle. Only checked against nullptr here. + * \returns D3DERR_INVALIDCALL if \c hMonitor is null or the + * query fails, otherwise the result of SetMonitorDisplayMode + * + * NOTE(review): the mode is applied to GetDefaultMonitor() + * rather than to \c hMonitor. Confirm that this is intended. + */ + HRESULT D3D9SwapChainEx::RestoreDisplayMode(HMONITOR hMonitor) { + if (hMonitor == nullptr) + return D3DERR_INVALIDCALL; + + DEVMODEW devMode = { }; + devMode.dmSize = sizeof(devMode); + + if (!::EnumDisplaySettingsW(m_monInfo.szDevice, ENUM_REGISTRY_SETTINGS, &devMode)) + return D3DERR_INVALIDCALL; + + Logger::info(str::format("D3D9: Setting display mode: ", + devMode.dmPelsWidth, "x", devMode.dmPelsHeight, "@", + devMode.dmDisplayFrequency)); + + D3DDISPLAYMODEEX mode; + mode.Width = devMode.dmPelsWidth; + mode.Height = devMode.dmPelsHeight; + mode.RefreshRate = devMode.dmDisplayFrequency; + mode.Format = D3DFMT_X8R8G8B8; // Fix me + mode.ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; + mode.Size = sizeof(D3DDISPLAYMODEEX); + + return SetMonitorDisplayMode(GetDefaultMonitor(), &mode); + } + + bool D3D9SwapChainEx::UpdatePresentRegion(const RECT* pSourceRect, const RECT* pDestRect) { + if (pSourceRect == nullptr) { + m_srcRect.top = 0; + m_srcRect.left = 0; + m_srcRect.right = m_presentParams.BackBufferWidth; + m_srcRect.bottom = m_presentParams.BackBufferHeight; + } + else + m_srcRect = *pSourceRect; + + RECT dstRect; + if (pDestRect == nullptr) { + // TODO: Should we hook WM_SIZE message for this? 
+ UINT width, height; + GetWindowClientSize(m_window, &width, &height); + + dstRect.top = 0; + dstRect.left = 0; + dstRect.right = LONG(width); + dstRect.bottom = LONG(height); + } + else + dstRect = *pDestRect; + + bool recreate = + m_dstRect.left != dstRect.left + || m_dstRect.top != dstRect.top + || m_dstRect.right != dstRect.right + || m_dstRect.bottom != dstRect.bottom; + + m_dstRect = dstRect; + + return recreate; + } + + /** + * \brief Computes the size of the presentation area + * + * Derived from the destination rectangle. Each dimension is + * clamped in signed arithmetic before the conversion, so an + * empty or inverted rectangle yields 1 instead of wrapping + * around to a huge unsigned value. + * \returns Extent that is at least 1x1 + */ + VkExtent2D D3D9SwapChainEx::GetPresentExtent() { + return VkExtent2D { + uint32_t(std::max<LONG>(m_dstRect.right - m_dstRect.left, 1)), + uint32_t(std::max<LONG>(m_dstRect.bottom - m_dstRect.top, 1)) }; + } + + /** + * \brief Refreshes the cached info of the default monitor + * + * Throws a DxvkError if the monitor info cannot be queried. + */ + void D3D9SwapChainEx::UpdateMonitorInfo() { + m_monInfo.cbSize = sizeof(m_monInfo); + + if (!::GetMonitorInfoW(GetDefaultMonitor(), reinterpret_cast<MONITORINFO*>(&m_monInfo))) + throw DxvkError("D3D9SwapChainEx::GetDisplayModeEx: Failed to query monitor info"); + } + + + VkFullScreenExclusiveEXT D3D9SwapChainEx::PickFullscreenMode() { + return m_dialog + ? VK_FULL_SCREEN_EXCLUSIVE_DISALLOWED_EXT + : VK_FULL_SCREEN_EXCLUSIVE_DEFAULT_EXT; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_swapchain.h b/src/d3d9/d3d9_swapchain.h new file mode 100644 index 000000000..9711a4ffb --- /dev/null +++ b/src/d3d9/d3d9_swapchain.h @@ -0,0 +1,226 @@ +#pragma once + +#include "d3d9_device_child.h" +#include "d3d9_device.h" +#include "d3d9_format.h" + +#include "../dxvk/hud/dxvk_hud.h" + +#include "../util/sync/sync_signal.h" + +#include <vector> + +namespace dxvk { + + class D3D9Surface; + + /** + * \brief Gamma control point + * + * Control points are stored as normalized + * 16-bit unsigned integer values that will + * be converted back to floats in the shader. 
+ */ + struct D3D9_VK_GAMMA_CP { + uint16_t R, G, B, A; + }; + + using D3D9SwapChainExBase = D3D9DeviceChild; + class D3D9SwapChainEx final : public D3D9SwapChainExBase { + static constexpr uint32_t NumControlPoints = 256; + public: + + D3D9SwapChainEx( + D3D9DeviceEx* pDevice, + D3DPRESENT_PARAMETERS* pPresentParams, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode); + + ~D3D9SwapChainEx(); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + HRESULT STDMETHODCALLTYPE Present( + const RECT* pSourceRect, + const RECT* pDestRect, + HWND hDestWindowOverride, + const RGNDATA* pDirtyRegion, + DWORD dwFlags); + + HRESULT STDMETHODCALLTYPE GetFrontBufferData(IDirect3DSurface9* pDestSurface); + + HRESULT STDMETHODCALLTYPE GetBackBuffer( + UINT iBackBuffer, + D3DBACKBUFFER_TYPE Type, + IDirect3DSurface9** ppBackBuffer); + + HRESULT STDMETHODCALLTYPE GetRasterStatus(D3DRASTER_STATUS* pRasterStatus); + + HRESULT STDMETHODCALLTYPE GetDisplayMode(D3DDISPLAYMODE* pMode); + + HRESULT STDMETHODCALLTYPE GetPresentParameters(D3DPRESENT_PARAMETERS* pPresentationParameters); + + HRESULT STDMETHODCALLTYPE GetLastPresentCount(UINT* pLastPresentCount); + + HRESULT STDMETHODCALLTYPE GetPresentStats(D3DPRESENTSTATS* pPresentationStatistics); + + HRESULT STDMETHODCALLTYPE GetDisplayModeEx(D3DDISPLAYMODEEX* pMode, D3DDISPLAYROTATION* pRotation); + + void Reset( + D3DPRESENT_PARAMETERS* pPresentParams, + D3DDISPLAYMODEEX* pFullscreenDisplayMode); + + HRESULT WaitForVBlank(); + + void SetGammaRamp( + DWORD Flags, + const D3DGAMMARAMP* pRamp); + + void GetGammaRamp(D3DGAMMARAMP* pRamp); + + void Invalidate(HWND hWindow); + + HRESULT SetDialogBoxMode(bool bEnableDialogs); + + D3D9Surface* GetBackBuffer(UINT iBackBuffer); + + private: + + enum BindingIds : uint32_t { + Image = 0, + Gamma = 1, + }; + + + struct WindowState { + LONG style = 0; + LONG exstyle = 0; + RECT rect = { 0, 0, 0, 0 }; + }; + + D3DPRESENT_PARAMETERS m_presentParams; + D3DGAMMARAMP m_ramp; + + Rc 
m_device; + Rc m_context; + + Rc m_presenter; + + Rc m_vertShader; + Rc m_fragShader; + + Rc m_samplerFitting; + Rc m_samplerScaling; + + Rc m_gammaSampler; + Rc m_gammaTexture; + Rc m_gammaTextureView; + + Rc m_swapImage; + Rc m_swapImageResolve; + Rc m_swapImageView; + + Rc m_hud; + + DxvkInputAssemblyState m_iaState; + DxvkRasterizerState m_rsState; + DxvkMultisampleState m_msState; + DxvkDepthStencilState m_dsState; + DxvkLogicOpState m_loState; + DxvkBlendMode m_blendMode; + + Com m_backBuffer = nullptr; + + RECT m_srcRect; + RECT m_dstRect; + + DxvkSubmitStatus m_presentStatus; + + std::vector> m_imageViews; + + + uint64_t m_frameId = D3D9DeviceEx::MaxFrameLatency; + uint32_t m_frameLatencyCap = 0; + Rc m_frameLatencySignal; + + bool m_dirty = true; + bool m_vsync = true; + + bool m_dialog; + bool m_dialogChanged = false; + + HWND m_window = nullptr; + HMONITOR m_monitor = nullptr; + + MONITORINFOEXW m_monInfo; + + WindowState m_windowState; + + void PresentImage(UINT PresentInterval); + + void SubmitPresent(const vk::PresenterSync& Sync, uint32_t FrameId); + + void SynchronizePresent(); + + void RecreateSwapChain( + BOOL Vsync); + + void CreatePresenter(); + + void CreateRenderTargetViews(); + + void CreateBackBuffer(); + + void CreateGammaTexture( + UINT NumControlPoints, + const D3D9_VK_GAMMA_CP* pControlPoints); + + void DestroyGammaTexture(); + + void CreateHud(); + + void InitRenderState(); + + void InitSamplers(); + + void InitShaders(); + + void InitRamp(); + + uint32_t GetActualFrameLatency(); + + uint32_t PickFormats( + D3D9Format Format, + VkSurfaceFormatKHR* pDstFormats); + + uint32_t PickPresentModes( + BOOL Vsync, + VkPresentModeKHR* pDstModes); + + uint32_t PickImageCount( + UINT Preferred); + + void NormalizePresentParameters(D3DPRESENT_PARAMETERS* pPresentParams); + + HRESULT EnterFullscreenMode( + D3DPRESENT_PARAMETERS* pPresentParams, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode); + + HRESULT LeaveFullscreenMode(); + + HRESULT 
ChangeDisplayMode( + D3DPRESENT_PARAMETERS* pPresentParams, + const D3DDISPLAYMODEEX* pFullscreenDisplayMode); + + HRESULT RestoreDisplayMode(HMONITOR hMonitor); + + void UpdateMonitorInfo(); + + bool UpdatePresentRegion(const RECT* pSourceRect, const RECT* pDestRect); + + VkExtent2D GetPresentExtent(); + + VkFullScreenExclusiveEXT PickFullscreenMode(); + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_swvp_emu.cpp b/src/d3d9/d3d9_swvp_emu.cpp new file mode 100644 index 000000000..a874082be --- /dev/null +++ b/src/d3d9/d3d9_swvp_emu.cpp @@ -0,0 +1,358 @@ +#include "d3d9_swvp_emu.h" + +#include "d3d9_device.h" +#include "d3d9_vertex_declaration.h" + +#include "../spirv/spirv_module.h" + +namespace dxvk { + + // Doesn't compare everything, only what we use in SWVP. + + /** + * \brief Hashes a list of vertex elements + * + * Combines the stream, offset, type, method, usage and + * usage index of every element, in element order. + * \param [in] key Vertex elements to hash + * \returns Combined hash value + */ + size_t D3D9VertexDeclHash::operator () (const D3D9VertexElements& key) const { + DxvkHashState hash; + + /* One hasher per field width: 8-bit and 16-bit element members */ + std::hash<BYTE> bytehash; + std::hash<WORD> wordhash; + + for (auto& element : key) { + hash.add(wordhash(element.Stream)); + hash.add(wordhash(element.Offset)); + hash.add(bytehash(element.Type)); + hash.add(bytehash(element.Method)); + hash.add(bytehash(element.Usage)); + hash.add(bytehash(element.UsageIndex)); + } + + return hash; + } + + bool D3D9VertexDeclEq::operator () (const D3D9VertexElements& a, const D3D9VertexElements& b) const { + if (a.size() != b.size()) + return false; + + bool equal = true; + + for (uint32_t i = 0; i < a.size(); i++) + equal &= std::memcmp(&a[i], &b[i], sizeof(a[0])) == 0; + + return equal; + } + + enum class DecltypeClass { + Float, Byte, Short, Dec, Half + }; + + enum DecltypeFlags { + Signed = 1, + Normalize = 2, + ReverseRGB = 4 + }; + + struct Decltype { + DecltypeClass Class; + uint32_t VectorCount; + uint32_t Flags; + }; + + Decltype ClassifyDecltype(D3DDECLTYPE Type) { + switch (Type) { + case D3DDECLTYPE_FLOAT1: return { DecltypeClass::Float, 1, DecltypeFlags::Signed }; + case D3DDECLTYPE_FLOAT2: return { DecltypeClass::Float, 2, DecltypeFlags::Signed 
}; + case D3DDECLTYPE_FLOAT3: return { DecltypeClass::Float, 3, DecltypeFlags::Signed }; + case D3DDECLTYPE_FLOAT4: return { DecltypeClass::Float, 4, DecltypeFlags::Signed }; + case D3DDECLTYPE_D3DCOLOR: return { DecltypeClass::Byte, 4, DecltypeFlags::Normalize | DecltypeFlags::ReverseRGB }; + case D3DDECLTYPE_UBYTE4: return { DecltypeClass::Byte, 4, 0 }; + case D3DDECLTYPE_SHORT2: return { DecltypeClass::Short, 2, DecltypeFlags::Signed }; + case D3DDECLTYPE_SHORT4: return { DecltypeClass::Short, 4, DecltypeFlags::Signed }; + case D3DDECLTYPE_UBYTE4N: return { DecltypeClass::Byte, 4, DecltypeFlags::Normalize }; + case D3DDECLTYPE_SHORT2N: return { DecltypeClass::Short, 2, DecltypeFlags::Signed | DecltypeFlags::Normalize }; + case D3DDECLTYPE_SHORT4N: return { DecltypeClass::Short, 4, DecltypeFlags::Signed | DecltypeFlags::Normalize }; + case D3DDECLTYPE_USHORT2N: return { DecltypeClass::Short, 2, DecltypeFlags::Normalize }; + case D3DDECLTYPE_USHORT4N: return { DecltypeClass::Short, 4, DecltypeFlags::Normalize }; + case D3DDECLTYPE_UDEC3: return { DecltypeClass::Dec, 3, 0 }; + case D3DDECLTYPE_DEC3N: return { DecltypeClass::Dec, 3, DecltypeFlags::Signed | DecltypeFlags::Normalize }; + case D3DDECLTYPE_FLOAT16_2: return { DecltypeClass::Half, 2, DecltypeFlags::Signed }; + case D3DDECLTYPE_FLOAT16_4: return { DecltypeClass::Half, 4, DecltypeFlags::Signed }; + default: return { DecltypeClass::Float, 4, DecltypeFlags::Signed }; + } + } + + class D3D9SWVPEmulatorGenerator { + + public: + + D3D9SWVPEmulatorGenerator(const std::string& name) { + m_entryPointId = m_module.allocateId(); + + m_module.setDebugSource( + spv::SourceLanguageUnknown, 0, + m_module.addDebugString(name.c_str()), + nullptr); + + m_module.setMemoryModel( + spv::AddressingModelLogical, + spv::MemoryModelGLSL450); + + m_module.enableCapability(spv::CapabilityGeometry); + + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeInputPoints); + m_module.setExecutionMode(m_entryPointId, 
spv::ExecutionModeOutputPoints); + // This has to be > 0 for some reason even though + // we will never emit a vertex + m_module.setOutputVertices(m_entryPointId, 1); + m_module.setInvocations(m_entryPointId, 1); + + m_module.functionBegin(m_module.defVoidType(), m_entryPointId, m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr), spv::FunctionControlMaskNone); + m_module.opLabel(m_module.allocateId()); + } + + void compile(const D3D9VertexDecl* pDecl) { + uint32_t uint_t = m_module.defIntType(32, false); + uint32_t float_t = m_module.defFloatType(32); + uint32_t vec4_t = m_module.defVectorType(float_t, 4); + + uint32_t vec4_singular_array_t = m_module.defArrayType(vec4_t, m_module.constu32(1)); + + // Setup the buffer + uint32_t bufferSlot = getSWVPBufferSlot(); + + uint32_t arrayType = m_module.defRuntimeArrayTypeUnique(uint_t); + m_module.decorateArrayStride(arrayType, sizeof(uint32_t)); + + uint32_t buffer_t = m_module.defStructTypeUnique(1, &arrayType); + m_module.memberDecorateOffset(buffer_t, 0, 0); + m_module.decorate(buffer_t, spv::DecorationBufferBlock); + + uint32_t buffer = m_module.newVar(m_module.defPointerType(buffer_t, spv::StorageClassUniform), spv::StorageClassUniform); + m_module.decorateDescriptorSet(buffer, 0); + m_module.decorateBinding(buffer, bufferSlot); + + DxvkResourceSlot bufferRes; + bufferRes.slot = bufferSlot; + bufferRes.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bufferRes.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + bufferRes.access = VK_ACCESS_SHADER_WRITE_BIT; + m_resourceSlots.push_back(bufferRes); + + // Load our builtins + uint32_t primitiveIdPtr = m_module.newVar(m_module.defPointerType(uint_t, spv::StorageClassInput), spv::StorageClassInput); + m_module.decorateBuiltIn(primitiveIdPtr, spv::BuiltInPrimitiveId); + m_entryPointInterfaces.push_back(primitiveIdPtr); + + uint32_t primitiveId = m_module.opLoad(uint_t, primitiveIdPtr); + + // The size of any given vertex + uint32_t vertexSize = 
m_module.constu32(pDecl->GetSize() / sizeof(uint32_t)); + + //The offset of this vertex from the beginning of the buffer + uint32_t thisVertexOffset = m_module.opIMul(uint_t, vertexSize, primitiveId); + + + for (auto& element : pDecl->GetElements()) { + // Load the slot associated with this element + DxsoSemantic semantic = { DxsoUsage(element.Usage), element.UsageIndex }; + + uint32_t elementPtr; + uint32_t elementVar; + + elementPtr = m_module.newVar(m_module.defPointerType(vec4_singular_array_t, spv::StorageClassInput), spv::StorageClassInput); + if ((semantic.usage == DxsoUsage::Position || semantic.usage == DxsoUsage::PositionT) && element.UsageIndex == 0) { + // Load from builtin + m_module.decorateBuiltIn(elementPtr, spv::BuiltInPosition); + } + else { + // Load from slot + uint32_t slotIdx = RegisterLinkerSlot(semantic); + + m_module.decorateLocation(elementPtr, slotIdx); + m_interfaceSlots.inputSlots |= 1u << slotIdx; + } + + uint32_t zero = m_module.constu32(0); + elementVar = m_module.opAccessChain(m_module.defPointerType(vec4_t, spv::StorageClassInput), elementPtr, 1, &zero); + elementVar = m_module.opLoad(vec4_t, elementVar); + + m_entryPointInterfaces.push_back(elementPtr); + + // The offset of this element from the beginning of any given vertex + uint32_t perVertexElementOffset = m_module.constu32(element.Offset / sizeof(uint32_t)); + + // The offset of this element from the beginning of the buffer for **THIS** vertex + uint32_t elementOffset = m_module.opIAdd(uint_t, thisVertexOffset, perVertexElementOffset); + + // Write to the buffer at the element offset for each part of the vector. + Decltype elementInfo = ClassifyDecltype(D3DDECLTYPE(element.Type)); + + if (elementInfo.Class == DecltypeClass::Dec) { + // TODO! + Logger::warn("Encountered DEC3/UDEC3N class, ignoring..."); + continue; + } + + uint32_t vecn_t = m_module.defVectorType(float_t, elementInfo.VectorCount); + uint32_t componentSet; + + // Modifiers... 
+ if (elementInfo.Flags & DecltypeFlags::ReverseRGB) { + std::array indices = { 2, 1, 0, 3 }; + componentSet = m_module.opVectorShuffle(vecn_t, elementVar, elementVar, elementInfo.VectorCount, indices.data()); + } + else { + std::array indices = { 0, 1, 2, 3 }; + componentSet = m_module.opVectorShuffle(vecn_t, elementVar, elementVar, elementInfo.VectorCount, indices.data()); + } + + if (elementInfo.Flags & DecltypeFlags::Normalize) + componentSet = m_module.opVectorTimesScalar(vecn_t, componentSet, m_module.constf32(255.0f)); + + + bool isSigned = elementInfo.Flags & DecltypeFlags::Signed; + + // Convert the component to the correct type/value. + switch (elementInfo.Class) { + case DecltypeClass::Float: break; // Do nothing! + case DecltypeClass::Byte: { + m_module.enableCapability(spv::CapabilityInt8); + + uint32_t type = m_module.defIntType(8, isSigned); + type = m_module.defVectorType(type, elementInfo.VectorCount); + + componentSet = isSigned + ? m_module.opConvertFtoS(type, componentSet) + : m_module.opConvertFtoU(type, componentSet); + + break; + } + case DecltypeClass::Short: { + m_module.enableCapability(spv::CapabilityInt16); + + uint32_t type = m_module.defIntType(16, isSigned); + type = m_module.defVectorType(type, elementInfo.VectorCount); + + componentSet = isSigned + ? m_module.opConvertFtoS(type, componentSet) + : m_module.opConvertFtoU(type, componentSet); + + break; + } + case DecltypeClass::Half: { + m_module.enableCapability(spv::CapabilityFloat16); + + uint32_t type = m_module.defFloatType(16); + type = m_module.defVectorType(type, elementInfo.VectorCount); + componentSet = m_module.opFConvert(type, componentSet); + + break; + } + case DecltypeClass::Dec: { + // TODO! + break; + } + } + + // Bitcast to dwords before we write. 
+ uint32_t dwordCount = GetDecltypeSize(D3DDECLTYPE(element.Type)) / sizeof(uint32_t); + uint32_t dwordVector = m_module.opBitcast( + m_module.defVectorType(uint_t, dwordCount), + componentSet); + + // Finally write each dword to the buffer! + for (uint32_t i = 0; i < dwordCount; i++) { + std::array bufferIndices = { m_module.constu32(0), elementOffset }; + + uint32_t writeDest = m_module.opAccessChain(m_module.defPointerType(uint_t, spv::StorageClassUniform), buffer, bufferIndices.size(), bufferIndices.data()); + uint32_t currentDword = m_module.opCompositeExtract(uint_t, dwordVector, 1, &i); + + m_module.opStore(writeDest, currentDword); + + elementOffset = m_module.opIAdd(uint_t, elementOffset, m_module.constu32(1)); + } + } + } + + Rc finalize() { + m_module.opReturn(); + m_module.functionEnd(); + + m_module.addEntryPoint(m_entryPointId, + spv::ExecutionModelGeometry, "main", + m_entryPointInterfaces.size(), + m_entryPointInterfaces.data()); + m_module.setDebugName(m_entryPointId, "main"); + + DxvkShaderConstData constData = { }; + + return new DxvkShader( + VK_SHADER_STAGE_GEOMETRY_BIT, + m_resourceSlots.size(), + m_resourceSlots.data(), + m_interfaceSlots, + m_module.compile(), + DxvkShaderOptions(), + std::move(constData)); + } + + private: + + SpirvModule m_module; + + std::vector m_entryPointInterfaces; + uint32_t m_entryPointId = 0; + + std::vector m_resourceSlots; + DxvkInterfaceSlots m_interfaceSlots; + + }; + + Rc D3D9SWVPEmulator::GetShaderModule(D3D9DeviceEx* pDevice, const D3D9VertexDecl* pDecl) { + auto& elements = pDecl->GetElements(); + + // Use the shader's unique key for the lookup + { std::unique_lock lock(m_mutex); + + auto entry = m_modules.find(elements); + if (entry != m_modules.end()) + return entry->second; + } + + Sha1Hash hash = Sha1Hash::compute( + elements.data(), elements.size() * sizeof(elements[0])); + + DxvkShaderKey key = { VK_SHADER_STAGE_GEOMETRY_BIT , hash }; + std::string name = str::format("SWVP_", key.toString()); + + // 
This shader has not been compiled yet, so we have to create a + // new module. This takes a while, so we won't lock the structure. + D3D9SWVPEmulatorGenerator generator(name); + generator.compile(pDecl); + Rc shader = generator.finalize(); + + shader->setShaderKey(key); + pDevice->GetDXVKDevice()->registerShader(shader); + + const std::string dumpPath = env::getEnvVar("DXVK_SHADER_DUMP_PATH"); + + if (dumpPath.size() != 0) { + std::ofstream dumpStream( + str::format(dumpPath, "/", name, ".spv"), + std::ios_base::binary | std::ios_base::trunc); + + shader->dump(dumpStream); + } + + // Insert the new module into the lookup table. If another thread + // has compiled the same shader in the meantime, we should return + // that object instead and discard the newly created module. + { std::unique_lock lock(m_mutex); + + auto status = m_modules.insert({ elements, shader }); + if (!status.second) + return status.first->second; + } + + return shader; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_swvp_emu.h b/src/d3d9/d3d9_swvp_emu.h new file mode 100644 index 000000000..397bd83b0 --- /dev/null +++ b/src/d3d9/d3d9_swvp_emu.h @@ -0,0 +1,36 @@ +#pragma once + +#include "d3d9_include.h" + +#include "../dxvk/dxvk_shader.h" + +namespace dxvk { + + class D3D9VertexDecl; + class D3D9DeviceEx; + + struct D3D9VertexDeclHash { + size_t operator () (const D3D9VertexElements& key) const; + }; + + struct D3D9VertexDeclEq { + bool operator () (const D3D9VertexElements& a, const D3D9VertexElements& b) const; + }; + + class D3D9SWVPEmulator { + + public: + + Rc GetShaderModule(D3D9DeviceEx* pDevice, const D3D9VertexDecl* pDecl); + + private: + + std::mutex m_mutex; + + std::unordered_map< + D3D9VertexElements, Rc, + D3D9VertexDeclHash, D3D9VertexDeclEq> m_modules; + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_texture.cpp b/src/d3d9/d3d9_texture.cpp new file mode 100644 index 000000000..33aaef255 --- /dev/null +++ b/src/d3d9/d3d9_texture.cpp @@ -0,0 
+1,252 @@ +#include "d3d9_texture.h" + +#include "d3d9_util.h" + +namespace dxvk { + + // Direct3DTexture9 + + D3D9Texture2D::D3D9Texture2D( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3D9_VK_FORMAT_MAPPING Mapping) + : D3D9Texture2DBase( pDevice, pDesc, D3DRTYPE_TEXTURE, Mapping ) { } + + + HRESULT STDMETHODCALLTYPE D3D9Texture2D::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DResource9) + || riid == __uuidof(IDirect3DBaseTexture9) + || riid == __uuidof(IDirect3DTexture9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9Texture2D::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + D3DRESOURCETYPE STDMETHODCALLTYPE D3D9Texture2D::GetType() { + return D3DRTYPE_TEXTURE; + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture2D::GetLevelDesc(UINT Level, D3DSURFACE_DESC *pDesc) { + auto* surface = GetSubresource(Level); + if (surface == nullptr) + return D3DERR_INVALIDCALL; + + return surface->GetDesc(pDesc); + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture2D::GetSurfaceLevel(UINT Level, IDirect3DSurface9** ppSurfaceLevel) { + InitReturnPtr(ppSurfaceLevel); + auto* surface = GetSubresource(Level); + + if (ppSurfaceLevel == nullptr || surface == nullptr) + return D3DERR_INVALIDCALL; + + *ppSurfaceLevel = ref(surface); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture2D::LockRect(UINT Level, D3DLOCKED_RECT* pLockedRect, CONST RECT* pRect, DWORD Flags) { + auto* surface = GetSubresource(Level); + if (surface == nullptr || pLockedRect == nullptr) + return D3DERR_INVALIDCALL; + + return surface->LockRect(pLockedRect, pRect, Flags); + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture2D::UnlockRect(UINT Level) { + auto* surface = GetSubresource(Level); + if (surface == nullptr) + return D3DERR_INVALIDCALL; + + return 
surface->UnlockRect(); + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture2D::AddDirtyRect(CONST RECT* pDirtyRect) { + return D3D_OK; + } + + + // Direct3DVolumeTexture9 + + + D3D9Texture3D::D3D9Texture3D( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3D9_VK_FORMAT_MAPPING Mapping) + : D3D9Texture3DBase( pDevice, pDesc, D3DRTYPE_VOLUMETEXTURE, Mapping ) { } + + + HRESULT STDMETHODCALLTYPE D3D9Texture3D::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DResource9) + || riid == __uuidof(IDirect3DBaseTexture9) + || riid == __uuidof(IDirect3DVolumeTexture9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9Texture3D::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + D3DRESOURCETYPE STDMETHODCALLTYPE D3D9Texture3D::GetType() { + return D3DRTYPE_VOLUMETEXTURE; + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture3D::GetLevelDesc(UINT Level, D3DVOLUME_DESC *pDesc) { + auto* volume = GetSubresource(Level); + if (volume == nullptr) + return D3DERR_INVALIDCALL; + + return volume->GetDesc(pDesc); + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture3D::GetVolumeLevel(UINT Level, IDirect3DVolume9** ppVolumeLevel) { + InitReturnPtr(ppVolumeLevel); + auto* volume = GetSubresource(Level); + + if (ppVolumeLevel == nullptr || volume == nullptr) + return D3DERR_INVALIDCALL; + + *ppVolumeLevel = ref(volume); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture3D::LockBox(UINT Level, D3DLOCKED_BOX* pLockedBox, CONST D3DBOX* pBox, DWORD Flags) { + auto* volume = GetSubresource(Level); + if (volume == nullptr || pLockedBox == nullptr) + return D3DERR_INVALIDCALL; + + return volume->LockBox(pLockedBox, pBox, Flags); + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture3D::UnlockBox(UINT Level) { + auto* volume = GetSubresource(Level); + if (volume == nullptr) 
+ return D3DERR_INVALIDCALL; + + return volume->UnlockBox(); + } + + + HRESULT STDMETHODCALLTYPE D3D9Texture3D::AddDirtyBox(CONST D3DBOX* pDirtyBox) { + return D3D_OK; + } + + + // Direct3DCubeTexture9 + + + D3D9TextureCube::D3D9TextureCube( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3D9_VK_FORMAT_MAPPING Mapping) + : D3D9TextureCubeBase( pDevice, pDesc, D3DRTYPE_CUBETEXTURE, Mapping ) { } + + + HRESULT STDMETHODCALLTYPE D3D9TextureCube::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DResource9) + || riid == __uuidof(IDirect3DBaseTexture9) + || riid == __uuidof(IDirect3DCubeTexture9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9TextureCube::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + D3DRESOURCETYPE STDMETHODCALLTYPE D3D9TextureCube::GetType() { + return D3DRTYPE_CUBETEXTURE; + } + + + HRESULT STDMETHODCALLTYPE D3D9TextureCube::GetLevelDesc(UINT Level, D3DSURFACE_DESC *pDesc) { + auto* surface = GetSubresource(Level); + + if (surface == nullptr) + return D3DERR_INVALIDCALL; + + return surface->GetDesc(pDesc); + } + + + HRESULT STDMETHODCALLTYPE D3D9TextureCube::GetCubeMapSurface(D3DCUBEMAP_FACES Face, UINT Level, IDirect3DSurface9** ppSurfaceLevel) { + InitReturnPtr(ppSurfaceLevel); + + if (Level >= m_texture.Desc()->MipLevels) + return D3DERR_INVALIDCALL; + + auto* surface = GetSubresource( + m_texture.CalcSubresource(UINT(Face), Level)); + + if (ppSurfaceLevel == nullptr || surface == nullptr) + return D3DERR_INVALIDCALL; + + *ppSurfaceLevel = ref(surface); + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9TextureCube::LockRect(D3DCUBEMAP_FACES Face, UINT Level, D3DLOCKED_RECT* pLockedRect, CONST RECT* pRect, DWORD Flags) { + auto* surface = GetSubresource( + m_texture.CalcSubresource(UINT(Face), 
Level)); + + if (surface == nullptr || pLockedRect == nullptr) + return D3DERR_INVALIDCALL; + + return surface->LockRect(pLockedRect, pRect, Flags); + } + + + HRESULT STDMETHODCALLTYPE D3D9TextureCube::UnlockRect(D3DCUBEMAP_FACES Face, UINT Level) { + auto* surface = GetSubresource( + m_texture.CalcSubresource(UINT(Face), Level)); + + if (surface == nullptr) + return D3DERR_INVALIDCALL; + + return surface->UnlockRect(); + } + + + HRESULT STDMETHODCALLTYPE D3D9TextureCube::AddDirtyRect(D3DCUBEMAP_FACES Face, CONST RECT* pDirtyRect) { + return D3D_OK; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_texture.h b/src/d3d9/d3d9_texture.h new file mode 100644 index 000000000..8c6c8c116 --- /dev/null +++ b/src/d3d9/d3d9_texture.h @@ -0,0 +1,236 @@ +#pragma once + +#include "d3d9_device.h" +#include "d3d9_surface.h" +#include "d3d9_volume.h" +#include "d3d9_util.h" + +#include +#include +#include +#include + +namespace dxvk { + + template + class D3D9BaseTexture : public D3D9Resource { + + public: + + struct alignas(16) SubresourceData { uint8_t data[sizeof(SubresourceType)]; }; + + D3D9BaseTexture( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3DRESOURCETYPE ResourceType, + D3D9_VK_FORMAT_MAPPING Mapping) + : D3D9Resource ( pDevice ) + , m_texture ( pDevice, pDesc, ResourceType, Mapping ) + , m_lod ( 0 ) + , m_autogenFilter ( D3DTEXF_LINEAR ) { + const uint32_t arraySlices = m_texture.Desc()->ArraySize; + const uint32_t mipLevels = m_texture.Desc()->MipLevels; + + m_subresources.resize(arraySlices * mipLevels); + + for (uint32_t i = 0; i < arraySlices; i++) { + for (uint32_t j = 0; j < mipLevels; j++) { + const uint32_t subresource = m_texture.CalcSubresource(i, j); + + SubresourceType* subObj = this->GetSubresource(subresource); + + new (subObj) SubresourceType( + pDevice, + &m_texture, + i, j, + this); + } + } + } + + ~D3D9BaseTexture() { + for (uint32_t i = 0; i < m_subresources.size(); i++) { + SubresourceType* subObj = 
this->GetSubresource(i); + subObj->~SubresourceType(); + } + } + + DWORD STDMETHODCALLTYPE SetLOD(DWORD LODNew) final { + DWORD oldLod = m_lod; + m_lod = LODNew; + + m_texture.RecreateSampledView(LODNew); + if (this->GetPrivateRefCount() > 0) + this->m_parent->MarkSamplersDirty(); + + return oldLod; + } + + DWORD STDMETHODCALLTYPE GetLOD() final { + return m_lod; + } + + DWORD STDMETHODCALLTYPE GetLevelCount() final { + return m_texture.Desc()->MipLevels; + } + + HRESULT STDMETHODCALLTYPE SetAutoGenFilterType(D3DTEXTUREFILTERTYPE FilterType) final { + m_autogenFilter = FilterType; + return D3D_OK; + } + + D3DTEXTUREFILTERTYPE STDMETHODCALLTYPE GetAutoGenFilterType() final { + return m_autogenFilter; + } + + void STDMETHODCALLTYPE GenerateMipSubLevels() final { + if (m_texture.IsAutomaticMip()) + this->m_parent->GenerateMips(&m_texture); + } + + D3D9CommonTexture* GetCommonTexture() { + return &m_texture; + } + + SubresourceType* GetSubresource(UINT Subresource) { + if (unlikely(Subresource >= m_subresources.size())) + return nullptr; + + return reinterpret_cast(&m_subresources[Subresource]); + } + + protected: + + D3D9CommonTexture m_texture; + + DWORD m_lod; + D3DTEXTUREFILTERTYPE m_autogenFilter; + + std::vector m_subresources; + + }; + + using D3D9Texture2DBase = D3D9BaseTexture; + class D3D9Texture2D final : public D3D9Texture2DBase { + + public: + + D3D9Texture2D( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3D9_VK_FORMAT_MAPPING Mapping); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + D3DRESOURCETYPE STDMETHODCALLTYPE GetType(); + + HRESULT STDMETHODCALLTYPE GetLevelDesc(UINT Level, D3DSURFACE_DESC *pDesc); + + HRESULT STDMETHODCALLTYPE GetSurfaceLevel(UINT Level, IDirect3DSurface9** ppSurfaceLevel); + + HRESULT STDMETHODCALLTYPE LockRect(UINT Level, D3DLOCKED_RECT* pLockedRect, CONST RECT* pRect, DWORD Flags); + + HRESULT STDMETHODCALLTYPE UnlockRect(UINT Level); + + HRESULT STDMETHODCALLTYPE 
AddDirtyRect(CONST RECT* pDirtyRect); + + }; + + using D3D9Texture3DBase = D3D9BaseTexture; + class D3D9Texture3D final : public D3D9Texture3DBase { + + public: + + D3D9Texture3D( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3D9_VK_FORMAT_MAPPING Mapping); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + D3DRESOURCETYPE STDMETHODCALLTYPE GetType(); + + HRESULT STDMETHODCALLTYPE GetLevelDesc(UINT Level, D3DVOLUME_DESC *pDesc); + + HRESULT STDMETHODCALLTYPE GetVolumeLevel(UINT Level, IDirect3DVolume9** ppSurfaceLevel); + + HRESULT STDMETHODCALLTYPE LockBox(UINT Level, D3DLOCKED_BOX* pLockedBox, CONST D3DBOX* pBox, DWORD Flags); + + HRESULT STDMETHODCALLTYPE UnlockBox(UINT Level); + + HRESULT STDMETHODCALLTYPE AddDirtyBox(CONST D3DBOX* pDirtyBox); + + }; + + using D3D9TextureCubeBase = D3D9BaseTexture; + class D3D9TextureCube final : public D3D9TextureCubeBase { + + public: + + D3D9TextureCube( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3D9_VK_FORMAT_MAPPING Mapping); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + D3DRESOURCETYPE STDMETHODCALLTYPE GetType(); + + HRESULT STDMETHODCALLTYPE GetLevelDesc(UINT Level, D3DSURFACE_DESC *pDesc); + + HRESULT STDMETHODCALLTYPE GetCubeMapSurface(D3DCUBEMAP_FACES Face, UINT Level, IDirect3DSurface9** ppSurfaceLevel); + + HRESULT STDMETHODCALLTYPE LockRect(D3DCUBEMAP_FACES Face, UINT Level, D3DLOCKED_RECT* pLockedRect, CONST RECT* pRect, DWORD Flags); + + HRESULT STDMETHODCALLTYPE UnlockRect(D3DCUBEMAP_FACES Face, UINT Level); + + HRESULT STDMETHODCALLTYPE AddDirtyRect(D3DCUBEMAP_FACES Face, CONST RECT* pDirtyRect); + + }; + + inline D3D9CommonTexture* GetCommonTexture(IDirect3DBaseTexture9* ptr) { + if (ptr == nullptr) + return nullptr; + + switch (ptr->GetType()) { + case D3DRTYPE_TEXTURE: return static_cast (ptr)->GetCommonTexture(); + case D3DRTYPE_CUBETEXTURE: return static_cast(ptr)->GetCommonTexture(); + case 
D3DRTYPE_VOLUMETEXTURE: return static_cast (ptr)->GetCommonTexture(); + default: + Logger::warn("Unknown texture resource type."); break; + } + + return nullptr; + } + + inline D3D9CommonTexture* GetCommonTexture(D3D9Surface* ptr) { + if (ptr == nullptr) + return nullptr; + + return ptr->GetCommonTexture(); + } + + inline D3D9CommonTexture* GetCommonTexture(IDirect3DSurface9* ptr) { + return GetCommonTexture(static_cast(ptr)); + } + + inline void TextureRefPrivate(IDirect3DBaseTexture9* tex, bool AddRef) { + if (tex == nullptr) + return; + + switch (tex->GetType()) { + case D3DRTYPE_TEXTURE: CastRefPrivate (tex, AddRef); break; + case D3DRTYPE_CUBETEXTURE: CastRefPrivate(tex, AddRef); break; + case D3DRTYPE_VOLUMETEXTURE: CastRefPrivate (tex, AddRef); break; + default: + Logger::warn("Unknown texture resource type."); break; + } + } + + inline void TextureChangePrivate(IDirect3DBaseTexture9*& dst, IDirect3DBaseTexture9* src) { + TextureRefPrivate(dst, false); + TextureRefPrivate(src, true); + dst = src; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_util.cpp b/src/d3d9/d3d9_util.cpp new file mode 100644 index 000000000..6ca5f1e11 --- /dev/null +++ b/src/d3d9/d3d9_util.cpp @@ -0,0 +1,414 @@ +#include "d3d9_util.h" + +namespace dxvk { + + typedef HRESULT (STDMETHODCALLTYPE *D3DXDisassembleShader) ( + const void* pShader, + BOOL EnableColorCode, + char* pComments, + ID3DBlob** ppDisassembly); // ppDisassembly is actually a D3DXBUFFER, but it has the exact same vtable as a ID3DBlob at the start. 
+ + D3DXDisassembleShader g_pfnDisassembleShader = nullptr; + + HRESULT DisassembleShader( + const void* pShader, + BOOL EnableColorCode, + char* pComments, + ID3DBlob** ppDisassembly) { + if (g_pfnDisassembleShader == nullptr) { + HMODULE d3d9x = LoadLibraryA("d3dx9.dll"); + + if (d3d9x == nullptr) + d3d9x = LoadLibraryA("d3dx9_43.dll"); + + g_pfnDisassembleShader = + reinterpret_cast(GetProcAddress(d3d9x, "D3DXDisassembleShader")); + } + + if (g_pfnDisassembleShader == nullptr) + return D3DERR_INVALIDCALL; + + return g_pfnDisassembleShader( + pShader, + EnableColorCode, + pComments, + ppDisassembly); + } + + + HRESULT DecodeMultiSampleType( + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + VkSampleCountFlagBits* pCount) { + uint32_t sampleCount = std::max(MultiSample, 1u); + + // Check if this is a power of two... + if (sampleCount & (sampleCount - 1)) + return D3DERR_INVALIDCALL; + + if (MultiSample == D3DMULTISAMPLE_NONMASKABLE) + sampleCount = 1u << MultisampleQuality; + + if (pCount != nullptr) + *pCount = VkSampleCountFlagBits(sampleCount); + + return D3D_OK; + } + + + VkFormat GetPackedDepthStencilFormat(D3D9Format Format) { + switch (Format) { + case D3D9Format::D15S1: + return VK_FORMAT_D16_UNORM_S8_UINT; // This should never happen! 
+ + case D3D9Format::D16: + case D3D9Format::D16_LOCKABLE: + case D3D9Format::DF16: + return VK_FORMAT_D16_UNORM; + + case D3D9Format::D24X8: + case D3D9Format::DF24: + return VK_FORMAT_X8_D24_UNORM_PACK32; + + case D3D9Format::D24X4S4: + case D3D9Format::D24FS8: + case D3D9Format::D24S8: + case D3D9Format::INTZ: + return VK_FORMAT_D24_UNORM_S8_UINT; + + case D3D9Format::D32: + case D3D9Format::D32_LOCKABLE: + case D3D9Format::D32F_LOCKABLE: + return VK_FORMAT_D32_SFLOAT; + + case D3D9Format::S8_LOCKABLE: + return VK_FORMAT_S8_UINT; + + default: + return VK_FORMAT_UNDEFINED; + } + } + + + VkFormatFeatureFlags GetImageFormatFeatures(DWORD Usage) { + VkFormatFeatureFlags features = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + + if (Usage & D3DUSAGE_DEPTHSTENCIL) + features |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + + if (Usage & D3DUSAGE_RENDERTARGET) + features |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + + return features; + } + + + VkImageUsageFlags GetImageUsageFlags(DWORD Usage) { + VkImageUsageFlags usage = VK_IMAGE_USAGE_SAMPLED_BIT; + + if (Usage & D3DUSAGE_DEPTHSTENCIL) + usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + + if (Usage & D3DUSAGE_RENDERTARGET) + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + return usage; + } + + + uint32_t GetVertexCount(D3DPRIMITIVETYPE type, UINT count) { + switch (type) { + default: + case D3DPT_TRIANGLELIST: return count * 3; + case D3DPT_POINTLIST: return count; + case D3DPT_LINELIST: return count * 2; + case D3DPT_LINESTRIP: return count + 1; + case D3DPT_TRIANGLESTRIP: return count + 2; + case D3DPT_TRIANGLEFAN: return count + 2; + } + } + + + DxvkInputAssemblyState DecodeInputAssemblyState(D3DPRIMITIVETYPE type) { + switch (type) { + default: + case D3DPT_TRIANGLELIST: + return { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_FALSE, 0 }; + + case D3DPT_POINTLIST: + return { VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_FALSE, 0 }; + + case D3DPT_LINELIST: + return { VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_FALSE, 0 }; + + case 
D3DPT_LINESTRIP: + return { VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, VK_TRUE, 0 }; + + case D3DPT_TRIANGLESTRIP: + return { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, VK_TRUE, 0 }; + + case D3DPT_TRIANGLEFAN: + return { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN, VK_TRUE, 0 }; + } + } + + + VkBlendFactor DecodeBlendFactor(D3DBLEND BlendFactor, bool IsAlpha) { + switch (BlendFactor) { + default: + case D3DBLEND_ZERO: return VK_BLEND_FACTOR_ZERO; + case D3DBLEND_ONE: return VK_BLEND_FACTOR_ONE; + case D3DBLEND_SRCCOLOR: return VK_BLEND_FACTOR_SRC_COLOR; + case D3DBLEND_INVSRCCOLOR: return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; + case D3DBLEND_SRCALPHA: return VK_BLEND_FACTOR_SRC_ALPHA; + case D3DBLEND_INVSRCALPHA: return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + case D3DBLEND_DESTALPHA: return VK_BLEND_FACTOR_DST_ALPHA; + case D3DBLEND_INVDESTALPHA: return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; + case D3DBLEND_DESTCOLOR: return VK_BLEND_FACTOR_DST_COLOR; + case D3DBLEND_INVDESTCOLOR: return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; + case D3DBLEND_SRCALPHASAT: return VK_BLEND_FACTOR_SRC_ALPHA_SATURATE; + case D3DBLEND_BOTHSRCALPHA: return VK_BLEND_FACTOR_SRC_ALPHA; + case D3DBLEND_BOTHINVSRCALPHA: return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + case D3DBLEND_BLENDFACTOR: return IsAlpha ? VK_BLEND_FACTOR_CONSTANT_ALPHA : VK_BLEND_FACTOR_CONSTANT_COLOR; + case D3DBLEND_INVBLENDFACTOR: return IsAlpha ? 
VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA : VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR; + case D3DBLEND_SRCCOLOR2: return VK_BLEND_FACTOR_SRC1_COLOR; + case D3DBLEND_INVSRCCOLOR2: return VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR; + } + } + + + VkBlendOp DecodeBlendOp(D3DBLENDOP BlendOp) { + switch (BlendOp) { + default: + case D3DBLENDOP_ADD: return VK_BLEND_OP_ADD; + case D3DBLENDOP_SUBTRACT: return VK_BLEND_OP_SUBTRACT; + case D3DBLENDOP_REVSUBTRACT: return VK_BLEND_OP_REVERSE_SUBTRACT; + case D3DBLENDOP_MIN: return VK_BLEND_OP_MIN; + case D3DBLENDOP_MAX: return VK_BLEND_OP_MAX; + } + } + + + VkFilter DecodeFilter(D3DTEXTUREFILTERTYPE Filter) { + switch (Filter) { + case D3DTEXF_NONE: + case D3DTEXF_POINT: + return VK_FILTER_NEAREST; + default: + return VK_FILTER_LINEAR; + } + } + + + D3D9MipFilter DecodeMipFilter(D3DTEXTUREFILTERTYPE Filter) { + D3D9MipFilter filter; + filter.MipsEnabled = Filter != D3DTEXF_NONE; + + switch (Filter) { + case D3DTEXF_POINT: + case D3DTEXF_NONE: + filter.MipFilter = VK_SAMPLER_MIPMAP_MODE_NEAREST; break; + default: + filter.MipFilter = VK_SAMPLER_MIPMAP_MODE_LINEAR; break; + } + + return filter; + } + + + bool IsAnisotropic(D3DTEXTUREFILTERTYPE Filter) { + return Filter == D3DTEXF_ANISOTROPIC; + } + + + VkSamplerAddressMode DecodeAddressMode(D3DTEXTUREADDRESS Mode) { + switch (Mode) { + default: + case D3DTADDRESS_WRAP: + return VK_SAMPLER_ADDRESS_MODE_REPEAT; + case D3DTADDRESS_MIRROR: + return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + case D3DTADDRESS_CLAMP: + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case D3DTADDRESS_BORDER: + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + case D3DTADDRESS_MIRRORONCE: + return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + } + } + + + VkCompareOp DecodeCompareOp(D3DCMPFUNC Func) { + switch (Func) { + default: + case D3DCMP_NEVER: return VK_COMPARE_OP_NEVER; + case D3DCMP_LESS: return VK_COMPARE_OP_LESS; + case D3DCMP_EQUAL: return VK_COMPARE_OP_EQUAL; + case D3DCMP_LESSEQUAL: return 
VK_COMPARE_OP_LESS_OR_EQUAL; + case D3DCMP_GREATER: return VK_COMPARE_OP_GREATER; + case D3DCMP_NOTEQUAL: return VK_COMPARE_OP_NOT_EQUAL; + case D3DCMP_GREATEREQUAL: return VK_COMPARE_OP_GREATER_OR_EQUAL; + case D3DCMP_ALWAYS: return VK_COMPARE_OP_ALWAYS; + } + } + + + VkStencilOp DecodeStencilOp(D3DSTENCILOP Op) { + switch (Op) { + default: + case D3DSTENCILOP_KEEP: return VK_STENCIL_OP_KEEP; + case D3DSTENCILOP_ZERO: return VK_STENCIL_OP_ZERO; + case D3DSTENCILOP_REPLACE: return VK_STENCIL_OP_REPLACE; + case D3DSTENCILOP_INCRSAT: return VK_STENCIL_OP_INCREMENT_AND_CLAMP; + case D3DSTENCILOP_DECRSAT: return VK_STENCIL_OP_DECREMENT_AND_CLAMP; + case D3DSTENCILOP_INVERT: return VK_STENCIL_OP_INVERT; + case D3DSTENCILOP_INCR: return VK_STENCIL_OP_INCREMENT_AND_WRAP; + case D3DSTENCILOP_DECR: return VK_STENCIL_OP_DECREMENT_AND_WRAP; + } + } + + + VkCullModeFlags DecodeCullMode(D3DCULL Mode) { + switch (Mode) { + case D3DCULL_NONE: return VK_CULL_MODE_NONE; + case D3DCULL_CW: return VK_CULL_MODE_FRONT_BIT; + default: + case D3DCULL_CCW: return VK_CULL_MODE_BACK_BIT; + } + } + + + VkPolygonMode DecodeFillMode(D3DFILLMODE Mode) { + switch (Mode) { + case D3DFILL_POINT: return VK_POLYGON_MODE_POINT; + case D3DFILL_WIREFRAME: return VK_POLYGON_MODE_LINE; + default: + case D3DFILL_SOLID: return VK_POLYGON_MODE_FILL; + } + } + + + VkIndexType DecodeIndexType(D3D9Format Format) { + return Format == D3D9Format::INDEX16 + ? 
VK_INDEX_TYPE_UINT16 + : VK_INDEX_TYPE_UINT32; + } + + + VkFormat DecodeDecltype(D3DDECLTYPE Type) { + switch (Type) { + case D3DDECLTYPE_FLOAT1: return VK_FORMAT_R32_SFLOAT; + case D3DDECLTYPE_FLOAT2: return VK_FORMAT_R32G32_SFLOAT; + case D3DDECLTYPE_FLOAT3: return VK_FORMAT_R32G32B32_SFLOAT; + case D3DDECLTYPE_FLOAT4: return VK_FORMAT_R32G32B32A32_SFLOAT; + case D3DDECLTYPE_D3DCOLOR: return VK_FORMAT_B8G8R8A8_UNORM; + case D3DDECLTYPE_UBYTE4: return VK_FORMAT_R8G8B8A8_USCALED; + case D3DDECLTYPE_SHORT2: return VK_FORMAT_R16G16_SSCALED; + case D3DDECLTYPE_SHORT4: return VK_FORMAT_R16G16B16A16_SSCALED; + case D3DDECLTYPE_UBYTE4N: return VK_FORMAT_R8G8B8A8_UNORM; + case D3DDECLTYPE_SHORT2N: return VK_FORMAT_R16G16_SNORM; + case D3DDECLTYPE_SHORT4N: return VK_FORMAT_R16G16B16A16_SNORM; + case D3DDECLTYPE_USHORT2N: return VK_FORMAT_R16G16_UNORM; + case D3DDECLTYPE_USHORT4N: return VK_FORMAT_R16G16B16A16_UNORM; + case D3DDECLTYPE_UDEC3: return VK_FORMAT_A2B10G10R10_USCALED_PACK32; + case D3DDECLTYPE_FLOAT16_2: return VK_FORMAT_R16G16_SFLOAT; + case D3DDECLTYPE_FLOAT16_4: return VK_FORMAT_R16G16B16A16_SFLOAT; + case D3DDECLTYPE_DEC3N: return VK_FORMAT_A2B10G10R10_SNORM_PACK32; + case D3DDECLTYPE_UNUSED: + default: return VK_FORMAT_UNDEFINED; + } + } + + void ConvertBox(D3DBOX box, VkOffset3D& offset, VkExtent3D& extent) { + offset.x = box.Left; + offset.y = box.Top; + offset.z = box.Front; + + extent.width = box.Right - box.Left; + extent.height = box.Bottom - box.Top; + extent.depth = box.Back - box.Front; + } + + void ConvertRect(RECT rect, VkOffset3D& offset, VkExtent3D& extent) { + offset.x = rect.left; + offset.y = rect.top; + offset.z = 0; + + extent.width = rect.right - rect.left; + extent.height = rect.bottom - rect.top; + extent.depth = 1; + } + + void ConvertRect(RECT rect, VkOffset2D& offset, VkExtent2D& extent) { + offset.x = rect.left; + offset.y = rect.top; + + extent.width = rect.right - rect.left; + extent.height = rect.bottom - rect.top; + } + + 
uint32_t GetDecltypeSize(D3DDECLTYPE Type) { + switch (Type) { + case D3DDECLTYPE_FLOAT1: return 1 * sizeof(float); + case D3DDECLTYPE_FLOAT2: return 2 * sizeof(float); + case D3DDECLTYPE_FLOAT3: return 3 * sizeof(float); + case D3DDECLTYPE_FLOAT4: return 4 * sizeof(float); + case D3DDECLTYPE_D3DCOLOR: return 1 * sizeof(DWORD); + case D3DDECLTYPE_UBYTE4: return 4 * sizeof(BYTE); + case D3DDECLTYPE_SHORT2: return 2 * sizeof(short); + case D3DDECLTYPE_SHORT4: return 4 * sizeof(short); + case D3DDECLTYPE_UBYTE4N: return 4 * sizeof(BYTE); + case D3DDECLTYPE_SHORT2N: return 2 * sizeof(short); + case D3DDECLTYPE_SHORT4N: return 4 * sizeof(short); + case D3DDECLTYPE_USHORT2N: return 2 * sizeof(short); + case D3DDECLTYPE_USHORT4N: return 4 * sizeof(short); + case D3DDECLTYPE_UDEC3: return 4; + case D3DDECLTYPE_DEC3N: return 4; + case D3DDECLTYPE_FLOAT16_2: return 2 * 2; + case D3DDECLTYPE_FLOAT16_4: return 4 * 2; + default: return 0; + } + } + + + uint32_t GetDecltypeCount(D3DDECLTYPE Type) { + switch (Type) { + case D3DDECLTYPE_FLOAT1: return 1; + case D3DDECLTYPE_FLOAT2: return 2; + case D3DDECLTYPE_FLOAT3: return 3; + case D3DDECLTYPE_FLOAT4: return 4; + case D3DDECLTYPE_D3DCOLOR: return 4; + case D3DDECLTYPE_UBYTE4: return 4; + case D3DDECLTYPE_SHORT2: return 2; + case D3DDECLTYPE_SHORT4: return 4; + case D3DDECLTYPE_UBYTE4N: return 4; + case D3DDECLTYPE_SHORT2N: return 2; + case D3DDECLTYPE_SHORT4N: return 4; + case D3DDECLTYPE_USHORT2N: return 2; + case D3DDECLTYPE_USHORT4N: return 4; + case D3DDECLTYPE_UDEC3: return 3; + case D3DDECLTYPE_DEC3N: return 3; + case D3DDECLTYPE_FLOAT16_2: return 2; + case D3DDECLTYPE_FLOAT16_4: return 4; + default: return 0; + } + } + + + bool IsDepthFormat(D3D9Format Format) { + return Format == D3D9Format::D16_LOCKABLE + || Format == D3D9Format::D32 + || Format == D3D9Format::D15S1 + || Format == D3D9Format::D24S8 + || Format == D3D9Format::D24X8 + || Format == D3D9Format::D24X4S4 + || Format == D3D9Format::D16 + || Format == 
D3D9Format::D32F_LOCKABLE + || Format == D3D9Format::D24FS8 + || Format == D3D9Format::D32_LOCKABLE + || Format == D3D9Format::DF16 + || Format == D3D9Format::DF24 + || Format == D3D9Format::INTZ; + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_util.h b/src/d3d9/d3d9_util.h new file mode 100644 index 000000000..3a1447c3e --- /dev/null +++ b/src/d3d9/d3d9_util.h @@ -0,0 +1,202 @@ +#pragma once + +#include "d3d9_include.h" + +#include "d3d9_format.h" + +#include "../dxso/dxso_common.h" +#include "../dxvk/dxvk_device.h" + +#include "../util/util_matrix.h" + +#include + +namespace dxvk { + + struct D3D9ShaderMasks { + uint32_t samplerMask; + uint32_t rtMask; + }; + + struct D3D9MipFilter { + bool MipsEnabled; + VkSamplerMipmapMode MipFilter; + }; + + struct D3D9BlendState { + D3DBLEND Src; + D3DBLEND Dst; + D3DBLENDOP Op; + }; + + inline void FixupBlendState(D3D9BlendState& State) { + // Old DirectX 6 HW feature that still exists... + // Yuck! + if (unlikely(State.Src == D3DBLEND_BOTHSRCALPHA)) { + State.Src = D3DBLEND_SRCALPHA; + State.Dst = D3DBLEND_INVSRCALPHA; + } + else if (unlikely(State.Src == D3DBLEND_BOTHINVSRCALPHA)) { + State.Src = D3DBLEND_INVSRCALPHA; + State.Dst = D3DBLEND_SRCALPHA; + } + } + + inline bool InvalidSampler(DWORD Sampler) { + if (Sampler > 15 && Sampler < D3DDMAPSAMPLER) + return true; + + if (Sampler > D3DVERTEXTEXTURESAMPLER3) + return true; + + return false; + } + + inline DWORD RemapSamplerState(DWORD Sampler) { + if (Sampler >= D3DDMAPSAMPLER) + Sampler = 16 + (Sampler - D3DDMAPSAMPLER); + + return Sampler; + } + + inline std::pair RemapStateSamplerShader(DWORD Sampler) { + if (Sampler >= 17) + return std::make_pair(DxsoProgramTypes::VertexShader, Sampler - 17); + + return std::make_pair(DxsoProgramTypes::PixelShader, Sampler); + } + + inline std::pair RemapSamplerShader(DWORD Sampler) { + Sampler = RemapSamplerState(Sampler); + + return RemapStateSamplerShader(Sampler); + } + + template + void CastRefPrivate(J* ptr, 
bool AddRef) { + if (ptr == nullptr) + return; + + T* castedPtr = reinterpret_cast(ptr); + AddRef ? castedPtr->AddRefPrivate() : castedPtr->ReleasePrivate(); + } + + HRESULT DisassembleShader( + const void* pShader, + BOOL EnableColorCode, + char* pComments, + ID3DBlob** ppDisassembly); + + HRESULT DecodeMultiSampleType( + D3DMULTISAMPLE_TYPE MultiSample, + DWORD MultisampleQuality, + VkSampleCountFlagBits* pCount); + + VkFormat GetPackedDepthStencilFormat(D3D9Format Format); + + VkFormatFeatureFlags GetImageFormatFeatures(DWORD Usage); + + VkImageUsageFlags GetImageUsageFlags(DWORD Usage); + + inline void DecodeD3DCOLOR(D3DCOLOR color, float* rgba) { + // Encoded in D3DCOLOR as argb + rgba[3] = (float)((color & 0xff000000) >> 24) / 255.0f; + rgba[0] = (float)((color & 0x00ff0000) >> 16) / 255.0f; + rgba[1] = (float)((color & 0x0000ff00) >> 8) / 255.0f; + rgba[2] = (float)((color & 0x000000ff)) / 255.0f; + } + + inline VkFormat PickSRGB(VkFormat format, VkFormat srgbFormat, bool srgb) { + return srgb ? srgbFormat : format; + } + + inline VkShaderStageFlagBits GetShaderStage(DxsoProgramType ProgramType) { + switch (ProgramType) { + case DxsoProgramTypes::VertexShader: return VK_SHADER_STAGE_VERTEX_BIT; + case DxsoProgramTypes::PixelShader: return VK_SHADER_STAGE_FRAGMENT_BIT; + default: return VkShaderStageFlagBits(0); + } + } + + inline uint32_t GetTransformIndex(D3DTRANSFORMSTATETYPE Type) { + if (Type == D3DTS_VIEW) + return 0; + + if (Type == D3DTS_PROJECTION) + return 1; + + if (Type >= D3DTS_TEXTURE0 && Type <= D3DTS_TEXTURE7) + return 2 + (Type - D3DTS_TEXTURE0); + + return 10 + (Type - D3DTS_WORLD); + } + + inline Matrix4 ConvertMatrix(const D3DMATRIX* Matrix) { + if (Matrix == nullptr) // Identity. 
+ return Matrix4(); + + return *(reinterpret_cast(Matrix)); + } + + uint32_t GetVertexCount(D3DPRIMITIVETYPE type, UINT count); + + DxvkInputAssemblyState DecodeInputAssemblyState(D3DPRIMITIVETYPE type); + + VkBlendFactor DecodeBlendFactor(D3DBLEND BlendFactor, bool IsAlpha); + + VkBlendOp DecodeBlendOp(D3DBLENDOP BlendOp); + + VkFilter DecodeFilter(D3DTEXTUREFILTERTYPE Filter); + + D3D9MipFilter DecodeMipFilter(D3DTEXTUREFILTERTYPE Filter); + + bool IsAnisotropic(D3DTEXTUREFILTERTYPE Filter); + + VkSamplerAddressMode DecodeAddressMode(D3DTEXTUREADDRESS Mode); + + VkCompareOp DecodeCompareOp(D3DCMPFUNC Func); + + VkStencilOp DecodeStencilOp(D3DSTENCILOP Op); + + VkCullModeFlags DecodeCullMode(D3DCULL Mode); + + VkPolygonMode DecodeFillMode(D3DFILLMODE Mode); + + VkIndexType DecodeIndexType(D3D9Format Format); + + VkFormat DecodeDecltype(D3DDECLTYPE Type); + + uint32_t GetDecltypeSize(D3DDECLTYPE Type); + + uint32_t GetDecltypeCount(D3DDECLTYPE Type); + + void ConvertBox(D3DBOX box, VkOffset3D& offset, VkExtent3D& extent); + + void ConvertRect(RECT rect, VkOffset3D& offset, VkExtent3D& extent); + + void ConvertRect(RECT rect, VkOffset2D& offset, VkExtent2D& extent); + + template + UINT CompactSparseList(T* pData, UINT Mask) { + uint32_t count = 0; + + while (Mask != 0) { + uint32_t id = bit::tzcnt(Mask); + pData[count++] = pData[id]; + Mask &= Mask - 1; + } + + return count; + } + + bool IsDepthFormat(D3D9Format Format); + + inline bool IsPoolManaged(D3DPOOL Pool) { + return Pool == D3DPOOL_MANAGED || Pool == D3DPOOL_MANAGED_EX; + } + + inline D3DRENDERSTATETYPE ColorWriteIndex(uint32_t i) { + return D3DRENDERSTATETYPE(i ? 
D3DRS_COLORWRITEENABLE1 + i - 1 : D3DRS_COLORWRITEENABLE); + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_vertex_declaration.cpp b/src/d3d9/d3d9_vertex_declaration.cpp new file mode 100644 index 000000000..e5e89040e --- /dev/null +++ b/src/d3d9/d3d9_vertex_declaration.cpp @@ -0,0 +1,231 @@ +#include "d3d9_vertex_declaration.h" +#include "d3d9_util.h" + +#include +#include + +namespace dxvk { + + D3D9VertexDecl::D3D9VertexDecl( + D3D9DeviceEx* pDevice, + DWORD FVF) + : D3D9VertexDeclBase(pDevice) { + this->SetFVF(FVF); + this->Classify(); + } + + + D3D9VertexDecl::D3D9VertexDecl( + D3D9DeviceEx* pDevice, + const D3DVERTEXELEMENT9* pVertexElements, + uint32_t DeclCount) + : D3D9VertexDeclBase( pDevice ) + , m_elements ( DeclCount ) + , m_fvf ( 0 ) { + std::copy(pVertexElements, pVertexElements + DeclCount, m_elements.begin()); + this->Classify(); + } + + + HRESULT STDMETHODCALLTYPE D3D9VertexDecl::QueryInterface( + REFIID riid, + void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DVertexDeclaration9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9VertexDecl::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + HRESULT STDMETHODCALLTYPE D3D9VertexDecl::GetDeclaration( + D3DVERTEXELEMENT9* pElement, + UINT* pNumElements) { + if (pNumElements == nullptr) + return D3DERR_INVALIDCALL; + + *pNumElements = UINT(m_elements.size()) + 1u; // Account for D3DDECL_END + + if (pElement == nullptr) + return D3D_OK; + + // The native runtime ignores pNumElements here... 
+ std::copy(m_elements.begin(), m_elements.end(), pElement); + pElement[m_elements.size()] = D3DDECL_END(); + + return D3D_OK; + } + + + void D3D9VertexDecl::SetFVF(DWORD FVF) { + m_fvf = FVF; + + std::array elements; + uint32_t elemCount = 0; + uint32_t texCount = 0; + + uint32_t betas = 0; + uint8_t betaIdx = 0xFF; + + switch (FVF & D3DFVF_POSITION_MASK) { + case D3DFVF_XYZ: + case D3DFVF_XYZB1: + case D3DFVF_XYZB2: + case D3DFVF_XYZB3: + case D3DFVF_XYZB4: + case D3DFVF_XYZB5: + elements[elemCount].Type = D3DDECLTYPE_FLOAT3; + elements[elemCount].Usage = D3DDECLUSAGE_POSITION; + elements[elemCount].UsageIndex = 0; + elemCount++; + + if ((FVF & D3DFVF_POSITION_MASK) == D3DFVF_XYZ) + break; + + betas = (((FVF & D3DFVF_XYZB5) - D3DFVF_XYZB1) >> 1) + 1; + if (FVF & D3DFVF_LASTBETA_D3DCOLOR) + betaIdx = D3DDECLTYPE_D3DCOLOR; + else if (FVF & D3DFVF_LASTBETA_UBYTE4) + betaIdx = D3DDECLTYPE_UBYTE4; + else if ((FVF & D3DFVF_XYZB5) == D3DFVF_XYZB5) + betaIdx = D3DDECLTYPE_FLOAT1; + + if (betaIdx != 0xFF) + betas--; + + if (betas > 0) { + switch (betas) { + case 1: elements[elemCount].Type = D3DDECLTYPE_FLOAT1; break; + case 2: elements[elemCount].Type = D3DDECLTYPE_FLOAT2; break; + case 3: elements[elemCount].Type = D3DDECLTYPE_FLOAT3; break; + case 4: elements[elemCount].Type = D3DDECLTYPE_FLOAT4; break; + default: break; + } + elements[elemCount].Usage = D3DDECLUSAGE_BLENDWEIGHT; + elements[elemCount].UsageIndex = 0; + elemCount++; + } + + if (betaIdx != 0xFF) { + elements[elemCount].Type = betaIdx; + elements[elemCount].Usage = D3DDECLUSAGE_BLENDINDICES; + elements[elemCount].UsageIndex = 0; + elemCount++; + } + break; + + case D3DFVF_XYZW: + case D3DFVF_XYZRHW: + elements[elemCount].Type = D3DDECLTYPE_FLOAT4; + elements[elemCount].Usage = + ((FVF & D3DFVF_POSITION_MASK) == D3DFVF_XYZW) + ? 
D3DDECLUSAGE_POSITION + : D3DDECLUSAGE_POSITIONT; + elements[elemCount].UsageIndex = 0; + elemCount++; + break; + + default: + break; + } + + if (FVF & D3DFVF_NORMAL) { + elements[elemCount].Type = D3DDECLTYPE_FLOAT3; + elements[elemCount].Usage = D3DDECLUSAGE_NORMAL; + elements[elemCount].UsageIndex = 0; + elemCount++; + } + if (FVF & D3DFVF_PSIZE) { + elements[elemCount].Type = D3DDECLTYPE_FLOAT1; + elements[elemCount].Usage = D3DDECLUSAGE_PSIZE; + elements[elemCount].UsageIndex = 0; + elemCount++; + } + if (FVF & D3DFVF_DIFFUSE) { + elements[elemCount].Type = D3DDECLTYPE_D3DCOLOR; + elements[elemCount].Usage = D3DDECLUSAGE_COLOR; + elements[elemCount].UsageIndex = 0; + elemCount++; + } + if (FVF & D3DFVF_SPECULAR) { + elements[elemCount].Type = D3DDECLTYPE_D3DCOLOR; + elements[elemCount].Usage = D3DDECLUSAGE_COLOR; + elements[elemCount].UsageIndex = 1; + elemCount++; + } + + texCount = (FVF & D3DFVF_TEXCOUNT_MASK) >> D3DFVF_TEXCOUNT_SHIFT; + texCount = std::min(texCount, 8u); + + for (uint32_t i = 0; i < texCount; i++) { + switch ((FVF >> (16 + i * 2)) & 0x3) { + case D3DFVF_TEXTUREFORMAT1: + elements[elemCount].Type = D3DDECLTYPE_FLOAT1; + break; + + case D3DFVF_TEXTUREFORMAT2: + elements[elemCount].Type = D3DDECLTYPE_FLOAT2; + break; + + case D3DFVF_TEXTUREFORMAT3: + elements[elemCount].Type = D3DDECLTYPE_FLOAT3; + break; + + case D3DFVF_TEXTUREFORMAT4: + elements[elemCount].Type = D3DDECLTYPE_FLOAT4; + break; + + default: + break; + } + elements[elemCount].Usage = D3DDECLUSAGE_TEXCOORD; + elements[elemCount].UsageIndex = i; + elemCount++; + } + + for (uint32_t i = 0; i < elemCount; i++) { + elements[i].Stream = 0; + elements[i].Offset = (i == 0) + ? 
0 + : (elements[i - 1].Offset + GetDecltypeSize(D3DDECLTYPE(elements[i - 1].Type))); + + elements[i].Method = D3DDECLMETHOD_DEFAULT; + } + + m_elements.resize(elemCount); + std::copy(elements.begin(), elements.begin() + elemCount, m_elements.data()); + } + + + void D3D9VertexDecl::Classify() { + for (const auto& element : m_elements) { + if (element.Usage == D3DDECLUSAGE_COLOR && element.UsageIndex == 0) + m_flags.set(D3D9VertexDeclFlag::HasColor0); + else if (element.Usage == D3DDECLUSAGE_COLOR && element.UsageIndex == 1) + m_flags.set(D3D9VertexDeclFlag::HasColor1); + else if (element.Usage == D3DDECLUSAGE_POSITIONT) + m_flags.set(D3D9VertexDeclFlag::HasPositionT); + else if (element.Usage == D3DDECLUSAGE_PSIZE) + m_flags.set(D3D9VertexDeclFlag::HasPointSize); + else if (element.Usage == D3DDECLUSAGE_FOG) + m_flags.set(D3D9VertexDeclFlag::HasFog); + else if (element.Usage == D3DDECLUSAGE_BLENDWEIGHT) + m_flags.set(D3D9VertexDeclFlag::HasBlendWeight); + else if (element.Usage == D3DDECLUSAGE_BLENDINDICES) + m_flags.set(D3D9VertexDeclFlag::HasBlendIndices); + + if (element.Usage == D3DDECLUSAGE_TEXCOORD) + m_texcoordMask |= GetDecltypeCount(D3DDECLTYPE(element.Type)) << (element.UsageIndex * 3); + } + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_vertex_declaration.h b/src/d3d9/d3d9_vertex_declaration.h new file mode 100644 index 000000000..25977efa8 --- /dev/null +++ b/src/d3d9/d3d9_vertex_declaration.h @@ -0,0 +1,83 @@ +#pragma once + +#include "d3d9_device_child.h" +#include "d3d9_util.h" + +#include + +namespace dxvk { + + enum D3D9VertexDeclFlag { + HasColor0, + HasColor1, + HasPositionT, + HasPointSize, + HasFog, + HasBlendWeight, + HasBlendIndices + }; + using D3D9VertexDeclFlags = Flags; + + using D3D9VertexDeclBase = D3D9DeviceChild; + class D3D9VertexDecl final : public D3D9VertexDeclBase { + + public: + + D3D9VertexDecl( + D3D9DeviceEx* pDevice, + DWORD FVF); + + D3D9VertexDecl( + D3D9DeviceEx* pDevice, + const D3DVERTEXELEMENT9* 
pVertexElements, + uint32_t DeclCount); + + HRESULT STDMETHODCALLTYPE QueryInterface( + REFIID riid, + void** ppvObject); + + HRESULT STDMETHODCALLTYPE GetDeclaration( + D3DVERTEXELEMENT9* pElement, + UINT* pNumElements); + + inline DWORD GetFVF() { + return m_fvf; + } + + void SetFVF(DWORD FVF); + + const D3D9VertexElements& GetElements() const { + return m_elements; + } + + UINT GetSize() const { + if (m_elements.size() == 0) + return 0; + + auto& end = m_elements.back(); + return end.Offset + GetDecltypeSize(D3DDECLTYPE(end.Type)); + } + + bool TestFlag(D3D9VertexDeclFlag flag) const { + return m_flags.test(flag); + } + + uint32_t GetTexcoordMask() const { + return m_texcoordMask; + } + + private: + + void Classify(); + + D3D9VertexDeclFlags m_flags; + + D3D9VertexElements m_elements; + + DWORD m_fvf; + + uint32_t m_texcoordMask = 0; + + }; + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_volume.cpp b/src/d3d9/d3d9_volume.cpp new file mode 100644 index 000000000..46fb89d7a --- /dev/null +++ b/src/d3d9/d3d9_volume.cpp @@ -0,0 +1,110 @@ +#include "d3d9_volume.h" + +#include "d3d9_device.h" +#include "d3d9_texture.h" + +namespace dxvk { + + D3D9Volume::D3D9Volume( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3D9_VK_FORMAT_MAPPING Mapping) + : D3D9VolumeBase( + pDevice, + new D3D9CommonTexture( pDevice, pDesc, D3DRTYPE_VOLUMETEXTURE, Mapping ), + 0, 0, + nullptr) { } + + + D3D9Volume::D3D9Volume( + D3D9DeviceEx* pDevice, + D3D9CommonTexture* pTexture, + UINT Face, + UINT MipLevel, + IDirect3DBaseTexture9* pContainer) + : D3D9VolumeBase( + pDevice, + pTexture, + Face, MipLevel, + pContainer) { } + + + void D3D9Volume::AddRefPrivate() { + IDirect3DBaseTexture9* pContainer = this->m_container; + + if (pContainer != nullptr) { + reinterpret_cast (pContainer)->AddRefPrivate(); + return; + } + + D3D9VolumeBase::AddRefPrivate(); + } + + + void D3D9Volume::ReleasePrivate() { + IDirect3DBaseTexture9* pContainer = this->m_container; + + if 
(pContainer != nullptr) { + reinterpret_cast (pContainer)->ReleasePrivate(); + return; + } + + D3D9VolumeBase::ReleasePrivate(); + } + + + HRESULT STDMETHODCALLTYPE D3D9Volume::QueryInterface(REFIID riid, void** ppvObject) { + if (ppvObject == nullptr) + return E_POINTER; + + *ppvObject = nullptr; + + if (riid == __uuidof(IUnknown) + || riid == __uuidof(IDirect3DResource9) + || riid == __uuidof(IDirect3DVolume9)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("D3D9Volume::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + HRESULT STDMETHODCALLTYPE D3D9Volume::GetDesc(D3DVOLUME_DESC *pDesc) { + if (pDesc == nullptr) + return D3DERR_INVALIDCALL; + + auto& desc = *(m_texture->Desc()); + + pDesc->Format = static_cast(desc.Format); + pDesc->Type = D3DRTYPE_VOLUME; + pDesc->Usage = desc.Usage; + pDesc->Pool = desc.Pool; + + pDesc->Width = std::max(1u, desc.Width >> m_mipLevel); + pDesc->Height = std::max(1u, desc.Height >> m_mipLevel); + pDesc->Depth = std::max(1u, desc.Depth >> m_mipLevel); + + return D3D_OK; + } + + + HRESULT STDMETHODCALLTYPE D3D9Volume::LockBox(D3DLOCKED_BOX* pLockedBox, CONST D3DBOX* pBox, DWORD Flags) { + return m_parent->LockImage( + m_texture, + m_face, m_mipLevel, + pLockedBox, + pBox, + Flags); + } + + + HRESULT STDMETHODCALLTYPE D3D9Volume::UnlockBox() { + return m_parent->UnlockImage( + m_texture, + m_face, m_mipLevel); + } + +} \ No newline at end of file diff --git a/src/d3d9/d3d9_volume.h b/src/d3d9/d3d9_volume.h new file mode 100644 index 000000000..c74e8e493 --- /dev/null +++ b/src/d3d9/d3d9_volume.h @@ -0,0 +1,39 @@ +#pragma once + +#include "d3d9_subresource.h" + +#include "d3d9_common_texture.h" + +namespace dxvk { + + using D3D9VolumeBase = D3D9Subresource; + class D3D9Volume final : public D3D9VolumeBase { + + public: + + D3D9Volume( + D3D9DeviceEx* pDevice, + const D3D9_COMMON_TEXTURE_DESC* pDesc, + D3D9_VK_FORMAT_MAPPING Mapping); + + D3D9Volume( + 
D3D9DeviceEx* pDevice, + D3D9CommonTexture* pTexture, + UINT Face, + UINT MipLevel, + IDirect3DBaseTexture9* pContainer); + + void AddRefPrivate(); + + void ReleasePrivate(); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); + + HRESULT STDMETHODCALLTYPE GetDesc(D3DVOLUME_DESC *pDesc) final; + + HRESULT STDMETHODCALLTYPE LockBox(D3DLOCKED_BOX* pLockedBox, CONST D3DBOX* pBox, DWORD Flags) final; + + HRESULT STDMETHODCALLTYPE UnlockBox() final; + + }; +} \ No newline at end of file diff --git a/src/d3d9/meson.build b/src/d3d9/meson.build new file mode 100644 index 000000000..988bc72a0 --- /dev/null +++ b/src/d3d9/meson.build @@ -0,0 +1,52 @@ +d3d9_res = wrc_generator.process('version.rc') + +d3d9_shaders = files([ + 'shaders/d3d9_presenter_frag.frag', + 'shaders/d3d9_presenter_vert.vert', + 'shaders/d3d9_convert_yuy2_uyvy.comp' +]) + +d3d9_src = [ + 'd3d9_main.cpp', + 'd3d9_interface.cpp', + 'd3d9_adapter.cpp', + 'd3d9_monitor.cpp', + 'd3d9_device.cpp', + 'd3d9_state.cpp', + 'd3d9_cursor.cpp', + 'd3d9_swapchain.cpp', + 'd3d9_format.cpp', + 'd3d9_common_texture.cpp', + 'd3d9_texture.cpp', + 'd3d9_surface.cpp', + 'd3d9_volume.cpp', + 'd3d9_common_buffer.cpp', + 'd3d9_buffer.cpp', + 'd3d9_shader.cpp', + 'd3d9_vertex_declaration.cpp', + 'd3d9_query.cpp', + 'd3d9_multithread.cpp', + 'd3d9_options.cpp', + 'd3d9_stateblock.cpp', + 'd3d9_sampler.cpp', + 'd3d9_util.cpp', + 'd3d9_initializer.cpp', + 'd3d9_fixed_function.cpp', + 'd3d9_names.cpp', + 'd3d9_swvp_emu.cpp', + 'd3d9_format_helpers.cpp', + 'd3d9_hud.cpp' +] + +d3d9_dll = shared_library('d3d9'+dll_ext, d3d9_src, glsl_generator.process(d3d9_shaders), d3d9_res, + name_prefix : '', + dependencies : [ dxso_dep, dxvk_dep ], + include_directories : dxvk_include_path, + install : true, + objects : not dxvk_msvc ? 
'd3d9'+def_spec_ext : [], + vs_module_defs : 'd3d9'+def_spec_ext, + override_options : ['cpp_std='+dxvk_cpp_std]) + +d3d9_dep = declare_dependency( + link_with : [ d3d9_dll ], + include_directories : [ dxvk_include_path ]) diff --git a/src/d3d9/shaders/d3d9_convert_yuy2_uyvy.comp b/src/d3d9/shaders/d3d9_convert_yuy2_uyvy.comp new file mode 100644 index 000000000..d4ae32344 --- /dev/null +++ b/src/d3d9/shaders/d3d9_convert_yuy2_uyvy.comp @@ -0,0 +1,66 @@ +#version 450 + +layout(constant_id = 1249) const bool s_is_uyvy = false; + +layout( + local_size_x = 8, + local_size_y = 8, + local_size_z = 1) in; + +layout(binding = 0) +writeonly uniform image2D dst; + +layout(binding = 1) +readonly buffer yuy2_buffer_t { + uint data[]; +} src; + +layout(push_constant) +uniform u_info_t { + uvec2 extent; +} u_info; + +// YUV -> RGB conversion coefficients, one column per output channel +// (R, G, B). These must be float divisions: `298 / 256` is an integer +// division in GLSL and would truncate the coefficient to 1. +mat3x4 g_yuv_to_rgb = { + { 298.0 / 256.0, 0.0, 409.0 / 256.0, 0.5 }, + { 298.0 / 256.0, -100.0 / 256.0, -208.0 / 256.0, 0.5 }, + { 298.0 / 256.0, 516.0 / 256.0, 0.0, 0.5 } +}; + +vec4 convertYUV(vec3 cde) { + vec3 value = vec4(cde, 1 / 255.0) * g_yuv_to_rgb; + + return vec4(clamp(value, 0, 1), 1); +} + +void main() { + ivec3 thread_id = ivec3(gl_GlobalInvocationID); + + if (all(lessThan(thread_id.xy, u_info.extent))) { + uint offset = thread_id.x + + thread_id.y * u_info.extent.x; + + vec4 data = unpackUnorm4x8(src.data[offset]); + + // Flip around stuff for UYVY + if (s_is_uyvy) + data = data.yxwz; + + float c0 = data.x - (16 / 255.0); + float d = data.y - (128 / 255.0); + float c1 = data.z - (16 / 255.0); + float e = data.w - (128 / 255.0); + + vec4 color0 = convertYUV(vec3(c0, d, e)); + vec4 color1 = convertYUV(vec3(c1, d, e)); + + // YUY2 has a macropixel of [2, 1] + // so we write 2 pixels in this run. 
+ ivec2 writePos = thread_id.xy * ivec2(2, 1); + + imageStore(dst, ivec2(writePos.x, writePos.y), color0); + imageStore(dst, ivec2(writePos.x + 1, writePos.y), color1); + } +} \ No newline at end of file diff --git a/src/d3d9/shaders/d3d9_presenter_frag.frag b/src/d3d9/shaders/d3d9_presenter_frag.frag new file mode 100644 index 000000000..e52a0d765 --- /dev/null +++ b/src/d3d9/shaders/d3d9_presenter_frag.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(constant_id = 1) const bool s_gamma_bound = true; + +layout(binding = 0) uniform sampler2D s_image; +layout(binding = 1) uniform sampler1D s_gamma; + +layout(location = 0) in vec2 i_texcoord; +layout(location = 0) out vec4 o_color; + +void main() { + o_color = texture(s_image, i_texcoord); + + if (s_gamma_bound) { + o_color = vec4( + texture(s_gamma, o_color.r).r, + texture(s_gamma, o_color.g).g, + texture(s_gamma, o_color.b).b, + o_color.a); + } +} \ No newline at end of file diff --git a/src/d3d9/shaders/d3d9_presenter_vert.vert b/src/d3d9/shaders/d3d9_presenter_vert.vert new file mode 100644 index 000000000..eb5edc3cf --- /dev/null +++ b/src/d3d9/shaders/d3d9_presenter_vert.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec2 o_texcoord; + +layout(push_constant) uniform present_info_t { + vec2 scale; + vec2 offset; +} u_presentInfo; + +void main() { + vec2 coord = vec2( + float(gl_VertexIndex & 2), + float(gl_VertexIndex & 1) * 2.0f); + + gl_Position = vec4(-1.0f + 2.0f * coord, 0.0f, 1.0f); + + coord *= u_presentInfo.scale; + coord += u_presentInfo.offset; + + o_texcoord = coord; +} \ No newline at end of file diff --git a/src/d3d9/version.rc b/src/d3d9/version.rc new file mode 100644 index 000000000..b4027eee9 --- /dev/null +++ b/src/d3d9/version.rc @@ -0,0 +1,31 @@ +#include + +// DLL version information. 
+VS_VERSION_INFO VERSIONINFO +FILEVERSION 10,0,17763,1 +PRODUCTVERSION 10,0,17763,1 +FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +FILEFLAGS 0 +FILEOS VOS_NT_WINDOWS32 +FILETYPE VFT_DLL +FILESUBTYPE VFT2_UNKNOWN +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "080904b0" + BEGIN + VALUE "CompanyName", "DXVK" + VALUE "FileDescription", "Direct3D 9 Runtime" + VALUE "FileVersion", "10.0.17763.1 (WinBuild.160101.0800)" + VALUE "InternalName", "D3D9.dll" + VALUE "LegalCopyright", "zlib/libpng license" + VALUE "OriginalFilename", "D3D9.dll" + VALUE "ProductName", "DXVK" + VALUE "ProductVersion", "10.0.17763.1" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x0809, 1200 + END +END diff --git a/src/dxso/dxso_analysis.cpp b/src/dxso/dxso_analysis.cpp new file mode 100644 index 000000000..467bd02c7 --- /dev/null +++ b/src/dxso/dxso_analysis.cpp @@ -0,0 +1,47 @@ +#include "dxso_analysis.h" + +namespace dxvk { + + DxsoAnalyzer::DxsoAnalyzer( + DxsoAnalysisInfo& analysis) + : m_analysis(&analysis) { } + + void DxsoAnalyzer::processInstruction( + const DxsoInstructionContext& ctx) { + DxsoOpcode opcode = ctx.instruction.opcode; + + if (opcode == DxsoOpcode::TexKill) + m_analysis->usesKill = true; + + if (opcode == DxsoOpcode::DsX + || opcode == DxsoOpcode::DsY + + || opcode == DxsoOpcode::Tex + || opcode == DxsoOpcode::TexCoord + || opcode == DxsoOpcode::TexBem + || opcode == DxsoOpcode::TexBemL + || opcode == DxsoOpcode::TexReg2Ar + || opcode == DxsoOpcode::TexReg2Gb + || opcode == DxsoOpcode::TexM3x2Pad + || opcode == DxsoOpcode::TexM3x2Tex + || opcode == DxsoOpcode::TexM3x3Pad + || opcode == DxsoOpcode::TexM3x3Tex + || opcode == DxsoOpcode::TexM3x3Spec + || opcode == DxsoOpcode::TexM3x3VSpec + || opcode == DxsoOpcode::TexReg2Rgb + || opcode == DxsoOpcode::TexDp3Tex + || opcode == DxsoOpcode::TexM3x2Depth + || opcode == DxsoOpcode::TexDp3 + || opcode == DxsoOpcode::TexM3x3 + // Explicit LOD. 
+ //|| opcode == DxsoOpcode::TexLdd + //|| opcode == DxsoOpcode::TexLdl + || opcode == DxsoOpcode::TexDepth) + m_analysis->usesDerivatives = true; + } + + void DxsoAnalyzer::finalize(size_t tokenCount) { + m_analysis->bytecodeByteLength = tokenCount * sizeof(uint32_t); + } + +} \ No newline at end of file diff --git a/src/dxso/dxso_analysis.h b/src/dxso/dxso_analysis.h new file mode 100644 index 000000000..9cbfd9672 --- /dev/null +++ b/src/dxso/dxso_analysis.h @@ -0,0 +1,38 @@ +#pragma once + +#include "dxso_modinfo.h" +#include "dxso_decoder.h" + +namespace dxvk { + + struct DxsoAnalysisInfo { + // Bytecode size in bytes, filled in by DxsoAnalyzer::finalize + uint32_t bytecodeByteLength = 0; + + bool usesDerivatives = false; + bool usesKill = false; + }; + + class DxsoAnalyzer { + + public: + + DxsoAnalyzer( + DxsoAnalysisInfo& analysis); + + /** + * \brief Processes a single instruction + * \param [in] ctx The instruction context + */ + void processInstruction( + const DxsoInstructionContext& ctx); + + void finalize(size_t tokenCount); + + private: + + DxsoAnalysisInfo* m_analysis = nullptr; + + }; + +} \ No newline at end of file diff --git a/src/dxso/dxso_code.cpp b/src/dxso/dxso_code.cpp new file mode 100644 index 000000000..9fd73d933 --- /dev/null +++ b/src/dxso/dxso_code.cpp @@ -0,0 +1,28 @@ +#include "dxso_code.h" + +namespace dxvk { + + DxsoCode::DxsoCode(DxsoReader& reader) { + m_code = + reinterpret_cast<const uint32_t*>(reader.currentPtr()); + } + + const uint32_t* DxsoCodeIter::ptrAt(uint32_t id) const { + return m_ptr + id; + } + + + uint32_t DxsoCodeIter::at(uint32_t id) const { + return m_ptr[id]; + } + + + uint32_t DxsoCodeIter::read() { + return *(m_ptr++); + } + + DxsoCodeIter DxsoCodeIter::skip(uint32_t n) const { + return DxsoCodeIter(m_ptr + n); + } + +} \ No newline at end of file diff --git a/src/dxso/dxso_code.h b/src/dxso/dxso_code.h new file mode 100644 index 000000000..51fdab78e --- /dev/null +++ b/src/dxso/dxso_code.h @@ -0,0 +1,54 @@ +#pragma once + +#include "dxso_include.h" +#include "dxso_reader.h" + +#include +#include + 
+namespace dxvk { + + /** + * \brief DXSO code iterator + * + * Convenient pointer wrapper that allows + * reading the code token stream. + */ + class DxsoCodeIter { + + public: + + DxsoCodeIter( + const uint32_t* ptr) + : m_ptr(ptr) { } + + const uint32_t* ptrAt(uint32_t id) const; + + uint32_t at(uint32_t id) const; + uint32_t read(); + + DxsoCodeIter skip(uint32_t n) const; + + private: + + const uint32_t* m_ptr = nullptr; + + }; + + class DxsoCode { + + public: + + DxsoCode(DxsoReader& reader); + + DxsoCodeIter iter() const { + return DxsoCodeIter(m_code); + } + + private: + + const uint32_t* m_code; + + }; + +} \ No newline at end of file diff --git a/src/dxso/dxso_common.cpp b/src/dxso/dxso_common.cpp new file mode 100644 index 000000000..0709fa220 --- /dev/null +++ b/src/dxso/dxso_common.cpp @@ -0,0 +1,26 @@ +#include "dxso_common.h" + +namespace dxvk { + + VkShaderStageFlagBits DxsoProgramInfo::shaderStage() const { + switch (m_type) { + case DxsoProgramTypes::PixelShader: return VK_SHADER_STAGE_FRAGMENT_BIT; + case DxsoProgramTypes::VertexShader: return VK_SHADER_STAGE_VERTEX_BIT; + default: break; + } + + throw DxvkError("DxsoProgramInfo::shaderStage: Unsupported program type"); + } + + + spv::ExecutionModel DxsoProgramInfo::executionModel() const { + switch (m_type) { + case DxsoProgramTypes::PixelShader: return spv::ExecutionModelFragment; + case DxsoProgramTypes::VertexShader: return spv::ExecutionModelVertex; + default: break; + } + + throw DxvkError("DxsoProgramInfo::executionModel: Unsupported program type"); + } + +} \ No newline at end of file diff --git a/src/dxso/dxso_common.h b/src/dxso/dxso_common.h new file mode 100644 index 000000000..fcc57cd85 --- /dev/null +++ b/src/dxso/dxso_common.h @@ -0,0 +1,88 @@ +#pragma once + +#include "dxso_include.h" + +#include + +namespace dxvk { + + /** + * \brief DXSO Program type + * + * Defines the shader stage that a DXSO + * module has been compiled for. 
+ */ + namespace DxsoProgramTypes { + enum DxsoProgramType : uint16_t { + VertexShader = 0, + PixelShader = 1, + Count = 2, + }; + } + using DxsoProgramType = DxsoProgramTypes::DxsoProgramType; + + class DxsoProgramInfo { + + public: + + DxsoProgramInfo() { } + DxsoProgramInfo( + DxsoProgramType type, + uint32_t minorVersion, + uint32_t majorVersion) + : m_type{ type } + , m_minorVersion{ minorVersion } + , m_majorVersion{ majorVersion } {} + + /** + * \brief Program type + * \returns Program type + */ + DxsoProgramType type() const { + return m_type; + } + + /** + * \brief Vulkan shader stage + * + * The \c VkShaderStageFlagBits constant + * that corresponds to the program type. + * \returns Vulkan shader stage + */ + VkShaderStageFlagBits shaderStage() const; + + /** + * \brief SPIR-V execution model + * + * The execution model that corresponds + * to the Vulkan shader stage. + * \returns SPIR-V execution model + */ + spv::ExecutionModel executionModel() const; + + /** + * \brief Minor version + * \returns The minor version of the shader model. + */ + uint32_t minorVersion() const { + return m_minorVersion; + } + + /** + * \brief Major version + * \returns The major version of the shader model. 
+ */ + uint32_t majorVersion() const { + return m_majorVersion; + } + + private: + + DxsoProgramType m_type; + + uint32_t m_minorVersion; + uint32_t m_majorVersion; + + }; + +} \ No newline at end of file diff --git a/src/dxso/dxso_compiler.cpp b/src/dxso/dxso_compiler.cpp new file mode 100644 index 000000000..ae2e2aece --- /dev/null +++ b/src/dxso/dxso_compiler.cpp @@ -0,0 +1,3579 @@ +#include "dxso_compiler.h" + +#include "dxso_analysis.h" + +#include "../d3d9/d3d9_caps.h" +#include "../d3d9/d3d9_constant_set.h" +#include "../d3d9/d3d9_state.h" +#include "../d3d9/d3d9_spec_constants.h" +#include "../d3d9/d3d9_fixed_function.h" +#include "dxso_util.h" + +#include "../dxvk/dxvk_spec_const.h" + +#include + +namespace dxvk { + + DxsoCompiler::DxsoCompiler( + const std::string& fileName, + const DxsoModuleInfo& moduleInfo, + const DxsoProgramInfo& programInfo, + const DxsoAnalysisInfo& analysis, + const D3D9ConstantLayout& layout) + : m_moduleInfo ( moduleInfo ) + , m_programInfo( programInfo ) + , m_analysis ( &analysis ) + , m_layout ( &layout ) { + // Declare an entry point ID. We'll need it during the + // initialization phase where the execution mode is set. + m_entryPointId = m_module.allocateId(); + + // Set the shader name so that we recognize it in renderdoc + m_module.setDebugSource( + spv::SourceLanguageUnknown, 0, + m_module.addDebugString(fileName.c_str()), + nullptr); + + // Set the memory model. This is the same for all shaders. 
+ m_module.setMemoryModel( + spv::AddressingModelLogical, + spv::MemoryModelGLSL450); + + m_usedSamplers = 0; + m_usedRTs = 0; + + for (uint32_t i = 0; i < m_rRegs.size(); i++) + m_rRegs.at(i) = DxsoRegisterPointer{ }; + + for (uint32_t i = 0; i < m_cFloat.size(); i++) + m_cFloat.at(i) = 0; + + for (uint32_t i = 0; i < m_cInt.size(); i++) + m_cInt.at(i) = 0; + + for (uint32_t i = 0; i < m_cBool.size(); i++) + m_cBool.at(i) = 0; + + m_vs.addr = DxsoRegisterPointer{ }; + m_vs.oPos = DxsoRegisterPointer{ }; + m_fog = DxsoRegisterPointer{ }; + m_vs.oPSize = DxsoRegisterPointer{ }; + + for (uint32_t i = 0; i < m_ps.oColor.size(); i++) + m_ps.oColor.at(i) = DxsoRegisterPointer{ }; + m_ps.oDepth = DxsoRegisterPointer{ }; + m_ps.vFace = DxsoRegisterPointer{ }; + m_ps.vPos = DxsoRegisterPointer{ }; + + m_loopCounter = DxsoRegisterPointer{ }; + + this->emitInit(); + } + + + void DxsoCompiler::processInstruction( + const DxsoInstructionContext& ctx) { + const DxsoOpcode opcode = ctx.instruction.opcode; + + switch (opcode) { + case DxsoOpcode::Nop: + return; + + case DxsoOpcode::Dcl: + return this->emitDcl(ctx); + + case DxsoOpcode::Def: + case DxsoOpcode::DefI: + case DxsoOpcode::DefB: + return this->emitDef(ctx); + + case DxsoOpcode::Mov: + case DxsoOpcode::Mova: + return this->emitMov(ctx); + + case DxsoOpcode::Add: + case DxsoOpcode::Sub: + case DxsoOpcode::Mad: + case DxsoOpcode::Mul: + case DxsoOpcode::Rcp: + case DxsoOpcode::Rsq: + case DxsoOpcode::Dp3: + case DxsoOpcode::Dp4: + case DxsoOpcode::Slt: + case DxsoOpcode::Sge: + case DxsoOpcode::Min: + case DxsoOpcode::ExpP: + case DxsoOpcode::Exp: + case DxsoOpcode::Max: + case DxsoOpcode::Pow: + case DxsoOpcode::Crs: + case DxsoOpcode::Abs: + case DxsoOpcode::Nrm: + case DxsoOpcode::SinCos: + case DxsoOpcode::Lit: + case DxsoOpcode::Dst: + case DxsoOpcode::LogP: + case DxsoOpcode::Log: + case DxsoOpcode::Lrp: + case DxsoOpcode::Frc: + case DxsoOpcode::Cmp: + case DxsoOpcode::Cnd: + case DxsoOpcode::Dp2Add: + case 
DxsoOpcode::DsX: + case DxsoOpcode::DsY: + return this->emitVectorAlu(ctx); + + case DxsoOpcode::SetP: + return this->emitPredicateOp(ctx); + + case DxsoOpcode::M3x2: + case DxsoOpcode::M3x3: + case DxsoOpcode::M3x4: + case DxsoOpcode::M4x3: + case DxsoOpcode::M4x4: + return this->emitMatrixAlu(ctx); + + case DxsoOpcode::Loop: + return this->emitControlFlowLoop(ctx); + case DxsoOpcode::EndLoop: + return this->emitControlFlowEndLoop(ctx); + + case DxsoOpcode::Rep: + return this->emitControlFlowRep(ctx); + case DxsoOpcode::EndRep: + return this->emitControlFlowEndRep(ctx); + + case DxsoOpcode::Break: + return this->emitControlFlowBreak(ctx); + case DxsoOpcode::BreakC: + return this->emitControlFlowBreakC(ctx); + + case DxsoOpcode::If: + case DxsoOpcode::Ifc: + return this->emitControlFlowIf(ctx); + case DxsoOpcode::Else: + return this->emitControlFlowElse(ctx); + case DxsoOpcode::EndIf: + return this->emitControlFlowEndIf(ctx); + + case DxsoOpcode::TexCoord: + return this->emitTexCoord(ctx); + + case DxsoOpcode::Tex: + case DxsoOpcode::TexLdl: + case DxsoOpcode::TexLdd: + case DxsoOpcode::TexDp3Tex: + case DxsoOpcode::TexReg2Ar: + case DxsoOpcode::TexReg2Gb: + case DxsoOpcode::TexReg2Rgb: + case DxsoOpcode::TexBem: + case DxsoOpcode::TexM3x2Tex: + case DxsoOpcode::TexM3x3Tex: + case DxsoOpcode::TexM3x3Spec: + case DxsoOpcode::TexM3x3VSpec: + return this->emitTextureSample(ctx); + case DxsoOpcode::TexKill: + return this->emitTextureKill(ctx); + + case DxsoOpcode::TexM3x3Pad: + case DxsoOpcode::TexM3x2Pad: + // We don't need to do anything here, these are just padding instructions + break; + + case DxsoOpcode::End: + case DxsoOpcode::Comment: + break; + + default: + Logger::warn(str::format("DxsoCompiler::processInstruction: unhandled opcode: ", opcode)); + break; + } + } + + void DxsoCompiler::finalize() { + if (m_programInfo.type() == DxsoProgramTypes::VertexShader) + this->emitVsFinalize(); + else + this->emitPsFinalize(); + + // Declare the entry point, we now have 
all the + // information we need, including the interfaces + m_module.addEntryPoint(m_entryPointId, + m_programInfo.executionModel(), "main", + m_entryPointInterfaces.size(), + m_entryPointInterfaces.data()); + m_module.setDebugName(m_entryPointId, "main"); + } + + + DxsoPermutations DxsoCompiler::compile() { + DxsoPermutations permutations = { }; + + // Create the shader module object + permutations[D3D9ShaderPermutations::None] = compileShader(); + + // If we need to add more permutations, might be worth making a copy of module + // before we do anything more. :-) + if (m_programInfo.type() == DxsoProgramType::PixelShader) { + if (m_ps.diffuseColorIn) + m_module.decorate(m_ps.diffuseColorIn, spv::DecorationFlat); + + if (m_ps.specularColorIn) + m_module.decorate(m_ps.specularColorIn, spv::DecorationFlat); + + permutations[D3D9ShaderPermutations::FlatShade] = compileShader(); + } + + return permutations; + } + + + Rc DxsoCompiler::compileShader() { + DxvkShaderOptions shaderOptions = { }; + DxvkShaderConstData constData = { }; + + return new DxvkShader( + m_programInfo.shaderStage(), + m_resourceSlots.size(), + m_resourceSlots.data(), + m_interfaceSlots, + m_module.compile(), + shaderOptions, + std::move(constData)); + } + + void DxsoCompiler::emitInit() { + // Set up common capabilities for all shaders + m_module.enableCapability(spv::CapabilityShader); + m_module.enableCapability(spv::CapabilityImageQuery); + + this->emitDclConstantBuffer(); + this->emitDclInputArray(); + + // Initialize the shader module with capabilities + // etc. Each shader type has its own peculiarities. 
+ switch (m_programInfo.type()) { + case DxsoProgramTypes::VertexShader: return this->emitVsInit(); + case DxsoProgramTypes::PixelShader: return this->emitPsInit(); + default: break; + } + } + + + void DxsoCompiler::emitDclConstantBuffer() { + std::array members = { + // float f[256 or 224 or 8192] + m_module.defArrayTypeUnique( + getVectorTypeId({ DxsoScalarType::Float32, 4 }), + m_module.constu32(m_layout->floatCount)), + + // int i[16 or 2048] + m_module.defArrayTypeUnique( + getVectorTypeId({ DxsoScalarType::Sint32, 4 }), + m_module.constu32(m_layout->intCount)), + + // uint32_t boolBitmask + // or uvec4 boolBitmask[512] + // Defined later... + 0 + }; + + // Decorate array strides, this is required. + m_module.decorateArrayStride(members[0], 16); + m_module.decorateArrayStride(members[1], 16); + + if (m_layout->bitmaskCount == 1) { + members[2] = getScalarTypeId(DxsoScalarType::Uint32); + } + else { + // Must be a multiple of 4 otherwise. + members[2] = m_module.defArrayTypeUnique( + getVectorTypeId({ DxsoScalarType::Uint32, 4 }), + m_module.constu32(m_layout->bitmaskCount / 4)); + + m_module.decorateArrayStride(members[2], 16); + } + + const uint32_t structType = + m_module.defStructType(members.size(), members.data()); + + m_module.decorateBlock(structType); + + m_module.memberDecorateOffset(structType, 0, m_layout->floatOffset()); + m_module.memberDecorateOffset(structType, 1, m_layout->intOffset()); + m_module.memberDecorateOffset(structType, 2, m_layout->bitmaskOffset()); + + m_module.setDebugName(structType, "cbuffer_t"); + m_module.setDebugMemberName(structType, 0, "f"); + m_module.setDebugMemberName(structType, 1, "i"); + m_module.setDebugMemberName(structType, 2, "b"); + + m_cBuffer = m_module.newVar( + m_module.defPointerType(structType, spv::StorageClassUniform), + spv::StorageClassUniform); + + m_module.setDebugName(m_cBuffer, "c"); + + const uint32_t bindingId = computeResourceSlotId( + m_programInfo.type(), DxsoBindingType::ConstantBuffer, + 0); + 
+ m_module.decorateDescriptorSet(m_cBuffer, 0); + m_module.decorateBinding(m_cBuffer, bindingId); + + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + resource.access = VK_ACCESS_UNIFORM_READ_BIT; + m_resourceSlots.push_back(resource); + } + + + void DxsoCompiler::emitDclInputArray() { + DxsoArrayType info; + info.ctype = DxsoScalarType::Float32; + info.ccount = 4; + info.alength = DxsoMaxInterfaceRegs; + + uint32_t arrayTypeId = getArrayTypeId(info); + + // Define the actual variable. Note that this is private + // because we will copy input registers + // to the array during the setup phase. + const uint32_t ptrTypeId = m_module.defPointerType( + arrayTypeId, spv::StorageClassPrivate); + + m_vArray = m_module.newVar( + ptrTypeId, spv::StorageClassPrivate); + m_module.setDebugName(m_vArray, "v"); + } + + void DxsoCompiler::emitDclOutputArray() { + DxsoArrayType info; + info.ctype = DxsoScalarType::Float32; + info.ccount = 4; + info.alength = m_programInfo.type() == DxsoProgramTypes::VertexShader + ? DxsoMaxInterfaceRegs + : caps::MaxSimultaneousRenderTargets; + + uint32_t arrayTypeId = getArrayTypeId(info); + + // Define the actual variable. Note that this is private + // because we will copy input registers + // to the array during the setup phase. 
+ const uint32_t ptrTypeId = m_module.defPointerType( + arrayTypeId, spv::StorageClassPrivate); + + m_oArray = m_module.newVar( + ptrTypeId, spv::StorageClassPrivate); + m_module.setDebugName(m_oArray, "o"); + } + + + void DxsoCompiler::emitVsInit() { + m_module.enableCapability(spv::CapabilityClipDistance); + m_module.enableCapability(spv::CapabilityDrawParameters); + + m_module.enableExtension("SPV_KHR_shader_draw_parameters"); + + // Only VS needs this, because PS has + // non-indexable specialized output regs + this->emitDclOutputArray(); + + // Main function of the vertex shader + m_vs.functionId = m_module.allocateId(); + m_module.setDebugName(m_vs.functionId, "vs_main"); + + this->setupRenderStateInfo(); + + this->emitFunctionBegin( + m_vs.functionId, + m_module.defVoidType(), + m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr)); + this->emitFunctionLabel(); + } + + + void DxsoCompiler::emitPsSharedConstants() { + m_ps.sharedState = GetSharedConstants(m_module); + + const uint32_t bindingId = computeResourceSlotId( + m_programInfo.type(), DxsoBindingType::ConstantBuffer, + PSShared); + + m_module.decorateDescriptorSet(m_ps.sharedState, 0); + m_module.decorateBinding(m_ps.sharedState, bindingId); + + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + resource.access = VK_ACCESS_UNIFORM_READ_BIT; + m_resourceSlots.push_back(resource); + } + + + void DxsoCompiler::emitPsInit() { + m_module.enableCapability(spv::CapabilityDerivativeControl); + + m_module.setExecutionMode(m_entryPointId, + spv::ExecutionModeOriginUpperLeft); + + + // Main function of the pixel shader + m_ps.functionId = m_module.allocateId(); + m_module.setDebugName(m_ps.functionId, "ps_main"); + + if (m_programInfo.majorVersion() < 2) { + m_ps.samplerTypeSpec = m_module.specConst32(m_module.defIntType(32, 0), 0); + m_module.decorateSpecId(m_ps.samplerTypeSpec, 
getSpecId(D3D9SpecConstantId::SamplerType)); + m_module.setDebugName(m_ps.samplerTypeSpec, "s_sampler_types"); + + m_ps.projectionSpec = m_module.specConst32(m_module.defIntType(32, 0), 0); + m_module.decorateSpecId(m_ps.projectionSpec, getSpecId(D3D9SpecConstantId::ProjectionType)); + m_module.setDebugName(m_ps.projectionSpec, "s_projections"); + } + + this->setupRenderStateInfo(); + this->emitPsSharedConstants(); + + this->emitFunctionBegin( + m_ps.functionId, + m_module.defVoidType(), + m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr)); + this->emitFunctionLabel(); + + // We may have to defer kill operations to the end of + // the shader in order to keep derivatives correct. + if (m_analysis->usesKill && m_moduleInfo.options.useDemoteToHelperInvocation) { + // This extension basically implements D3D-style discard + m_module.enableExtension("SPV_EXT_demote_to_helper_invocation"); + m_module.enableCapability(spv::CapabilityDemoteToHelperInvocationEXT); + } + else if (m_analysis->usesKill && m_analysis->usesDerivatives) { + m_ps.killState = m_module.newVarInit( + m_module.defPointerType(m_module.defBoolType(), spv::StorageClassPrivate), + spv::StorageClassPrivate, m_module.constBool(false)); + + m_module.setDebugName(m_ps.killState, "ps_kill"); + + if (m_moduleInfo.options.useSubgroupOpsForEarlyDiscard) { + m_module.enableCapability(spv::CapabilityGroupNonUniform); + m_module.enableCapability(spv::CapabilityGroupNonUniformBallot); + + DxsoRegisterInfo laneId; + laneId.type = { DxsoScalarType::Uint32, 1, 0 }; + laneId.sclass = spv::StorageClassInput; + + m_ps.builtinLaneId = emitNewBuiltinVariable( + laneId, spv::BuiltInSubgroupLocalInvocationId, + "fLaneId", 0); + } + } + } + + + void DxsoCompiler::emitFunctionBegin( + uint32_t entryPoint, + uint32_t returnType, + uint32_t funcType) { + this->emitFunctionEnd(); + + m_module.functionBegin( + returnType, entryPoint, funcType, + spv::FunctionControlMaskNone); + + m_insideFunction = true; + } + + + void 
DxsoCompiler::emitFunctionEnd() { + if (m_insideFunction) { + m_module.opReturn(); + m_module.functionEnd(); + } + + m_insideFunction = false; + } + + + uint32_t DxsoCompiler::emitFunctionLabel() { + uint32_t labelId = m_module.allocateId(); + m_module.opLabel(labelId); + return labelId; + } + + + void DxsoCompiler::emitMainFunctionBegin() { + this->emitFunctionBegin( + m_entryPointId, + m_module.defVoidType(), + m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr)); + m_mainFuncLabel = this->emitFunctionLabel(); + } + + + uint32_t DxsoCompiler::emitNewVariable(const DxsoRegisterInfo& info) { + const uint32_t ptrTypeId = this->getPointerTypeId(info); + return m_module.newVar(ptrTypeId, info.sclass); + } + + + uint32_t DxsoCompiler::emitNewVariableDefault( + const DxsoRegisterInfo& info, + uint32_t value) { + const uint32_t ptrTypeId = this->getPointerTypeId(info); + if (value == 0) + return m_module.newVar(ptrTypeId, info.sclass); + else + return m_module.newVarInit(ptrTypeId, info.sclass, value); + } + + + uint32_t DxsoCompiler::emitNewBuiltinVariable( + const DxsoRegisterInfo& info, + spv::BuiltIn builtIn, + const char* name, + uint32_t value) { + const uint32_t varId = emitNewVariableDefault(info, value); + + m_module.setDebugName(varId, name); + m_module.decorateBuiltIn(varId, builtIn); + + if (m_programInfo.type() == DxsoProgramTypes::PixelShader + && info.type.ctype != DxsoScalarType::Float32 + && info.type.ctype != DxsoScalarType::Bool + && info.sclass == spv::StorageClassInput) + m_module.decorate(varId, spv::DecorationFlat); + + m_entryPointInterfaces.push_back(varId); + return varId; + } + + DxsoCfgBlock* DxsoCompiler::cfgFindBlock( + const std::initializer_list& types) { + for (auto cur = m_controlFlowBlocks.rbegin(); + cur != m_controlFlowBlocks.rend(); cur++) { + for (auto type : types) { + if (cur->type == type) + return &(*cur); + } + } + + return nullptr; + } + + spv::BuiltIn semanticToBuiltIn(bool input, DxsoSemantic semantic) { + if 
(input) + return spv::BuiltInMax; + + if (semantic == DxsoSemantic{ DxsoUsage::Position, 0 }) + return spv::BuiltInPosition; + + if (semantic == DxsoSemantic{ DxsoUsage::PointSize, 0 }) + return spv::BuiltInPointSize; + + return spv::BuiltInMax; + } + + void DxsoCompiler::emitDclInterface( + bool input, + uint32_t regNumber, + DxsoSemantic semantic, + DxsoRegMask mask, + bool centroid) { + auto& sgn = input + ? m_isgn : m_osgn; + + const bool pixel = m_programInfo.type() == DxsoProgramTypes::PixelShader; + const bool vertex = !pixel; + + uint32_t slot = 0; + + uint32_t& slots = input + ? m_interfaceSlots.inputSlots + : m_interfaceSlots.outputSlots; + + uint16_t& explicits = input + ? m_explicitInputs + : m_explicitOutputs; + + // Some things we consider builtins could be packed in an output reg. + bool builtin = semanticToBuiltIn(input, semantic) != spv::BuiltInMax; + + uint32_t i = sgn.elemCount++; + + if (input && vertex) { + // Any slot will do! Let's chose the next one + slot = i; + } + else if ( (!input && vertex) + || (input && pixel ) ) { + // Don't register the slot if it belongs to a builtin + if (!builtin) + slot = RegisterLinkerSlot(semantic); + } + else { //if (!input && pixel) + // We want to make the output slot the same as the + // output register for pixel shaders so they go to + // the right render target. + slot = regNumber; + } + + // Don't want to mark down any of these builtins. + if (!builtin) + slots |= 1u << slot; + explicits |= 1u << regNumber; + + auto& elem = sgn.elems[i]; + elem.slot = slot; + elem.regNumber = regNumber; + elem.semantic = semantic; + elem.mask = mask; + elem.centroid = centroid; + } + + void DxsoCompiler::emitDclSampler( + uint32_t idx, + DxsoTextureType type) { + m_usedSamplers |= (1u << idx); + + auto DclSampler = [this]( + uint32_t idx, + DxsoSamplerType type, + bool depth, + bool implicit) { + // Setup our combines sampler. + DxsoSamplerInfo& sampler = !depth + ? 
m_samplers[idx].color[type] + : m_samplers[idx].depth[type]; + + spv::Dim dimensionality; + VkImageViewType viewType; + + const char* suffix = "_2d"; + + switch (type) { + default: + case SamplerTypeTexture2D: + sampler.dimensions = 2; + dimensionality = spv::Dim2D; + viewType = VK_IMAGE_VIEW_TYPE_2D; + break; + + case SamplerTypeTextureCube: + suffix = "_cube"; + sampler.dimensions = 3; + dimensionality = spv::DimCube; + viewType = VK_IMAGE_VIEW_TYPE_CUBE; + break; + + case SamplerTypeTexture3D: + suffix = "_3d"; + sampler.dimensions = 3; + dimensionality = spv::Dim3D; + viewType = VK_IMAGE_VIEW_TYPE_3D; + break; + } + + sampler.typeId = m_module.defImageType( + m_module.defFloatType(32), + dimensionality, depth ? 1 : 0, 0, 0, 1, + spv::ImageFormatUnknown); + + sampler.typeId = m_module.defSampledImageType(sampler.typeId); + + sampler.varId = m_module.newVar( + m_module.defPointerType( + sampler.typeId, spv::StorageClassUniformConstant), + spv::StorageClassUniformConstant); + + std::string name = str::format("s", idx, suffix, depth ? "_shadow" : ""); + m_module.setDebugName(sampler.varId, name.c_str()); + + const uint32_t bindingId = computeResourceSlotId(m_programInfo.type(), + !depth ? DxsoBindingType::ColorImage : DxsoBindingType::DepthImage, + idx); + + m_module.decorateDescriptorSet(sampler.varId, 0); + m_module.decorateBinding (sampler.varId, bindingId); + + // Store descriptor info for the shader interface + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + resource.view = implicit ? VK_IMAGE_VIEW_TYPE_MAX_ENUM : viewType; + resource.access = VK_ACCESS_SHADER_READ_BIT; + m_resourceSlots.push_back(resource); + }; + + if (m_programInfo.majorVersion() >= 2) { + DxsoSamplerType samplerType = + SamplerTypeFromTextureType(type); + + DclSampler(idx, samplerType, false, false); + + if (samplerType != SamplerTypeTexture3D) { + // We could also be depth compared! 
+ DclSampler(idx, samplerType, true, false); + } + } + else { + // Could be any of these! + // We will check with the spec constant at sample time. + for (uint32_t i = 0; i < SamplerTypeCount; i++) { + auto samplerType = static_cast(i); + + DclSampler(idx, samplerType, false, true); + + if (samplerType != SamplerTypeTexture3D) + DclSampler(idx, samplerType, true, true); + } + } + + + // Declare a specialization constant which will + // store whether or not the depth view is bound. + const uint32_t depthBinding = computeResourceSlotId(m_programInfo.type(), + DxsoBindingType::DepthImage, idx); + + DxsoSampler& sampler = m_samplers[idx]; + + sampler.depthSpecConst = m_module.specConstBool(true); + sampler.type = type; + m_module.decorateSpecId(sampler.depthSpecConst, depthBinding); + m_module.setDebugName(sampler.depthSpecConst, + str::format("s", idx, "_useShadow").c_str()); + } + + + uint32_t DxsoCompiler::emitArrayIndex( + uint32_t idx, + const DxsoBaseRegister* relative) { + uint32_t result = m_module.consti32(idx); + + if (relative != nullptr) { + DxsoRegisterValue offset = emitRegisterLoad(*relative, DxsoRegMask(true, false, false, false), nullptr); + + result = m_module.opIAdd( + getVectorTypeId(offset.type), + result, offset.id); + } + + return result; + } + + + DxsoRegisterPointer DxsoCompiler::emitInputPtr( + bool texture, + const DxsoBaseRegister& reg, + const DxsoBaseRegister* relative) { + uint32_t idx = reg.id.num; + + // Account for the two color regs. 
+ if (texture) + idx += 2; + + DxsoRegisterPointer input; + + input.type = DxsoVectorType{ DxsoScalarType::Float32, 4 }; + + uint32_t index = this->emitArrayIndex(idx, relative); + + const uint32_t typeId = getVectorTypeId(input.type); + input.id = m_module.opAccessChain( + m_module.defPointerType(typeId, spv::StorageClassPrivate), + m_vArray, + 1, &index); + + return input; + } + + DxsoRegisterPointer DxsoCompiler::emitRegisterPtr( + const char* name, + DxsoScalarType ctype, + uint32_t ccount, + uint32_t defaultVal, + spv::StorageClass storageClass, + spv::BuiltIn builtIn) { + DxsoRegisterPointer result; + + DxsoRegisterInfo info; + info.type.ctype = ctype; + info.type.ccount = ccount; + info.type.alength = 1; + info.sclass = storageClass; + + result.type = DxsoVectorType{ ctype, ccount }; + if (builtIn == spv::BuiltInMax) { + result.id = this->emitNewVariableDefault(info, defaultVal); + m_module.setDebugName(result.id, name); + } + else { + result.id = this->emitNewBuiltinVariable( + info, builtIn, name, defaultVal); + } + + return result; + } + + + DxsoRegisterValue DxsoCompiler::emitLoadConstant( + const DxsoBaseRegister& reg, + const DxsoBaseRegister* relative) { + // struct cBuffer_t { + // + // Type Member Index + // + // float f[256 or 224]; 0 + // int32_t i[16]; 1 + // uint32_t boolBitmask; 2 + // } + DxsoRegisterValue result = { }; + + switch (reg.id.type) { + case DxsoRegisterType::Const: + result.type = { DxsoScalarType::Float32, 4 }; + + if (!relative) { + result.id = m_cFloat.at(reg.id.num); + m_meta.maxConstIndexF = std::max(m_meta.maxConstIndexF, reg.id.num + 1); + m_meta.maxConstIndexF = std::min(m_meta.maxConstIndexF, m_layout->floatCount); + } else { + m_meta.maxConstIndexF = m_layout->floatCount; + m_meta.needsConstantCopies |= m_moduleInfo.options.strictConstantCopies + || m_cFloat.at(reg.id.num) != 0; + } + break; + + case DxsoRegisterType::ConstInt: + result.type = { DxsoScalarType::Sint32, 4 }; + result.id = m_cInt.at(reg.id.num); + 
m_meta.maxConstIndexI = std::max(m_meta.maxConstIndexI, reg.id.num + 1); + m_meta.maxConstIndexI = std::min(m_meta.maxConstIndexI, m_layout->intCount); + break; + + case DxsoRegisterType::ConstBool: + result.type = { DxsoScalarType::Bool, 1 }; + result.id = m_cBool.at(reg.id.num); + m_meta.maxConstIndexB = std::max(m_meta.maxConstIndexB, reg.id.num + 1); + m_meta.maxConstIndexB = std::min(m_meta.maxConstIndexB, m_layout->boolCount); + break; + + default: break; + } + + if (result.id) + return result; + + uint32_t relativeIdx = this->emitArrayIndex(reg.id.num, relative); + + if (reg.id.type != DxsoRegisterType::ConstBool) { + uint32_t structIdx = reg.id.type == DxsoRegisterType::Const + ? m_module.constu32(0) + : m_module.constu32(1); + + std::array indices = { structIdx, relativeIdx }; + + uint32_t typeId = getVectorTypeId(result.type); + uint32_t ptrId = m_module.opAccessChain( + m_module.defPointerType(typeId, spv::StorageClassUniform), + m_cBuffer, indices.size(), indices.data()); + + result.id = m_module.opLoad(typeId, ptrId); + + if (relative) { + uint32_t constCount = m_module.constu32(m_layout->floatCount); + + // Expand condition to bvec4 since the result has four components + uint32_t cond = m_module.opULessThan(m_module.defBoolType(), relativeIdx, constCount); + std::array condIds = { cond, cond, cond, cond }; + + cond = m_module.opCompositeConstruct( + m_module.defVectorType(m_module.defBoolType(), 4), + condIds.size(), condIds.data()); + + result.id = m_module.opSelect(typeId, cond, result.id, + m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f)); + } + } else { + // Bool constants have no relative indexing, so we can do the bitfield + // magic for SWVP at compile time. + + uint32_t uintType = getScalarTypeId(DxsoScalarType::Uint32); + uint32_t uvec4Type = getVectorTypeId({ DxsoScalarType::Uint32, 4 }); + + std::array indices = { m_module.constu32(2), m_module.constu32(reg.id.num / 128) }; + + uint32_t indexCount = m_layout->bitmaskCount == 1 ? 
1 : 2; + uint32_t accessType = m_layout->bitmaskCount == 1 ? uintType : uvec4Type; + + uint32_t ptrId = m_module.opAccessChain( + m_module.defPointerType(accessType, spv::StorageClassUniform), + m_cBuffer, indexCount, indices.data()); + + uint32_t bitIdx = m_module.consti32(reg.id.num % 32); + + uint32_t bitfield = m_module.opLoad(accessType, ptrId); + if (m_layout->bitmaskCount != 1) { + uint32_t index = (reg.id.num % 128) / 32; + bitfield = m_module.opCompositeExtract(uintType, bitfield, 1, &index); + } + uint32_t bit = m_module.opBitFieldUExtract( + uintType, bitfield, bitIdx, m_module.consti32(1)); + + result.id = m_module.opINotEqual( + getVectorTypeId(result.type), + bit, m_module.constu32(0)); + } + + return result; + } + + + DxsoRegisterPointer DxsoCompiler::emitOutputPtr( + bool texcrdOut, + const DxsoBaseRegister& reg, + const DxsoBaseRegister* relative) { + uint32_t idx = reg.id.num; + + // Account for the two color regs. + if (texcrdOut) + idx += 2; + + DxsoRegisterPointer input; + + input.type = DxsoVectorType{ DxsoScalarType::Float32, 4 }; + + uint32_t index = this->emitArrayIndex(idx, relative); + + const uint32_t typeId = getVectorTypeId(input.type); + input.id = m_module.opAccessChain( + m_module.defPointerType(typeId, spv::StorageClassPrivate), + m_oArray, + 1, &index); + + return input; + } + + + DxsoRegisterPointer DxsoCompiler::emitGetOperandPtr( + const DxsoBaseRegister& reg, + const DxsoBaseRegister* relative) { + switch (reg.id.type) { + case DxsoRegisterType::Temp: { + DxsoRegisterPointer& ptr = m_rRegs.at(reg.id.num); + if (ptr.id == 0) { + std::string name = str::format("r", reg.id.num); + ptr = this->emitRegisterPtr( + name.c_str(), DxsoScalarType::Float32, 4, + m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f)); + } + return ptr; + } + + case DxsoRegisterType::Input: { + if (!(m_explicitInputs & 1u << reg.id.num)) { + this->emitDclInterface( + true, reg.id.num, + DxsoSemantic{ DxsoUsage::Color, reg.id.num }, + IdentityWriteMask, false); + } 
+ + return this->emitInputPtr(false, reg, relative); + } + + case DxsoRegisterType::PixelTexcoord: + case DxsoRegisterType::Texture: { + if (m_programInfo.type() == DxsoProgramTypes::PixelShader) { + // Texture register + + // SM2, or SM 1.4 + if (reg.id.type == DxsoRegisterType::PixelTexcoord + || m_programInfo.majorVersion() >= 2 + || (m_programInfo.majorVersion() == 1 + && m_programInfo.minorVersion() == 4)) { + uint32_t adjustedNumber = reg.id.num + 2; + if (!(m_explicitInputs & 1u << adjustedNumber)) { + this->emitDclInterface( + true, adjustedNumber, + DxsoSemantic{ DxsoUsage::Texcoord, reg.id.num }, + IdentityWriteMask, false); + } + + return this->emitInputPtr(true, reg, relative); + } + else { + // User must use tex/texcoord to put data in this private register. + // We use the an oob id which fxc never generates for the texcoord data. + DxsoRegisterPointer& ptr = m_tRegs.at(reg.id.num); + if (ptr.id == 0) { + std::string name = str::format("t", reg.id.num); + ptr = this->emitRegisterPtr( + name.c_str(), DxsoScalarType::Float32, 4, + m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f)); + } + return ptr; + } + } + else { + // Address register + if (m_vs.addr.id == 0) { + m_vs.addr = this->emitRegisterPtr( + "a0", DxsoScalarType::Sint32, 4, + m_module.constvec4i32(0, 0, 0, 0)); + } + return m_vs.addr; + } + } + + case DxsoRegisterType::RasterizerOut: + switch (reg.id.num) { + case RasterOutPosition: + if (m_vs.oPos.id == 0) { + m_vs.oPos = this->emitRegisterPtr( + "oPos", DxsoScalarType::Float32, 4, + m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f), + spv::StorageClassOutput, spv::BuiltInPosition); + } + return m_vs.oPos; + + case RasterOutFog: + if (m_fog.id == 0) { + bool input = m_programInfo.type() == DxsoProgramType::PixelShader; + DxsoSemantic semantic = DxsoSemantic{ DxsoUsage::Fog, 0 }; + + uint32_t slot = RegisterLinkerSlot(semantic); + + uint32_t& slots = input + ? 
m_interfaceSlots.inputSlots + : m_interfaceSlots.outputSlots; + + slots |= 1u << slot; + + m_fog = this->emitRegisterPtr( + input ? "vFog" : "oFog", + DxsoScalarType::Float32, 1, + input ? 0 : m_module.constf32(1.0f), + input ? spv::StorageClassInput : spv::StorageClassOutput); + + m_entryPointInterfaces.push_back(m_fog.id); + + m_module.decorateLocation(m_fog.id, slot); + } + return m_fog; + + case RasterOutPointSize: + if (m_vs.oPSize.id == 0) { + m_vs.oPSize = this->emitRegisterPtr( + "oPSize", DxsoScalarType::Float32, 1, + m_module.constf32(0.0f), + spv::StorageClassOutput, spv::BuiltInPointSize); + } + return m_vs.oPSize; + } + + case DxsoRegisterType::ColorOut: { + uint32_t idx = std::min(reg.id.num, 4u); + + if (m_ps.oColor[idx].id == 0) { + std::string name = str::format("oC", idx); + m_ps.oColor[idx] = this->emitRegisterPtr( + name.c_str(), DxsoScalarType::Float32, 4, + m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f), + spv::StorageClassOutput); + + m_interfaceSlots.outputSlots |= 1u << idx; + m_module.decorateLocation(m_ps.oColor[idx].id, idx); + m_module.decorateIndex(m_ps.oColor[idx].id, 0); + + m_entryPointInterfaces.push_back(m_ps.oColor[idx].id); + m_usedRTs |= (1u << idx); + } + return m_ps.oColor[idx]; + } + + case DxsoRegisterType::AttributeOut: { + auto ptr = this->emitOutputPtr(false, reg, nullptr); + + if (!(m_explicitOutputs & 1u << reg.id.num)) { + this->emitDclInterface( + false, reg.id.num, + DxsoSemantic{ DxsoUsage::Color, reg.id.num }, + IdentityWriteMask, false); // TODO: Do we want to make this centroid? + + m_module.opStore(ptr.id, m_module.constfReplicant(0, ptr.type.ccount)); + } + + return ptr; + } + + case DxsoRegisterType::Output: { + bool texcrdOut = m_programInfo.type() == DxsoProgramTypes::VertexShader + && m_programInfo.majorVersion() != 3; + + auto ptr = this->emitOutputPtr(texcrdOut, reg, !texcrdOut ? 
relative : nullptr); + + if (texcrdOut) { + uint32_t adjustedNumber = reg.id.num + 2; + if (!(m_explicitOutputs & 1u << adjustedNumber)) { + this->emitDclInterface( + false, adjustedNumber, + DxsoSemantic{ DxsoUsage::Texcoord, reg.id.num }, + IdentityWriteMask, false); + + m_module.opStore(ptr.id, m_module.constfReplicant(0, ptr.type.ccount)); + } + } + + return ptr; + } + + case DxsoRegisterType::DepthOut: + if (m_ps.oDepth.id == 0) { + m_module.setExecutionMode(m_entryPointId, + spv::ExecutionModeDepthReplacing); + + m_ps.oDepth = this->emitRegisterPtr( + "oDepth", DxsoScalarType::Float32, 1, + m_module.constf32(0.0f), + spv::StorageClassOutput, spv::BuiltInFragDepth); + } + return m_ps.oDepth; + + case DxsoRegisterType::Loop: + if (m_loopCounter.id == 0) { + m_loopCounter = this->emitRegisterPtr( + "aL", DxsoScalarType::Sint32, 1, + m_module.consti32(0)); + } + return m_loopCounter; + + case DxsoRegisterType::MiscType: + if (reg.id.num == MiscTypePosition) { + if (m_ps.vPos.id == 0) { + DxsoRegisterPointer fragCoord = this->emitRegisterPtr( + "ps_frag_coord", DxsoScalarType::Float32, 4, 0, + spv::StorageClassInput, spv::BuiltInFragCoord); + + DxsoRegisterValue val = this->emitValueLoad(fragCoord); + val.id = m_module.opFSub( + getVectorTypeId(val.type), val.id, + m_module.constvec4f32(0.5f, 0.5f, 0.0f, 0.0f)); + + m_ps.vPos = this->emitRegisterPtr( + "vPos", DxsoScalarType::Float32, 4, 0); + + m_module.opStore(m_ps.vPos.id, val.id); + } + return m_ps.vPos; + } + else { // MiscTypeFace + if (m_ps.vFace.id == 0) { + DxsoRegisterPointer faceBool = this->emitRegisterPtr( + "ps_is_front_face", DxsoScalarType::Bool, 1, 0, + spv::StorageClassInput, spv::BuiltInFrontFacing); + + DxsoRegisterValue frontFace = emitValueLoad(faceBool); + DxsoRegisterValue selectOp = emitRegisterExtend(frontFace, 4); + + m_ps.vFace = this->emitRegisterPtr( + "vFace", DxsoScalarType::Float32, 4, 0); + + m_module.opStore( + m_ps.vFace.id, + m_module.opSelect(getVectorTypeId(m_ps.vFace.type), 
selectOp.id, + m_module.constvec4f32( 1.0f, 1.0f, 1.0f, 1.0f), + m_module.constvec4f32(-1.0f, -1.0f, -1.0f, -1.0f))); + } + return m_ps.vFace; + } + + case DxsoRegisterType::Predicate: { + DxsoRegisterPointer& ptr = m_pRegs.at(reg.id.num); + if (ptr.id == 0) { + std::string name = str::format("p", reg.id.num); + ptr = this->emitRegisterPtr( + name.c_str(), DxsoScalarType::Bool, 4, + m_module.constvec4b32(false, false, false, false)); + } + return ptr; + } + + default: { + //Logger::warn(str::format("emitGetOperandPtr: unhandled reg type: ", reg.id.type)); + + DxsoRegisterPointer nullPointer; + nullPointer.id = 0; + return nullPointer; + } + } + } + + + uint32_t DxsoCompiler::emitBoolComparison(DxsoVectorType type, DxsoComparison cmp, uint32_t a, uint32_t b) { + const uint32_t typeId = getVectorTypeId(type); + switch (cmp) { + default: + case DxsoComparison::Never: return m_module.constbReplicant(false, type.ccount); break; + case DxsoComparison::GreaterThan: return m_module.opFOrdGreaterThan (typeId, a, b); break; + case DxsoComparison::Equal: return m_module.opFOrdEqual (typeId, a, b); break; + case DxsoComparison::GreaterEqual: return m_module.opFOrdGreaterThanEqual(typeId, a, b); break; + case DxsoComparison::LessThan: return m_module.opFOrdLessThan (typeId, a, b); break; + case DxsoComparison::NotEqual: return m_module.opFOrdNotEqual (typeId, a, b); break; + case DxsoComparison::LessEqual: return m_module.opFOrdLessThanEqual (typeId, a, b); break; + case DxsoComparison::Always: return m_module.constbReplicant(true, type.ccount); break; + } +} + + + DxsoRegisterValue DxsoCompiler::emitValueLoad( + DxsoRegisterPointer ptr) { + DxsoRegisterValue result; + result.type = ptr.type; + result.id = m_module.opLoad( + getVectorTypeId(result.type), + ptr.id); + return result; + } + + + DxsoRegisterValue DxsoCompiler::applyPredicate(DxsoRegisterValue pred, DxsoRegisterValue dst, DxsoRegisterValue src) { + if (dst.type.ccount != pred.type.ccount) { + DxsoRegMask mask = 
DxsoRegMask( + pred.type.ccount > 0, + pred.type.ccount > 1, + pred.type.ccount > 2, + pred.type.ccount > 3); + + pred = emitRegisterSwizzle(pred, IdentitySwizzle, mask); + } + + dst.id = m_module.opSelect( + getVectorTypeId(dst.type), + pred.id, + src.id, dst.id); + + return dst; + } + + + void DxsoCompiler::emitValueStore( + DxsoRegisterPointer ptr, + DxsoRegisterValue value, + DxsoRegMask writeMask, + DxsoRegisterValue predicate) { + // If the source value consists of only one component, + // it is stored in all components of the destination. + if (value.type.ccount == 1) + value = emitRegisterExtend(value, writeMask.popCount()); + + if (ptr.type.ccount == writeMask.popCount()) { + if (predicate.id) + value = applyPredicate(predicate, emitValueLoad(ptr), value); + + // Simple case: We write to the entire register + m_module.opStore(ptr.id, value.id); + } else { + // We only write to part of the destination + // register, so we need to load and modify it + DxsoRegisterValue tmp = emitValueLoad(ptr); + tmp = emitRegisterInsert(tmp, value, writeMask); + + if (predicate.id) + value = applyPredicate(predicate, emitValueLoad(ptr), tmp); + + m_module.opStore(ptr.id, tmp.id); + } + } + + + DxsoRegisterValue DxsoCompiler::emitClampBoundReplicant( + DxsoRegisterValue srcValue, + float lb, + float ub) { + srcValue.id = m_module.opFClamp(getVectorTypeId(srcValue.type), srcValue.id, + m_module.constfReplicant(lb, srcValue.type.ccount), + m_module.constfReplicant(ub, srcValue.type.ccount)); + + return srcValue; + } + + + DxsoRegisterValue DxsoCompiler::emitSaturate( + DxsoRegisterValue srcValue) { + return emitClampBoundReplicant(srcValue, 0.0f, 1.0f); + } + + + DxsoRegisterValue DxsoCompiler::emitDot( + DxsoRegisterValue a, + DxsoRegisterValue b) { + DxsoRegisterValue dot; + dot.type = a.type; + dot.type.ccount = 1; + + dot.id = m_module.opDot(getVectorTypeId(dot.type), a.id, b.id); + + return dot; + } + + + DxsoRegisterValue DxsoCompiler::emitRegisterInsert( + 
DxsoRegisterValue       dstValue,
            DxsoRegisterValue       srcValue,
            DxsoRegMask             srcMask) {
    DxsoRegisterValue result;
    result.type = dstValue.type;

    const uint32_t typeId = getVectorTypeId(result.type);

    if (srcMask.popCount() == 0) {
      // Nothing to do if the insertion mask is empty
      result.id = dstValue.id;
    } else if (dstValue.type.ccount == 1) {
      // Both values are scalar, so the first component
      // of the write mask decides which one to take.
      result.id = srcMask[0] ? srcValue.id : dstValue.id;
    } else if (srcValue.type.ccount == 1) {
      // The source value is scalar. Since OpVectorShuffle
      // requires both arguments to be vectors, we have to
      // use OpCompositeInsert to modify the vector instead.
      const uint32_t componentId = srcMask.firstSet();

      result.id = m_module.opCompositeInsert(typeId,
        srcValue.id, dstValue.id, 1, &componentId);
    } else {
      // Both arguments are vectors. We can determine which
      // components to take from which vector and use the
      // OpVectorShuffle instruction.
      // Shuffle indices >= dstValue.type.ccount refer to srcValue.
      std::array<uint32_t, 4> components;
      uint32_t srcComponentId = dstValue.type.ccount;

      for (uint32_t i = 0; i < dstValue.type.ccount; i++)
        components.at(i) = srcMask[i] ?
srcComponentId++ : i; + + result.id = m_module.opVectorShuffle( + typeId, dstValue.id, srcValue.id, + dstValue.type.ccount, components.data()); + } + + return result; + } + + + DxsoRegisterValue DxsoCompiler::emitRegisterLoadRaw( + const DxsoBaseRegister& reg, + const DxsoBaseRegister* relative) { + switch (reg.id.type) { + case DxsoRegisterType::Const: + case DxsoRegisterType::ConstInt: + case DxsoRegisterType::ConstBool: + return emitLoadConstant(reg, relative); + + default: + return emitValueLoad(emitGetOperandPtr(reg, relative)); + } + } + + + DxsoRegisterValue DxsoCompiler::emitRegisterExtend( + DxsoRegisterValue value, + uint32_t size) { + if (size == 1) + return value; + + std::array ids = {{ + value.id, value.id, + value.id, value.id, + }}; + + DxsoRegisterValue result; + result.type.ctype = value.type.ctype; + result.type.ccount = size; + result.id = m_module.opCompositeConstruct( + getVectorTypeId(result.type), + size, ids.data()); + return result; + } + + + DxsoRegisterValue DxsoCompiler::emitRegisterSwizzle( + DxsoRegisterValue value, + DxsoRegSwizzle swizzle, + DxsoRegMask writeMask) { + if (value.type.ccount == 1) + return emitRegisterExtend(value, writeMask.popCount()); + + std::array indices; + + uint32_t dstIndex = 0; + + for (uint32_t i = 0; i < 4; i++) { + if (writeMask[i]) + indices[dstIndex++] = swizzle[i]; + } + + // If the swizzle combined with the mask can be reduced + // to a no-op, we don't need to insert any instructions. + bool isIdentitySwizzle = dstIndex == value.type.ccount; + + for (uint32_t i = 0; i < dstIndex && isIdentitySwizzle; i++) + isIdentitySwizzle &= indices[i] == i; + + if (isIdentitySwizzle) + return value; + + // Use OpCompositeExtract if the resulting vector contains + // only one component, and OpVectorShuffle if it is a vector. 
+ DxsoRegisterValue result; + result.type.ctype = value.type.ctype; + result.type.ccount = dstIndex; + + const uint32_t typeId = getVectorTypeId(result.type); + + if (dstIndex == 1) { + result.id = m_module.opCompositeExtract( + typeId, value.id, 1, indices.data()); + } else { + result.id = m_module.opVectorShuffle( + typeId, value.id, value.id, + dstIndex, indices.data()); + } + + return result; + } + + + DxsoRegisterValue DxsoCompiler::emitSrcOperandPreSwizzleModifiers( + DxsoRegisterValue value, + DxsoRegModifier modifier) { + // r / r.z + // r / r.w + if (modifier == DxsoRegModifier::Dz + || modifier == DxsoRegModifier::Dw) { + const uint32_t index = modifier == DxsoRegModifier::Dz ? 2 : 3; + + std::array indices = { index, index, index, index }; + + uint32_t component = m_module.opVectorShuffle( + getVectorTypeId(value.type), value.id, value.id, value.type.ccount, indices.data()); + + value.id = m_module.opFDiv( + getVectorTypeId(value.type), value.id, component); + } + + return value; + } + + + DxsoRegisterValue DxsoCompiler::emitSrcOperandPostSwizzleModifiers( + DxsoRegisterValue value, + DxsoRegModifier modifier) { + // r - 0.5 + if (modifier == DxsoRegModifier::Bias + || modifier == DxsoRegModifier::BiasNeg) { + uint32_t halfVec = m_module.constfReplicant( + 0.5f, value.type.ccount); + + value.id = m_module.opFSub( + getVectorTypeId(value.type), value.id, halfVec); + } + + // fma(r, 2.0f, -1.0f) + if (modifier == DxsoRegModifier::Sign + || modifier == DxsoRegModifier::SignNeg) { + uint32_t twoVec = m_module.constfReplicant( + 2.0f, value.type.ccount); + + uint32_t minusOneVec = m_module.constfReplicant( + -1.0f, value.type.ccount); + + value.id = m_module.opFFma( + getVectorTypeId(value.type), value.id, twoVec, minusOneVec); + } + + // 1 - r + if (modifier == DxsoRegModifier::Comp) { + uint32_t oneVec = m_module.constfReplicant( + 1.0f, value.type.ccount); + + value.id = m_module.opFSub( + getVectorTypeId(value.type), oneVec, value.id); + } + + // r * 2 + 
if (modifier == DxsoRegModifier::X2 + || modifier == DxsoRegModifier::X2Neg) { + uint32_t twoVec = m_module.constfReplicant( + 2.0f, value.type.ccount); + + value.id = m_module.opFMul( + getVectorTypeId(value.type), value.id, twoVec); + } + + // abs( r ) + if (modifier == DxsoRegModifier::Abs + || modifier == DxsoRegModifier::AbsNeg) { + value.id = m_module.opFAbs( + getVectorTypeId(value.type), value.id); + } + + // !r + if (modifier == DxsoRegModifier::Not) { + value.id = + m_module.opLogicalNot(getVectorTypeId(value.type), value.id); + } + + // -r + // Treating as -r + // Treating as -r + // -r * 2 + // -abs(r) + if (modifier == DxsoRegModifier::Neg + || modifier == DxsoRegModifier::BiasNeg + || modifier == DxsoRegModifier::SignNeg + || modifier == DxsoRegModifier::X2Neg + || modifier == DxsoRegModifier::AbsNeg) { + value.id = m_module.opFNegate( + getVectorTypeId(value.type), value.id); + } + + return value; + } + + DxsoRegisterValue DxsoCompiler::emitRegisterLoad( + const DxsoBaseRegister& reg, + DxsoRegMask writeMask, + const DxsoBaseRegister* relative) { + // Load operand from the operand pointer + DxsoRegisterValue result = emitRegisterLoadRaw(reg, relative); + + // PS 1.x clamps float constants + if (m_programInfo.type() == DxsoProgramType::PixelShader && m_programInfo.majorVersion() == 1 + && reg.id.type == DxsoRegisterType::Const) + result = emitClampBoundReplicant(result, -1.0f, 1.0f); + + // Apply operand modifiers + result = emitSrcOperandPreSwizzleModifiers(result, reg.modifier); + + // Apply operand swizzle to the operand value + result = emitRegisterSwizzle(result, reg.swizzle, writeMask); + + // Apply operand modifiers + result = emitSrcOperandPostSwizzleModifiers(result, reg.modifier); + return result; + } + + void DxsoCompiler::emitDcl(const DxsoInstructionContext& ctx) { + auto id = ctx.dst.id; + + if (id.type == DxsoRegisterType::Sampler) { + this->emitDclSampler( + ctx.dst.id.num, + ctx.dcl.textureType); + } + else if (id.type == 
DxsoRegisterType::Input + || id.type == DxsoRegisterType::Texture + || id.type == DxsoRegisterType::Output) { + DxsoSemantic semantic = ctx.dcl.semantic; + + uint32_t vIndex = id.num; + + if (m_programInfo.type() == DxsoProgramTypes::PixelShader) { + // Semantic in PS < 3 is based upon id. + if (m_programInfo.majorVersion() < 3) { + // Account for the two color registers. + if (id.type == DxsoRegisterType::Texture) + vIndex += 2; + + semantic = DxsoSemantic{ + id.type == DxsoRegisterType::Texture ? DxsoUsage::Texcoord : DxsoUsage::Color, + id.num }; + } + } + + this->emitDclInterface( + id.type != DxsoRegisterType::Output, + vIndex, + semantic, + ctx.dst.mask, + ctx.dst.centroid); + } + else { + //Logger::warn(str::format("DxsoCompiler::emitDcl: unhandled register type ", id.type)); + } + } + + void DxsoCompiler::emitDef(const DxsoInstructionContext& ctx) { + switch (ctx.instruction.opcode) { + case DxsoOpcode::Def: emitDefF(ctx); break; + case DxsoOpcode::DefI: emitDefI(ctx); break; + case DxsoOpcode::DefB: emitDefB(ctx); break; + default: + throw DxvkError("DxsoCompiler::emitDef: Invalid definition opcode"); + break; + } + } + + void DxsoCompiler::emitDefF(const DxsoInstructionContext& ctx) { + const float* data = ctx.def.float32; + + uint32_t constId = m_module.constvec4f32(data[0], data[1], data[2], data[3]); + m_cFloat.at(ctx.dst.id.num) = constId; + + std::string name = str::format("cF", ctx.dst.id.num, "_def"); + m_module.setDebugName(constId, name.c_str()); + + DxsoDefinedConstant constant; + constant.uboIdx = ctx.dst.id.num; + for (uint32_t i = 0; i < 4; i++) + constant.float32[i] = data[i]; + m_constants.push_back(constant); + } + + void DxsoCompiler::emitDefI(const DxsoInstructionContext& ctx) { + const int32_t* data = ctx.def.int32; + + uint32_t constId = m_module.constvec4i32(data[0], data[1], data[2], data[3]); + m_cInt.at(ctx.dst.id.num) = constId; + + std::string name = str::format("cI", ctx.dst.id.num, "_def"); + m_module.setDebugName(constId, 
name.c_str()); + } + + void DxsoCompiler::emitDefB(const DxsoInstructionContext& ctx) { + const int32_t* data = ctx.def.int32; + + uint32_t constId = m_module.constBool(data[0] != 0); + m_cBool.at(ctx.dst.id.num) = constId; + + std::string name = str::format("cB", ctx.dst.id.num, "_def"); + m_module.setDebugName(constId, name.c_str()); + } + + + bool DxsoCompiler::isScalarRegister(DxsoRegisterId id) { + return id == DxsoRegisterId{DxsoRegisterType::DepthOut, 0} + || id == DxsoRegisterId{DxsoRegisterType::RasterizerOut, RasterOutPointSize} + || id == DxsoRegisterId{DxsoRegisterType::RasterizerOut, RasterOutFog}; + } + + + void DxsoCompiler::emitMov(const DxsoInstructionContext& ctx) { + DxsoRegisterPointer dst = emitGetOperandPtr(ctx.dst); + + DxsoRegMask mask = ctx.dst.mask; + + if (isScalarRegister(ctx.dst.id)) + mask = DxsoRegMask(true, false, false, false); + + DxsoRegisterValue src0 = emitRegisterLoad(ctx.src[0], mask); + + DxsoRegisterValue result; + result.type.ctype = dst.type.ctype; + result.type.ccount = mask.popCount(); + + const uint32_t typeId = getVectorTypeId(result.type); + + if (dst.type.ctype != src0.type.ctype) { + // We have Mova for this... but it turns out Mov has the same behaviour in d3d9! + + // Convert float -> int32_t + // and vice versa + if (dst.type.ctype == DxsoScalarType::Sint32) { + // We need to floor for VS 1.1 and below, the documentation is a dirty stinking liar. + if (m_programInfo.majorVersion() < 2 && m_programInfo.minorVersion() < 2) + result.id = m_module.opFloor(getVectorTypeId(src0.type), src0.id); + else + result.id = m_module.opRound(getVectorTypeId(src0.type), src0.id); + + result.id = m_module.opConvertFtoS(typeId, result.id); + } + else // Float32 + result.id = m_module.opConvertStoF(typeId, src0.id); + } + else // No special stuff needed! 
+ result.id = src0.id; + + this->emitDstStore(dst, result, mask, ctx.dst.saturate, emitPredicateLoad(ctx), ctx.dst.shift, ctx.dst.id); + } + + + void DxsoCompiler::emitVectorAlu(const DxsoInstructionContext& ctx) { + const auto& src = ctx.src; + + DxsoRegMask mask = ctx.dst.mask; + + DxsoRegisterPointer dst = emitGetOperandPtr(ctx.dst); + + if (isScalarRegister(ctx.dst.id)) + mask = DxsoRegMask(true, false, false, false); + + DxsoRegisterValue result; + result.type.ctype = dst.type.ctype; + result.type.ccount = mask.popCount(); + + DxsoVectorType scalarType = result.type; + scalarType.ccount = 1; + + const uint32_t typeId = getVectorTypeId(result.type); + const uint32_t scalarTypeId = getVectorTypeId(scalarType); + + const DxsoOpcode opcode = ctx.instruction.opcode; + switch (opcode) { + case DxsoOpcode::Add: + result.id = m_module.opFAdd(typeId, + emitRegisterLoad(src[0], mask).id, + emitRegisterLoad(src[1], mask).id); + break; + case DxsoOpcode::Sub: + result.id = m_module.opFSub(typeId, + emitRegisterLoad(src[0], mask).id, + emitRegisterLoad(src[1], mask).id); + break; + case DxsoOpcode::Mad: + result.id = m_module.opFFma(typeId, + emitRegisterLoad(src[0], mask).id, + emitRegisterLoad(src[1], mask).id, + emitRegisterLoad(src[2], mask).id); + break; + case DxsoOpcode::Mul: + result.id = m_module.opFMul(typeId, + emitRegisterLoad(src[0], mask).id, + emitRegisterLoad(src[1], mask).id); + break; + case DxsoOpcode::Rcp: + result.id = m_module.opFDiv(typeId, + m_module.constfReplicant(1.0f, result.type.ccount), + emitRegisterLoad(src[0], mask).id); + + if (m_moduleInfo.options.d3d9FloatEmulation) { + result.id = m_module.opNMin(typeId, result.id, + m_module.constfReplicant(FLT_MAX, result.type.ccount)); + } + break; + case DxsoOpcode::Rsq: + result.id = m_module.opFAbs(typeId, + emitRegisterLoad(src[0], mask).id); + + result.id = m_module.opInverseSqrt(typeId, + result.id); + + if (m_moduleInfo.options.d3d9FloatEmulation) { + result.id = m_module.opNMin(typeId, 
result.id, + m_module.constfReplicant(FLT_MAX, result.type.ccount)); + } + break; + case DxsoOpcode::Dp3: { + DxsoRegMask srcMask(true, true, true, false); + result = emitDot( + emitRegisterLoad(src[0], srcMask), + emitRegisterLoad(src[1], srcMask)); + break; + } + case DxsoOpcode::Dp4: + result = emitDot( + emitRegisterLoad(src[0], IdentityWriteMask), + emitRegisterLoad(src[1], IdentityWriteMask)); + break; + case DxsoOpcode::Slt: + case DxsoOpcode::Sge: { + const uint32_t boolTypeId = + getVectorTypeId({ DxsoScalarType::Bool, result.type.ccount }); + + uint32_t cmpResult = opcode == DxsoOpcode::Slt + ? m_module.opFOrdLessThan (boolTypeId, emitRegisterLoad(src[0], mask).id, emitRegisterLoad(src[1], mask).id) + : m_module.opFOrdGreaterThanEqual(boolTypeId, emitRegisterLoad(src[0], mask).id, emitRegisterLoad(src[1], mask).id); + + result.id = m_module.opSelect(typeId, cmpResult, + m_module.constfReplicant(1.0f, result.type.ccount), + m_module.constfReplicant(0.0f, result.type.ccount)); + break; + } + case DxsoOpcode::Min: + result.id = m_module.opFMin(typeId, + emitRegisterLoad(src[0], mask).id, + emitRegisterLoad(src[1], mask).id); + break; + case DxsoOpcode::Max: + result.id = m_module.opFMax(typeId, + emitRegisterLoad(src[0], mask).id, + emitRegisterLoad(src[1], mask).id); + break; + case DxsoOpcode::ExpP: + if (m_programInfo.majorVersion() < 2) { + DxsoRegMask srcMask(true, false, false, false); + uint32_t src0 = emitRegisterLoad(src[0], srcMask).id; + + uint32_t index = 0; + + std::array resultIndices; + + if (mask[0]) resultIndices[index++] = m_module.opExp2(scalarTypeId, m_module.opFloor(scalarTypeId, src0)); + if (mask[1]) resultIndices[index++] = m_module.opFSub(scalarTypeId, src0, m_module.opFloor(scalarTypeId, src0)); + if (mask[2]) resultIndices[index++] = m_module.opExp2(scalarTypeId, src0); + if (mask[3]) resultIndices[index++] = m_module.constf32(1.0f); + + if (result.type.ccount == 1) + result.id = resultIndices[0]; + else + result.id = 
m_module.opCompositeConstruct(typeId, result.type.ccount, resultIndices.data()); + + break; + } + case DxsoOpcode::Exp: + result.id = m_module.opExp2(typeId, + emitRegisterLoad(src[0], mask).id); + break; + case DxsoOpcode::Pow: { + uint32_t base = emitRegisterLoad(src[0], mask).id; + base = m_module.opFAbs(typeId, base); + + uint32_t exponent = emitRegisterLoad(src[1], mask).id; + + result.id = m_module.opPow(typeId, base, exponent); + + if (m_moduleInfo.options.strictPow && m_moduleInfo.options.d3d9FloatEmulation) { + DxsoRegisterValue cmp; + cmp.type = { DxsoScalarType::Bool, result.type.ccount }; + cmp.id = m_module.opFOrdEqual(getVectorTypeId(cmp.type), + exponent, m_module.constfReplicant(0.0f, cmp.type.ccount)); + + result.id = m_module.opSelect(typeId, cmp.id, + m_module.constfReplicant(1.0f, cmp.type.ccount), result.id); + } + break; + } + case DxsoOpcode::Crs: { + DxsoRegMask vec3Mask(true, true, true, false); + + DxsoRegisterValue crossValue; + crossValue.type = { DxsoScalarType::Float32, 3 }; + crossValue.id = m_module.opCross(getVectorTypeId(crossValue.type), + emitRegisterLoad(src[0], vec3Mask).id, + emitRegisterLoad(src[1], vec3Mask).id); + + std::array indices = { 0, 0, 0 }; + + uint32_t index = 0; + for (uint32_t i = 0; i < indices.size(); i++) { + if (mask[i]) + indices[index++] = m_module.opCompositeExtract(m_module.defFloatType(32), crossValue.id, 1, &i); + } + + result.id = m_module.opCompositeConstruct(getVectorTypeId(result.type), result.type.ccount, indices.data()); + + break; + } + case DxsoOpcode::Abs: + result.id = m_module.opFAbs(typeId, + emitRegisterLoad(src[0], mask).id); + break; + case DxsoOpcode::Nrm: { + // Nrm is 3D... 
+ DxsoRegMask srcMask(true, true, true, false); + auto vec3 = emitRegisterLoad(src[0], srcMask); + + DxsoRegisterValue dot = emitDot(vec3, vec3); + dot.id = m_module.opInverseSqrt (scalarTypeId, dot.id); + if (m_moduleInfo.options.d3d9FloatEmulation) { + dot.id = m_module.opNMin (scalarTypeId, dot.id, + m_module.constf32(FLT_MAX)); + } + + // r * rsq(r . r); + result.id = m_module.opVectorTimesScalar( + typeId, + emitRegisterLoad(src[0], mask).id, + dot.id); + break; + } + case DxsoOpcode::SinCos: { + DxsoRegMask srcMask(true, false, false, false); + uint32_t src0 = emitRegisterLoad(src[0], srcMask).id; + + std::array sincosVectorIndices = { 0, 0, 0, 0 }; + + uint32_t index = 0; + if (mask[0]) + sincosVectorIndices[index++] = m_module.opCos(scalarTypeId, src0); + + if (mask[1]) + sincosVectorIndices[index++] = m_module.opSin(scalarTypeId, src0); + + for (; index < result.type.ccount; index++) { + if (sincosVectorIndices[index] == 0) + sincosVectorIndices[index] = m_module.constf32(0.0f); + } + + if (result.type.ccount == 1) + result.id = sincosVectorIndices[0]; + else + result.id = m_module.opCompositeConstruct(typeId, result.type.ccount, sincosVectorIndices.data()); + + break; + } + case DxsoOpcode::Lit: { + DxsoRegMask srcMask(true, true, true, true); + uint32_t srcOp = emitRegisterLoad(src[0], srcMask).id; + + const uint32_t x = 0; + const uint32_t y = 1; + const uint32_t w = 3; + + uint32_t srcX = m_module.opCompositeExtract(scalarTypeId, srcOp, 1, &x); + uint32_t srcY = m_module.opCompositeExtract(scalarTypeId, srcOp, 1, &y); + uint32_t srcW = m_module.opCompositeExtract(scalarTypeId, srcOp, 1, &w); + + uint32_t power = m_module.opFClamp( + scalarTypeId, srcW, + m_module.constf32(-127.9961f), m_module.constf32(127.9961f)); + + std::array resultIndices; + + uint32_t index = 0; + + if (mask[0]) resultIndices[index++] = m_module.constf32(1.0f); + if (mask[1]) resultIndices[index++] = m_module.opFMax(scalarTypeId, srcX, m_module.constf32(0)); + if (mask[2]) 
resultIndices[index++] = m_module.opPow (scalarTypeId, srcY, power); + if (mask[3]) resultIndices[index++] = m_module.constf32(1.0f); + + const uint32_t boolType = m_module.defBoolType(); + uint32_t zTestX = m_module.opFOrdGreaterThanEqual(boolType, srcX, m_module.constf32(0)); + uint32_t zTestY = m_module.opFOrdGreaterThanEqual(boolType, srcY, m_module.constf32(0)); + uint32_t zTest = m_module.opLogicalAnd(boolType, zTestX, zTestY); + + if (result.type.ccount > 2) + resultIndices[2] = m_module.opSelect( + scalarTypeId, + zTest, + resultIndices[2], + m_module.constf32(0.0f)); + + if (result.type.ccount == 1) + result.id = resultIndices[0]; + else + result.id = m_module.opCompositeConstruct(typeId, result.type.ccount, resultIndices.data()); + break; + } + case DxsoOpcode::Dst: { + //dest.x = 1; + //dest.y = src0.y * src1.y; + //dest.z = src0.z; + //dest.w = src1.w; + + DxsoRegMask srcMask(true, true, true, true); + + uint32_t src0 = emitRegisterLoad(src[0], srcMask).id; + uint32_t src1 = emitRegisterLoad(src[1], srcMask).id; + + const uint32_t y = 1; + const uint32_t z = 2; + const uint32_t w = 3; + + uint32_t src0Y = m_module.opCompositeExtract(scalarTypeId, src0, 1, &y); + uint32_t src1Y = m_module.opCompositeExtract(scalarTypeId, src1, 1, &y); + + uint32_t src0Z = m_module.opCompositeExtract(scalarTypeId, src0, 1, &z); + uint32_t src1W = m_module.opCompositeExtract(scalarTypeId, src1, 1, &w); + + std::array resultIndices; + resultIndices[0] = m_module.constf32(1.0f); + resultIndices[1] = m_module.opFMul(scalarTypeId, src0Y, src1Y); + resultIndices[2] = src0Z; + resultIndices[3] = src1W; + + if (result.type.ccount == 1) + result.id = resultIndices[0]; + else + result.id = m_module.opCompositeConstruct(typeId, result.type.ccount, resultIndices.data()); + break; + } + case DxsoOpcode::LogP: + case DxsoOpcode::Log: + result.id = m_module.opFAbs(typeId, emitRegisterLoad(src[0], mask).id); + result.id = m_module.opLog2(typeId, result.id); + if 
(m_moduleInfo.options.d3d9FloatEmulation) { + result.id = m_module.opNMax(typeId, result.id, + m_module.constfReplicant(-FLT_MAX, result.type.ccount)); + } + break; + case DxsoOpcode::Lrp: + result.id = m_module.opFMix(typeId, + emitRegisterLoad(src[2], mask).id, + emitRegisterLoad(src[1], mask).id, + emitRegisterLoad(src[0], mask).id); + break; + case DxsoOpcode::Frc: + result.id = m_module.opFract(typeId, + emitRegisterLoad(src[0], mask).id); + break; + case DxsoOpcode::Cmp: { + const uint32_t boolTypeId = + getVectorTypeId({ DxsoScalarType::Bool, result.type.ccount }); + + uint32_t cmp = m_module.opFOrdGreaterThanEqual( + boolTypeId, + emitRegisterLoad(src[0], mask).id, + m_module.constfReplicant(0.0f, result.type.ccount)); + + result.id = m_module.opSelect( + typeId, cmp, + emitRegisterLoad(src[1], mask).id, + emitRegisterLoad(src[2], mask).id); + break; + } + case DxsoOpcode::Cnd: { + const uint32_t boolTypeId = + getVectorTypeId({ DxsoScalarType::Bool, result.type.ccount }); + + uint32_t cmp = m_module.opFOrdGreaterThan( + boolTypeId, + emitRegisterLoad(src[0], mask).id, + m_module.constfReplicant(0.5f, result.type.ccount)); + + result.id = m_module.opSelect( + typeId, cmp, + emitRegisterLoad(src[1], mask).id, + emitRegisterLoad(src[2], mask).id); + break; + } + case DxsoOpcode::Dp2Add: { + DxsoRegMask dotSrcMask(true, true, false, false); + DxsoRegMask addSrcMask(true, false, false, false); + + DxsoRegisterValue dot = emitDot( + emitRegisterLoad(src[0], dotSrcMask), + emitRegisterLoad(src[1], dotSrcMask)); + + dot.id = m_module.opFAdd(scalarTypeId, + dot.id, emitRegisterLoad(src[2], addSrcMask).id); + + result.id = dot.id; + result.type = scalarType; + break; + } + case DxsoOpcode::DsX: + result.id = m_module.opDpdx( + typeId, emitRegisterLoad(src[0], mask).id); + break; + case DxsoOpcode::DsY: + result.id = m_module.opDpdy( + typeId, emitRegisterLoad(src[0], mask).id); + break; + default: + Logger::warn(str::format("DxsoCompiler::emitVectorAlu: 
unimplemented op ", opcode)); + return; + } + + this->emitDstStore(dst, result, mask, ctx.dst.saturate, emitPredicateLoad(ctx), ctx.dst.shift, ctx.dst.id); + } + + + void DxsoCompiler::emitPredicateOp(const DxsoInstructionContext& ctx) { + const auto& src = ctx.src; + + DxsoRegMask mask = ctx.dst.mask; + + DxsoRegisterPointer dst = emitGetOperandPtr(ctx.dst); + + DxsoRegisterValue result; + result.type.ctype = dst.type.ctype; + result.type.ccount = mask.popCount(); + + result.id = emitBoolComparison( + result.type, ctx.instruction.specificData.comparison, + emitRegisterLoad(src[0], mask).id, emitRegisterLoad(src[1], mask).id); + + this->emitValueStore(dst, result, mask, emitPredicateLoad(ctx)); + } + + + void DxsoCompiler::emitMatrixAlu(const DxsoInstructionContext& ctx) { + const auto& src = ctx.src; + + DxsoRegMask mask = ctx.dst.mask; + + DxsoRegisterPointer dst = emitGetOperandPtr(ctx.dst); + + DxsoRegisterValue result; + result.type.ctype = dst.type.ctype; + result.type.ccount = mask.popCount(); + + DxsoVectorType scalarType = result.type; + scalarType.ccount = 1; + + const uint32_t typeId = getVectorTypeId(result.type); + const uint32_t scalarTypeId = getVectorTypeId(scalarType); + + const DxsoOpcode opcode = ctx.instruction.opcode; + + uint32_t dotCount; + uint32_t iterCount; + + switch (opcode) { + case DxsoOpcode::M3x2: + dotCount = 3; + iterCount = 2; + break; + case DxsoOpcode::M3x3: + dotCount = 3; + iterCount = 3; + break; + case DxsoOpcode::M3x4: + dotCount = 3; + iterCount = 4; + break; + case DxsoOpcode::M4x3: + dotCount = 4; + iterCount = 3; + break; + case DxsoOpcode::M4x4: + dotCount = 4; + iterCount = 4; + break; + default: + Logger::warn(str::format("DxsoCompiler::emitMatrixAlu: unimplemented op ", opcode)); + return; + } + + DxsoRegMask srcMask(true, true, true, dotCount == 4); + std::array indices; + + DxsoRegister src0 = src[0]; + DxsoRegister src1 = src[1]; + + for (uint32_t i = 0; i < iterCount; i++) { + indices[i] = 
m_module.opDot(scalarTypeId, + emitRegisterLoad(src0, srcMask).id, + emitRegisterLoad(src1, srcMask).id); + + src1.id.num++; + } + + result.id = m_module.opCompositeConstruct( + typeId, iterCount, indices.data()); + + this->emitDstStore(dst, result, mask, ctx.dst.saturate, emitPredicateLoad(ctx), ctx.dst.shift, ctx.dst.id); + } + + +void DxsoCompiler::emitControlFlowGenericLoop( + bool count, + uint32_t initialVar, + uint32_t strideVar, + uint32_t iterationCountVar) { + const uint32_t itType = m_module.defIntType(32, 1); + + DxsoCfgBlock block; + block.type = DxsoCfgBlockType::Loop; + block.b_loop.labelHeader = m_module.allocateId(); + block.b_loop.labelBegin = m_module.allocateId(); + block.b_loop.labelContinue = m_module.allocateId(); + block.b_loop.labelBreak = m_module.allocateId(); + block.b_loop.iteratorPtr = m_module.newVar( + m_module.defPointerType(itType, spv::StorageClassPrivate), spv::StorageClassPrivate); + block.b_loop.strideVar = strideVar; + block.b_loop.countBackup = 0; + + if (count) { + DxsoBaseRegister loop; + loop.id = { DxsoRegisterType::Loop, 0 }; + + DxsoRegisterPointer loopPtr = emitGetOperandPtr(loop, nullptr); + uint32_t loopVal = m_module.opLoad( + getVectorTypeId(loopPtr.type), loopPtr.id); + + block.b_loop.countBackup = loopVal; + + m_module.opStore(loopPtr.id, initialVar); + } + + m_module.setDebugName(block.b_loop.iteratorPtr, "iter"); + + m_module.opStore(block.b_loop.iteratorPtr, iterationCountVar); + + m_module.opBranch(block.b_loop.labelHeader); + m_module.opLabel (block.b_loop.labelHeader); + + m_module.opLoopMerge( + block.b_loop.labelBreak, + block.b_loop.labelContinue, + spv::LoopControlMaskNone); + + m_module.opBranch(block.b_loop.labelBegin); + m_module.opLabel (block.b_loop.labelBegin); + + uint32_t iterator = m_module.opLoad(itType, block.b_loop.iteratorPtr); + uint32_t complete = m_module.opIEqual(m_module.defBoolType(), iterator, m_module.consti32(0)); + + const uint32_t breakBlock = m_module.allocateId(); + const 
uint32_t mergeBlock = m_module.allocateId(); + + m_module.opSelectionMerge(mergeBlock, + spv::SelectionControlMaskNone); + + m_module.opBranchConditional( + complete, breakBlock, mergeBlock); + + m_module.opLabel(breakBlock); + + m_module.opBranch(block.b_loop.labelBreak); + + m_module.opLabel(mergeBlock); + + iterator = m_module.opISub(itType, iterator, m_module.consti32(1)); + m_module.opStore(block.b_loop.iteratorPtr, iterator); + + m_controlFlowBlocks.push_back(block); + } + + void DxsoCompiler::emitControlFlowGenericLoopEnd() { + if (m_controlFlowBlocks.size() == 0 + || m_controlFlowBlocks.back().type != DxsoCfgBlockType::Loop) + throw DxvkError("DxsoCompiler: 'EndRep' without 'Rep' or 'Loop' found"); + + // Remove the block from the stack, it's closed + const DxsoCfgBlock block = m_controlFlowBlocks.back(); + m_controlFlowBlocks.pop_back(); + + if (block.b_loop.strideVar) { + DxsoBaseRegister loop; + loop.id = { DxsoRegisterType::Loop, 0 }; + + DxsoRegisterPointer loopPtr = emitGetOperandPtr(loop, nullptr); + uint32_t val = m_module.opLoad( + getVectorTypeId(loopPtr.type), loopPtr.id); + + val = m_module.opIAdd( + getVectorTypeId(loopPtr.type), + val, block.b_loop.strideVar); + + m_module.opStore(loopPtr.id, val); + } + + // Declare the continue block + m_module.opBranch(block.b_loop.labelContinue); + m_module.opLabel(block.b_loop.labelContinue); + + // Declare the merge block + m_module.opBranch(block.b_loop.labelHeader); + m_module.opLabel(block.b_loop.labelBreak); + + if (block.b_loop.countBackup) { + DxsoBaseRegister loop; + loop.id = { DxsoRegisterType::Loop, 0 }; + + DxsoRegisterPointer loopPtr = emitGetOperandPtr(loop, nullptr); + + m_module.opStore(loopPtr.id, block.b_loop.countBackup); + } + } + + void DxsoCompiler::emitControlFlowRep(const DxsoInstructionContext& ctx) { + DxsoRegMask srcMask(true, false, false, false); + this->emitControlFlowGenericLoop( + false, 0, 0, + emitRegisterLoad(ctx.src[0], srcMask).id); + } + + void 
DxsoCompiler::emitControlFlowEndRep(const DxsoInstructionContext& ctx) { + emitControlFlowGenericLoopEnd(); + } + + void DxsoCompiler::emitControlFlowLoop(const DxsoInstructionContext& ctx) { + const uint32_t itType = m_module.defIntType(32, 1); + + DxsoRegMask srcMask(true, true, true, false); + uint32_t integerRegister = emitRegisterLoad(ctx.src[1], srcMask).id; + uint32_t x = 0; + uint32_t y = 1; + uint32_t z = 2; + + uint32_t iterCount = m_module.opCompositeExtract(itType, integerRegister, 1, &x); + uint32_t initialValue = m_module.opCompositeExtract(itType, integerRegister, 1, &y); + uint32_t strideSize = m_module.opCompositeExtract(itType, integerRegister, 1, &z); + + this->emitControlFlowGenericLoop( + true, + initialValue, + strideSize, + iterCount); + } + + void DxsoCompiler::emitControlFlowEndLoop(const DxsoInstructionContext& ctx) { + this->emitControlFlowGenericLoopEnd(); + } + + void DxsoCompiler::emitControlFlowBreak(const DxsoInstructionContext& ctx) { + DxsoCfgBlock* cfgBlock = + cfgFindBlock({ DxsoCfgBlockType::Loop }); + + if (cfgBlock == nullptr) + throw DxvkError("DxbcCompiler: 'Break' outside 'Rep' or 'Loop' found"); + + m_module.opBranch(cfgBlock->b_loop.labelBreak); + + // Subsequent instructions assume that there is an open block + const uint32_t labelId = m_module.allocateId(); + m_module.opLabel(labelId); + } + + void DxsoCompiler::emitControlFlowBreakC(const DxsoInstructionContext& ctx) { + DxsoCfgBlock* cfgBlock = + cfgFindBlock({ DxsoCfgBlockType::Loop }); + + if (cfgBlock == nullptr) + throw DxvkError("DxbcCompiler: 'BreakC' outside 'Rep' or 'Loop' found"); + + DxsoRegMask srcMask(true, false, false, false); + auto a = emitRegisterLoad(ctx.src[0], srcMask); + auto b = emitRegisterLoad(ctx.src[1], srcMask); + + uint32_t result = this->emitBoolComparison( + { DxsoScalarType::Bool, a.type.ccount }, + ctx.instruction.specificData.comparison, + a.id, b.id); + + // We basically have to wrap this into an 'if' block + const uint32_t breakBlock 
= m_module.allocateId(); + const uint32_t mergeBlock = m_module.allocateId(); + + m_module.opSelectionMerge(mergeBlock, + spv::SelectionControlMaskNone); + + m_module.opBranchConditional( + result, breakBlock, mergeBlock); + + m_module.opLabel(breakBlock); + + m_module.opBranch(cfgBlock->b_loop.labelBreak); + + m_module.opLabel(mergeBlock); + } + + void DxsoCompiler::emitControlFlowIf(const DxsoInstructionContext& ctx) { + const auto opcode = ctx.instruction.opcode; + + uint32_t result; + + DxsoRegMask srcMask(true, false, false, false); + if (opcode == DxsoOpcode::Ifc) { + auto a = emitRegisterLoad(ctx.src[0], srcMask); + auto b = emitRegisterLoad(ctx.src[1], srcMask); + + result = this->emitBoolComparison( + { DxsoScalarType::Bool, a.type.ccount }, + ctx.instruction.specificData.comparison, + a.id, b.id); + } else + result = emitRegisterLoad(ctx.src[0], srcMask).id; + + // Declare the 'if' block. We do not know if there + // will be an 'else' block or not, so we'll assume + // that there is one and leave it empty otherwise. + DxsoCfgBlock block; + block.type = DxsoCfgBlockType::If; + block.b_if.ztestId = result; + block.b_if.labelIf = m_module.allocateId(); + block.b_if.labelElse = 0; + block.b_if.labelEnd = m_module.allocateId(); + block.b_if.headerPtr = m_module.getInsertionPtr(); + m_controlFlowBlocks.push_back(block); + + // We'll insert the branch instruction when closing + // the block, since we don't know whether or not an + // else block is needed right now. 
+ m_module.opLabel(block.b_if.labelIf); + } + + void DxsoCompiler::emitControlFlowElse(const DxsoInstructionContext& ctx) { + if (m_controlFlowBlocks.size() == 0 + || m_controlFlowBlocks.back().type != DxsoCfgBlockType::If + || m_controlFlowBlocks.back().b_if.labelElse != 0) + throw DxvkError("DxsoCompiler: 'Else' without 'If' found"); + + // Set the 'Else' flag so that we do + // not insert a dummy block on 'EndIf' + DxsoCfgBlock& block = m_controlFlowBlocks.back(); + block.b_if.labelElse = m_module.allocateId(); + + // Close the 'If' block by branching to + // the merge block we declared earlier + m_module.opBranch(block.b_if.labelEnd); + m_module.opLabel (block.b_if.labelElse); + } + + void DxsoCompiler::emitControlFlowEndIf(const DxsoInstructionContext& ctx) { + if (m_controlFlowBlocks.size() == 0 + || m_controlFlowBlocks.back().type != DxsoCfgBlockType::If) + throw DxvkError("DxsoCompiler: 'EndIf' without 'If' found"); + + // Remove the block from the stack, it's closed + DxsoCfgBlock block = m_controlFlowBlocks.back(); + m_controlFlowBlocks.pop_back(); + + // Write out the 'if' header + m_module.beginInsertion(block.b_if.headerPtr); + + m_module.opSelectionMerge( + block.b_if.labelEnd, + spv::SelectionControlMaskNone); + + m_module.opBranchConditional( + block.b_if.ztestId, + block.b_if.labelIf, + block.b_if.labelElse != 0 + ? 
block.b_if.labelElse + : block.b_if.labelEnd); + + m_module.endInsertion(); + + // End the active 'if' or 'else' block + m_module.opBranch(block.b_if.labelEnd); + m_module.opLabel (block.b_if.labelEnd); + } + + + void DxsoCompiler::emitTexCoord(const DxsoInstructionContext& ctx) { + DxsoRegisterValue result; + + if (m_programInfo.majorVersion() == 1 && m_programInfo.minorVersion() == 4) { + // TexCrd Op (PS 1.4) + DxsoRegister texcoord; + texcoord.id.type = DxsoRegisterType::PixelTexcoord; + texcoord.id.num = ctx.src[0].id.num; + + result = emitRegisterLoadRaw(texcoord, nullptr); + } else { + // TexCoord Op (PS 1.0 - PS 1.3) + DxsoRegister texcoord; + texcoord.id.type = DxsoRegisterType::PixelTexcoord; + texcoord.id.num = ctx.dst.id.num; + + result = emitRegisterLoadRaw(texcoord, nullptr); + // Saturate + result = emitSaturate(result); + // w = 1.0f + uint32_t wIndex = 3; + result.id = m_module.opCompositeInsert(getVectorTypeId(result.type), + m_module.constf32(1.0f), + result.id, + 1, &wIndex); + } + + DxsoRegisterPointer dst = emitGetOperandPtr(ctx.dst); + + this->emitDstStore(dst, result, ctx.dst.mask, ctx.dst.saturate, emitPredicateLoad(ctx), ctx.dst.shift, ctx.dst.id); + } + + void DxsoCompiler::emitTextureSample(const DxsoInstructionContext& ctx) { + DxsoRegisterPointer dst = emitGetOperandPtr(ctx.dst); + + const DxsoOpcode opcode = ctx.instruction.opcode; + + DxsoRegisterValue texcoordVar; + uint32_t samplerIdx; + + DxsoRegMask vec3Mask(true, true, true, false); + DxsoRegMask srcMask (true, true, true, true); + + if (opcode == DxsoOpcode::TexM3x2Tex || opcode == DxsoOpcode::TexM3x3Tex || opcode == DxsoOpcode::TexM3x3Spec || opcode == DxsoOpcode::TexM3x3VSpec) { + const uint32_t count = opcode == DxsoOpcode::TexM3x2Tex ? 
2 : 3; + + auto n = emitRegisterLoad(ctx.src[0], vec3Mask); + + std::array indices = { 0, 0, m_module.constf32(0.0f), m_module.constf32(0.0f) }; + for (uint32_t i = 0; i < count; i++) { + auto reg = ctx.dst; + reg.id.num -= (count - 1) - i; + auto m = emitRegisterLoadTexcoord(reg, vec3Mask); + + indices[i] = m_module.opDot(getScalarTypeId(DxsoScalarType::Float32), m.id, n.id); + } + + if (opcode == DxsoOpcode::TexM3x3Spec || opcode == DxsoOpcode::TexM3x3VSpec) { + uint32_t vec3Type = getVectorTypeId({ DxsoScalarType::Float32, 3 }); + uint32_t normal = m_module.opCompositeConstruct(vec3Type, 3, indices.data()); + + uint32_t eyeRay; + // VSpec -> Create eye ray from .w of last 3 tex coords (m, m-1, m-2) + // Spec -> Get eye ray from src[1] + if (opcode == DxsoOpcode::TexM3x3VSpec) { + DxsoRegMask wMask(false, false, false, true); + + std::array eyeRayIndices; + for (uint32_t i = 0; i < 3; i++) { + auto reg = ctx.dst; + reg.id.num -= (count - 1) - i; + eyeRayIndices[i] = emitRegisterLoadTexcoord(reg, wMask).id; + } + + eyeRay = m_module.opCompositeConstruct(vec3Type, eyeRayIndices.size(), eyeRayIndices.data()); + } + else + eyeRay = emitRegisterLoad(ctx.src[1], vec3Mask).id; + + uint32_t reflection = m_module.opReflect(vec3Type, eyeRay, normal); + + for (uint32_t i = 0; i < 3; i++) + indices[i] = m_module.opCompositeExtract(m_module.defFloatType(32), reflection, 1, &i); + } + + texcoordVar.type = { DxsoScalarType::Float32, 4 }; + texcoordVar.id = m_module.opCompositeConstruct(getVectorTypeId(texcoordVar.type), indices.size(), indices.data()); + + samplerIdx = ctx.dst.id.num; + } + else if (opcode == DxsoOpcode::TexBem) { + auto m = emitRegisterLoadTexcoord(ctx.dst, srcMask); + auto n = emitRegisterLoad(ctx.src[0], srcMask); + + texcoordVar = m; + + // u' = tc(m).x + [bm00(m) * t(n).x + bm10(m) * t(n).y] + // v' = tc(m).y + [bm01(m) * t(n).x + bm11(m) * t(n).y] + + // But we flipped the bm indices so we can use dot here... 
+ + // u' = tc(m).x + dot(bm0, tn) + // v' = tc(m).y + dot(bm1, tn) + + for (uint32_t i = 0; i < 2; i++) { + uint32_t fl_t = getScalarTypeId(DxsoScalarType::Float32); + uint32_t vec2_t = getVectorTypeId({ DxsoScalarType::Float32, 2 }); + std::array indices = { 0, 1, 2, 3 }; + + uint32_t tc_m_n = m_module.opCompositeExtract(fl_t, m.id, 1, &i); + + uint32_t offset = m_module.constu32(D3D9SharedPSStages_Count * ctx.dst.id.num + D3D9SharedPSStages_BumpEnvMat0 + i); + uint32_t bm = m_module.opAccessChain(m_module.defPointerType(vec2_t, spv::StorageClassUniform), + m_ps.sharedState, 1, &offset); + bm = m_module.opLoad(vec2_t, bm); + + uint32_t t = m_module.opVectorShuffle(vec2_t, n.id, n.id, 2, indices.data()); + + uint32_t dot = m_module.opDot(fl_t, bm, t); + + uint32_t result = m_module.opFAdd(fl_t, tc_m_n, dot); + texcoordVar.id = m_module.opCompositeInsert(getVectorTypeId(texcoordVar.type), result, texcoordVar.id, 1, &i); + } + + samplerIdx = ctx.dst.id.num; + } + else if (opcode == DxsoOpcode::TexReg2Ar) { + texcoordVar = emitRegisterLoad(ctx.src[0], srcMask); + texcoordVar = emitRegisterSwizzle(texcoordVar, DxsoRegSwizzle(3, 0, 0, 0), srcMask); + + samplerIdx = ctx.dst.id.num; + } + else if (opcode == DxsoOpcode::TexReg2Gb) { + texcoordVar = emitRegisterLoad(ctx.src[0], srcMask); + texcoordVar = emitRegisterSwizzle(texcoordVar, DxsoRegSwizzle(1, 2, 2, 2), srcMask); + + samplerIdx = ctx.dst.id.num; + } + else if (opcode == DxsoOpcode::TexReg2Rgb) { + texcoordVar = emitRegisterLoad(ctx.src[0], srcMask); + texcoordVar = emitRegisterSwizzle(texcoordVar, DxsoRegSwizzle(0, 1, 2, 2), srcMask); + + samplerIdx = ctx.dst.id.num; + } + else if (opcode == DxsoOpcode::TexDp3Tex) { + auto m = emitRegisterLoadTexcoord(ctx.dst, vec3Mask); + auto n = emitRegisterLoad(ctx.src[0], vec3Mask); + + auto dot = emitDot(m, n); + + std::array indices = { dot.id, m_module.constf32(0.0f), m_module.constf32(0.0f), m_module.constf32(0.0f) }; + + texcoordVar.type = { DxsoScalarType::Float32, 4 
}; + texcoordVar.id = m_module.opCompositeConstruct(getVectorTypeId(texcoordVar.type), + indices.size(), indices.data()); + + samplerIdx = ctx.dst.id.num; + } + else { + if (m_programInfo.majorVersion() >= 2) { // SM 2.0+ + texcoordVar = emitRegisterLoad(ctx.src[0], srcMask); + samplerIdx = ctx.src[1].id.num; + } else if ( + m_programInfo.majorVersion() == 1 + && m_programInfo.minorVersion() == 4) { // SM 1.4 + texcoordVar = emitRegisterLoad(ctx.src[0], srcMask); + samplerIdx = ctx.dst.id.num; + } + else { // SM 1.0-1.3 + texcoordVar = emitRegisterLoadTexcoord(ctx.dst, srcMask); + samplerIdx = ctx.dst.id.num; + } + } + + // SM < 1.x does not have dcl sampler type. + if (m_programInfo.majorVersion() < 2 && m_samplers[samplerIdx].color[SamplerTypeTexture2D].varId == 0) + emitDclSampler(samplerIdx, DxsoTextureType::Texture2D); + + DxsoSampler sampler = m_samplers.at(samplerIdx); + + auto SampleImage = [this, opcode, dst, ctx, samplerIdx](DxsoRegisterValue texcoordVar, DxsoSamplerInfo& sampler, bool depth, DxsoSamplerType samplerType) { + DxsoRegisterValue result; + result.type.ctype = dst.type.ctype; + result.type.ccount = depth ? 
1 : 4; + + const uint32_t typeId = getVectorTypeId(result.type); + + const uint32_t imageVarId = m_module.opLoad(sampler.typeId, sampler.varId); + + SpirvImageOperands imageOperands; + if (m_programInfo.type() == DxsoProgramTypes::VertexShader) { + imageOperands.sLod = m_module.constf32(0.0f); + imageOperands.flags |= spv::ImageOperandsLodMask; + } + + if (opcode == DxsoOpcode::TexLdl) { + uint32_t w = 3; + imageOperands.sLod = m_module.opCompositeExtract( + m_module.defFloatType(32), texcoordVar.id, 1, &w); + imageOperands.flags |= spv::ImageOperandsLodMask; + } + + if (opcode == DxsoOpcode::TexLdd) { + DxsoRegMask gradMask(true, true, false, false); + imageOperands.flags |= spv::ImageOperandsGradMask; + imageOperands.sGradX = emitRegisterLoad(ctx.src[2], gradMask).id; + imageOperands.sGradY = emitRegisterLoad(ctx.src[3], gradMask).id; + } + + uint32_t projDivider = 0; + + auto GetProjectionValue = [&]() { + uint32_t w = 3; + return m_module.opCompositeExtract( + m_module.defFloatType(32), texcoordVar.id, 1, &w); + }; + + if (opcode == DxsoOpcode::Tex + && m_programInfo.majorVersion() >= 2) { + if (ctx.instruction.specificData.texld == DxsoTexLdMode::Project) { + projDivider = GetProjectionValue(); + } + else if (ctx.instruction.specificData.texld == DxsoTexLdMode::Bias) { + uint32_t w = 3; + imageOperands.sLodBias = m_module.opCompositeExtract( + m_module.defFloatType(32), texcoordVar.id, 1, &w); + imageOperands.flags |= spv::ImageOperandsBiasMask; + } + } + + bool switchProjResult = m_programInfo.majorVersion() < 2 && samplerType != SamplerTypeTextureCube; + if (switchProjResult) + projDivider = GetProjectionValue(); + + uint32_t reference = 0; + + if (depth) { + uint32_t component = sampler.dimensions; + reference = m_module.opCompositeExtract( + m_module.defFloatType(32), texcoordVar.id, 1, &component); + } + + if (projDivider != 0) { + for (uint32_t i = sampler.dimensions; i < 4; i++) { + texcoordVar.id = 
m_module.opCompositeInsert(getVectorTypeId(texcoordVar.type), + projDivider, texcoordVar.id, 1, &i); + } + } + + result.id = this->emitSample( + projDivider != 0, + typeId, + imageVarId, + texcoordVar.id, + reference, + imageOperands); + + if (switchProjResult) { + uint32_t bool_t = m_module.defBoolType(); + + uint32_t nonProjResult = this->emitSample( + 0, + typeId, + imageVarId, + texcoordVar.id, + reference, + imageOperands); + + uint32_t shouldProj = m_module.opBitFieldUExtract( + m_module.defIntType(32, 0), m_ps.projectionSpec, + m_module.consti32(samplerIdx), m_module.consti32(1)); + + shouldProj = m_module.opIEqual(m_module.defBoolType(), shouldProj, m_module.constu32(1)); + + // Depth -> .x + // Colour -> .xyzw + // Need to replicate the bool for the opSelect. + if (!depth) { + uint32_t bvec4_t = m_module.defVectorType(bool_t, 4); + std::array indices = { shouldProj, shouldProj, shouldProj, shouldProj }; + shouldProj = m_module.opCompositeConstruct(bvec4_t, indices.size(), indices.data()); + } + + result.id = m_module.opSelect(typeId, shouldProj, result.id, nonProjResult); + } + + // Apply operand swizzle to the operand value + result = emitRegisterSwizzle(result, IdentitySwizzle, ctx.dst.mask); + + this->emitDstStore(dst, result, ctx.dst.mask, ctx.dst.saturate, emitPredicateLoad(ctx), ctx.dst.shift, ctx.dst.id); + }; + + auto SampleType = [&](DxsoSamplerType samplerType) { + // Only do the check for depth comp. 
samplers + // if we aren't a 3D texture + if (samplerType != SamplerTypeTexture3D) { + uint32_t colorLabel = m_module.allocateId(); + uint32_t depthLabel = m_module.allocateId(); + uint32_t endLabel = m_module.allocateId(); + + m_module.opSelectionMerge(endLabel, spv::SelectionControlMaskNone); + m_module.opBranchConditional(sampler.depthSpecConst, depthLabel, colorLabel); + + m_module.opLabel(colorLabel); + SampleImage(texcoordVar, sampler.color[samplerType], false, samplerType); + m_module.opBranch(endLabel); + + m_module.opLabel(depthLabel); + SampleImage(texcoordVar, sampler.depth[samplerType], true, samplerType); + m_module.opBranch(endLabel); + + m_module.opLabel(endLabel); + } + else + SampleImage(texcoordVar, sampler.color[samplerType], false, samplerType); + }; + + if (m_programInfo.majorVersion() >= 2) { + DxsoSamplerType samplerType = + SamplerTypeFromTextureType(sampler.type); + + SampleType(samplerType); + } + else { + std::array typeCaseLabels = {{ + { uint32_t(SamplerTypeTexture2D), m_module.allocateId() }, + { uint32_t(SamplerTypeTexture3D), m_module.allocateId() }, + { uint32_t(SamplerTypeTextureCube), m_module.allocateId() }, + }}; + + uint32_t switchEndLabel = m_module.allocateId(); + + uint32_t typeId = m_module.defIntType(32, 0); + + uint32_t offset = m_module.consti32(samplerIdx * 2); + uint32_t bitCnt = m_module.consti32(2); + uint32_t type = m_module.opBitFieldUExtract(typeId, m_ps.samplerTypeSpec, offset, bitCnt); + + m_module.opSelectionMerge(switchEndLabel, spv::SelectionControlMaskNone); + m_module.opSwitch(type, + typeCaseLabels[uint32_t(SamplerTypeTexture2D)].labelId, + typeCaseLabels.size(), + typeCaseLabels.data()); + + for (const auto& label : typeCaseLabels) { + m_module.opLabel(label.labelId); + + SampleType(DxsoSamplerType(label.literal)); + + m_module.opBranch(switchEndLabel); + } + + m_module.opLabel(switchEndLabel); + } + } + + void DxsoCompiler::emitTextureKill(const DxsoInstructionContext& ctx) { + DxsoRegisterValue texReg; 
+ + if (m_programInfo.majorVersion() >= 2 || + (m_programInfo.majorVersion() == 1 + && m_programInfo.minorVersion() == 4)) // SM 2.0+ or 1.4 + texReg = emitRegisterLoadRaw(ctx.dst, ctx.dst.hasRelative ? &ctx.dst.relative : nullptr); + else { // SM 1.0-1.3 + DxsoRegister texcoord; + texcoord.id = { DxsoRegisterType::PixelTexcoord, ctx.dst.id.num }; + + texReg = emitRegisterLoadRaw(texcoord, nullptr); + } + + std::array indices = { 0, 1, 2, 3 }; + + // On SM1 it only works on the first three components + if (m_programInfo.majorVersion() < 2) { + texReg.type.ccount = 3; + + texReg.id = m_module.opVectorShuffle( + getVectorTypeId(texReg.type), + texReg.id, texReg.id, + texReg.type.ccount, indices.data()); + } + + const uint32_t boolVecTypeId = + getVectorTypeId({ DxsoScalarType::Bool, texReg.type.ccount }); + + uint32_t result = m_module.opFOrdLessThan( + boolVecTypeId, texReg.id, + m_module.constfReplicant(0.0f, texReg.type.ccount)); + + result = m_module.opAny(m_module.defBoolType(), result); + + if (m_ps.killState == 0) { + uint32_t labelIf = m_module.allocateId(); + uint32_t labelEnd = m_module.allocateId(); + + m_module.opSelectionMerge(labelEnd, spv::SelectionControlMaskNone); + m_module.opBranchConditional(result, labelIf, labelEnd); + + m_module.opLabel(labelIf); + + if (m_moduleInfo.options.useDemoteToHelperInvocation) { + m_module.opDemoteToHelperInvocation(); + m_module.opBranch(labelEnd); + } else { + // OpKill terminates the block + m_module.opKill(); + } + + m_module.opLabel(labelEnd); + } + else { + uint32_t typeId = m_module.defBoolType(); + + uint32_t killState = m_module.opLoad (typeId, m_ps.killState); + killState = m_module.opLogicalOr(typeId, killState, result); + m_module.opStore(m_ps.killState, killState); + + if (m_moduleInfo.options.useSubgroupOpsForEarlyDiscard) { + uint32_t ballot = m_module.opGroupNonUniformBallot( + getVectorTypeId({ DxsoScalarType::Uint32, 4 }), + m_module.constu32(spv::ScopeSubgroup), + killState); + + uint32_t laneId = m_module.opLoad( + 
getScalarTypeId(DxsoScalarType::Uint32), + m_ps.builtinLaneId); + + uint32_t laneIdPart = m_module.opShiftRightLogical( + getScalarTypeId(DxsoScalarType::Uint32), + laneId, m_module.constu32(5)); + + uint32_t laneMask = m_module.opVectorExtractDynamic( + getScalarTypeId(DxsoScalarType::Uint32), + ballot, laneIdPart); + + uint32_t laneIdQuad = m_module.opBitwiseAnd( + getScalarTypeId(DxsoScalarType::Uint32), + laneId, m_module.constu32(0x1c)); + + laneMask = m_module.opShiftRightLogical( + getScalarTypeId(DxsoScalarType::Uint32), + laneMask, laneIdQuad); + + laneMask = m_module.opBitwiseAnd( + getScalarTypeId(DxsoScalarType::Uint32), + laneMask, m_module.constu32(0xf)); + + uint32_t killSubgroup = m_module.opIEqual( + m_module.defBoolType(), + laneMask, m_module.constu32(0xf)); + + uint32_t labelIf = m_module.allocateId(); + uint32_t labelEnd = m_module.allocateId(); + + m_module.opSelectionMerge(labelEnd, spv::SelectionControlMaskNone); + m_module.opBranchConditional(killSubgroup, labelIf, labelEnd); + + // OpKill terminates the block + m_module.opLabel(labelIf); + m_module.opKill(); + + m_module.opLabel(labelEnd); + } + } + } + + + uint32_t DxsoCompiler::emitSample( + bool projected, + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t reference, + const SpirvImageOperands& operands) { + const bool depthCompare = reference != 0; + const bool explicitLod = + (operands.flags & spv::ImageOperandsLodMask) + || (operands.flags & spv::ImageOperandsGradMask); + + if (projected) { + if (depthCompare) { + if (explicitLod) + return m_module.opImageSampleProjDrefExplicitLod(resultType, sampledImage, coordinates, reference, operands); + else + return m_module.opImageSampleProjDrefImplicitLod(resultType, sampledImage, coordinates, reference, operands); + } + else { + if (explicitLod) + return m_module.opImageSampleProjExplicitLod(resultType, sampledImage, coordinates, operands); + else + return m_module.opImageSampleProjImplicitLod(resultType, 
sampledImage, coordinates, operands); + } + } + else { + if (depthCompare) { + if (explicitLod) + return m_module.opImageSampleDrefExplicitLod(resultType, sampledImage, coordinates, reference, operands); + else + return m_module.opImageSampleDrefImplicitLod(resultType, sampledImage, coordinates, reference, operands); + } + else { + if (explicitLod) + return m_module.opImageSampleExplicitLod(resultType, sampledImage, coordinates, operands); + else + return m_module.opImageSampleImplicitLod(resultType, sampledImage, coordinates, operands); + } + } + } + + + void DxsoCompiler::emitInputSetup() { + uint32_t pointCoord = 0; + D3D9PointSizeInfoPS pointInfo; + + if (m_programInfo.type() == DxsoProgramType::PixelShader) { + pointCoord = GetPointCoord(m_module, m_entryPointInterfaces); + pointInfo = GetPointSizeInfoPS(m_module, m_rsBlock); + } + + for (uint32_t i = 0; i < m_isgn.elemCount; i++) { + const auto& elem = m_isgn.elems[i]; + const uint32_t slot = elem.slot; + + DxsoRegisterInfo info; + info.type.ctype = DxsoScalarType::Float32; + info.type.ccount = 4; + info.type.alength = 1; + info.sclass = spv::StorageClassInput; + + DxsoRegisterPointer inputPtr; + inputPtr.id = emitNewVariable(info); + inputPtr.type.ctype = DxsoScalarType::Float32; + inputPtr.type.ccount = info.type.ccount; + + m_module.decorateLocation(inputPtr.id, slot); + + std::string name = + str::format("in_", elem.semantic.usage, elem.semantic.usageIndex); + m_module.setDebugName(inputPtr.id, name.c_str()); + + if (elem.centroid) + m_module.decorate(inputPtr.id, spv::DecorationCentroid); + + m_entryPointInterfaces.push_back(inputPtr.id); + + uint32_t typeId = this->getVectorTypeId({ DxsoScalarType::Float32, 4 }); + uint32_t ptrTypeId = m_module.defPointerType(typeId, spv::StorageClassPrivate); + + uint32_t regNumVar = m_module.constu32(elem.regNumber); + + DxsoRegisterPointer indexPtr; + indexPtr.id = m_module.opAccessChain(ptrTypeId, m_vArray, 1, ®NumVar); + indexPtr.type = inputPtr.type; + 
indexPtr.type.ccount = 4; + + DxsoRegisterValue indexVal = this->emitValueLoad(inputPtr); + + DxsoRegisterValue workingReg; + workingReg.type = indexVal.type; + + workingReg.id = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f); + + DxsoRegMask mask = elem.mask; + if (mask.popCount() == 0) + mask = DxsoRegMask(true, true, true, true); + + std::array indices = { 0, 1, 2, 3 }; + uint32_t count = 0; + for (uint32_t i = 0; i < 4; i++) { + if (mask[i]) { + indices[i] = i + 4; + count++; + } + } + + workingReg.id = m_module.opVectorShuffle(getVectorTypeId(workingReg.type), + workingReg.id, indexVal.id, 4, indices.data()); + + // We need to replace TEXCOORD inputs with gl_PointCoord + // if D3DRS_POINTSPRITEENABLE is set. + if (m_programInfo.type() == DxsoProgramType::PixelShader && elem.semantic.usage == DxsoUsage::Texcoord) + workingReg.id = m_module.opSelect(getVectorTypeId(workingReg.type), pointInfo.isSprite, pointCoord, workingReg.id); + + if (m_programInfo.type() == DxsoProgramType::PixelShader && elem.semantic.usage == DxsoUsage::Color) { + if (elem.semantic.usageIndex == 0) + m_ps.diffuseColorIn = inputPtr.id; + else if (elem.semantic.usageIndex == 1) + m_ps.specularColorIn = inputPtr.id; + } + + m_module.opStore(indexPtr.id, workingReg.id); + } + } + + + void DxsoCompiler::emitLinkerOutputSetup() { + bool outputtedColor0 = false; + bool outputtedColor1 = false; + + for (uint32_t i = 0; i < m_osgn.elemCount; i++) { + const auto& elem = m_osgn.elems[i]; + const uint32_t slot = elem.slot; + + if (elem.semantic.usage == DxsoUsage::Color) { + if (elem.semantic.usageIndex == 0) + outputtedColor0 = true; + else + outputtedColor1 = true; + } + + DxsoRegisterInfo info; + info.type.ctype = DxsoScalarType::Float32; + info.type.ccount = 4; + info.type.alength = 1; + info.sclass = spv::StorageClassOutput; + + spv::BuiltIn builtIn = + semanticToBuiltIn(false, elem.semantic); + + DxsoRegisterPointer outputPtr; + outputPtr.type.ctype = DxsoScalarType::Float32; + 
outputPtr.type.ccount = 4; + + DxsoRegMask mask = elem.mask; + + bool scalar = false; + + if (builtIn == spv::BuiltInMax) { + outputPtr.id = emitNewVariableDefault(info, + m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f)); + m_module.decorateLocation(outputPtr.id, slot); + + std::string name = + str::format("out_", elem.semantic.usage, elem.semantic.usageIndex); + m_module.setDebugName(outputPtr.id, name.c_str()); + } + else { + const char* name = "unknown_builtin"; + if (builtIn == spv::BuiltInPosition) + name = "oPos"; + else if (builtIn == spv::BuiltInPointSize) { + outputPtr.type.ccount = 1; + info.type.ccount = 1; + name = "oPSize"; + bool maskValues[4]; + for (uint32_t i = 0; i < 4; i++) + maskValues[i] = i == elem.mask.firstSet(); + mask = DxsoRegMask(maskValues[0], maskValues[1], maskValues[2], maskValues[3]); + } + + outputPtr.id = emitNewVariableDefault(info, + m_module.constfReplicant(0.0f, info.type.ccount)); + + m_module.setDebugName(outputPtr.id, name); + m_module.decorateBuiltIn(outputPtr.id, builtIn); + + if (builtIn == spv::BuiltInPosition) + m_vs.oPos = outputPtr; + else if (builtIn == spv::BuiltInPointSize) { + scalar = true; + m_vs.oPSize = outputPtr; + } + } + + m_entryPointInterfaces.push_back(outputPtr.id); + + uint32_t typeId = this->getVectorTypeId({ DxsoScalarType::Float32, 4 }); + uint32_t ptrTypeId = m_module.defPointerType(typeId, spv::StorageClassPrivate); + + uint32_t regNumVar = m_module.constu32(elem.regNumber); + + DxsoRegisterPointer indexPtr; + indexPtr.id = m_module.opAccessChain(ptrTypeId, m_oArray, 1, ®NumVar); + indexPtr.type = outputPtr.type; + indexPtr.type.ccount = 4; + + DxsoRegisterValue indexVal = this->emitValueLoad(indexPtr); + + DxsoRegisterValue workingReg; + workingReg.type.ctype = indexVal.type.ctype; + workingReg.type.ccount = scalar ? 1 : 4; + + workingReg.id = scalar + ? 
m_module.constf32(0.0f) + : m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f); + + std::array indices = { 0, 1, 2, 3 }; + + if (scalar) { + workingReg.id = m_module.opCompositeExtract(getVectorTypeId(workingReg.type), + indexVal.id, 1, indices.data()); + } else { + if (mask.popCount() == 0) + mask = DxsoRegMask(true, true, true, true); + + uint32_t count = 0; + for (uint32_t i = 0; i < 4; i++) { + if (mask[i]) + indices[count++] = i + 4; + } + + + workingReg.id = m_module.opVectorShuffle(getVectorTypeId(workingReg.type), + workingReg.id, indexVal.id, 4, indices.data()); + } + + // Ie. 0 or 1 for diffuse and specular color + // and for Shader Model 1 or 2 + // (because those have dedicated color registers + // where this rule applies) + if (elem.semantic.usage == DxsoUsage::Color && + elem.semantic.usageIndex < 2 && + m_programInfo.majorVersion() < 3) + workingReg = emitSaturate(workingReg); + + m_module.opStore(outputPtr.id, workingReg.id); + } + + auto OutputDefault = [&](DxsoSemantic semantic) { + DxsoRegisterInfo info; + info.type.ctype = DxsoScalarType::Float32; + info.type.ccount = 4; + info.type.alength = 1; + info.sclass = spv::StorageClassOutput; + + uint32_t slot = RegisterLinkerSlot(semantic); + + uint32_t value = semantic == DxsoSemantic{ DxsoUsage::Color, 0 } + ? 
m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f) + : m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f); + + + uint32_t outputPtr = emitNewVariableDefault(info, value); + + m_module.decorateLocation(outputPtr, slot); + + std::string name = + str::format("out_", semantic.usage, semantic.usageIndex, "_default"); + + m_module.setDebugName(outputPtr, name.c_str()); + + m_interfaceSlots.outputSlots |= 1u << slot; + m_entryPointInterfaces.push_back(outputPtr); + }; + + if (!outputtedColor0) + OutputDefault(DxsoSemantic{ DxsoUsage::Color, 0 }); + + if (!outputtedColor1) + OutputDefault(DxsoSemantic{ DxsoUsage::Color, 1 }); + + auto pointInfo = GetPointSizeInfoVS(m_module, m_vs.oPos.id, 0, 0, m_rsBlock); + + if (m_vs.oPSize.id == 0) { + m_vs.oPSize = this->emitRegisterPtr( + "oPSize", DxsoScalarType::Float32, 1, 0, + spv::StorageClassOutput, spv::BuiltInPointSize); + + uint32_t pointSize = m_module.opFClamp(m_module.defFloatType(32), pointInfo.defaultValue, pointInfo.min, pointInfo.max); + + m_module.opStore(m_vs.oPSize.id, pointSize); + } + else { + uint32_t float_t = m_module.defFloatType(32); + uint32_t pointSize = m_module.opFClamp(m_module.defFloatType(32), m_module.opLoad(float_t, m_vs.oPSize.id), pointInfo.min, pointInfo.max); + m_module.opStore(m_vs.oPSize.id, pointSize); + } + } + + + void DxsoCompiler::emitVsClipping() { + uint32_t clipPlaneCountId = m_module.constu32(caps::MaxClipPlanes); + + uint32_t floatType = m_module.defFloatType(32); + uint32_t vec4Type = m_module.defVectorType(floatType, 4); + + // Declare uniform buffer containing clip planes + uint32_t clipPlaneArray = m_module.defArrayTypeUnique(vec4Type, clipPlaneCountId); + uint32_t clipPlaneStruct = m_module.defStructTypeUnique(1, &clipPlaneArray); + uint32_t clipPlaneBlock = m_module.newVar( + m_module.defPointerType(clipPlaneStruct, spv::StorageClassUniform), + spv::StorageClassUniform); + + m_module.decorateArrayStride (clipPlaneArray, 16); + + m_module.setDebugName (clipPlaneStruct, "clip_info_t"); + 
m_module.setDebugMemberName (clipPlaneStruct, 0, "clip_planes"); + m_module.decorate (clipPlaneStruct, spv::DecorationBlock); + m_module.memberDecorateOffset (clipPlaneStruct, 0, 0); + + uint32_t bindingId = computeResourceSlotId( + m_programInfo.type(), DxsoBindingType::ConstantBuffer, + DxsoConstantBuffers::VSClipPlanes); + + m_module.setDebugName (clipPlaneBlock, "clip_info"); + m_module.decorateDescriptorSet(clipPlaneBlock, 0); + m_module.decorateBinding (clipPlaneBlock, bindingId); + + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + resource.access = VK_ACCESS_UNIFORM_READ_BIT; + m_resourceSlots.push_back(resource); + + // Declare output array for clip distances + uint32_t clipDistArray = m_module.newVar( + m_module.defPointerType( + m_module.defArrayType(floatType, clipPlaneCountId), + spv::StorageClassOutput), + spv::StorageClassOutput); + + m_module.decorateBuiltIn(clipDistArray, spv::BuiltInClipDistance); + m_entryPointInterfaces.push_back(clipDistArray); + + if (m_moduleInfo.options.invariantPosition) + m_module.decorate(m_vs.oPos.id, spv::DecorationInvariant); + + const uint32_t positionPtr = m_vs.oPos.id; + + // We generated a bad shader, let's not make it even worse. + if (positionPtr == 0) { + Logger::warn("Shader without Position output. 
Something is likely wrong here."); + return; + } + + // Compute clip distances + uint32_t positionId = m_module.opLoad(vec4Type, positionPtr); + + for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) { + std::array blockMembers = {{ + m_module.constu32(0), + m_module.constu32(i), + }}; + + uint32_t planeId = m_module.opLoad(vec4Type, + m_module.opAccessChain( + m_module.defPointerType(vec4Type, spv::StorageClassUniform), + clipPlaneBlock, blockMembers.size(), blockMembers.data())); + + uint32_t distId = m_module.opDot(floatType, positionId, planeId); + + m_module.opStore( + m_module.opAccessChain( + m_module.defPointerType(floatType, spv::StorageClassOutput), + clipDistArray, 1, &blockMembers[1]), + distId); + } + } + + + void DxsoCompiler::setupRenderStateInfo() { + m_rsBlock = SetupRenderStateBlock(m_module); + + // Only need alpha ref for PS 3. + // No FF fog component. + if (m_programInfo.type() == DxsoProgramType::PixelShader) { + if (m_programInfo.majorVersion() == 3) { + m_interfaceSlots.pushConstOffset = offsetof(D3D9RenderStateInfo, alphaRef); + m_interfaceSlots.pushConstSize = sizeof(float); + } + else { + m_interfaceSlots.pushConstOffset = 0; + m_interfaceSlots.pushConstSize = offsetof(D3D9RenderStateInfo, pointSize); + } + } + else { + m_interfaceSlots.pushConstOffset = offsetof(D3D9RenderStateInfo, pointSize); + // Point scale never triggers on programmable + m_interfaceSlots.pushConstSize = sizeof(float) * 3; + } + } + + + void DxsoCompiler::emitFog() { + DxsoRegister color0; + color0.id = DxsoRegisterId{ DxsoRegisterType::ColorOut, 0 }; + auto oColor0Ptr = this->emitGetOperandPtr(color0); + + DxsoRegister vFog; + vFog.id = DxsoRegisterId{ DxsoRegisterType::RasterizerOut, RasterOutFog }; + auto vFogPtr = this->emitGetOperandPtr(vFog); + + DxsoRegister vPos; + vPos.id = DxsoRegisterId{ DxsoRegisterType::MiscType, DxsoMiscTypeIndices::MiscTypePosition }; + auto vPosPtr = this->emitGetOperandPtr(vPos); + + D3D9FogContext fogCtx; + fogCtx.IsPixel = true; + 
fogCtx.RangeFog = false; + fogCtx.RenderState = m_rsBlock; + fogCtx.vPos = m_module.opLoad(getVectorTypeId(vPosPtr.type), vPosPtr.id); + fogCtx.vFog = m_module.opLoad(getVectorTypeId(vFogPtr.type), vFogPtr.id); + fogCtx.oColor = m_module.opLoad(getVectorTypeId(oColor0Ptr.type), oColor0Ptr.id); + + m_module.opStore(oColor0Ptr.id, DoFixedFunctionFog(m_module, fogCtx)); + } + + + void DxsoCompiler::emitPsProcessing() { + uint32_t boolType = m_module.defBoolType(); + uint32_t floatType = m_module.defFloatType(32); + uint32_t floatPtr = m_module.defPointerType(floatType, spv::StorageClassPushConstant); + + // Declare spec constants for render states + uint32_t alphaTestId = m_module.specConstBool(false); + uint32_t alphaFuncId = m_module.specConst32(m_module.defIntType(32, 0), uint32_t(VK_COMPARE_OP_ALWAYS)); + + m_module.setDebugName (alphaTestId, "alpha_test"); + m_module.decorateSpecId (alphaTestId, getSpecId(D3D9SpecConstantId::AlphaTestEnable)); + + m_module.setDebugName (alphaFuncId, "alpha_func"); + m_module.decorateSpecId (alphaFuncId, getSpecId(D3D9SpecConstantId::AlphaCompareOp)); + + // Implement alpha test and fog + DxsoRegister color0; + color0.id = DxsoRegisterId{ DxsoRegisterType::ColorOut, 0 }; + auto oC0 = this->emitGetOperandPtr(color0); + + if (oC0.id) { + if (m_programInfo.majorVersion() < 3) + emitFog(); + + // Labels for the alpha test + std::array atestCaseLabels = {{ + { uint32_t(VK_COMPARE_OP_NEVER), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_LESS), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_EQUAL), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_LESS_OR_EQUAL), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_GREATER), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_NOT_EQUAL), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_GREATER_OR_EQUAL), m_module.allocateId() }, + { uint32_t(VK_COMPARE_OP_ALWAYS), m_module.allocateId() }, + }}; + + uint32_t atestBeginLabel = m_module.allocateId(); + uint32_t atestTestLabel 
= m_module.allocateId(); + uint32_t atestDiscardLabel = m_module.allocateId(); + uint32_t atestKeepLabel = m_module.allocateId(); + uint32_t atestSkipLabel = m_module.allocateId(); + + // if (alpha_test) { ... } + m_module.opSelectionMerge(atestSkipLabel, spv::SelectionControlMaskNone); + m_module.opBranchConditional(alphaTestId, atestBeginLabel, atestSkipLabel); + m_module.opLabel(atestBeginLabel); + + // Load alpha component + uint32_t alphaComponentId = 3; + uint32_t alphaId = m_module.opCompositeExtract(floatType, + m_module.opLoad(m_module.defVectorType(floatType, 4), oC0.id), + 1, &alphaComponentId); + + // Load alpha reference + uint32_t alphaRefMember = m_module.constu32(uint32_t(D3D9RenderStateItem::AlphaRef)); + uint32_t alphaRefId = m_module.opLoad(floatType, + m_module.opAccessChain(floatPtr, m_rsBlock, 1, &alphaRefMember)); + + // switch (alpha_func) { ... } + m_module.opSelectionMerge(atestTestLabel, spv::SelectionControlMaskNone); + m_module.opSwitch(alphaFuncId, + atestCaseLabels[uint32_t(VK_COMPARE_OP_ALWAYS)].labelId, + atestCaseLabels.size(), + atestCaseLabels.data()); + + std::array atestVariables; + + for (uint32_t i = 0; i < atestCaseLabels.size(); i++) { + m_module.opLabel(atestCaseLabels[i].labelId); + + atestVariables[i].labelId = atestCaseLabels[i].labelId; + atestVariables[i].varId = [&] { + switch (VkCompareOp(atestCaseLabels[i].literal)) { + case VK_COMPARE_OP_NEVER: return m_module.constBool(false); + case VK_COMPARE_OP_LESS: return m_module.opFOrdLessThan (boolType, alphaId, alphaRefId); + case VK_COMPARE_OP_EQUAL: return m_module.opFOrdEqual (boolType, alphaId, alphaRefId); + case VK_COMPARE_OP_LESS_OR_EQUAL: return m_module.opFOrdLessThanEqual (boolType, alphaId, alphaRefId); + case VK_COMPARE_OP_GREATER: return m_module.opFOrdGreaterThan (boolType, alphaId, alphaRefId); + case VK_COMPARE_OP_NOT_EQUAL: return m_module.opFOrdNotEqual (boolType, alphaId, alphaRefId); + case VK_COMPARE_OP_GREATER_OR_EQUAL: return 
m_module.opFOrdGreaterThanEqual(boolType, alphaId, alphaRefId); + default: + case VK_COMPARE_OP_ALWAYS: return m_module.constBool(true); + } + }(); + + m_module.opBranch(atestTestLabel); + } + + // end switch + m_module.opLabel(atestTestLabel); + + uint32_t atestResult = m_module.opPhi(boolType, + atestVariables.size(), + atestVariables.data()); + uint32_t atestDiscard = m_module.opLogicalNot(boolType, atestResult); + + atestResult = m_module.opLogicalNot(boolType, atestResult); + + // if (do_discard) { ... } + m_module.opSelectionMerge(atestKeepLabel, spv::SelectionControlMaskNone); + m_module.opBranchConditional(atestDiscard, atestDiscardLabel, atestKeepLabel); + + m_module.opLabel(atestDiscardLabel); + m_module.opKill(); + + // end if (do_discard) + m_module.opLabel(atestKeepLabel); + m_module.opBranch(atestSkipLabel); + + // end if (alpha_test) + m_module.opLabel(atestSkipLabel); + } + } + + void DxsoCompiler::emitOutputDepthClamp() { + // HACK: Some drivers do not clamp FragDepth to [minDepth..maxDepth] + // before writing to the depth attachment, but we do not have access + // to those. Clamp to [0..1] instead. 
+ + if (m_ps.oDepth.id != 0) { + auto result = emitValueLoad(m_ps.oDepth); + + result = emitSaturate(result); + + m_module.opStore( + m_ps.oDepth.id, + result.id); + } +} + + + void DxsoCompiler::emitVsFinalize() { + this->emitMainFunctionBegin(); + + this->emitInputSetup(); + m_module.opFunctionCall( + m_module.defVoidType(), + m_vs.functionId, 0, nullptr); + this->emitLinkerOutputSetup(); + + this->emitVsClipping(); + + this->emitFunctionEnd(); + } + + void DxsoCompiler::emitPsFinalize() { + this->emitMainFunctionBegin(); + + this->emitInputSetup(); + m_module.opFunctionCall( + m_module.defVoidType(), + m_ps.functionId, 0, nullptr); + + if (m_ps.killState != 0) { + uint32_t labelIf = m_module.allocateId(); + uint32_t labelEnd = m_module.allocateId(); + + uint32_t killTest = m_module.opLoad(m_module.defBoolType(), m_ps.killState); + + m_module.opSelectionMerge(labelEnd, spv::SelectionControlMaskNone); + m_module.opBranchConditional(killTest, labelIf, labelEnd); + + m_module.opLabel(labelIf); + m_module.opKill(); + + m_module.opLabel(labelEnd); + } + + // r0 in PS1 is the colour output register. Move r0 -> oC0 here. + if (m_programInfo.majorVersion() == 1 + && m_programInfo.type() == DxsoProgramTypes::PixelShader) { + DxsoRegister r0; + r0.id = { DxsoRegisterType::Temp, 0 }; + + DxsoRegister c0; + c0.id = { DxsoRegisterType::ColorOut, 0 }; + + DxsoRegisterValue val = emitRegisterLoadRaw(r0, nullptr); + DxsoRegisterPointer out = emitGetOperandPtr(c0); + m_module.opStore(out.id, val.id); + } + + // No need to setup output here as it's non-indexable + // everything has already gone to the right place! 
+ + this->emitPsProcessing(); + this->emitOutputDepthClamp(); + this->emitFunctionEnd(); + } + + + + uint32_t DxsoCompiler::getScalarTypeId(DxsoScalarType type) { + switch (type) { + case DxsoScalarType::Uint32: return m_module.defIntType(32, 0); + case DxsoScalarType::Sint32: return m_module.defIntType(32, 1); + case DxsoScalarType::Float32: return m_module.defFloatType(32); + case DxsoScalarType::Bool: return m_module.defBoolType(); + } + + throw DxvkError("DxsoCompiler: Invalid scalar type"); + } + + + uint32_t DxsoCompiler::getVectorTypeId(const DxsoVectorType& type) { + uint32_t typeId = this->getScalarTypeId(type.ctype); + + if (type.ccount > 1) + typeId = m_module.defVectorType(typeId, type.ccount); + + return typeId; + } + + + uint32_t DxsoCompiler::getArrayTypeId(const DxsoArrayType& type) { + DxsoVectorType vtype; + vtype.ctype = type.ctype; + vtype.ccount = type.ccount; + + uint32_t typeId = this->getVectorTypeId(vtype); + + if (type.alength > 1) { + typeId = m_module.defArrayType(typeId, + m_module.constu32(type.alength)); + } + + return typeId; + } + + + uint32_t DxsoCompiler::getPointerTypeId(const DxsoRegisterInfo& type) { + return m_module.defPointerType( + this->getArrayTypeId(type.type), + type.sclass); + } + +} \ No newline at end of file diff --git a/src/dxso/dxso_compiler.h b/src/dxso/dxso_compiler.h new file mode 100644 index 000000000..edb12aa80 --- /dev/null +++ b/src/dxso/dxso_compiler.h @@ -0,0 +1,674 @@ +#pragma once + +#include "dxso_decoder.h" +#include "dxso_header.h" +#include "dxso_modinfo.h" +#include "dxso_isgn.h" + +#include "../d3d9/d3d9_constant_layout.h" +#include "../d3d9/d3d9_shader_permutations.h" +#include "../spirv/spirv_module.h" + +namespace dxvk { + + /** + * \brief Scalar value type + * + * Enumerates possible register component + * types. Scalar types are represented as + * a one-component vector type. 
+ */ + enum class DxsoScalarType : uint32_t { + Uint32 = 0, + Sint32 = 1, + Float32 = 2, + Bool = 3, + }; + + /** + * \brief Vector type + * + * Convenience struct that stores a scalar + * type and a component count. The compiler + * can use this to generate SPIR-V types. + */ + struct DxsoVectorType { + DxsoScalarType ctype; + uint32_t ccount; + }; + + + /** + * \brief Array type + * + * Convenience struct that stores a scalar type, a + * component count and an array size. An array of + * length 0 or 1 will be evaluated to a vector type. The + * compiler can use this to generate SPIR-V types. + */ + struct DxsoArrayType { + DxsoScalarType ctype; + uint32_t ccount; + uint32_t alength; + }; + + + /** + * \brief Register info + * + * Stores the array type of a register and + * its storage class. The compiler can use + * this to generate SPIR-V pointer types. + */ + struct DxsoRegisterInfo { + DxsoArrayType type; + spv::StorageClass sclass; + }; + + + /** + * \brief Register value + * + * Stores a vector type and a SPIR-V ID that + * represents an intermediate value. This is + * used to track the type of such values. + */ + struct DxsoRegisterValue { + DxsoVectorType type; + uint32_t id; + }; + + + /** + * \brief Register pointer + * + * Stores a vector type and a SPIR-V ID that + * represents a pointer to such a vector. This + * can be used to load registers conveniently. + */ + struct DxsoRegisterPointer { + DxsoVectorType type; + uint32_t id = 0; + }; + + /** + * \brief Sampler info + * + * Stores the SPIR-V variable and type IDs of + * a sampler binding, and the number of texture + * coordinate components used to address it. 
+ */ + struct DxsoSamplerInfo { + uint32_t dimensions = 0; + + uint32_t varId = 0; + uint32_t typeId = 0; + }; + + enum DxsoSamplerType : uint32_t { + SamplerTypeTexture2D = 0, + SamplerTypeTexture3D = 1, + SamplerTypeTextureCube, + + SamplerTypeCount + }; + + inline auto SamplerTypeFromTextureType(DxsoTextureType type) { + switch (type) { + default: + case DxsoTextureType::Texture2D: return SamplerTypeTexture2D; break; + case DxsoTextureType::Texture3D: return SamplerTypeTexture3D; break; + case DxsoTextureType::TextureCube: return SamplerTypeTextureCube; break; + } + } + + struct DxsoSampler { + DxsoSamplerInfo color[SamplerTypeCount]; + DxsoSamplerInfo depth[SamplerTypeCount]; + + uint32_t depthSpecConst; + + DxsoTextureType type; + }; + + struct DxsoAnalysisInfo; + + /** + * \brief Vertex shader-specific structure + */ + struct DxsoCompilerVsPart { + uint32_t functionId = 0; + + //////////////////// + // Address register + DxsoRegisterPointer addr; + + ////////////////////////////// + // Rasterizer output registers + DxsoRegisterPointer oPos; + DxsoRegisterPointer oPSize; + }; + + /** + * \brief Pixel shader-specific structure + */ + struct DxsoCompilerPsPart { + uint32_t functionId = 0; + uint32_t samplerTypeSpec = 0; + uint32_t projectionSpec = 0; + + ////////////// + // Misc Types + DxsoRegisterPointer vPos; + DxsoRegisterPointer vFace; + + /////////////////// + // Colour Outputs + std::array oColor; + + //////////////// + // Depth output + DxsoRegisterPointer oDepth; + + //////////////// + // Shared State + uint32_t sharedState = 0; + + uint32_t killState = 0; + uint32_t builtinLaneId = 0; + + uint32_t diffuseColorIn = 0; + uint32_t specularColorIn = 0; + }; + + struct DxsoCfgBlockIf { + uint32_t ztestId; + uint32_t labelIf; + uint32_t labelElse; + uint32_t labelEnd; + size_t headerPtr; + }; + + struct DxsoCfgBlockLoop { + uint32_t labelHeader; + uint32_t labelBegin; + uint32_t labelContinue; + uint32_t labelBreak; + uint32_t iteratorPtr; + + uint32_t 
strideVar; + uint32_t countBackup; + }; + + enum class DxsoCfgBlockType : uint32_t { + If, Loop + }; + + struct DxsoCfgBlock { + DxsoCfgBlockType type; + + union { + DxsoCfgBlockIf b_if; + DxsoCfgBlockLoop b_loop; + }; + }; + + using DxsoSrcArray = std::array; + + class DxsoCompiler { + + public: + + DxsoCompiler( + const std::string& fileName, + const DxsoModuleInfo& moduleInfo, + const DxsoProgramInfo& programInfo, + const DxsoAnalysisInfo& analysis, + const D3D9ConstantLayout& layout); + + /** + * \brief Processes a single instruction + * \param [in] ins The instruction + */ + void processInstruction( + const DxsoInstructionContext& ctx); + + /** + * \brief Finalizes the shader + */ + void finalize(); + + /** + * \brief Compiles the shader + * \returns The final shader objects + */ + DxsoPermutations compile(); + + const DxsoIsgn& isgn() { return m_isgn; } + const DxsoIsgn& osgn() { return m_osgn; } + + const DxsoShaderMetaInfo& meta() { return m_meta; } + const DxsoDefinedConstants& constants() { return m_constants; } + uint32_t usedSamplers() const { return m_usedSamplers; } + uint32_t usedRTs() const { return m_usedRTs; } + + private: + + DxsoModuleInfo m_moduleInfo; + DxsoProgramInfo m_programInfo; + const DxsoAnalysisInfo* m_analysis; + const D3D9ConstantLayout* m_layout; + + DxsoShaderMetaInfo m_meta; + DxsoDefinedConstants m_constants; + + SpirvModule m_module; + + /////////////////////////////////////////////////////// + // Resource slot description for the shader. This will + // be used to map D3D9 bindings to DXVK bindings. + std::vector m_resourceSlots; + + //////////////////////////////////////////////// + // Temporary r# vector registers with immediate + // indexing, and x# vector array registers. 
+ std::array< + DxsoRegisterPointer, + DxsoMaxTempRegs> m_rRegs; + + //////////////////////////////////////////////// + // Predicate registers + std::array< + DxsoRegisterPointer, + 1> m_pRegs; + + ////////////////////////////////////////////////////////////////// + // Array of input values. Since v# and o# registers are indexable + // in DXSO, we need to copy them into an array first. + uint32_t m_vArray = 0; + uint32_t m_oArray = 0; + + //////////////////////////////// + // Input and output signatures + DxsoIsgn m_isgn; + DxsoIsgn m_osgn; + + //////////////////////////////////// + // Ptr to the constant buffer array + uint32_t m_cBuffer; + + //////////////////////////////////////// + // Constant buffer deffed mappings + std::array m_cFloat; + std::array m_cInt; + std::array m_cBool; + + ////////////////////// + // Loop counter + DxsoRegisterPointer m_loopCounter; + + /////////////////////////////////// + // Working tex/coord registers (PS) + std::array< + DxsoRegisterPointer, + DxsoMaxTextureRegs> m_tRegs; + + /////////////////////////////////////////////// + // Control flow information. Stores labels for + // currently active if-else blocks and loops. + std::vector m_controlFlowBlocks; + + ////////////////////////////////////////////// + // Function state tracking. Required in order + // to properly end functions in some cases. + bool m_insideFunction = false; + + //////////// + // Samplers + std::array m_samplers; + + //////////////////////////////////////////// + // What IO regs we need to + // NOT generate semantics for + uint16_t m_explicitInputs = 0; + uint16_t m_explicitOutputs = 0; + + /////////////////////////////////////////////////// + // Entry point description - we'll need to declare + // the function ID and all input/output variables. + std::vector m_entryPointInterfaces; + uint32_t m_entryPointId = 0; + + //////////////////////////////////////////// + // Inter-stage shader interface slots. Also + // covers vertex input and fragment output.
+ DxvkInterfaceSlots m_interfaceSlots; + + /////////////////////////////////// + // Shader-specific data structures + DxsoCompilerVsPart m_vs; + DxsoCompilerPsPart m_ps; + + DxsoRegisterPointer m_fog; + + ////////////////////////////////////////// + // Bit masks containing used samplers + // and render targets for hazard tracking + uint32_t m_usedSamplers; + uint32_t m_usedRTs; + + uint32_t m_rsBlock = 0; + uint32_t m_mainFuncLabel = 0; + + ////////////////////////////////////// + // Common function definition methods + void emitInit(); + + ////////////////////// + // Common shader dcls + void emitDclConstantBuffer(); + + void emitDclInputArray(); + void emitDclOutputArray(); + + ///////////////////////////////// + // Shader initialization methods + void emitVsInit(); + + void emitPsSharedConstants(); + void emitPsInit(); + + void emitFunctionBegin( + uint32_t entryPoint, + uint32_t returnType, + uint32_t funcType); + + void emitFunctionEnd(); + + uint32_t emitFunctionLabel(); + + void emitMainFunctionBegin(); + + /////////////////////////////// + // Variable definition methods + uint32_t emitNewVariable( + const DxsoRegisterInfo& info); + + uint32_t emitNewVariableDefault( + const DxsoRegisterInfo& info, + uint32_t value); + + uint32_t emitNewBuiltinVariable( + const DxsoRegisterInfo& info, + spv::BuiltIn builtIn, + const char* name, + uint32_t value); + + DxsoCfgBlock* cfgFindBlock( + const std::initializer_list& types); + + void emitDclInterface( + bool input, + uint32_t regNumber, + DxsoSemantic semantic, + DxsoRegMask mask, + bool centroid); + + void emitDclSampler( + uint32_t idx, + DxsoTextureType type); + + bool defineInput(uint32_t idx) { + bool alreadyDefined = m_interfaceSlots.inputSlots & 1u << idx; + m_interfaceSlots.inputSlots |= 1u << idx; + return alreadyDefined; + } + + bool defineOutput(uint32_t idx) { + bool alreadyDefined = m_interfaceSlots.outputSlots & 1u << idx; + m_interfaceSlots.outputSlots |= 1u << idx; + return alreadyDefined; + } + + 
uint32_t emitArrayIndex( + uint32_t idx, + const DxsoBaseRegister* relative); + + DxsoRegisterPointer emitInputPtr( + bool texture, + const DxsoBaseRegister& reg, + const DxsoBaseRegister* relative); + + DxsoRegisterPointer emitRegisterPtr( + const char* name, + DxsoScalarType ctype, + uint32_t ccount, + uint32_t defaultVal, + spv::StorageClass storageClass = spv::StorageClassPrivate, + spv::BuiltIn builtIn = spv::BuiltInMax); + + DxsoRegisterValue emitLoadConstant( + const DxsoBaseRegister& reg, + const DxsoBaseRegister* relative); + + DxsoRegisterPointer emitOutputPtr( + bool texcrdOut, + const DxsoBaseRegister& reg, + const DxsoBaseRegister* relative); + + DxsoRegisterPointer emitGetOperandPtr( + const DxsoBaseRegister& reg, + const DxsoBaseRegister* relative); + + DxsoRegisterPointer emitGetOperandPtr( + const DxsoRegister& reg) { + return this->emitGetOperandPtr( + reg, + reg.hasRelative ? &reg.relative : nullptr); + } + + uint32_t emitBoolComparison(DxsoVectorType type, DxsoComparison cmp, uint32_t a, uint32_t b); + + DxsoRegisterValue emitValueLoad( + DxsoRegisterPointer ptr); + + void emitDstStore( + DxsoRegisterPointer ptr, + DxsoRegisterValue value, + DxsoRegMask writeMask, + bool saturate, + DxsoRegisterValue predicate, + int8_t shift, + DxsoRegisterId regId) { + if (regId.type == DxsoRegisterType::RasterizerOut && regId.num == RasterOutFog) + saturate = true; + + if (value.type.ctype == DxsoScalarType::Float32) { + const uint32_t typeId = getVectorTypeId(value.type); + + // Saturating only makes sense on floats + if (saturate) { + value.id = m_module.opNClamp( + typeId, value.id, + m_module.constfReplicant(0.0f, value.type.ccount), + m_module.constfReplicant(1.0f, value.type.ccount)); + } + + // There doesn't seem to be a nice float bitshift method for float vectors + // in Spirv that I can see... Resorting to multiplication. + if (shift != 0) { + float shiftAmount = shift < 0 + ?
1.0f / (1 << -shift) + : float(1 << shift); + + uint32_t shiftConst = m_module.constf32(shiftAmount); + + if (value.type.ccount == 1) + value.id = m_module.opFMul(typeId, value.id, shiftConst); + else + value.id = m_module.opVectorTimesScalar(typeId, value.id, shiftConst); + } + } + + this->emitValueStore(ptr, value, writeMask, predicate); + } + + DxsoRegisterValue applyPredicate(DxsoRegisterValue pred, DxsoRegisterValue dst, DxsoRegisterValue src); + + void emitValueStore( + DxsoRegisterPointer ptr, + DxsoRegisterValue value, + DxsoRegMask writeMask, + DxsoRegisterValue predicate); + + DxsoRegisterValue emitClampBoundReplicant( + DxsoRegisterValue srcValue, + float lb, + float ub); + + DxsoRegisterValue emitSaturate( + DxsoRegisterValue srcValue); + + DxsoRegisterValue emitDot( + DxsoRegisterValue a, + DxsoRegisterValue b); + + DxsoRegisterValue emitRegisterInsert( + DxsoRegisterValue dstValue, + DxsoRegisterValue srcValue, + DxsoRegMask srcMask); + + DxsoRegisterValue emitRegisterLoadRaw( + const DxsoBaseRegister& reg, + const DxsoBaseRegister* relative); + + DxsoRegisterValue emitRegisterExtend( + DxsoRegisterValue value, + uint32_t size); + + DxsoRegisterValue emitSrcOperandPreSwizzleModifiers( + DxsoRegisterValue value, + DxsoRegModifier modifier); + + DxsoRegisterValue emitSrcOperandPostSwizzleModifiers( + DxsoRegisterValue value, + DxsoRegModifier modifier); + + DxsoRegisterValue emitRegisterSwizzle( + DxsoRegisterValue value, + DxsoRegSwizzle swizzle, + DxsoRegMask writeMask); + + DxsoRegisterValue emitRegisterLoad( + const DxsoBaseRegister& reg, + DxsoRegMask writeMask, + const DxsoBaseRegister* relative); + + DxsoRegisterValue emitRegisterLoad( + const DxsoRegister& reg, + DxsoRegMask writeMask) { + return this->emitRegisterLoad( + reg, writeMask, + reg.hasRelative ? 
®.relative : nullptr); + } + + DxsoRegisterValue emitPredicateLoad(const DxsoInstructionContext& ctx) { + if (!ctx.instruction.predicated) + return DxsoRegisterValue(); + + return emitRegisterLoad(ctx.pred, IdentityWriteMask); + } + + DxsoRegisterValue emitRegisterLoadTexcoord( + const DxsoRegister& reg, + DxsoRegMask writeMask) { + DxsoRegister lookup = reg; + if (reg.id.type == DxsoRegisterType::Texture) + lookup.id.type = DxsoRegisterType::PixelTexcoord; + + return this->emitRegisterLoad(lookup, writeMask); + } + + Rc compileShader(); + + /////////////////////////////// + // Handle shader ops + void emitDcl(const DxsoInstructionContext& ctx); + + void emitDef(const DxsoInstructionContext& ctx); + void emitDefF(const DxsoInstructionContext& ctx); + void emitDefI(const DxsoInstructionContext& ctx); + void emitDefB(const DxsoInstructionContext& ctx); + + bool isScalarRegister(DxsoRegisterId id); + + void emitMov(const DxsoInstructionContext& ctx); + void emitPredicateOp(const DxsoInstructionContext& ctx); + void emitVectorAlu(const DxsoInstructionContext& ctx); + void emitMatrixAlu(const DxsoInstructionContext& ctx); + + void emitControlFlowGenericLoop( + bool count, + uint32_t initialVar, + uint32_t strideVar, + uint32_t iterationCountVar); + + void emitControlFlowGenericLoopEnd(); + + void emitControlFlowRep(const DxsoInstructionContext& ctx); + void emitControlFlowEndRep(const DxsoInstructionContext& ctx); + + void emitControlFlowLoop(const DxsoInstructionContext& ctx); + void emitControlFlowEndLoop(const DxsoInstructionContext& ctx); + + void emitControlFlowBreak(const DxsoInstructionContext& ctx); + void emitControlFlowBreakC(const DxsoInstructionContext& ctx); + + void emitControlFlowIf(const DxsoInstructionContext& ctx); + void emitControlFlowElse(const DxsoInstructionContext& ctx); + void emitControlFlowEndIf(const DxsoInstructionContext& ctx); + + void emitTexCoord(const DxsoInstructionContext& ctx); + void emitTextureSample(const DxsoInstructionContext& 
ctx); + void emitTextureKill(const DxsoInstructionContext& ctx); + + uint32_t emitSample( + bool projected, + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t reference, + const SpirvImageOperands& operands); + + /////////////////////////////// + // Shader finalization methods + void emitInputSetup(); + + void emitVsClipping(); + void setupRenderStateInfo(); + void emitFog(); + void emitPsProcessing(); + void emitOutputDepthClamp(); + + void emitLinkerOutputSetup(); + + void emitVsFinalize(); + void emitPsFinalize(); + + /////////////////////////// + // Type definition methods + uint32_t getScalarTypeId( + DxsoScalarType type); + + uint32_t getVectorTypeId( + const DxsoVectorType& type); + + uint32_t getArrayTypeId( + const DxsoArrayType& type); + + uint32_t getPointerTypeId( + const DxsoRegisterInfo& type); + + }; + +} \ No newline at end of file diff --git a/src/dxso/dxso_ctab.cpp b/src/dxso/dxso_ctab.cpp new file mode 100644 index 000000000..880c7ead4 --- /dev/null +++ b/src/dxso/dxso_ctab.cpp @@ -0,0 +1,19 @@ +#include "dxso_ctab.h" + +namespace dxvk { + + DxsoCtab::DxsoCtab(DxsoReader& reader, uint32_t commentTokenCount) { + m_size = reader.readu32(); + + if (m_size != 7 * sizeof(uint32_t)) // serialized CTAB header is the seven dwords read here, not the C++ object size + throw DxvkError("DxsoCtab: ctab size invalid"); + + m_creator = reader.readu32(); + m_version = reader.readu32(); + m_constants = reader.readu32(); + m_constantInfo = reader.readu32(); + m_flags = reader.readu32(); + m_target = reader.readu32(); + } + +} \ No newline at end of file diff --git a/src/dxso/dxso_ctab.h b/src/dxso/dxso_ctab.h new file mode 100644 index 000000000..56f4f9451 --- /dev/null +++ b/src/dxso/dxso_ctab.h @@ -0,0 +1,32 @@ +#pragma once + +#include "dxso_common.h" + +#include "dxso_reader.h" + +namespace dxvk { + + /** + * \brief DXSO CTAB + * + * Stores meta information about the shader + */ + class DxsoCtab : public RcObject { + + public: + + DxsoCtab(DxsoReader& reader, uint32_t commentTokenCount); + + private: + + 
uint32_t m_size; + uint32_t m_creator; + uint32_t m_version; + uint32_t m_constants; + uint32_t m_constantInfo; + uint32_t m_flags; + uint32_t m_target; + + }; + +} \ No newline at end of file diff --git a/src/dxso/dxso_decoder.cpp b/src/dxso/dxso_decoder.cpp new file mode 100644 index 000000000..c51dd224b --- /dev/null +++ b/src/dxso/dxso_decoder.cpp @@ -0,0 +1,276 @@ +#include "dxso_decoder.h" + +#include "dxso_tables.h" + +namespace dxvk { + + bool DxsoSemantic::operator== (const DxsoSemantic& b) const { + return usage == b.usage && usageIndex == b.usageIndex; + } + + bool DxsoSemantic::operator!= (const DxsoSemantic& b) const { + return usage != b.usage || usageIndex != b.usageIndex; + } + + uint32_t DxsoDecodeContext::decodeInstructionLength(uint32_t token) { + auto opcode = m_ctx.instruction.opcode; + + uint32_t length = 0; + const auto& info = this->getProgramInfo(); + + // Comment ops have their own system for getting length. + if (opcode == DxsoOpcode::Comment) + return (token & 0x7fff0000) >> 16; + + if (opcode == DxsoOpcode::End) + return 0; + + // SM2.0 and above has the length of the op in instruction count baked into it. + // SM1.4 and below have fixed lengths and run off expectation. + // Phase does not respect the following rules. :shrug: + if (opcode != DxsoOpcode::Phase) { + if (info.majorVersion() >= 2) + length = (token & 0x0f000000) >> 24; + else + length = DxsoGetDefaultOpcodeLength(opcode); + } + + // We've already logged this... 
+ if (length == InvalidOpcodeLength) + return 0; + + // SM 1.4 has an extra param on Tex and TexCoord + // As stated before, it also doesn't have the length of the op baked into the opcode + if (info.majorVersion() == 1 + && info.minorVersion() == 4) { + switch (opcode) { + case DxsoOpcode::TexCoord: + case DxsoOpcode::Tex: length += 1; + default: break; + } + } + + return length; + } + + bool DxsoDecodeContext::relativeAddressingUsesToken( + DxsoInstructionArgumentType type) { + auto& info = this->getProgramInfo(); + + return (info.majorVersion() >= 2 && type == DxsoInstructionArgumentType::Source) + || (info.majorVersion() >= 3 && type == DxsoInstructionArgumentType::Destination); + } + + void DxsoDecodeContext::decodeDeclaration(DxsoCodeIter& iter) { + uint32_t dclToken = iter.read(); + + m_ctx.dcl.textureType = static_cast((dclToken & 0x78000000) >> 27); + m_ctx.dcl.semantic.usage = static_cast(dclToken & 0x0000000f); + m_ctx.dcl.semantic.usageIndex = (dclToken & 0x000f0000) >> 16; + } + + void DxsoDecodeContext::decodeDefinition(DxsoOpcode opcode, DxsoCodeIter& iter) { + const uint32_t instructionLength = std::min(m_ctx.instruction.tokenLength - 1, 4u); + + for (uint32_t i = 0; i < instructionLength; i++) + m_ctx.def.uint32[i] = iter.read(); + } + + void DxsoDecodeContext::decodeBaseRegister( + DxsoBaseRegister& reg, + uint32_t token) { + reg.id.type = static_cast( + ((token & 0x00001800) >> 8) + | ((token & 0x70000000) >> 28)); + + reg.id.num = token & 0x000007ff; + } + + void DxsoDecodeContext::decodeGenericRegister( + DxsoRegister& reg, + uint32_t token) { + this->decodeBaseRegister(reg, token); + + reg.hasRelative = (token & (1 << 13)) == 8192; + reg.relative.id = DxsoRegisterId { + DxsoRegisterType::Addr, 0 }; + reg.relative.swizzle = IdentitySwizzle; + + reg.centroid = token & (4 << 20); + reg.partialPrecision = token & (2 << 20); + } + + void DxsoDecodeContext::decodeRelativeRegister( + DxsoBaseRegister& reg, + uint32_t token) { + 
this->decodeBaseRegister(reg, token); + + reg.swizzle = DxsoRegSwizzle( + uint8_t((token & 0x00ff0000) >> 16)); + } + + bool DxsoDecodeContext::decodeDestinationRegister(DxsoCodeIter& iter) { + uint32_t token = iter.read(); + + this->decodeGenericRegister(m_ctx.dst, token); + + m_ctx.dst.mask = DxsoRegMask( + uint8_t((token & 0x000f0000) >> 16)); + + m_ctx.dst.saturate = (token & (1 << 20)) != 0; + + m_ctx.dst.shift = (token & 0x0f000000) >> 24; + m_ctx.dst.shift = (m_ctx.dst.shift & 0x7) - (m_ctx.dst.shift & 0x8); + + const bool extraToken = + relativeAddressingUsesToken(DxsoInstructionArgumentType::Destination); + + if (m_ctx.dst.hasRelative && extraToken) { + this->decodeRelativeRegister(m_ctx.dst.relative, iter.read()); + return true; + } + + return false; + } + + bool DxsoDecodeContext::decodeSourceRegister(uint32_t i, DxsoCodeIter& iter) { + if (i >= m_ctx.src.size()) + throw DxvkError("DxsoDecodeContext::decodeSourceRegister: source register out of range."); + + uint32_t token = iter.read(); + + this->decodeGenericRegister(m_ctx.src[i], token); + + m_ctx.src[i].swizzle = DxsoRegSwizzle( + uint8_t((token & 0x00ff0000) >> 16)); + + m_ctx.src[i].modifier = static_cast( + (token & 0x0f000000) >> 24); + + const bool extraToken = + relativeAddressingUsesToken(DxsoInstructionArgumentType::Source); + + if (m_ctx.src[i].hasRelative && extraToken) { + this->decodeRelativeRegister(m_ctx.src[i].relative, iter.read()); + return true; + } + + return false; + } + + + void DxsoDecodeContext::decodePredicateRegister(DxsoCodeIter& iter) { + uint32_t token = iter.read(); + + this->decodeGenericRegister(m_ctx.pred, token); + + m_ctx.pred.swizzle = DxsoRegSwizzle( + uint8_t((token & 0x00ff0000) >> 16)); + + m_ctx.pred.modifier = static_cast( + (token & 0x0f000000) >> 24); + } + + + bool DxsoDecodeContext::decodeInstruction(DxsoCodeIter& iter) { + uint32_t token = iter.read(); + + m_ctx.instruction.opcode = static_cast( + token & 0x0000ffff); + + m_ctx.instruction.predicated = 
token & (1 << 28); + + m_ctx.instruction.specificData.uint32 = + (token & 0x00ff0000) >> 16; + + m_ctx.instruction.tokenLength = + this->decodeInstructionLength(token); + + uint32_t tokenLength = + m_ctx.instruction.tokenLength; + + switch (m_ctx.instruction.opcode) { + case DxsoOpcode::If: + case DxsoOpcode::Ifc: + case DxsoOpcode::Rep: + case DxsoOpcode::Loop: + case DxsoOpcode::BreakC: + case DxsoOpcode::BreakP: { + uint32_t sourceIdx = 0; + for (uint32_t i = 0; i < tokenLength; i++) { + if (this->decodeSourceRegister(sourceIdx, iter)) + i++; + + sourceIdx++; + } + return true; + } + + case DxsoOpcode::Dcl: + this->decodeDeclaration(iter); + this->decodeDestinationRegister(iter); + return true; + + case DxsoOpcode::Def: + case DxsoOpcode::DefI: + case DxsoOpcode::DefB: + this->decodeDestinationRegister(iter); + this->decodeDefinition( + m_ctx.instruction.opcode, iter); + return true; + + case DxsoOpcode::Comment: + iter = iter.skip(tokenLength); + return true; + + default: { + uint32_t sourceIdx = 0; + for (uint32_t i = 0; i < tokenLength; i++) { + if (i == 0) { + if (this->decodeDestinationRegister(iter)) + i++; + } + else if (i == 1 && m_ctx.instruction.predicated) { + // Relative addressing makes no sense + // for predicate registers. 
+ this->decodePredicateRegister(iter); + } + else { + if (this->decodeSourceRegister(sourceIdx, iter)) + i++; + + sourceIdx++; + } + } + return true; + } + + case DxsoOpcode::End: + return false; + } + } + + std::ostream& operator << (std::ostream& os, DxsoUsage usage) { + switch (usage) { + case DxsoUsage::Position: os << "Position"; break; + case DxsoUsage::BlendWeight: os << "BlendWeight"; break; + case DxsoUsage::BlendIndices: os << "BlendIndices"; break; + case DxsoUsage::Normal: os << "Normal"; break; + case DxsoUsage::PointSize: os << "PointSize"; break; + case DxsoUsage::Texcoord: os << "Texcoord"; break; + case DxsoUsage::Tangent: os << "Tangent"; break; + case DxsoUsage::Binormal: os << "Binormal"; break; + case DxsoUsage::TessFactor: os << "TessFactor"; break; + case DxsoUsage::PositionT: os << "PositionT"; break; + case DxsoUsage::Color: os << "Color"; break; + case DxsoUsage::Fog: os << "Fog"; break; + case DxsoUsage::Depth: os << "Depth"; break; + case DxsoUsage::Sample: os << "Sample"; break; + default: + os << "Invalid Format (" << static_cast(usage) << ")"; break; + } + + return os; + } + +} \ No newline at end of file diff --git a/src/dxso/dxso_decoder.h b/src/dxso/dxso_decoder.h new file mode 100644 index 000000000..8040b5d3a --- /dev/null +++ b/src/dxso/dxso_decoder.h @@ -0,0 +1,271 @@ +#pragma once + +#include "dxso_common.h" +#include "dxso_enums.h" +#include "dxso_code.h" + +namespace dxvk { + + constexpr size_t DxsoMaxTempRegs = 32; + constexpr size_t DxsoMaxTextureRegs = 10; + constexpr size_t DxsoMaxInterfaceRegs = 16; + constexpr size_t DxsoMaxOperandCount = 8; + + constexpr uint32_t DxsoRegModifierShift = 24; + + class DxsoDecodeContext; + + /** + * \brief Source operand modifiers + * + * These are applied after loading + * an operand register. 
+ */ + enum class DxsoRegModifier : uint32_t { + None = 0, // r + Neg = 1, // -r + Bias = 2, // r - 0.5 + BiasNeg = 3, // -(r - 0.5) + Sign = 4, // fma(r, 2.0f, -1.0f) + SignNeg = 5, // -fma(r, 2.0f, -1.0f) + Comp = 6, // 1 - r + X2 = 7, // r * 2 + X2Neg = 8, // -r * 2 + Dz = 9, // r / r.z + Dw = 10, // r / r.w + Abs = 11, // abs(r) + AbsNeg = 12, // -abs(r) + Not = 13, // !r + }; + + enum class DxsoInstructionArgumentType : uint16_t { + Source, + Destination + }; + + enum class DxsoComparison : uint32_t { + // < = > + Never = 0, // 0 0 0 + GreaterThan = 1, // 0 0 1 + Equal = 2, // 0 1 0 + GreaterEqual = 3, // 0 1 1 + LessThan = 4, // 1 0 0 + NotEqual = 5, // 1 0 1 + LessEqual = 6, // 1 1 0 + Always = 7 // 1 1 1 + }; + + enum class DxsoTexLdMode : uint32_t { + Regular = 0, + Project = 1, + Bias = 2 + }; + + union DxsoOpcodeSpecificData { + DxsoComparison comparison; + DxsoTexLdMode texld; + + uint32_t uint32; + }; + + struct DxsoShaderInstruction { + DxsoOpcode opcode; + bool predicated; + DxsoOpcodeSpecificData specificData; + + uint32_t tokenLength; + }; + + struct DxsoRegisterId { + DxsoRegisterType type; + uint32_t num; + + bool operator == (const DxsoRegisterId& other) const { return type == other.type && num == other.num; } + bool operator != (const DxsoRegisterId& other) const { return type != other.type || num != other.num; } + }; + + class DxsoRegMask { + + public: + + DxsoRegMask(uint8_t mask) + : m_mask(mask) { } + + DxsoRegMask(bool x, bool y, bool z, bool w) + : m_mask((x ? 0x1 : 0) | (y ? 0x2 : 0) + | (z ? 0x4 : 0) | (w ? 
0x8 : 0)) { } + + bool operator [] (uint32_t id) const { + return ((m_mask & (1u << id)) != 0); + } + + uint32_t popCount() const { + const uint8_t n[16] = { 0, 1, 1, 2, 1, 2, 2, 3, + 1, 2, 2, 3, 2, 3, 3, 4 }; + return n[m_mask & 0xF]; + } + + uint32_t firstSet() const { + const uint8_t n[16] = { 4, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0 }; + return n[m_mask & 0xF]; + } + + uint32_t minComponents() const { + const uint8_t n[16] = { 0, 1, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4 }; + return n[m_mask & 0xF]; + } + + bool operator == (const DxsoRegMask& other) const { return m_mask == other.m_mask; } + bool operator != (const DxsoRegMask& other) const { return m_mask != other.m_mask; } + + private: + + uint8_t m_mask; + + }; + + const DxsoRegMask IdentityWriteMask = DxsoRegMask(true, true, true, true); + + class DxsoRegSwizzle { + + public: + + DxsoRegSwizzle(uint8_t mask) + : m_mask(mask) { } + + DxsoRegSwizzle(uint32_t x, uint32_t y, uint32_t z, uint32_t w) + : m_mask((x << 0) | (y << 2) | (z << 4) | (w << 6)) {} + + uint32_t operator [] (uint32_t id) const { + return (m_mask >> (id + id)) & 0x3; + } + + bool operator == (const DxsoRegSwizzle& other) const { return m_mask == other.m_mask; } + bool operator != (const DxsoRegSwizzle& other) const { return m_mask != other.m_mask; } + + private: + + uint8_t m_mask; + + }; + + const DxsoRegSwizzle IdentitySwizzle{ 0, 1, 2, 3 }; + + struct DxsoBaseRegister { + DxsoRegisterId id = { DxsoRegisterType::Temp, 0 }; + bool centroid = false; + bool partialPrecision = false; + bool saturate = false; + DxsoRegModifier modifier = DxsoRegModifier::None; + DxsoRegMask mask = IdentityWriteMask; + DxsoRegSwizzle swizzle = IdentitySwizzle; + int8_t shift = 0; + }; + + struct DxsoRegister : public DxsoBaseRegister { + bool hasRelative = false; + DxsoBaseRegister relative; + }; + + struct DxsoSemantic { + DxsoUsage usage; + uint32_t usageIndex; + + bool operator== (const DxsoSemantic& b) const; + bool operator!= (const 
DxsoSemantic& b) const; + }; + + struct DxsoDeclaration { + DxsoSemantic semantic; + + DxsoTextureType textureType; + }; + + union DxsoDefinition { + float float32[4]; + int32_t int32[4]; + + // Not a type we actually use in compiler, but used for decoding. + uint32_t uint32[4]; + }; + + struct DxsoInstructionContext { + DxsoShaderInstruction instruction; + + DxsoRegister pred; + + DxsoRegister dst; + std::array< + DxsoRegister, + DxsoMaxOperandCount> src; + + DxsoDefinition def; + + DxsoDeclaration dcl; + }; + + class DxsoDecodeContext { + + public: + + DxsoDecodeContext(const DxsoProgramInfo& programInfo) + : m_programInfo( programInfo ) { } + + /** + * \brief Retrieves current instruction context + * + * This is only valid after a call to \ref decode. + * \returns Reference to last decoded instruction & its context + */ + const DxsoInstructionContext& getInstructionContext() const { + return m_ctx; + } + + const DxsoProgramInfo& getProgramInfo() const { + return m_programInfo; + } + + /** + * \brief Decodes an instruction + * + * This also advances the given code slice by the + * number of dwords consumed by the instruction. + * \param [in] code Code slice + */ + bool decodeInstruction(DxsoCodeIter& iter); + + private: + + uint32_t decodeInstructionLength(uint32_t token); + + void decodeBaseRegister( + DxsoBaseRegister& reg, + uint32_t token); + void decodeGenericRegister( + DxsoRegister& reg, + uint32_t token); + void decodeRelativeRegister( + DxsoBaseRegister& reg, + uint32_t token); + + // Returns whether an extra token was read. 
+ bool decodeDestinationRegister(DxsoCodeIter& iter); + bool decodeSourceRegister(uint32_t i, DxsoCodeIter& iter); + void decodePredicateRegister(DxsoCodeIter& iter); + + void decodeDeclaration(DxsoCodeIter& iter); + void decodeDefinition(DxsoOpcode opcode, DxsoCodeIter& iter); + + bool relativeAddressingUsesToken(DxsoInstructionArgumentType type); + + const DxsoProgramInfo& m_programInfo; + + DxsoInstructionContext m_ctx; + + }; + + std::ostream& operator << (std::ostream& os, DxsoUsage usage); + +} \ No newline at end of file diff --git a/src/dxso/dxso_enums.cpp b/src/dxso/dxso_enums.cpp new file mode 100644 index 000000000..fc966c299 --- /dev/null +++ b/src/dxso/dxso_enums.cpp @@ -0,0 +1,101 @@ +#include "dxso_enums.h" + +namespace dxvk { + + std::ostream& operator << (std::ostream& os, DxsoOpcode opcode) { + switch (opcode) { + case DxsoOpcode::Nop: os << "Nop"; break; + case DxsoOpcode::Mov: os << "Mov"; break; + case DxsoOpcode::Add: os << "Add"; break; + case DxsoOpcode::Sub: os << "Sub"; break; + case DxsoOpcode::Mad: os << "Mad"; break; + case DxsoOpcode::Mul: os << "Mul"; break; + case DxsoOpcode::Rcp: os << "Rcp"; break; + case DxsoOpcode::Rsq: os << "Rsq"; break; + case DxsoOpcode::Dp3: os << "Dp3"; break; + case DxsoOpcode::Dp4: os << "Dp4"; break; + case DxsoOpcode::Min: os << "Min"; break; + case DxsoOpcode::Max: os << "Max"; break; + case DxsoOpcode::Slt: os << "Slt"; break; + case DxsoOpcode::Sge: os << "Sge"; break; + case DxsoOpcode::Exp: os << "Exp"; break; + case DxsoOpcode::Log: os << "Log"; break; + case DxsoOpcode::Lit: os << "Lit"; break; + case DxsoOpcode::Dst: os << "Dst"; break; + case DxsoOpcode::Lrp: os << "Lrp"; break; + case DxsoOpcode::Frc: os << "Frc"; break; + case DxsoOpcode::M4x4: os << "M4x4"; break; + case DxsoOpcode::M4x3: os << "M4x3"; break; + case DxsoOpcode::M3x4: os << "M3x4"; break; + case DxsoOpcode::M3x3: os << "M3x3"; break; + case DxsoOpcode::M3x2: os << "M3x2"; break; + case DxsoOpcode::Call: os << "Call"; break; + 
case DxsoOpcode::CallNz: os << "CallNz"; break; + case DxsoOpcode::Loop: os << "Loop"; break; + case DxsoOpcode::Ret: os << "Ret"; break; + case DxsoOpcode::EndLoop: os << "EndLoop"; break; + case DxsoOpcode::Label: os << "Label"; break; + case DxsoOpcode::Dcl: os << "Dcl"; break; + case DxsoOpcode::Pow: os << "Pow"; break; + case DxsoOpcode::Crs: os << "Crs"; break; + case DxsoOpcode::Sgn: os << "Sgn"; break; + case DxsoOpcode::Abs: os << "Abs"; break; + case DxsoOpcode::Nrm: os << "Nrm"; break; + case DxsoOpcode::SinCos: os << "SinCos"; break; + case DxsoOpcode::Rep: os << "Rep"; break; + case DxsoOpcode::EndRep: os << "EndRep"; break; + case DxsoOpcode::If: os << "If"; break; + case DxsoOpcode::Ifc: os << "Ifc"; break; + case DxsoOpcode::Else: os << "Else"; break; + case DxsoOpcode::EndIf: os << "EndIf"; break; + case DxsoOpcode::Break: os << "Break"; break; + case DxsoOpcode::BreakC: os << "BreakC"; break; + case DxsoOpcode::Mova: os << "Mova"; break; + case DxsoOpcode::DefB: os << "DefB"; break; + case DxsoOpcode::DefI: os << "DefI"; break; + + case DxsoOpcode::TexCoord: os << "TexCoord"; break; + case DxsoOpcode::TexKill: os << "TexKill"; break; + case DxsoOpcode::Tex: os << "Tex"; break; + case DxsoOpcode::TexBem: os << "TexBem"; break; + case DxsoOpcode::TexBemL: os << "TexBemL"; break; + case DxsoOpcode::TexReg2Ar: os << "TexReg2Ar"; break; + case DxsoOpcode::TexReg2Gb: os << "TexReg2Gb"; break; + case DxsoOpcode::TexM3x2Pad: os << "TexM3x2Pad"; break; + case DxsoOpcode::TexM3x2Tex: os << "TexM3x2Tex"; break; + case DxsoOpcode::TexM3x3Pad: os << "TexM3x3Pad"; break; + case DxsoOpcode::TexM3x3Tex: os << "TexM3x3Tex"; break; + case DxsoOpcode::Reserved0: os << "Reserved0"; break; + case DxsoOpcode::TexM3x3Spec: os << "TexM3x3Spec"; break; + case DxsoOpcode::TexM3x3VSpec: os << "TexM3x3VSpec"; break; + case DxsoOpcode::ExpP: os << "ExpP"; break; + case DxsoOpcode::LogP: os << "LogP"; break; + case DxsoOpcode::Cnd: os << "Cnd"; break; + case DxsoOpcode::Def: 
os << "Def"; break; + case DxsoOpcode::TexReg2Rgb: os << "TexReg2Rgb"; break; + case DxsoOpcode::TexDp3Tex: os << "TexDp3Tex"; break; + case DxsoOpcode::TexM3x2Depth: os << "TexM3x2Depth"; break; + case DxsoOpcode::TexDp3: os << "TexDp3"; break; + case DxsoOpcode::TexM3x3: os << "TexM3x3"; break; + case DxsoOpcode::TexDepth: os << "TexDepth"; break; + case DxsoOpcode::Cmp: os << "Cmp"; break; + case DxsoOpcode::Bem: os << "Bem"; break; + case DxsoOpcode::Dp2Add: os << "Dp2Add"; break; + case DxsoOpcode::DsX: os << "DsX"; break; + case DxsoOpcode::DsY: os << "DsY"; break; + case DxsoOpcode::TexLdd: os << "TexLdd"; break; + case DxsoOpcode::SetP: os << "SetP"; break; + case DxsoOpcode::TexLdl: os << "TexLdl"; break; + case DxsoOpcode::BreakP: os << "BreakP"; break; + + case DxsoOpcode::Phase: os << "Phase"; break; + case DxsoOpcode::Comment: os << "Comment"; break; + case DxsoOpcode::End: os << "End"; break; + default: + os << "Invalid Opcode (" << static_cast(opcode) << ")"; break; + } + + return os; + } + +} \ No newline at end of file diff --git a/src/dxso/dxso_enums.h b/src/dxso/dxso_enums.h new file mode 100644 index 000000000..84f2e5b73 --- /dev/null +++ b/src/dxso/dxso_enums.h @@ -0,0 +1,164 @@ +#pragma once + +#include "dxso_include.h" + +#include + +namespace dxvk { + + /** + * \brief Instruction code listing + */ + enum class DxsoOpcode : uint32_t { + Nop = 0, + Mov , + Add , + Sub , + Mad , + Mul , + Rcp , + Rsq , + Dp3 , + Dp4 , + Min , + Max , + Slt , + Sge , + Exp , + Log , + Lit , + Dst , + Lrp , + Frc , + M4x4 , + M4x3 , + M3x4 , + M3x3 , + M3x2 , + Call , + CallNz , + Loop , + Ret , + EndLoop , + Label , + Dcl , + Pow , + Crs , + Sgn , + Abs , + Nrm , + SinCos , + Rep , + EndRep , + If , + Ifc , + Else , + EndIf , + Break , + BreakC , + Mova , + DefB , + DefI , + + TexCoord = 64, + TexKill , + Tex , + TexBem , + TexBemL , + TexReg2Ar , + TexReg2Gb , + TexM3x2Pad , + TexM3x2Tex , + TexM3x3Pad , + TexM3x3Tex , + Reserved0 , + TexM3x3Spec , + 
/**
 * \brief Register type listing
 *
 * Two numeric values are shared by a pair of names each
 * (3 and 6). The comments on those entries state which
 * shader stage or version each name is meant for.
 */
enum class DxsoRegisterType : uint32_t {
  /// Temporary Register File
  Temp          = 0,
  /// Input Register File
  Input         = 1,
  /// Constant Register File
  Const         = 2,
  /// Address Register (VS)
  Addr          = 3,
  /// Texture Register File (PS), same value as Addr
  Texture       = Addr,
  /// Rasterizer Register File
  RasterizerOut = 4,
  /// Attribute Output Register File
  AttributeOut  = 5,
  /// Texture Coordinate Output Register File
  TexcoordOut   = 6,
  /// Output register file for VS3.0+, same value as TexcoordOut
  Output        = TexcoordOut,
  /// Constant Integer Vector Register File
  ConstInt      = 7,
  /// Color Output Register File
  ColorOut      = 8,
  /// Depth Output Register File
  DepthOut      = 9,
  /// Sampler State Register File
  Sampler       = 10,
  /// Constant Register File 2048 - 4095
  Const2        = 11,
  /// Constant Register File 4096 - 6143
  Const3        = 12,
  /// Constant Register File 6144 - 8191
  Const4        = 13,
  /// Constant Boolean register file
  ConstBool     = 14,
  /// Loop counter register file
  Loop          = 15,
  /// 16-bit float temp register file
  TempFloat16   = 16,
  /// Miscellaneous (single) registers.
  MiscType      = 17,
  /// Label
  Label         = 18,
  /// Predicate register
  Predicate     = 19,
  PixelTexcoord = 20
};
+ */ + class DxsoHeader { + + public: + + DxsoHeader(DxsoReader& reader); + + const DxsoProgramInfo& info() const { + return m_info; + } + + private: + + DxsoProgramInfo m_info; + + }; + +} \ No newline at end of file diff --git a/src/dxso/dxso_helpers.h b/src/dxso/dxso_helpers.h new file mode 100644 index 000000000..e69de29bb diff --git a/src/dxso/dxso_include.h b/src/dxso/dxso_include.h new file mode 100644 index 000000000..2f41e784a --- /dev/null +++ b/src/dxso/dxso_include.h @@ -0,0 +1,18 @@ +#pragma once + +#include "../dxvk/dxvk_shader.h" + +#include "../util/com/com_guid.h" +#include "../util/com/com_object.h" +#include "../util/com/com_pointer.h" + +#include "../util/log/log.h" +#include "../util/log/log_debug.h" + +#include "../util/rc/util_rc.h" +#include "../util/rc/util_rc_ptr.h" + +#include "../util/util_bit.h" +#include "../util/util_enum.h" +#include "../util/util_error.h" +#include "../util/util_string.h" \ No newline at end of file diff --git a/src/dxso/dxso_isgn.h b/src/dxso/dxso_isgn.h new file mode 100644 index 000000000..168c8d8e9 --- /dev/null +++ b/src/dxso/dxso_isgn.h @@ -0,0 +1,39 @@ +#pragma once + +#include "dxso_decoder.h" + +namespace dxvk { + + struct DxsoIsgnEntry { + uint32_t regNumber = 0; + uint32_t slot = 0; + DxsoSemantic semantic = DxsoSemantic{ DxsoUsage::Position, 0 }; + DxsoRegMask mask = IdentityWriteMask; + bool centroid = false; + }; + + struct DxsoIsgn { + std::array< + DxsoIsgnEntry, + 2 * DxsoMaxInterfaceRegs> elems; + uint32_t elemCount = 0; + }; + + struct DxsoDefinedConstant { + uint32_t uboIdx; + + // Only float constants may be indexed. + // So that's the only ones we care about putting in the UBO. 
+ float float32[4]; + }; + + using DxsoDefinedConstants = std::vector; + + struct DxsoShaderMetaInfo { + bool needsConstantCopies = false; + uint32_t maxConstIndexF = 0; + uint32_t maxConstIndexI = 0; + uint32_t maxConstIndexB = 0; + }; + +} \ No newline at end of file diff --git a/src/dxso/dxso_modinfo.h b/src/dxso/dxso_modinfo.h new file mode 100644 index 000000000..410c90f38 --- /dev/null +++ b/src/dxso/dxso_modinfo.h @@ -0,0 +1,17 @@ +#pragma once + +#include "dxso_options.h" + +namespace dxvk { + + /** + * \brief Shader module info + * + * Stores information which may affect shader compilation. + * This data can be supplied by the client API implementation. + */ + struct DxsoModuleInfo { + DxsoOptions options; + }; + +} \ No newline at end of file diff --git a/src/dxso/dxso_module.cpp b/src/dxso/dxso_module.cpp new file mode 100644 index 000000000..3b2bb25b4 --- /dev/null +++ b/src/dxso/dxso_module.cpp @@ -0,0 +1,84 @@ +#include "dxso_module.h" + +#include "dxso_code.h" +#include "dxso_compiler.h" + +namespace dxvk { + + DxsoModule::DxsoModule(DxsoReader& reader) + : m_header( reader ) + , m_code ( reader ) { } + + DxsoAnalysisInfo DxsoModule::analyze() { + DxsoAnalysisInfo info; + + DxsoAnalyzer analyzer(info); + + this->runAnalyzer(analyzer, m_code.iter()); + + return info; + } + + DxsoPermutations DxsoModule::compile( + const DxsoModuleInfo& moduleInfo, + const std::string& fileName, + const DxsoAnalysisInfo& analysis, + const D3D9ConstantLayout& layout) { + DxsoCompiler compiler( + fileName, moduleInfo, + m_header.info(), analysis, + layout); + + this->runCompiler(compiler, m_code.iter()); + m_isgn = compiler.isgn(); + + m_meta = compiler.meta(); + m_constants = compiler.constants(); + m_usedSamplers = compiler.usedSamplers(); + m_usedRTs = compiler.usedRTs(); + + compiler.finalize(); + + return compiler.compile(); + } + + void DxsoModule::runAnalyzer( + DxsoAnalyzer& analyzer, + DxsoCodeIter iter) const { + DxsoCodeIter start = iter; + + DxsoDecodeContext 
decoder(m_header.info()); + + while (decoder.decodeInstruction(iter)) + analyzer.processInstruction( + decoder.getInstructionContext()); + + size_t tokenCount = size_t(iter.ptrAt(0) - start.ptrAt(0)); + + // We need to account for the header token in the bytecode size... + + // At this point, start is offset by the header due to us this being + // a *code* iterator, and not the general reader class. + // [start token] ^(start caret)^ [frog rendering code] [end token] ^(end caret)^ + // where the tokenCount above is inbetween the start and end carets. + + // We need to account for this otherwise it will show up as us not + // accounting for the *end* token in GetFunction due to the total size being + // offset by -1. + // [start token] [frog rendering code] (end of tokenCount) [end token] + tokenCount += 1; + + analyzer.finalize(tokenCount); + } + + void DxsoModule::runCompiler( + DxsoCompiler& compiler, + DxsoCodeIter iter) const { + DxsoDecodeContext decoder(m_header.info()); + + while (decoder.decodeInstruction(iter)) + compiler.processInstruction( + decoder.getInstructionContext()); + } + +} \ No newline at end of file diff --git a/src/dxso/dxso_module.h b/src/dxso/dxso_module.h new file mode 100644 index 000000000..1f5cda85b --- /dev/null +++ b/src/dxso/dxso_module.h @@ -0,0 +1,85 @@ +#pragma once + +#include "dxso_reader.h" +#include "dxso_code.h" +#include "dxso_header.h" +#include "dxso_ctab.h" + +#include "dxso_isgn.h" +#include "dxso_analysis.h" + +#include "../d3d9/d3d9_constant_layout.h" +#include "../d3d9/d3d9_shader_permutations.h" + +#include + +namespace dxvk { + + class DxsoCompiler; + class DxsoCode; + struct DxsoModuleInfo; + + /** + * \brief DXSO shader module, a d3d9 shader object. 
+ */ + class DxsoModule { + + public: + + DxsoModule(DxsoReader& reader); + + const DxsoProgramInfo& info() { + return m_header.info(); + } + + DxsoAnalysisInfo analyze(); + + /** + * \brief Compiles DXSO shader to SPIR-V module + * + * \param [in] moduleInfo DXSO module info + * \param [in] fileName File name, will be added to + * the compiled SPIR-V for debugging purposes. + * \returns The compiled shader object + */ + DxsoPermutations compile( + const DxsoModuleInfo& moduleInfo, + const std::string& fileName, + const DxsoAnalysisInfo& analysis, + const D3D9ConstantLayout& layout); + + const DxsoIsgn& isgn() { + return m_isgn; + } + + const DxsoShaderMetaInfo& meta() { return m_meta; } + + const DxsoDefinedConstants& constants() { return m_constants; } + + uint32_t usedSamplers() { return m_usedSamplers; } + + uint32_t usedRTs() { return m_usedRTs; } + + private: + + void runCompiler( + DxsoCompiler& compiler, + DxsoCodeIter iter) const; + + void runAnalyzer( + DxsoAnalyzer& analyzer, + DxsoCodeIter iter) const; + + DxsoHeader m_header; + DxsoCode m_code; + + DxsoIsgn m_isgn; + uint32_t m_usedSamplers; + uint32_t m_usedRTs; + + DxsoShaderMetaInfo m_meta; + DxsoDefinedConstants m_constants; + + }; + +} \ No newline at end of file diff --git a/src/dxso/dxso_options.cpp b/src/dxso/dxso_options.cpp new file mode 100644 index 000000000..ae6fb603d --- /dev/null +++ b/src/dxso/dxso_options.cpp @@ -0,0 +1,42 @@ +#include "dxso_options.h" + +namespace dxvk { + + DxsoOptions::DxsoOptions() {} + + DxsoOptions::DxsoOptions(const Rc& device, const D3D9Options& options) { + const Rc adapter = device->adapter(); + + const DxvkDeviceFeatures& devFeatures = device->features(); + const DxvkDeviceInfo& devInfo = adapter->devicePropertiesExt(); + + useDemoteToHelperInvocation + = (devFeatures.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation); + + useSubgroupOpsForEarlyDiscard + = (devInfo.coreSubgroup.subgroupSize >= 4) + && (devInfo.coreSubgroup.supportedStages & 
VK_SHADER_STAGE_FRAGMENT_BIT) + && (devInfo.coreSubgroup.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT); + + // Disable early discard on RADV (with LLVM) due to GPU hangs + // Disable early discard on Nvidia because it may hurt performance + bool isRadvAco = std::string(devInfo.core.properties.deviceName).find("RADV/ACO") != std::string::npos; + + if ((adapter->matchesDriver(DxvkGpuVendor::Amd, VK_DRIVER_ID_MESA_RADV_KHR, 0, 0) && !isRadvAco) + || (adapter->matchesDriver(DxvkGpuVendor::Nvidia, VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, 0, 0))) + useSubgroupOpsForEarlyDiscard = false; + + // Apply shader-related options + applyTristate(useSubgroupOpsForEarlyDiscard, device->config().useEarlyDiscard); + + strictConstantCopies = options.strictConstantCopies; + + strictPow = options.strictPow; + d3d9FloatEmulation = options.d3d9FloatEmulation; + + shaderModel = options.shaderModel; + + invariantPosition = options.invariantPosition; + } + +} \ No newline at end of file diff --git a/src/dxso/dxso_options.h b/src/dxso/dxso_options.h new file mode 100644 index 000000000..def47c420 --- /dev/null +++ b/src/dxso/dxso_options.h @@ -0,0 +1,42 @@ +#pragma once + +#include "../dxvk/dxvk_device.h" +#include "../d3d9/d3d9_options.h" + +namespace dxvk { + + struct D3D9Options; + + struct DxsoOptions { + DxsoOptions(); + DxsoOptions(const Rc& device, const D3D9Options& options); + + /// Use a SPIR-V extension to implement D3D-style discards + bool useDemoteToHelperInvocation = false; + + /// Use subgroup operations to discard fragment + /// shader invocations if derivatives remain valid. + bool useSubgroupOpsForEarlyDiscard = false; + + /// True: Copy our constant set into UBO if we are relative indexing ever. + /// False: Copy our constant set into UBO if we are relative indexing at the start of a defined constant + /// Why?: In theory, FXC should never generate code where this would be an issue. 
+ bool strictConstantCopies; + + /// Whether to emulate d3d9 float behaviour using clampps + /// True: Perform emulation to emulate behaviour (ie. anything * 0 = 0) + /// False: Don't do anything. + bool d3d9FloatEmulation; + + /// Whether or not we should care about pow(0, 0) = 1 + bool strictPow; + + /// Max version of shader to support + uint32_t shaderModel; + + /// Work around a NV driver quirk + /// Fixes flickering/z-fighting in some games. + bool invariantPosition; + }; + +} \ No newline at end of file diff --git a/src/dxso/dxso_reader.cpp b/src/dxso/dxso_reader.cpp new file mode 100644 index 000000000..ab47f2833 --- /dev/null +++ b/src/dxso/dxso_reader.cpp @@ -0,0 +1,26 @@ +#include "dxso_reader.h" + +#include + +namespace dxvk { + + DxbcTag DxsoReader::readTag() { + DxbcTag tag; + this->read(&tag, 4); + return tag; + } + + void DxsoReader::read(void* dst, size_t n) { + std::memcpy(dst, m_data + m_pos, n); + m_pos += n; + } + + void DxsoReader::skip(size_t n) { + m_pos += n; + } + + void DxsoReader::store(std::ostream && stream, size_t size) const { + stream.write(m_data, size); + } + +} \ No newline at end of file diff --git a/src/dxso/dxso_reader.h b/src/dxso/dxso_reader.h new file mode 100644 index 000000000..fa650bb89 --- /dev/null +++ b/src/dxso/dxso_reader.h @@ -0,0 +1,60 @@ +#pragma once + +#include "dxso_include.h" + +#include "../dxbc/dxbc_tag.h" + +#include + +namespace dxvk { + + /** + * \brief DXSO (d3d9) bytecode reader + * + * Holds references to the shader byte code and + * provides methods to read + */ + class DxsoReader { + + public: + + DxsoReader(const char* data) + : DxsoReader(data, 0) { } + + size_t pos() { + return m_pos; + } + + auto readu32() { return this->readNum (); } + auto readf32() { return this->readNum (); } + + DxbcTag readTag(); + + void read(void* dst, size_t n); + + void skip(size_t n); + + void store(std::ostream&& stream, size_t size) const; + + const char* currentPtr() { + return m_data + m_pos; + } + + private: + + 
DxsoReader(const char* data, size_t pos) + : m_data(data), m_pos(pos) { } + + const char* m_data = nullptr; + size_t m_pos = 0; + + template + T readNum() { + T result; + this->read(&result, sizeof(result)); + return result; + } + + }; + +} \ No newline at end of file diff --git a/src/dxso/dxso_tables.cpp b/src/dxso/dxso_tables.cpp new file mode 100644 index 000000000..5b8ab91f0 --- /dev/null +++ b/src/dxso/dxso_tables.cpp @@ -0,0 +1,93 @@ +#include "dxso_tables.h" + +namespace dxvk { + + uint32_t DxsoGetDefaultOpcodeLength(DxsoOpcode opcode) { + + switch (opcode) { + case DxsoOpcode::Nop: return 0; + case DxsoOpcode::Mov: return 2; + case DxsoOpcode::Add: return 3; + case DxsoOpcode::Sub: return 3; + case DxsoOpcode::Mad: return 4; + case DxsoOpcode::Mul: return 3; + case DxsoOpcode::Rcp: return 2; + case DxsoOpcode::Rsq: return 2; + case DxsoOpcode::Dp3: return 3; + case DxsoOpcode::Dp4: return 3; + case DxsoOpcode::Min: return 3; + case DxsoOpcode::Max: return 3; + case DxsoOpcode::Slt: return 3; + case DxsoOpcode::Sge: return 3; + case DxsoOpcode::Exp: return 2; + case DxsoOpcode::Log: return 2; + case DxsoOpcode::Lit: return 2; + case DxsoOpcode::Dst: return 3; + case DxsoOpcode::Lrp: return 4; + case DxsoOpcode::Frc: return 2; + case DxsoOpcode::M4x4: return 3; + case DxsoOpcode::M4x3: return 3; + case DxsoOpcode::M3x4: return 3; + case DxsoOpcode::M3x3: return 3; + case DxsoOpcode::M3x2: return 3; + case DxsoOpcode::Call: return 1; + case DxsoOpcode::CallNz: return 2; + case DxsoOpcode::Loop: return 2; + case DxsoOpcode::Ret: return 0; + case DxsoOpcode::EndLoop: return 0; + case DxsoOpcode::Label: return 1; + case DxsoOpcode::Dcl: return 2; + case DxsoOpcode::Pow: return 3; + case DxsoOpcode::Crs: return 3; + case DxsoOpcode::Sgn: return 4; + case DxsoOpcode::Abs: return 2; + case DxsoOpcode::Nrm: return 2; + case DxsoOpcode::SinCos: return 4; + case DxsoOpcode::Rep: return 1; + case DxsoOpcode::EndRep: return 0; + case DxsoOpcode::If: return 1; + case 
DxsoOpcode::Ifc: return 2; + case DxsoOpcode::Else: return 0; + case DxsoOpcode::EndIf: return 0; + case DxsoOpcode::Break: return 0; + case DxsoOpcode::BreakC: return 2; + case DxsoOpcode::Mova: return 2; + case DxsoOpcode::DefB: return 2; + case DxsoOpcode::DefI: return 5; + case DxsoOpcode::TexCoord: return 1; + case DxsoOpcode::TexKill: return 1; + case DxsoOpcode::Tex: return 1; + case DxsoOpcode::TexBem: return 2; + case DxsoOpcode::TexBemL: return 2; + case DxsoOpcode::TexReg2Ar: return 2; + case DxsoOpcode::TexReg2Gb: return 2; + case DxsoOpcode::TexM3x2Pad: return 2; + case DxsoOpcode::TexM3x2Tex: return 2; + case DxsoOpcode::TexM3x3Pad: return 2; + case DxsoOpcode::TexM3x3Tex: return 2; + case DxsoOpcode::TexM3x3Spec: return 3; + case DxsoOpcode::TexM3x3VSpec: return 2; + case DxsoOpcode::ExpP: return 2; + case DxsoOpcode::LogP: return 2; + case DxsoOpcode::Cnd: return 4; + case DxsoOpcode::Def: return 5; + case DxsoOpcode::TexReg2Rgb: return 2; + case DxsoOpcode::TexDp3Tex: return 2; + case DxsoOpcode::TexM3x2Depth: return 2; + case DxsoOpcode::TexDp3: return 2; + case DxsoOpcode::TexM3x3: return 2; + case DxsoOpcode::TexDepth: return 1; + case DxsoOpcode::Cmp: return 4; + case DxsoOpcode::Bem: return 3; + case DxsoOpcode::Dp2Add: return 4; + case DxsoOpcode::DsX: return 2; + case DxsoOpcode::DsY: return 2; + case DxsoOpcode::TexLdd: return 5; + case DxsoOpcode::SetP: return 3; + case DxsoOpcode::TexLdl: return 3; + case DxsoOpcode::BreakP: return 2; + default: Logger::warn("DxsoGetDefaultOpcodeLength: unknown opcode to get default length for."); return UINT32_MAX; + } + } + +} \ No newline at end of file diff --git a/src/dxso/dxso_tables.h b/src/dxso/dxso_tables.h new file mode 100644 index 000000000..73e3801d3 --- /dev/null +++ b/src/dxso/dxso_tables.h @@ -0,0 +1,11 @@ +#pragma once + +#include "dxso_enums.h" + +namespace dxvk { + + constexpr uint32_t InvalidOpcodeLength = UINT32_MAX; + + uint32_t DxsoGetDefaultOpcodeLength(DxsoOpcode opcode); + +} \ 
No newline at end of file diff --git a/src/dxso/dxso_util.cpp b/src/dxso/dxso_util.cpp new file mode 100644 index 000000000..a1effe73c --- /dev/null +++ b/src/dxso/dxso_util.cpp @@ -0,0 +1,67 @@ +#include "dxso_util.h" + +#include "dxso_include.h" + +namespace dxvk { + + uint32_t computeResourceSlotId( + DxsoProgramType shaderStage, + DxsoBindingType bindingType, + uint32_t bindingIndex) { + const uint32_t stageOffset = 12 * uint32_t(shaderStage); + + if (shaderStage == DxsoProgramTypes::VertexShader) { + switch (bindingType) { + case DxsoBindingType::ConstantBuffer: return bindingIndex + stageOffset + 0; // 0 + 4 = 4 + case DxsoBindingType::ColorImage: return bindingIndex + stageOffset + 4; // 4 + 4 = 8 + case DxsoBindingType::DepthImage: return bindingIndex + stageOffset + 8; // 8 + 4 = 12 + default: Logger::err("computeResourceSlotId: Invalid resource type"); + } + } + else { // Pixel Shader + switch (bindingType) { + case DxsoBindingType::ConstantBuffer: return bindingIndex + stageOffset + 0; // 0 + 3 = 3 + // The extra sampler here is being reserved for DMAP stuff later on. + case DxsoBindingType::ColorImage: return bindingIndex + stageOffset + 3; // 3 + 17 = 20 + case DxsoBindingType::DepthImage: return bindingIndex + stageOffset + 20; // 20 + 17 = 27 + default: Logger::err("computeResourceSlotId: Invalid resource type"); + } + } + + return 0; + } + + // TODO: Intergrate into compute resource slot ID/refactor all of this? + uint32_t getSWVPBufferSlot() { + return 39; + } + + + std::mutex g_linkerSlotMutex; + uint32_t g_linkerSlotCount = 0; + std::array g_linkerSlots; + + uint32_t RegisterLinkerSlot(DxsoSemantic semantic) { + // Lock, because games could be trying + // to make multiple shaders at a time. + std::lock_guard lock(g_linkerSlotMutex); + + // Need to chose a slot that maps nicely and similarly + // between vertex and pixel shaders + + // Find or map a slot. 
+ uint32_t slot = g_linkerSlotCount; + for (uint32_t j = 0; j < g_linkerSlotCount; j++) { + if (g_linkerSlots[j] == semantic) { + slot = j; + break; + } + } + + if (slot == g_linkerSlotCount) + g_linkerSlots[g_linkerSlotCount++] = semantic; + + return slot; + } + +} \ No newline at end of file diff --git a/src/dxso/dxso_util.h b/src/dxso/dxso_util.h new file mode 100644 index 000000000..307a28d97 --- /dev/null +++ b/src/dxso/dxso_util.h @@ -0,0 +1,38 @@ +#pragma once + +#include + +#include "dxso_common.h" +#include "dxso_decoder.h" + +namespace dxvk { + + enum class DxsoBindingType : uint32_t { + ConstantBuffer, + ColorImage, + DepthImage // <-- We use whatever one is bound to determine whether an image should be 'shadow' sampled or not. + }; + + enum DxsoConstantBuffers : uint32_t { + VSConstantBuffer = 0, + VSClipPlanes = 1, + VSFixedFunction = 2, + VSVertexBlendData = 3, + VSCount, + + PSConstantBuffer = 0, + PSFixedFunction = 1, + PSShared = 2, + PSCount + }; + + uint32_t computeResourceSlotId( + DxsoProgramType shaderStage, + DxsoBindingType bindingType, + uint32_t bindingIndex); + + uint32_t getSWVPBufferSlot(); + + uint32_t RegisterLinkerSlot(DxsoSemantic semantic); + +} \ No newline at end of file diff --git a/src/dxso/meson.build b/src/dxso/meson.build new file mode 100644 index 000000000..392b74097 --- /dev/null +++ b/src/dxso/meson.build @@ -0,0 +1,23 @@ +dxso_src = files([ + 'dxso_common.cpp', + 'dxso_options.cpp', + 'dxso_module.cpp', + 'dxso_reader.cpp', + 'dxso_header.cpp', + 'dxso_ctab.cpp', + 'dxso_util.cpp', + 'dxso_code.cpp', + 'dxso_tables.cpp', + 'dxso_decoder.cpp', + 'dxso_analysis.cpp', + 'dxso_compiler.cpp', + 'dxso_enums.cpp' +]) + +dxso_lib = static_library('dxso', dxso_src, + include_directories : [ dxvk_include_path ], + override_options : ['cpp_std='+dxvk_cpp_std]) + +dxso_dep = declare_dependency( + link_with : [ dxso_lib ], + include_directories : [ dxvk_include_path ]) diff --git a/src/meson.build b/src/meson.build index 
64057f37a..c5d2753a5 100644 --- a/src/meson.build +++ b/src/meson.build @@ -25,7 +25,12 @@ if get_option('enable_d3d10') subdir('d3d10') endif +if get_option('enable_d3d9') + subdir('dxso') + subdir('d3d9') +endif + # Nothing selected -if not get_option('enable_d3d10') and not get_option('enable_d3d11') and not get_option('enable_tests') +if not get_option('enable_d3d9') and not get_option('enable_d3d10') and not get_option('enable_d3d11') and not get_option('enable_tests') warning('Nothing selected to be built. Are you missing a frontend or tests?') endif diff --git a/src/util/config/config.cpp b/src/util/config/config.cpp index 6ffc93b34..72f660067 100644 --- a/src/util/config/config.cpp +++ b/src/util/config/config.cpp @@ -152,6 +152,125 @@ namespace dxvk { { R"(\\Crysis3\.exe$)", {{ { "dxgi.customVendorId", "10de" }, }} }, + + /**********************************************/ + /* D3D9 GAMES */ + /**********************************************/ + + /* A Hat in Time */ + { R"(\\HatinTimeGame\.exe$)", {{ + { "d3d9.strictPow", "False" }, + { "d3d9.lenientClear", "True" }, + }} }, + /* Borderlands: The Pre Sequel! 
*/ + { R"(\\BorderlandsPreSequel\.exe$)", {{ + { "d3d9.lenientClear", "True" }, + }} }, + /* Borderlands 2 */ + { R"(\\Borderlands2\.exe$)", {{ + { "d3d9.lenientClear", "True" }, + }} }, + /* Borderlands */ + { R"(\\Borderlands\.exe$)", {{ + { "d3d9.lenientClear", "True" }, + }} }, + /* Gothic 3 */ + { R"(\\Gothic3\.exe$)", {{ + { "d3d9.allowLockFlagReadonly", "False" }, + }} }, + /* Gothic 3 Forsaken Gods */ + { R"(\\Gothic III Forsaken Gods\.exe$)", {{ + { "d3d9.allowLockFlagReadonly", "False" }, + }} }, + /* Risen */ + { R"(\\Risen\.exe$)", {{ + { "d3d9.allowLockFlagReadonly", "False" }, + { "d3d9.invariantPosition", "True" }, + }} }, + /* Risen 2 */ + { R"(\\Risen2\.exe$)", {{ + { "d3d9.allowLockFlagReadonly", "False" }, + { "d3d9.invariantPosition", "True" }, + }} }, + /* Risen 3 */ + { R"(\\Risen3\.exe$)", {{ + { "d3d9.allowLockFlagReadonly", "False" }, + { "d3d9.invariantPosition", "True" }, + }} }, + /* Nostale */ + { R"(\\NostaleClientX\.exe$)", {{ + { "d3d9.allowLockFlagReadonly", "False" }, + }} }, + /* Sonic Adventure 2 */ + { R"(\\Sonic Adventure 2\\(launcher|sonic2app)\.exe$)", {{ + { "d3d9.floatEmulation", "False" }, + }} }, + /* The Sims 2, + Body Shop, + The Sims Life Stories, + The Sims Pet Stories, + and The Sims Castaway Stories */ + { R"(\\(Sims2.*|TS2BodyShop|SimsLS|SimsPS|SimsCS)\.exe$)", {{ + { "d3d9.customVendorId", "10de" }, + { "d3d9.customDeviceId", "0091" }, + { "d3d9.customDeviceDesc", "GeForce 7800 GTX" }, + { "d3d9.disableA8RT", "True" }, + { "d3d9.supportX4R4G4B4", "False" }, + { "d3d9.maxAvailableMemory", "2048" }, + { "d3d9.memoryTrackTest", "True" }, + // The Sims 2 will try to upload 1024 constants + // every frame otherwise, which it never uses + // causing a massive discard + upload. 
+ { "d3d9.swvpFloatCount", "384" }, + { "d3d9.swvpIntCount", "16" }, + { "d3d9.swvpBoolCount", "16" }, + }} }, + /* Dead Space uses the a NULL render target instead + of a 1x1 one if DF24 is NOT supported */ + { R"(\\Dead Space\.exe$)", {{ + { "d3d9.supportDFFormats", "False" }, + }} }, + /* Burnout Paradise */ + { R"(\\BurnoutParadise\.exe$)", {{ + { "d3d9.allowLockFlagReadonly", "False" }, + }} }, + /* Halo 2 */ + { R"(\\halo2\.exe$)", {{ + { "d3d9.invariantPosition", "True" }, + }} }, + /* Halo CE/HaloPC */ + { R"(\\halo(ce)?\.exe$)", {{ + // Game enables minor decal layering fixes + // specifically when it detects AMD. + // Avoids chip being detected as unsupported + // when on intel. Avoids possible path towards + // invalid texture addressing methods. + { "d3d9.customVendorId", "1002" }, + // Avoids card not recognized error. + // Keeps game's rendering methods consistent + // for optimal compatibility. + { "d3d9.customDeviceId", "4172" }, + }} }, + /* Counter Strike: Global Offensive + Needs NVAPI to avoid a forced AO + Smoke + exploit so we must force AMD vendor ID. 
*/
+    { R"(\\csgo\.exe$)", {{
+      { "d3d9.customVendorId", "1002" },
+    }} },
+    /* Vampire - The Masquerade Bloodlines */
+    { R"(\\vampire\.exe$)", {{
+      { "d3d9.deferSurfaceCreation", "True" },
+      { "d3d9.memoryTrackTest", "True" },
+      { "d3d9.maxAvailableMemory", "1024" },
+    }} },
+    /* Senran Kagura Shinovi Versus */
+    { R"(\\SKShinoviVersus\.exe$)", {{
+      { "d3d9.forceAspectRatio", "16:9" },
+    }} },
+    /* Metal Slug X */
+    { R"(\\mslugx\.exe$)", {{
+      { "d3d9.supportD32", "False" },
+    }} },
   }};
diff --git a/tests/d3d9/meson.build b/tests/d3d9/meson.build
new file mode 100644
index 000000000..e5710fde2
--- /dev/null
+++ b/tests/d3d9/meson.build
@@ -0,0 +1,5 @@
+test_d3d9_deps = [ util_dep, lib_d3d9, lib_d3dcompiler_47 ]
+
+executable('d3d9-clear'+exe_ext, files('test_d3d9_clear.cpp'), dependencies : test_d3d9_deps, install : true, gui_app : true, override_options: ['cpp_std='+dxvk_cpp_std])
+executable('d3d9-buffer'+exe_ext, files('test_d3d9_buffer.cpp'), dependencies : test_d3d9_deps, install : true, gui_app : true, override_options: ['cpp_std='+dxvk_cpp_std])
+executable('d3d9-triangle'+exe_ext, files('test_d3d9_triangle.cpp'), dependencies : test_d3d9_deps, install : true, gui_app : true, override_options: ['cpp_std='+dxvk_cpp_std])
diff --git a/tests/d3d9/test_d3d9_buffer.cpp b/tests/d3d9/test_d3d9_buffer.cpp
new file mode 100644
index 000000000..f27c7b350
--- /dev/null
+++ b/tests/d3d9/test_d3d9_buffer.cpp
@@ -0,0 +1,220 @@
+#include <cstring>
+#include <d3d9.h>
+
+#include "../test_utils.h"
+
+using namespace dxvk;
+
+struct Extent2D {
+  uint32_t w, h;
+};
+// Buffer usage flags under test; each one is paired with every lock flag below.
+DWORD g_UsagePermuatations[] = {
+  0,
+  D3DUSAGE_DYNAMIC,
+  D3DUSAGE_WRITEONLY,
+  D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC,
+};
+// Flags passed to IDirect3DVertexBuffer9::Lock for each usage above.
+DWORD g_MapFlagPermutations[] = {
+  0,
+  D3DLOCK_DISCARD,
+  D3DLOCK_DONOTWAIT,
+  D3DLOCK_NOOVERWRITE
+};
+// Creates a D3D9Ex device, runs the buffer lock tests once, then clears and presents every frame.
+class BufferApp {
+
+public:
+  // Creates the device and runs testBuffer for every usage/lock-flag pair. Throws DxvkError on failure.
+  BufferApp(HINSTANCE instance, HWND window)
+  : m_window(window) {
+    HRESULT status = Direct3DCreate9Ex(D3D_SDK_VERSION, &m_d3d);
+
+    if (FAILED(status))
+      throw DxvkError("Failed to create D3D9 interface");
+
+    D3DPRESENT_PARAMETERS params;
+    getPresentParams(params);
+
+    status = m_d3d->CreateDeviceEx(
+      D3DADAPTER_DEFAULT,
+      D3DDEVTYPE_HAL,
+      m_window,
+      D3DCREATE_HARDWARE_VERTEXPROCESSING,
+      &params,
+      nullptr,
+      &m_device);
+
+    if (FAILED(status))
+      throw DxvkError("Failed to create D3D9 device");
+
+    // The fill pattern lives on the stack so that nothing
+    // leaks when testBuffer throws part-way through the loop.
+    uint8_t data[512];
+    std::memset(data, 0xFC, sizeof(data));
+
+    for (uint32_t i = 0; i < ARRAYSIZE(g_UsagePermuatations); i++) {
+      for (uint32_t j = 0; j < ARRAYSIZE(g_MapFlagPermutations); j++) {
+        testBuffer(data, g_UsagePermuatations[i], g_MapFlagPermutations[j]);
+      }
+    }
+  }
+  // Creates a 512-byte vertex buffer with the given usage, locks it with mapFlags and copies 512 bytes of data in.
+  void testBuffer(uint8_t* data, DWORD usage, DWORD mapFlags) {
+    Com<IDirect3DVertexBuffer9> buffer;
+    HRESULT status = m_device->CreateVertexBuffer(512, usage, 0, D3DPOOL_DEFAULT, &buffer, nullptr);
+
+    if (FAILED(status))
+      throw DxvkError("Failed to create buffer");
+
+    void* bufferMem = nullptr;
+    status = buffer->Lock(0, 0, &bufferMem, mapFlags);
+
+    if (FAILED(status) || bufferMem == nullptr)
+      throw DxvkError("Failed to lock buffer");
+
+    std::memcpy(bufferMem, data, 512);
+
+    status = buffer->Unlock();
+
+    if (FAILED(status))
+      throw DxvkError("Failed to unlock buffer");
+  }
+  // Renders one frame: resizes the back buffer if needed, clears the target and presents.
+  void run() {
+    this->adjustBackBuffer();
+
+    m_device->BeginScene();
+
+    m_device->Clear(
+      0,
+      nullptr,
+      D3DCLEAR_TARGET,
+      D3DCOLOR_RGBA(255, 50, 139, 0),
+      0.0f,
+      0);
+
+    m_device->EndScene();
+
+    m_device->PresentEx(
+      nullptr,
+      nullptr,
+      nullptr,
+      nullptr,
+      0);
+  }
+  // Resets the device when the window's client size no longer matches m_windowSize.
+  void adjustBackBuffer() {
+    RECT windowRect = { 0, 0, 1024, 600 };
+    GetClientRect(m_window, &windowRect);
+
+    Extent2D newSize = {
+      static_cast<uint32_t>(windowRect.right - windowRect.left),
+      static_cast<uint32_t>(windowRect.bottom - windowRect.top),
+    };
+
+    if (m_windowSize.w != newSize.w
+     || m_windowSize.h != newSize.h) {
+      m_windowSize = newSize;
+
+      D3DPRESENT_PARAMETERS params;
+      getPresentParams(params);
+      HRESULT status = m_device->ResetEx(&params, nullptr);
+
+      if (FAILED(status))
+        throw DxvkError("Device reset failed");
+    }
+  }
+  // Fills params for a windowed swap chain with one X8R8G8B8 back buffer sized to m_windowSize.
+  void getPresentParams(D3DPRESENT_PARAMETERS& params) {
+    params.AutoDepthStencilFormat = D3DFMT_UNKNOWN;
+    params.BackBufferCount = 1;
+    params.BackBufferFormat = D3DFMT_X8R8G8B8;
+    params.BackBufferWidth = m_windowSize.w;
+    params.BackBufferHeight = m_windowSize.h;
+    params.EnableAutoDepthStencil = FALSE;
+    params.Flags = 0;
+    params.FullScreen_RefreshRateInHz = 0;
+    params.hDeviceWindow = m_window;
+    params.MultiSampleQuality = 0;
+    params.MultiSampleType = D3DMULTISAMPLE_NONE;
+    params.PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT;
+    params.SwapEffect = D3DSWAPEFFECT_DISCARD;
+    params.Windowed = TRUE;
+  }
+
+private:
+
+  HWND m_window;
+  Extent2D m_windowSize = { 1024, 600 };
+
+  Com<IDirect3D9Ex> m_d3d;
+  Com<IDirect3DDevice9Ex> m_device;
+
+};
+
+LRESULT CALLBACK WindowProc(HWND hWnd,
+                            UINT message,
+                            WPARAM wParam,
+                            LPARAM lParam);
+// Entry point: registers the window class, creates the window, then pumps messages and renders when idle.
+int WINAPI WinMain(HINSTANCE hInstance,
+                   HINSTANCE hPrevInstance,
+                   LPSTR lpCmdLine,
+                   int nCmdShow) {
+  HWND hWnd;
+  WNDCLASSEXW wc;
+  ZeroMemory(&wc, sizeof(WNDCLASSEX));
+  wc.cbSize = sizeof(WNDCLASSEX);
+  wc.style = CS_HREDRAW | CS_VREDRAW;
+  wc.lpfnWndProc = WindowProc;
+  wc.hInstance = hInstance;
+  wc.hCursor = LoadCursor(nullptr, IDC_ARROW);
+  wc.hbrBackground = (HBRUSH)COLOR_WINDOW;
+  wc.lpszClassName = L"WindowClass1";
+  RegisterClassExW(&wc);
+
+  hWnd = CreateWindowExW(0,
+    L"WindowClass1",
+    L"Our First Windowed Program",
+    WS_OVERLAPPEDWINDOW,
+    300, 300,
+    640, 480,
+    nullptr,
+    nullptr,
+    hInstance,
+    nullptr);
+  ShowWindow(hWnd, nCmdShow);
+
+  MSG msg = { };
+
+  try {
+    BufferApp app(hInstance, hWnd);
+
+    while (true) {
+      if (PeekMessage(&msg, nullptr, 0, 0, PM_REMOVE)) {
+        TranslateMessage(&msg);
+        DispatchMessage(&msg);
+
+        if (msg.message == WM_QUIT)
+          return msg.wParam;
+      } else {
+        app.run();
+      }
+    }
+  } catch (const dxvk::DxvkError& e) {
+    std::cerr << e.message() << std::endl;
+    return 1; // msg may never have been filled in, so report the failure explicitly
+  }
+}
+// Posts WM_QUIT when the window is closed; everything else goes to DefWindowProc.
+LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) {
+  switch (message) {
+    case WM_CLOSE:
+      PostQuitMessage(0);
+      return 0;
+  }
+
+  return DefWindowProc(hWnd, message, wParam, lParam);
+}
diff --git a/tests/d3d9/test_d3d9_clear.cpp b/tests/d3d9/test_d3d9_clear.cpp
new file mode 100644
index 000000000..9715333f1
--- /dev/null
+++ b/tests/d3d9/test_d3d9_clear.cpp
@@ -0,0 +1,173 @@
+#include <d3d9.h>
+
+#include "../test_utils.h"
+
+using namespace dxvk;
+
+struct Extent2D {
+  uint32_t w, h;
+};
+// Creates a D3D9Ex device and clears the back buffer to a fixed colour every frame.
+class ClearApp {
+
+public:
+  // Creates the D3D9Ex interface and a windowed HAL device for window. Throws DxvkError on failure.
+  ClearApp(HINSTANCE instance, HWND window)
+  : m_window(window) {
+    HRESULT status = Direct3DCreate9Ex(D3D_SDK_VERSION, &m_d3d);
+
+    if (FAILED(status))
+      throw DxvkError("Failed to create D3D9 interface");
+
+    D3DPRESENT_PARAMETERS params;
+    getPresentParams(params);
+
+    status = m_d3d->CreateDeviceEx(
+      D3DADAPTER_DEFAULT,
+      D3DDEVTYPE_HAL,
+      m_window,
+      D3DCREATE_HARDWARE_VERTEXPROCESSING,
+      &params,
+      nullptr,
+      &m_device);
+
+    if (FAILED(status))
+      throw DxvkError("Failed to create D3D9 device");
+  }
+  // Renders one frame: resizes the back buffer if needed, clears the target and presents.
+  void run() {
+    this->adjustBackBuffer();
+
+    m_device->BeginScene();
+
+    m_device->Clear(
+      0,
+      nullptr,
+      D3DCLEAR_TARGET,
+      D3DCOLOR_RGBA(255, 0, 0, 0),
+      0.0f,
+      0);
+
+    m_device->EndScene();
+
+    m_device->PresentEx(
+      nullptr,
+      nullptr,
+      nullptr,
+      nullptr,
+      0);
+  }
+  // Resets the device when the window's client size no longer matches m_windowSize.
+  void adjustBackBuffer() {
+    RECT windowRect = { 0, 0, 1024, 600 };
+    GetClientRect(m_window, &windowRect);
+
+    Extent2D newSize = {
+      static_cast<uint32_t>(windowRect.right - windowRect.left),
+      static_cast<uint32_t>(windowRect.bottom - windowRect.top),
+    };
+
+    if (m_windowSize.w != newSize.w
+     || m_windowSize.h != newSize.h) {
+      m_windowSize = newSize;
+
+      D3DPRESENT_PARAMETERS params;
+      getPresentParams(params);
+      HRESULT status = m_device->ResetEx(&params, nullptr);
+
+      if (FAILED(status))
+        throw DxvkError("Device reset failed");
+    }
+  }
+  // Fills params for a windowed swap chain with one X8R8G8B8 back buffer sized to m_windowSize.
+  void getPresentParams(D3DPRESENT_PARAMETERS& params) {
+    params.AutoDepthStencilFormat = D3DFMT_UNKNOWN;
+    params.BackBufferCount = 1;
+    params.BackBufferFormat = D3DFMT_X8R8G8B8;
+    params.BackBufferWidth = m_windowSize.w;
+    params.BackBufferHeight = m_windowSize.h;
+    params.EnableAutoDepthStencil = FALSE;
+    params.Flags = 0;
+    params.FullScreen_RefreshRateInHz = 0;
+    params.hDeviceWindow = m_window;
+    params.MultiSampleQuality = 0;
+    params.MultiSampleType = D3DMULTISAMPLE_NONE;
+    params.PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT;
+    params.SwapEffect = D3DSWAPEFFECT_DISCARD;
+    params.Windowed = TRUE;
+  }
+
+private:
+
+  HWND m_window;
+  Extent2D m_windowSize = { 1024, 600 };
+
+  Com<IDirect3D9Ex> m_d3d;
+  Com<IDirect3DDevice9Ex> m_device;
+
+};
+
+LRESULT CALLBACK WindowProc(HWND hWnd,
+                            UINT message,
+                            WPARAM wParam,
+                            LPARAM lParam);
+// Entry point: registers the window class, creates the window, then pumps messages and renders when idle.
+int WINAPI WinMain(HINSTANCE hInstance,
+                   HINSTANCE hPrevInstance,
+                   LPSTR lpCmdLine,
+                   int nCmdShow) {
+  HWND hWnd;
+  WNDCLASSEXW wc;
+  ZeroMemory(&wc, sizeof(WNDCLASSEX));
+  wc.cbSize = sizeof(WNDCLASSEX);
+  wc.style = CS_HREDRAW | CS_VREDRAW;
+  wc.lpfnWndProc = WindowProc;
+  wc.hInstance = hInstance;
+  wc.hCursor = LoadCursor(nullptr, IDC_ARROW);
+  wc.hbrBackground = (HBRUSH)COLOR_WINDOW;
+  wc.lpszClassName = L"WindowClass1";
+  RegisterClassExW(&wc);
+
+  hWnd = CreateWindowExW(0,
+    L"WindowClass1",
+    L"Our First Windowed Program",
+    WS_OVERLAPPEDWINDOW,
+    300, 300,
+    640, 480,
+    nullptr,
+    nullptr,
+    hInstance,
+    nullptr);
+  ShowWindow(hWnd, nCmdShow);
+
+  MSG msg = { };
+
+  try {
+    ClearApp app(hInstance, hWnd);
+
+    while (true) {
+      if (PeekMessage(&msg, nullptr, 0, 0, PM_REMOVE)) {
+        TranslateMessage(&msg);
+        DispatchMessage(&msg);
+
+        if (msg.message == WM_QUIT)
+          return msg.wParam;
+      } else {
+        app.run();
+      }
+    }
+  } catch (const dxvk::DxvkError& e) {
+    std::cerr << e.message() << std::endl;
+    return 1; // msg may never have been filled in, so report the failure explicitly
+  }
+}
+// Posts WM_QUIT when the window is closed; everything else goes to DefWindowProc.
+LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) {
+  switch (message) {
+    case WM_CLOSE:
+      PostQuitMessage(0);
+      return 0;
+  }
+
+  return DefWindowProc(hWnd, message, wParam, lParam);
+}
diff --git a/tests/d3d9/test_d3d9_triangle.cpp b/tests/d3d9/test_d3d9_triangle.cpp
new file mode 100644
index 000000000..ed0c5a3ef
--- /dev/null
+++ b/tests/d3d9/test_d3d9_triangle.cpp
@@ -0,0 +1,407 @@
+#include <cstring>
+
+#include <d3d9.h>
+#include <d3dcompiler.h>
+
+#include "../test_utils.h"
+
+using namespace dxvk;
+
+struct Extent2D {
+  uint32_t w, h;
+};
+// HLSL vertex shader: forwards the input position with w set to 0.6.
+const std::string g_vertexShaderCode = R"(
+
+struct VS_INPUT {
+  float3 Position : POSITION;
+};
+
+struct VS_OUTPUT {
+  float4 Position : POSITION;
+};
+
+VS_OUTPUT main( VS_INPUT IN ) {
+  VS_OUTPUT OUT;
+  OUT.Position = float4(IN.Position, 0.6f);
+
+  return OUT;
+}
+
+)";
+// HLSL pixel shader: outputs the texel at (0, 0) of sampler s0 for every pixel.
+const std::string g_pixelShaderCode = R"(
+
+struct VS_OUTPUT {
+  float4 Position : POSITION;
+};
+
+struct PS_OUTPUT {
+  float4 Colour : COLOR;
+};
+
+sampler g_texDepth : register( s0 );
+
+PS_OUTPUT main( VS_OUTPUT IN ) {
+  PS_OUTPUT OUT;
+
+  OUT.Colour = tex2D(g_texDepth, float2(0, 0));
+
+  return OUT;
+}
+
+
+)";
+// Creates a D3D9Ex device, exercises several resource calls once, then draws one triangle every frame.
+class TriangleApp {
+
+public:
+  // Creates the device, shaders, textures, surfaces, vertex buffer and declaration. Throws DxvkError on failure.
+  TriangleApp(HINSTANCE instance, HWND window)
+  : m_window(window) {
+    HRESULT status = Direct3DCreate9Ex(D3D_SDK_VERSION, &m_d3d);
+
+    if (FAILED(status))
+      throw DxvkError("Failed to create D3D9 interface");
+
+    D3DPRESENT_PARAMETERS params;
+    getPresentParams(params);
+
+    status = m_d3d->CreateDeviceEx(
+      D3DADAPTER_DEFAULT,
+      D3DDEVTYPE_HAL,
+      m_window,
+      D3DCREATE_HARDWARE_VERTEXPROCESSING,
+      &params,
+      nullptr,
+      &m_device);
+
+    if (FAILED(status))
+      throw DxvkError("Failed to create D3D9 device");
+    // NOTE(review): the AddRef() calls below look like refcount probes; none of them is ever released - confirm intent.
+    UINT firstRef = m_device->AddRef();
+
+    Com<IDirect3DSurface9> backbuffer;
+    m_device->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_MONO, &backbuffer);
+
+    UINT firstRef2 = m_device->AddRef();
+
+    Com<IDirect3DSwapChain9> swapchain;
+    m_device->GetSwapChain(0, &swapchain);
+
+    UINT firstRef3 = m_device->AddRef();
+
+    // Vertex Shader
+    {
+      Com<ID3DBlob> blob;
+
+      status = D3DCompile(
+        g_vertexShaderCode.data(),
+        g_vertexShaderCode.length(),
+        nullptr, nullptr, nullptr,
+        "main",
+        "vs_2_0",
+        0, 0, &blob,
+        nullptr);
+
+      if (FAILED(status))
+        throw DxvkError("Failed to compile vertex shader");
+
+      status = m_device->CreateVertexShader(reinterpret_cast<const DWORD*>(blob->GetBufferPointer()), &m_vs);
+
+      if (FAILED(status))
+        throw DxvkError("Failed to create vertex shader");
+    }
+
+    // Pixel Shader
+    {
+      Com<ID3DBlob> blob;
+
+      status = D3DCompile(
+        g_pixelShaderCode.data(),
+        g_pixelShaderCode.length(),
+        nullptr, nullptr, nullptr,
+        "main",
+        "ps_2_0",
+        0, 0, &blob,
+        nullptr);
+
+      if (FAILED(status))
+        throw DxvkError("Failed to compile pixel shader");
+
+      status = m_device->CreatePixelShader(reinterpret_cast<const DWORD*>(blob->GetBufferPointer()), &m_ps);
+
+      if (FAILED(status))
+        throw DxvkError("Failed to create pixel shader");
+    }
+
+    m_device->SetVertexShader(m_vs.ptr());
+    m_device->SetPixelShader(m_ps.ptr());
+
+    UINT secondRef1 = m_device->AddRef();
+
+    Com<IDirect3DTexture9> defaultTexture;
+    status = m_device->CreateTexture(64, 64, 1, 0, D3DFMT_DXT3, D3DPOOL_DEFAULT, &defaultTexture, nullptr);
+
+    UINT secondRef2 = m_device->AddRef();
+
+    Com<IDirect3DSurface9> surface;
+    status = defaultTexture->GetSurfaceLevel(0, &surface);
+
+    UINT secondRef3 = m_device->AddRef();
+
+    Com<IDirect3DTexture9> sysmemTexture;
+    status = m_device->CreateTexture(64, 64, 1, 0, D3DFMT_DXT3, D3DPOOL_SYSTEMMEM, &sysmemTexture, nullptr);
+
+    Com<IDirect3DSurface9> offscreenSurface;
+    status = m_device->CreateOffscreenPlainSurfaceEx(64, 64, D3DFMT_DXT3, D3DPOOL_DEFAULT, &offscreenSurface, nullptr, 0);
+
+    D3DLOCKED_RECT offscreenLock;
+    status = offscreenSurface->LockRect(&offscreenLock, nullptr, 0);
+
+    std::memset(offscreenLock.pBits, 0xFF, offscreenLock.Pitch * (64 / 4));
+
+    status = offscreenSurface->UnlockRect();
+
+    //status = m_device->ColorFill(offscreenSurface.ptr(), nullptr, D3DCOLOR_ARGB(255, 255, 0, 0));
+
+    D3DLOCKED_RECT sysmemLock;
+    status = sysmemTexture->LockRect(0, &sysmemLock, nullptr, 0);
+
+    //D3DLOCKED_RECT offscreenLock;
+    status = offscreenSurface->LockRect(&offscreenLock, nullptr, 0);
+
+    std::memcpy(sysmemLock.pBits, offscreenLock.pBits, offscreenLock.Pitch * (64 / 4));
+
+    sysmemTexture->UnlockRect(0);
+    offscreenSurface->UnlockRect();
+
+    status = m_device->UpdateTexture(sysmemTexture.ptr(), defaultTexture.ptr());
+
+    status = m_device->SetTexture(0, defaultTexture.ptr());
+
+    Com<IDirect3DSurface9> rt;
+    status = m_device->CreateRenderTarget(1280, 720, D3DFMT_X8R8G8B8, D3DMULTISAMPLE_NONE, 0, FALSE, &rt, nullptr);
+
+    ULONG refCount = m_device->AddRef();
+
+    Com<IDirect3DSurface9> rt2;
+    status = m_device->CreateRenderTarget(1280, 720, D3DFMT_X8R8G8B8, D3DMULTISAMPLE_NONE, 0, FALSE, &rt2, nullptr);
+
+    ULONG refCount2 = m_device->AddRef();
+
+    rt2 = nullptr;
+
+    ULONG refCount3 = m_device->AddRef();
+
+    RECT stretchRect1 = { 0, 0, 640, 720 };
+    RECT stretchRect2 = { 640, 0, 1280, 720 };
+    status = m_device->StretchRect(rt.ptr(), &stretchRect1, rt.ptr(), &stretchRect2, D3DTEXF_LINEAR);
+
+    ///
+
+    Com<IDirect3DSurface9> ds;
+    //status = m_device->CreateDepthStencilSurface(1274, 695, D3DFMT_D24X8, D3DMULTISAMPLE_NONE, 0, FALSE, &ds, nullptr);
+    status = m_device->CreateDepthStencilSurface(1280, 720, D3DFMT_D24X8, D3DMULTISAMPLE_NONE, 0, FALSE, &ds, nullptr);
+
+    status = m_device->SetDepthStencilSurface(ds.ptr());
+    status = m_device->SetRenderState(D3DRS_ZWRITEENABLE, 1);
+    status = m_device->SetRenderState(D3DRS_ZENABLE, 1);
+    status = m_device->SetRenderState(D3DRS_ZFUNC, D3DCMP_LESSEQUAL);
+
+
+    // One triangle, three floats (x, y, z) per vertex, matching the FLOAT3 position element declared below.
+    std::array<float, 9> vertices = {
+      0.0f, 0.5f, 0.0f,
+      0.5f, -0.5f, 0.0f,
+      -0.5f, -0.5f, 0.0f,
+    };
+
+    const size_t vbSize = vertices.size() * sizeof(float);
+
+    status = m_device->CreateVertexBuffer(vbSize, 0, 0, D3DPOOL_DEFAULT, &m_vb, nullptr);
+    if (FAILED(status))
+      throw DxvkError("Failed to create vertex buffer");
+
+    void* data = nullptr;
+    status = m_vb->Lock(0, 0, &data, 0);
+    if (FAILED(status))
+      throw DxvkError("Failed to lock vertex buffer");
+
+    std::memcpy(data, vertices.data(), vbSize);
+
+    status = m_vb->Unlock();
+    if (FAILED(status))
+      throw DxvkError("Failed to unlock vertex buffer");
+
+    m_device->SetStreamSource(0, m_vb.ptr(), 0, 3 * sizeof(float));
+
+    std::array<D3DVERTEXELEMENT9, 2> elements;
+
+    elements[0].Method = 0;
+    elements[0].Offset = 0;
+    elements[0].Stream = 0;
+    elements[0].Type = D3DDECLTYPE_FLOAT3;
+    elements[0].Usage = D3DDECLUSAGE_POSITION;
+    elements[0].UsageIndex = 0;
+
+    elements[1] = D3DDECL_END();
+
+    HRESULT result = m_device->CreateVertexDeclaration(elements.data(), &m_decl);
+    if (FAILED(result))
+      throw DxvkError("Failed to create vertex decl");
+
+    m_device->SetVertexDeclaration(m_decl.ptr());
+  }
+  // Renders one frame: resizes if needed, clears colour and depth, draws the triangle and presents.
+  void run() {
+    this->adjustBackBuffer();
+
+    m_device->BeginScene();
+
+    m_device->Clear(
+      0,
+      nullptr,
+      D3DCLEAR_TARGET,
+      D3DCOLOR_RGBA(44, 62, 80, 0),
+      0,
+      0);
+
+    m_device->Clear(
+      0,
+      nullptr,
+      D3DCLEAR_ZBUFFER,
+      0,
+      0.5f,
+      0);
+
+    m_device->DrawPrimitive(D3DPT_TRIANGLELIST, 0, 1);
+
+    m_device->EndScene();
+
+    m_device->PresentEx(
+      nullptr,
+      nullptr,
+      nullptr,
+      nullptr,
+      0);
+  }
+  // Resets the device when the window's client size no longer matches m_windowSize.
+  void adjustBackBuffer() {
+    RECT windowRect = { 0, 0, 1024, 600 };
+    GetClientRect(m_window, &windowRect);
+
+    Extent2D newSize = {
+      static_cast<uint32_t>(windowRect.right - windowRect.left),
+      static_cast<uint32_t>(windowRect.bottom - windowRect.top),
+    };
+
+    if (m_windowSize.w != newSize.w
+     || m_windowSize.h != newSize.h) {
+      m_windowSize = newSize;
+
+      D3DPRESENT_PARAMETERS params;
+      getPresentParams(params);
+      HRESULT status = m_device->ResetEx(&params, nullptr);
+
+      if (FAILED(status))
+        throw DxvkError("Device reset failed");
+    }
+  }
+  // Fills params for a windowed swap chain with one X8R8G8B8 back buffer sized to m_windowSize.
+  void getPresentParams(D3DPRESENT_PARAMETERS& params) {
+    params.AutoDepthStencilFormat = D3DFMT_UNKNOWN;
+    params.BackBufferCount = 1;
+    params.BackBufferFormat = D3DFMT_X8R8G8B8;
+    params.BackBufferWidth = m_windowSize.w;
+    params.BackBufferHeight = m_windowSize.h;
+    params.EnableAutoDepthStencil = 0;
+    params.Flags = 0;
+    params.FullScreen_RefreshRateInHz = 0;
+    params.hDeviceWindow = m_window;
+    params.MultiSampleQuality = 0;
+    params.MultiSampleType = D3DMULTISAMPLE_NONE;
+    params.PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT;
+    params.SwapEffect = D3DSWAPEFFECT_DISCARD;
+    params.Windowed = TRUE;
+  }
+
+private:
+
+  HWND m_window;
+  Extent2D m_windowSize = { 1024, 600 };
+
+  Com<IDirect3D9Ex> m_d3d;
+  Com<IDirect3DDevice9Ex> m_device;
+
+  Com<IDirect3DVertexShader9> m_vs;
+  Com<IDirect3DPixelShader9> m_ps;
+  Com<IDirect3DVertexBuffer9> m_vb;
+  Com<IDirect3DVertexDeclaration9> m_decl;
+
+};
+
+LRESULT CALLBACK WindowProc(HWND hWnd,
+                            UINT message,
+                            WPARAM wParam,
+                            LPARAM lParam);
+// Entry point: registers the window class, creates the window, then pumps messages and renders when idle.
+int WINAPI WinMain(HINSTANCE hInstance,
+                   HINSTANCE hPrevInstance,
+                   LPSTR lpCmdLine,
+                   int nCmdShow) {
+  HWND hWnd;
+  WNDCLASSEXW wc;
+  ZeroMemory(&wc, sizeof(WNDCLASSEX));
+  wc.cbSize = sizeof(WNDCLASSEX);
+  wc.style = CS_HREDRAW | CS_VREDRAW;
+  wc.lpfnWndProc = WindowProc;
+  wc.hInstance = hInstance;
+  wc.hCursor = LoadCursor(nullptr, IDC_ARROW);
+  wc.hbrBackground = (HBRUSH)COLOR_WINDOW;
+  wc.lpszClassName = L"WindowClass1";
+  RegisterClassExW(&wc);
+
+  hWnd = CreateWindowExW(0,
+    L"WindowClass1",
+    L"Our First Windowed Program",
+    WS_OVERLAPPEDWINDOW,
+    300, 300,
+    640, 480,
+    nullptr,
+    nullptr,
+    hInstance,
+    nullptr);
+  ShowWindow(hWnd, nCmdShow);
+
+  MSG msg = { };
+
+  try {
+    TriangleApp app(hInstance, hWnd);
+
+    while (true) {
+      if (PeekMessage(&msg, nullptr, 0, 0, PM_REMOVE)) {
+        TranslateMessage(&msg);
+        DispatchMessage(&msg);
+
+        if (msg.message == WM_QUIT)
+          return msg.wParam;
+      } else {
+        app.run();
+      }
+    }
+  } catch (const dxvk::DxvkError& e) {
+    std::cerr << e.message() << std::endl;
+    return 1; // msg may never have been filled in, so report the failure explicitly
+  }
+}
+// Posts WM_QUIT when the window is closed; everything else goes to DefWindowProc.
+LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) {
+  switch (message) {
+    case WM_CLOSE:
+      PostQuitMessage(0);
+      return 0;
+  }
+
+  return DefWindowProc(hWnd, message, wParam, lParam);
+}
diff --git a/tests/meson.build b/tests/meson.build
index 81c295e8a..54fb3d631 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -1,3 +1,4 @@
+subdir('d3d9')
 subdir('d3d11')
 subdir('dxbc')
 subdir('dxgi')