Added Tracy profiler
parent
40734883d8
commit
ab469b40c8
|
@ -3,3 +3,6 @@
|
|||
.idea/**
|
||||
build**
|
||||
cmake-build-**
|
||||
|
||||
3rdprty/tracy/profile/build/unix/obj
|
||||
3rdprty/tracy/profile/build/unix/Tracy-release
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
github: wolfpld
|
Binary file not shown.
After Width: | Height: | Size: 1.0 KiB |
|
@ -0,0 +1,44 @@
|
|||
name: gcc
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-20.04, macOS-latest]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Install linux libraries
|
||||
if: ${{ matrix.os == 'ubuntu-20.04' }}
|
||||
run: sudo apt-get update && sudo apt-get -y install libglfw3-dev libgtk-3-dev libcapstone-dev libtbb-dev
|
||||
- name: Install macos libraries
|
||||
if: ${{ matrix.os == 'macOS-latest' }}
|
||||
run: brew install capstone tbb pkg-config glfw
|
||||
- name: Profiler GUI
|
||||
run: make -j -C profiler/build/unix debug release
|
||||
- name: Update utility
|
||||
run: make -j -C update/build/unix debug release
|
||||
- name: Capture utility
|
||||
run: make -j -C capture/build/unix debug release
|
||||
- name: Csvexport utility
|
||||
run: make -j -C csvexport/build/unix debug release
|
||||
- name: Import-chrome utility
|
||||
run: make -j -C import-chrome/build/unix debug release
|
||||
- name: Library
|
||||
run: make -j -C library/unix debug release
|
||||
- name: Test application
|
||||
run: |
|
||||
make -j -C test
|
||||
make -j -C test clean
|
||||
make -j -C test TRACYFLAGS=-DTRACY_ON_DEMAND
|
||||
make -j -C test clean
|
||||
make -j -C test TRACYFLAGS="-DTRACY_DELAYED_INIT -DTRACY_MANUAL_LIFETIME"
|
|
@ -0,0 +1,28 @@
|
|||
name: Manual
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Fix stupidity
|
||||
run: |
|
||||
cp AUTHORS AUTHORS.
|
||||
cp LICENSE LICENSE.
|
||||
- name: Compile LaTeX
|
||||
uses: xu-cheng/latex-action@v2
|
||||
with:
|
||||
working_directory: manual
|
||||
root_file: tracy.tex
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: manual
|
||||
path: manual/tracy.pdf
|
|
@ -0,0 +1,57 @@
|
|||
name: MSVC
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: windows-2019
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: microsoft/setup-msbuild@v1.0.2
|
||||
- name: Integrate vcpkg
|
||||
run: vcpkg integrate install
|
||||
- name: Build vcpkg libraries
|
||||
run: vcpkg install freetype glfw3 capstone[arm,arm64,x86] --triplet x64-windows-static
|
||||
- name: Profiler GUI Debug
|
||||
run: msbuild .\profiler\build\win32\Tracy.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Profiler GUI Release
|
||||
run: msbuild .\profiler\build\win32\Tracy.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Update utility Debug
|
||||
run: msbuild .\update\build\win32\update.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Update utility Release
|
||||
run: msbuild .\update\build\win32\update.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Capture utility Debug
|
||||
run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Capture utility Release
|
||||
run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Csvexport utility Debug
|
||||
run: msbuild .\csvexport\build\win32\csvexport.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Csvexport utility Release
|
||||
run: msbuild .\csvexport\build\win32\csvexport.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Import-chrome utility Debug
|
||||
run: msbuild .\import-chrome\build\win32\import-chrome.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Import-chrome utility Release
|
||||
run: msbuild .\import-chrome\build\win32\import-chrome.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Library
|
||||
run: msbuild .\library\win32\TracyProfiler.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Package binaries
|
||||
run: |
|
||||
mkdir bin
|
||||
mkdir bin\dev
|
||||
copy profiler\build\win32\x64\Release\Tracy.exe bin
|
||||
copy update\build\win32\x64\Release\update.exe bin
|
||||
copy capture\build\win32\x64\Release\capture.exe bin
|
||||
copy import-chrome\build\win32\x64\Release\import-chrome.exe bin
|
||||
copy csvexport\build\win32\x64\Release\csvexport.exe bin
|
||||
copy library\win32\x64\Release\TracyProfiler.dll bin\dev
|
||||
copy library\win32\x64\Release\TracyProfiler.lib bin\dev
|
||||
7z a Tracy.7z bin
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
path: Tracy.7z
|
|
@ -0,0 +1,39 @@
|
|||
.vs
|
||||
*.opendb
|
||||
*.db
|
||||
*.vcxproj.user
|
||||
x64
|
||||
Release
|
||||
Debug
|
||||
_build
|
||||
_compiler
|
||||
tools/*
|
||||
*.d
|
||||
*.o
|
||||
*.so
|
||||
*.swp
|
||||
imgui.ini
|
||||
test/tracy_test
|
||||
test/tracy_test.exe
|
||||
*/build/unix/*-*
|
||||
manual/t*.aux
|
||||
manual/t*.log
|
||||
manual/t*.out
|
||||
manual/t*.pdf
|
||||
manual/t*.synctex.gz
|
||||
manual/t*.toc
|
||||
manual/t*.bbl
|
||||
manual/t*.blg
|
||||
profiler/build/win32/packages
|
||||
profiler/build/win32/Tracy.aps
|
||||
# include the vcpkg install script but not the files it produces
|
||||
vcpkg/*
|
||||
!vcpkg/install_vcpkg_dependencies.bat
|
||||
.deps/
|
||||
.dirstamp
|
||||
.vscode/
|
||||
|
||||
/_*/**
|
||||
/**/__pycache__/**
|
||||
extra/vswhere.exe
|
||||
extra/tracy-build
|
|
@ -0,0 +1,16 @@
|
|||
Bartosz Taudul <wolf@nereid.pl>
|
||||
Kamil Klimek <kamil.klimek@sharkbits.com> (initial find zone implementation)
|
||||
Bartosz Szreder <zgredder@gmail.com> (view/worker split)
|
||||
Arvid Gerstmann <dev@arvid-g.de> (compatibility fixes)
|
||||
Rokas Kupstys <rokups@zoho.com> (compatibility fixes, initial CI work, MingW support)
|
||||
Till Rathmann <till.rathmann@gmx.de> (DLL support)
|
||||
Sherief Farouk <sherief.personal@gmail.com> (compatibility fixes)
|
||||
Dedmen Miller <dedmen@dedmen.de> (find zone bug fixes, improvements)
|
||||
Michał Cichoń <michcic@gmail.com> (OSX call stack decoding backport)
|
||||
Thales Sabino <thales@codeplay.com> (OpenCL support)
|
||||
Andrew Depke <andrewdepke@gmail.com> (Direct3D 12 support)
|
||||
Simonas Kazlauskas <git@kazlauskas.me> (OSX CI, external bindings)
|
||||
Jakub Žádník <kubouch@gmail.com> (csvexport utility)
|
||||
Andrey Voroshilov <andrew.voroshilov@gmail.com> (multi-DLL fixes)
|
||||
Benoit Jacob <benoitjacob@google.com> (Android improvements)
|
||||
David Farrel <dafarrel@adobe.com> (Direct3D 11 support)
|
|
@ -0,0 +1,27 @@
|
|||
Tracy Profiler (https://github.com/wolfpld/tracy) is licensed under the
|
||||
3-clause BSD license.
|
||||
|
||||
Copyright (c) 2017-2021, Bartosz Taudul <wolf@nereid.pl>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the <organization> nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,742 @@
|
|||
Note: There is no guarantee that version mismatched client and server will
|
||||
be able to talk with each other. Network protocol breakages won't be listed
|
||||
here.
|
||||
|
||||
|
||||
v0.7.8 (2021-05-19)
|
||||
-------------------
|
||||
|
||||
- Updated Zen 3 and added Tiger Lake microarchitectural data.
|
||||
- Manually disconnecting from the server will no longer display erroneous
|
||||
warning message.
|
||||
- Added ability to display sample time spent in child function calls.
|
||||
- Fixed issue which may have prevented sampling on ARM64.
|
||||
- Added TRACY_NO_FRAME_IMAGE macro to disable frame image compression
|
||||
thread.
|
||||
- Ctrl and shift keys will now modify mouse wheel zoom speed.
|
||||
- Improved user experience in the symbol view window.
|
||||
- Added support for Direct3D 11 instrumentation.
|
||||
- Vulkan contexts can be now calibrated on Linux.
|
||||
- Support loading zstd-compressed chrome traces.
|
||||
- Chrome traces with multiple PID entries (and possibly conflicting TIDs)
|
||||
can be now imported.
|
||||
- Added support for custom source location tag ("loc") in chrome traces.
|
||||
- Sampling frequency can be now controlled using TRACY_SAMPLING_HZ macro.
|
||||
- Trace compression can be now selected when saving a trace.
|
||||
- If a trace cannot be saved, a failure dialog will be displayed.
|
||||
- Run-time memory usage of frame images can be reduced by calculating
|
||||
a compression dictionary. This can be only performed when a trace is saved
|
||||
or through the update utility.
|
||||
|
||||
|
||||
v0.7.7 (2021-04-01)
|
||||
-------------------
|
||||
|
||||
- Linux crash handler will now also catch SIGABRT.
|
||||
- Fixed invalid name assignment to source files discovered client-side.
|
||||
- Added ability to check if a zone is active (which may be used to avoid
|
||||
preparing zone text, etc., as it wouldn't be used anyway).
|
||||
- Improved sorting behavior of internal vectors.
|
||||
- Some data will now be always properly displayed during live capture.
|
||||
This was not particularly visible before, as it mainly concerns edge
|
||||
cases.
|
||||
- Sorting is performed only as needed.
|
||||
- In case of plots the performance during live capture may be decreased,
|
||||
as these were sorted with at least 0.25 second intervals before. Now
|
||||
the sorting is performed every frame.
|
||||
- Some other data, which previously was not sorted, is sorted now.
|
||||
- In headless capture mode sorting will be only performed when the trace
|
||||
is saved to disk.
|
||||
- Fixed some typos in macros.
|
||||
- Fixed handling of non-ANSI file names on Windows. You can now name your
|
||||
traces 'ęśąćż.tracy' and it should work as intended. This is supported on
|
||||
Windows 10 release 1903 and newer.
|
||||
- Fixed sending GPU context name in on-demand mode.
|
||||
- Fixed color channel order in ZoneColor() macro.
|
||||
- Handle failure state when a memory pointer allocation is reported twice,
|
||||
without an intermediate free.
|
||||
- Renamed "call stack parents" to "entry call stacks".
|
||||
- Display number of entry call stacks in assembly line sample count tooltip.
|
||||
- Added tooltips with preview of source code in various places in the UI.
|
||||
|
||||
|
||||
v0.7.6 (2021-02-06)
|
||||
-------------------
|
||||
|
||||
- Various fixes in build scripts.
|
||||
- Fixed a faulty rpmalloc initialization path when the first thing the
|
||||
thread did was sending a message with call stack.
|
||||
- Added fallback timer define for various virtualized environments, which
|
||||
may not be able to access the hardware timer registers. This will result
|
||||
in usage of timer provided by the standard library, with reduced
|
||||
resolution.
|
||||
- Further OpenCL improvements.
|
||||
- Updated libbacktrace.
|
||||
- Adds Mach-O 64-bit FAT support.
|
||||
- Fixes memory corruption when processing Mach-O data.
|
||||
- Fixes missing matching entries during binary search.
|
||||
- Adds support for MiniDebugInfo.
|
||||
- Adds fallback to ELF symbol table if no debug info is available.
|
||||
- Various other fixes.
|
||||
- Store build time of profiled program in captures.
|
||||
- GPU contexts can be now named.
|
||||
- Implemented client -> server source code transfer.
|
||||
|
||||
|
||||
v0.7.5 (2021-01-23)
|
||||
-------------------
|
||||
|
||||
- More robust handling of system tracing on Android.
|
||||
- Added warning dialog when the connection is lost before all needed data
|
||||
can be retrieved.
|
||||
- Fixed handling of NaN plot entries (by skipping them).
|
||||
- Dynamic zone colors are now supported through the ZoneColor() macro.
|
||||
- Fixed Arm machine code printout to match the one printed by objdump.
|
||||
- Fixed client memory corruption when using colored messages.
|
||||
- Switched to the next-gen ImGui table UI.
|
||||
- Table columns can have their order rearranged, can be hidden, can be
|
||||
sorted both in ascending and descending order (where appropriate).
|
||||
- Table columns state is now preserved between runs.
|
||||
- Various fixes related to restricting listening to localhost.
|
||||
- Improved compatibility of ETW tracing with non-MSVC compilers.
|
||||
- Fixed Vulkan call stack transfer.
|
||||
- Added support for transient GPU zones (OpenGL, Vulkan, Direct3D 12).
|
||||
- OpenCL fixes for assert-less builds and non-active zones.
|
||||
- Added support for thread names and title bar description in traces
|
||||
imported from chrome tracing format.
|
||||
|
||||
|
||||
v0.7.4 (2020-11-15)
|
||||
-------------------
|
||||
|
||||
- Added support for user-provided locks to keep dbghelp calls thread-safe.
|
||||
- Call stacks can be now copied to clipboard.
|
||||
- Allow more control over which automated captures are performed.
|
||||
- Added textual descriptions for some assembly instructions.
|
||||
- Profiler memory usage is now also displayed as a percentage of available
|
||||
physical memory.
|
||||
- Microarchitecture mismatch is now clearly displayed in the source view
|
||||
window.
|
||||
- Added Zen 3 and Cascade Lake microarchitectural data.
|
||||
- Ghost zones are now supporting all zone coloring modes and namespace
|
||||
shortening.
|
||||
- Extend C API to support memory pools.
|
||||
- Frame rate targets can be now visually represented on the timeline view.
|
||||
|
||||
|
||||
v0.7.3 (2020-10-06)
|
||||
-------------------
|
||||
|
||||
- Properly support DPI scaling on Linux (requires GLFW 3.3).
|
||||
- Added early checks for output file validity in the capture utility.
|
||||
- Improvements to presence broadcast handling.
|
||||
- Custom zone colors can be optionally ignored.
|
||||
- Added support for tracking multiple memory pools.
|
||||
- Memory free failure dialog can now show call stack pointing to the failure
|
||||
location.
|
||||
- Added support for Wayland on Linux.
|
||||
- If during the first 5 seconds of the trace there are no frames being
|
||||
reported, the profiler will switch to following last 5 seconds of the
|
||||
trace, instead of displaying three last frames.
|
||||
|
||||
|
||||
v0.7.2 (2020-09-14)
|
||||
-------------------
|
||||
|
||||
- Note: the bitbucket repository is obsolete and will soon stop receiving
|
||||
updates. Migrate to https://github.com/wolfpld/tracy, if you haven't
|
||||
already.
|
||||
- The "waiting for connection" dialog no longer has "cancel" button. To
|
||||
abort connection attempt just use the "close window" button.
|
||||
- Added update notification.
|
||||
- The most recent traced events can be now viewed regardless of timeline
|
||||
zoom level.
|
||||
- Fixed going-to-line in source view (again).
|
||||
- Crash handling on client is now not performed, if there is no active
|
||||
connection.
|
||||
- Added ability to listen only on IPv4 interfaces.
|
||||
|
||||
|
||||
v0.7.1 (2020-08-24)
|
||||
-------------------
|
||||
|
||||
- Dropped support for pre-v0.6 traces.
|
||||
- Fixed regression on non-AVX2 CPUs.
|
||||
- Fixed incorrect calculation of some ghost zones.
|
||||
- Added list of cached source files.
|
||||
- Added import of plot data.
|
||||
- Secure versions of alloc/free macros.
|
||||
- Automated tracing of vertical synchronization on Windows.
|
||||
- Fixed attachment of postponed frame images.
|
||||
- Source location data can be now copied to clipboard from zone info window.
|
||||
- Zones in find zones menu can be now grouped by zone name.
|
||||
- Vulkan and D3D12 GPU contexts can be now calibrated.
|
||||
- Added CSV export utility.
|
||||
- "Go to frame" popup no longer has a dedicated button. To show it, click on
|
||||
the frame counter.
|
||||
- Added macro for checking if profiler is connected.
|
||||
- Implemented optional data removal from traces in the update utility.
|
||||
- Allow manual management of profiler lifetime.
|
||||
- Adjusted priority of ETW threads to time critical.
|
||||
- Annotations can be now freely adjusted on the timeline.
|
||||
- Limiting time range for find zone functionality has been significantly
|
||||
improved.
|
||||
- Added time range limits for statistics and symbol view.
|
||||
- Implemented call stack sampling on Linux (including Android).
|
||||
- Exact time from start of profiling session can be now viewed by hovering
|
||||
the mouse over the time scale.
|
||||
- Code transfer can be now compiled-out.
|
||||
- Added support for zone markup in unloadable modules.
|
||||
- Added image name filter to sampling statistics results window.
|
||||
|
||||
|
||||
v0.7 (2020-06-11)
|
||||
-----------------
|
||||
|
||||
This is the last release which will be able to load pre-v0.6 traces. Use the
|
||||
update utility to convert your old traces now!
|
||||
|
||||
- chrome:tracing importer now imports zone metadata from "args" key.
|
||||
- Added display of statistical mode to find zone menu.
|
||||
- Automatic stack sampling is now available on windows.
|
||||
- Properly handle tracing on long-running systems.
|
||||
- Message list entries can now show associated frame image.
|
||||
- Call stack window will now display module names.
|
||||
- Symbol location in call stack window may now also display symbol address.
|
||||
- Statistics menu can now be used to display call stack sampling data or
|
||||
list available symbols.
|
||||
- All call paths leading to the sampled instruction in a call stack can be
|
||||
now displayed.
|
||||
- Frame image compression ratio (lossless in-memory compression, not taking
|
||||
into account DXT compression) is displayed in playback window.
|
||||
- Allow reconnection straight from the discard data dialog.
|
||||
- Added ability to set custom names for locks.
|
||||
- Improved handling of network ports.
|
||||
- Added time percentage display to instrumentation statistics.
|
||||
- Display of ghost zones (generated from automated call stack sampling).
|
||||
- Notify when empty labels display is enabled.
|
||||
- Small fragments of executable code will be now sent from client to server.
|
||||
- Added notification about query backlog.
|
||||
- Fixed performance problem with query backlog.
|
||||
- Display number of in-flight queries, in addition to query backlog.
|
||||
- Improved failure reports.
|
||||
- The capture utility will connect to localhost by default.
|
||||
- Added optional support for QPC timer on windows.
|
||||
- Complete rewrite of source file viewer. It is now 100% reliable when going
|
||||
to a source location.
|
||||
- Symbol source view was added.
|
||||
- Extension of source file viewer.
|
||||
- Can display source file, assembly view, or both at the same time.
|
||||
- May include display of statistical profiling data.
|
||||
- Ability to switch between source files which were used to build the
|
||||
symbol.
|
||||
- Ability to switch between inlined functions which are incorporated into
|
||||
the symbol.
|
||||
- Graphical representation of control flow in program.
|
||||
- Display of micro-architectural data for each assembly instruction.
|
||||
- Tracking register dependencies between assembly instructions.
|
||||
- Disassembly may be saved to a file, in order to be processed by external
|
||||
tools.
|
||||
- If the default listening port is occupied, profiler will now try listening
|
||||
on other ports.
|
||||
- Added possibility to perform source file names substitution.
|
||||
- Profiler windows can be now docked.
|
||||
- CPU usage tooltip now displays a list of running threads.
|
||||
- Added possibility to filter discovered clients list.
|
||||
- Source files are now cached during capture.
|
||||
- Profiler will now display a popup when application crashes.
|
||||
- Added ability to send simple integral values as extra payload for zones.
|
||||
- Per-frame zone times on the frames plot can now display self time.
|
||||
- Ability to bind only on localhost interface.
|
||||
- OpenCL profiling.
|
||||
- Direct3D 12 profiling.
|
||||
|
||||
|
||||
v0.6.3 (2020-02-13)
|
||||
-------------------
|
||||
|
||||
- Fixed performance issues with loading saved traces on Ryzen CPUs.
|
||||
- Profiler window contents are now properly updated during window resize.
|
||||
- Improved tid to pid mapping on windows.
|
||||
- Zero length and unfinished zones are no longer taken into account for
|
||||
statistics.
|
||||
- Build files for shared library are now available (experimental).
|
||||
- GPU zones now also have "active" parameter.
|
||||
- Further reduction of memory usage and on-disk trace size.
|
||||
- Replaced ska::flat_hash_map with robin-hood-hashing.
|
||||
- Speed-up rendering of long lists of items.
|
||||
- Exact event time is displayed in some places in the UI.
|
||||
- Memory allocation lists can now be sorted.
|
||||
- Added display of trace file compression ratio.
|
||||
- Optional Zstd compression of trace files.
|
||||
- Frame images are now internally compressed using Zstd (instead of LZ4).
|
||||
- Fix display of continuous frame set tooltips.
|
||||
|
||||
|
||||
v0.6.2 (2019-12-30)
|
||||
-------------------
|
||||
|
||||
- Improved call stack decoding on OSX.
|
||||
- Collection of CPU topology data.
|
||||
- C API now supports allocated source locations.
|
||||
- Added chrome:tracing importer.
|
||||
- Allow merging of ZoneText() strings.
|
||||
- Time distribution can now show both exclusive and inclusive times.
|
||||
- Display proper value of selection time in find zone menu.
|
||||
- Implemented limiting find zone search to a specified time range.
|
||||
- Highlight hovered zone from find zone menu zone list on the histogram.
|
||||
- Allow copying user data directory location to the clipboard.
|
||||
|
||||
|
||||
v0.6.1 (2019-11-28)
|
||||
-------------------
|
||||
|
||||
- Dropped support for pre-v0.5 traces.
|
||||
- Improve BSD support.
|
||||
- GPU zone CPU thread highlight will now highlight whole thread, not only
|
||||
the thread name.
|
||||
- Added CPU thread highlight for CPU data items.
|
||||
- Client parameters may be now set from the server.
|
||||
- Minor UI fixes.
|
||||
|
||||
|
||||
v0.6 (2019-11-17)
|
||||
-----------------
|
||||
|
||||
This is the last release which will be able to load pre-v0.5 traces. Use the
|
||||
update utility to convert your old traces now!
|
||||
|
||||
- Dropped support for pre-v0.4 traces.
|
||||
- Major memory usage decrease.
|
||||
- Significant network bandwidth decrease.
|
||||
- Implemented context switch capture on selected platforms.
|
||||
- Zone timings in various UI places can now take into account only the
|
||||
time when the thread was executing.
|
||||
- Zone information window can now display regions in which thread was
|
||||
suspended by the operating system.
|
||||
- CPUs on which the zone was running are enumerated.
|
||||
- Thread activity regions can be graphed on the timeline.
|
||||
- API breakage: SetThreadName() now only works on current thread.
|
||||
- Fixed thread name retrieval after thread is destroyed.
|
||||
- Added number of CPU cores to host info.
|
||||
- Limited number of possible source locations to 64K.
|
||||
- Limited supported capture length to 1.6 days.
|
||||
- CPU cores are now displayed on the timeline.
|
||||
- Thread execution workload is displayed, including threads from external
|
||||
programs.
|
||||
- Thread migrations across CPU cores can be graphed.
|
||||
- System-wide workload distribution is now plotted on the timeline.
|
||||
- Added "CPU data" window showing programs competing for CPU during the
|
||||
capture.
|
||||
- Switched to using native thread identifiers (relatively small numbers), as
|
||||
opposed to pthreads identifiers, which in reality were pointers.
|
||||
- Improved thread name discovery if context switch capture is enabled.
|
||||
- Per-trace state is now preserved between profiling sessions:
|
||||
- Timeline view position.
|
||||
- Item categories draw/hide settings.
|
||||
- Timeline zones will be highlighted using a different color, when a
|
||||
matching time range is selected on histogram.
|
||||
- Per-frame zone times are now displayed on the frames plot when a zone is
|
||||
selected in the find zone menu.
|
||||
- Zone color is now displayed in zone information window.
|
||||
- Zone colors can now be determined basing on depth and thread or source
|
||||
location.
|
||||
- Thread colors are displayed across the profiler application.
|
||||
- Frame times can be now compared.
|
||||
- Expose more lock handling functionality.
|
||||
- Network port can be now specified by the user.
|
||||
- Proper handling of multithreaded Vulkan code.
|
||||
- Added extreme compression level in update utility.
|
||||
- Added time distribution data in the zone information window.
|
||||
- Trace file name is now displayed in trace information window.
|
||||
- Annotations can be now added to the timeline.
|
||||
- Server now performs network data retrieval and decompression on a dedicated
|
||||
thread.
|
||||
- Added examples of Tracy integration.
|
||||
- Allow grouping of zones in the find zone menu by zone parent or with no
|
||||
grouping.
|
||||
- Zone list in the statistics window can be now filtered.
|
||||
- Implemented configuration of plots.
|
||||
- Messages can now collect call stacks.
|
||||
|
||||
|
||||
v0.5 (2019-08-10)
|
||||
-----------------
|
||||
|
||||
This is the last release which will be able to load pre-v0.4 traces. Use the
|
||||
update utility to convert your old traces now!
|
||||
|
||||
- Major decrease of trace dump file size.
|
||||
- Major optimizations across the board.
|
||||
- Vcpkg is now used for library management on Windows.
|
||||
- Display dump file size change in the update utility.
|
||||
- Added notification area.
|
||||
- Display trace loading time.
|
||||
- Display background processing tasks after trace is loaded.
|
||||
- Display trace save notification.
|
||||
- Show crash icon, if there was a crash.
|
||||
- Added C API.
|
||||
- Profiling session may now gracefully terminate, due to incorrect
|
||||
instrumentation. A popup with termination reason will be displayed.
|
||||
- Call stack improvements.
|
||||
- Call stack frames now have a proper source file and file line
|
||||
information on Linux.
|
||||
- Single call stack frame may now have multiple entries, representing
|
||||
inlined function calls.
|
||||
- Call stack grouping in the find zone menu now has a special display
|
||||
mode.
|
||||
- Call stack memory allocations tree improvements:
|
||||
- Add top-down variant to complement the previously available bottom-up
|
||||
one.
|
||||
- Add ability to group tree nodes by function name.
|
||||
- Allow restricting tree to display only active allocations.
|
||||
- Added support for Lua call stack capture.
|
||||
- Self time of zones may be now displayed in the find zone menu.
|
||||
- Added ability to disconnect from a client.
|
||||
- Find zone groups can now be sorted by mean time per call.
|
||||
- Zones displayed in the find zone menu can be now grouped by order of
|
||||
appearance, execution time or name.
|
||||
- Time is now displayed without trailing fractional zeros (e.g. "2.5 ms"
|
||||
instead of "2.50 ms").
|
||||
- Child zones displayed in zone info window can be now grouped by source
|
||||
location.
|
||||
- Selected or hovered lock is now highlighted on the timeline.
|
||||
- Locks are now grouped into single and multithreaded (contended and
|
||||
uncontended) in the options menu locks list.
|
||||
- On broken platforms the profiler can now be initialized as needed (and
|
||||
possible), taking a performance and functionality hit.
|
||||
- User experience improvements in the graphical profiler.
|
||||
- Thread position and height is now animated, to eliminate flickering that
|
||||
was happening when depth of displayed zones was changing.
|
||||
- Zooming in/out using the mouse wheel is now animated.
|
||||
- Plot range adjustment is now animated.
|
||||
- Various other UI improvements.
|
||||
- System CPU usage is now being monitored.
|
||||
- Threads that have nothing to display in the current view are now hidden by
|
||||
default.
|
||||
- Dimmed-out the timeline outside the profiling area.
|
||||
- Source file view can now be opened also from statistics menu.
|
||||
- Display standard deviation in find zone and compare traces menus.
|
||||
- Display zone messages in zone information window.
|
||||
- Display order of threads can be changed in the options menu.
|
||||
- Prevent deadlocks by querying socket send buffer size.
|
||||
- Frame set statistics can be now limited to frames visible on the screen.
|
||||
- Messages can be now colored.
|
||||
- Zone selection in compare traces menu can be now linked to the other
|
||||
trace.
|
||||
- Added support for frame image (screen shot) storage.
|
||||
- Implemented ability to cut off outliers on histograms.
|
||||
- Zone or frame that is currently hovered by the mouse cursor will be
|
||||
highlighted on the histogram.
|
||||
- Server now displays available clients in the local network.
|
||||
- Source code whitespace visibility can now be enabled or disabled.
|
||||
- Profiler will now check if proper timer readings can be performed on
|
||||
x86/x64.
|
||||
- Application can now log app-specific information, similarly to how the
|
||||
host info reports system information.
|
||||
- Message list will automatically scroll down to the most recent message.
|
||||
- Feature will disable when the list is scrolled by user.
|
||||
- To re-enable, scroll to the bottom of the list.
|
||||
- Message list can be now filtered.
|
||||
- A notification popup will be displayed during trace cleanup.
|
||||
- Source file view won't be available if a source file is newer than the
|
||||
capture.
|
||||
- Added ability to set custom trace descriptions.
|
||||
- Added frame time target lines.
|
||||
- FPS counts are now displayed next to frame times.
|
||||
- GPU drift value can be now automatically measured.
|
||||
- Connection window is now a popup hidden under a dedicated button.
|
||||
|
||||
|
||||
v0.4.1 (2018-12-30)
|
||||
-------------------
|
||||
|
||||
- Active frame set can be now switched by clicking on a frame set on the
|
||||
timeline.
|
||||
- Add ability to go to a specified frame.
|
||||
- Most commonly used addresses can be now selected from the drop-down menu.
|
||||
- Fixed corner case problem with profiler initialization on Windows.
|
||||
- Added third state (stopped) to the pause/resume button. It will be used
|
||||
after the connection to the client is terminated.
|
||||
- Active trace can be discarded.
|
||||
- Call stack capture may be forced through TRACY_CALLSTACK define.
|
||||
- Lock info window has been added.
|
||||
- Time of lock creation and termination is now being tracked.
|
||||
- Menu bar buttons are now toggles that can also close their corresponding
|
||||
windows.
|
||||
- Find zone and compare menu improvements.
|
||||
- Ability to ignore case during search.
|
||||
- Pressing enter key will now start search, just like pressing the "find"
|
||||
button.
|
||||
- Using the ^F keyboard shortcut will open the find zone menu and focus
|
||||
the input box.
|
||||
- Added ability to automatically connect to an IP address in the graphical
|
||||
profiler application (use "-a address" argument to enable).
|
||||
- Pressing enter key after entering client address in the welcome dialog
|
||||
will now automatically begin connection process.
|
||||
|
||||
|
||||
v0.4 (2018-10-09)
|
||||
-----------------
|
||||
|
||||
- Renamed "standalone" utility to "profiler".
|
||||
- Added trace update utility, which will convert files saved in previous
|
||||
versions of tracy to be up-to-date.
|
||||
- Optional high compression (--hc) mode is available that will increase
|
||||
the compression level, at the cost of considerably longer compression
|
||||
time.
|
||||
- Fix regression causing varying size of profiler window for different
|
||||
captures.
|
||||
- Added support for on-demand tracing.
|
||||
- If a client application is compiled with the TRACY_ON_DEMAND macro
|
||||
defined, tracing will not begin until a connection to server is
|
||||
established.
|
||||
- Since data is not fully captured in this mode, the resulting trace will
|
||||
be less precise, until application state is appropriately reset. For
|
||||
example, locks need to be fully released, zone stacks need to be
|
||||
flushed. This is an automatic process.
|
||||
- All tracing macros are able to work in the on-demand mode.
|
||||
- Improved compatibility with various system setups.
|
||||
- Aside from using TRACY_NO_EXIT define you can also set the same-named
|
||||
environmental variable to 1 to get the same effect.
|
||||
- Added ability to show/hide all threads and plots.
|
||||
- Performance improvements.
|
||||
- Improvements to memory data presentation.
|
||||
- Added memory allocation info window.
|
||||
- Selecting memory allocation on a plot will draw time range of the
|
||||
allocation.
|
||||
- Middle clicking on an memory allocation address (or on a button in
|
||||
memory allocation info window) will zoom the view to the allocation
|
||||
range.
|
||||
- Find zone menu improvements:
|
||||
- Zones can be now also grouped by call stacks.
|
||||
- Zone groups can be now also sorted by time spend in each zone.
|
||||
- Zone groups list now displays group times.
|
||||
- Average and median zone times are now displayed on the histogram.
|
||||
- Selected zones will be highlighted on the timeline view.
|
||||
- Added named versions of tracing macros that allow specifying scoped
|
||||
variable name.
|
||||
- The main profiler window is now kept at the bottom of windows stack.
|
||||
- The "profiler" utility will now use a custom embedded font.
|
||||
- Microseconds are now displayed using correct symbol ('μ' instead of 'u').
|
||||
- Unix builds of the "profiler" utility will now ask for a file name when
|
||||
saving a trace.
|
||||
- Progress popup is now displayed when a trace file is loading.
|
||||
- Zones that share source location with a zone that is hovered over are now
|
||||
highlighted.
|
||||
- Added ability to zoom-in to a selection range made using middle mouse
|
||||
button.
|
||||
- Holding the ctrl key will switch to zoom-out mode.
|
||||
- The "profiler" utility will use less resources when its window is
|
||||
out-of-focus or minimized.
|
||||
- Added support for cross-DLL profiling.
|
||||
- Items in options menu (locks, threads, etc.) are now described with number
|
||||
of events.
|
||||
- Source location of lock declaration is also provided.
|
||||
- Created an extensive user manual for the profiler.
|
||||
- Added ability to capture multiple frame sets.
|
||||
- Viewer will display multiple frame ranges at once.
|
||||
- Only one frame set can be active at once. The selected one is used for
|
||||
the frame navigation graph, frame navigation buttons and drawing frame
|
||||
separators.
|
||||
- The active frame set will be highlighted, and the rest will be dimmed
|
||||
out.
|
||||
- Frames can now also be discontinuous.
|
||||
- Frames and zones too small to be displayed will be marked with a zig-zag
|
||||
pattern.
|
||||
- General improvements to message list and message markers.
|
||||
- Hovering over message on a list will highlight its marker (previously it
|
||||
only worked the other way).
|
||||
- Left clicking on a message marker will focus the message list on the
|
||||
selected message.
|
||||
- Middle clicking on a message marker will center it on screen.
|
||||
- Added trace information window.
|
||||
- This includes frame time statistics and histogram.
|
||||
- Displayed memory sizes are now properly formatted.
|
||||
- Added call stack tree for memory allocations.
|
||||
- You can display allocations list for each call stack tree entry.
|
||||
- The source code of the profiled application may now be viewed in the
|
||||
profiler.
|
||||
- BIG FAT WARNING: The actual profiled program source code is not known to
|
||||
the profiler. It only checks if there is a file on your disk that
|
||||
matches the file name of the captured source location. Even if the file
|
||||
is displayed, it may be out of date.
|
||||
- CPU and GPU zones will have "Source" button, if source file can be
|
||||
opened.
|
||||
- Source files for call stack traces can be opened by right-clicking on
|
||||
the file name. Since in this case there is no button that can be hidden,
|
||||
a small animation will be played to notify user if the source cannot be
|
||||
opened.
|
||||
- The main profiler view will now occupy the whole window. Previous behavior
|
||||
is still available for embedded use cases.
|
||||
- Many button labels are now accompanied by icons.
|
||||
- Fonts should now be less blurry.
|
||||
- "Go to parent" button in zone info window won't be displayed if there is
|
||||
no parent to go to.
|
||||
- Improvements to the compare traces menu.
|
||||
- There are now colored markers to make it easier to distinguish "this" and
|
||||
"external" traces.
|
||||
- The amount of saved time is now displayed (a difference between total
|
||||
run times of both traces).
|
||||
- Tracy will now collect host information, like CPU name, amount of system
|
||||
memory, etc.
|
||||
- Windows builds of the "profiler" utility will perform a check of supported
|
||||
CPU instruction set and match it against the one required by the binary
|
||||
(by default AVX2 is used). If the program cannot be executed on the
|
||||
processor, a message dialog with workaround instructions will be
|
||||
displayed.
|
||||
- Tracy can intercept crashes and finish sending data from a dying process.
|
||||
- Currently this is only implemented on Windows, Linux and Android.
|
||||
- Call stack window may now display addresses of the frames, instead of
|
||||
source file locations.
|
||||
- Memory events will now properly register their thread.
|
||||
- Profiler settings are now stored in a persistent location.
|
||||
- On Windows settings are stored in %APPDATA%/tracy.
|
||||
- On other platforms settings are stored in $XDG_CONFIG_HOME/tracy or
|
||||
$HOME/.config/tracy, if the variable is not set.
|
||||
- The main profiler window position, size and maximized state are saved
|
||||
and restored.
|
||||
- The size and position of internal windows now doesn't depend on the
|
||||
runtime directory of the profiler executable.
|
||||
- Added connection handshake.
|
||||
- Server won't be able to connect to client if there's a protocol version
|
||||
mismatch.
|
||||
- Client not in on-demand mode will refuse connections after the first
|
||||
connection was made and the initial event buffers were cleared.
|
||||
- A single server will no longer try to connect to multiple clients.
|
||||
- The capture utility will now display time span of the ongoing capture.
|
||||
|
||||
|
||||
v0.3 (2018-07-03)
|
||||
-----------------
|
||||
|
||||
- Breaking change: the format of trace files has changed.
|
||||
- Previous tracy version will crash when trying to open new traces.
|
||||
- Loading of traces saved by previous version is supported.
|
||||
- Tracy will no longer crash when trying to load traces saved by future
|
||||
versions. Instead, a dialog advising to update will be displayed.
|
||||
- Tracy will no longer crash in most cases when trying to open files that
|
||||
are not traces. Some crashes are still possible, due to support of old,
|
||||
header-less traces.
|
||||
- Ability to track every memory allocation in profiled program.
|
||||
- Allocation event queuing must be done in order, which requires exclusive
|
||||
access to the serialized queue on the client side. This has no effect on
|
||||
the rest of events, which are stored in a concurrent queue, as before.
|
||||
- You can search for a memory address and see where it was allocated, for
|
||||
how long, etc. This lists all matching allocations since the program was
|
||||
started.
|
||||
- All active (non-freed) allocations may be listed. This shows the current
|
||||
memory state by default, but can go back to any point in time.
|
||||
- Graphical representation of process memory map may be displayed. New
|
||||
allocations/frees are displayed in a bright color and fade out with
|
||||
time. This feature also can look back in time.
|
||||
- Memory usage plot is automatically generated.
|
||||
- Basic allocation information is displayed in memory plot tooltips.
|
||||
- A summary of memory events within a zone (and its children) is now
|
||||
printed in zone info window.
|
||||
- Support loading profile dumps with no memory allocation data (generated by
|
||||
v0.2).
|
||||
- Added ability to display global statistics of a selected zone from the
|
||||
zone info window.
|
||||
- Fixed regression with lock announce processing that appeared during
|
||||
worker/viewer split.
|
||||
- Allow selecting/unselecting all locks for display.
|
||||
- Performance improvements.
|
||||
- Don't save unneeded lock information in trace file.
|
||||
- Don't save thrash in message list data.
|
||||
- Allow expanding view span up to one hour, instead of one minute.
|
||||
- Added trace comparison window.
|
||||
- An external trace has to be loaded first.
|
||||
- Zone query in both traces (current and external).
|
||||
- Both results are overlaid on the same histogram.
|
||||
- Graphs can be adjusted as-if there was the same number of zones
|
||||
collected.
|
||||
- Read time directly from a hardware register on ARM/ARM64, if possible.
|
||||
- User-space access to the timer needs to be enabled in the kernel, so
|
||||
tracy will perform run-time checks and fallback to the old method if the
|
||||
check fails.
|
||||
- Prevent connections in a TIME-WAIT state from blocking new listen
|
||||
connections.
|
||||
- Display y-range of plots.
|
||||
- Added ability to unload traces loaded from files. To do so close the main
|
||||
profiler window. You will return to the connect/open selection dialog.
|
||||
Live captures cannot be terminated this way.
|
||||
- Zones previously displayed in zone info window are remembered and you can
|
||||
go back to them. Closing the zone info window or switching between CPU and
|
||||
GPU zones will clear the memory.
|
||||
- Improved message list window.
|
||||
- Messages are now displayed in columns.
|
||||
- Originating thread of each message is now included in the list.
|
||||
- Messages can be filtered by the originating thread.
|
||||
- You can now navigate to next and previous frame.
|
||||
- Zone statistics can be now displayed using only self times.
|
||||
- Support for tracing GPU events using Vulkan.
|
||||
- Timeline will now display "OpenGL context" or "Vulkan context" instead of
|
||||
"GPU context".
|
||||
- Fixed regression causing invalid display of GPU context appearance time.
|
||||
- Fixed regression causing invalid reporting of an active CPU in zone end
|
||||
events, if MSVC rdtscp optimization was not enabled.
|
||||
- Ability to collect true call stacks.
|
||||
- Supported on Windows, Linux, Android.
|
||||
- The following events can collect call stacks:
|
||||
- Memory alloc/free.
|
||||
- Zone begin.
|
||||
- GPU zone begin.
|
||||
- Zone stack trace now also displays frames from a real call trace.
|
||||
- On Linux call stack frame name resolution requires a call to dladdr,
|
||||
which in turn requires linking with libdl.
|
||||
- Allow manual entry of GPU time drift value.
|
||||
- Unix build system no longer shares object files between different build
|
||||
units.
|
||||
- Fixes inability to build debug and release versions of a single utility
|
||||
without "make clean".
|
||||
- Fixes incompatibility between "standalone" and "capture" utilities due
|
||||
to different set of used feature flags.
|
||||
- On Windows "standalone" utility now adapts to system DPI setting.
|
||||
- Optional per-call zone naming.
|
||||
|
||||
|
||||
v0.2 (2018-04-05)
|
||||
-----------------
|
||||
|
||||
- Fixed broken TRACY_NO_EXIT behavior.
|
||||
- Visual refresh (new color scheme).
|
||||
- Added optional support for live in-depth zone analysis.
|
||||
- Ability to search for zones matching a query.
|
||||
- Histogram of zone time spans.
|
||||
- List occurrences of a zone, grouped by thread, or by user text.
|
||||
- Zone groups can be selected and highlighted on histogram graph.
|
||||
- Support for linear and logarithmic display of time and values.
|
||||
- Histogram bins can show zone counts or total execution time.
|
||||
- Listed zones can be narrowed down by data range selection on histogram.
|
||||
- Separation of server data handling code from the visualisation.
|
||||
- Implementation of a command line capture utility.
|
||||
- Support libraries have been updated.
|
||||
- Fixed an issue that prevented de-duplication of source location payloads.
|
||||
- Fixed an issue that prevented the ability to disable threads in settings
|
||||
menu, if two threads had the same name.
|
||||
- Performance optimizations.
|
||||
- Visual clean up of the settings menu.
|
||||
- Zone info windows improvements.
|
||||
- Visual improvements to zone info window child list.
|
||||
- Zone info windows now show zone thread.
|
||||
- Display zone stack trace.
|
||||
- Hide pause/resume button if there's no data connection (i.e. trace was
|
||||
loaded from file).
|
||||
- Source location statistics view has been added.
|
||||
- Fixed crash when a saved trace was opened, but no trace capture session
|
||||
was performed before.
|
||||
- Standalone server will now open trace files passed as an argument to the
|
||||
executable.
|
||||
- Fix possible crash in SetThreadName, that could happen if TLS init was
|
||||
delayed until first use of thread local variable.
|
||||
- Store full thread name if pthreads (with 15 character name limit) are
|
||||
used.
|
||||
- Properly handle unaligned memory access (no performance impact).
|
||||
- Fixed broken lock identifiers in try_lock().
|
||||
|
||||
|
||||
v0.1 (2017-12-18)
|
||||
-----------------
|
||||
|
||||
- Initial release.
|
|
@ -0,0 +1,22 @@
|
|||
# Tracy Profiler
|
||||
|
||||
[![Sponsor](.github/sponsor.png)](https://github.com/sponsors/wolfpld/)
|
||||
|
||||
### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.
|
||||
|
||||
Tracy supports profiling CPU (C, C++11, Lua), GPU (OpenGL, Vulkan, OpenCL, Direct3D 11/12), memory, locks, context switches, per-frame screenshots and more.
|
||||
|
||||
For usage **and build process** instructions, consult the user manual [at the following address](https://github.com/wolfpld/tracy/releases).
|
||||
|
||||
![](doc/profiler.png)
|
||||
|
||||
![](doc/profiler2.png)
|
||||
|
||||
[Changelog](NEWS)
|
||||
|
||||
[Introduction to Tracy Profiler v0.2](https://www.youtube.com/watch?v=fB5B46lbapc)
|
||||
[New features in Tracy Profiler v0.3](https://www.youtube.com/watch?v=3SXpDpDh2Uo)
|
||||
[New features in Tracy Profiler v0.4](https://www.youtube.com/watch?v=eAkgkaO8B9o)
|
||||
[New features in Tracy Profiler v0.5](https://www.youtube.com/watch?v=P6E7qLMmzTQ)
|
||||
[New features in Tracy Profiler v0.6](https://www.youtube.com/watch?v=uJkrFgriuOo)
|
||||
[New features in Tracy Profiler v0.7](https://www.youtube.com/watch?v=_hU7vw00MZ4)
|
|
@ -0,0 +1,7 @@
|
|||
"Would be nice to have" list for 1.0 release:
|
||||
=============================================
|
||||
|
||||
* Pack queue items tightly in the queues.
|
||||
* Use level-of-detail system for plots.
|
||||
* Use per-thread lock data structures.
|
||||
* Use DTrace for BSD/OSX context switch capture.
|
|
@ -0,0 +1,259 @@
|
|||
#ifndef __TRACY_HPP__
|
||||
#define __TRACY_HPP__
|
||||
|
||||
#include "common/TracyColor.hpp"
|
||||
#include "common/TracySystem.hpp"
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#define ZoneNamed(x,y)
|
||||
#define ZoneNamedN(x,y,z)
|
||||
#define ZoneNamedC(x,y,z)
|
||||
#define ZoneNamedNC(x,y,z,w)
|
||||
|
||||
#define ZoneTransient(x,y)
|
||||
#define ZoneTransientN(x,y,z)
|
||||
|
||||
#define ZoneScoped
|
||||
#define ZoneScopedN(x)
|
||||
#define ZoneScopedC(x)
|
||||
#define ZoneScopedNC(x,y)
|
||||
|
||||
#define ZoneText(x,y)
|
||||
#define ZoneTextV(x,y,z)
|
||||
#define ZoneName(x,y)
|
||||
#define ZoneNameV(x,y,z)
|
||||
#define ZoneColor(x)
|
||||
#define ZoneColorV(x,y)
|
||||
#define ZoneValue(x)
|
||||
#define ZoneValueV(x,y)
|
||||
#define ZoneIsActive false
|
||||
#define ZoneIsActiveV(x) false
|
||||
|
||||
#define FrameMark
|
||||
#define FrameMarkNamed(x)
|
||||
#define FrameMarkStart(x)
|
||||
#define FrameMarkEnd(x)
|
||||
|
||||
#define FrameImage(x,y,z,w,a)
|
||||
|
||||
#define TracyLockable( type, varname ) type varname;
|
||||
#define TracyLockableN( type, varname, desc ) type varname;
|
||||
#define TracySharedLockable( type, varname ) type varname;
|
||||
#define TracySharedLockableN( type, varname, desc ) type varname;
|
||||
#define LockableBase( type ) type
|
||||
#define SharedLockableBase( type ) type
|
||||
#define LockMark(x) (void)x;
|
||||
#define LockableName(x,y,z);
|
||||
|
||||
#define TracyPlot(x,y)
|
||||
#define TracyPlotConfig(x,y)
|
||||
|
||||
#define TracyMessage(x,y)
|
||||
#define TracyMessageL(x)
|
||||
#define TracyMessageC(x,y,z)
|
||||
#define TracyMessageLC(x,y)
|
||||
#define TracyAppInfo(x,y)
|
||||
|
||||
#define TracyAlloc(x,y)
|
||||
#define TracyFree(x)
|
||||
#define TracySecureAlloc(x,y)
|
||||
#define TracySecureFree(x)
|
||||
|
||||
#define TracyAllocN(x,y,z)
|
||||
#define TracyFreeN(x,y)
|
||||
#define TracySecureAllocN(x,y,z)
|
||||
#define TracySecureFreeN(x,y)
|
||||
|
||||
#define ZoneNamedS(x,y,z)
|
||||
#define ZoneNamedNS(x,y,z,w)
|
||||
#define ZoneNamedCS(x,y,z,w)
|
||||
#define ZoneNamedNCS(x,y,z,w,a)
|
||||
|
||||
#define ZoneTransientS(x,y,z)
|
||||
#define ZoneTransientNS(x,y,z,w)
|
||||
|
||||
#define ZoneScopedS(x)
|
||||
#define ZoneScopedNS(x,y)
|
||||
#define ZoneScopedCS(x,y)
|
||||
#define ZoneScopedNCS(x,y,z)
|
||||
|
||||
#define TracyAllocS(x,y,z)
|
||||
#define TracyFreeS(x,y)
|
||||
#define TracySecureAllocS(x,y,z)
|
||||
#define TracySecureFreeS(x,y)
|
||||
|
||||
#define TracyAllocNS(x,y,z,w)
|
||||
#define TracyFreeNS(x,y,z)
|
||||
#define TracySecureAllocNS(x,y,z,w)
|
||||
#define TracySecureFreeNS(x,y,z)
|
||||
|
||||
#define TracyMessageS(x,y,z)
|
||||
#define TracyMessageLS(x,y)
|
||||
#define TracyMessageCS(x,y,z,w)
|
||||
#define TracyMessageLCS(x,y,z)
|
||||
|
||||
#define TracyParameterRegister(x)
|
||||
#define TracyParameterSetup(x,y,z,w)
|
||||
#define TracyIsConnected false
|
||||
|
||||
#else
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "client/TracyLock.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "client/TracyScoped.hpp"
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
|
||||
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, TRACY_CALLSTACK, active );
|
||||
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );
|
||||
#else
|
||||
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
|
||||
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, active );
|
||||
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );
|
||||
#endif
|
||||
|
||||
#define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true )
|
||||
#define ZoneScopedN( name ) ZoneNamedN( ___tracy_scoped_zone, name, true )
|
||||
#define ZoneScopedC( color ) ZoneNamedC( ___tracy_scoped_zone, color, true )
|
||||
#define ZoneScopedNC( name, color ) ZoneNamedNC( ___tracy_scoped_zone, name, color, true )
|
||||
|
||||
#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size );
|
||||
#define ZoneTextV( varname, txt, size ) varname.Text( txt, size );
|
||||
#define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size );
|
||||
#define ZoneNameV( varname, txt, size ) varname.Name( txt, size );
|
||||
#define ZoneColor( color ) ___tracy_scoped_zone.Color( color );
|
||||
#define ZoneColorV( varname, color ) varname.Color( color );
|
||||
#define ZoneValue( value ) ___tracy_scoped_zone.Value( value );
|
||||
#define ZoneValueV( varname, value ) varname.Value( value );
|
||||
#define ZoneIsActive ___tracy_scoped_zone.IsActive()
|
||||
#define ZoneIsActiveV( varname ) varname.IsActive()
|
||||
|
||||
#define FrameMark tracy::Profiler::SendFrameMark( nullptr );
|
||||
#define FrameMarkNamed( name ) tracy::Profiler::SendFrameMark( name );
|
||||
#define FrameMarkStart( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart );
|
||||
#define FrameMarkEnd( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd );
|
||||
|
||||
#define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip );
|
||||
|
||||
#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define LockableBase( type ) tracy::Lockable<type>
|
||||
#define SharedLockableBase( type ) tracy::SharedLockable<type>
|
||||
#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname );
|
||||
#define LockableName( varname, txt, size ) varname.CustomName( txt, size );
|
||||
|
||||
#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val );
|
||||
#define TracyPlotConfig( name, type ) tracy::Profiler::ConfigurePlot( name, type );
|
||||
|
||||
#define TracyAppInfo( txt, size ) tracy::Profiler::MessageAppInfo( txt, size );
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK );
|
||||
# define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK );
|
||||
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK );
|
||||
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK );
|
||||
|
||||
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false );
|
||||
# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false );
|
||||
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true );
|
||||
# define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true );
|
||||
|
||||
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name );
|
||||
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name );
|
||||
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name );
|
||||
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name );
|
||||
#else
|
||||
# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 );
|
||||
# define TracyMessageL( txt ) tracy::Profiler::Message( txt, 0 );
|
||||
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 );
|
||||
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 );
|
||||
|
||||
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, false );
|
||||
# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr, false );
|
||||
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, true );
|
||||
# define TracySecureFree( ptr ) tracy::Profiler::MemFree( ptr, true );
|
||||
|
||||
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, false, name );
|
||||
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, false, name );
|
||||
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, true, name );
|
||||
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, true, name );
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
|
||||
# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, depth, active );
|
||||
# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );
|
||||
|
||||
# define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true )
|
||||
# define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true )
|
||||
# define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true )
|
||||
# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true )
|
||||
|
||||
# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false );
|
||||
# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false );
|
||||
# define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true );
|
||||
# define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true );
|
||||
|
||||
# define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name );
|
||||
# define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name );
|
||||
# define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name );
|
||||
# define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name );
|
||||
|
||||
# define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth );
|
||||
# define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth );
|
||||
# define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth );
|
||||
# define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth );
|
||||
#else
|
||||
# define ZoneNamedS( varname, depth, active ) ZoneNamed( varname, active )
|
||||
# define ZoneNamedNS( varname, name, depth, active ) ZoneNamedN( varname, name, active )
|
||||
# define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active )
|
||||
# define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active )
|
||||
|
||||
# define ZoneTransientS( varname, depth, active ) ZoneTransient( varname, active )
|
||||
# define ZoneTransientNS( varname, name, depth, active ) ZoneTransientN( varname, name, active )
|
||||
|
||||
# define ZoneScopedS( depth ) ZoneScoped
|
||||
# define ZoneScopedNS( name, depth ) ZoneScopedN( name )
|
||||
# define ZoneScopedCS( color, depth ) ZoneScopedC( color )
|
||||
# define ZoneScopedNCS( name, color, depth ) ZoneScopedNC( name, color )
|
||||
|
||||
# define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size )
|
||||
# define TracyFreeS( ptr, depth ) TracyFree( ptr )
|
||||
# define TracySecureAllocS( ptr, size, depth ) TracySecureAlloc( ptr, size )
|
||||
# define TracySecureFreeS( ptr, depth ) TracySecureFree( ptr )
|
||||
|
||||
# define TracyAllocNS( ptr, size, depth, name ) TracyAlloc( ptr, size, name )
|
||||
# define TracyFreeNS( ptr, depth, name ) TracyFree( ptr, name )
|
||||
# define TracySecureAllocNS( ptr, size, depth, name ) TracySecureAlloc( ptr, size, name )
|
||||
# define TracySecureFreeNS( ptr, depth, name ) TracySecureFree( ptr, name )
|
||||
|
||||
# define TracyMessageS( txt, size, depth ) TracyMessage( txt, size )
|
||||
# define TracyMessageLS( txt, depth ) TracyMessageL( txt )
|
||||
# define TracyMessageCS( txt, size, color, depth ) TracyMessageC( txt, size, color )
|
||||
# define TracyMessageLCS( txt, color, depth ) TracyMessageLC( txt, color )
|
||||
#endif
|
||||
|
||||
#define TracyParameterRegister( cb ) tracy::Profiler::ParameterRegister( cb );
|
||||
#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val );
|
||||
#define TracyIsConnected tracy::GetProfiler().IsConnected()
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,252 @@
|
|||
#ifndef __TRACYC_HPP__
|
||||
#define __TRACYC_HPP__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "client/TracyCallstack.h"
|
||||
#include "common/TracyApi.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
TRACY_API void ___tracy_set_thread_name( const char* name );
|
||||
|
||||
#define TracyCSetThreadName( name ) ___tracy_set_thread_name( name );
|
||||
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
typedef const void* TracyCZoneCtx;
|
||||
|
||||
#define TracyCZone(c,x)
|
||||
#define TracyCZoneN(c,x,y)
|
||||
#define TracyCZoneC(c,x,y)
|
||||
#define TracyCZoneNC(c,x,y,z)
|
||||
#define TracyCZoneEnd(c)
|
||||
#define TracyCZoneText(c,x,y)
|
||||
#define TracyCZoneName(c,x,y)
|
||||
#define TracyCZoneColor(c,x)
|
||||
#define TracyCZoneValue(c,x)
|
||||
|
||||
#define TracyCAlloc(x,y)
|
||||
#define TracyCFree(x)
|
||||
#define TracyCSecureAlloc(x,y)
|
||||
#define TracyCSecureFree(x)
|
||||
|
||||
#define TracyCAllocN(x,y,z)
|
||||
#define TracyCFreeN(x,y)
|
||||
#define TracyCSecureAllocN(x,y,z)
|
||||
#define TracyCSecureFreeN(x,y)
|
||||
|
||||
#define TracyCFrameMark
|
||||
#define TracyCFrameMarkNamed(x)
|
||||
#define TracyCFrameMarkStart(x)
|
||||
#define TracyCFrameMarkEnd(x)
|
||||
#define TracyCFrameImage(x,y,z,w,a)
|
||||
|
||||
#define TracyCPlot(x,y)
|
||||
#define TracyCMessage(x,y)
|
||||
#define TracyCMessageL(x)
|
||||
#define TracyCMessageC(x,y,z)
|
||||
#define TracyCMessageLC(x,y)
|
||||
#define TracyCAppInfo(x,y)
|
||||
|
||||
#define TracyCZoneS(x,y,z)
|
||||
#define TracyCZoneNS(x,y,z,w)
|
||||
#define TracyCZoneCS(x,y,z,w)
|
||||
#define TracyCZoneNCS(x,y,z,w,a)
|
||||
|
||||
#define TracyCAllocS(x,y,z)
|
||||
#define TracyCFreeS(x,y)
|
||||
#define TracyCSecureAllocS(x,y,z)
|
||||
#define TracyCSecureFreeS(x,y)
|
||||
|
||||
#define TracyCAllocNS(x,y,z,w)
|
||||
#define TracyCFreeNS(x,y,z)
|
||||
#define TracyCSecureAllocNS(x,y,z,w)
|
||||
#define TracyCSecureFreeNS(x,y,z)
|
||||
|
||||
#define TracyCMessageS(x,y,z)
|
||||
#define TracyCMessageLS(x,y)
|
||||
#define TracyCMessageCS(x,y,z,w)
|
||||
#define TracyCMessageLCS(x,y,z)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef TracyConcat
|
||||
# define TracyConcat(x,y) TracyConcatIndirect(x,y)
|
||||
#endif
|
||||
#ifndef TracyConcatIndirect
|
||||
# define TracyConcatIndirect(x,y) x##y
|
||||
#endif
|
||||
|
||||
struct ___tracy_source_location_data
|
||||
{
|
||||
const char* name;
|
||||
const char* function;
|
||||
const char* file;
|
||||
uint32_t line;
|
||||
uint32_t color;
|
||||
};
|
||||
|
||||
struct ___tracy_c_zone_context
|
||||
{
|
||||
uint32_t id;
|
||||
int active;
|
||||
};
|
||||
|
||||
// Some containers don't support storing const types.
|
||||
// This struct, as visible to user, is immutable, so treat it as if const was declared here.
|
||||
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx;
|
||||
|
||||
TRACY_API void ___tracy_init_thread(void);
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz );
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz );
|
||||
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active );
|
||||
TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx );
|
||||
TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size );
|
||||
TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size );
|
||||
TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color );
|
||||
TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value );
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
#else
|
||||
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
#endif
|
||||
|
||||
#define TracyCZoneEnd( ctx ) ___tracy_emit_zone_end( ctx );
|
||||
|
||||
#define TracyCZoneText( ctx, txt, size ) ___tracy_emit_zone_text( ctx, txt, size );
|
||||
#define TracyCZoneName( ctx, txt, size ) ___tracy_emit_zone_name( ctx, txt, size );
|
||||
#define TracyCZoneColor( ctx, color ) ___tracy_emit_zone_color( ctx, color );
|
||||
#define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value );
|
||||
|
||||
|
||||
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name );
|
||||
|
||||
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack );
|
||||
TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack );
|
||||
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack );
|
||||
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack );
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 )
|
||||
# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 )
|
||||
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 )
|
||||
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 )
|
||||
|
||||
# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name )
|
||||
# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name )
|
||||
# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name )
|
||||
# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name )
|
||||
|
||||
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK );
|
||||
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK );
|
||||
# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK );
|
||||
# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK );
|
||||
#else
|
||||
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 0 );
|
||||
# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr, 0 );
|
||||
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 1 );
|
||||
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free( ptr, 1 );
|
||||
|
||||
# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 0, name );
|
||||
# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 0, name );
|
||||
# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 1, name );
|
||||
# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 1, name );
|
||||
|
||||
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 );
|
||||
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 );
|
||||
# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, 0 );
|
||||
# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, 0 );
|
||||
#endif
|
||||
|
||||
|
||||
TRACY_API void ___tracy_emit_frame_mark( const char* name );
|
||||
TRACY_API void ___tracy_emit_frame_mark_start( const char* name );
|
||||
TRACY_API void ___tracy_emit_frame_mark_end( const char* name );
|
||||
TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip );
|
||||
|
||||
#define TracyCFrameMark ___tracy_emit_frame_mark( 0 );
|
||||
#define TracyCFrameMarkNamed( name ) ___tracy_emit_frame_mark( name );
|
||||
#define TracyCFrameMarkStart( name ) ___tracy_emit_frame_mark_start( name );
|
||||
#define TracyCFrameMarkEnd( name ) ___tracy_emit_frame_mark_end( name );
|
||||
#define TracyCFrameImage( image, width, height, offset, flip ) ___tracy_emit_frame_image( image, width, height, offset, flip );
|
||||
|
||||
|
||||
TRACY_API void ___tracy_emit_plot( const char* name, double val );
|
||||
TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
|
||||
|
||||
#define TracyCPlot( name, val ) ___tracy_emit_plot( name, val );
|
||||
#define TracyCAppInfo( txt, color ) ___tracy_emit_message_appinfo( txt, color );
|
||||
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
|
||||
# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 )
|
||||
# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 )
|
||||
# define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 )
|
||||
# define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 )
|
||||
|
||||
# define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name )
|
||||
# define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name )
|
||||
# define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name )
|
||||
# define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name )
|
||||
|
||||
# define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth );
|
||||
# define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth );
|
||||
# define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth );
|
||||
# define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth );
|
||||
#else
|
||||
# define TracyCZoneS( ctx, depth, active ) TracyCZone( ctx, active )
|
||||
# define TracyCZoneNS( ctx, name, depth, active ) TracyCZoneN( ctx, name, active )
|
||||
# define TracyCZoneCS( ctx, color, depth, active ) TracyCZoneC( ctx, color, active )
|
||||
# define TracyCZoneNCS( ctx, name, color, depth, active ) TracyCZoneNC( ctx, name, color, active )
|
||||
|
||||
# define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size )
|
||||
# define TracyCFreeS( ptr, depth ) TracyCFree( ptr )
|
||||
# define TracyCSecureAllocS( ptr, size, depth ) TracyCSecureAlloc( ptr, size )
|
||||
# define TracyCSecureFreeS( ptr, depth ) TracyCSecureFree( ptr )
|
||||
|
||||
# define TracyCAllocNS( ptr, size, depth, name ) TracyCAllocN( ptr, size, name )
|
||||
# define TracyCFreeNS( ptr, depth, name ) TracyCFreeN( ptr, name )
|
||||
# define TracyCSecureAllocNS( ptr, size, depth, name ) TracyCSecureAllocN( ptr, size, name )
|
||||
# define TracyCSecureFreeNS( ptr, depth, name ) TracyCSecureFreeN( ptr, name )
|
||||
|
||||
# define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size )
|
||||
# define TracyCMessageLS( txt, depth ) TracyCMessageL( txt )
|
||||
# define TracyCMessageCS( txt, size, color, depth ) TracyCMessageC( txt, size, color )
|
||||
# define TracyCMessageLCS( txt, color, depth ) TracyCMessageLC( txt, color )
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,54 @@
|
|||
//
|
||||
// Tracy profiler
|
||||
// ----------------
|
||||
//
|
||||
// For fast integration, compile and
|
||||
// link with this source file (and none
|
||||
// other) in your executable (or in the
|
||||
// main DLL / shared object on multi-DLL
|
||||
// projects).
|
||||
//
|
||||
|
||||
// Define TRACY_ENABLE to enable profiler.
|
||||
|
||||
#include "common/TracySystem.cpp"
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(push, 0)
|
||||
#endif
|
||||
|
||||
#include "common/tracy_lz4.cpp"
|
||||
#include "client/TracyProfiler.cpp"
|
||||
#include "client/TracyCallstack.cpp"
|
||||
#include "client/TracySysTime.cpp"
|
||||
#include "client/TracySysTrace.cpp"
|
||||
#include "common/TracySocket.cpp"
|
||||
#include "client/tracy_rpmalloc.cpp"
|
||||
#include "client/TracyDxt1.cpp"
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
# include "libbacktrace/alloc.cpp"
|
||||
# include "libbacktrace/dwarf.cpp"
|
||||
# include "libbacktrace/fileline.cpp"
|
||||
# include "libbacktrace/mmapio.cpp"
|
||||
# include "libbacktrace/posix.cpp"
|
||||
# include "libbacktrace/sort.cpp"
|
||||
# include "libbacktrace/state.cpp"
|
||||
# if TRACY_HAS_CALLSTACK == 4
|
||||
# include "libbacktrace/macho.cpp"
|
||||
# else
|
||||
# include "libbacktrace/elf.cpp"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma comment(lib, "ws2_32.lib")
|
||||
# pragma comment(lib, "dbghelp.lib")
|
||||
# pragma comment(lib, "advapi32.lib")
|
||||
# pragma comment(lib, "user32.lib")
|
||||
# pragma warning(pop)
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,443 @@
|
|||
#ifndef __TRACYD3D11_HPP__
|
||||
#define __TRACYD3D11_HPP__
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#define TracyD3D11Context(device,queue) nullptr
|
||||
#define TracyD3D11Destroy(ctx)
|
||||
#define TracyD3D11ContextName(ctx, name, size)
|
||||
|
||||
#define TracyD3D11NewFrame(ctx)
|
||||
|
||||
#define TracyD3D11Zone(ctx, name)
|
||||
#define TracyD3D11ZoneC(ctx, name, color)
|
||||
#define TracyD3D11NamedZone(ctx, varname, name, active)
|
||||
#define TracyD3D11NamedZoneC(ctx, varname, name, color, active)
|
||||
#define TracyD3D12ZoneTransient(ctx, varname, name, active)
|
||||
|
||||
#define TracyD3D11ZoneS(ctx, name, depth)
|
||||
#define TracyD3D11ZoneCS(ctx, name, color, depth)
|
||||
#define TracyD3D11NamedZoneS(ctx, varname, name, depth, active)
|
||||
#define TracyD3D11NamedZoneCS(ctx, varname, name, color, depth, active)
|
||||
#define TracyD3D12ZoneTransientS(ctx, varname, name, depth, active)
|
||||
|
||||
#define TracyD3D11Collect(ctx)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class D3D11ZoneScope {};
|
||||
}
|
||||
|
||||
using TracyD3D11Ctx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#include <atomic>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "client/TracyCallstack.hpp"
|
||||
#include "common/TracyAlign.hpp"
|
||||
#include "common/TracyAlloc.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class D3D11Ctx
|
||||
{
|
||||
friend class D3D11ZoneScope;
|
||||
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
public:
|
||||
D3D11Ctx( ID3D11Device* device, ID3D11DeviceContext* devicectx )
|
||||
: m_device( device )
|
||||
, m_devicectx( devicectx )
|
||||
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
|
||||
, m_head( 0 )
|
||||
, m_tail( 0 )
|
||||
{
|
||||
assert( m_context != 255 );
|
||||
|
||||
for (int i = 0; i < QueryCount; i++)
|
||||
{
|
||||
HRESULT hr = S_OK;
|
||||
D3D11_QUERY_DESC desc;
|
||||
desc.MiscFlags = 0;
|
||||
|
||||
desc.Query = D3D11_QUERY_TIMESTAMP;
|
||||
hr |= device->CreateQuery(&desc, &m_queries[i]);
|
||||
|
||||
desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
|
||||
hr |= device->CreateQuery(&desc, &m_disjoints[i]);
|
||||
|
||||
m_disjointMap[i] = nullptr;
|
||||
|
||||
assert(SUCCEEDED(hr));
|
||||
}
|
||||
|
||||
// Force query the initial GPU timestamp (pipeline stall)
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
|
||||
UINT64 timestamp;
|
||||
for (int attempts = 0; attempts < 50; attempts++)
|
||||
{
|
||||
devicectx->Begin(m_disjoints[0]);
|
||||
devicectx->End(m_queries[0]);
|
||||
devicectx->End(m_disjoints[0]);
|
||||
devicectx->Flush();
|
||||
|
||||
while (devicectx->GetData(m_disjoints[0], &disjoint, sizeof(disjoint), 0) == S_FALSE)
|
||||
/* Nothing */;
|
||||
|
||||
if (disjoint.Disjoint)
|
||||
continue;
|
||||
|
||||
while (devicectx->GetData(m_queries[0], ×tamp, sizeof(timestamp), 0) == S_FALSE)
|
||||
/* Nothing */;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
int64_t tgpu = timestamp * (1000000000ull / disjoint.Frequency);
|
||||
int64_t tcpu = Profiler::GetTime();
|
||||
|
||||
uint8_t flags = 0;
|
||||
|
||||
const float period = 1.f;
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
|
||||
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
|
||||
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
|
||||
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
MemWrite( &item->gpuNewContext.flags, flags );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::Direct3D11 );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
~D3D11Ctx()
|
||||
{
|
||||
for (int i = 0; i < QueryCount; i++)
|
||||
{
|
||||
m_queries[i]->Release();
|
||||
m_disjoints[i]->Release();
|
||||
m_disjointMap[i] = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_context );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC( Color::Red4 );
|
||||
|
||||
if( m_tail == m_head ) return;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
m_head = m_tail = 0;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto start = m_tail;
|
||||
auto end = m_head + QueryCount;
|
||||
auto cnt = (end - start) % QueryCount;
|
||||
while (cnt > 1)
|
||||
{
|
||||
auto mid = start + cnt / 2;
|
||||
|
||||
bool available =
|
||||
m_devicectx->GetData(m_disjointMap[mid % QueryCount], nullptr, 0, D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK &&
|
||||
m_devicectx->GetData(m_queries[mid % QueryCount], nullptr, 0, D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK;
|
||||
|
||||
if (available)
|
||||
{
|
||||
start = mid;
|
||||
}
|
||||
else
|
||||
{
|
||||
end = mid;
|
||||
}
|
||||
cnt = (end - start) % QueryCount;
|
||||
}
|
||||
|
||||
start %= QueryCount;
|
||||
|
||||
while (m_tail != start)
|
||||
{
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
|
||||
UINT64 time;
|
||||
|
||||
m_devicectx->GetData(m_disjointMap[m_tail], &disjoint, sizeof(disjoint), 0);
|
||||
m_devicectx->GetData(m_queries[m_tail], &time, sizeof(time), 0);
|
||||
|
||||
time *= (1000000000ull / disjoint.Frequency);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, (int64_t)time);
|
||||
MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail);
|
||||
MemWrite(&item->gpuTime.context, m_context);
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
m_tail = (m_tail + 1) % QueryCount;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_force_inline unsigned int NextQueryId()
|
||||
{
|
||||
const auto id = m_head;
|
||||
m_head = ( m_head + 1 ) % QueryCount;
|
||||
assert( m_head != m_tail );
|
||||
return id;
|
||||
}
|
||||
|
||||
tracy_force_inline ID3D11Query* TranslateQueryId( unsigned int id )
|
||||
{
|
||||
return m_queries[id];
|
||||
}
|
||||
|
||||
tracy_force_inline ID3D11Query* MapDisjointQueryId( unsigned int id, unsigned int disjointId )
|
||||
{
|
||||
m_disjointMap[id] = m_disjoints[disjointId];
|
||||
return m_disjoints[disjointId];
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_context;
|
||||
}
|
||||
|
||||
ID3D11Device* m_device;
|
||||
ID3D11DeviceContext* m_devicectx;
|
||||
|
||||
ID3D11Query* m_queries[QueryCount];
|
||||
ID3D11Query* m_disjoints[QueryCount];
|
||||
ID3D11Query* m_disjointMap[QueryCount]; // Multiple time queries can have one disjoint query
|
||||
uint8_t m_context;
|
||||
|
||||
unsigned int m_head;
|
||||
unsigned int m_tail;
|
||||
};
|
||||
|
||||
class D3D11ZoneScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
|
||||
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
|
||||
|
||||
m_disjointId = queryId;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
|
||||
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
|
||||
|
||||
m_disjointId = queryId;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
GetProfiler().SendCallstack( depth );
|
||||
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
|
||||
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
|
||||
|
||||
m_disjointId = queryId;
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
|
||||
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
|
||||
|
||||
m_disjointId = queryId;
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline ~D3D11ZoneScope()
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = m_ctx->NextQueryId();
|
||||
m_ctx->m_devicectx->End(m_ctx->TranslateQueryId(queryId));
|
||||
m_ctx->m_devicectx->End(m_ctx->MapDisjointQueryId(queryId, m_disjointId));
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
|
||||
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() );
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
private:
|
||||
const bool m_active;
|
||||
|
||||
D3D11Ctx* m_ctx;
|
||||
unsigned int m_disjointId;
|
||||
};
|
||||
|
||||
static inline D3D11Ctx* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx )
|
||||
{
|
||||
InitRPMallocThread();
|
||||
auto ctx = (D3D11Ctx*)tracy_malloc( sizeof( D3D11Ctx ) );
|
||||
new(ctx) D3D11Ctx( device, devicectx );
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static inline void DestroyD3D11Context( D3D11Ctx* ctx )
|
||||
{
|
||||
ctx->~D3D11Ctx();
|
||||
tracy_free( ctx );
|
||||
}
|
||||
}
|
||||
|
||||
using TracyD3D11Ctx = tracy::D3D11Ctx*;
|
||||
|
||||
#define TracyD3D11Context( device, devicectx ) tracy::CreateD3D11Context( device, devicectx );
|
||||
#define TracyD3D11Destroy(ctx) tracy::DestroyD3D11Context(ctx);
|
||||
#define TracyD3D11ContextName(ctx, name, size) ctx->Name(name, size);
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZoneS( ctx, ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
|
||||
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneCS( ctx, ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
|
||||
# define TracyD3D11NamedZone( ctx, varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyD3D11ZoneTransient(ctx, varname, name, active) TracyD3D11ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active)
|
||||
#else
|
||||
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZone( ctx, ___tracy_gpu_zone, name, true )
|
||||
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneC( ctx, ___tracy_gpu_zone, name, color, true )
|
||||
# define TracyD3D11NamedZone( ctx, varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
|
||||
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
|
||||
# define TracyD3D11ZoneTransient(ctx, varname, name, active) tracy::D3D11ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), active };
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyD3D11ZoneS( ctx, name, depth ) TracyD3D11NamedZoneS( ctx, ___tracy_gpu_zone, name, depth, true )
|
||||
# define TracyD3D11ZoneCS( ctx, name, color, depth ) TracyD3D11NamedZoneCS( ctx, ___tracy_gpu_zone, name, color, depth, true )
|
||||
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
|
||||
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
|
||||
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) tracy::D3D11ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), depth, active };
|
||||
#else
|
||||
# define TracyD3D11ZoneS( ctx, name, depth, active ) TracyD3D11Zone( ctx, name )
|
||||
# define TracyD3D11ZoneCS( ctx, name, color, depth, active ) TracyD3D11ZoneC( name, color )
|
||||
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11NamedZone( ctx, varname, name, active )
|
||||
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11NamedZoneC( ctx, varname, name, color, active )
|
||||
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, name, active)
|
||||
#endif
|
||||
|
||||
#define TracyD3D11Collect( ctx ) ctx->Collect();
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,510 @@
|
|||
#ifndef __TRACYD3D12_HPP__
|
||||
#define __TRACYD3D12_HPP__
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#define TracyD3D12Context(device, queue) nullptr
|
||||
#define TracyD3D12Destroy(ctx)
|
||||
#define TracyD3D12ContextName(ctx, name, size)
|
||||
|
||||
#define TracyD3D12NewFrame(ctx)
|
||||
|
||||
#define TracyD3D12Zone(ctx, cmdList, name)
|
||||
#define TracyD3D12ZoneC(ctx, cmdList, name, color)
|
||||
#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
|
||||
#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)
|
||||
#define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active)
|
||||
|
||||
#define TracyD3D12ZoneS(ctx, cmdList, name, depth)
|
||||
#define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth)
|
||||
#define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active)
|
||||
#define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active)
|
||||
#define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active)
|
||||
|
||||
#define TracyD3D12Collect(ctx)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class D3D12ZoneScope {};
|
||||
}
|
||||
|
||||
using TracyD3D12Ctx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "client/TracyCallstack.hpp"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cassert>
|
||||
#include <d3d12.h>
|
||||
#include <dxgi.h>
|
||||
#include <wrl/client.h>
|
||||
#include <queue>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
struct D3D12QueryPayload
|
||||
{
|
||||
uint32_t m_queryIdStart = 0;
|
||||
uint32_t m_queryCount = 0;
|
||||
};
|
||||
|
||||
// Command queue context.
|
||||
class D3D12QueueCtx
|
||||
{
|
||||
friend class D3D12ZoneScope;
|
||||
|
||||
static constexpr uint32_t MaxQueries = 64 * 1024; // Queries are begin and end markers, so we can store half as many total time durations. Must be even!
|
||||
|
||||
bool m_initialized = false;
|
||||
|
||||
ID3D12Device* m_device = nullptr;
|
||||
ID3D12CommandQueue* m_queue = nullptr;
|
||||
uint8_t m_context;
|
||||
Microsoft::WRL::ComPtr<ID3D12QueryHeap> m_queryHeap;
|
||||
Microsoft::WRL::ComPtr<ID3D12Resource> m_readbackBuffer;
|
||||
|
||||
// In-progress payload.
|
||||
uint32_t m_queryLimit = MaxQueries;
|
||||
uint32_t m_queryCounter = 0;
|
||||
uint32_t m_previousQueryCounter = 0;
|
||||
|
||||
uint32_t m_activePayload = 0;
|
||||
Microsoft::WRL::ComPtr<ID3D12Fence> m_payloadFence;
|
||||
std::queue<D3D12QueryPayload> m_payloadQueue;
|
||||
|
||||
int64_t m_prevCalibration = 0;
|
||||
int64_t m_qpcToNs = int64_t{ 1000000000 / GetFrequencyQpc() };
|
||||
|
||||
public:
|
||||
D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue)
|
||||
: m_device(device)
|
||||
, m_queue(queue)
|
||||
, m_context(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed))
|
||||
{
|
||||
// Verify we support timestamp queries on this queue.
|
||||
|
||||
if (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY)
|
||||
{
|
||||
D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{};
|
||||
|
||||
if (FAILED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData))))
|
||||
{
|
||||
assert(false && "Platform does not support profiling of copy queues.");
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t timestampFrequency;
|
||||
|
||||
if (FAILED(queue->GetTimestampFrequency(×tampFrequency)))
|
||||
{
|
||||
assert(false && "Failed to get timestamp frequency.");
|
||||
}
|
||||
|
||||
uint64_t cpuTimestamp;
|
||||
uint64_t gpuTimestamp;
|
||||
|
||||
if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
|
||||
{
|
||||
assert(false && "Failed to get queue clock calibration.");
|
||||
}
|
||||
|
||||
// Save the device cpu timestamp, not the profiler's timestamp.
|
||||
m_prevCalibration = cpuTimestamp * m_qpcToNs;
|
||||
|
||||
cpuTimestamp = Profiler::GetTime();
|
||||
|
||||
D3D12_QUERY_HEAP_DESC heapDesc{};
|
||||
heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
|
||||
heapDesc.Count = m_queryLimit;
|
||||
heapDesc.NodeMask = 0; // #TODO: Support multiple adapters.
|
||||
|
||||
while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap))))
|
||||
{
|
||||
m_queryLimit /= 2;
|
||||
heapDesc.Count = m_queryLimit;
|
||||
}
|
||||
|
||||
// Create a readback buffer, which will be used as a destination for the query data.
|
||||
|
||||
D3D12_RESOURCE_DESC readbackBufferDesc{};
|
||||
readbackBufferDesc.Alignment = 0;
|
||||
readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||
readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t);
|
||||
readbackBufferDesc.Height = 1;
|
||||
readbackBufferDesc.DepthOrArraySize = 1;
|
||||
readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; // Buffers are always row major.
|
||||
readbackBufferDesc.MipLevels = 1;
|
||||
readbackBufferDesc.SampleDesc.Count = 1;
|
||||
readbackBufferDesc.SampleDesc.Quality = 0;
|
||||
readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
|
||||
|
||||
D3D12_HEAP_PROPERTIES readbackHeapProps{};
|
||||
readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK;
|
||||
readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
|
||||
readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
|
||||
readbackHeapProps.CreationNodeMask = 0;
|
||||
readbackHeapProps.VisibleNodeMask = 0; // #TODO: Support multiple adapters.
|
||||
|
||||
if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer))))
|
||||
{
|
||||
assert(false && "Failed to create query readback buffer.");
|
||||
}
|
||||
|
||||
if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence))))
|
||||
{
|
||||
assert(false && "Failed to create payload fence.");
|
||||
}
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
|
||||
MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp);
|
||||
MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp);
|
||||
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
||||
MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency));
|
||||
MemWrite(&item->gpuNewContext.context, m_context);
|
||||
MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
|
||||
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem(*item);
|
||||
#endif
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
m_initialized = true;
|
||||
}
|
||||
|
||||
void NewFrame()
|
||||
{
|
||||
m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, m_queryCounter });
|
||||
m_previousQueryCounter += m_queryCounter;
|
||||
m_queryCounter = 0;
|
||||
|
||||
if (m_previousQueryCounter >= m_queryLimit)
|
||||
{
|
||||
m_previousQueryCounter -= m_queryLimit;
|
||||
}
|
||||
|
||||
m_queue->Signal(m_payloadFence.Get(), ++m_activePayload);
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_context );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC(Color::Red4);
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if (!GetProfiler().IsConnected())
|
||||
{
|
||||
m_queryCounter = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Find out what payloads are available.
|
||||
const auto newestReadyPayload = m_payloadFence->GetCompletedValue();
|
||||
const auto payloadCount = m_payloadQueue.size() - (m_activePayload - newestReadyPayload);
|
||||
|
||||
if (!payloadCount)
|
||||
{
|
||||
return; // No payloads are available yet, exit out.
|
||||
}
|
||||
|
||||
D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) };
|
||||
|
||||
// Map the readback buffer so we can fetch the query data from the GPU.
|
||||
void* readbackBufferMapping = nullptr;
|
||||
|
||||
if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping)))
|
||||
{
|
||||
assert(false && "Failed to map readback buffer.");
|
||||
}
|
||||
|
||||
auto* timestampData = static_cast<uint64_t*>(readbackBufferMapping);
|
||||
|
||||
for (uint32_t i = 0; i < payloadCount; ++i)
|
||||
{
|
||||
const auto& payload = m_payloadQueue.front();
|
||||
|
||||
for (uint32_t j = 0; j < payload.m_queryCount; ++j)
|
||||
{
|
||||
const auto counter = (payload.m_queryIdStart + j) % m_queryLimit;
|
||||
const auto timestamp = timestampData[counter];
|
||||
const auto queryId = counter;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, timestamp);
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuTime.context, m_context);
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
m_payloadQueue.pop();
|
||||
}
|
||||
|
||||
m_readbackBuffer->Unmap(0, nullptr);
|
||||
|
||||
// Recalibrate to account for drift.
|
||||
|
||||
uint64_t cpuTimestamp;
|
||||
uint64_t gpuTimestamp;
|
||||
|
||||
if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
|
||||
{
|
||||
assert(false && "Failed to get queue clock calibration.");
|
||||
}
|
||||
|
||||
cpuTimestamp *= m_qpcToNs;
|
||||
|
||||
const auto cpuDelta = cpuTimestamp - m_prevCalibration;
|
||||
if (cpuDelta > 0)
|
||||
{
|
||||
m_prevCalibration = cpuTimestamp;
|
||||
cpuTimestamp = Profiler::GetTime();
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuCalibration);
|
||||
MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp);
|
||||
MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp);
|
||||
MemWrite(&item->gpuCalibration.cpuDelta, cpuDelta);
|
||||
MemWrite(&item->gpuCalibration.context, m_context);
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_force_inline uint32_t NextQueryId()
|
||||
{
|
||||
assert(m_queryCounter < m_queryLimit && "Submitted too many GPU queries! Consider increasing MaxQueries.");
|
||||
|
||||
const uint32_t id = (m_previousQueryCounter + m_queryCounter) % m_queryLimit;
|
||||
m_queryCounter += 2; // Allocate space for a begin and end query.
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_context;
|
||||
}
|
||||
};
|
||||
|
||||
class D3D12ZoneScope
|
||||
{
|
||||
const bool m_active;
|
||||
D3D12QueueCtx* m_ctx = nullptr;
|
||||
ID3D12GraphicsCommandList* m_cmdList = nullptr;
|
||||
uint32_t m_queryId = 0; // Used for tracking in nested zones.
|
||||
|
||||
public:
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active && GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
|
||||
m_queryId = ctx->NextQueryId();
|
||||
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, reinterpret_cast<uint64_t>(srcLocation));
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int depth, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
|
||||
m_queryId = ctx->NextQueryId();
|
||||
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, reinterpret_cast<uint64_t>(srcLocation));
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
|
||||
m_queryId = ctx->NextQueryId();
|
||||
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int depth, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
|
||||
m_queryId = ctx->NextQueryId();
|
||||
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline ~D3D12ZoneScope()
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
const auto queryId = m_queryId + 1; // Our end query slot is immediately after the begin slot.
|
||||
m_cmdList->EndQuery(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, queryId);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
|
||||
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneEnd.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
m_cmdList->ResolveQueryData(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer.Get(), m_queryId * sizeof(uint64_t));
|
||||
}
|
||||
};
|
||||
|
||||
static inline D3D12QueueCtx* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue)
|
||||
{
|
||||
InitRPMallocThread();
|
||||
|
||||
auto* ctx = static_cast<D3D12QueueCtx*>(tracy_malloc(sizeof(D3D12QueueCtx)));
|
||||
new (ctx) D3D12QueueCtx{ device, queue };
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static inline void DestroyD3D12Context(D3D12QueueCtx* ctx)
|
||||
{
|
||||
ctx->~D3D12QueueCtx();
|
||||
tracy_free(ctx);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
using TracyD3D12Ctx = tracy::D3D12QueueCtx*;
|
||||
|
||||
#define TracyD3D12Context(device, queue) tracy::CreateD3D12Context(device, queue);
|
||||
#define TracyD3D12Destroy(ctx) tracy::DestroyD3D12Context(ctx);
|
||||
#define TracyD3D12ContextName(ctx, name, size) ctx->Name(name, size);
|
||||
|
||||
#define TracyD3D12NewFrame(ctx) ctx->NewFrame();
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, ___tracy_gpu_zone, cmdList, name, TRACY_CALLSTACK, true)
|
||||
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, ___tracy_gpu_zone, cmdList, name, color, TRACY_CALLSTACK, true)
|
||||
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), TRACY_CALLSTACK, active };
|
||||
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), TRACY_CALLSTACK, active };
|
||||
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active)
|
||||
#else
|
||||
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, ___tracy_gpu_zone, cmdList, name, true)
|
||||
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, ___tracy_gpu_zone, cmdList, name, color, true)
|
||||
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), active };
|
||||
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), active };
|
||||
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) tracy::D3D12ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), cmdList, active };
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, ___tracy_gpu_zone, cmdList, name, depth, true)
|
||||
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, ___tracy_gpu_zone, cmdList, name, color, depth, true)
|
||||
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), depth, active };
|
||||
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), depth, active };
|
||||
# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) tracy::D3D12ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), cmdList, depth, active };
|
||||
#else
|
||||
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12Zone(ctx, cmdList, name)
|
||||
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12Zone(ctx, cmdList, name, color)
|
||||
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
|
||||
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)
|
||||
# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active)
|
||||
#endif
|
||||
|
||||
#define TracyD3D12Collect(ctx) ctx->Collect();
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,424 @@
|
|||
#ifndef __TRACYLUA_HPP__
|
||||
#define __TRACYLUA_HPP__
|
||||
|
||||
// Include this file after you include lua headers.
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#include <string.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
static inline int noop( lua_State* L ) { return 0; }
|
||||
}
|
||||
|
||||
static inline void LuaRegister( lua_State* L )
|
||||
{
|
||||
lua_newtable( L );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneBegin" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneBeginN" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneBeginS" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneBeginNS" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneEnd" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneText" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "ZoneName" );
|
||||
lua_pushcfunction( L, detail::noop );
|
||||
lua_setfield( L, -2, "Message" );
|
||||
lua_setglobal( L, "tracy" );
|
||||
}
|
||||
|
||||
static inline char* FindEnd( char* ptr )
|
||||
{
|
||||
unsigned int cnt = 1;
|
||||
while( cnt != 0 )
|
||||
{
|
||||
if( *ptr == '(' ) cnt++;
|
||||
else if( *ptr == ')' ) cnt--;
|
||||
ptr++;
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static inline void LuaRemove( char* script )
|
||||
{
|
||||
while( *script )
|
||||
{
|
||||
if( strncmp( script, "tracy.", 6 ) == 0 )
|
||||
{
|
||||
if( strncmp( script + 6, "Zone", 4 ) == 0 )
|
||||
{
|
||||
if( strncmp( script + 10, "End()", 5 ) == 0 )
|
||||
{
|
||||
memset( script, ' ', 15 );
|
||||
script += 15;
|
||||
}
|
||||
else if( strncmp( script + 10, "Begin()", 7 ) == 0 )
|
||||
{
|
||||
memset( script, ' ', 17 );
|
||||
script += 17;
|
||||
}
|
||||
else if( strncmp( script + 10, "Text(", 5 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 15 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else if( strncmp( script + 10, "Name(", 5 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 15 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else if( strncmp( script + 10, "BeginN(", 7 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 17 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else if( strncmp( script + 10, "BeginS(", 7 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 17 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else if( strncmp( script + 10, "BeginNS(", 8 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 18 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else
|
||||
{
|
||||
script += 10;
|
||||
}
|
||||
}
|
||||
else if( strncmp( script + 6, "Message(", 8 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 14 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else
|
||||
{
|
||||
script += 6;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
script++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits>
|
||||
|
||||
#include "common/TracyColor.hpp"
|
||||
#include "common/TracyAlign.hpp"
|
||||
#include "common/TracyForceInline.hpp"
|
||||
#include "common/TracySystem.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
TRACY_API LuaZoneState& GetLuaZoneState();
|
||||
#endif
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
|
||||
{
|
||||
assert( depth <= 64 );
|
||||
lua_Debug dbg[64];
|
||||
const char* func[64];
|
||||
uint32_t fsz[64];
|
||||
uint32_t ssz[64];
|
||||
|
||||
uint8_t cnt;
|
||||
uint16_t spaceNeeded = sizeof( cnt );
|
||||
for( cnt=0; cnt<depth; cnt++ )
|
||||
{
|
||||
if( lua_getstack( L, cnt+1, dbg+cnt ) == 0 ) break;
|
||||
lua_getinfo( L, "Snl", dbg+cnt );
|
||||
func[cnt] = dbg[cnt].name ? dbg[cnt].name : dbg[cnt].short_src;
|
||||
fsz[cnt] = uint32_t( strlen( func[cnt] ) );
|
||||
ssz[cnt] = uint32_t( strlen( dbg[cnt].source ) );
|
||||
spaceNeeded += fsz[cnt] + ssz[cnt];
|
||||
}
|
||||
spaceNeeded += cnt * ( 4 + 2 + 2 ); // source line, function string length, source string length
|
||||
|
||||
auto ptr = (char*)tracy_malloc( spaceNeeded + 2 );
|
||||
auto dst = ptr;
|
||||
memcpy( dst, &spaceNeeded, 2 ); dst += 2;
|
||||
memcpy( dst, &cnt, 1 ); dst++;
|
||||
for( uint8_t i=0; i<cnt; i++ )
|
||||
{
|
||||
const uint32_t line = dbg[i].currentline;
|
||||
memcpy( dst, &line, 4 ); dst += 4;
|
||||
assert( fsz[i] <= std::numeric_limits<uint16_t>::max() );
|
||||
memcpy( dst, fsz+i, 2 ); dst += 2;
|
||||
memcpy( dst, func[i], fsz[i] ); dst += fsz[i];
|
||||
assert( ssz[i] <= std::numeric_limits<uint16_t>::max() );
|
||||
memcpy( dst, ssz+i, 2 ); dst += 2;
|
||||
memcpy( dst, dbg[i].source, ssz[i] ), dst += ssz[i];
|
||||
}
|
||||
assert( dst - ptr == spaceNeeded + 2 );
|
||||
|
||||
TracyLfqPrepare( QueueType::CallstackAlloc );
|
||||
MemWrite( &item->callstackAllocFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->callstackAllocFat.nativePtr, (uint64_t)Callstack( depth ) );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static inline int LuaZoneBeginS( lua_State* L )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const auto zoneCnt = GetLuaZoneState().counter++;
|
||||
if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0;
|
||||
GetLuaZoneState().active = GetProfiler().IsConnected();
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_CALLSTACK
|
||||
const uint32_t depth = TRACY_CALLSTACK;
|
||||
#else
|
||||
const auto depth = uint32_t( lua_tointeger( L, 1 ) );
|
||||
#endif
|
||||
SendLuaCallstack( L, depth );
|
||||
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyLfqCommit;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int LuaZoneBeginNS( lua_State* L )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const auto zoneCnt = GetLuaZoneState().counter++;
|
||||
if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0;
|
||||
GetLuaZoneState().active = GetProfiler().IsConnected();
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_CALLSTACK
|
||||
const uint32_t depth = TRACY_CALLSTACK;
|
||||
#else
|
||||
const auto depth = uint32_t( lua_tointeger( L, 2 ) );
|
||||
#endif
|
||||
SendLuaCallstack( L, depth );
|
||||
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
size_t nsz;
|
||||
const auto name = lua_tolstring( L, 1, &nsz );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src, name, nsz );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyLfqCommit;
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int LuaZoneBegin( lua_State* L )
|
||||
{
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
return LuaZoneBeginS( L );
|
||||
#else
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const auto zoneCnt = GetLuaZoneState().counter++;
|
||||
if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0;
|
||||
GetLuaZoneState().active = GetProfiler().IsConnected();
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int LuaZoneBeginN( lua_State* L )
|
||||
{
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
return LuaZoneBeginNS( L );
|
||||
#else
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const auto zoneCnt = GetLuaZoneState().counter++;
|
||||
if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0;
|
||||
GetLuaZoneState().active = GetProfiler().IsConnected();
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
size_t nsz;
|
||||
const auto name = lua_tolstring( L, 1, &nsz );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src, name, nsz );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int LuaZoneEnd( lua_State* L )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
assert( GetLuaZoneState().counter != 0 );
|
||||
GetLuaZoneState().counter--;
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
GetLuaZoneState().active = false;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
TracyLfqPrepare( QueueType::ZoneEnd );
|
||||
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int LuaZoneText( lua_State* L )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
GetLuaZoneState().active = false;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
TracyLfqPrepare( QueueType::ZoneText );
|
||||
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int LuaZoneName( lua_State* L )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
GetLuaZoneState().active = false;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
TracyLfqPrepare( QueueType::ZoneName );
|
||||
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int LuaMessage( lua_State* L )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return 0;
|
||||
#endif
|
||||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
|
||||
TracyLfqPrepare( QueueType::Message );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
MemWrite( &item->messageFat.time, Profiler::GetTime() );
|
||||
MemWrite( &item->messageFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageFat.size, (uint16_t)size );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static inline void LuaRegister( lua_State* L )
|
||||
{
|
||||
lua_newtable( L );
|
||||
lua_pushcfunction( L, detail::LuaZoneBegin );
|
||||
lua_setfield( L, -2, "ZoneBegin" );
|
||||
lua_pushcfunction( L, detail::LuaZoneBeginN );
|
||||
lua_setfield( L, -2, "ZoneBeginN" );
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
lua_pushcfunction( L, detail::LuaZoneBeginS );
|
||||
lua_setfield( L, -2, "ZoneBeginS" );
|
||||
lua_pushcfunction( L, detail::LuaZoneBeginNS );
|
||||
lua_setfield( L, -2, "ZoneBeginNS" );
|
||||
#else
|
||||
lua_pushcfunction( L, detail::LuaZoneBegin );
|
||||
lua_setfield( L, -2, "ZoneBeginS" );
|
||||
lua_pushcfunction( L, detail::LuaZoneBeginN );
|
||||
lua_setfield( L, -2, "ZoneBeginNS" );
|
||||
#endif
|
||||
lua_pushcfunction( L, detail::LuaZoneEnd );
|
||||
lua_setfield( L, -2, "ZoneEnd" );
|
||||
lua_pushcfunction( L, detail::LuaZoneText );
|
||||
lua_setfield( L, -2, "ZoneText" );
|
||||
lua_pushcfunction( L, detail::LuaZoneName );
|
||||
lua_setfield( L, -2, "ZoneName" );
|
||||
lua_pushcfunction( L, detail::LuaMessage );
|
||||
lua_setfield( L, -2, "Message" );
|
||||
lua_setglobal( L, "tracy" );
|
||||
}
|
||||
|
||||
static inline void LuaRemove( char* script ) {}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,339 @@
|
|||
#ifndef __TRACYOPENCL_HPP__
|
||||
#define __TRACYOPENCL_HPP__
|
||||
|
||||
#if !defined TRACY_ENABLE
|
||||
|
||||
#define TracyCLContext(c, x) nullptr
|
||||
#define TracyCLDestroy(c)
|
||||
#define TracyCLContextName(c, x, y)
|
||||
|
||||
#define TracyCLNamedZone(c, x, y, z)
|
||||
#define TracyCLNamedZoneC(c, x, y, z, w)
|
||||
#define TracyCLZone(c, x)
|
||||
#define TracyCLZoneC(c, x, y)
|
||||
|
||||
#define TracyCLNamedZoneS(c, x, y, z, w)
|
||||
#define TracyCLNamedZoneCS(c, x, y, z, w, v)
|
||||
#define TracyCLZoneS(c, x, y)
|
||||
#define TracyCLZoneCS(c, x, y, z)
|
||||
|
||||
#define TracyCLNamedZoneSetEvent(x, e)
|
||||
#define TracyCLZoneSetEvent(e)
|
||||
|
||||
#define TracyCLCollect(c)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class OpenCLCtxScope {};
|
||||
}
|
||||
|
||||
using TracyCLCtx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "client/TracyCallstack.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "common/TracyAlloc.hpp"
|
||||
|
||||
namespace tracy {
|
||||
|
||||
enum class EventPhase : uint8_t
|
||||
{
|
||||
Begin,
|
||||
End
|
||||
};
|
||||
|
||||
struct EventInfo
|
||||
{
|
||||
cl_event event;
|
||||
EventPhase phase;
|
||||
};
|
||||
|
||||
class OpenCLCtx
|
||||
{
|
||||
public:
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
OpenCLCtx(cl_context context, cl_device_id device)
|
||||
: m_contextId(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed))
|
||||
, m_head(0)
|
||||
, m_tail(0)
|
||||
{
|
||||
int64_t tcpu, tgpu;
|
||||
assert(m_contextId != 255);
|
||||
|
||||
cl_int err = CL_SUCCESS;
|
||||
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
|
||||
assert(err == CL_SUCCESS);
|
||||
uint32_t dummyValue = 42;
|
||||
cl_mem dummyBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(uint32_t), nullptr, &err);
|
||||
assert(err == CL_SUCCESS);
|
||||
cl_event writeBufferEvent;
|
||||
err = clEnqueueWriteBuffer(queue, dummyBuffer, CL_FALSE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent);
|
||||
assert(err == CL_SUCCESS);
|
||||
err = clWaitForEvents(1, &writeBufferEvent);
|
||||
|
||||
tcpu = Profiler::GetTime();
|
||||
|
||||
assert(err == CL_SUCCESS);
|
||||
cl_int eventStatus;
|
||||
err = clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr);
|
||||
assert(err == CL_SUCCESS);
|
||||
assert(eventStatus == CL_COMPLETE);
|
||||
err = clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &tgpu, nullptr);
|
||||
assert(err == CL_SUCCESS);
|
||||
err = clReleaseEvent(writeBufferEvent);
|
||||
assert(err == CL_SUCCESS);
|
||||
err = clReleaseMemObject(dummyBuffer);
|
||||
assert(err == CL_SUCCESS);
|
||||
err = clReleaseCommandQueue(queue);
|
||||
assert(err == CL_SUCCESS);
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
|
||||
MemWrite(&item->gpuNewContext.cpuTime, tcpu);
|
||||
MemWrite(&item->gpuNewContext.gpuTime, tgpu);
|
||||
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
||||
MemWrite(&item->gpuNewContext.period, 1.0f);
|
||||
MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL);
|
||||
MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId);
|
||||
MemWrite(&item->gpuNewContext.flags, (uint8_t)0);
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem(*item);
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, (uint8_t)m_contextId );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC(Color::Red4);
|
||||
|
||||
if (m_tail == m_head) return;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if (!GetProfiler().IsConnected())
|
||||
{
|
||||
m_head = m_tail = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
while (m_tail != m_head)
|
||||
{
|
||||
EventInfo eventInfo = m_query[m_tail];
|
||||
cl_event event = eventInfo.event;
|
||||
cl_int eventStatus;
|
||||
cl_int err = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr);
|
||||
assert(err == CL_SUCCESS);
|
||||
if (eventStatus != CL_COMPLETE) return;
|
||||
|
||||
cl_int eventInfoQuery = (eventInfo.phase == EventPhase::Begin)
|
||||
? CL_PROFILING_COMMAND_START
|
||||
: CL_PROFILING_COMMAND_END;
|
||||
|
||||
cl_ulong eventTimeStamp = 0;
|
||||
err = clGetEventProfilingInfo(event, eventInfoQuery, sizeof(cl_ulong), &eventTimeStamp, nullptr);
|
||||
assert(err == CL_SUCCESS);
|
||||
assert(eventTimeStamp != 0);
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, (int64_t)eventTimeStamp);
|
||||
MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail);
|
||||
MemWrite(&item->gpuTime.context, m_contextId);
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
if (eventInfo.phase == EventPhase::End)
|
||||
{
|
||||
// Done with the event, so release it
|
||||
err = clReleaseEvent(event);
|
||||
assert(err == CL_SUCCESS);
|
||||
}
|
||||
|
||||
m_tail = (m_tail + 1) % QueryCount;
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_contextId;
|
||||
}
|
||||
|
||||
tracy_force_inline unsigned int NextQueryId(EventInfo eventInfo)
|
||||
{
|
||||
const auto id = m_head;
|
||||
m_head = (m_head + 1) % QueryCount;
|
||||
assert(m_head != m_tail);
|
||||
m_query[id] = eventInfo;
|
||||
return id;
|
||||
}
|
||||
|
||||
tracy_force_inline EventInfo& GetQuery(unsigned int id)
|
||||
{
|
||||
assert(id < QueryCount);
|
||||
return m_query[id];
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
unsigned int m_contextId;
|
||||
|
||||
EventInfo m_query[QueryCount];
|
||||
unsigned int m_head;
|
||||
unsigned int m_tail;
|
||||
|
||||
};
|
||||
|
||||
class OpenCLCtxScope {
|
||||
public:
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(is_active)
|
||||
#endif
|
||||
, m_ctx(ctx)
|
||||
, m_event(nullptr)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int depth, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(is_active)
|
||||
#endif
|
||||
, m_ctx(ctx)
|
||||
, m_event(nullptr)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
|
||||
|
||||
GetProfiler().SendCallstack(depth);
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline void SetEvent(cl_event event)
|
||||
{
|
||||
if (!m_active) return;
|
||||
m_event = event;
|
||||
cl_int err = clRetainEvent(m_event);
|
||||
assert(err == CL_SUCCESS);
|
||||
m_ctx->GetQuery(m_beginQueryId).event = m_event;
|
||||
}
|
||||
|
||||
tracy_force_inline ~OpenCLCtxScope()
|
||||
{
|
||||
if (!m_active) return;
|
||||
const auto queryId = m_ctx->NextQueryId(EventInfo{ m_event, EventPhase::End });
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
|
||||
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneEnd.queryId, (uint16_t)queryId);
|
||||
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
const bool m_active;
|
||||
OpenCLCtx* m_ctx;
|
||||
cl_event m_event;
|
||||
unsigned int m_beginQueryId;
|
||||
};
|
||||
|
||||
static inline OpenCLCtx* CreateCLContext(cl_context context, cl_device_id device)
|
||||
{
|
||||
InitRPMallocThread();
|
||||
auto ctx = (OpenCLCtx*)tracy_malloc(sizeof(OpenCLCtx));
|
||||
new (ctx) OpenCLCtx(context, device);
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static inline void DestroyCLContext(OpenCLCtx* ctx)
|
||||
{
|
||||
ctx->~OpenCLCtx();
|
||||
tracy_free(ctx);
|
||||
}
|
||||
|
||||
} // namespace tracy
|
||||
|
||||
using TracyCLCtx = tracy::OpenCLCtx*;
|
||||
|
||||
#define TracyCLContext(context, device) tracy::CreateCLContext(context, device);
|
||||
#define TracyCLDestroy(ctx) tracy::DestroyCLContext(ctx);
|
||||
#define TracyCLContextName(context, name, size) ctx->Name(name, size);
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCLZone(ctx, name) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, TRACY_CALLSTACK, true)
|
||||
# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, TRACY_CALLSTACK, true)
|
||||
#else
|
||||
# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active);
|
||||
# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active);
|
||||
# define TracyCLZone(ctx, name) TracyCLNamedZone(ctx, __tracy_gpu_zone, name, true)
|
||||
# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneC(ctx, __tracy_gpu_zone, name, color, true )
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyCLNamedZoneS(ctx, varname, name, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active);
|
||||
# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active);
|
||||
# define TracyCLZoneS(ctx, name, depth) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, depth, true)
|
||||
# define TracyCLZoneCS(ctx, name, color, depth) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, depth, true)
|
||||
#else
|
||||
# define TracyCLNamedZoneS(ctx, varname, name, depth, active) TracyCLNamedZone(ctx, varname, name, active)
|
||||
# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) TracyCLNamedZoneC(ctx, varname, name, color, active)
|
||||
# define TracyCLZoneS(ctx, name, depth) TracyCLZone(ctx, name)
|
||||
# define TracyCLZoneCS(ctx, name, color, depth) TracyCLZoneC(ctx, name, color)
|
||||
#endif
|
||||
|
||||
#define TracyCLNamedZoneSetEvent(varname, event) varname.SetEvent(event)
|
||||
#define TracyCLZoneSetEvent(event) __tracy_gpu_zone.SetEvent(event)
|
||||
|
||||
#define TracyCLCollect(ctx) ctx->Collect()
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,323 @@
|
|||
#ifndef __TRACYOPENGL_HPP__
|
||||
#define __TRACYOPENGL_HPP__
|
||||
|
||||
#if !defined GL_TIMESTAMP && !defined GL_TIMESTAMP_EXT
|
||||
# error "You must include OpenGL 3.2 headers before including TracyOpenGL.hpp"
|
||||
#endif
|
||||
|
||||
#if !defined TRACY_ENABLE || defined __APPLE__
|
||||
|
||||
#define TracyGpuContext
|
||||
#define TracyGpuContextName(x,y)
|
||||
#define TracyGpuNamedZone(x,y,z)
|
||||
#define TracyGpuNamedZoneC(x,y,z,w)
|
||||
#define TracyGpuZone(x)
|
||||
#define TracyGpuZoneC(x,y)
|
||||
#define TracyGpuZoneTransient(x,y,z)
|
||||
#define TracyGpuCollect
|
||||
|
||||
#define TracyGpuNamedZoneS(x,y,z,w)
|
||||
#define TracyGpuNamedZoneCS(x,y,z,w,a)
|
||||
#define TracyGpuZoneS(x,y)
|
||||
#define TracyGpuZoneCS(x,y,z)
|
||||
#define TracyGpuZoneTransientS(x,y,z,w)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
struct SourceLocationData;
|
||||
class GpuCtxScope
|
||||
{
|
||||
public:
|
||||
GpuCtxScope( const SourceLocationData*, bool ) {}
|
||||
GpuCtxScope( const SourceLocationData*, int, bool ) {}
|
||||
};
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <atomic>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "client/TracyCallstack.hpp"
|
||||
#include "common/TracyAlign.hpp"
|
||||
#include "common/TracyAlloc.hpp"
|
||||
|
||||
#if !defined GL_TIMESTAMP && defined GL_TIMESTAMP_EXT
|
||||
# define GL_TIMESTAMP GL_TIMESTAMP_EXT
|
||||
# define GL_QUERY_COUNTER_BITS GL_QUERY_COUNTER_BITS_EXT
|
||||
# define glGetQueryObjectiv glGetQueryObjectivEXT
|
||||
# define glGetQueryObjectui64v glGetQueryObjectui64vEXT
|
||||
# define glQueryCounter glQueryCounterEXT
|
||||
#endif
|
||||
|
||||
#define TracyGpuContext tracy::InitRPMallocThread(); tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;
|
||||
#define TracyGpuContextName( name, size ) tracy::GetGpuCtx().ptr->Name( name, size );
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
|
||||
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
|
||||
# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );
|
||||
#else
|
||||
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
|
||||
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
|
||||
# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true )
|
||||
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true )
|
||||
# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );
|
||||
#endif
|
||||
#define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect();
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyGpuNamedZoneS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
|
||||
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
|
||||
# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true )
|
||||
# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true )
|
||||
# define TracyGpuZoneTransientS( varname, name, depth, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );
|
||||
#else
|
||||
# define TracyGpuNamedZoneS( varname, name, depth, active ) TracyGpuNamedZone( varname, name, active )
|
||||
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) TracyGpuNamedZoneC( varname, name, color, active )
|
||||
# define TracyGpuZoneS( name, depth ) TracyGpuZone( name )
|
||||
# define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color )
|
||||
# define TracyGpuZoneTransientS( varname, name, depth, active ) TracyGpuZoneTransient( varname, name, active )
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class GpuCtx
|
||||
{
|
||||
friend class GpuCtxScope;
|
||||
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
public:
|
||||
GpuCtx()
|
||||
: m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
|
||||
, m_head( 0 )
|
||||
, m_tail( 0 )
|
||||
{
|
||||
assert( m_context != 255 );
|
||||
|
||||
glGenQueries( QueryCount, m_query );
|
||||
|
||||
int64_t tgpu;
|
||||
glGetInteger64v( GL_TIMESTAMP, &tgpu );
|
||||
int64_t tcpu = Profiler::GetTime();
|
||||
|
||||
GLint bits;
|
||||
glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
|
||||
|
||||
const float period = 1.f;
|
||||
const auto thread = GetThreadHandle();
|
||||
TracyLfqPrepare( QueueType::GpuNewContext );
|
||||
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
|
||||
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
|
||||
MemWrite( &item->gpuNewContext.thread, thread );
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_context );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC( Color::Red4 );
|
||||
|
||||
if( m_tail == m_head ) return;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
m_head = m_tail = 0;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
while( m_tail != m_head )
|
||||
{
|
||||
GLint available;
|
||||
glGetQueryObjectiv( m_query[m_tail], GL_QUERY_RESULT_AVAILABLE, &available );
|
||||
if( !available ) return;
|
||||
|
||||
uint64_t time;
|
||||
glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time );
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuTime );
|
||||
MemWrite( &item->gpuTime.gpuTime, (int64_t)time );
|
||||
MemWrite( &item->gpuTime.queryId, (uint16_t)m_tail );
|
||||
MemWrite( &item->gpuTime.context, m_context );
|
||||
TracyLfqCommit;
|
||||
|
||||
m_tail = ( m_tail + 1 ) % QueryCount;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_force_inline unsigned int NextQueryId()
|
||||
{
|
||||
const auto id = m_head;
|
||||
m_head = ( m_head + 1 ) % QueryCount;
|
||||
assert( m_head != m_tail );
|
||||
return id;
|
||||
}
|
||||
|
||||
tracy_force_inline unsigned int TranslateOpenGlQueryId( unsigned int id )
|
||||
{
|
||||
return m_query[id];
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_context;
|
||||
}
|
||||
|
||||
unsigned int m_query[QueryCount];
|
||||
uint8_t m_context;
|
||||
|
||||
unsigned int m_head;
|
||||
unsigned int m_tail;
|
||||
};
|
||||
|
||||
class GpuCtxScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuZoneBegin );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
GetProfiler().SendCallstack( depth );
|
||||
|
||||
const auto thread = GetThreadHandle();
|
||||
TracyLfqPrepare( QueueType::GpuZoneBeginCallstack );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.thread, thread );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
GetProfiler().SendCallstack( depth );
|
||||
|
||||
const auto thread = GetThreadHandle();
|
||||
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLocCallstack );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.thread, thread );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline ~GpuCtxScope()
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuZoneEnd );
|
||||
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
||||
memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) );
|
||||
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneEnd.context, GetGpuCtx().ptr->GetId() );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
private:
|
||||
const bool m_active;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,510 @@
|
|||
#ifndef __TRACYVULKAN_HPP__
|
||||
#define __TRACYVULKAN_HPP__
|
||||
|
||||
#if !defined TRACY_ENABLE
|
||||
|
||||
#define TracyVkContext(x,y,z,w) nullptr
|
||||
#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr
|
||||
#define TracyVkDestroy(x)
|
||||
#define TracyVkContextName(c,x,y)
|
||||
#define TracyVkNamedZone(c,x,y,z,w)
|
||||
#define TracyVkNamedZoneC(c,x,y,z,w,a)
|
||||
#define TracyVkZone(c,x,y)
|
||||
#define TracyVkZoneC(c,x,y,z)
|
||||
#define TracyVkZoneTransient(c,x,y,z,w)
|
||||
#define TracyVkCollect(c,x)
|
||||
|
||||
#define TracyVkNamedZoneS(c,x,y,z,w,a)
|
||||
#define TracyVkNamedZoneCS(c,x,y,z,w,v,a)
|
||||
#define TracyVkZoneS(c,x,y,z)
|
||||
#define TracyVkZoneCS(c,x,y,z,w)
|
||||
#define TracyVkZoneTransientS(c,x,y,z,w,a)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class VkCtxScope {};
|
||||
}
|
||||
|
||||
using TracyVkCtx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <vulkan/vulkan.h>
|
||||
#include "Tracy.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "client/TracyCallstack.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class VkCtx
|
||||
{
|
||||
friend class VkCtxScope;
|
||||
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
public:
|
||||
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT )
|
||||
: m_device( device )
|
||||
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
|
||||
, m_head( 0 )
|
||||
, m_tail( 0 )
|
||||
, m_oldCnt( 0 )
|
||||
, m_queryCount( QueryCount )
|
||||
, m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT )
|
||||
{
|
||||
assert( m_context != 255 );
|
||||
|
||||
if( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && _vkGetCalibratedTimestampsEXT )
|
||||
{
|
||||
uint32_t num;
|
||||
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, nullptr );
|
||||
if( num > 4 ) num = 4;
|
||||
VkTimeDomainEXT data[4];
|
||||
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, data );
|
||||
VkTimeDomainEXT supportedDomain = VK_TIME_DOMAIN_MAX_ENUM_EXT;
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT;
|
||||
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
|
||||
supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT;
|
||||
#endif
|
||||
for( uint32_t i=0; i<num; i++ )
|
||||
{
|
||||
if( data[i] == supportedDomain )
|
||||
{
|
||||
m_timeDomain = data[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VkPhysicalDeviceProperties prop;
|
||||
vkGetPhysicalDeviceProperties( physdev, &prop );
|
||||
const float period = prop.limits.timestampPeriod;
|
||||
|
||||
VkQueryPoolCreateInfo poolInfo = {};
|
||||
poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
|
||||
poolInfo.queryCount = m_queryCount;
|
||||
poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
|
||||
while( vkCreateQueryPool( device, &poolInfo, nullptr, &m_query ) != VK_SUCCESS )
|
||||
{
|
||||
m_queryCount /= 2;
|
||||
poolInfo.queryCount = m_queryCount;
|
||||
}
|
||||
|
||||
VkCommandBufferBeginInfo beginInfo = {};
|
||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
|
||||
VkSubmitInfo submitInfo = {};
|
||||
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||
submitInfo.commandBufferCount = 1;
|
||||
submitInfo.pCommandBuffers = &cmdbuf;
|
||||
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
|
||||
int64_t tcpu, tgpu;
|
||||
if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
{
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
|
||||
tcpu = Profiler::GetTime();
|
||||
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
|
||||
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
}
|
||||
else
|
||||
{
|
||||
enum { NumProbes = 32 };
|
||||
|
||||
VkCalibratedTimestampInfoEXT spec[2] = {
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
|
||||
};
|
||||
uint64_t ts[2];
|
||||
uint64_t deviation[NumProbes];
|
||||
for( int i=0; i<NumProbes; i++ )
|
||||
{
|
||||
_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, deviation+i );
|
||||
}
|
||||
uint64_t minDeviation = deviation[0];
|
||||
for( int i=1; i<NumProbes; i++ )
|
||||
{
|
||||
if( minDeviation > deviation[i] )
|
||||
{
|
||||
minDeviation = deviation[i];
|
||||
}
|
||||
}
|
||||
m_deviation = minDeviation * 3 / 2;
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
|
||||
#endif
|
||||
|
||||
Calibrate( device, m_prevCalibration, tgpu );
|
||||
tcpu = Profiler::GetTime();
|
||||
}
|
||||
|
||||
uint8_t flags = 0;
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
|
||||
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
|
||||
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
|
||||
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
MemWrite( &item->gpuNewContext.flags, flags );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount );
|
||||
}
|
||||
|
||||
~VkCtx()
|
||||
{
|
||||
tracy_free( m_res );
|
||||
vkDestroyQueryPool( m_device, m_query, nullptr );
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_context );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect( VkCommandBuffer cmdbuf )
|
||||
{
|
||||
ZoneScopedC( Color::Red4 );
|
||||
|
||||
if( m_tail == m_head ) return;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
|
||||
m_head = m_tail = m_oldCnt = 0;
|
||||
int64_t tgpu;
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
unsigned int cnt;
|
||||
if( m_oldCnt != 0 )
|
||||
{
|
||||
cnt = m_oldCnt;
|
||||
m_oldCnt = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cnt = m_head < m_tail ? m_queryCount - m_tail : m_head - m_tail;
|
||||
}
|
||||
|
||||
if( vkGetQueryPoolResults( m_device, m_query, m_tail, cnt, sizeof( int64_t ) * m_queryCount, m_res, sizeof( int64_t ), VK_QUERY_RESULT_64_BIT ) == VK_NOT_READY )
|
||||
{
|
||||
m_oldCnt = cnt;
|
||||
return;
|
||||
}
|
||||
|
||||
for( unsigned int idx=0; idx<cnt; idx++ )
|
||||
{
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuTime );
|
||||
MemWrite( &item->gpuTime.gpuTime, m_res[idx] );
|
||||
MemWrite( &item->gpuTime.queryId, uint16_t( m_tail + idx ) );
|
||||
MemWrite( &item->gpuTime.context, m_context );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
{
|
||||
int64_t tgpu, tcpu;
|
||||
Calibrate( m_device, tcpu, tgpu );
|
||||
const auto refCpu = Profiler::GetTime();
|
||||
const auto delta = tcpu - m_prevCalibration;
|
||||
if( delta > 0 )
|
||||
{
|
||||
m_prevCalibration = tcpu;
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuCalibration );
|
||||
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
|
||||
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
|
||||
MemWrite( &item->gpuCalibration.cpuDelta, delta );
|
||||
MemWrite( &item->gpuCalibration.context, m_context );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
|
||||
|
||||
m_tail += cnt;
|
||||
if( m_tail == m_queryCount ) m_tail = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_force_inline unsigned int NextQueryId()
|
||||
{
|
||||
const auto id = m_head;
|
||||
m_head = ( m_head + 1 ) % m_queryCount;
|
||||
assert( m_head != m_tail );
|
||||
return id;
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_context;
|
||||
}
|
||||
|
||||
tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu )
|
||||
{
|
||||
assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );
|
||||
VkCalibratedTimestampInfoEXT spec[2] = {
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
|
||||
};
|
||||
uint64_t ts[2];
|
||||
uint64_t deviation;
|
||||
do
|
||||
{
|
||||
m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation );
|
||||
}
|
||||
while( deviation > m_deviation );
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
tGpu = ts[0];
|
||||
tCpu = ts[1] * m_qpcToNs;
|
||||
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
|
||||
tGpu = ts[0];
|
||||
tCpu = ts[1];
|
||||
#else
|
||||
assert( false );
|
||||
#endif
|
||||
}
|
||||
|
||||
VkDevice m_device;
|
||||
VkQueryPool m_query;
|
||||
VkTimeDomainEXT m_timeDomain;
|
||||
uint64_t m_deviation;
|
||||
int64_t m_qpcToNs;
|
||||
int64_t m_prevCalibration;
|
||||
uint8_t m_context;
|
||||
|
||||
unsigned int m_head;
|
||||
unsigned int m_tail;
|
||||
unsigned int m_oldCnt;
|
||||
unsigned int m_queryCount;
|
||||
|
||||
int64_t* m_res;
|
||||
|
||||
PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT;
|
||||
};
|
||||
|
||||
class VkCtxScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
|
||||
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline ~VkCtxScope()
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = m_ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
|
||||
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
private:
|
||||
const bool m_active;
|
||||
|
||||
VkCommandBuffer m_cmdbuf;
|
||||
VkCtx* m_ctx;
|
||||
};
|
||||
|
||||
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
|
||||
{
|
||||
InitRPMallocThread();
|
||||
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
|
||||
new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static inline void DestroyVkContext( VkCtx* ctx )
|
||||
{
|
||||
ctx->~VkCtx();
|
||||
tracy_free( ctx );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
using TracyVkCtx = tracy::VkCtx*;
|
||||
|
||||
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr );
|
||||
#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct );
|
||||
#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx );
|
||||
#define TracyVkContextName( ctx, name, size ) ctx->Name( name, size );
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
|
||||
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
|
||||
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true )
|
||||
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true )
|
||||
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) TracyVkZoneTransientS( ctx, varname, cmdbuf, name, TRACY_CALLSTACK, active )
|
||||
#else
|
||||
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
|
||||
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
|
||||
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true )
|
||||
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true )
|
||||
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) tracy::VkCtxScope varname( ctx, __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), cmdbuf, active );
|
||||
#endif
|
||||
#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf );
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
|
||||
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
|
||||
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true )
|
||||
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true )
|
||||
# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) tracy::VkCtxScope varname( ctx, __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), cmdbuf, depth, active );
|
||||
#else
|
||||
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) TracyVkNamedZone( ctx, varname, cmdbuf, name, active )
|
||||
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active )
|
||||
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkZone( ctx, cmdbuf, name )
|
||||
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkZoneC( ctx, cmdbuf, name, color )
|
||||
# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) TracyVkZoneTransient( ctx, varname, cmdbuf, name, active )
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,263 @@
|
|||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
#else
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <chrono>
|
||||
#include <inttypes.h>
|
||||
#include <mutex>
|
||||
#include <signal.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "../../common/TracyProtocol.hpp"
|
||||
#include "../../server/TracyFileWrite.hpp"
|
||||
#include "../../server/TracyMemory.hpp"
|
||||
#include "../../server/TracyPrint.hpp"
|
||||
#include "../../server/TracyStackFrames.hpp"
|
||||
#include "../../server/TracyWorker.hpp"
|
||||
|
||||
#ifdef _WIN32
|
||||
# include "../../getopt/getopt.h"
|
||||
#endif
|
||||
|
||||
|
||||
bool disconnect = false;
|
||||
|
||||
void SigInt( int )
|
||||
{
|
||||
disconnect = true;
|
||||
}
|
||||
|
||||
[[noreturn]] void Usage()
|
||||
{
|
||||
printf( "Usage: capture -o output.tracy [-a address] [-p port] [-f]\n" );
|
||||
exit( 1 );
|
||||
}
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
if( !AttachConsole( ATTACH_PARENT_PROCESS ) )
|
||||
{
|
||||
AllocConsole();
|
||||
SetConsoleMode( GetStdHandle( STD_OUTPUT_HANDLE ), 0x07 );
|
||||
}
|
||||
#endif
|
||||
|
||||
bool overwrite = false;
|
||||
const char* address = "127.0.0.1";
|
||||
const char* output = nullptr;
|
||||
int port = 8086;
|
||||
|
||||
int c;
|
||||
while( ( c = getopt( argc, argv, "a:o:p:f" ) ) != -1 )
|
||||
{
|
||||
switch( c )
|
||||
{
|
||||
case 'a':
|
||||
address = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
output = optarg;
|
||||
break;
|
||||
case 'p':
|
||||
port = atoi( optarg );
|
||||
break;
|
||||
case 'f':
|
||||
overwrite = true;
|
||||
break;
|
||||
default:
|
||||
Usage();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( !address || !output ) Usage();
|
||||
|
||||
struct stat st;
|
||||
if( stat( output, &st ) == 0 && !overwrite )
|
||||
{
|
||||
printf( "Output file %s already exists! Use -f to force overwrite.\n", output );
|
||||
return 4;
|
||||
}
|
||||
|
||||
FILE* test = fopen( output, "wb" );
|
||||
if( !test )
|
||||
{
|
||||
printf( "Cannot open output file %s for writing!\n", output );
|
||||
return 5;
|
||||
}
|
||||
fclose( test );
|
||||
unlink( output );
|
||||
|
||||
printf( "Connecting to %s:%i...", address, port );
|
||||
fflush( stdout );
|
||||
tracy::Worker worker( address, port );
|
||||
while( !worker.IsConnected() )
|
||||
{
|
||||
const auto handshake = worker.GetHandshakeStatus();
|
||||
if( handshake == tracy::HandshakeProtocolMismatch )
|
||||
{
|
||||
printf( "\nThe client you are trying to connect to uses incompatible protocol version.\nMake sure you are using the same Tracy version on both client and server.\n" );
|
||||
return 1;
|
||||
}
|
||||
if( handshake == tracy::HandshakeNotAvailable )
|
||||
{
|
||||
printf( "\nThe client you are trying to connect to is no longer able to sent profiling data,\nbecause another server was already connected to it.\nYou can do the following:\n\n 1. Restart the client application.\n 2. Rebuild the client application with on-demand mode enabled.\n" );
|
||||
return 2;
|
||||
}
|
||||
if( handshake == tracy::HandshakeDropped )
|
||||
{
|
||||
printf( "\nThe client you are trying to connect to has disconnected during the initial\nconnection handshake. Please check your network configuration.\n" );
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
while( !worker.HasData() ) std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
|
||||
printf( "\nQueue delay: %s\nTimer resolution: %s\n", tracy::TimeToString( worker.GetDelay() ), tracy::TimeToString( worker.GetResolution() ) );
|
||||
|
||||
#ifdef _WIN32
|
||||
signal( SIGINT, SigInt );
|
||||
#else
|
||||
struct sigaction sigint, oldsigint;
|
||||
memset( &sigint, 0, sizeof( sigint ) );
|
||||
sigint.sa_handler = SigInt;
|
||||
sigaction( SIGINT, &sigint, &oldsigint );
|
||||
#endif
|
||||
|
||||
auto& lock = worker.GetMbpsDataLock();
|
||||
|
||||
const auto t0 = std::chrono::high_resolution_clock::now();
|
||||
while( worker.IsConnected() )
|
||||
{
|
||||
if( disconnect )
|
||||
{
|
||||
worker.Disconnect();
|
||||
disconnect = false;
|
||||
}
|
||||
|
||||
lock.lock();
|
||||
const auto mbps = worker.GetMbpsData().back();
|
||||
const auto compRatio = worker.GetCompRatio();
|
||||
const auto netTotal = worker.GetDataTransferred();
|
||||
lock.unlock();
|
||||
|
||||
if( mbps < 0.1f )
|
||||
{
|
||||
printf( "\33[2K\r\033[36;1m%7.2f Kbps", mbps * 1000.f );
|
||||
}
|
||||
else
|
||||
{
|
||||
printf( "\33[2K\r\033[36;1m%7.2f Mbps", mbps );
|
||||
}
|
||||
printf( " \033[0m /\033[36;1m%5.1f%% \033[0m=\033[33;1m%7.2f Mbps \033[0m| \033[33mNet: \033[32m%s \033[0m| \033[33mMem: \033[31;1m%s\033[0m | \033[33mTime: %s\033[0m",
|
||||
compRatio * 100.f,
|
||||
mbps / compRatio,
|
||||
tracy::MemSizeToString( netTotal ),
|
||||
tracy::MemSizeToString( tracy::memUsage ),
|
||||
tracy::TimeToString( worker.GetLastTime() ) );
|
||||
fflush( stdout );
|
||||
|
||||
std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
|
||||
}
|
||||
const auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
const auto& failure = worker.GetFailureType();
|
||||
if( failure != tracy::Worker::Failure::None )
|
||||
{
|
||||
printf( "\n\033[31;1mInstrumentation failure: %s\033[0m", tracy::Worker::GetFailureString( failure ) );
|
||||
auto& fd = worker.GetFailureData();
|
||||
if( fd.callstack != 0 )
|
||||
{
|
||||
printf( "\n\033[1mFailure callstack:\033[0m\n" );
|
||||
auto& cs = worker.GetCallstack( fd.callstack );
|
||||
int fidx = 0;
|
||||
int bidx = 0;
|
||||
for( auto& entry : cs )
|
||||
{
|
||||
auto frameData = worker.GetCallstackFrame( entry );
|
||||
if( !frameData )
|
||||
{
|
||||
printf( "%3i. %p\n", fidx++, (void*)worker.GetCanonicalPointer( entry ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto fsz = frameData->size;
|
||||
for( uint8_t f=0; f<fsz; f++ )
|
||||
{
|
||||
const auto& frame = frameData->data[f];
|
||||
auto txt = worker.GetString( frame.name );
|
||||
|
||||
if( fidx == 0 && f != fsz-1 )
|
||||
{
|
||||
auto test = tracy::s_tracyStackFrames;
|
||||
bool match = false;
|
||||
do
|
||||
{
|
||||
if( strcmp( txt, *test ) == 0 )
|
||||
{
|
||||
match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
while( *++test );
|
||||
if( match ) continue;
|
||||
}
|
||||
|
||||
bidx++;
|
||||
|
||||
if( f == fsz-1 )
|
||||
{
|
||||
printf( "%3i. ", fidx++ );
|
||||
}
|
||||
else
|
||||
{
|
||||
printf( "\033[30;1minl. " );
|
||||
}
|
||||
printf( "\033[0;36m%s ", txt );
|
||||
txt = worker.GetString( frame.file );
|
||||
if( frame.line == 0 )
|
||||
{
|
||||
printf( "\033[33m(%s)", txt );
|
||||
}
|
||||
else
|
||||
{
|
||||
printf( "\033[33m(%s:%" PRIu32 ")", txt, frame.line );
|
||||
}
|
||||
if( frameData->imageName.Active() )
|
||||
{
|
||||
printf( "\033[35m %s\033[0m\n", worker.GetString( frameData->imageName ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
printf( "\033[0m\n" );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printf( "\nFrames: %" PRIu64 "\nTime span: %s\nZones: %s\nElapsed time: %s\nSaving trace...",
|
||||
worker.GetFrameCount( *worker.GetFramesBase() ), tracy::TimeToString( worker.GetLastTime() ), tracy::RealToString( worker.GetZoneCount() ),
|
||||
tracy::TimeToString( std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count() ) );
|
||||
fflush( stdout );
|
||||
auto f = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output ) );
|
||||
if( f )
|
||||
{
|
||||
worker.Write( *f, false );
|
||||
printf( " \033[32;1mdone!\033[0m\n" );
|
||||
f->Finish();
|
||||
const auto stats = f->GetCompressionStatistics();
|
||||
printf( "Trace size %s (%.2f%% ratio)\n", tracy::MemSizeToString( stats.second ), 100.f * stats.second / stats.first );
|
||||
}
|
||||
else
|
||||
{
|
||||
printf( " \033[31;1failed!\033[0m\n" );
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,349 @@
|
|||
namespace tracy
|
||||
{
|
||||
|
||||
#if defined __linux__ && defined __ARM_ARCH
|
||||
|
||||
static const char* DecodeArmImplementer( uint32_t v )
|
||||
{
|
||||
static char buf[16];
|
||||
switch( v )
|
||||
{
|
||||
case 0x41: return "ARM";
|
||||
case 0x42: return "Broadcom";
|
||||
case 0x43: return "Cavium";
|
||||
case 0x44: return "DEC";
|
||||
case 0x46: return "Fujitsu";
|
||||
case 0x48: return "HiSilicon";
|
||||
case 0x49: return "Infineon";
|
||||
case 0x4d: return "Motorola";
|
||||
case 0x4e: return "Nvidia";
|
||||
case 0x50: return "Applied Micro";
|
||||
case 0x51: return "Qualcomm";
|
||||
case 0x53: return "Samsung";
|
||||
case 0x54: return "Texas Instruments";
|
||||
case 0x56: return "Marvell";
|
||||
case 0x61: return "Apple";
|
||||
case 0x66: return "Faraday";
|
||||
case 0x68: return "HXT";
|
||||
case 0x69: return "Intel";
|
||||
case 0xc0: return "Ampere Computing";
|
||||
default: break;
|
||||
}
|
||||
sprintf( buf, "0x%x", v );
|
||||
return buf;
|
||||
}
|
||||
|
||||
static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
{
|
||||
static char buf[16];
|
||||
switch( impl )
|
||||
{
|
||||
case 0x41:
|
||||
switch( part )
|
||||
{
|
||||
case 0x810: return "810";
|
||||
case 0x920: return "920";
|
||||
case 0x922: return "922";
|
||||
case 0x926: return "926";
|
||||
case 0x940: return "940";
|
||||
case 0x946: return "946";
|
||||
case 0x966: return "966";
|
||||
case 0xa20: return "1020";
|
||||
case 0xa22: return "1022";
|
||||
case 0xa26: return "1026";
|
||||
case 0xb02: return "11 MPCore";
|
||||
case 0xb36: return "1136";
|
||||
case 0xb56: return "1156";
|
||||
case 0xb76: return "1176";
|
||||
case 0xc05: return " Cortex-A5";
|
||||
case 0xc07: return " Cortex-A7";
|
||||
case 0xc08: return " Cortex-A8";
|
||||
case 0xc09: return " Cortex-A9";
|
||||
case 0xc0c: return " Cortex-A12";
|
||||
case 0xc0d: return " Rockchip RK3288";
|
||||
case 0xc0f: return " Cortex-A15";
|
||||
case 0xc0e: return " Cortex-A17";
|
||||
case 0xc14: return " Cortex-R4";
|
||||
case 0xc15: return " Cortex-R5";
|
||||
case 0xc17: return " Cortex-R7";
|
||||
case 0xc18: return " Cortex-R8";
|
||||
case 0xc20: return " Cortex-M0";
|
||||
case 0xc21: return " Cortex-M1";
|
||||
case 0xc23: return " Cortex-M3";
|
||||
case 0xc24: return " Cortex-M4";
|
||||
case 0xc27: return " Cortex-M7";
|
||||
case 0xc60: return " Cortex-M0+";
|
||||
case 0xd00: return " AArch64 simulator";
|
||||
case 0xd01: return " Cortex-A32";
|
||||
case 0xd02: return " Cortex-A34";
|
||||
case 0xd03: return " Cortex-A53";
|
||||
case 0xd04: return " Cortex-A35";
|
||||
case 0xd05: return " Cortex-A55";
|
||||
case 0xd06: return " Cortex-A65";
|
||||
case 0xd07: return " Cortex-A57";
|
||||
case 0xd08: return " Cortex-A72";
|
||||
case 0xd09: return " Cortex-A73";
|
||||
case 0xd0a: return " Cortex-A75";
|
||||
case 0xd0b: return " Cortex-A76";
|
||||
case 0xd0c: return " Neoverse N1";
|
||||
case 0xd0d: return " Cortex-A77";
|
||||
case 0xd0e: return " Cortex-A76AE";
|
||||
case 0xd0f: return " AEMv8";
|
||||
case 0xd13: return " Cortex-R52";
|
||||
case 0xd20: return " Cortex-M23";
|
||||
case 0xd21: return " Cortex-M33";
|
||||
case 0xd40: return " Zeus";
|
||||
case 0xd41: return " Cortex-A78";
|
||||
case 0xd43: return " Cortex-A65AE";
|
||||
case 0xd44: return " Cortex-X1";
|
||||
case 0xd4a: return " Neoverse E1";
|
||||
default: break;
|
||||
}
|
||||
case 0x42:
|
||||
switch( part )
|
||||
{
|
||||
case 0xf: return " Brahma B15";
|
||||
case 0x100: return " Brahma B53";
|
||||
case 0x516: return " ThunderX2";
|
||||
default: break;
|
||||
}
|
||||
case 0x43:
|
||||
switch( part )
|
||||
{
|
||||
case 0xa0: return " ThunderX";
|
||||
case 0xa1: return " ThunderX 88XX";
|
||||
case 0xa2: return " ThunderX 81XX";
|
||||
case 0xa3: return " ThunderX 83XX";
|
||||
case 0xaf: return " ThunderX2 99xx";
|
||||
case 0xb0: return " OcteonTX2";
|
||||
case 0xb1: return " OcteonTX2 T98";
|
||||
case 0xb2: return " OcteonTX2 T96";
|
||||
case 0xb3: return " OcteonTX2 F95";
|
||||
case 0xb4: return " OcteonTX2 F95N";
|
||||
case 0xb5: return " OcteonTX2 F95MM";
|
||||
case 0xb8: return " ThunderX3 T110";
|
||||
default: break;
|
||||
}
|
||||
case 0x44:
|
||||
switch( part )
|
||||
{
|
||||
case 0xa10: return " SA110";
|
||||
case 0xa11: return " SA1100";
|
||||
default: break;
|
||||
}
|
||||
case 0x46:
|
||||
switch( part )
|
||||
{
|
||||
case 0x1: return " A64FX";
|
||||
default: break;
|
||||
}
|
||||
case 0x48:
|
||||
switch( part )
|
||||
{
|
||||
case 0xd01: return " TSV100";
|
||||
case 0xd40: return " Kirin 980";
|
||||
default: break;
|
||||
}
|
||||
case 0x4e:
|
||||
switch( part )
|
||||
{
|
||||
case 0x0: return " Denver";
|
||||
case 0x3: return " Denver 2";
|
||||
case 0x4: return " Carmel";
|
||||
default: break;
|
||||
}
|
||||
case 0x50:
|
||||
switch( part )
|
||||
{
|
||||
case 0x0: return " X-Gene";
|
||||
default: break;
|
||||
}
|
||||
case 0x51:
|
||||
switch( part )
|
||||
{
|
||||
case 0xf: return " Scorpion";
|
||||
case 0x2d: return " Scorpion";
|
||||
case 0x4d: return " Krait";
|
||||
case 0x6f: return " Krait";
|
||||
case 0x200: return " Kryo";
|
||||
case 0x201: return " Kryo Silver (Snapdragon 821)";
|
||||
case 0x205: return " Kryo Gold";
|
||||
case 0x211: return " Kryo Silver (Snapdragon 820)";
|
||||
case 0x800: return " Kryo 260 / 280 Gold";
|
||||
case 0x801: return " Kryo 260 / 280 Silver";
|
||||
case 0x802: return " Kryo 385 Gold";
|
||||
case 0x803: return " Kryo 385 Silver";
|
||||
case 0x804: return " Kryo 485 Gold";
|
||||
case 0xc00: return " Falkor";
|
||||
case 0xc01: return " Saphira";
|
||||
default: break;
|
||||
}
|
||||
case 0x53:
|
||||
switch( part )
|
||||
{
|
||||
case 0x1: return " Exynos M1/M2";
|
||||
case 0x2: return " Exynos M3";
|
||||
default: break;
|
||||
}
|
||||
case 0x56:
|
||||
switch( part )
|
||||
{
|
||||
case 0x131: return " Feroceon 88FR131";
|
||||
case 0x581: return " PJ4 / PJ4B";
|
||||
case 0x584: return " PJ4B-MP / PJ4C";
|
||||
default: break;
|
||||
}
|
||||
case 0x61:
|
||||
switch( part )
|
||||
{
|
||||
case 0x1: return " Cyclone";
|
||||
case 0x2: return " Typhoon";
|
||||
case 0x3: return " Typhoon/Capri";
|
||||
case 0x4: return " Twister";
|
||||
case 0x5: return " Twister/Elba/Malta";
|
||||
case 0x6: return " Hurricane";
|
||||
case 0x7: return " Hurricane/Myst";
|
||||
default: break;
|
||||
}
|
||||
case 0x66:
|
||||
switch( part )
|
||||
{
|
||||
case 0x526: return " FA526";
|
||||
case 0x626: return " FA626";
|
||||
default: break;
|
||||
}
|
||||
case 0x68:
|
||||
switch( part )
|
||||
{
|
||||
case 0x0: return " Phecda";
|
||||
default: break;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
sprintf( buf, " 0x%x", part );
|
||||
return buf;
|
||||
}
|
||||
|
||||
#elif defined __APPLE__ && TARGET_OS_IPHONE == 1
|
||||
|
||||
static const char* DecodeIosDevice( const char* id )
|
||||
{
|
||||
static const char* DeviceTable[] = {
|
||||
"i386", "32-bit simulator",
|
||||
"x86_64", "64-bit simulator",
|
||||
"iPhone1,1", "iPhone",
|
||||
"iPhone1,2", "iPhone 3G",
|
||||
"iPhone2,1", "iPhone 3GS",
|
||||
"iPhone3,1", "iPhone 4 (GSM)",
|
||||
"iPhone3,2", "iPhone 4 (GSM)",
|
||||
"iPhone3,3", "iPhone 4 (CDMA)",
|
||||
"iPhone4,1", "iPhone 4S",
|
||||
"iPhone5,1", "iPhone 5 (A1428)",
|
||||
"iPhone5,2", "iPhone 5 (A1429)",
|
||||
"iPhone5,3", "iPhone 5c (A1456/A1532)",
|
||||
"iPhone5,4", "iPhone 5c (A1507/A1516/1526/A1529)",
|
||||
"iPhone6,1", "iPhone 5s (A1433/A1533)",
|
||||
"iPhone6,2", "iPhone 5s (A1457/A1518/A1528/A1530)",
|
||||
"iPhone7,1", "iPhone 6 Plus",
|
||||
"iPhone7,2", "iPhone 6",
|
||||
"iPhone8,1", "iPhone 6S",
|
||||
"iPhone8,2", "iPhone 6S Plus",
|
||||
"iPhone8,4", "iPhone SE",
|
||||
"iPhone9,1", "iPhone 7 (CDMA)",
|
||||
"iPhone9,2", "iPhone 7 Plus (CDMA)",
|
||||
"iPhone9,3", "iPhone 7 (GSM)",
|
||||
"iPhone9,4", "iPhone 7 Plus (GSM)",
|
||||
"iPhone10,1", "iPhone 8 (CDMA)",
|
||||
"iPhone10,2", "iPhone 8 Plus (CDMA)",
|
||||
"iPhone10,3", "iPhone X (CDMA)",
|
||||
"iPhone10,4", "iPhone 8 (GSM)",
|
||||
"iPhone10,5", "iPhone 8 Plus (GSM)",
|
||||
"iPhone10,6", "iPhone X (GSM)",
|
||||
"iPhone11,2", "iPhone XS",
|
||||
"iPhone11,4", "iPhone XS Max",
|
||||
"iPhone11,6", "iPhone XS Max China",
|
||||
"iPhone11,8", "iPhone XR",
|
||||
"iPhone12,1", "iPhone 11",
|
||||
"iPhone12,3", "iPhone 11 Pro",
|
||||
"iPhone12,5", "iPhone 11 Pro Max",
|
||||
"iPhone12,8", "iPhone SE 2nd Gen",
|
||||
"iPad1,1", "iPad (A1219/A1337)",
|
||||
"iPad2,1", "iPad 2 (A1395)",
|
||||
"iPad2,2", "iPad 2 (A1396)",
|
||||
"iPad2,3", "iPad 2 (A1397)",
|
||||
"iPad2,4", "iPad 2 (A1395)",
|
||||
"iPad2,5", "iPad Mini (A1432)",
|
||||
"iPad2,6", "iPad Mini (A1454)",
|
||||
"iPad2,7", "iPad Mini (A1455)",
|
||||
"iPad3,1", "iPad 3 (A1416)",
|
||||
"iPad3,2", "iPad 3 (A1403)",
|
||||
"iPad3,3", "iPad 3 (A1430)",
|
||||
"iPad3,4", "iPad 4 (A1458)",
|
||||
"iPad3,5", "iPad 4 (A1459)",
|
||||
"iPad3,6", "iPad 4 (A1460)",
|
||||
"iPad4,1", "iPad Air (A1474)",
|
||||
"iPad4,2", "iPad Air (A1475)",
|
||||
"iPad4,3", "iPad Air (A1476)",
|
||||
"iPad4,4", "iPad Mini 2 (A1489)",
|
||||
"iPad4,5", "iPad Mini 2 (A1490)",
|
||||
"iPad4,6", "iPad Mini 2 (A1491)",
|
||||
"iPad4,7", "iPad Mini 3 (A1599)",
|
||||
"iPad4,8", "iPad Mini 3 (A1600)",
|
||||
"iPad4,9", "iPad Mini 3 (A1601)",
|
||||
"iPad5,1", "iPad Mini 4 (A1538)",
|
||||
"iPad5,2", "iPad Mini 4 (A1550)",
|
||||
"iPad5,3", "iPad Air 2 (A1566)",
|
||||
"iPad5,4", "iPad Air 2 (A1567)",
|
||||
"iPad6,3", "iPad Pro 9.7\" (A1673)",
|
||||
"iPad6,4", "iPad Pro 9.7\" (A1674)",
|
||||
"iPad6,5", "iPad Pro 9.7\" (A1675)",
|
||||
"iPad6,7", "iPad Pro 12.9\" (A1584)",
|
||||
"iPad6,8", "iPad Pro 12.9\" (A1652)",
|
||||
"iPad6,11", "iPad 5th gen (A1822)",
|
||||
"iPad6,12", "iPad 5th gen (A1823)",
|
||||
"iPad7,1", "iPad Pro 12.9\" 2nd gen (A1670)",
|
||||
"iPad7,2", "iPad Pro 12.9\" 2nd gen (A1671/A1821)",
|
||||
"iPad7,3", "iPad Pro 10.5\" (A1701)",
|
||||
"iPad7,4", "iPad Pro 10.5\" (A1709)",
|
||||
"iPad7,5", "iPad 6th gen (A1893)",
|
||||
"iPad7,6", "iPad 6th gen (A1954)",
|
||||
"iPad7,11", "iPad 7th gen 10.2\" (Wifi)",
|
||||
"iPad7,12", "iPad 7th gen 10.2\" (Wifi+Cellular)",
|
||||
"iPad8,1", "iPad Pro 11\" (A1980)",
|
||||
"iPad8,2", "iPad Pro 11\" (A1980)",
|
||||
"iPad8,3", "iPad Pro 11\" (A1934/A1979/A2013)",
|
||||
"iPad8,4", "iPad Pro 11\" (A1934/A1979/A2013)",
|
||||
"iPad8,5", "iPad Pro 12.9\" 3rd gen (A1876)",
|
||||
"iPad8,6", "iPad Pro 12.9\" 3rd gen (A1876)",
|
||||
"iPad8,7", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)",
|
||||
"iPad8,8", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)",
|
||||
"iPad8,9", "iPad Pro 11\" 2nd gen (Wifi)",
|
||||
"iPad8,10", "iPad Pro 11\" 2nd gen (Wifi+Cellular)",
|
||||
"iPad8,11", "iPad Pro 12.9\" 4th gen (Wifi)",
|
||||
"iPad8,12", "iPad Pro 12.9\" 4th gen (Wifi+Cellular)",
|
||||
"iPad11,1", "iPad Mini 5th gen (A2133)",
|
||||
"iPad11,2", "iPad Mini 5th gen (A2124/A2125/A2126)",
|
||||
"iPad11,3", "iPad Air 3rd gen (A2152)",
|
||||
"iPad11,4", "iPad Air 3rd gen (A2123/A2153/A2154)",
|
||||
"iPod1,1", "iPod Touch",
|
||||
"iPod2,1", "iPod Touch 2nd gen",
|
||||
"iPod3,1", "iPod Touch 3rd gen",
|
||||
"iPod4,1", "iPod Touch 4th gen",
|
||||
"iPod5,1", "iPod Touch 5th gen",
|
||||
"iPod7,1", "iPod Touch 6th gen",
|
||||
"iPod9,1", "iPod Touch 7th gen",
|
||||
nullptr
|
||||
};
|
||||
|
||||
auto ptr = DeviceTable;
|
||||
while( *ptr )
|
||||
{
|
||||
if( strcmp( ptr[0], id ) == 0 ) return ptr[1];
|
||||
ptr += 2;
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
|
@ -0,0 +1,767 @@
|
|||
#include <new>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "TracyCallstack.hpp"
|
||||
#include "TracyFastVector.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 1
|
||||
# ifndef NOMINMAX
|
||||
# define NOMINMAX
|
||||
# endif
|
||||
# include <windows.h>
|
||||
# include <psapi.h>
|
||||
# ifdef _MSC_VER
|
||||
# pragma warning( push )
|
||||
# pragma warning( disable : 4091 )
|
||||
# endif
|
||||
# include <dbghelp.h>
|
||||
# ifdef _MSC_VER
|
||||
# pragma warning( pop )
|
||||
# endif
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
# include "../libbacktrace/backtrace.hpp"
|
||||
# include <dlfcn.h>
|
||||
# include <cxxabi.h>
|
||||
#elif TRACY_HAS_CALLSTACK == 5
|
||||
# include <dlfcn.h>
|
||||
# include <cxxabi.h>
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
# include "TracyProfiler.hpp"
|
||||
|
||||
# define DBGHELP_INIT TracyConcat( TRACY_DBGHELP_LOCK, Init() )
|
||||
# define DBGHELP_LOCK TracyConcat( TRACY_DBGHELP_LOCK, Lock() );
|
||||
# define DBGHELP_UNLOCK TracyConcat( TRACY_DBGHELP_LOCK, Unlock() );
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void DBGHELP_INIT;
|
||||
void DBGHELP_LOCK;
|
||||
void DBGHELP_UNLOCK;
|
||||
};
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
static inline char* CopyString( const char* src, size_t sz )
|
||||
{
|
||||
assert( strlen( src ) == sz );
|
||||
auto dst = (char*)tracy_malloc( sz + 1 );
|
||||
memcpy( dst, src, sz );
|
||||
dst[sz] = '\0';
|
||||
return dst;
|
||||
}
|
||||
|
||||
static inline char* CopyString( const char* src )
|
||||
{
|
||||
const auto sz = strlen( src );
|
||||
auto dst = (char*)tracy_malloc( sz + 1 );
|
||||
memcpy( dst, src, sz );
|
||||
dst[sz] = '\0';
|
||||
return dst;
|
||||
}
|
||||
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 1
|
||||
|
||||
enum { MaxCbTrace = 16 };
|
||||
enum { MaxNameSize = 8*1024 };
|
||||
|
||||
int cb_num;
|
||||
CallstackEntry cb_data[MaxCbTrace];
|
||||
|
||||
extern "C"
|
||||
{
|
||||
typedef unsigned long (__stdcall *t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
|
||||
t_RtlWalkFrameChain RtlWalkFrameChain = 0;
|
||||
}
|
||||
|
||||
#if defined __MINGW32__ && API_VERSION_NUMBER < 12
|
||||
extern "C" {
|
||||
// Actual required API_VERSION_NUMBER is unknown because it is undocumented. These functions are not present in at least v11.
|
||||
DWORD IMAGEAPI SymAddrIncludeInlineTrace(HANDLE hProcess, DWORD64 Address);
|
||||
BOOL IMAGEAPI SymQueryInlineTrace(HANDLE hProcess, DWORD64 StartAddress, DWORD StartContext, DWORD64 StartRetAddress,
|
||||
DWORD64 CurAddress, LPDWORD CurContext, LPDWORD CurFrameIndex);
|
||||
BOOL IMAGEAPI SymFromInlineContext(HANDLE hProcess, DWORD64 Address, ULONG InlineContext, PDWORD64 Displacement,
|
||||
PSYMBOL_INFO Symbol);
|
||||
BOOL IMAGEAPI SymGetLineFromInlineContext(HANDLE hProcess, DWORD64 qwAddr, ULONG InlineContext,
|
||||
DWORD64 qwModuleBaseAddress, PDWORD pdwDisplacement, PIMAGEHLP_LINE64 Line64);
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifndef __CYGWIN__
|
||||
struct ModuleCache
|
||||
{
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
char* name;
|
||||
};
|
||||
|
||||
static FastVector<ModuleCache>* s_modCache;
|
||||
#endif
|
||||
|
||||
void InitCallstack()
|
||||
{
|
||||
RtlWalkFrameChain = (t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" );
|
||||
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_INIT;
|
||||
DBGHELP_LOCK;
|
||||
#endif
|
||||
|
||||
SymInitialize( GetCurrentProcess(), nullptr, true );
|
||||
SymSetOptions( SYMOPT_LOAD_LINES );
|
||||
|
||||
#ifndef __CYGWIN__
|
||||
HMODULE mod[1024];
|
||||
DWORD needed;
|
||||
HANDLE proc = GetCurrentProcess();
|
||||
|
||||
s_modCache = (FastVector<ModuleCache>*)tracy_malloc( sizeof( FastVector<ModuleCache> ) );
|
||||
new(s_modCache) FastVector<ModuleCache>( 512 );
|
||||
|
||||
if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
|
||||
{
|
||||
const auto sz = needed / sizeof( HMODULE );
|
||||
for( size_t i=0; i<sz; i++ )
|
||||
{
|
||||
MODULEINFO info;
|
||||
if( GetModuleInformation( proc, mod[i], &info, sizeof( info ) ) != 0 )
|
||||
{
|
||||
const auto base = uint64_t( info.lpBaseOfDll );
|
||||
char name[1024];
|
||||
const auto res = GetModuleFileNameA( mod[i], name, 1021 );
|
||||
if( res > 0 )
|
||||
{
|
||||
auto ptr = name + res;
|
||||
while( ptr > name && *ptr != '\\' && *ptr != '/' ) ptr--;
|
||||
if( ptr > name ) ptr++;
|
||||
const auto namelen = name + res - ptr;
|
||||
auto cache = s_modCache->push_next();
|
||||
cache->start = base;
|
||||
cache->end = base + info.SizeOfImage;
|
||||
cache->name = (char*)tracy_malloc( namelen+3 );
|
||||
cache->name[0] = '[';
|
||||
memcpy( cache->name+1, ptr, namelen );
|
||||
cache->name[namelen+1] = ']';
|
||||
cache->name[namelen+2] = '\0';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_UNLOCK;
|
||||
#endif
|
||||
}
|
||||
|
||||
TRACY_API uintptr_t* CallTrace( int depth )
|
||||
{
|
||||
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
|
||||
const auto num = RtlWalkFrameChain( (void**)( trace + 1 ), depth, 0 );
|
||||
*trace = num;
|
||||
return trace;
|
||||
}
|
||||
|
||||
const char* DecodeCallstackPtrFast( uint64_t ptr )
|
||||
{
|
||||
static char ret[MaxNameSize];
|
||||
const auto proc = GetCurrentProcess();
|
||||
|
||||
char buf[sizeof( SYMBOL_INFO ) + MaxNameSize];
|
||||
auto si = (SYMBOL_INFO*)buf;
|
||||
si->SizeOfStruct = sizeof( SYMBOL_INFO );
|
||||
si->MaxNameLen = MaxNameSize;
|
||||
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_LOCK;
|
||||
#endif
|
||||
if( SymFromAddr( proc, ptr, nullptr, si ) == 0 )
|
||||
{
|
||||
*ret = '\0';
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( ret, si->Name, si->NameLen );
|
||||
ret[si->NameLen] = '\0';
|
||||
}
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_UNLOCK;
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char* GetModuleName( uint64_t addr )
|
||||
{
|
||||
if( ( addr & 0x8000000000000000 ) != 0 ) return "[kernel]";
|
||||
|
||||
#ifndef __CYGWIN__
|
||||
for( auto& v : *s_modCache )
|
||||
{
|
||||
if( addr >= v.start && addr < v.end )
|
||||
{
|
||||
return v.name;
|
||||
}
|
||||
}
|
||||
|
||||
HMODULE mod[1024];
|
||||
DWORD needed;
|
||||
HANDLE proc = GetCurrentProcess();
|
||||
|
||||
if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
|
||||
{
|
||||
const auto sz = needed / sizeof( HMODULE );
|
||||
for( size_t i=0; i<sz; i++ )
|
||||
{
|
||||
MODULEINFO info;
|
||||
if( GetModuleInformation( proc, mod[i], &info, sizeof( info ) ) != 0 )
|
||||
{
|
||||
const auto base = uint64_t( info.lpBaseOfDll );
|
||||
if( addr >= base && addr < base + info.SizeOfImage )
|
||||
{
|
||||
char name[1024];
|
||||
const auto res = GetModuleFileNameA( mod[i], name, 1021 );
|
||||
if( res > 0 )
|
||||
{
|
||||
auto ptr = name + res;
|
||||
while( ptr > name && *ptr != '\\' && *ptr != '/' ) ptr--;
|
||||
if( ptr > name ) ptr++;
|
||||
const auto namelen = name + res - ptr;
|
||||
auto cache = s_modCache->push_next();
|
||||
cache->start = base;
|
||||
cache->end = base + info.SizeOfImage;
|
||||
cache->name = (char*)tracy_malloc( namelen+3 );
|
||||
cache->name[0] = '[';
|
||||
memcpy( cache->name+1, ptr, namelen );
|
||||
cache->name[namelen+1] = ']';
|
||||
cache->name[namelen+2] = '\0';
|
||||
return cache->name;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return "[unknown]";
|
||||
}
|
||||
|
||||
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr )
|
||||
{
|
||||
CallstackSymbolData sym;
|
||||
IMAGEHLP_LINE64 line;
|
||||
DWORD displacement = 0;
|
||||
line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_LOCK;
|
||||
#endif
|
||||
const auto res = SymGetLineFromAddr64( GetCurrentProcess(), ptr, &displacement, &line );
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_UNLOCK;
|
||||
#endif
|
||||
if( res == 0 )
|
||||
{
|
||||
sym.file = "[unknown]";
|
||||
sym.line = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
sym.file = line.FileName;
|
||||
sym.line = line.LineNumber;
|
||||
}
|
||||
sym.needFree = false;
|
||||
return sym;
|
||||
}
|
||||
|
||||
CallstackSymbolData DecodeCodeAddress( uint64_t ptr )
|
||||
{
|
||||
CallstackSymbolData sym;
|
||||
const auto proc = GetCurrentProcess();
|
||||
bool done = false;
|
||||
|
||||
IMAGEHLP_LINE64 line;
|
||||
DWORD displacement = 0;
|
||||
line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
|
||||
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_LOCK;
|
||||
#endif
|
||||
#ifndef __CYGWIN__
|
||||
DWORD inlineNum = SymAddrIncludeInlineTrace( proc, ptr );
|
||||
DWORD ctx = 0;
|
||||
DWORD idx;
|
||||
BOOL doInline = FALSE;
|
||||
if( inlineNum != 0 ) doInline = SymQueryInlineTrace( proc, ptr, 0, ptr, ptr, &ctx, &idx );
|
||||
if( doInline )
|
||||
{
|
||||
if( SymGetLineFromInlineContext( proc, ptr, ctx, 0, &displacement, &line ) != 0 )
|
||||
{
|
||||
sym.file = line.FileName;
|
||||
sym.line = line.LineNumber;
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if( !done )
|
||||
{
|
||||
if( SymGetLineFromAddr64( proc, ptr, &displacement, &line ) == 0 )
|
||||
{
|
||||
sym.file = "[unknown]";
|
||||
sym.line = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
sym.file = line.FileName;
|
||||
sym.line = line.LineNumber;
|
||||
}
|
||||
}
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_UNLOCK;
|
||||
#endif
|
||||
sym.needFree = false;
|
||||
return sym;
|
||||
}
|
||||
|
||||
CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
{
|
||||
int write;
|
||||
const auto proc = GetCurrentProcess();
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_LOCK;
|
||||
#endif
|
||||
#ifndef __CYGWIN__
|
||||
DWORD inlineNum = SymAddrIncludeInlineTrace( proc, ptr );
|
||||
if( inlineNum > MaxCbTrace - 1 ) inlineNum = MaxCbTrace - 1;
|
||||
DWORD ctx = 0;
|
||||
DWORD idx;
|
||||
BOOL doInline = FALSE;
|
||||
if( inlineNum != 0 ) doInline = SymQueryInlineTrace( proc, ptr, 0, ptr, ptr, &ctx, &idx );
|
||||
if( doInline )
|
||||
{
|
||||
write = inlineNum;
|
||||
cb_num = 1 + inlineNum;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
write = 0;
|
||||
cb_num = 1;
|
||||
}
|
||||
|
||||
char buf[sizeof( SYMBOL_INFO ) + MaxNameSize];
|
||||
auto si = (SYMBOL_INFO*)buf;
|
||||
si->SizeOfStruct = sizeof( SYMBOL_INFO );
|
||||
si->MaxNameLen = MaxNameSize;
|
||||
|
||||
const auto moduleName = GetModuleName( ptr );
|
||||
const auto symValid = SymFromAddr( proc, ptr, nullptr, si ) != 0;
|
||||
|
||||
IMAGEHLP_LINE64 line;
|
||||
DWORD displacement = 0;
|
||||
line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
|
||||
|
||||
{
|
||||
const char* filename;
|
||||
if( SymGetLineFromAddr64( proc, ptr, &displacement, &line ) == 0 )
|
||||
{
|
||||
filename = "[unknown]";
|
||||
cb_data[write].line = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
filename = line.FileName;
|
||||
cb_data[write].line = line.LineNumber;
|
||||
}
|
||||
|
||||
cb_data[write].name = symValid ? CopyString( si->Name, si->NameLen ) : CopyString( moduleName );
|
||||
cb_data[write].file = CopyString( filename );
|
||||
if( symValid )
|
||||
{
|
||||
cb_data[write].symLen = si->Size;
|
||||
cb_data[write].symAddr = si->Address;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb_data[write].symLen = 0;
|
||||
cb_data[write].symAddr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef __CYGWIN__
|
||||
if( doInline )
|
||||
{
|
||||
for( DWORD i=0; i<inlineNum; i++ )
|
||||
{
|
||||
auto& cb = cb_data[i];
|
||||
const auto symInlineValid = SymFromInlineContext( proc, ptr, ctx, nullptr, si ) != 0;
|
||||
const char* filename;
|
||||
if( SymGetLineFromInlineContext( proc, ptr, ctx, 0, &displacement, &line ) == 0 )
|
||||
{
|
||||
filename = "[unknown]";
|
||||
cb.line = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
filename = line.FileName;
|
||||
cb.line = line.LineNumber;
|
||||
}
|
||||
|
||||
cb.name = symInlineValid ? CopyString( si->Name, si->NameLen ) : CopyString( moduleName );
|
||||
cb.file = CopyString( filename );
|
||||
if( symInlineValid )
|
||||
{
|
||||
cb.symLen = si->Size;
|
||||
cb.symAddr = si->Address;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb.symLen = 0;
|
||||
cb.symAddr = 0;
|
||||
}
|
||||
|
||||
ctx++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_UNLOCK;
|
||||
#endif
|
||||
|
||||
return { cb_data, uint8_t( cb_num ), moduleName };
|
||||
}
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
|
||||
enum { MaxCbTrace = 16 };
|
||||
|
||||
struct backtrace_state* cb_bts;
|
||||
int cb_num;
|
||||
CallstackEntry cb_data[MaxCbTrace];
|
||||
int cb_fixup;
|
||||
|
||||
void InitCallstack()
|
||||
{
|
||||
cb_bts = backtrace_create_state( nullptr, 0, nullptr, nullptr );
|
||||
}
|
||||
|
||||
static int FastCallstackDataCb( void* data, uintptr_t pc, uintptr_t lowaddr, const char* fn, int lineno, const char* function )
|
||||
{
|
||||
if( function )
|
||||
{
|
||||
strcpy( (char*)data, function );
|
||||
}
|
||||
else
|
||||
{
|
||||
const char* symname = nullptr;
|
||||
auto vptr = (void*)pc;
|
||||
Dl_info dlinfo;
|
||||
if( dladdr( vptr, &dlinfo ) )
|
||||
{
|
||||
symname = dlinfo.dli_sname;
|
||||
}
|
||||
if( symname )
|
||||
{
|
||||
strcpy( (char*)data, symname );
|
||||
}
|
||||
else
|
||||
{
|
||||
*(char*)data = '\0';
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void FastCallstackErrorCb( void* data, const char* /*msg*/, int /*errnum*/ )
|
||||
{
|
||||
*(char*)data = '\0';
|
||||
}
|
||||
|
||||
const char* DecodeCallstackPtrFast( uint64_t ptr )
|
||||
{
|
||||
static char ret[1024];
|
||||
backtrace_pcinfo( cb_bts, ptr, FastCallstackDataCb, FastCallstackErrorCb, ret );
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int SymbolAddressDataCb( void* data, uintptr_t pc, uintptr_t lowaddr, const char* fn, int lineno, const char* function )
|
||||
{
|
||||
auto& sym = *(CallstackSymbolData*)data;
|
||||
if( !fn )
|
||||
{
|
||||
sym.file = "[unknown]";
|
||||
sym.line = 0;
|
||||
sym.needFree = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
sym.file = CopyString( fn );
|
||||
sym.line = lineno;
|
||||
sym.needFree = true;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void SymbolAddressErrorCb( void* data, const char* /*msg*/, int /*errnum*/ )
|
||||
{
|
||||
auto& sym = *(CallstackSymbolData*)data;
|
||||
sym.file = "[unknown]";
|
||||
sym.line = 0;
|
||||
sym.needFree = false;
|
||||
}
|
||||
|
||||
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr )
|
||||
{
|
||||
CallstackSymbolData sym;
|
||||
backtrace_pcinfo( cb_bts, ptr, SymbolAddressDataCb, SymbolAddressErrorCb, &sym );
|
||||
return sym;
|
||||
}
|
||||
|
||||
CallstackSymbolData DecodeCodeAddress( uint64_t ptr )
|
||||
{
|
||||
return DecodeSymbolAddress( ptr );
|
||||
}
|
||||
|
||||
static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, const char* fn, int lineno, const char* function )
|
||||
{
|
||||
enum { DemangleBufLen = 64*1024 };
|
||||
char demangled[DemangleBufLen];
|
||||
|
||||
cb_data[cb_num].symLen = 0;
|
||||
cb_data[cb_num].symAddr = (uint64_t)lowaddr;
|
||||
|
||||
if( !fn && !function )
|
||||
{
|
||||
const char* symname = nullptr;
|
||||
auto vptr = (void*)pc;
|
||||
ptrdiff_t symoff = 0;
|
||||
|
||||
Dl_info dlinfo;
|
||||
if( dladdr( vptr, &dlinfo ) )
|
||||
{
|
||||
symname = dlinfo.dli_sname;
|
||||
symoff = (char*)pc - (char*)dlinfo.dli_saddr;
|
||||
|
||||
if( symname && symname[0] == '_' )
|
||||
{
|
||||
size_t len = DemangleBufLen;
|
||||
int status;
|
||||
abi::__cxa_demangle( symname, demangled, &len, &status );
|
||||
if( status == 0 )
|
||||
{
|
||||
symname = demangled;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( !symname ) symname = "[unknown]";
|
||||
|
||||
if( symoff == 0 )
|
||||
{
|
||||
cb_data[cb_num].name = CopyString( symname );
|
||||
}
|
||||
else
|
||||
{
|
||||
char buf[32];
|
||||
const auto offlen = sprintf( buf, " + %td", symoff );
|
||||
const auto namelen = strlen( symname );
|
||||
auto name = (char*)tracy_malloc( namelen + offlen + 1 );
|
||||
memcpy( name, symname, namelen );
|
||||
memcpy( name + namelen, buf, offlen );
|
||||
name[namelen + offlen] = '\0';
|
||||
cb_data[cb_num].name = name;
|
||||
}
|
||||
|
||||
cb_data[cb_num].file = CopyString( "[unknown]" );
|
||||
cb_data[cb_num].line = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( !fn ) fn = "[unknown]";
|
||||
if( !function )
|
||||
{
|
||||
function = "[unknown]";
|
||||
}
|
||||
else
|
||||
{
|
||||
if( function[0] == '_' )
|
||||
{
|
||||
size_t len = DemangleBufLen;
|
||||
int status;
|
||||
abi::__cxa_demangle( function, demangled, &len, &status );
|
||||
if( status == 0 )
|
||||
{
|
||||
function = demangled;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cb_data[cb_num].name = CopyString( function );
|
||||
cb_data[cb_num].file = CopyString( fn );
|
||||
cb_data[cb_num].line = lineno;
|
||||
}
|
||||
|
||||
if( ++cb_num >= MaxCbTrace )
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void CallstackErrorCb( void* /*data*/, const char* /*msg*/, int /*errnum*/ )
|
||||
{
|
||||
for( int i=0; i<cb_num; i++ )
|
||||
{
|
||||
tracy_free( (void*)cb_data[i].name );
|
||||
tracy_free( (void*)cb_data[i].file );
|
||||
}
|
||||
|
||||
cb_data[0].name = CopyString( "[error]" );
|
||||
cb_data[0].file = CopyString( "[error]" );
|
||||
cb_data[0].line = 0;
|
||||
|
||||
cb_num = 1;
|
||||
}
|
||||
|
||||
void SymInfoCallback( void* /*data*/, uintptr_t pc, const char* symname, uintptr_t symval, uintptr_t symsize )
|
||||
{
|
||||
cb_data[cb_num-1].symLen = (uint32_t)symsize;
|
||||
cb_data[cb_num-1].symAddr = (uint64_t)symval;
|
||||
}
|
||||
|
||||
void SymInfoError( void* /*data*/, const char* /*msg*/, int /*errnum*/ )
|
||||
{
|
||||
cb_data[cb_num-1].symLen = 0;
|
||||
cb_data[cb_num-1].symAddr = 0;
|
||||
}
|
||||
|
||||
CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
{
|
||||
cb_num = 0;
|
||||
backtrace_pcinfo( cb_bts, ptr, CallstackDataCb, CallstackErrorCb, nullptr );
|
||||
assert( cb_num > 0 );
|
||||
|
||||
backtrace_syminfo( cb_bts, ptr, SymInfoCallback, SymInfoError, nullptr );
|
||||
|
||||
const char* symloc = nullptr;
|
||||
Dl_info dlinfo;
|
||||
if( dladdr( (void*)ptr, &dlinfo ) ) symloc = dlinfo.dli_fname;
|
||||
|
||||
return { cb_data, uint8_t( cb_num ), symloc ? symloc : "[unknown]" };
|
||||
}
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 5
|
||||
|
||||
void InitCallstack()
|
||||
{
|
||||
}
|
||||
|
||||
const char* DecodeCallstackPtrFast( uint64_t ptr )
|
||||
{
|
||||
static char ret[1024];
|
||||
auto vptr = (void*)ptr;
|
||||
const char* symname = nullptr;
|
||||
Dl_info dlinfo;
|
||||
if( dladdr( vptr, &dlinfo ) && dlinfo.dli_sname )
|
||||
{
|
||||
symname = dlinfo.dli_sname;
|
||||
}
|
||||
if( symname )
|
||||
{
|
||||
strcpy( ret, symname );
|
||||
}
|
||||
else
|
||||
{
|
||||
*ret = '\0';
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr )
|
||||
{
|
||||
const char* symloc = nullptr;
|
||||
Dl_info dlinfo;
|
||||
if( dladdr( (void*)ptr, &dlinfo ) ) symloc = dlinfo.dli_fname;
|
||||
if( !symloc ) symloc = "[unknown]";
|
||||
return CallstackSymbolData { symloc, 0, false };
|
||||
}
|
||||
|
||||
CallstackSymbolData DecodeCodeAddress( uint64_t ptr )
|
||||
{
|
||||
return DecodeSymbolAddress( ptr );
|
||||
}
|
||||
|
||||
CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
{
|
||||
static CallstackEntry cb;
|
||||
cb.line = 0;
|
||||
|
||||
char* demangled = nullptr;
|
||||
const char* symname = nullptr;
|
||||
const char* symloc = nullptr;
|
||||
auto vptr = (void*)ptr;
|
||||
ptrdiff_t symoff = 0;
|
||||
void* symaddr = nullptr;
|
||||
|
||||
Dl_info dlinfo;
|
||||
if( dladdr( vptr, &dlinfo ) )
|
||||
{
|
||||
symloc = dlinfo.dli_fname;
|
||||
symname = dlinfo.dli_sname;
|
||||
symoff = (char*)ptr - (char*)dlinfo.dli_saddr;
|
||||
symaddr = dlinfo.dli_saddr;
|
||||
|
||||
if( symname && symname[0] == '_' )
|
||||
{
|
||||
size_t len = 0;
|
||||
int status;
|
||||
demangled = abi::__cxa_demangle( symname, nullptr, &len, &status );
|
||||
if( status == 0 )
|
||||
{
|
||||
symname = demangled;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( !symname ) symname = "[unknown]";
|
||||
if( !symloc ) symloc = "[unknown]";
|
||||
|
||||
if( symoff == 0 )
|
||||
{
|
||||
cb.name = CopyString( symname );
|
||||
}
|
||||
else
|
||||
{
|
||||
char buf[32];
|
||||
const auto offlen = sprintf( buf, " + %td", symoff );
|
||||
const auto namelen = strlen( symname );
|
||||
auto name = (char*)tracy_malloc( namelen + offlen + 1 );
|
||||
memcpy( name, symname, namelen );
|
||||
memcpy( name + namelen, buf, offlen );
|
||||
name[namelen + offlen] = '\0';
|
||||
cb.name = name;
|
||||
}
|
||||
|
||||
cb.file = CopyString( "[unknown]" );
|
||||
cb.symLen = 0;
|
||||
cb.symAddr = (uint64_t)symaddr;
|
||||
|
||||
if( demangled ) free( demangled );
|
||||
|
||||
return { &cb, 1, symloc };
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,28 @@
|
|||
#ifndef __TRACYCALLSTACK_H__
|
||||
#define __TRACYCALLSTACK_H__
|
||||
|
||||
#if !defined _WIN32 && !defined __CYGWIN__
|
||||
# include <sys/param.h>
|
||||
#endif
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# define TRACY_HAS_CALLSTACK 1
|
||||
#elif defined __ANDROID__
|
||||
# if !defined __arm__ || __ANDROID_API__ >= 21
|
||||
# define TRACY_HAS_CALLSTACK 2
|
||||
# else
|
||||
# define TRACY_HAS_CALLSTACK 5
|
||||
# endif
|
||||
#elif defined __linux
|
||||
# if defined _GNU_SOURCE && defined __GLIBC__
|
||||
# define TRACY_HAS_CALLSTACK 3
|
||||
# else
|
||||
# define TRACY_HAS_CALLSTACK 2
|
||||
# endif
|
||||
#elif defined __APPLE__
|
||||
# define TRACY_HAS_CALLSTACK 4
|
||||
#elif defined BSD
|
||||
# define TRACY_HAS_CALLSTACK 6
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,116 @@
|
|||
#ifndef __TRACYCALLSTACK_HPP__
|
||||
#define __TRACYCALLSTACK_HPP__
|
||||
|
||||
#include "../common/TracyApi.h"
|
||||
#include "TracyCallstack.h"
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5
|
||||
# include <unwind.h>
|
||||
#elif TRACY_HAS_CALLSTACK >= 3
|
||||
# include <execinfo.h>
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "../common/TracyForceInline.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
struct CallstackSymbolData
|
||||
{
|
||||
const char* file;
|
||||
uint32_t line;
|
||||
bool needFree;
|
||||
};
|
||||
|
||||
struct CallstackEntry
|
||||
{
|
||||
const char* name;
|
||||
const char* file;
|
||||
uint32_t line;
|
||||
uint32_t symLen;
|
||||
uint64_t symAddr;
|
||||
};
|
||||
|
||||
struct CallstackEntryData
|
||||
{
|
||||
const CallstackEntry* data;
|
||||
uint8_t size;
|
||||
const char* imageName;
|
||||
};
|
||||
|
||||
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr );
|
||||
CallstackSymbolData DecodeCodeAddress( uint64_t ptr );
|
||||
const char* DecodeCallstackPtrFast( uint64_t ptr );
|
||||
CallstackEntryData DecodeCallstackPtr( uint64_t ptr );
|
||||
void InitCallstack();
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 1
|
||||
|
||||
TRACY_API uintptr_t* CallTrace( int depth );
|
||||
|
||||
static tracy_force_inline void* Callstack( int depth )
|
||||
{
|
||||
assert( depth >= 1 && depth < 63 );
|
||||
return CallTrace( depth );
|
||||
}
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5
|
||||
|
||||
struct BacktraceState
|
||||
{
|
||||
void** current;
|
||||
void** end;
|
||||
};
|
||||
|
||||
static _Unwind_Reason_Code tracy_unwind_callback( struct _Unwind_Context* ctx, void* arg )
|
||||
{
|
||||
auto state = (BacktraceState*)arg;
|
||||
uintptr_t pc = _Unwind_GetIP( ctx );
|
||||
if( pc )
|
||||
{
|
||||
if( state->current == state->end ) return _URC_END_OF_STACK;
|
||||
*state->current++ = (void*)pc;
|
||||
}
|
||||
return _URC_NO_REASON;
|
||||
}
|
||||
|
||||
static tracy_force_inline void* Callstack( int depth )
|
||||
{
|
||||
assert( depth >= 1 && depth < 63 );
|
||||
|
||||
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
|
||||
BacktraceState state = { (void**)(trace+1), (void**)(trace+1+depth) };
|
||||
_Unwind_Backtrace( tracy_unwind_callback, &state );
|
||||
|
||||
*trace = (uintptr_t*)state.current - trace + 1;
|
||||
|
||||
return trace;
|
||||
}
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
|
||||
static tracy_force_inline void* Callstack( int depth )
|
||||
{
|
||||
assert( depth >= 1 );
|
||||
|
||||
auto trace = (uintptr_t*)tracy_malloc( ( 1 + (size_t)depth ) * sizeof( uintptr_t ) );
|
||||
const auto num = (size_t)backtrace( (void**)(trace+1), depth );
|
||||
*trace = num;
|
||||
|
||||
return trace;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,641 @@
|
|||
#include "TracyDxt1.hpp"
|
||||
#include "../common/TracyForceInline.hpp"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __ARM_NEON
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#if defined __AVX__ && !defined __SSE4_1__
|
||||
# define __SSE4_1__
|
||||
#endif
|
||||
|
||||
#if defined __SSE4_1__ || defined __AVX2__
|
||||
# ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
# else
|
||||
# include <x86intrin.h>
|
||||
# ifndef _mm256_cvtsi256_si32
|
||||
# define _mm256_cvtsi256_si32( v ) ( _mm_cvtsi128_si32( _mm256_castsi256_si128( v ) ) )
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
static inline uint16_t to565( uint8_t r, uint8_t g, uint8_t b )
|
||||
{
|
||||
return ( ( r & 0xF8 ) << 8 ) | ( ( g & 0xFC ) << 3 ) | ( b >> 3 );
|
||||
}
|
||||
|
||||
static inline uint16_t to565( uint32_t c )
|
||||
{
|
||||
return
|
||||
( ( c & 0xF80000 ) >> 19 ) |
|
||||
( ( c & 0x00FC00 ) >> 5 ) |
|
||||
( ( c & 0x0000F8 ) << 8 );
|
||||
}
|
||||
|
||||
static const uint16_t DivTable[255*3+1] = {
|
||||
0xffff, 0xffff, 0xffff, 0xffff, 0xcccc, 0xaaaa, 0x9249, 0x8000, 0x71c7, 0x6666, 0x5d17, 0x5555, 0x4ec4, 0x4924, 0x4444, 0x4000,
|
||||
0x3c3c, 0x38e3, 0x35e5, 0x3333, 0x30c3, 0x2e8b, 0x2c85, 0x2aaa, 0x28f5, 0x2762, 0x25ed, 0x2492, 0x234f, 0x2222, 0x2108, 0x2000,
|
||||
0x1f07, 0x1e1e, 0x1d41, 0x1c71, 0x1bac, 0x1af2, 0x1a41, 0x1999, 0x18f9, 0x1861, 0x17d0, 0x1745, 0x16c1, 0x1642, 0x15c9, 0x1555,
|
||||
0x14e5, 0x147a, 0x1414, 0x13b1, 0x1352, 0x12f6, 0x129e, 0x1249, 0x11f7, 0x11a7, 0x115b, 0x1111, 0x10c9, 0x1084, 0x1041, 0x1000,
|
||||
0x0fc0, 0x0f83, 0x0f48, 0x0f0f, 0x0ed7, 0x0ea0, 0x0e6c, 0x0e38, 0x0e07, 0x0dd6, 0x0da7, 0x0d79, 0x0d4c, 0x0d20, 0x0cf6, 0x0ccc,
|
||||
0x0ca4, 0x0c7c, 0x0c56, 0x0c30, 0x0c0c, 0x0be8, 0x0bc5, 0x0ba2, 0x0b81, 0x0b60, 0x0b40, 0x0b21, 0x0b02, 0x0ae4, 0x0ac7, 0x0aaa,
|
||||
0x0a8e, 0x0a72, 0x0a57, 0x0a3d, 0x0a23, 0x0a0a, 0x09f1, 0x09d8, 0x09c0, 0x09a9, 0x0991, 0x097b, 0x0964, 0x094f, 0x0939, 0x0924,
|
||||
0x090f, 0x08fb, 0x08e7, 0x08d3, 0x08c0, 0x08ad, 0x089a, 0x0888, 0x0876, 0x0864, 0x0853, 0x0842, 0x0831, 0x0820, 0x0810, 0x0800,
|
||||
0x07f0, 0x07e0, 0x07d1, 0x07c1, 0x07b3, 0x07a4, 0x0795, 0x0787, 0x0779, 0x076b, 0x075d, 0x0750, 0x0743, 0x0736, 0x0729, 0x071c,
|
||||
0x070f, 0x0703, 0x06f7, 0x06eb, 0x06df, 0x06d3, 0x06c8, 0x06bc, 0x06b1, 0x06a6, 0x069b, 0x0690, 0x0685, 0x067b, 0x0670, 0x0666,
|
||||
0x065c, 0x0652, 0x0648, 0x063e, 0x0634, 0x062b, 0x0621, 0x0618, 0x060f, 0x0606, 0x05fd, 0x05f4, 0x05eb, 0x05e2, 0x05d9, 0x05d1,
|
||||
0x05c9, 0x05c0, 0x05b8, 0x05b0, 0x05a8, 0x05a0, 0x0598, 0x0590, 0x0588, 0x0581, 0x0579, 0x0572, 0x056b, 0x0563, 0x055c, 0x0555,
|
||||
0x054e, 0x0547, 0x0540, 0x0539, 0x0532, 0x052b, 0x0525, 0x051e, 0x0518, 0x0511, 0x050b, 0x0505, 0x04fe, 0x04f8, 0x04f2, 0x04ec,
|
||||
0x04e6, 0x04e0, 0x04da, 0x04d4, 0x04ce, 0x04c8, 0x04c3, 0x04bd, 0x04b8, 0x04b2, 0x04ad, 0x04a7, 0x04a2, 0x049c, 0x0497, 0x0492,
|
||||
0x048d, 0x0487, 0x0482, 0x047d, 0x0478, 0x0473, 0x046e, 0x0469, 0x0465, 0x0460, 0x045b, 0x0456, 0x0452, 0x044d, 0x0448, 0x0444,
|
||||
0x043f, 0x043b, 0x0436, 0x0432, 0x042d, 0x0429, 0x0425, 0x0421, 0x041c, 0x0418, 0x0414, 0x0410, 0x040c, 0x0408, 0x0404, 0x0400,
|
||||
0x03fc, 0x03f8, 0x03f4, 0x03f0, 0x03ec, 0x03e8, 0x03e4, 0x03e0, 0x03dd, 0x03d9, 0x03d5, 0x03d2, 0x03ce, 0x03ca, 0x03c7, 0x03c3,
|
||||
0x03c0, 0x03bc, 0x03b9, 0x03b5, 0x03b2, 0x03ae, 0x03ab, 0x03a8, 0x03a4, 0x03a1, 0x039e, 0x039b, 0x0397, 0x0394, 0x0391, 0x038e,
|
||||
0x038b, 0x0387, 0x0384, 0x0381, 0x037e, 0x037b, 0x0378, 0x0375, 0x0372, 0x036f, 0x036c, 0x0369, 0x0366, 0x0364, 0x0361, 0x035e,
|
||||
0x035b, 0x0358, 0x0355, 0x0353, 0x0350, 0x034d, 0x034a, 0x0348, 0x0345, 0x0342, 0x0340, 0x033d, 0x033a, 0x0338, 0x0335, 0x0333,
|
||||
0x0330, 0x032e, 0x032b, 0x0329, 0x0326, 0x0324, 0x0321, 0x031f, 0x031c, 0x031a, 0x0317, 0x0315, 0x0313, 0x0310, 0x030e, 0x030c,
|
||||
0x0309, 0x0307, 0x0305, 0x0303, 0x0300, 0x02fe, 0x02fc, 0x02fa, 0x02f7, 0x02f5, 0x02f3, 0x02f1, 0x02ef, 0x02ec, 0x02ea, 0x02e8,
|
||||
0x02e6, 0x02e4, 0x02e2, 0x02e0, 0x02de, 0x02dc, 0x02da, 0x02d8, 0x02d6, 0x02d4, 0x02d2, 0x02d0, 0x02ce, 0x02cc, 0x02ca, 0x02c8,
|
||||
0x02c6, 0x02c4, 0x02c2, 0x02c0, 0x02be, 0x02bc, 0x02bb, 0x02b9, 0x02b7, 0x02b5, 0x02b3, 0x02b1, 0x02b0, 0x02ae, 0x02ac, 0x02aa,
|
||||
0x02a8, 0x02a7, 0x02a5, 0x02a3, 0x02a1, 0x02a0, 0x029e, 0x029c, 0x029b, 0x0299, 0x0297, 0x0295, 0x0294, 0x0292, 0x0291, 0x028f,
|
||||
0x028d, 0x028c, 0x028a, 0x0288, 0x0287, 0x0285, 0x0284, 0x0282, 0x0280, 0x027f, 0x027d, 0x027c, 0x027a, 0x0279, 0x0277, 0x0276,
|
||||
0x0274, 0x0273, 0x0271, 0x0270, 0x026e, 0x026d, 0x026b, 0x026a, 0x0268, 0x0267, 0x0265, 0x0264, 0x0263, 0x0261, 0x0260, 0x025e,
|
||||
0x025d, 0x025c, 0x025a, 0x0259, 0x0257, 0x0256, 0x0255, 0x0253, 0x0252, 0x0251, 0x024f, 0x024e, 0x024d, 0x024b, 0x024a, 0x0249,
|
||||
0x0247, 0x0246, 0x0245, 0x0243, 0x0242, 0x0241, 0x0240, 0x023e, 0x023d, 0x023c, 0x023b, 0x0239, 0x0238, 0x0237, 0x0236, 0x0234,
|
||||
0x0233, 0x0232, 0x0231, 0x0230, 0x022e, 0x022d, 0x022c, 0x022b, 0x022a, 0x0229, 0x0227, 0x0226, 0x0225, 0x0224, 0x0223, 0x0222,
|
||||
0x0220, 0x021f, 0x021e, 0x021d, 0x021c, 0x021b, 0x021a, 0x0219, 0x0218, 0x0216, 0x0215, 0x0214, 0x0213, 0x0212, 0x0211, 0x0210,
|
||||
0x020f, 0x020e, 0x020d, 0x020c, 0x020b, 0x020a, 0x0209, 0x0208, 0x0207, 0x0206, 0x0205, 0x0204, 0x0203, 0x0202, 0x0201, 0x0200,
|
||||
0x01ff, 0x01fe, 0x01fd, 0x01fc, 0x01fb, 0x01fa, 0x01f9, 0x01f8, 0x01f7, 0x01f6, 0x01f5, 0x01f4, 0x01f3, 0x01f2, 0x01f1, 0x01f0,
|
||||
0x01ef, 0x01ee, 0x01ed, 0x01ec, 0x01eb, 0x01ea, 0x01e9, 0x01e9, 0x01e8, 0x01e7, 0x01e6, 0x01e5, 0x01e4, 0x01e3, 0x01e2, 0x01e1,
|
||||
0x01e0, 0x01e0, 0x01df, 0x01de, 0x01dd, 0x01dc, 0x01db, 0x01da, 0x01da, 0x01d9, 0x01d8, 0x01d7, 0x01d6, 0x01d5, 0x01d4, 0x01d4,
|
||||
0x01d3, 0x01d2, 0x01d1, 0x01d0, 0x01cf, 0x01cf, 0x01ce, 0x01cd, 0x01cc, 0x01cb, 0x01cb, 0x01ca, 0x01c9, 0x01c8, 0x01c7, 0x01c7,
|
||||
0x01c6, 0x01c5, 0x01c4, 0x01c3, 0x01c3, 0x01c2, 0x01c1, 0x01c0, 0x01c0, 0x01bf, 0x01be, 0x01bd, 0x01bd, 0x01bc, 0x01bb, 0x01ba,
|
||||
0x01ba, 0x01b9, 0x01b8, 0x01b7, 0x01b7, 0x01b6, 0x01b5, 0x01b4, 0x01b4, 0x01b3, 0x01b2, 0x01b2, 0x01b1, 0x01b0, 0x01af, 0x01af,
|
||||
0x01ae, 0x01ad, 0x01ad, 0x01ac, 0x01ab, 0x01aa, 0x01aa, 0x01a9, 0x01a8, 0x01a8, 0x01a7, 0x01a6, 0x01a6, 0x01a5, 0x01a4, 0x01a4,
|
||||
0x01a3, 0x01a2, 0x01a2, 0x01a1, 0x01a0, 0x01a0, 0x019f, 0x019e, 0x019e, 0x019d, 0x019c, 0x019c, 0x019b, 0x019a, 0x019a, 0x0199,
|
||||
0x0198, 0x0198, 0x0197, 0x0197, 0x0196, 0x0195, 0x0195, 0x0194, 0x0193, 0x0193, 0x0192, 0x0192, 0x0191, 0x0190, 0x0190, 0x018f,
|
||||
0x018f, 0x018e, 0x018d, 0x018d, 0x018c, 0x018b, 0x018b, 0x018a, 0x018a, 0x0189, 0x0189, 0x0188, 0x0187, 0x0187, 0x0186, 0x0186,
|
||||
0x0185, 0x0184, 0x0184, 0x0183, 0x0183, 0x0182, 0x0182, 0x0181, 0x0180, 0x0180, 0x017f, 0x017f, 0x017e, 0x017e, 0x017d, 0x017d,
|
||||
0x017c, 0x017b, 0x017b, 0x017a, 0x017a, 0x0179, 0x0179, 0x0178, 0x0178, 0x0177, 0x0177, 0x0176, 0x0175, 0x0175, 0x0174, 0x0174,
|
||||
0x0173, 0x0173, 0x0172, 0x0172, 0x0171, 0x0171, 0x0170, 0x0170, 0x016f, 0x016f, 0x016e, 0x016e, 0x016d, 0x016d, 0x016c, 0x016c,
|
||||
0x016b, 0x016b, 0x016a, 0x016a, 0x0169, 0x0169, 0x0168, 0x0168, 0x0167, 0x0167, 0x0166, 0x0166, 0x0165, 0x0165, 0x0164, 0x0164,
|
||||
0x0163, 0x0163, 0x0162, 0x0162, 0x0161, 0x0161, 0x0160, 0x0160, 0x015f, 0x015f, 0x015e, 0x015e, 0x015d, 0x015d, 0x015d, 0x015c,
|
||||
0x015c, 0x015b, 0x015b, 0x015a, 0x015a, 0x0159, 0x0159, 0x0158, 0x0158, 0x0158, 0x0157, 0x0157, 0x0156, 0x0156
|
||||
};
|
||||
static const uint16_t DivTableNEON[255*3+1] = {
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x1c71, 0x1af2, 0x1999, 0x1861, 0x1745, 0x1642, 0x1555, 0x147a, 0x13b1, 0x12f6, 0x1249, 0x11a7, 0x1111, 0x1084, 0x1000,
|
||||
0x0f83, 0x0f0f, 0x0ea0, 0x0e38, 0x0dd6, 0x0d79, 0x0d20, 0x0ccc, 0x0c7c, 0x0c30, 0x0be8, 0x0ba2, 0x0b60, 0x0b21, 0x0ae4, 0x0aaa,
|
||||
0x0a72, 0x0a3d, 0x0a0a, 0x09d8, 0x09a9, 0x097b, 0x094f, 0x0924, 0x08fb, 0x08d3, 0x08ad, 0x0888, 0x0864, 0x0842, 0x0820, 0x0800,
|
||||
0x07e0, 0x07c1, 0x07a4, 0x0787, 0x076b, 0x0750, 0x0736, 0x071c, 0x0703, 0x06eb, 0x06d3, 0x06bc, 0x06a6, 0x0690, 0x067b, 0x0666,
|
||||
0x0652, 0x063e, 0x062b, 0x0618, 0x0606, 0x05f4, 0x05e2, 0x05d1, 0x05c0, 0x05b0, 0x05a0, 0x0590, 0x0581, 0x0572, 0x0563, 0x0555,
|
||||
0x0547, 0x0539, 0x052b, 0x051e, 0x0511, 0x0505, 0x04f8, 0x04ec, 0x04e0, 0x04d4, 0x04c8, 0x04bd, 0x04b2, 0x04a7, 0x049c, 0x0492,
|
||||
0x0487, 0x047d, 0x0473, 0x0469, 0x0460, 0x0456, 0x044d, 0x0444, 0x043b, 0x0432, 0x0429, 0x0421, 0x0418, 0x0410, 0x0408, 0x0400,
|
||||
0x03f8, 0x03f0, 0x03e8, 0x03e0, 0x03d9, 0x03d2, 0x03ca, 0x03c3, 0x03bc, 0x03b5, 0x03ae, 0x03a8, 0x03a1, 0x039b, 0x0394, 0x038e,
|
||||
0x0387, 0x0381, 0x037b, 0x0375, 0x036f, 0x0369, 0x0364, 0x035e, 0x0358, 0x0353, 0x034d, 0x0348, 0x0342, 0x033d, 0x0338, 0x0333,
|
||||
0x032e, 0x0329, 0x0324, 0x031f, 0x031a, 0x0315, 0x0310, 0x030c, 0x0307, 0x0303, 0x02fe, 0x02fa, 0x02f5, 0x02f1, 0x02ec, 0x02e8,
|
||||
0x02e4, 0x02e0, 0x02dc, 0x02d8, 0x02d4, 0x02d0, 0x02cc, 0x02c8, 0x02c4, 0x02c0, 0x02bc, 0x02b9, 0x02b5, 0x02b1, 0x02ae, 0x02aa,
|
||||
0x02a7, 0x02a3, 0x02a0, 0x029c, 0x0299, 0x0295, 0x0292, 0x028f, 0x028c, 0x0288, 0x0285, 0x0282, 0x027f, 0x027c, 0x0279, 0x0276,
|
||||
0x0273, 0x0270, 0x026d, 0x026a, 0x0267, 0x0264, 0x0261, 0x025e, 0x025c, 0x0259, 0x0256, 0x0253, 0x0251, 0x024e, 0x024b, 0x0249,
|
||||
0x0246, 0x0243, 0x0241, 0x023e, 0x023c, 0x0239, 0x0237, 0x0234, 0x0232, 0x0230, 0x022d, 0x022b, 0x0229, 0x0226, 0x0224, 0x0222,
|
||||
0x021f, 0x021d, 0x021b, 0x0219, 0x0216, 0x0214, 0x0212, 0x0210, 0x020e, 0x020c, 0x020a, 0x0208, 0x0206, 0x0204, 0x0202, 0x0200,
|
||||
0x01fe, 0x01fc, 0x01fa, 0x01f8, 0x01f6, 0x01f4, 0x01f2, 0x01f0, 0x01ee, 0x01ec, 0x01ea, 0x01e9, 0x01e7, 0x01e5, 0x01e3, 0x01e1,
|
||||
0x01e0, 0x01de, 0x01dc, 0x01da, 0x01d9, 0x01d7, 0x01d5, 0x01d4, 0x01d2, 0x01d0, 0x01cf, 0x01cd, 0x01cb, 0x01ca, 0x01c8, 0x01c7,
|
||||
0x01c5, 0x01c3, 0x01c2, 0x01c0, 0x01bf, 0x01bd, 0x01bc, 0x01ba, 0x01b9, 0x01b7, 0x01b6, 0x01b4, 0x01b3, 0x01b2, 0x01b0, 0x01af,
|
||||
0x01ad, 0x01ac, 0x01aa, 0x01a9, 0x01a8, 0x01a6, 0x01a5, 0x01a4, 0x01a2, 0x01a1, 0x01a0, 0x019e, 0x019d, 0x019c, 0x019a, 0x0199,
|
||||
0x0198, 0x0197, 0x0195, 0x0194, 0x0193, 0x0192, 0x0190, 0x018f, 0x018e, 0x018d, 0x018b, 0x018a, 0x0189, 0x0188, 0x0187, 0x0186,
|
||||
0x0184, 0x0183, 0x0182, 0x0181, 0x0180, 0x017f, 0x017e, 0x017d, 0x017b, 0x017a, 0x0179, 0x0178, 0x0177, 0x0176, 0x0175, 0x0174,
|
||||
0x0173, 0x0172, 0x0171, 0x0170, 0x016f, 0x016e, 0x016d, 0x016c, 0x016b, 0x016a, 0x0169, 0x0168, 0x0167, 0x0166, 0x0165, 0x0164,
|
||||
0x0163, 0x0162, 0x0161, 0x0160, 0x015f, 0x015e, 0x015d, 0x015c, 0x015b, 0x015a, 0x0159, 0x0158, 0x0158, 0x0157, 0x0156, 0x0155,
|
||||
0x0154, 0x0153, 0x0152, 0x0151, 0x0150, 0x0150, 0x014f, 0x014e, 0x014d, 0x014c, 0x014b, 0x014a, 0x014a, 0x0149, 0x0148, 0x0147,
|
||||
0x0146, 0x0146, 0x0145, 0x0144, 0x0143, 0x0142, 0x0142, 0x0141, 0x0140, 0x013f, 0x013e, 0x013e, 0x013d, 0x013c, 0x013b, 0x013b,
|
||||
0x013a, 0x0139, 0x0138, 0x0138, 0x0137, 0x0136, 0x0135, 0x0135, 0x0134, 0x0133, 0x0132, 0x0132, 0x0131, 0x0130, 0x0130, 0x012f,
|
||||
0x012e, 0x012e, 0x012d, 0x012c, 0x012b, 0x012b, 0x012a, 0x0129, 0x0129, 0x0128, 0x0127, 0x0127, 0x0126, 0x0125, 0x0125, 0x0124,
|
||||
0x0123, 0x0123, 0x0122, 0x0121, 0x0121, 0x0120, 0x0120, 0x011f, 0x011e, 0x011e, 0x011d, 0x011c, 0x011c, 0x011b, 0x011b, 0x011a,
|
||||
0x0119, 0x0119, 0x0118, 0x0118, 0x0117, 0x0116, 0x0116, 0x0115, 0x0115, 0x0114, 0x0113, 0x0113, 0x0112, 0x0112, 0x0111, 0x0111,
|
||||
0x0110, 0x010f, 0x010f, 0x010e, 0x010e, 0x010d, 0x010d, 0x010c, 0x010c, 0x010b, 0x010a, 0x010a, 0x0109, 0x0109, 0x0108, 0x0108,
|
||||
0x0107, 0x0107, 0x0106, 0x0106, 0x0105, 0x0105, 0x0104, 0x0104, 0x0103, 0x0103, 0x0102, 0x0102, 0x0101, 0x0101, 0x0100, 0x0100,
|
||||
0x00ff, 0x00ff, 0x00fe, 0x00fe, 0x00fd, 0x00fd, 0x00fc, 0x00fc, 0x00fb, 0x00fb, 0x00fa, 0x00fa, 0x00f9, 0x00f9, 0x00f8, 0x00f8,
|
||||
0x00f7, 0x00f7, 0x00f6, 0x00f6, 0x00f5, 0x00f5, 0x00f4, 0x00f4, 0x00f4, 0x00f3, 0x00f3, 0x00f2, 0x00f2, 0x00f1, 0x00f1, 0x00f0,
|
||||
0x00f0, 0x00f0, 0x00ef, 0x00ef, 0x00ee, 0x00ee, 0x00ed, 0x00ed, 0x00ed, 0x00ec, 0x00ec, 0x00eb, 0x00eb, 0x00ea, 0x00ea, 0x00ea,
|
||||
0x00e9, 0x00e9, 0x00e8, 0x00e8, 0x00e7, 0x00e7, 0x00e7, 0x00e6, 0x00e6, 0x00e5, 0x00e5, 0x00e5, 0x00e4, 0x00e4, 0x00e3, 0x00e3,
|
||||
0x00e3, 0x00e2, 0x00e2, 0x00e1, 0x00e1, 0x00e1, 0x00e0, 0x00e0, 0x00e0, 0x00df, 0x00df, 0x00de, 0x00de, 0x00de, 0x00dd, 0x00dd,
|
||||
0x00dd, 0x00dc, 0x00dc, 0x00db, 0x00db, 0x00db, 0x00da, 0x00da, 0x00da, 0x00d9, 0x00d9, 0x00d9, 0x00d8, 0x00d8, 0x00d7, 0x00d7,
|
||||
0x00d7, 0x00d6, 0x00d6, 0x00d6, 0x00d5, 0x00d5, 0x00d5, 0x00d4, 0x00d4, 0x00d4, 0x00d3, 0x00d3, 0x00d3, 0x00d2, 0x00d2, 0x00d2,
|
||||
0x00d1, 0x00d1, 0x00d1, 0x00d0, 0x00d0, 0x00d0, 0x00cf, 0x00cf, 0x00cf, 0x00ce, 0x00ce, 0x00ce, 0x00cd, 0x00cd, 0x00cd, 0x00cc,
|
||||
0x00cc, 0x00cc, 0x00cb, 0x00cb, 0x00cb, 0x00ca, 0x00ca, 0x00ca, 0x00c9, 0x00c9, 0x00c9, 0x00c9, 0x00c8, 0x00c8, 0x00c8, 0x00c7,
|
||||
0x00c7, 0x00c7, 0x00c6, 0x00c6, 0x00c6, 0x00c5, 0x00c5, 0x00c5, 0x00c5, 0x00c4, 0x00c4, 0x00c4, 0x00c3, 0x00c3, 0x00c3, 0x00c3,
|
||||
0x00c2, 0x00c2, 0x00c2, 0x00c1, 0x00c1, 0x00c1, 0x00c1, 0x00c0, 0x00c0, 0x00c0, 0x00bf, 0x00bf, 0x00bf, 0x00bf, 0x00be, 0x00be,
|
||||
0x00be, 0x00bd, 0x00bd, 0x00bd, 0x00bd, 0x00bc, 0x00bc, 0x00bc, 0x00bc, 0x00bb, 0x00bb, 0x00bb, 0x00ba, 0x00ba, 0x00ba, 0x00ba,
|
||||
0x00b9, 0x00b9, 0x00b9, 0x00b9, 0x00b8, 0x00b8, 0x00b8, 0x00b8, 0x00b7, 0x00b7, 0x00b7, 0x00b7, 0x00b6, 0x00b6, 0x00b6, 0x00b6,
|
||||
0x00b5, 0x00b5, 0x00b5, 0x00b5, 0x00b4, 0x00b4, 0x00b4, 0x00b4, 0x00b3, 0x00b3, 0x00b3, 0x00b3, 0x00b2, 0x00b2, 0x00b2, 0x00b2,
|
||||
0x00b1, 0x00b1, 0x00b1, 0x00b1, 0x00b0, 0x00b0, 0x00b0, 0x00b0, 0x00af, 0x00af, 0x00af, 0x00af, 0x00ae, 0x00ae, 0x00ae, 0x00ae,
|
||||
0x00ae, 0x00ad, 0x00ad, 0x00ad, 0x00ad, 0x00ac, 0x00ac, 0x00ac, 0x00ac, 0x00ac, 0x00ab, 0x00ab, 0x00ab, 0x00ab,
|
||||
};
|
||||
|
||||
|
||||
static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
|
||||
{
|
||||
#ifdef __SSE4_1__
|
||||
__m128i px0 = _mm_loadu_si128(((__m128i*)src) + 0);
|
||||
__m128i px1 = _mm_loadu_si128(((__m128i*)src) + 1);
|
||||
__m128i px2 = _mm_loadu_si128(((__m128i*)src) + 2);
|
||||
__m128i px3 = _mm_loadu_si128(((__m128i*)src) + 3);
|
||||
|
||||
__m128i smask = _mm_set1_epi32( 0xF8FCF8 );
|
||||
__m128i sd0 = _mm_and_si128( px0, smask );
|
||||
__m128i sd1 = _mm_and_si128( px1, smask );
|
||||
__m128i sd2 = _mm_and_si128( px2, smask );
|
||||
__m128i sd3 = _mm_and_si128( px3, smask );
|
||||
|
||||
__m128i sc = _mm_shuffle_epi32(sd0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
__m128i sc0 = _mm_cmpeq_epi8(sd0, sc);
|
||||
__m128i sc1 = _mm_cmpeq_epi8(sd1, sc);
|
||||
__m128i sc2 = _mm_cmpeq_epi8(sd2, sc);
|
||||
__m128i sc3 = _mm_cmpeq_epi8(sd3, sc);
|
||||
|
||||
__m128i sm0 = _mm_and_si128(sc0, sc1);
|
||||
__m128i sm1 = _mm_and_si128(sc2, sc3);
|
||||
__m128i sm = _mm_and_si128(sm0, sm1);
|
||||
|
||||
if( _mm_testc_si128(sm, _mm_set1_epi32(-1)) )
|
||||
{
|
||||
return uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
|
||||
}
|
||||
|
||||
__m128i amask = _mm_set1_epi32( 0xFFFFFF );
|
||||
px0 = _mm_and_si128( px0, amask );
|
||||
px1 = _mm_and_si128( px1, amask );
|
||||
px2 = _mm_and_si128( px2, amask );
|
||||
px3 = _mm_and_si128( px3, amask );
|
||||
|
||||
__m128i min0 = _mm_min_epu8( px0, px1 );
|
||||
__m128i min1 = _mm_min_epu8( px2, px3 );
|
||||
__m128i min2 = _mm_min_epu8( min0, min1 );
|
||||
|
||||
__m128i max0 = _mm_max_epu8( px0, px1 );
|
||||
__m128i max1 = _mm_max_epu8( px2, px3 );
|
||||
__m128i max2 = _mm_max_epu8( max0, max1 );
|
||||
|
||||
__m128i min3 = _mm_shuffle_epi32( min2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
|
||||
__m128i max3 = _mm_shuffle_epi32( max2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
|
||||
__m128i min4 = _mm_min_epu8( min2, min3 );
|
||||
__m128i max4 = _mm_max_epu8( max2, max3 );
|
||||
|
||||
__m128i min5 = _mm_shuffle_epi32( min4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
|
||||
__m128i max5 = _mm_shuffle_epi32( max4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
|
||||
__m128i rmin = _mm_min_epu8( min4, min5 );
|
||||
__m128i rmax = _mm_max_epu8( max4, max5 );
|
||||
|
||||
__m128i range1 = _mm_subs_epu8( rmax, rmin );
|
||||
__m128i range2 = _mm_sad_epu8( rmax, rmin );
|
||||
|
||||
uint32_t vrange = _mm_cvtsi128_si32( range2 ) >> 1;
|
||||
__m128i range = _mm_set1_epi16( DivTable[vrange] );
|
||||
|
||||
__m128i inset1 = _mm_srli_epi16( range1, 4 );
|
||||
__m128i inset = _mm_and_si128( inset1, _mm_set1_epi8( 0xF ) );
|
||||
__m128i min = _mm_adds_epu8( rmin, inset );
|
||||
__m128i max = _mm_subs_epu8( rmax, inset );
|
||||
|
||||
__m128i c0 = _mm_subs_epu8( px0, rmin );
|
||||
__m128i c1 = _mm_subs_epu8( px1, rmin );
|
||||
__m128i c2 = _mm_subs_epu8( px2, rmin );
|
||||
__m128i c3 = _mm_subs_epu8( px3, rmin );
|
||||
|
||||
__m128i is0 = _mm_maddubs_epi16( c0, _mm_set1_epi8( 1 ) );
|
||||
__m128i is1 = _mm_maddubs_epi16( c1, _mm_set1_epi8( 1 ) );
|
||||
__m128i is2 = _mm_maddubs_epi16( c2, _mm_set1_epi8( 1 ) );
|
||||
__m128i is3 = _mm_maddubs_epi16( c3, _mm_set1_epi8( 1 ) );
|
||||
|
||||
__m128i s0 = _mm_hadd_epi16( is0, is1 );
|
||||
__m128i s1 = _mm_hadd_epi16( is2, is3 );
|
||||
|
||||
__m128i m0 = _mm_mulhi_epu16( s0, range );
|
||||
__m128i m1 = _mm_mulhi_epu16( s1, range );
|
||||
|
||||
__m128i p0 = _mm_packus_epi16( m0, m1 );
|
||||
|
||||
__m128i p1 = _mm_or_si128( _mm_srai_epi32( p0, 6 ), _mm_srai_epi32( p0, 12 ) );
|
||||
__m128i p2 = _mm_or_si128( _mm_srai_epi32( p0, 18 ), p0 );
|
||||
__m128i p3 = _mm_or_si128( p1, p2 );
|
||||
__m128i p =_mm_shuffle_epi8( p3, _mm_set1_epi32( 0x0C080400 ) );
|
||||
|
||||
uint32_t vmin = _mm_cvtsi128_si32( min );
|
||||
uint32_t vmax = _mm_cvtsi128_si32( max );
|
||||
uint32_t vp = _mm_cvtsi128_si32( p );
|
||||
|
||||
return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) );
|
||||
#elif defined __ARM_NEON
|
||||
# ifdef __aarch64__
|
||||
uint8x16x4_t px = vld4q_u8( src );
|
||||
|
||||
uint8x16_t lr = px.val[0];
|
||||
uint8x16_t lg = px.val[1];
|
||||
uint8x16_t lb = px.val[2];
|
||||
|
||||
uint8_t rmaxr = vmaxvq_u8( lr );
|
||||
uint8_t rmaxg = vmaxvq_u8( lg );
|
||||
uint8_t rmaxb = vmaxvq_u8( lb );
|
||||
|
||||
uint8_t rminr = vminvq_u8( lr );
|
||||
uint8_t rming = vminvq_u8( lg );
|
||||
uint8_t rminb = vminvq_u8( lb );
|
||||
|
||||
int rr = rmaxr - rminr;
|
||||
int rg = rmaxg - rming;
|
||||
int rb = rmaxb - rminb;
|
||||
|
||||
int vrange1 = rr + rg + rb;
|
||||
uint16_t vrange2 = DivTableNEON[vrange1];
|
||||
|
||||
uint8_t insetr = rr >> 4;
|
||||
uint8_t insetg = rg >> 4;
|
||||
uint8_t insetb = rb >> 4;
|
||||
|
||||
uint8_t minr = rminr + insetr;
|
||||
uint8_t ming = rming + insetg;
|
||||
uint8_t minb = rminb + insetb;
|
||||
|
||||
uint8_t maxr = rmaxr - insetr;
|
||||
uint8_t maxg = rmaxg - insetg;
|
||||
uint8_t maxb = rmaxb - insetb;
|
||||
|
||||
uint8x16_t cr = vsubq_u8( lr, vdupq_n_u8( rminr ) );
|
||||
uint8x16_t cg = vsubq_u8( lg, vdupq_n_u8( rming ) );
|
||||
uint8x16_t cb = vsubq_u8( lb, vdupq_n_u8( rminb ) );
|
||||
|
||||
uint16x8_t is0l = vaddl_u8( vget_low_u8( cr ), vget_low_u8( cg ) );
|
||||
uint16x8_t is0h = vaddl_u8( vget_high_u8( cr ), vget_high_u8( cg ) );
|
||||
uint16x8_t is1l = vaddw_u8( is0l, vget_low_u8( cb ) );
|
||||
uint16x8_t is1h = vaddw_u8( is0h, vget_high_u8( cb ) );
|
||||
|
||||
int16x8_t range = vdupq_n_s16( vrange2 );
|
||||
uint16x8_t m0 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( is1l ), range ) );
|
||||
uint16x8_t m1 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( is1h ), range ) );
|
||||
|
||||
uint8x8_t p00 = vmovn_u16( m0 );
|
||||
uint8x8_t p01 = vmovn_u16( m1 );
|
||||
uint8x16_t p0 = vcombine_u8( p00, p01 );
|
||||
|
||||
uint32x4_t p1 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 6 ), vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 12 ) );
|
||||
uint32x4_t p2 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 18 ), vreinterpretq_u32_u8( p0 ) );
|
||||
uint32x4_t p3 = vaddq_u32( p1, p2 );
|
||||
|
||||
uint16x4x2_t p4 = vuzp_u16( vget_low_u16( vreinterpretq_u16_u32( p3 ) ), vget_high_u16( vreinterpretq_u16_u32( p3 ) ) );
|
||||
uint8x8x2_t p = vuzp_u8( vreinterpret_u8_u16( p4.val[0] ), vreinterpret_u8_u16( p4.val[0] ) );
|
||||
|
||||
uint32_t vp;
|
||||
vst1_lane_u32( &vp, vreinterpret_u32_u8( p.val[0] ), 0 );
|
||||
|
||||
return uint64_t( ( uint64_t( to565( minr, ming, minb ) ) << 16 ) | to565( maxr, maxg, maxb ) | ( uint64_t( vp ) << 32 ) );
|
||||
# else
|
||||
uint32x4_t px0 = vld1q_u32( (uint32_t*)src );
|
||||
uint32x4_t px1 = vld1q_u32( (uint32_t*)src + 4 );
|
||||
uint32x4_t px2 = vld1q_u32( (uint32_t*)src + 8 );
|
||||
uint32x4_t px3 = vld1q_u32( (uint32_t*)src + 12 );
|
||||
|
||||
uint32x4_t smask = vdupq_n_u32( 0xF8FCF8 );
|
||||
uint32x4_t sd0 = vandq_u32( smask, px0 );
|
||||
uint32x4_t sd1 = vandq_u32( smask, px1 );
|
||||
uint32x4_t sd2 = vandq_u32( smask, px2 );
|
||||
uint32x4_t sd3 = vandq_u32( smask, px3 );
|
||||
|
||||
uint32x4_t sc = vdupq_n_u32( sd0[0] );
|
||||
|
||||
uint32x4_t sc0 = vceqq_u32( sd0, sc );
|
||||
uint32x4_t sc1 = vceqq_u32( sd1, sc );
|
||||
uint32x4_t sc2 = vceqq_u32( sd2, sc );
|
||||
uint32x4_t sc3 = vceqq_u32( sd3, sc );
|
||||
|
||||
uint32x4_t sm0 = vandq_u32( sc0, sc1 );
|
||||
uint32x4_t sm1 = vandq_u32( sc2, sc3 );
|
||||
int64x2_t sm = vreinterpretq_s64_u32( vandq_u32( sm0, sm1 ) );
|
||||
|
||||
if( sm[0] == -1 && sm[1] == -1 )
|
||||
{
|
||||
return uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
|
||||
}
|
||||
|
||||
uint32x4_t mask = vdupq_n_u32( 0xFFFFFF );
|
||||
uint8x16_t l0 = vreinterpretq_u8_u32( vandq_u32( mask, px0 ) );
|
||||
uint8x16_t l1 = vreinterpretq_u8_u32( vandq_u32( mask, px1 ) );
|
||||
uint8x16_t l2 = vreinterpretq_u8_u32( vandq_u32( mask, px2 ) );
|
||||
uint8x16_t l3 = vreinterpretq_u8_u32( vandq_u32( mask, px3 ) );
|
||||
|
||||
uint8x16_t min0 = vminq_u8( l0, l1 );
|
||||
uint8x16_t min1 = vminq_u8( l2, l3 );
|
||||
uint8x16_t min2 = vminq_u8( min0, min1 );
|
||||
|
||||
uint8x16_t max0 = vmaxq_u8( l0, l1 );
|
||||
uint8x16_t max1 = vmaxq_u8( l2, l3 );
|
||||
uint8x16_t max2 = vmaxq_u8( max0, max1 );
|
||||
|
||||
uint8x16_t min3 = vreinterpretq_u8_u32( vrev64q_u32( vreinterpretq_u32_u8( min2 ) ) );
|
||||
uint8x16_t max3 = vreinterpretq_u8_u32( vrev64q_u32( vreinterpretq_u32_u8( max2 ) ) );
|
||||
|
||||
uint8x16_t min4 = vminq_u8( min2, min3 );
|
||||
uint8x16_t max4 = vmaxq_u8( max2, max3 );
|
||||
|
||||
uint8x16_t min5 = vcombine_u8( vget_high_u8( min4 ), vget_low_u8( min4 ) );
|
||||
uint8x16_t max5 = vcombine_u8( vget_high_u8( max4 ), vget_low_u8( max4 ) );
|
||||
|
||||
uint8x16_t rmin = vminq_u8( min4, min5 );
|
||||
uint8x16_t rmax = vmaxq_u8( max4, max5 );
|
||||
|
||||
uint8x16_t range1 = vsubq_u8( rmax, rmin );
|
||||
uint8x8_t range2 = vget_low_u8( range1 );
|
||||
uint8x8x2_t range3 = vzip_u8( range2, vdup_n_u8( 0 ) );
|
||||
uint16x4_t range4 = vreinterpret_u16_u8( range3.val[0] );
|
||||
|
||||
uint16_t vrange1;
|
||||
uint16x4_t range5 = vpadd_u16( range4, range4 );
|
||||
uint16x4_t range6 = vpadd_u16( range5, range5 );
|
||||
vst1_lane_u16( &vrange1, range6, 0 );
|
||||
|
||||
uint32_t vrange2 = ( 2 << 16 ) / uint32_t( vrange1 + 1 );
|
||||
uint16x8_t range = vdupq_n_u16( vrange2 );
|
||||
|
||||
uint8x16_t inset = vshrq_n_u8( range1, 4 );
|
||||
uint8x16_t min = vaddq_u8( rmin, inset );
|
||||
uint8x16_t max = vsubq_u8( rmax, inset );
|
||||
|
||||
uint8x16_t c0 = vsubq_u8( l0, rmin );
|
||||
uint8x16_t c1 = vsubq_u8( l1, rmin );
|
||||
uint8x16_t c2 = vsubq_u8( l2, rmin );
|
||||
uint8x16_t c3 = vsubq_u8( l3, rmin );
|
||||
|
||||
uint16x8_t is0 = vpaddlq_u8( c0 );
|
||||
uint16x8_t is1 = vpaddlq_u8( c1 );
|
||||
uint16x8_t is2 = vpaddlq_u8( c2 );
|
||||
uint16x8_t is3 = vpaddlq_u8( c3 );
|
||||
|
||||
uint16x4_t is4 = vpadd_u16( vget_low_u16( is0 ), vget_high_u16( is0 ) );
|
||||
uint16x4_t is5 = vpadd_u16( vget_low_u16( is1 ), vget_high_u16( is1 ) );
|
||||
uint16x4_t is6 = vpadd_u16( vget_low_u16( is2 ), vget_high_u16( is2 ) );
|
||||
uint16x4_t is7 = vpadd_u16( vget_low_u16( is3 ), vget_high_u16( is3 ) );
|
||||
|
||||
uint16x8_t s0 = vcombine_u16( is4, is5 );
|
||||
uint16x8_t s1 = vcombine_u16( is6, is7 );
|
||||
|
||||
uint16x8_t m0 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( s0 ), vreinterpretq_s16_u16( range ) ) );
|
||||
uint16x8_t m1 = vreinterpretq_u16_s16( vqdmulhq_s16( vreinterpretq_s16_u16( s1 ), vreinterpretq_s16_u16( range ) ) );
|
||||
|
||||
uint8x8_t p00 = vmovn_u16( m0 );
|
||||
uint8x8_t p01 = vmovn_u16( m1 );
|
||||
uint8x16_t p0 = vcombine_u8( p00, p01 );
|
||||
|
||||
uint32x4_t p1 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 6 ), vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 12 ) );
|
||||
uint32x4_t p2 = vaddq_u32( vshrq_n_u32( vreinterpretq_u32_u8( p0 ), 18 ), vreinterpretq_u32_u8( p0 ) );
|
||||
uint32x4_t p3 = vaddq_u32( p1, p2 );
|
||||
|
||||
uint16x4x2_t p4 = vuzp_u16( vget_low_u16( vreinterpretq_u16_u32( p3 ) ), vget_high_u16( vreinterpretq_u16_u32( p3 ) ) );
|
||||
uint8x8x2_t p = vuzp_u8( vreinterpret_u8_u16( p4.val[0] ), vreinterpret_u8_u16( p4.val[0] ) );
|
||||
|
||||
uint32_t vmin, vmax, vp;
|
||||
vst1q_lane_u32( &vmin, vreinterpretq_u32_u8( min ), 0 );
|
||||
vst1q_lane_u32( &vmax, vreinterpretq_u32_u8( max ), 0 );
|
||||
vst1_lane_u32( &vp, vreinterpret_u32_u8( p.val[0] ), 0 );
|
||||
|
||||
return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) );
|
||||
# endif
|
||||
#else
|
||||
uint32_t ref;
|
||||
memcpy( &ref, src, 4 );
|
||||
uint32_t refMask = ref & 0xF8FCF8;
|
||||
auto stmp = src + 4;
|
||||
for( int i=1; i<16; i++ )
|
||||
{
|
||||
uint32_t px;
|
||||
memcpy( &px, stmp, 4 );
|
||||
if( ( px & 0xF8FCF8 ) != refMask ) break;
|
||||
stmp += 4;
|
||||
}
|
||||
if( stmp == src + 64 )
|
||||
{
|
||||
return uint64_t( to565( ref ) ) << 16;
|
||||
}
|
||||
|
||||
uint8_t min[3] = { src[0], src[1], src[2] };
|
||||
uint8_t max[3] = { src[0], src[1], src[2] };
|
||||
auto tmp = src + 4;
|
||||
for( int i=1; i<16; i++ )
|
||||
{
|
||||
for( int j=0; j<3; j++ )
|
||||
{
|
||||
if( tmp[j] < min[j] ) min[j] = tmp[j];
|
||||
else if( tmp[j] > max[j] ) max[j] = tmp[j];
|
||||
}
|
||||
tmp += 4;
|
||||
}
|
||||
|
||||
const uint32_t range = DivTable[max[0] - min[0] + max[1] - min[1] + max[2] - min[2]];
|
||||
const uint32_t rmin = min[0] + min[1] + min[2];
|
||||
for( int i=0; i<3; i++ )
|
||||
{
|
||||
const uint8_t inset = ( max[i] - min[i] ) >> 4;
|
||||
min[i] += inset;
|
||||
max[i] -= inset;
|
||||
}
|
||||
|
||||
uint32_t data = 0;
|
||||
for( int i=0; i<16; i++ )
|
||||
{
|
||||
const uint32_t c = src[0] + src[1] + src[2] - rmin;
|
||||
const uint8_t idx = ( c * range ) >> 16;
|
||||
data |= idx << (i*2);
|
||||
src += 4;
|
||||
}
|
||||
|
||||
return uint64_t( ( uint64_t( to565( min[0], min[1], min[2] ) ) << 16 ) | to565( max[0], max[1], max[2] ) | ( uint64_t( data ) << 32 ) );
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef __AVX2__
|
||||
static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
|
||||
{
|
||||
__m256i px0 = _mm256_loadu_si256(((__m256i*)src) + 0);
|
||||
__m256i px1 = _mm256_loadu_si256(((__m256i*)src) + 1);
|
||||
__m256i px2 = _mm256_loadu_si256(((__m256i*)src) + 2);
|
||||
__m256i px3 = _mm256_loadu_si256(((__m256i*)src) + 3);
|
||||
|
||||
__m256i smask = _mm256_set1_epi32( 0xF8FCF8 );
|
||||
__m256i sd0 = _mm256_and_si256( px0, smask );
|
||||
__m256i sd1 = _mm256_and_si256( px1, smask );
|
||||
__m256i sd2 = _mm256_and_si256( px2, smask );
|
||||
__m256i sd3 = _mm256_and_si256( px3, smask );
|
||||
|
||||
__m256i sc = _mm256_shuffle_epi32(sd0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
__m256i sc0 = _mm256_cmpeq_epi8( sd0, sc );
|
||||
__m256i sc1 = _mm256_cmpeq_epi8( sd1, sc );
|
||||
__m256i sc2 = _mm256_cmpeq_epi8( sd2, sc );
|
||||
__m256i sc3 = _mm256_cmpeq_epi8( sd3, sc );
|
||||
|
||||
__m256i sm0 = _mm256_and_si256( sc0, sc1 );
|
||||
__m256i sm1 = _mm256_and_si256( sc2, sc3 );
|
||||
__m256i sm = _mm256_and_si256( sm0, sm1 );
|
||||
|
||||
const int64_t solid0 = 1 - _mm_testc_si128( _mm256_castsi256_si128( sm ), _mm_set1_epi32( -1 ) );
|
||||
const int64_t solid1 = 1 - _mm_testc_si128( _mm256_extracti128_si256( sm, 1 ), _mm_set1_epi32( -1 ) );
|
||||
|
||||
if( solid0 + solid1 == 0 )
|
||||
{
|
||||
const auto c0 = uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
|
||||
const auto c1 = uint64_t( to565( src[16], src[17], src[18] ) ) << 16;
|
||||
memcpy( dst, &c0, 8 );
|
||||
memcpy( dst+8, &c1, 8 );
|
||||
dst += 16;
|
||||
return;
|
||||
}
|
||||
|
||||
__m256i amask = _mm256_set1_epi32( 0xFFFFFF );
|
||||
px0 = _mm256_and_si256( px0, amask );
|
||||
px1 = _mm256_and_si256( px1, amask );
|
||||
px2 = _mm256_and_si256( px2, amask );
|
||||
px3 = _mm256_and_si256( px3, amask );
|
||||
|
||||
__m256i min0 = _mm256_min_epu8( px0, px1 );
|
||||
__m256i min1 = _mm256_min_epu8( px2, px3 );
|
||||
__m256i min2 = _mm256_min_epu8( min0, min1 );
|
||||
|
||||
__m256i max0 = _mm256_max_epu8( px0, px1 );
|
||||
__m256i max1 = _mm256_max_epu8( px2, px3 );
|
||||
__m256i max2 = _mm256_max_epu8( max0, max1 );
|
||||
|
||||
__m256i min3 = _mm256_shuffle_epi32( min2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
|
||||
__m256i max3 = _mm256_shuffle_epi32( max2, _MM_SHUFFLE( 2, 3, 0, 1 ) );
|
||||
__m256i min4 = _mm256_min_epu8( min2, min3 );
|
||||
__m256i max4 = _mm256_max_epu8( max2, max3 );
|
||||
|
||||
__m256i min5 = _mm256_shuffle_epi32( min4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
|
||||
__m256i max5 = _mm256_shuffle_epi32( max4, _MM_SHUFFLE( 0, 0, 2, 2 ) );
|
||||
__m256i rmin = _mm256_min_epu8( min4, min5 );
|
||||
__m256i rmax = _mm256_max_epu8( max4, max5 );
|
||||
|
||||
__m256i range1 = _mm256_subs_epu8( rmax, rmin );
|
||||
__m256i range2 = _mm256_sad_epu8( rmax, rmin );
|
||||
|
||||
uint16_t vrange0 = DivTable[_mm256_cvtsi256_si32( range2 ) >> 1];
|
||||
uint16_t vrange1 = DivTable[_mm256_extract_epi16( range2, 8 ) >> 1];
|
||||
__m256i range00 = _mm256_set1_epi16( vrange0 );
|
||||
__m256i range = _mm256_inserti128_si256( range00, _mm_set1_epi16( vrange1 ), 1 );
|
||||
|
||||
__m256i inset1 = _mm256_srli_epi16( range1, 4 );
|
||||
__m256i inset = _mm256_and_si256( inset1, _mm256_set1_epi8( 0xF ) );
|
||||
__m256i min = _mm256_adds_epu8( rmin, inset );
|
||||
__m256i max = _mm256_subs_epu8( rmax, inset );
|
||||
|
||||
__m256i c0 = _mm256_subs_epu8( px0, rmin );
|
||||
__m256i c1 = _mm256_subs_epu8( px1, rmin );
|
||||
__m256i c2 = _mm256_subs_epu8( px2, rmin );
|
||||
__m256i c3 = _mm256_subs_epu8( px3, rmin );
|
||||
|
||||
__m256i is0 = _mm256_maddubs_epi16( c0, _mm256_set1_epi8( 1 ) );
|
||||
__m256i is1 = _mm256_maddubs_epi16( c1, _mm256_set1_epi8( 1 ) );
|
||||
__m256i is2 = _mm256_maddubs_epi16( c2, _mm256_set1_epi8( 1 ) );
|
||||
__m256i is3 = _mm256_maddubs_epi16( c3, _mm256_set1_epi8( 1 ) );
|
||||
|
||||
__m256i s0 = _mm256_hadd_epi16( is0, is1 );
|
||||
__m256i s1 = _mm256_hadd_epi16( is2, is3 );
|
||||
|
||||
__m256i m0 = _mm256_mulhi_epu16( s0, range );
|
||||
__m256i m1 = _mm256_mulhi_epu16( s1, range );
|
||||
|
||||
__m256i p0 = _mm256_packus_epi16( m0, m1 );
|
||||
|
||||
__m256i p1 = _mm256_or_si256( _mm256_srai_epi32( p0, 6 ), _mm256_srai_epi32( p0, 12 ) );
|
||||
__m256i p2 = _mm256_or_si256( _mm256_srai_epi32( p0, 18 ), p0 );
|
||||
__m256i p3 = _mm256_or_si256( p1, p2 );
|
||||
__m256i p =_mm256_shuffle_epi8( p3, _mm256_set1_epi32( 0x0C080400 ) );
|
||||
|
||||
__m256i mm0 = _mm256_unpacklo_epi8( _mm256_setzero_si256(), min );
|
||||
__m256i mm1 = _mm256_unpacklo_epi8( _mm256_setzero_si256(), max );
|
||||
__m256i mm2 = _mm256_unpacklo_epi64( mm1, mm0 );
|
||||
__m256i mmr = _mm256_slli_epi64( _mm256_srli_epi64( mm2, 11 ), 11 );
|
||||
__m256i mmg = _mm256_slli_epi64( _mm256_srli_epi64( mm2, 26 ), 5 );
|
||||
__m256i mmb = _mm256_srli_epi64( _mm256_slli_epi64( mm2, 16 ), 59 );
|
||||
__m256i mm3 = _mm256_or_si256( mmr, mmg );
|
||||
__m256i mm4 = _mm256_or_si256( mm3, mmb );
|
||||
__m256i mm5 = _mm256_shuffle_epi8( mm4, _mm256_set1_epi32( 0x09080100 ) );
|
||||
|
||||
__m256i d0 = _mm256_unpacklo_epi32( mm5, p );
|
||||
__m256i d1 = _mm256_permute4x64_epi64( d0, _MM_SHUFFLE( 3, 2, 2, 0 ) );
|
||||
__m128i d2 = _mm256_castsi256_si128( d1 );
|
||||
|
||||
__m128i mask = _mm_set_epi64x( 0xFFFF0000 | -solid1, 0xFFFF0000 | -solid0 );
|
||||
__m128i d3 = _mm_and_si128( d2, mask );
|
||||
_mm_storeu_si128( (__m128i*)dst, d3 );
|
||||
dst += 16;
|
||||
}
|
||||
#endif
|
||||
|
||||
void CompressImageDxt1( const char* src, char* dst, int w, int h )
|
||||
{
|
||||
assert( (w % 4) == 0 && (h % 4) == 0 );
|
||||
|
||||
#ifdef __AVX2__
|
||||
if( w%8 == 0 )
|
||||
{
|
||||
uint32_t buf[8*4];
|
||||
int i = 0;
|
||||
|
||||
auto blocks = w * h / 32;
|
||||
do
|
||||
{
|
||||
auto tmp = (char*)buf;
|
||||
memcpy( tmp, src, 8*4 );
|
||||
memcpy( tmp + 8*4, src + w * 4, 8*4 );
|
||||
memcpy( tmp + 16*4, src + w * 8, 8*4 );
|
||||
memcpy( tmp + 24*4, src + w * 12, 8*4 );
|
||||
src += 8*4;
|
||||
if( ++i == w/8 )
|
||||
{
|
||||
src += w * 3 * 4;
|
||||
i = 0;
|
||||
}
|
||||
|
||||
ProcessRGB_AVX( (uint8_t*)buf, dst );
|
||||
}
|
||||
while( --blocks );
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
uint32_t buf[4*4];
|
||||
int i = 0;
|
||||
|
||||
auto ptr = dst;
|
||||
auto blocks = w * h / 16;
|
||||
do
|
||||
{
|
||||
auto tmp = (char*)buf;
|
||||
memcpy( tmp, src, 4*4 );
|
||||
memcpy( tmp + 4*4, src + w * 4, 4*4 );
|
||||
memcpy( tmp + 8*4, src + w * 8, 4*4 );
|
||||
memcpy( tmp + 12*4, src + w * 12, 4*4 );
|
||||
src += 4*4;
|
||||
if( ++i == w/4 )
|
||||
{
|
||||
src += w * 3 * 4;
|
||||
i = 0;
|
||||
}
|
||||
|
||||
const auto c = ProcessRGB( (uint8_t*)buf );
|
||||
memcpy( ptr, &c, sizeof( uint64_t ) );
|
||||
ptr += sizeof( uint64_t );
|
||||
}
|
||||
while( --blocks );
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
#ifndef __TRACYDXT1_HPP__
|
||||
#define __TRACYDXT1_HPP__
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
void CompressImageDxt1( const char* src, char* dst, int w, int h );
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,118 @@
|
|||
#ifndef __TRACYFASTVECTOR_HPP__
|
||||
#define __TRACYFASTVECTOR_HPP__
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "../common/TracyForceInline.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
template<typename T>
|
||||
class FastVector
|
||||
{
|
||||
public:
|
||||
using iterator = T*;
|
||||
using const_iterator = const T*;
|
||||
|
||||
FastVector( size_t capacity )
|
||||
: m_ptr( (T*)tracy_malloc( sizeof( T ) * capacity ) )
|
||||
, m_write( m_ptr )
|
||||
, m_end( m_ptr + capacity )
|
||||
{
|
||||
assert( capacity != 0 );
|
||||
}
|
||||
|
||||
FastVector( const FastVector& ) = delete;
|
||||
FastVector( FastVector&& ) = delete;
|
||||
|
||||
~FastVector()
|
||||
{
|
||||
tracy_free( m_ptr );
|
||||
}
|
||||
|
||||
FastVector& operator=( const FastVector& ) = delete;
|
||||
FastVector& operator=( FastVector&& ) = delete;
|
||||
|
||||
bool empty() const { return m_ptr == m_write; }
|
||||
size_t size() const { return m_write - m_ptr; }
|
||||
|
||||
T* data() { return m_ptr; }
|
||||
const T* data() const { return m_ptr; };
|
||||
|
||||
T* begin() { return m_ptr; }
|
||||
const T* begin() const { return m_ptr; }
|
||||
T* end() { return m_write; }
|
||||
const T* end() const { return m_write; }
|
||||
|
||||
T& front() { assert( !empty() ); return m_ptr[0]; }
|
||||
const T& front() const { assert( !empty() ); return m_ptr[0]; }
|
||||
|
||||
T& back() { assert( !empty() ); return m_write[-1]; }
|
||||
const T& back() const { assert( !empty() ); return m_write[-1]; }
|
||||
|
||||
T& operator[]( size_t idx ) { return m_ptr[idx]; }
|
||||
const T& operator[]( size_t idx ) const { return m_ptr[idx]; }
|
||||
|
||||
T* push_next()
|
||||
{
|
||||
if( m_write == m_end ) AllocMore();
|
||||
return m_write++;
|
||||
}
|
||||
|
||||
T* prepare_next()
|
||||
{
|
||||
if( m_write == m_end ) AllocMore();
|
||||
return m_write;
|
||||
}
|
||||
|
||||
void commit_next()
|
||||
{
|
||||
m_write++;
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
m_write = m_ptr;
|
||||
}
|
||||
|
||||
void swap( FastVector& vec )
|
||||
{
|
||||
const auto ptr1 = m_ptr;
|
||||
const auto ptr2 = vec.m_ptr;
|
||||
const auto write1 = m_write;
|
||||
const auto write2 = vec.m_write;
|
||||
const auto end1 = m_end;
|
||||
const auto end2 = vec.m_end;
|
||||
|
||||
m_ptr = ptr2;
|
||||
vec.m_ptr = ptr1;
|
||||
m_write = write2;
|
||||
vec.m_write = write1;
|
||||
m_end = end2;
|
||||
vec.m_end = end1;
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_no_inline void AllocMore()
|
||||
{
|
||||
const auto cap = size_t( m_end - m_ptr ) * 2;
|
||||
const auto size = size_t( m_write - m_ptr );
|
||||
T* ptr = (T*)tracy_malloc( sizeof( T ) * cap );
|
||||
memcpy( ptr, m_ptr, size * sizeof( T ) );
|
||||
tracy_free( m_ptr );
|
||||
m_ptr = ptr;
|
||||
m_write = m_ptr + size;
|
||||
m_end = m_ptr + cap;
|
||||
}
|
||||
|
||||
T* m_ptr;
|
||||
T* m_write;
|
||||
T* m_end;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,548 @@
|
|||
#ifndef __TRACYLOCK_HPP__
|
||||
#define __TRACYLOCK_HPP__
|
||||
|
||||
#include <atomic>
|
||||
#include <limits>
|
||||
|
||||
#include "../common/TracySystem.hpp"
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "TracyProfiler.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class LockableCtx
|
||||
{
|
||||
public:
|
||||
tracy_force_inline LockableCtx( const SourceLocationData* srcloc )
|
||||
: m_id( GetLockCounter().fetch_add( 1, std::memory_order_relaxed ) )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
, m_lockCount( 0 )
|
||||
, m_active( false )
|
||||
#endif
|
||||
{
|
||||
assert( m_id != std::numeric_limits<uint32_t>::max() );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
|
||||
MemWrite( &item->lockAnnounce.id, m_id );
|
||||
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->lockAnnounce.type, LockType::Lockable );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
LockableCtx( const LockableCtx& ) = delete;
|
||||
LockableCtx& operator=( const LockableCtx& ) = delete;
|
||||
|
||||
tracy_force_inline ~LockableCtx()
|
||||
{
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockTerminate );
|
||||
MemWrite( &item->lockTerminate.id, m_id );
|
||||
MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline bool BeforeLock()
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
bool queue = false;
|
||||
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
|
||||
const auto active = m_active.load( std::memory_order_relaxed );
|
||||
if( locks == 0 || active )
|
||||
{
|
||||
const bool connected = GetProfiler().IsConnected();
|
||||
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
|
||||
if( connected ) queue = true;
|
||||
}
|
||||
if( !queue ) return false;
|
||||
#endif
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockWait );
|
||||
MemWrite( &item->lockWait.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockWait.id, m_id );
|
||||
MemWrite( &item->lockWait.time, Profiler::GetTime() );
|
||||
Profiler::QueueSerialFinish();
|
||||
return true;
|
||||
}
|
||||
|
||||
tracy_force_inline void AfterLock()
|
||||
{
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockObtain );
|
||||
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockObtain.id, m_id );
|
||||
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline void AfterUnlock()
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_lockCount.fetch_sub( 1, std::memory_order_relaxed );
|
||||
if( !m_active.load( std::memory_order_relaxed ) ) return;
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
m_active.store( false, std::memory_order_relaxed );
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockRelease );
|
||||
MemWrite( &item->lockRelease.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockRelease.id, m_id );
|
||||
MemWrite( &item->lockRelease.time, Profiler::GetTime() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline void AfterTryLock( bool acquired )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !acquired ) return;
|
||||
|
||||
bool queue = false;
|
||||
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
|
||||
const auto active = m_active.load( std::memory_order_relaxed );
|
||||
if( locks == 0 || active )
|
||||
{
|
||||
const bool connected = GetProfiler().IsConnected();
|
||||
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
|
||||
if( connected ) queue = true;
|
||||
}
|
||||
if( !queue ) return;
|
||||
#endif
|
||||
|
||||
if( acquired )
|
||||
{
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockObtain );
|
||||
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockObtain.id, m_id );
|
||||
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline void Mark( const SourceLocationData* srcloc )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const auto active = m_active.load( std::memory_order_relaxed );
|
||||
if( !active ) return;
|
||||
const auto connected = GetProfiler().IsConnected();
|
||||
if( !connected )
|
||||
{
|
||||
if( active ) m_active.store( false, std::memory_order_relaxed );
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockMark );
|
||||
MemWrite( &item->lockMark.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockMark.id, m_id );
|
||||
MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline void CustomName( const char* name, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, name, size );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockName );
|
||||
MemWrite( &item->lockNameFat.id, m_id );
|
||||
MemWrite( &item->lockNameFat.name, (uint64_t)ptr );
|
||||
MemWrite( &item->lockNameFat.size, (uint16_t)size );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t m_id;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
std::atomic<uint32_t> m_lockCount;
|
||||
std::atomic<bool> m_active;
|
||||
#endif
|
||||
};
|
||||
|
||||
template<class T>
|
||||
class Lockable
|
||||
{
|
||||
public:
|
||||
tracy_force_inline Lockable( const SourceLocationData* srcloc )
|
||||
: m_ctx( srcloc )
|
||||
{
|
||||
}
|
||||
|
||||
Lockable( const Lockable& ) = delete;
|
||||
Lockable& operator=( const Lockable& ) = delete;
|
||||
|
||||
tracy_force_inline void lock()
|
||||
{
|
||||
const auto runAfter = m_ctx.BeforeLock();
|
||||
m_lockable.lock();
|
||||
if( runAfter ) m_ctx.AfterLock();
|
||||
}
|
||||
|
||||
tracy_force_inline void unlock()
|
||||
{
|
||||
m_lockable.unlock();
|
||||
m_ctx.AfterUnlock();
|
||||
}
|
||||
|
||||
tracy_force_inline bool try_lock()
|
||||
{
|
||||
const auto acquired = m_lockable.try_lock();
|
||||
m_ctx.AfterTryLock( acquired );
|
||||
return acquired;
|
||||
}
|
||||
|
||||
tracy_force_inline void Mark( const SourceLocationData* srcloc )
|
||||
{
|
||||
m_ctx.Mark( srcloc );
|
||||
}
|
||||
|
||||
tracy_force_inline void CustomName( const char* name, size_t size )
|
||||
{
|
||||
m_ctx.CustomName( name, size );
|
||||
}
|
||||
|
||||
private:
|
||||
T m_lockable;
|
||||
LockableCtx m_ctx;
|
||||
};
|
||||
|
||||
|
||||
class SharedLockableCtx
|
||||
{
|
||||
public:
|
||||
tracy_force_inline SharedLockableCtx( const SourceLocationData* srcloc )
|
||||
: m_id( GetLockCounter().fetch_add( 1, std::memory_order_relaxed ) )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
, m_lockCount( 0 )
|
||||
, m_active( false )
|
||||
#endif
|
||||
{
|
||||
assert( m_id != std::numeric_limits<uint32_t>::max() );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
|
||||
MemWrite( &item->lockAnnounce.id, m_id );
|
||||
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->lockAnnounce.type, LockType::SharedLockable );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
SharedLockableCtx( const SharedLockableCtx& ) = delete;
|
||||
SharedLockableCtx& operator=( const SharedLockableCtx& ) = delete;
|
||||
|
||||
tracy_force_inline ~SharedLockableCtx()
|
||||
{
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockTerminate );
|
||||
MemWrite( &item->lockTerminate.id, m_id );
|
||||
MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline bool BeforeLock()
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
bool queue = false;
|
||||
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
|
||||
const auto active = m_active.load( std::memory_order_relaxed );
|
||||
if( locks == 0 || active )
|
||||
{
|
||||
const bool connected = GetProfiler().IsConnected();
|
||||
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
|
||||
if( connected ) queue = true;
|
||||
}
|
||||
if( !queue ) return false;
|
||||
#endif
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockWait );
|
||||
MemWrite( &item->lockWait.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockWait.id, m_id );
|
||||
MemWrite( &item->lockWait.time, Profiler::GetTime() );
|
||||
Profiler::QueueSerialFinish();
|
||||
return true;
|
||||
}
|
||||
|
||||
tracy_force_inline void AfterLock()
|
||||
{
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockObtain );
|
||||
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockObtain.id, m_id );
|
||||
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline void AfterUnlock()
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_lockCount.fetch_sub( 1, std::memory_order_relaxed );
|
||||
if( !m_active.load( std::memory_order_relaxed ) ) return;
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
m_active.store( false, std::memory_order_relaxed );
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockRelease );
|
||||
MemWrite( &item->lockRelease.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockRelease.id, m_id );
|
||||
MemWrite( &item->lockRelease.time, Profiler::GetTime() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline void AfterTryLock( bool acquired )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !acquired ) return;
|
||||
|
||||
bool queue = false;
|
||||
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
|
||||
const auto active = m_active.load( std::memory_order_relaxed );
|
||||
if( locks == 0 || active )
|
||||
{
|
||||
const bool connected = GetProfiler().IsConnected();
|
||||
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
|
||||
if( connected ) queue = true;
|
||||
}
|
||||
if( !queue ) return;
|
||||
#endif
|
||||
|
||||
if( acquired )
|
||||
{
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockObtain );
|
||||
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockObtain.id, m_id );
|
||||
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline bool BeforeLockShared()
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
bool queue = false;
|
||||
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
|
||||
const auto active = m_active.load( std::memory_order_relaxed );
|
||||
if( locks == 0 || active )
|
||||
{
|
||||
const bool connected = GetProfiler().IsConnected();
|
||||
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
|
||||
if( connected ) queue = true;
|
||||
}
|
||||
if( !queue ) return false;
|
||||
#endif
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockSharedWait );
|
||||
MemWrite( &item->lockWait.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockWait.id, m_id );
|
||||
MemWrite( &item->lockWait.time, Profiler::GetTime() );
|
||||
Profiler::QueueSerialFinish();
|
||||
return true;
|
||||
}
|
||||
|
||||
tracy_force_inline void AfterLockShared()
|
||||
{
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockSharedObtain );
|
||||
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockObtain.id, m_id );
|
||||
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline void AfterUnlockShared()
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_lockCount.fetch_sub( 1, std::memory_order_relaxed );
|
||||
if( !m_active.load( std::memory_order_relaxed ) ) return;
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
m_active.store( false, std::memory_order_relaxed );
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockSharedRelease );
|
||||
MemWrite( &item->lockRelease.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockRelease.id, m_id );
|
||||
MemWrite( &item->lockRelease.time, Profiler::GetTime() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline void AfterTryLockShared( bool acquired )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !acquired ) return;
|
||||
|
||||
bool queue = false;
|
||||
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
|
||||
const auto active = m_active.load( std::memory_order_relaxed );
|
||||
if( locks == 0 || active )
|
||||
{
|
||||
const bool connected = GetProfiler().IsConnected();
|
||||
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
|
||||
if( connected ) queue = true;
|
||||
}
|
||||
if( !queue ) return;
|
||||
#endif
|
||||
|
||||
if( acquired )
|
||||
{
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockSharedObtain );
|
||||
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockObtain.id, m_id );
|
||||
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline void Mark( const SourceLocationData* srcloc )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const auto active = m_active.load( std::memory_order_relaxed );
|
||||
if( !active ) return;
|
||||
const auto connected = GetProfiler().IsConnected();
|
||||
if( !connected )
|
||||
{
|
||||
if( active ) m_active.store( false, std::memory_order_relaxed );
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockMark );
|
||||
MemWrite( &item->lockMark.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockMark.id, m_id );
|
||||
MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline void CustomName( const char* name, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, name, size );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockName );
|
||||
MemWrite( &item->lockNameFat.id, m_id );
|
||||
MemWrite( &item->lockNameFat.name, (uint64_t)ptr );
|
||||
MemWrite( &item->lockNameFat.size, (uint16_t)size );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t m_id;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
std::atomic<uint32_t> m_lockCount;
|
||||
std::atomic<bool> m_active;
|
||||
#endif
|
||||
};
|
||||
|
||||
template<class T>
|
||||
class SharedLockable
|
||||
{
|
||||
public:
|
||||
tracy_force_inline SharedLockable( const SourceLocationData* srcloc )
|
||||
: m_ctx( srcloc )
|
||||
{
|
||||
}
|
||||
|
||||
SharedLockable( const SharedLockable& ) = delete;
|
||||
SharedLockable& operator=( const SharedLockable& ) = delete;
|
||||
|
||||
tracy_force_inline void lock()
|
||||
{
|
||||
const auto runAfter = m_ctx.BeforeLock();
|
||||
m_lockable.lock();
|
||||
if( runAfter ) m_ctx.AfterLock();
|
||||
}
|
||||
|
||||
tracy_force_inline void unlock()
|
||||
{
|
||||
m_lockable.unlock();
|
||||
m_ctx.AfterUnlock();
|
||||
}
|
||||
|
||||
tracy_force_inline bool try_lock()
|
||||
{
|
||||
const auto acquired = m_lockable.try_lock();
|
||||
m_ctx.AfterTryLock( acquired );
|
||||
return acquired;
|
||||
}
|
||||
|
||||
tracy_force_inline void lock_shared()
|
||||
{
|
||||
const auto runAfter = m_ctx.BeforeLockShared();
|
||||
m_lockable.lock_shared();
|
||||
if( runAfter ) m_ctx.AfterLockShared();
|
||||
}
|
||||
|
||||
tracy_force_inline void unlock_shared()
|
||||
{
|
||||
m_lockable.unlock_shared();
|
||||
m_ctx.AfterUnlockShared();
|
||||
}
|
||||
|
||||
tracy_force_inline bool try_lock_shared()
|
||||
{
|
||||
const auto acquired = m_lockable.try_lock_shared();
|
||||
m_ctx.AfterTryLockShared( acquired );
|
||||
return acquired;
|
||||
}
|
||||
|
||||
tracy_force_inline void Mark( const SourceLocationData* srcloc )
|
||||
{
|
||||
m_ctx.Mark( srcloc );
|
||||
}
|
||||
|
||||
tracy_force_inline void CustomName( const char* name, size_t size )
|
||||
{
|
||||
m_ctx.CustomName( name, size );
|
||||
}
|
||||
|
||||
private:
|
||||
T m_lockable;
|
||||
SharedLockableCtx m_ctx;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,828 @@
|
|||
#ifndef __TRACYPROFILER_HPP__
|
||||
#define __TRACYPROFILER_HPP__
|
||||
|
||||
#include <assert.h>
|
||||
#include <atomic>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "tracy_concurrentqueue.h"
|
||||
#include "TracyCallstack.hpp"
|
||||
#include "TracySysTime.hpp"
|
||||
#include "TracyFastVector.hpp"
|
||||
#include "../common/TracyQueue.hpp"
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "../common/TracyMutex.hpp"
|
||||
#include "../common/TracyProtocol.hpp"
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
#ifdef __APPLE__
|
||||
# include <TargetConditionals.h>
|
||||
# include <mach/mach_time.h>
|
||||
#endif
|
||||
|
||||
#if !defined TRACY_TIMER_FALLBACK && ( defined _WIN32 || defined __CYGWIN__ || ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 ) )
|
||||
# define TRACY_HW_TIMER
|
||||
#endif
|
||||
|
||||
#if !defined TRACY_HW_TIMER
|
||||
# include <chrono>
|
||||
#endif
|
||||
|
||||
#ifndef TracyConcat
|
||||
# define TracyConcat(x,y) TracyConcatIndirect(x,y)
|
||||
#endif
|
||||
#ifndef TracyConcatIndirect
|
||||
# define TracyConcatIndirect(x,y) x##y
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
#if defined(TRACY_DELAYED_INIT) && defined(TRACY_MANUAL_LIFETIME)
|
||||
TRACY_API void StartupProfiler();
|
||||
TRACY_API void ShutdownProfiler();
|
||||
#endif
|
||||
|
||||
class GpuCtx;
|
||||
class Profiler;
|
||||
class Socket;
|
||||
class UdpBroadcast;
|
||||
|
||||
struct GpuCtxWrapper
|
||||
{
|
||||
GpuCtx* ptr;
|
||||
};
|
||||
|
||||
TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken();
|
||||
TRACY_API Profiler& GetProfiler();
|
||||
TRACY_API std::atomic<uint32_t>& GetLockCounter();
|
||||
TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter();
|
||||
TRACY_API GpuCtxWrapper& GetGpuCtx();
|
||||
TRACY_API uint64_t GetThreadHandle();
|
||||
TRACY_API void InitRPMallocThread();
|
||||
TRACY_API bool ProfilerAvailable();
|
||||
TRACY_API int64_t GetFrequencyQpc();
|
||||
|
||||
struct SourceLocationData
|
||||
{
|
||||
const char* name;
|
||||
const char* function;
|
||||
const char* file;
|
||||
uint32_t line;
|
||||
uint32_t color;
|
||||
};
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
struct LuaZoneState
|
||||
{
|
||||
uint32_t counter;
|
||||
bool active;
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
#define TracyLfqPrepare( _type ) \
|
||||
moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
|
||||
auto __token = GetToken(); \
|
||||
auto& __tail = __token->get_tail_index(); \
|
||||
auto item = __token->enqueue_begin( __magic ); \
|
||||
MemWrite( &item->hdr.type, _type );
|
||||
|
||||
#define TracyLfqCommit \
|
||||
__tail.store( __magic + 1, std::memory_order_release );
|
||||
|
||||
#define TracyLfqPrepareC( _type ) \
|
||||
tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
|
||||
auto __token = tracy::GetToken(); \
|
||||
auto& __tail = __token->get_tail_index(); \
|
||||
auto item = __token->enqueue_begin( __magic ); \
|
||||
tracy::MemWrite( &item->hdr.type, _type );
|
||||
|
||||
#define TracyLfqCommitC \
|
||||
__tail.store( __magic + 1, std::memory_order_release );
|
||||
|
||||
|
||||
typedef void(*ParameterCallback)( uint32_t idx, int32_t val );
|
||||
|
||||
class Profiler
|
||||
{
|
||||
struct FrameImageQueueItem
|
||||
{
|
||||
void* image;
|
||||
uint32_t frame;
|
||||
uint16_t w;
|
||||
uint16_t h;
|
||||
uint8_t offset;
|
||||
bool flip;
|
||||
};
|
||||
|
||||
public:
|
||||
Profiler();
|
||||
~Profiler();
|
||||
|
||||
void SpawnWorkerThreads();
|
||||
|
||||
static tracy_force_inline int64_t GetTime()
|
||||
{
|
||||
#ifdef TRACY_HW_TIMER
|
||||
# if defined TARGET_OS_IOS && TARGET_OS_IOS == 1
|
||||
return mach_absolute_time();
|
||||
# elif defined _WIN32 || defined __CYGWIN__
|
||||
# ifdef TRACY_TIMER_QPC
|
||||
return GetTimeQpc();
|
||||
# else
|
||||
return int64_t( __rdtsc() );
|
||||
# endif
|
||||
# elif defined __i386 || defined _M_IX86
|
||||
uint32_t eax, edx;
|
||||
asm volatile ( "rdtsc" : "=a" (eax), "=d" (edx) );
|
||||
return ( uint64_t( edx ) << 32 ) + uint64_t( eax );
|
||||
# elif defined __x86_64__ || defined _M_X64
|
||||
uint64_t rax, rdx;
|
||||
asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) );
|
||||
return (int64_t)(( rdx << 32 ) + rax);
|
||||
# else
|
||||
# error "TRACY_HW_TIMER detection logic needs fixing"
|
||||
# endif
|
||||
#else
|
||||
# if defined __linux__ && defined CLOCK_MONOTONIC_RAW
|
||||
struct timespec ts;
|
||||
clock_gettime( CLOCK_MONOTONIC_RAW, &ts );
|
||||
return int64_t( ts.tv_sec ) * 1000000000ll + int64_t( ts.tv_nsec );
|
||||
# else
|
||||
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
|
||||
tracy_force_inline uint32_t GetNextZoneId()
|
||||
{
|
||||
return m_zoneId.fetch_add( 1, std::memory_order_relaxed );
|
||||
}
|
||||
|
||||
static tracy_force_inline QueueItem* QueueSerial()
|
||||
{
|
||||
auto& p = GetProfiler();
|
||||
p.m_serialLock.lock();
|
||||
return p.m_serialQueue.prepare_next();
|
||||
}
|
||||
|
||||
static tracy_force_inline QueueItem* QueueSerialCallstack( void* ptr )
|
||||
{
|
||||
auto& p = GetProfiler();
|
||||
p.m_serialLock.lock();
|
||||
p.SendCallstackSerial( ptr );
|
||||
return p.m_serialQueue.prepare_next();
|
||||
}
|
||||
|
||||
static tracy_force_inline void QueueSerialFinish()
|
||||
{
|
||||
auto& p = GetProfiler();
|
||||
p.m_serialQueue.commit_next();
|
||||
p.m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendFrameMark( const char* name )
|
||||
{
|
||||
if( !name ) GetProfiler().m_frameCount.fetch_add( 1, std::memory_order_relaxed );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
TracyLfqPrepare( QueueType::FrameMarkMsg );
|
||||
MemWrite( &item->frameMark.time, GetTime() );
|
||||
MemWrite( &item->frameMark.name, uint64_t( name ) );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendFrameMark( const char* name, QueueType type )
|
||||
{
|
||||
assert( type == QueueType::FrameMarkMsgStart || type == QueueType::FrameMarkMsgEnd );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
auto item = QueueSerial();
|
||||
MemWrite( &item->hdr.type, type );
|
||||
MemWrite( &item->frameMark.time, GetTime() );
|
||||
MemWrite( &item->frameMark.name, uint64_t( name ) );
|
||||
QueueSerialFinish();
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendFrameImage( const void* image, uint16_t w, uint16_t h, uint8_t offset, bool flip )
|
||||
{
|
||||
#ifndef TRACY_NO_FRAME_IMAGE
|
||||
auto& profiler = GetProfiler();
|
||||
assert( profiler.m_frameCount.load( std::memory_order_relaxed ) < std::numeric_limits<uint32_t>::max() );
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto sz = size_t( w ) * size_t( h ) * 4;
|
||||
auto ptr = (char*)tracy_malloc( sz );
|
||||
memcpy( ptr, image, sz );
|
||||
|
||||
profiler.m_fiLock.lock();
|
||||
auto fi = profiler.m_fiQueue.prepare_next();
|
||||
fi->image = ptr;
|
||||
fi->frame = uint32_t( profiler.m_frameCount.load( std::memory_order_relaxed ) - offset );
|
||||
fi->w = w;
|
||||
fi->h = h;
|
||||
fi->flip = flip;
|
||||
profiler.m_fiQueue.commit_next();
|
||||
profiler.m_fiLock.unlock();
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void PlotData( const char* name, int64_t val )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
TracyLfqPrepare( QueueType::PlotData );
|
||||
MemWrite( &item->plotData.name, (uint64_t)name );
|
||||
MemWrite( &item->plotData.time, GetTime() );
|
||||
MemWrite( &item->plotData.type, PlotDataType::Int );
|
||||
MemWrite( &item->plotData.data.i, val );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void PlotData( const char* name, float val )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
TracyLfqPrepare( QueueType::PlotData );
|
||||
MemWrite( &item->plotData.name, (uint64_t)name );
|
||||
MemWrite( &item->plotData.time, GetTime() );
|
||||
MemWrite( &item->plotData.type, PlotDataType::Float );
|
||||
MemWrite( &item->plotData.data.f, val );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void PlotData( const char* name, double val )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
TracyLfqPrepare( QueueType::PlotData );
|
||||
MemWrite( &item->plotData.name, (uint64_t)name );
|
||||
MemWrite( &item->plotData.time, GetTime() );
|
||||
MemWrite( &item->plotData.type, PlotDataType::Double );
|
||||
MemWrite( &item->plotData.data.d, val );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void ConfigurePlot( const char* name, PlotFormatType type )
|
||||
{
|
||||
TracyLfqPrepare( QueueType::PlotConfig );
|
||||
MemWrite( &item->plotConfig.name, (uint64_t)name );
|
||||
MemWrite( &item->plotConfig.type, (uint8_t)type );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void Message( const char* txt, size_t size, int callstack )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
{
|
||||
InitRPMallocThread();
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
TracyLfqPrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
MemWrite( &item->messageFat.time, GetTime() );
|
||||
MemWrite( &item->messageFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageFat.size, (uint16_t)size );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void Message( const char* txt, int callstack )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
{
|
||||
InitRPMallocThread();
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
|
||||
MemWrite( &item->messageLiteral.time, GetTime() );
|
||||
MemWrite( &item->messageLiteral.text, (uint64_t)txt );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
{
|
||||
InitRPMallocThread();
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
TracyLfqPrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
MemWrite( &item->messageColorFat.time, GetTime() );
|
||||
MemWrite( &item->messageColorFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageColorFat.r, uint8_t( ( color ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorFat.g, uint8_t( ( color >> 8 ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorFat.b, uint8_t( ( color >> 16 ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorFat.size, (uint16_t)size );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
{
|
||||
InitRPMallocThread();
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
|
||||
MemWrite( &item->messageColorLiteral.time, GetTime() );
|
||||
MemWrite( &item->messageColorLiteral.text, (uint64_t)txt );
|
||||
MemWrite( &item->messageColorLiteral.r, uint8_t( ( color ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorLiteral.g, uint8_t( ( color >> 8 ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorLiteral.b, uint8_t( ( color >> 16 ) & 0xFF ) );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageAppInfo( const char* txt, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
InitRPMallocThread();
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
TracyLfqPrepare( QueueType::MessageAppInfo );
|
||||
MemWrite( &item->messageFat.time, GetTime() );
|
||||
MemWrite( &item->messageFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageFat.size, (uint16_t)size );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAlloc( const void* ptr, size_t size, bool secure )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
GetProfiler().m_serialLock.lock();
|
||||
SendMemAlloc( QueueType::MemAlloc, thread, ptr, size );
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemFree( const void* ptr, bool secure )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
GetProfiler().m_serialLock.lock();
|
||||
SendMemFree( QueueType::MemFree, thread, ptr );
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
InitRPMallocThread();
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
MemAlloc( ptr, size, secure );
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth, bool secure )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
InitRPMallocThread();
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
MemFree( ptr, secure );
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
GetProfiler().m_serialLock.lock();
|
||||
SendMemName( name );
|
||||
SendMemAlloc( QueueType::MemAllocNamed, thread, ptr, size );
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemFreeNamed( const void* ptr, bool secure, const char* name )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
GetProfiler().m_serialLock.lock();
|
||||
SendMemName( name );
|
||||
SendMemFree( QueueType::MemFreeNamed, thread, ptr );
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
InitRPMallocThread();
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemName( name );
|
||||
SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
MemAlloc( ptr, size, secure );
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
InitRPMallocThread();
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemName( name );
|
||||
SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
MemFree( ptr, secure );
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendCallstack( int depth )
|
||||
{
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto ptr = Callstack( depth );
|
||||
TracyLfqPrepare( QueueType::Callstack );
|
||||
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
|
||||
TracyLfqCommit;
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void ParameterRegister( ParameterCallback cb ) { GetProfiler().m_paramCallback = cb; }
|
||||
static tracy_force_inline void ParameterSetup( uint32_t idx, const char* name, bool isBool, int32_t val )
|
||||
{
|
||||
TracyLfqPrepare( QueueType::ParamSetup );
|
||||
tracy::MemWrite( &item->paramSetup.idx, idx );
|
||||
tracy::MemWrite( &item->paramSetup.name, (uint64_t)name );
|
||||
tracy::MemWrite( &item->paramSetup.isBool, (uint8_t)isBool );
|
||||
tracy::MemWrite( &item->paramSetup.val, val );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
void SendCallstack( int depth, const char* skipBefore );
|
||||
static void CutCallstack( void* callstack, const char* skipBefore );
|
||||
|
||||
static bool ShouldExit();
|
||||
|
||||
tracy_force_inline bool IsConnected() const
|
||||
{
|
||||
return m_isConnected.load( std::memory_order_acquire );
|
||||
}
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
tracy_force_inline uint64_t ConnectionId() const
|
||||
{
|
||||
return m_connectionId.load( std::memory_order_acquire );
|
||||
}
|
||||
|
||||
tracy_force_inline void DeferItem( const QueueItem& item )
|
||||
{
|
||||
m_deferredLock.lock();
|
||||
auto dst = m_deferredQueue.push_next();
|
||||
memcpy( dst, &item, sizeof( item ) );
|
||||
m_deferredLock.unlock();
|
||||
}
|
||||
#endif
|
||||
|
||||
void RequestShutdown() { m_shutdown.store( true, std::memory_order_relaxed ); m_shutdownManual.store( true, std::memory_order_relaxed ); }
|
||||
bool HasShutdownFinished() const { return m_shutdownFinished.load( std::memory_order_relaxed ); }
|
||||
|
||||
void SendString( uint64_t str, const char* ptr, QueueType type ) { SendString( str, ptr, strlen( ptr ), type ); }
|
||||
void SendString( uint64_t str, const char* ptr, size_t len, QueueType type );
|
||||
void SendSingleString( const char* ptr ) { SendSingleString( ptr, strlen( ptr ) ); }
|
||||
void SendSingleString( const char* ptr, size_t len );
|
||||
void SendSecondString( const char* ptr ) { SendSecondString( ptr, strlen( ptr ) ); }
|
||||
void SendSecondString( const char* ptr, size_t len );
|
||||
|
||||
|
||||
// Allocated source location data layout:
|
||||
// 2b payload size
|
||||
// 4b color
|
||||
// 4b source line
|
||||
// fsz function name
|
||||
// 1b null terminator
|
||||
// ssz source file name
|
||||
// 1b null terminator
|
||||
// nsz zone name (optional)
|
||||
|
||||
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function )
|
||||
{
|
||||
return AllocSourceLocation( line, source, function, nullptr, 0 );
|
||||
}
|
||||
|
||||
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz )
|
||||
{
|
||||
return AllocSourceLocation( line, source, strlen(source), function, strlen(function), name, nameSz );
|
||||
}
|
||||
|
||||
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz )
|
||||
{
|
||||
return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0 );
|
||||
}
|
||||
|
||||
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz )
|
||||
{
|
||||
const auto sz32 = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz );
|
||||
assert( sz32 <= std::numeric_limits<uint16_t>::max() );
|
||||
const auto sz = uint16_t( sz32 );
|
||||
auto ptr = (char*)tracy_malloc( sz );
|
||||
memcpy( ptr, &sz, 2 );
|
||||
memset( ptr + 2, 0, 4 );
|
||||
memcpy( ptr + 6, &line, 4 );
|
||||
memcpy( ptr + 10, function, functionSz );
|
||||
ptr[10 + functionSz] = '\0';
|
||||
memcpy( ptr + 10 + functionSz + 1, source, sourceSz );
|
||||
ptr[10 + functionSz + 1 + sourceSz] = '\0';
|
||||
if( nameSz != 0 )
|
||||
{
|
||||
memcpy( ptr + 10 + functionSz + 1 + sourceSz + 1, name, nameSz );
|
||||
}
|
||||
return uint64_t( ptr );
|
||||
}
|
||||
|
||||
private:
|
||||
enum class DequeueStatus { DataDequeued, ConnectionLost, QueueEmpty };
|
||||
|
||||
static void LaunchWorker( void* ptr ) { ((Profiler*)ptr)->Worker(); }
|
||||
void Worker();
|
||||
|
||||
#ifndef TRACY_NO_FRAME_IMAGE
|
||||
static void LaunchCompressWorker( void* ptr ) { ((Profiler*)ptr)->CompressWorker(); }
|
||||
void CompressWorker();
|
||||
#endif
|
||||
|
||||
void ClearQueues( tracy::moodycamel::ConsumerToken& token );
|
||||
void ClearSerial();
|
||||
DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token );
|
||||
DequeueStatus DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop );
|
||||
DequeueStatus DequeueSerial();
|
||||
bool CommitData();
|
||||
|
||||
tracy_force_inline bool AppendData( const void* data, size_t len )
|
||||
{
|
||||
const auto ret = NeedDataSize( len );
|
||||
AppendDataUnsafe( data, len );
|
||||
return ret;
|
||||
}
|
||||
|
||||
tracy_force_inline bool NeedDataSize( size_t len )
|
||||
{
|
||||
assert( len <= TargetFrameSize );
|
||||
bool ret = true;
|
||||
if( m_bufferOffset - m_bufferStart + (int)len > TargetFrameSize )
|
||||
{
|
||||
ret = CommitData();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
tracy_force_inline void AppendDataUnsafe( const void* data, size_t len )
|
||||
{
|
||||
memcpy( m_buffer + m_bufferOffset, data, len );
|
||||
m_bufferOffset += int( len );
|
||||
}
|
||||
|
||||
bool SendData( const char* data, size_t len );
|
||||
void SendLongString( uint64_t ptr, const char* str, size_t len, QueueType type );
|
||||
void SendSourceLocation( uint64_t ptr );
|
||||
void SendSourceLocationPayload( uint64_t ptr );
|
||||
void SendCallstackPayload( uint64_t ptr );
|
||||
void SendCallstackPayload64( uint64_t ptr );
|
||||
void SendCallstackAlloc( uint64_t ptr );
|
||||
void SendCallstackFrame( uint64_t ptr );
|
||||
void SendCodeLocation( uint64_t ptr );
|
||||
|
||||
bool HandleServerQuery();
|
||||
void HandleDisconnect();
|
||||
void HandleParameter( uint64_t payload );
|
||||
void HandleSymbolQuery( uint64_t symbol );
|
||||
void HandleSymbolCodeQuery( uint64_t symbol, uint32_t size );
|
||||
void HandleSourceCodeQuery();
|
||||
|
||||
void AckServerQuery();
|
||||
void AckSourceCodeNotAvailable();
|
||||
|
||||
void CalibrateTimer();
|
||||
void CalibrateDelay();
|
||||
void ReportTopology();
|
||||
|
||||
static tracy_force_inline void SendCallstackSerial( void* ptr )
|
||||
{
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, QueueType::CallstackSerial );
|
||||
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendMemAlloc( QueueType type, const uint64_t thread, const void* ptr, size_t size )
|
||||
{
|
||||
assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack || type == QueueType::MemAllocNamed || type == QueueType::MemAllocCallstackNamed );
|
||||
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, type );
|
||||
MemWrite( &item->memAlloc.time, GetTime() );
|
||||
MemWrite( &item->memAlloc.thread, thread );
|
||||
MemWrite( &item->memAlloc.ptr, (uint64_t)ptr );
|
||||
if( compile_time_condition<sizeof( size ) == 4>::value )
|
||||
{
|
||||
memcpy( &item->memAlloc.size, &size, 4 );
|
||||
memset( &item->memAlloc.size + 4, 0, 2 );
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( sizeof( size ) == 8 );
|
||||
memcpy( &item->memAlloc.size, &size, 4 );
|
||||
memcpy( ((char*)&item->memAlloc.size)+4, ((char*)&size)+4, 2 );
|
||||
}
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendMemFree( QueueType type, const uint64_t thread, const void* ptr )
|
||||
{
|
||||
assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack || type == QueueType::MemFreeNamed || type == QueueType::MemFreeCallstackNamed );
|
||||
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, type );
|
||||
MemWrite( &item->memFree.time, GetTime() );
|
||||
MemWrite( &item->memFree.thread, thread );
|
||||
MemWrite( &item->memFree.ptr, (uint64_t)ptr );
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendMemName( const char* name )
|
||||
{
|
||||
assert( name );
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, QueueType::MemNamePayload );
|
||||
MemWrite( &item->memName.name, (uint64_t)name );
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
}
|
||||
|
||||
#if ( defined _WIN32 || defined __CYGWIN__ ) && defined TRACY_TIMER_QPC
|
||||
static int64_t GetTimeQpc();
|
||||
#endif
|
||||
|
||||
double m_timerMul;
|
||||
uint64_t m_resolution;
|
||||
uint64_t m_delay;
|
||||
std::atomic<int64_t> m_timeBegin;
|
||||
uint64_t m_mainThread;
|
||||
uint64_t m_epoch, m_exectime;
|
||||
std::atomic<bool> m_shutdown;
|
||||
std::atomic<bool> m_shutdownManual;
|
||||
std::atomic<bool> m_shutdownFinished;
|
||||
Socket* m_sock;
|
||||
UdpBroadcast* m_broadcast;
|
||||
bool m_noExit;
|
||||
uint32_t m_userPort;
|
||||
std::atomic<uint32_t> m_zoneId;
|
||||
int64_t m_samplingPeriod;
|
||||
|
||||
uint64_t m_threadCtx;
|
||||
int64_t m_refTimeThread;
|
||||
int64_t m_refTimeSerial;
|
||||
int64_t m_refTimeCtx;
|
||||
int64_t m_refTimeGpu;
|
||||
|
||||
void* m_stream; // LZ4_stream_t*
|
||||
char* m_buffer;
|
||||
int m_bufferOffset;
|
||||
int m_bufferStart;
|
||||
|
||||
char* m_lz4Buf;
|
||||
|
||||
FastVector<QueueItem> m_serialQueue, m_serialDequeue;
|
||||
TracyMutex m_serialLock;
|
||||
|
||||
#ifndef TRACY_NO_FRAME_IMAGE
|
||||
FastVector<FrameImageQueueItem> m_fiQueue, m_fiDequeue;
|
||||
TracyMutex m_fiLock;
|
||||
#endif
|
||||
|
||||
std::atomic<uint64_t> m_frameCount;
|
||||
std::atomic<bool> m_isConnected;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
std::atomic<uint64_t> m_connectionId;
|
||||
|
||||
TracyMutex m_deferredLock;
|
||||
FastVector<QueueItem> m_deferredQueue;
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_SYSTIME
|
||||
void ProcessSysTime();
|
||||
|
||||
SysTime m_sysTime;
|
||||
uint64_t m_sysTimeLast = 0;
|
||||
#else
|
||||
void ProcessSysTime() {}
|
||||
#endif
|
||||
|
||||
ParameterCallback m_paramCallback;
|
||||
|
||||
char* m_queryData;
|
||||
char* m_queryDataPtr;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,116 @@
|
|||
namespace tracy
|
||||
{
|
||||
|
||||
template<size_t Size>
|
||||
class RingBuffer
|
||||
{
|
||||
public:
|
||||
RingBuffer( int fd )
|
||||
: m_fd( fd )
|
||||
{
|
||||
const auto pageSize = uint32_t( getpagesize() );
|
||||
assert( Size >= pageSize );
|
||||
assert( __builtin_popcount( Size ) == 1 );
|
||||
m_mapSize = Size + pageSize;
|
||||
auto mapAddr = mmap( nullptr, m_mapSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
|
||||
if( !mapAddr )
|
||||
{
|
||||
m_fd = 0;
|
||||
close( fd );
|
||||
return;
|
||||
}
|
||||
m_metadata = (perf_event_mmap_page*)mapAddr;
|
||||
assert( m_metadata->data_offset == pageSize );
|
||||
m_buffer = ((char*)mapAddr) + pageSize;
|
||||
}
|
||||
|
||||
~RingBuffer()
|
||||
{
|
||||
if( m_metadata ) munmap( m_metadata, m_mapSize );
|
||||
if( m_fd ) close( m_fd );
|
||||
}
|
||||
|
||||
RingBuffer( const RingBuffer& ) = delete;
|
||||
RingBuffer& operator=( const RingBuffer& ) = delete;
|
||||
|
||||
RingBuffer( RingBuffer&& other )
|
||||
{
|
||||
memcpy( (char*)&other, (char*)this, sizeof( RingBuffer ) );
|
||||
m_metadata = nullptr;
|
||||
m_fd = 0;
|
||||
}
|
||||
|
||||
RingBuffer& operator=( RingBuffer&& other )
|
||||
{
|
||||
memcpy( (char*)&other, (char*)this, sizeof( RingBuffer ) );
|
||||
m_metadata = nullptr;
|
||||
m_fd = 0;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool IsValid() const { return m_metadata != nullptr; }
|
||||
|
||||
void Enable()
|
||||
{
|
||||
ioctl( m_fd, PERF_EVENT_IOC_ENABLE, 0 );
|
||||
}
|
||||
|
||||
bool HasData() const
|
||||
{
|
||||
const auto head = LoadHead();
|
||||
return head > m_metadata->data_tail;
|
||||
}
|
||||
|
||||
void Read( void* dst, uint64_t offset, uint64_t cnt )
|
||||
{
|
||||
auto src = ( m_metadata->data_tail + offset ) % Size;
|
||||
if( src + cnt <= Size )
|
||||
{
|
||||
memcpy( dst, m_buffer + src, cnt );
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto s0 = Size - src;
|
||||
memcpy( dst, m_buffer + src, s0 );
|
||||
memcpy( (char*)dst + s0, m_buffer, cnt - s0 );
|
||||
}
|
||||
}
|
||||
|
||||
void Advance( uint64_t cnt )
|
||||
{
|
||||
StoreTail( m_metadata->data_tail + cnt );
|
||||
}
|
||||
|
||||
bool CheckTscCaps() const
|
||||
{
|
||||
return m_metadata->cap_user_time_zero;
|
||||
}
|
||||
|
||||
int64_t ConvertTimeToTsc( int64_t timestamp ) const
|
||||
{
|
||||
assert( m_metadata->cap_user_time_zero );
|
||||
const auto time = timestamp - m_metadata->time_zero;
|
||||
const auto quot = time / m_metadata->time_mult;
|
||||
const auto rem = time % m_metadata->time_mult;
|
||||
return ( quot << m_metadata->time_shift ) + ( rem << m_metadata->time_shift ) / m_metadata->time_mult;
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t LoadHead() const
|
||||
{
|
||||
return std::atomic_load_explicit( (const volatile std::atomic<uint64_t>*)&m_metadata->data_head, std::memory_order_acquire );
|
||||
}
|
||||
|
||||
void StoreTail( uint64_t tail )
|
||||
{
|
||||
std::atomic_store_explicit( (volatile std::atomic<uint64_t>*)&m_metadata->data_tail, tail, std::memory_order_release );
|
||||
}
|
||||
|
||||
perf_event_mmap_page* m_metadata;
|
||||
char* m_buffer;
|
||||
|
||||
size_t m_mapSize;
|
||||
int m_fd;
|
||||
};
|
||||
|
||||
}
|
|
@ -0,0 +1,175 @@
|
|||
#ifndef __TRACYSCOPED_HPP__
|
||||
#define __TRACYSCOPED_HPP__
|
||||
|
||||
#include <limits>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../common/TracySystem.hpp"
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "TracyProfiler.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class ScopedZone
|
||||
{
|
||||
public:
|
||||
ScopedZone( const ScopedZone& ) = delete;
|
||||
ScopedZone( ScopedZone&& ) = delete;
|
||||
ScopedZone& operator=( const ScopedZone& ) = delete;
|
||||
ScopedZone& operator=( ScopedZone&& ) = delete;
|
||||
|
||||
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
TracyLfqPrepare( QueueType::ZoneBegin );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int depth, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
GetProfiler().SendCallstack( depth );
|
||||
|
||||
TracyLfqPrepare( QueueType::ZoneBeginCallstack );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
GetProfiler().SendCallstack( depth );
|
||||
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline ~ScopedZone()
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
#endif
|
||||
TracyLfqPrepare( QueueType::ZoneEnd );
|
||||
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline void Text( const char* txt, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
#endif
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
TracyLfqPrepare( QueueType::ZoneText );
|
||||
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline void Name( const char* txt, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
#endif
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
TracyLfqPrepare( QueueType::ZoneName );
|
||||
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline void Color( uint32_t color )
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
#endif
|
||||
TracyLfqPrepare( QueueType::ZoneColor );
|
||||
MemWrite( &item->zoneColor.r, uint8_t( ( color ) & 0xFF ) );
|
||||
MemWrite( &item->zoneColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
|
||||
MemWrite( &item->zoneColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline void Value( uint64_t value )
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
#endif
|
||||
TracyLfqPrepare( QueueType::ZoneValue );
|
||||
MemWrite( &item->zoneValue.value, value );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline bool IsActive() const { return m_active; }
|
||||
|
||||
private:
|
||||
const bool m_active;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
uint64_t m_connectionId;
|
||||
#endif
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,108 @@
|
|||
#include "TracySysTime.hpp"
|
||||
|
||||
#ifdef TRACY_HAS_SYSTIME
|
||||
|
||||
# if defined _WIN32 || defined __CYGWIN__
|
||||
# include <windows.h>
|
||||
# elif defined __linux__
|
||||
# include <stdio.h>
|
||||
# include <inttypes.h>
|
||||
# elif defined __APPLE__
|
||||
# include <mach/mach_host.h>
|
||||
# include <mach/host_info.h>
|
||||
# elif defined BSD
|
||||
# include <sys/types.h>
|
||||
# include <sys/sysctl.h>
|
||||
# endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
# if defined _WIN32 || defined __CYGWIN__
|
||||
|
||||
static inline uint64_t ConvertTime( const FILETIME& t )
|
||||
{
|
||||
return ( uint64_t( t.dwHighDateTime ) << 32 ) | uint64_t( t.dwLowDateTime );
|
||||
}
|
||||
|
||||
void SysTime::ReadTimes()
|
||||
{
|
||||
FILETIME idleTime;
|
||||
FILETIME kernelTime;
|
||||
FILETIME userTime;
|
||||
|
||||
GetSystemTimes( &idleTime, &kernelTime, &userTime );
|
||||
|
||||
idle = ConvertTime( idleTime );
|
||||
const auto kernel = ConvertTime( kernelTime );
|
||||
const auto user = ConvertTime( userTime );
|
||||
used = kernel + user;
|
||||
}
|
||||
|
||||
# elif defined __linux__
|
||||
|
||||
void SysTime::ReadTimes()
|
||||
{
|
||||
uint64_t user, nice, system;
|
||||
FILE* f = fopen( "/proc/stat", "r" );
|
||||
if( f )
|
||||
{
|
||||
int read = fscanf( f, "cpu %" PRIu64 " %" PRIu64 " %" PRIu64" %" PRIu64, &user, &nice, &system, &idle );
|
||||
fclose( f );
|
||||
if (read == 4)
|
||||
{
|
||||
used = user + nice + system;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# elif defined __APPLE__
|
||||
|
||||
void SysTime::ReadTimes()
|
||||
{
|
||||
host_cpu_load_info_data_t info;
|
||||
mach_msg_type_number_t cnt = HOST_CPU_LOAD_INFO_COUNT;
|
||||
host_statistics( mach_host_self(), HOST_CPU_LOAD_INFO, reinterpret_cast<host_info_t>( &info ), &cnt );
|
||||
used = info.cpu_ticks[CPU_STATE_USER] + info.cpu_ticks[CPU_STATE_NICE] + info.cpu_ticks[CPU_STATE_SYSTEM];
|
||||
idle = info.cpu_ticks[CPU_STATE_IDLE];
|
||||
}
|
||||
|
||||
# elif defined BSD
|
||||
|
||||
void SysTime::ReadTimes()
|
||||
{
|
||||
u_long data[5];
|
||||
size_t sz = sizeof( data );
|
||||
sysctlbyname( "kern.cp_time", &data, &sz, nullptr, 0 );
|
||||
used = data[0] + data[1] + data[2] + data[3];
|
||||
idle = data[4];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
SysTime::SysTime()
|
||||
{
|
||||
ReadTimes();
|
||||
}
|
||||
|
||||
float SysTime::Get()
|
||||
{
|
||||
const auto oldUsed = used;
|
||||
const auto oldIdle = idle;
|
||||
|
||||
ReadTimes();
|
||||
|
||||
const auto diffIdle = idle - oldIdle;
|
||||
const auto diffUsed = used - oldUsed;
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
return diffUsed == 0 ? -1 : ( diffUsed - diffIdle ) * 100.f / diffUsed;
|
||||
#elif defined __linux__ || defined __APPLE__ || defined BSD
|
||||
const auto total = diffUsed + diffIdle;
|
||||
return total == 0 ? -1 : diffUsed * 100.f / total;
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,36 @@
|
|||
#ifndef __TRACYSYSTIME_HPP__
|
||||
#define __TRACYSYSTIME_HPP__
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__ || defined __linux__ || defined __APPLE__
|
||||
# define TRACY_HAS_SYSTIME
|
||||
#else
|
||||
# include <sys/param.h>
|
||||
#endif
|
||||
|
||||
#ifdef BSD
|
||||
# define TRACY_HAS_SYSTIME
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_SYSTIME
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class SysTime
|
||||
{
|
||||
public:
|
||||
SysTime();
|
||||
float Get();
|
||||
|
||||
void ReadTimes();
|
||||
|
||||
private:
|
||||
uint64_t idle, used;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,25 @@
|
|||
#ifndef __TRACYSYSTRACE_HPP__
|
||||
#define __TRACYSYSTRACE_HPP__
|
||||
|
||||
#if !defined TRACY_NO_SYSTEM_TRACING && ( defined _WIN32 || defined __CYGWIN__ || defined __linux__ )
|
||||
# define TRACY_HAS_SYSTEM_TRACING
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_SYSTEM_TRACING
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
bool SysTraceStart( int64_t& samplingPeriod );
|
||||
void SysTraceStop();
|
||||
void SysTraceWorker( void* ptr );
|
||||
|
||||
void SysTraceSendExternalName( uint64_t thread );
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,78 @@
|
|||
// File: 'extra/systrace/tracy_systrace.armv7' (1149 bytes)
|
||||
// File: 'extra/systrace/tracy_systrace.aarch64' (1650 bytes)
|
||||
|
||||
// Exported using binary_to_compressed_c.cpp
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
static const unsigned int tracy_systrace_armv7_size = 1149;
|
||||
static const unsigned int tracy_systrace_armv7_data[1152/4] =
|
||||
{
|
||||
0x464c457f, 0x00010101, 0x00000000, 0x00000000, 0x00280003, 0x00000001, 0x000001f0, 0x00000034, 0x00000000, 0x05000200, 0x00200034, 0x00280007,
|
||||
0x00000000, 0x00000006, 0x00000034, 0x00000034, 0x00000034, 0x000000e0, 0x000000e0, 0x00000004, 0x00000004, 0x00000003, 0x00000114, 0x00000114,
|
||||
0x00000114, 0x00000013, 0x00000013, 0x00000004, 0x00000001, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x000003fd, 0x000003fd, 0x00000005,
|
||||
0x00001000, 0x00000001, 0x000003fd, 0x000013fd, 0x000013fd, 0x00000080, 0x000000b3, 0x00000006, 0x00001000, 0x00000002, 0x00000400, 0x00001400,
|
||||
0x00001400, 0x0000007d, 0x000000b0, 0x00000006, 0x00000004, 0x6474e551, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000006,
|
||||
0x00000004, 0x70000001, 0x000003a4, 0x000003a4, 0x000003a4, 0x00000008, 0x00000008, 0x00000004, 0x00000004, 0x7379732f, 0x2f6d6574, 0x2f6e6962,
|
||||
0x6b6e696c, 0x00007265, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000012, 0x00000016, 0x00000000,
|
||||
0x00000000, 0x00000012, 0x6f6c6400, 0x006e6570, 0x4342494c, 0x62696c00, 0x732e6c64, 0x6c64006f, 0x006d7973, 0x00000001, 0x00000003, 0x00000001,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00010001, 0x0000000d, 0x00000010, 0x00000000, 0x00050d63, 0x00020000, 0x00000008,
|
||||
0x00000000, 0x000014bc, 0x00000116, 0x000014c0, 0x00000216, 0xe52de004, 0xe59fe004, 0xe08fe00e, 0xe5bef008, 0x000012dc, 0xe28fc600, 0xe28cca01,
|
||||
0xe5bcf2dc, 0xe28fc600, 0xe28cca01, 0xe5bcf2d4, 0xe92d4ff0, 0xe28db01c, 0xe24dd024, 0xe24dd801, 0xe59f017c, 0xe3a01001, 0xe3a08001, 0xe08f0000,
|
||||
0xebfffff0, 0xe59f116c, 0xe1a04000, 0xe08f1001, 0xebffffef, 0xe59f1160, 0xe1a06000, 0xe1a00004, 0xe08f1001, 0xebffffea, 0xe59f1150, 0xe1a07000,
|
||||
0xe1a00004, 0xe08f1001, 0xebffffe5, 0xe59f1140, 0xe1a05000, 0xe1a00004, 0xe08f1001, 0xebffffe0, 0xe58d0004, 0xe1a00004, 0xe59f1128, 0xe08f1001,
|
||||
0xebffffdb, 0xe59f1120, 0xe1a0a000, 0xe1a00004, 0xe08f1001, 0xebffffd6, 0xe1a04000, 0xe59f010c, 0xe3a01000, 0xe3a09000, 0xe08f0000, 0xe12fff36,
|
||||
0xe1a06000, 0xe3700001, 0xca000001, 0xe3a00000, 0xe12fff37, 0xe3a00009, 0xe3a01001, 0xe1cd01bc, 0xe3a00008, 0xe1cd01b4, 0xe3090680, 0xe3400098,
|
||||
0xe3a02000, 0xe58d000c, 0xe28d0010, 0xe58d7000, 0xe58d6018, 0xe58d8010, 0xe58d9008, 0xe12fff35, 0xe3500000, 0xca00001d, 0xe28d7018, 0xe28d8010,
|
||||
0xe28d9020, 0xe1a00007, 0xe3a01001, 0xe3a02000, 0xe12fff35, 0xe3500000, 0xda00000a, 0xe1a00006, 0xe1a01009, 0xe3a02801, 0xe12fff3a, 0xe3500001,
|
||||
0xba00000e, 0xe1a02000, 0xe3a00001, 0xe1a01009, 0xe12fff34, 0xea000003, 0xe59d2004, 0xe28d0008, 0xe3a01000, 0xe12fff32, 0xe1a00008, 0xe3a01001,
|
||||
0xe3a02000, 0xe12fff35, 0xe3500001, 0xbaffffe4, 0xe59d1000, 0xe3a00000, 0xe12fff31, 0xe24bd01c, 0xe8bd8ff0, 0x00000198, 0x00000190, 0x00000181,
|
||||
0x00000172, 0x00000163, 0x00000159, 0x0000014a, 0x00000138, 0x7ffffe4c, 0x00000001, 0x6362696c, 0x006f732e, 0x6e65706f, 0x69786500, 0x6f700074,
|
||||
0x6e006c6c, 0x736f6e61, 0x7065656c, 0x61657200, 0x72770064, 0x00657469, 0x7379732f, 0x72656b2f, 0x2f6c656e, 0x75626564, 0x72742f67, 0x6e696361,
|
||||
0x72742f67, 0x5f656361, 0x65706970, 0x00000000, 0x00000003, 0x000014b0, 0x00000002, 0x00000010, 0x00000017, 0x000001b4, 0x00000014, 0x00000011,
|
||||
0x00000015, 0x00000000, 0x00000006, 0x00000128, 0x0000000b, 0x00000010, 0x00000005, 0x00000158, 0x0000000a, 0x0000001c, 0x6ffffef5, 0x00000174,
|
||||
0x00000001, 0x0000000d, 0x0000001e, 0x00000008, 0x6ffffffb, 0x00000001, 0x6ffffff0, 0x0000018c, 0x6ffffffe, 0x00000194, 0x6fffffff, 0x00000001,
|
||||
};
|
||||
|
||||
static const unsigned int tracy_systrace_aarch64_size = 1650;
|
||||
static const unsigned int tracy_systrace_aarch64_data[1652/4] =
|
||||
{
|
||||
0x464c457f, 0x00010102, 0x00000000, 0x00000000, 0x00b70003, 0x00000001, 0x000002e0, 0x00000000, 0x00000040, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00380040, 0x00400006, 0x00000000, 0x00000006, 0x00000005, 0x00000040, 0x00000000, 0x00000040, 0x00000000, 0x00000040, 0x00000000,
|
||||
0x00000150, 0x00000000, 0x00000150, 0x00000000, 0x00000008, 0x00000000, 0x00000003, 0x00000004, 0x00000190, 0x00000000, 0x00000190, 0x00000000,
|
||||
0x00000190, 0x00000000, 0x00000015, 0x00000000, 0x00000015, 0x00000000, 0x00000001, 0x00000000, 0x00000001, 0x00000005, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000004e1, 0x00000000, 0x000004e1, 0x00000000, 0x00001000, 0x00000000, 0x00000001, 0x00000006,
|
||||
0x000004e8, 0x00000000, 0x000014e8, 0x00000000, 0x000014e8, 0x00000000, 0x0000018a, 0x00000000, 0x00000190, 0x00000000, 0x00001000, 0x00000000,
|
||||
0x00000002, 0x00000006, 0x000004e8, 0x00000000, 0x000014e8, 0x00000000, 0x000014e8, 0x00000000, 0x00000160, 0x00000000, 0x00000160, 0x00000000,
|
||||
0x00000008, 0x00000000, 0x6474e551, 0x00000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000008, 0x00000000, 0x7379732f, 0x2f6d6574, 0x2f6e6962, 0x6b6e696c, 0x34367265, 0x00000000, 0x00000001, 0x00000001,
|
||||
0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00090003, 0x000002e0, 0x00000000, 0x00000000, 0x00000000, 0x00000010, 0x00000012, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x0000000a, 0x00000012, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x62696c00, 0x732e6c64, 0x6c64006f, 0x006d7973, 0x706f6c64, 0x4c006e65,
|
||||
0x00434249, 0x00000000, 0x00000000, 0x00000000, 0x00010001, 0x00000001, 0x00000010, 0x00000000, 0x00050d63, 0x00020000, 0x00000017, 0x00000000,
|
||||
0x00001668, 0x00000000, 0x00000402, 0x00000002, 0x00000000, 0x00000000, 0x00001670, 0x00000000, 0x00000402, 0x00000003, 0x00000000, 0x00000000,
|
||||
0xa9bf7bf0, 0xb0000010, 0xf9433211, 0x91198210, 0xd61f0220, 0xd503201f, 0xd503201f, 0xd503201f, 0xb0000010, 0xf9433611, 0x9119a210, 0xd61f0220,
|
||||
0xb0000010, 0xf9433a11, 0x9119c210, 0xd61f0220, 0xa9bb67fc, 0xa9015ff8, 0xa90257f6, 0xa9034ff4, 0xa9047bfd, 0x910103fd, 0xd14043ff, 0xd10083ff,
|
||||
0x90000000, 0x91124000, 0x52800021, 0x52800039, 0x97ffffec, 0x90000001, 0x91126021, 0xaa0003f7, 0x97ffffec, 0x90000001, 0xaa0003f8, 0x91127421,
|
||||
0xaa1703e0, 0x97ffffe7, 0x90000001, 0xaa0003f3, 0x91128821, 0xaa1703e0, 0x97ffffe2, 0x90000001, 0xaa0003f4, 0x91129c21, 0xaa1703e0, 0x97ffffdd,
|
||||
0x90000001, 0xaa0003f5, 0x9112c421, 0xaa1703e0, 0x97ffffd8, 0x90000001, 0xaa0003f6, 0x9112d821, 0xaa1703e0, 0x97ffffd3, 0xaa0003f7, 0x90000000,
|
||||
0x9112f000, 0x2a1f03e1, 0xd63f0300, 0x2a0003f8, 0x36f80060, 0x2a1f03e0, 0xd63f0260, 0x90000009, 0x3dc12120, 0x52800128, 0x79003be8, 0x52800108,
|
||||
0x910043e0, 0x52800021, 0x2a1f03e2, 0xb9001bf8, 0xb90013f9, 0x79002be8, 0x3d8003e0, 0xd63f0280, 0x7100001f, 0x5400036c, 0x910063e0, 0x52800021,
|
||||
0x2a1f03e2, 0xd63f0280, 0x7100001f, 0x5400018d, 0x910083e1, 0x52a00022, 0x2a1803e0, 0xd63f02c0, 0xf100041f, 0x540001eb, 0xaa0003e2, 0x910083e1,
|
||||
0x52800020, 0xd63f02e0, 0x14000004, 0x910003e0, 0xaa1f03e1, 0xd63f02a0, 0x910043e0, 0x52800021, 0x2a1f03e2, 0xd63f0280, 0x7100041f, 0x54fffceb,
|
||||
0x2a1f03e0, 0xd63f0260, 0x914043ff, 0x910083ff, 0xa9447bfd, 0xa9434ff4, 0xa94257f6, 0xa9415ff8, 0xa8c567fc, 0xd65f03c0, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00989680, 0x00000000, 0x6362696c, 0x006f732e, 0x6e65706f, 0x69786500, 0x6f700074, 0x6e006c6c, 0x736f6e61, 0x7065656c,
|
||||
0x61657200, 0x72770064, 0x00657469, 0x7379732f, 0x72656b2f, 0x2f6c656e, 0x75626564, 0x72742f67, 0x6e696361, 0x72742f67, 0x5f656361, 0x65706970,
|
||||
0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000001, 0x00000000, 0x6ffffef5, 0x00000000, 0x000001a8, 0x00000000, 0x00000005, 0x00000000,
|
||||
0x00000228, 0x00000000, 0x00000006, 0x00000000, 0x000001c8, 0x00000000, 0x0000000a, 0x00000000, 0x0000001c, 0x00000000, 0x0000000b, 0x00000000,
|
||||
0x00000018, 0x00000000, 0x00000015, 0x00000000, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00001650, 0x00000000, 0x00000002, 0x00000000,
|
||||
0x00000030, 0x00000000, 0x00000014, 0x00000000, 0x00000007, 0x00000000, 0x00000017, 0x00000000, 0x00000270, 0x00000000, 0x0000001e, 0x00000000,
|
||||
0x00000008, 0x00000000, 0x6ffffffb, 0x00000000, 0x00000001, 0x00000000, 0x6ffffffe, 0x00000000, 0x00000250, 0x00000000, 0x6fffffff, 0x00000000,
|
||||
0x00000001, 0x00000000, 0x6ffffff0, 0x00000000, 0x00000244, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x000002a0, 0x00000000, 0x000002a0,
|
||||
};
|
||||
|
||||
}
|
|
@ -0,0 +1,85 @@
|
|||
#ifndef __TRACYTHREAD_HPP__
|
||||
#define __TRACYTHREAD_HPP__
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# include <windows.h>
|
||||
#else
|
||||
# include <pthread.h>
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_MANUAL_LIFETIME
|
||||
# include "tracy_rpmalloc.hpp"
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class ThreadExitHandler
|
||||
{
|
||||
public:
|
||||
~ThreadExitHandler()
|
||||
{
|
||||
#ifdef TRACY_MANUAL_LIFETIME
|
||||
rpmalloc_thread_finalize();
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
|
||||
class Thread
|
||||
{
|
||||
public:
|
||||
Thread( void(*func)( void* ptr ), void* ptr )
|
||||
: m_func( func )
|
||||
, m_ptr( ptr )
|
||||
, m_hnd( CreateThread( nullptr, 0, Launch, this, 0, nullptr ) )
|
||||
{}
|
||||
|
||||
~Thread()
|
||||
{
|
||||
WaitForSingleObject( m_hnd, INFINITE );
|
||||
CloseHandle( m_hnd );
|
||||
}
|
||||
|
||||
HANDLE Handle() const { return m_hnd; }
|
||||
|
||||
private:
|
||||
static DWORD WINAPI Launch( void* ptr ) { ((Thread*)ptr)->m_func( ((Thread*)ptr)->m_ptr ); return 0; }
|
||||
|
||||
void(*m_func)( void* ptr );
|
||||
void* m_ptr;
|
||||
HANDLE m_hnd;
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
class Thread
|
||||
{
|
||||
public:
|
||||
Thread( void(*func)( void* ptr ), void* ptr )
|
||||
: m_func( func )
|
||||
, m_ptr( ptr )
|
||||
{
|
||||
pthread_create( &m_thread, nullptr, Launch, this );
|
||||
}
|
||||
|
||||
~Thread()
|
||||
{
|
||||
pthread_join( m_thread, nullptr );
|
||||
}
|
||||
|
||||
pthread_t Handle() const { return m_thread; }
|
||||
|
||||
private:
|
||||
static void* Launch( void* ptr ) { ((Thread*)ptr)->m_func( ((Thread*)ptr)->m_ptr ); return nullptr; }
|
||||
void(*m_func)( void* ptr );
|
||||
void* m_ptr;
|
||||
pthread_t m_thread;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,261 @@
|
|||
/* rpmalloc.h - Memory allocator - Public Domain - 2016 Mattias Jansson
|
||||
*
|
||||
* This library provides a cross-platform lock free thread caching malloc implementation in C11.
|
||||
* The latest source code is always available at
|
||||
*
|
||||
* https://github.com/mjansson/rpmalloc
|
||||
*
|
||||
* This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
#include "../common/TracyApi.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
# define RPMALLOC_EXPORT __attribute__((visibility("default")))
|
||||
# define RPMALLOC_ALLOCATOR
|
||||
# define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__))
|
||||
# if defined(__clang_major__) && (__clang_major__ < 4)
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
|
||||
# else
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size)))
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) __attribute__((alloc_size(count, size)))
|
||||
# endif
|
||||
# define RPMALLOC_CDECL
|
||||
#elif defined(_MSC_VER)
|
||||
# define RPMALLOC_EXPORT
|
||||
# define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict)
|
||||
# define RPMALLOC_ATTRIB_MALLOC
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size)
|
||||
# define RPMALLOC_CDECL __cdecl
|
||||
#else
|
||||
# define RPMALLOC_EXPORT
|
||||
# define RPMALLOC_ALLOCATOR
|
||||
# define RPMALLOC_ATTRIB_MALLOC
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size)
|
||||
# define RPMALLOC_CDECL
|
||||
#endif
|
||||
|
||||
//! Define RPMALLOC_CONFIGURABLE to enable configuring sizes
|
||||
#ifndef RPMALLOC_CONFIGURABLE
|
||||
#define RPMALLOC_CONFIGURABLE 0
|
||||
#endif
|
||||
|
||||
//! Flag to rpaligned_realloc to not preserve content in reallocation
|
||||
#define RPMALLOC_NO_PRESERVE 1
|
||||
|
||||
typedef struct rpmalloc_global_statistics_t {
|
||||
//! Current amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
|
||||
size_t mapped;
|
||||
//! Peak amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
|
||||
size_t mapped_peak;
|
||||
//! Current amount of memory in global caches for small and medium sizes (<32KiB)
|
||||
size_t cached;
|
||||
//! Current amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
|
||||
size_t huge_alloc;
|
||||
//! Peak amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
|
||||
size_t huge_alloc_peak;
|
||||
//! Total amount of memory mapped since initialization (only if ENABLE_STATISTICS=1)
|
||||
size_t mapped_total;
|
||||
//! Total amount of memory unmapped since initialization (only if ENABLE_STATISTICS=1)
|
||||
size_t unmapped_total;
|
||||
} rpmalloc_global_statistics_t;
|
||||
|
||||
typedef struct rpmalloc_thread_statistics_t {
|
||||
//! Current number of bytes available in thread size class caches for small and medium sizes (<32KiB)
|
||||
size_t sizecache;
|
||||
//! Current number of bytes available in thread span caches for small and medium sizes (<32KiB)
|
||||
size_t spancache;
|
||||
//! Total number of bytes transitioned from thread cache to global cache (only if ENABLE_STATISTICS=1)
|
||||
size_t thread_to_global;
|
||||
//! Total number of bytes transitioned from global cache to thread cache (only if ENABLE_STATISTICS=1)
|
||||
size_t global_to_thread;
|
||||
//! Per span count statistics (only if ENABLE_STATISTICS=1)
|
||||
struct {
|
||||
//! Currently used number of spans
|
||||
size_t current;
|
||||
//! High water mark of spans used
|
||||
size_t peak;
|
||||
//! Number of spans transitioned to global cache
|
||||
size_t to_global;
|
||||
//! Number of spans transitioned from global cache
|
||||
size_t from_global;
|
||||
//! Number of spans transitioned to thread cache
|
||||
size_t to_cache;
|
||||
//! Number of spans transitioned from thread cache
|
||||
size_t from_cache;
|
||||
//! Number of spans transitioned to reserved state
|
||||
size_t to_reserved;
|
||||
//! Number of spans transitioned from reserved state
|
||||
size_t from_reserved;
|
||||
//! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
|
||||
size_t map_calls;
|
||||
} span_use[32];
|
||||
//! Per size class statistics (only if ENABLE_STATISTICS=1)
|
||||
struct {
|
||||
//! Current number of allocations
|
||||
size_t alloc_current;
|
||||
//! Peak number of allocations
|
||||
size_t alloc_peak;
|
||||
//! Total number of allocations
|
||||
size_t alloc_total;
|
||||
//! Total number of frees
|
||||
size_t free_total;
|
||||
//! Number of spans transitioned to cache
|
||||
size_t spans_to_cache;
|
||||
//! Number of spans transitioned from cache
|
||||
size_t spans_from_cache;
|
||||
//! Number of spans transitioned from reserved state
|
||||
size_t spans_from_reserved;
|
||||
//! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
|
||||
size_t map_calls;
|
||||
} size_use[128];
|
||||
} rpmalloc_thread_statistics_t;
|
||||
|
||||
typedef struct rpmalloc_config_t {
|
||||
//! Map memory pages for the given number of bytes. The returned address MUST be
|
||||
// aligned to the rpmalloc span size, which will always be a power of two.
|
||||
// Optionally the function can store an alignment offset in the offset variable
|
||||
// in case it performs alignment and the returned pointer is offset from the
|
||||
// actual start of the memory region due to this alignment. The alignment offset
|
||||
// will be passed to the memory unmap function. The alignment offset MUST NOT be
|
||||
// larger than 65535 (storable in an uint16_t), if it is you must use natural
|
||||
// alignment to shift it into 16 bits. If you set a memory_map function, you
|
||||
// must also set a memory_unmap function or else the default implementation will
|
||||
// be used for both.
|
||||
void* (*memory_map)(size_t size, size_t* offset);
|
||||
//! Unmap the memory pages starting at address and spanning the given number of bytes.
|
||||
// If release is set to non-zero, the unmap is for an entire span range as returned by
|
||||
// a previous call to memory_map and that the entire range should be released. The
|
||||
// release argument holds the size of the entire span range. If release is set to 0,
|
||||
// the unmap is a partial decommit of a subset of the mapped memory range.
|
||||
// If you set a memory_unmap function, you must also set a memory_map function or
|
||||
// else the default implementation will be used for both.
|
||||
void (*memory_unmap)(void* address, size_t size, size_t offset, size_t release);
|
||||
//! Size of memory pages. The page size MUST be a power of two. All memory mapping
|
||||
// requests to memory_map will be made with size set to a multiple of the page size.
|
||||
// Used if RPMALLOC_CONFIGURABLE is defined to 1, otherwise system page size is used.
|
||||
size_t page_size;
|
||||
//! Size of a span of memory blocks. MUST be a power of two, and in [4096,262144]
|
||||
// range (unless 0 - set to 0 to use the default span size). Used if RPMALLOC_CONFIGURABLE
|
||||
// is defined to 1.
|
||||
size_t span_size;
|
||||
//! Number of spans to map at each request to map new virtual memory blocks. This can
|
||||
// be used to minimize the system call overhead at the cost of virtual memory address
|
||||
// space. The extra mapped pages will not be written until actually used, so physical
|
||||
// committed memory should not be affected in the default implementation. Will be
|
||||
// aligned to a multiple of spans that match memory page size in case of huge pages.
|
||||
size_t span_map_count;
|
||||
//! Enable use of large/huge pages. If this flag is set to non-zero and page size is
|
||||
// zero, the allocator will try to enable huge pages and auto detect the configuration.
|
||||
// If this is set to non-zero and page_size is also non-zero, the allocator will
|
||||
// assume huge pages have been configured and enabled prior to initializing the
|
||||
// allocator.
|
||||
// For Windows, see https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support
|
||||
// For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
|
||||
int enable_huge_pages;
|
||||
} rpmalloc_config_t;
|
||||
|
||||
//! Initialize allocator with default configuration
|
||||
TRACY_API int
|
||||
rpmalloc_initialize(void);
|
||||
|
||||
//! Initialize allocator with given configuration
|
||||
RPMALLOC_EXPORT int
|
||||
rpmalloc_initialize_config(const rpmalloc_config_t* config);
|
||||
|
||||
//! Get allocator configuration
|
||||
RPMALLOC_EXPORT const rpmalloc_config_t*
|
||||
rpmalloc_config(void);
|
||||
|
||||
//! Finalize allocator
|
||||
TRACY_API void
|
||||
rpmalloc_finalize(void);
|
||||
|
||||
//! Initialize allocator for calling thread
|
||||
TRACY_API void
|
||||
rpmalloc_thread_initialize(void);
|
||||
|
||||
//! Finalize allocator for calling thread
|
||||
TRACY_API void
|
||||
rpmalloc_thread_finalize(void);
|
||||
|
||||
//! Perform deferred deallocations pending for the calling thread heap
|
||||
RPMALLOC_EXPORT void
|
||||
rpmalloc_thread_collect(void);
|
||||
|
||||
//! Query if allocator is initialized for calling thread
|
||||
RPMALLOC_EXPORT int
|
||||
rpmalloc_is_thread_initialized(void);
|
||||
|
||||
//! Get per-thread statistics
|
||||
RPMALLOC_EXPORT void
|
||||
rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats);
|
||||
|
||||
//! Get global statistics
|
||||
RPMALLOC_EXPORT void
|
||||
rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats);
|
||||
|
||||
//! Dump all statistics in human readable format to file (should be a FILE*)
|
||||
RPMALLOC_EXPORT void
|
||||
rpmalloc_dump_statistics(void* file);
|
||||
|
||||
//! Allocate a memory block of at least the given size
|
||||
TRACY_API RPMALLOC_ALLOCATOR void*
|
||||
rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);
|
||||
|
||||
//! Free the given memory block
|
||||
TRACY_API void
|
||||
rpfree(void* ptr);
|
||||
|
||||
//! Allocate a memory block of at least the given size and zero initialize it
|
||||
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
|
||||
rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2);
|
||||
|
||||
//! Reallocate the given block to at least the given size
|
||||
TRACY_API RPMALLOC_ALLOCATOR void*
|
||||
rprealloc(void* ptr, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
|
||||
|
||||
//! Reallocate the given block to at least the given size and alignment,
|
||||
// with optional control flags (see RPMALLOC_NO_PRESERVE).
|
||||
// Alignment must be a power of two and a multiple of sizeof(void*),
|
||||
// and should ideally be less than memory page size. A caveat of rpmalloc
|
||||
// internals is that this must also be strictly less than the span size (default 64KiB)
|
||||
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
|
||||
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(3);
|
||||
|
||||
//! Allocate a memory block of at least the given size and alignment.
|
||||
// Alignment must be a power of two and a multiple of sizeof(void*),
|
||||
// and should ideally be less than memory page size. A caveat of rpmalloc
|
||||
// internals is that this must also be strictly less than the span size (default 64KiB)
|
||||
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
|
||||
rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
|
||||
|
||||
//! Allocate a memory block of at least the given size and alignment.
|
||||
// Alignment must be a power of two and a multiple of sizeof(void*),
|
||||
// and should ideally be less than memory page size. A caveat of rpmalloc
|
||||
// internals is that this must also be strictly less than the span size (default 64KiB)
|
||||
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
|
||||
rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
|
||||
|
||||
//! Allocate a memory block of at least the given size and alignment.
|
||||
// Alignment must be a power of two and a multiple of sizeof(void*),
|
||||
// and should ideally be less than memory page size. A caveat of rpmalloc
|
||||
// internals is that this must also be strictly less than the span size (default 64KiB)
|
||||
RPMALLOC_EXPORT int
|
||||
rpposix_memalign(void **memptr, size_t alignment, size_t size);
|
||||
|
||||
//! Query the usable size of the given memory block (from given pointer to the end of block)
|
||||
RPMALLOC_EXPORT size_t
|
||||
rpmalloc_usable_size(void* ptr);
|
||||
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
#ifndef __TRACYALIGN_HPP__
|
||||
#define __TRACYALIGN_HPP__
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "TracyForceInline.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
template<typename T>
|
||||
tracy_force_inline T MemRead( const void* ptr )
|
||||
{
|
||||
T val;
|
||||
memcpy( &val, ptr, sizeof( T ) );
|
||||
return val;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
tracy_force_inline void MemWrite( void* ptr, T val )
|
||||
{
|
||||
memcpy( ptr, &val, sizeof( T ) );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,42 @@
|
|||
#ifndef __TRACYALLOC_HPP__
|
||||
#define __TRACYALLOC_HPP__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
# include "../client/tracy_rpmalloc.hpp"
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
static inline void* tracy_malloc( size_t size )
|
||||
{
|
||||
#ifdef TRACY_ENABLE
|
||||
return rpmalloc( size );
|
||||
#else
|
||||
return malloc( size );
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void tracy_free( void* ptr )
|
||||
{
|
||||
#ifdef TRACY_ENABLE
|
||||
rpfree( ptr );
|
||||
#else
|
||||
free( ptr );
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void* tracy_realloc( void* ptr, size_t size )
|
||||
{
|
||||
#ifdef TRACY_ENABLE
|
||||
return rprealloc( ptr, size );
|
||||
#else
|
||||
return realloc( ptr, size );
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,16 @@
|
|||
#ifndef __TRACYAPI_H__
|
||||
#define __TRACYAPI_H__
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# if defined TRACY_EXPORTS
|
||||
# define TRACY_API __declspec(dllexport)
|
||||
# elif defined TRACY_IMPORTS
|
||||
# define TRACY_API __declspec(dllimport)
|
||||
# else
|
||||
# define TRACY_API
|
||||
# endif
|
||||
#else
|
||||
# define TRACY_API __attribute__((visibility("default")))
|
||||
#endif
|
||||
|
||||
#endif // __TRACYAPI_H__
|
|
@ -0,0 +1,690 @@
|
|||
#ifndef __TRACYCOLOR_HPP__
|
||||
#define __TRACYCOLOR_HPP__
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
struct Color
|
||||
{
|
||||
enum ColorType
|
||||
{
|
||||
Snow = 0xfffafa,
|
||||
GhostWhite = 0xf8f8ff,
|
||||
WhiteSmoke = 0xf5f5f5,
|
||||
Gainsboro = 0xdcdcdc,
|
||||
FloralWhite = 0xfffaf0,
|
||||
OldLace = 0xfdf5e6,
|
||||
Linen = 0xfaf0e6,
|
||||
AntiqueWhite = 0xfaebd7,
|
||||
PapayaWhip = 0xffefd5,
|
||||
BlanchedAlmond = 0xffebcd,
|
||||
Bisque = 0xffe4c4,
|
||||
PeachPuff = 0xffdab9,
|
||||
NavajoWhite = 0xffdead,
|
||||
Moccasin = 0xffe4b5,
|
||||
Cornsilk = 0xfff8dc,
|
||||
Ivory = 0xfffff0,
|
||||
LemonChiffon = 0xfffacd,
|
||||
Seashell = 0xfff5ee,
|
||||
Honeydew = 0xf0fff0,
|
||||
MintCream = 0xf5fffa,
|
||||
Azure = 0xf0ffff,
|
||||
AliceBlue = 0xf0f8ff,
|
||||
Lavender = 0xe6e6fa,
|
||||
LavenderBlush = 0xfff0f5,
|
||||
MistyRose = 0xffe4e1,
|
||||
White = 0xffffff,
|
||||
Black = 0x000000,
|
||||
DarkSlateGray = 0x2f4f4f,
|
||||
DarkSlateGrey = 0x2f4f4f,
|
||||
DimGray = 0x696969,
|
||||
DimGrey = 0x696969,
|
||||
SlateGray = 0x708090,
|
||||
SlateGrey = 0x708090,
|
||||
LightSlateGray = 0x778899,
|
||||
LightSlateGrey = 0x778899,
|
||||
Gray = 0xbebebe,
|
||||
Grey = 0xbebebe,
|
||||
X11Gray = 0xbebebe,
|
||||
X11Grey = 0xbebebe,
|
||||
WebGray = 0x808080,
|
||||
WebGrey = 0x808080,
|
||||
LightGrey = 0xd3d3d3,
|
||||
LightGray = 0xd3d3d3,
|
||||
MidnightBlue = 0x191970,
|
||||
Navy = 0x000080,
|
||||
NavyBlue = 0x000080,
|
||||
CornflowerBlue = 0x6495ed,
|
||||
DarkSlateBlue = 0x483d8b,
|
||||
SlateBlue = 0x6a5acd,
|
||||
MediumSlateBlue = 0x7b68ee,
|
||||
LightSlateBlue = 0x8470ff,
|
||||
MediumBlue = 0x0000cd,
|
||||
RoyalBlue = 0x4169e1,
|
||||
Blue = 0x0000ff,
|
||||
DodgerBlue = 0x1e90ff,
|
||||
DeepSkyBlue = 0x00bfff,
|
||||
SkyBlue = 0x87ceeb,
|
||||
LightSkyBlue = 0x87cefa,
|
||||
SteelBlue = 0x4682b4,
|
||||
LightSteelBlue = 0xb0c4de,
|
||||
LightBlue = 0xadd8e6,
|
||||
PowderBlue = 0xb0e0e6,
|
||||
PaleTurquoise = 0xafeeee,
|
||||
DarkTurquoise = 0x00ced1,
|
||||
MediumTurquoise = 0x48d1cc,
|
||||
Turquoise = 0x40e0d0,
|
||||
Cyan = 0x00ffff,
|
||||
Aqua = 0x00ffff,
|
||||
LightCyan = 0xe0ffff,
|
||||
CadetBlue = 0x5f9ea0,
|
||||
MediumAquamarine = 0x66cdaa,
|
||||
Aquamarine = 0x7fffd4,
|
||||
DarkGreen = 0x006400,
|
||||
DarkOliveGreen = 0x556b2f,
|
||||
DarkSeaGreen = 0x8fbc8f,
|
||||
SeaGreen = 0x2e8b57,
|
||||
MediumSeaGreen = 0x3cb371,
|
||||
LightSeaGreen = 0x20b2aa,
|
||||
PaleGreen = 0x98fb98,
|
||||
SpringGreen = 0x00ff7f,
|
||||
LawnGreen = 0x7cfc00,
|
||||
Green = 0x00ff00,
|
||||
Lime = 0x00ff00,
|
||||
X11Green = 0x00ff00,
|
||||
WebGreen = 0x008000,
|
||||
Chartreuse = 0x7fff00,
|
||||
MediumSpringGreen = 0x00fa9a,
|
||||
GreenYellow = 0xadff2f,
|
||||
LimeGreen = 0x32cd32,
|
||||
YellowGreen = 0x9acd32,
|
||||
ForestGreen = 0x228b22,
|
||||
OliveDrab = 0x6b8e23,
|
||||
DarkKhaki = 0xbdb76b,
|
||||
Khaki = 0xf0e68c,
|
||||
PaleGoldenrod = 0xeee8aa,
|
||||
LightGoldenrodYellow = 0xfafad2,
|
||||
LightYellow = 0xffffe0,
|
||||
Yellow = 0xffff00,
|
||||
Gold = 0xffd700,
|
||||
LightGoldenrod = 0xeedd82,
|
||||
Goldenrod = 0xdaa520,
|
||||
DarkGoldenrod = 0xb8860b,
|
||||
RosyBrown = 0xbc8f8f,
|
||||
IndianRed = 0xcd5c5c,
|
||||
SaddleBrown = 0x8b4513,
|
||||
Sienna = 0xa0522d,
|
||||
Peru = 0xcd853f,
|
||||
Burlywood = 0xdeb887,
|
||||
Beige = 0xf5f5dc,
|
||||
Wheat = 0xf5deb3,
|
||||
SandyBrown = 0xf4a460,
|
||||
Tan = 0xd2b48c,
|
||||
Chocolate = 0xd2691e,
|
||||
Firebrick = 0xb22222,
|
||||
Brown = 0xa52a2a,
|
||||
DarkSalmon = 0xe9967a,
|
||||
Salmon = 0xfa8072,
|
||||
LightSalmon = 0xffa07a,
|
||||
Orange = 0xffa500,
|
||||
DarkOrange = 0xff8c00,
|
||||
Coral = 0xff7f50,
|
||||
LightCoral = 0xf08080,
|
||||
Tomato = 0xff6347,
|
||||
OrangeRed = 0xff4500,
|
||||
Red = 0xff0000,
|
||||
HotPink = 0xff69b4,
|
||||
DeepPink = 0xff1493,
|
||||
Pink = 0xffc0cb,
|
||||
LightPink = 0xffb6c1,
|
||||
PaleVioletRed = 0xdb7093,
|
||||
Maroon = 0xb03060,
|
||||
X11Maroon = 0xb03060,
|
||||
WebMaroon = 0x800000,
|
||||
MediumVioletRed = 0xc71585,
|
||||
VioletRed = 0xd02090,
|
||||
Magenta = 0xff00ff,
|
||||
Fuchsia = 0xff00ff,
|
||||
Violet = 0xee82ee,
|
||||
Plum = 0xdda0dd,
|
||||
Orchid = 0xda70d6,
|
||||
MediumOrchid = 0xba55d3,
|
||||
DarkOrchid = 0x9932cc,
|
||||
DarkViolet = 0x9400d3,
|
||||
BlueViolet = 0x8a2be2,
|
||||
Purple = 0xa020f0,
|
||||
X11Purple = 0xa020f0,
|
||||
WebPurple = 0x800080,
|
||||
MediumPurple = 0x9370db,
|
||||
Thistle = 0xd8bfd8,
|
||||
Snow1 = 0xfffafa,
|
||||
Snow2 = 0xeee9e9,
|
||||
Snow3 = 0xcdc9c9,
|
||||
Snow4 = 0x8b8989,
|
||||
Seashell1 = 0xfff5ee,
|
||||
Seashell2 = 0xeee5de,
|
||||
Seashell3 = 0xcdc5bf,
|
||||
Seashell4 = 0x8b8682,
|
||||
AntiqueWhite1 = 0xffefdb,
|
||||
AntiqueWhite2 = 0xeedfcc,
|
||||
AntiqueWhite3 = 0xcdc0b0,
|
||||
AntiqueWhite4 = 0x8b8378,
|
||||
Bisque1 = 0xffe4c4,
|
||||
Bisque2 = 0xeed5b7,
|
||||
Bisque3 = 0xcdb79e,
|
||||
Bisque4 = 0x8b7d6b,
|
||||
PeachPuff1 = 0xffdab9,
|
||||
PeachPuff2 = 0xeecbad,
|
||||
PeachPuff3 = 0xcdaf95,
|
||||
PeachPuff4 = 0x8b7765,
|
||||
NavajoWhite1 = 0xffdead,
|
||||
NavajoWhite2 = 0xeecfa1,
|
||||
NavajoWhite3 = 0xcdb38b,
|
||||
NavajoWhite4 = 0x8b795e,
|
||||
LemonChiffon1 = 0xfffacd,
|
||||
LemonChiffon2 = 0xeee9bf,
|
||||
LemonChiffon3 = 0xcdc9a5,
|
||||
LemonChiffon4 = 0x8b8970,
|
||||
Cornsilk1 = 0xfff8dc,
|
||||
Cornsilk2 = 0xeee8cd,
|
||||
Cornsilk3 = 0xcdc8b1,
|
||||
Cornsilk4 = 0x8b8878,
|
||||
Ivory1 = 0xfffff0,
|
||||
Ivory2 = 0xeeeee0,
|
||||
Ivory3 = 0xcdcdc1,
|
||||
Ivory4 = 0x8b8b83,
|
||||
Honeydew1 = 0xf0fff0,
|
||||
Honeydew2 = 0xe0eee0,
|
||||
Honeydew3 = 0xc1cdc1,
|
||||
Honeydew4 = 0x838b83,
|
||||
LavenderBlush1 = 0xfff0f5,
|
||||
LavenderBlush2 = 0xeee0e5,
|
||||
LavenderBlush3 = 0xcdc1c5,
|
||||
LavenderBlush4 = 0x8b8386,
|
||||
MistyRose1 = 0xffe4e1,
|
||||
MistyRose2 = 0xeed5d2,
|
||||
MistyRose3 = 0xcdb7b5,
|
||||
MistyRose4 = 0x8b7d7b,
|
||||
Azure1 = 0xf0ffff,
|
||||
Azure2 = 0xe0eeee,
|
||||
Azure3 = 0xc1cdcd,
|
||||
Azure4 = 0x838b8b,
|
||||
SlateBlue1 = 0x836fff,
|
||||
SlateBlue2 = 0x7a67ee,
|
||||
SlateBlue3 = 0x6959cd,
|
||||
SlateBlue4 = 0x473c8b,
|
||||
RoyalBlue1 = 0x4876ff,
|
||||
RoyalBlue2 = 0x436eee,
|
||||
RoyalBlue3 = 0x3a5fcd,
|
||||
RoyalBlue4 = 0x27408b,
|
||||
Blue1 = 0x0000ff,
|
||||
Blue2 = 0x0000ee,
|
||||
Blue3 = 0x0000cd,
|
||||
Blue4 = 0x00008b,
|
||||
DodgerBlue1 = 0x1e90ff,
|
||||
DodgerBlue2 = 0x1c86ee,
|
||||
DodgerBlue3 = 0x1874cd,
|
||||
DodgerBlue4 = 0x104e8b,
|
||||
SteelBlue1 = 0x63b8ff,
|
||||
SteelBlue2 = 0x5cacee,
|
||||
SteelBlue3 = 0x4f94cd,
|
||||
SteelBlue4 = 0x36648b,
|
||||
DeepSkyBlue1 = 0x00bfff,
|
||||
DeepSkyBlue2 = 0x00b2ee,
|
||||
DeepSkyBlue3 = 0x009acd,
|
||||
DeepSkyBlue4 = 0x00688b,
|
||||
SkyBlue1 = 0x87ceff,
|
||||
SkyBlue2 = 0x7ec0ee,
|
||||
SkyBlue3 = 0x6ca6cd,
|
||||
SkyBlue4 = 0x4a708b,
|
||||
LightSkyBlue1 = 0xb0e2ff,
|
||||
LightSkyBlue2 = 0xa4d3ee,
|
||||
LightSkyBlue3 = 0x8db6cd,
|
||||
LightSkyBlue4 = 0x607b8b,
|
||||
SlateGray1 = 0xc6e2ff,
|
||||
SlateGray2 = 0xb9d3ee,
|
||||
SlateGray3 = 0x9fb6cd,
|
||||
SlateGray4 = 0x6c7b8b,
|
||||
LightSteelBlue1 = 0xcae1ff,
|
||||
LightSteelBlue2 = 0xbcd2ee,
|
||||
LightSteelBlue3 = 0xa2b5cd,
|
||||
LightSteelBlue4 = 0x6e7b8b,
|
||||
LightBlue1 = 0xbfefff,
|
||||
LightBlue2 = 0xb2dfee,
|
||||
LightBlue3 = 0x9ac0cd,
|
||||
LightBlue4 = 0x68838b,
|
||||
LightCyan1 = 0xe0ffff,
|
||||
LightCyan2 = 0xd1eeee,
|
||||
LightCyan3 = 0xb4cdcd,
|
||||
LightCyan4 = 0x7a8b8b,
|
||||
PaleTurquoise1 = 0xbbffff,
|
||||
PaleTurquoise2 = 0xaeeeee,
|
||||
PaleTurquoise3 = 0x96cdcd,
|
||||
PaleTurquoise4 = 0x668b8b,
|
||||
CadetBlue1 = 0x98f5ff,
|
||||
CadetBlue2 = 0x8ee5ee,
|
||||
CadetBlue3 = 0x7ac5cd,
|
||||
CadetBlue4 = 0x53868b,
|
||||
Turquoise1 = 0x00f5ff,
|
||||
Turquoise2 = 0x00e5ee,
|
||||
Turquoise3 = 0x00c5cd,
|
||||
Turquoise4 = 0x00868b,
|
||||
Cyan1 = 0x00ffff,
|
||||
Cyan2 = 0x00eeee,
|
||||
Cyan3 = 0x00cdcd,
|
||||
Cyan4 = 0x008b8b,
|
||||
DarkSlateGray1 = 0x97ffff,
|
||||
DarkSlateGray2 = 0x8deeee,
|
||||
DarkSlateGray3 = 0x79cdcd,
|
||||
DarkSlateGray4 = 0x528b8b,
|
||||
Aquamarine1 = 0x7fffd4,
|
||||
Aquamarine2 = 0x76eec6,
|
||||
Aquamarine3 = 0x66cdaa,
|
||||
Aquamarine4 = 0x458b74,
|
||||
DarkSeaGreen1 = 0xc1ffc1,
|
||||
DarkSeaGreen2 = 0xb4eeb4,
|
||||
DarkSeaGreen3 = 0x9bcd9b,
|
||||
DarkSeaGreen4 = 0x698b69,
|
||||
SeaGreen1 = 0x54ff9f,
|
||||
SeaGreen2 = 0x4eee94,
|
||||
SeaGreen3 = 0x43cd80,
|
||||
SeaGreen4 = 0x2e8b57,
|
||||
PaleGreen1 = 0x9aff9a,
|
||||
PaleGreen2 = 0x90ee90,
|
||||
PaleGreen3 = 0x7ccd7c,
|
||||
PaleGreen4 = 0x548b54,
|
||||
SpringGreen1 = 0x00ff7f,
|
||||
SpringGreen2 = 0x00ee76,
|
||||
SpringGreen3 = 0x00cd66,
|
||||
SpringGreen4 = 0x008b45,
|
||||
Green1 = 0x00ff00,
|
||||
Green2 = 0x00ee00,
|
||||
Green3 = 0x00cd00,
|
||||
Green4 = 0x008b00,
|
||||
Chartreuse1 = 0x7fff00,
|
||||
Chartreuse2 = 0x76ee00,
|
||||
Chartreuse3 = 0x66cd00,
|
||||
Chartreuse4 = 0x458b00,
|
||||
OliveDrab1 = 0xc0ff3e,
|
||||
OliveDrab2 = 0xb3ee3a,
|
||||
OliveDrab3 = 0x9acd32,
|
||||
OliveDrab4 = 0x698b22,
|
||||
DarkOliveGreen1 = 0xcaff70,
|
||||
DarkOliveGreen2 = 0xbcee68,
|
||||
DarkOliveGreen3 = 0xa2cd5a,
|
||||
DarkOliveGreen4 = 0x6e8b3d,
|
||||
Khaki1 = 0xfff68f,
|
||||
Khaki2 = 0xeee685,
|
||||
Khaki3 = 0xcdc673,
|
||||
Khaki4 = 0x8b864e,
|
||||
LightGoldenrod1 = 0xffec8b,
|
||||
LightGoldenrod2 = 0xeedc82,
|
||||
LightGoldenrod3 = 0xcdbe70,
|
||||
LightGoldenrod4 = 0x8b814c,
|
||||
LightYellow1 = 0xffffe0,
|
||||
LightYellow2 = 0xeeeed1,
|
||||
LightYellow3 = 0xcdcdb4,
|
||||
LightYellow4 = 0x8b8b7a,
|
||||
Yellow1 = 0xffff00,
|
||||
Yellow2 = 0xeeee00,
|
||||
Yellow3 = 0xcdcd00,
|
||||
Yellow4 = 0x8b8b00,
|
||||
Gold1 = 0xffd700,
|
||||
Gold2 = 0xeec900,
|
||||
Gold3 = 0xcdad00,
|
||||
Gold4 = 0x8b7500,
|
||||
Goldenrod1 = 0xffc125,
|
||||
Goldenrod2 = 0xeeb422,
|
||||
Goldenrod3 = 0xcd9b1d,
|
||||
Goldenrod4 = 0x8b6914,
|
||||
DarkGoldenrod1 = 0xffb90f,
|
||||
DarkGoldenrod2 = 0xeead0e,
|
||||
DarkGoldenrod3 = 0xcd950c,
|
||||
DarkGoldenrod4 = 0x8b6508,
|
||||
RosyBrown1 = 0xffc1c1,
|
||||
RosyBrown2 = 0xeeb4b4,
|
||||
RosyBrown3 = 0xcd9b9b,
|
||||
RosyBrown4 = 0x8b6969,
|
||||
IndianRed1 = 0xff6a6a,
|
||||
IndianRed2 = 0xee6363,
|
||||
IndianRed3 = 0xcd5555,
|
||||
IndianRed4 = 0x8b3a3a,
|
||||
Sienna1 = 0xff8247,
|
||||
Sienna2 = 0xee7942,
|
||||
Sienna3 = 0xcd6839,
|
||||
Sienna4 = 0x8b4726,
|
||||
Burlywood1 = 0xffd39b,
|
||||
Burlywood2 = 0xeec591,
|
||||
Burlywood3 = 0xcdaa7d,
|
||||
Burlywood4 = 0x8b7355,
|
||||
Wheat1 = 0xffe7ba,
|
||||
Wheat2 = 0xeed8ae,
|
||||
Wheat3 = 0xcdba96,
|
||||
Wheat4 = 0x8b7e66,
|
||||
Tan1 = 0xffa54f,
|
||||
Tan2 = 0xee9a49,
|
||||
Tan3 = 0xcd853f,
|
||||
Tan4 = 0x8b5a2b,
|
||||
Chocolate1 = 0xff7f24,
|
||||
Chocolate2 = 0xee7621,
|
||||
Chocolate3 = 0xcd661d,
|
||||
Chocolate4 = 0x8b4513,
|
||||
Firebrick1 = 0xff3030,
|
||||
Firebrick2 = 0xee2c2c,
|
||||
Firebrick3 = 0xcd2626,
|
||||
Firebrick4 = 0x8b1a1a,
|
||||
Brown1 = 0xff4040,
|
||||
Brown2 = 0xee3b3b,
|
||||
Brown3 = 0xcd3333,
|
||||
Brown4 = 0x8b2323,
|
||||
Salmon1 = 0xff8c69,
|
||||
Salmon2 = 0xee8262,
|
||||
Salmon3 = 0xcd7054,
|
||||
Salmon4 = 0x8b4c39,
|
||||
LightSalmon1 = 0xffa07a,
|
||||
LightSalmon2 = 0xee9572,
|
||||
LightSalmon3 = 0xcd8162,
|
||||
LightSalmon4 = 0x8b5742,
|
||||
Orange1 = 0xffa500,
|
||||
Orange2 = 0xee9a00,
|
||||
Orange3 = 0xcd8500,
|
||||
Orange4 = 0x8b5a00,
|
||||
DarkOrange1 = 0xff7f00,
|
||||
DarkOrange2 = 0xee7600,
|
||||
DarkOrange3 = 0xcd6600,
|
||||
DarkOrange4 = 0x8b4500,
|
||||
Coral1 = 0xff7256,
|
||||
Coral2 = 0xee6a50,
|
||||
Coral3 = 0xcd5b45,
|
||||
Coral4 = 0x8b3e2f,
|
||||
Tomato1 = 0xff6347,
|
||||
Tomato2 = 0xee5c42,
|
||||
Tomato3 = 0xcd4f39,
|
||||
Tomato4 = 0x8b3626,
|
||||
OrangeRed1 = 0xff4500,
|
||||
OrangeRed2 = 0xee4000,
|
||||
OrangeRed3 = 0xcd3700,
|
||||
OrangeRed4 = 0x8b2500,
|
||||
Red1 = 0xff0000,
|
||||
Red2 = 0xee0000,
|
||||
Red3 = 0xcd0000,
|
||||
Red4 = 0x8b0000,
|
||||
DeepPink1 = 0xff1493,
|
||||
DeepPink2 = 0xee1289,
|
||||
DeepPink3 = 0xcd1076,
|
||||
DeepPink4 = 0x8b0a50,
|
||||
HotPink1 = 0xff6eb4,
|
||||
HotPink2 = 0xee6aa7,
|
||||
HotPink3 = 0xcd6090,
|
||||
HotPink4 = 0x8b3a62,
|
||||
Pink1 = 0xffb5c5,
|
||||
Pink2 = 0xeea9b8,
|
||||
Pink3 = 0xcd919e,
|
||||
Pink4 = 0x8b636c,
|
||||
LightPink1 = 0xffaeb9,
|
||||
LightPink2 = 0xeea2ad,
|
||||
LightPink3 = 0xcd8c95,
|
||||
LightPink4 = 0x8b5f65,
|
||||
PaleVioletRed1 = 0xff82ab,
|
||||
PaleVioletRed2 = 0xee799f,
|
||||
PaleVioletRed3 = 0xcd6889,
|
||||
PaleVioletRed4 = 0x8b475d,
|
||||
Maroon1 = 0xff34b3,
|
||||
Maroon2 = 0xee30a7,
|
||||
Maroon3 = 0xcd2990,
|
||||
Maroon4 = 0x8b1c62,
|
||||
VioletRed1 = 0xff3e96,
|
||||
VioletRed2 = 0xee3a8c,
|
||||
VioletRed3 = 0xcd3278,
|
||||
VioletRed4 = 0x8b2252,
|
||||
Magenta1 = 0xff00ff,
|
||||
Magenta2 = 0xee00ee,
|
||||
Magenta3 = 0xcd00cd,
|
||||
Magenta4 = 0x8b008b,
|
||||
Orchid1 = 0xff83fa,
|
||||
Orchid2 = 0xee7ae9,
|
||||
Orchid3 = 0xcd69c9,
|
||||
Orchid4 = 0x8b4789,
|
||||
Plum1 = 0xffbbff,
|
||||
Plum2 = 0xeeaeee,
|
||||
Plum3 = 0xcd96cd,
|
||||
Plum4 = 0x8b668b,
|
||||
MediumOrchid1 = 0xe066ff,
|
||||
MediumOrchid2 = 0xd15fee,
|
||||
MediumOrchid3 = 0xb452cd,
|
||||
MediumOrchid4 = 0x7a378b,
|
||||
DarkOrchid1 = 0xbf3eff,
|
||||
DarkOrchid2 = 0xb23aee,
|
||||
DarkOrchid3 = 0x9a32cd,
|
||||
DarkOrchid4 = 0x68228b,
|
||||
Purple1 = 0x9b30ff,
|
||||
Purple2 = 0x912cee,
|
||||
Purple3 = 0x7d26cd,
|
||||
Purple4 = 0x551a8b,
|
||||
MediumPurple1 = 0xab82ff,
|
||||
MediumPurple2 = 0x9f79ee,
|
||||
MediumPurple3 = 0x8968cd,
|
||||
MediumPurple4 = 0x5d478b,
|
||||
Thistle1 = 0xffe1ff,
|
||||
Thistle2 = 0xeed2ee,
|
||||
Thistle3 = 0xcdb5cd,
|
||||
Thistle4 = 0x8b7b8b,
|
||||
Gray0 = 0x000000,
|
||||
Grey0 = 0x000000,
|
||||
Gray1 = 0x030303,
|
||||
Grey1 = 0x030303,
|
||||
Gray2 = 0x050505,
|
||||
Grey2 = 0x050505,
|
||||
Gray3 = 0x080808,
|
||||
Grey3 = 0x080808,
|
||||
Gray4 = 0x0a0a0a,
|
||||
Grey4 = 0x0a0a0a,
|
||||
Gray5 = 0x0d0d0d,
|
||||
Grey5 = 0x0d0d0d,
|
||||
Gray6 = 0x0f0f0f,
|
||||
Grey6 = 0x0f0f0f,
|
||||
Gray7 = 0x121212,
|
||||
Grey7 = 0x121212,
|
||||
Gray8 = 0x141414,
|
||||
Grey8 = 0x141414,
|
||||
Gray9 = 0x171717,
|
||||
Grey9 = 0x171717,
|
||||
Gray10 = 0x1a1a1a,
|
||||
Grey10 = 0x1a1a1a,
|
||||
Gray11 = 0x1c1c1c,
|
||||
Grey11 = 0x1c1c1c,
|
||||
Gray12 = 0x1f1f1f,
|
||||
Grey12 = 0x1f1f1f,
|
||||
Gray13 = 0x212121,
|
||||
Grey13 = 0x212121,
|
||||
Gray14 = 0x242424,
|
||||
Grey14 = 0x242424,
|
||||
Gray15 = 0x262626,
|
||||
Grey15 = 0x262626,
|
||||
Gray16 = 0x292929,
|
||||
Grey16 = 0x292929,
|
||||
Gray17 = 0x2b2b2b,
|
||||
Grey17 = 0x2b2b2b,
|
||||
Gray18 = 0x2e2e2e,
|
||||
Grey18 = 0x2e2e2e,
|
||||
Gray19 = 0x303030,
|
||||
Grey19 = 0x303030,
|
||||
Gray20 = 0x333333,
|
||||
Grey20 = 0x333333,
|
||||
Gray21 = 0x363636,
|
||||
Grey21 = 0x363636,
|
||||
Gray22 = 0x383838,
|
||||
Grey22 = 0x383838,
|
||||
Gray23 = 0x3b3b3b,
|
||||
Grey23 = 0x3b3b3b,
|
||||
Gray24 = 0x3d3d3d,
|
||||
Grey24 = 0x3d3d3d,
|
||||
Gray25 = 0x404040,
|
||||
Grey25 = 0x404040,
|
||||
Gray26 = 0x424242,
|
||||
Grey26 = 0x424242,
|
||||
Gray27 = 0x454545,
|
||||
Grey27 = 0x454545,
|
||||
Gray28 = 0x474747,
|
||||
Grey28 = 0x474747,
|
||||
Gray29 = 0x4a4a4a,
|
||||
Grey29 = 0x4a4a4a,
|
||||
Gray30 = 0x4d4d4d,
|
||||
Grey30 = 0x4d4d4d,
|
||||
Gray31 = 0x4f4f4f,
|
||||
Grey31 = 0x4f4f4f,
|
||||
Gray32 = 0x525252,
|
||||
Grey32 = 0x525252,
|
||||
Gray33 = 0x545454,
|
||||
Grey33 = 0x545454,
|
||||
Gray34 = 0x575757,
|
||||
Grey34 = 0x575757,
|
||||
Gray35 = 0x595959,
|
||||
Grey35 = 0x595959,
|
||||
Gray36 = 0x5c5c5c,
|
||||
Grey36 = 0x5c5c5c,
|
||||
Gray37 = 0x5e5e5e,
|
||||
Grey37 = 0x5e5e5e,
|
||||
Gray38 = 0x616161,
|
||||
Grey38 = 0x616161,
|
||||
Gray39 = 0x636363,
|
||||
Grey39 = 0x636363,
|
||||
Gray40 = 0x666666,
|
||||
Grey40 = 0x666666,
|
||||
Gray41 = 0x696969,
|
||||
Grey41 = 0x696969,
|
||||
Gray42 = 0x6b6b6b,
|
||||
Grey42 = 0x6b6b6b,
|
||||
Gray43 = 0x6e6e6e,
|
||||
Grey43 = 0x6e6e6e,
|
||||
Gray44 = 0x707070,
|
||||
Grey44 = 0x707070,
|
||||
Gray45 = 0x737373,
|
||||
Grey45 = 0x737373,
|
||||
Gray46 = 0x757575,
|
||||
Grey46 = 0x757575,
|
||||
Gray47 = 0x787878,
|
||||
Grey47 = 0x787878,
|
||||
Gray48 = 0x7a7a7a,
|
||||
Grey48 = 0x7a7a7a,
|
||||
Gray49 = 0x7d7d7d,
|
||||
Grey49 = 0x7d7d7d,
|
||||
Gray50 = 0x7f7f7f,
|
||||
Grey50 = 0x7f7f7f,
|
||||
Gray51 = 0x828282,
|
||||
Grey51 = 0x828282,
|
||||
Gray52 = 0x858585,
|
||||
Grey52 = 0x858585,
|
||||
Gray53 = 0x878787,
|
||||
Grey53 = 0x878787,
|
||||
Gray54 = 0x8a8a8a,
|
||||
Grey54 = 0x8a8a8a,
|
||||
Gray55 = 0x8c8c8c,
|
||||
Grey55 = 0x8c8c8c,
|
||||
Gray56 = 0x8f8f8f,
|
||||
Grey56 = 0x8f8f8f,
|
||||
Gray57 = 0x919191,
|
||||
Grey57 = 0x919191,
|
||||
Gray58 = 0x949494,
|
||||
Grey58 = 0x949494,
|
||||
Gray59 = 0x969696,
|
||||
Grey59 = 0x969696,
|
||||
Gray60 = 0x999999,
|
||||
Grey60 = 0x999999,
|
||||
Gray61 = 0x9c9c9c,
|
||||
Grey61 = 0x9c9c9c,
|
||||
Gray62 = 0x9e9e9e,
|
||||
Grey62 = 0x9e9e9e,
|
||||
Gray63 = 0xa1a1a1,
|
||||
Grey63 = 0xa1a1a1,
|
||||
Gray64 = 0xa3a3a3,
|
||||
Grey64 = 0xa3a3a3,
|
||||
Gray65 = 0xa6a6a6,
|
||||
Grey65 = 0xa6a6a6,
|
||||
Gray66 = 0xa8a8a8,
|
||||
Grey66 = 0xa8a8a8,
|
||||
Gray67 = 0xababab,
|
||||
Grey67 = 0xababab,
|
||||
Gray68 = 0xadadad,
|
||||
Grey68 = 0xadadad,
|
||||
Gray69 = 0xb0b0b0,
|
||||
Grey69 = 0xb0b0b0,
|
||||
Gray70 = 0xb3b3b3,
|
||||
Grey70 = 0xb3b3b3,
|
||||
Gray71 = 0xb5b5b5,
|
||||
Grey71 = 0xb5b5b5,
|
||||
Gray72 = 0xb8b8b8,
|
||||
Grey72 = 0xb8b8b8,
|
||||
Gray73 = 0xbababa,
|
||||
Grey73 = 0xbababa,
|
||||
Gray74 = 0xbdbdbd,
|
||||
Grey74 = 0xbdbdbd,
|
||||
Gray75 = 0xbfbfbf,
|
||||
Grey75 = 0xbfbfbf,
|
||||
Gray76 = 0xc2c2c2,
|
||||
Grey76 = 0xc2c2c2,
|
||||
Gray77 = 0xc4c4c4,
|
||||
Grey77 = 0xc4c4c4,
|
||||
Gray78 = 0xc7c7c7,
|
||||
Grey78 = 0xc7c7c7,
|
||||
Gray79 = 0xc9c9c9,
|
||||
Grey79 = 0xc9c9c9,
|
||||
Gray80 = 0xcccccc,
|
||||
Grey80 = 0xcccccc,
|
||||
Gray81 = 0xcfcfcf,
|
||||
Grey81 = 0xcfcfcf,
|
||||
Gray82 = 0xd1d1d1,
|
||||
Grey82 = 0xd1d1d1,
|
||||
Gray83 = 0xd4d4d4,
|
||||
Grey83 = 0xd4d4d4,
|
||||
Gray84 = 0xd6d6d6,
|
||||
Grey84 = 0xd6d6d6,
|
||||
Gray85 = 0xd9d9d9,
|
||||
Grey85 = 0xd9d9d9,
|
||||
Gray86 = 0xdbdbdb,
|
||||
Grey86 = 0xdbdbdb,
|
||||
Gray87 = 0xdedede,
|
||||
Grey87 = 0xdedede,
|
||||
Gray88 = 0xe0e0e0,
|
||||
Grey88 = 0xe0e0e0,
|
||||
Gray89 = 0xe3e3e3,
|
||||
Grey89 = 0xe3e3e3,
|
||||
Gray90 = 0xe5e5e5,
|
||||
Grey90 = 0xe5e5e5,
|
||||
Gray91 = 0xe8e8e8,
|
||||
Grey91 = 0xe8e8e8,
|
||||
Gray92 = 0xebebeb,
|
||||
Grey92 = 0xebebeb,
|
||||
Gray93 = 0xededed,
|
||||
Grey93 = 0xededed,
|
||||
Gray94 = 0xf0f0f0,
|
||||
Grey94 = 0xf0f0f0,
|
||||
Gray95 = 0xf2f2f2,
|
||||
Grey95 = 0xf2f2f2,
|
||||
Gray96 = 0xf5f5f5,
|
||||
Grey96 = 0xf5f5f5,
|
||||
Gray97 = 0xf7f7f7,
|
||||
Grey97 = 0xf7f7f7,
|
||||
Gray98 = 0xfafafa,
|
||||
Grey98 = 0xfafafa,
|
||||
Gray99 = 0xfcfcfc,
|
||||
Grey99 = 0xfcfcfc,
|
||||
Gray100 = 0xffffff,
|
||||
Grey100 = 0xffffff,
|
||||
DarkGrey = 0xa9a9a9,
|
||||
DarkGray = 0xa9a9a9,
|
||||
DarkBlue = 0x00008b,
|
||||
DarkCyan = 0x008b8b,
|
||||
DarkMagenta = 0x8b008b,
|
||||
DarkRed = 0x8b0000,
|
||||
LightGreen = 0x90ee90,
|
||||
Crimson = 0xdc143c,
|
||||
Indigo = 0x4b0082,
|
||||
Olive = 0x808000,
|
||||
RebeccaPurple = 0x663399,
|
||||
Silver = 0xc0c0c0,
|
||||
Teal = 0x008080,
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,20 @@
|
|||
#ifndef __TRACYFORCEINLINE_HPP__
|
||||
#define __TRACYFORCEINLINE_HPP__
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# define tracy_force_inline __attribute__((always_inline)) inline
|
||||
#elif defined(_MSC_VER)
|
||||
# define tracy_force_inline __forceinline
|
||||
#else
|
||||
# define tracy_force_inline inline
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# define tracy_no_inline __attribute__((noinline))
|
||||
#elif defined(_MSC_VER)
|
||||
# define tracy_no_inline __declspec(noinline)
|
||||
#else
|
||||
# define tracy_no_inline
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,24 @@
|
|||
#ifndef __TRACYMUTEX_HPP__
|
||||
#define __TRACYMUTEX_HPP__
|
||||
|
||||
#if defined _MSC_VER
|
||||
|
||||
# include <shared_mutex>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
using TracyMutex = std::shared_mutex;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
using TracyMutex = std::mutex;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,128 @@
|
|||
#ifndef __TRACYPROTOCOL_HPP__
|
||||
#define __TRACYPROTOCOL_HPP__
|
||||
|
||||
#include <limits>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
|
||||
|
||||
enum : uint32_t { ProtocolVersion = 46 };
|
||||
enum : uint16_t { BroadcastVersion = 2 };
|
||||
|
||||
using lz4sz_t = uint32_t;
|
||||
|
||||
enum { TargetFrameSize = 256 * 1024 };
|
||||
enum { LZ4Size = Lz4CompressBound( TargetFrameSize ) };
|
||||
static_assert( LZ4Size <= std::numeric_limits<lz4sz_t>::max(), "LZ4Size greater than lz4sz_t" );
|
||||
static_assert( TargetFrameSize * 2 >= 64 * 1024, "Not enough space for LZ4 stream buffer" );
|
||||
|
||||
enum { HandshakeShibbolethSize = 8 };
|
||||
static const char HandshakeShibboleth[HandshakeShibbolethSize] = { 'T', 'r', 'a', 'c', 'y', 'P', 'r', 'f' };
|
||||
|
||||
enum HandshakeStatus : uint8_t
|
||||
{
|
||||
HandshakePending,
|
||||
HandshakeWelcome,
|
||||
HandshakeProtocolMismatch,
|
||||
HandshakeNotAvailable,
|
||||
HandshakeDropped
|
||||
};
|
||||
|
||||
enum { WelcomeMessageProgramNameSize = 64 };
|
||||
enum { WelcomeMessageHostInfoSize = 1024 };
|
||||
|
||||
#pragma pack( 1 )
|
||||
|
||||
// Must increase left query space after handling!
|
||||
enum ServerQuery : uint8_t
|
||||
{
|
||||
ServerQueryTerminate,
|
||||
ServerQueryString,
|
||||
ServerQueryThreadString,
|
||||
ServerQuerySourceLocation,
|
||||
ServerQueryPlotName,
|
||||
ServerQueryCallstackFrame,
|
||||
ServerQueryFrameName,
|
||||
ServerQueryDisconnect,
|
||||
ServerQueryExternalName,
|
||||
ServerQueryParameter,
|
||||
ServerQuerySymbol,
|
||||
ServerQuerySymbolCode,
|
||||
ServerQueryCodeLocation,
|
||||
ServerQuerySourceCode,
|
||||
ServerQueryDataTransfer,
|
||||
ServerQueryDataTransferPart
|
||||
};
|
||||
|
||||
struct ServerQueryPacket
|
||||
{
|
||||
ServerQuery type;
|
||||
uint64_t ptr;
|
||||
uint32_t extra;
|
||||
};
|
||||
|
||||
enum { ServerQueryPacketSize = sizeof( ServerQueryPacket ) };
|
||||
|
||||
|
||||
enum CpuArchitecture : uint8_t
|
||||
{
|
||||
CpuArchUnknown,
|
||||
CpuArchX86,
|
||||
CpuArchX64,
|
||||
CpuArchArm32,
|
||||
CpuArchArm64
|
||||
};
|
||||
|
||||
|
||||
struct WelcomeMessage
|
||||
{
|
||||
double timerMul;
|
||||
int64_t initBegin;
|
||||
int64_t initEnd;
|
||||
uint64_t delay;
|
||||
uint64_t resolution;
|
||||
uint64_t epoch;
|
||||
uint64_t exectime;
|
||||
uint64_t pid;
|
||||
int64_t samplingPeriod;
|
||||
uint8_t onDemand;
|
||||
uint8_t isApple;
|
||||
uint8_t cpuArch;
|
||||
uint8_t codeTransfer;
|
||||
char cpuManufacturer[12];
|
||||
uint32_t cpuId;
|
||||
char programName[WelcomeMessageProgramNameSize];
|
||||
char hostInfo[WelcomeMessageHostInfoSize];
|
||||
};
|
||||
|
||||
enum { WelcomeMessageSize = sizeof( WelcomeMessage ) };
|
||||
|
||||
|
||||
struct OnDemandPayloadMessage
|
||||
{
|
||||
uint64_t frames;
|
||||
uint64_t currentTime;
|
||||
};
|
||||
|
||||
enum { OnDemandPayloadMessageSize = sizeof( OnDemandPayloadMessage ) };
|
||||
|
||||
|
||||
struct BroadcastMessage
|
||||
{
|
||||
uint16_t broadcastVersion;
|
||||
uint16_t listenPort;
|
||||
uint32_t protocolVersion;
|
||||
int32_t activeTime; // in seconds
|
||||
char programName[WelcomeMessageProgramNameSize];
|
||||
};
|
||||
|
||||
enum { BroadcastMessageSize = sizeof( BroadcastMessage ) };
|
||||
|
||||
#pragma pack()
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,686 @@
|
|||
#ifndef __TRACYQUEUE_HPP__
|
||||
#define __TRACYQUEUE_HPP__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
enum class QueueType : uint8_t
|
||||
{
|
||||
ZoneText,
|
||||
ZoneName,
|
||||
Message,
|
||||
MessageColor,
|
||||
MessageCallstack,
|
||||
MessageColorCallstack,
|
||||
MessageAppInfo,
|
||||
ZoneBeginAllocSrcLoc,
|
||||
ZoneBeginAllocSrcLocCallstack,
|
||||
CallstackSerial,
|
||||
Callstack,
|
||||
CallstackAlloc,
|
||||
CallstackSample,
|
||||
FrameImage,
|
||||
ZoneBegin,
|
||||
ZoneBeginCallstack,
|
||||
ZoneEnd,
|
||||
LockWait,
|
||||
LockObtain,
|
||||
LockRelease,
|
||||
LockSharedWait,
|
||||
LockSharedObtain,
|
||||
LockSharedRelease,
|
||||
LockName,
|
||||
MemAlloc,
|
||||
MemAllocNamed,
|
||||
MemFree,
|
||||
MemFreeNamed,
|
||||
MemAllocCallstack,
|
||||
MemAllocCallstackNamed,
|
||||
MemFreeCallstack,
|
||||
MemFreeCallstackNamed,
|
||||
GpuZoneBegin,
|
||||
GpuZoneBeginCallstack,
|
||||
GpuZoneBeginAllocSrcLoc,
|
||||
GpuZoneBeginAllocSrcLocCallstack,
|
||||
GpuZoneEnd,
|
||||
GpuZoneBeginSerial,
|
||||
GpuZoneBeginCallstackSerial,
|
||||
GpuZoneBeginAllocSrcLocSerial,
|
||||
GpuZoneBeginAllocSrcLocCallstackSerial,
|
||||
GpuZoneEndSerial,
|
||||
PlotData,
|
||||
ContextSwitch,
|
||||
ThreadWakeup,
|
||||
GpuTime,
|
||||
GpuContextName,
|
||||
Terminate,
|
||||
KeepAlive,
|
||||
ThreadContext,
|
||||
GpuCalibration,
|
||||
Crash,
|
||||
CrashReport,
|
||||
ZoneValidation,
|
||||
ZoneColor,
|
||||
ZoneValue,
|
||||
FrameMarkMsg,
|
||||
FrameMarkMsgStart,
|
||||
FrameMarkMsgEnd,
|
||||
SourceLocation,
|
||||
LockAnnounce,
|
||||
LockTerminate,
|
||||
LockMark,
|
||||
MessageLiteral,
|
||||
MessageLiteralColor,
|
||||
MessageLiteralCallstack,
|
||||
MessageLiteralColorCallstack,
|
||||
GpuNewContext,
|
||||
CallstackFrameSize,
|
||||
CallstackFrame,
|
||||
SymbolInformation,
|
||||
CodeInformation,
|
||||
SysTimeReport,
|
||||
TidToPid,
|
||||
PlotConfig,
|
||||
ParamSetup,
|
||||
AckServerQueryNoop,
|
||||
AckSourceCodeNotAvailable,
|
||||
CpuTopology,
|
||||
SingleStringData,
|
||||
SecondStringData,
|
||||
MemNamePayload,
|
||||
StringData,
|
||||
ThreadName,
|
||||
PlotName,
|
||||
SourceLocationPayload,
|
||||
CallstackPayload,
|
||||
CallstackAllocPayload,
|
||||
FrameName,
|
||||
FrameImageData,
|
||||
ExternalName,
|
||||
ExternalThreadName,
|
||||
SymbolCode,
|
||||
SourceCode,
|
||||
NUM_TYPES
|
||||
};
|
||||
|
||||
#pragma pack( 1 )
|
||||
|
||||
struct QueueThreadContext
|
||||
{
|
||||
uint64_t thread;
|
||||
};
|
||||
|
||||
struct QueueZoneBeginLean
|
||||
{
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
struct QueueZoneBegin : public QueueZoneBeginLean
|
||||
{
|
||||
uint64_t srcloc; // ptr
|
||||
};
|
||||
|
||||
struct QueueZoneEnd
|
||||
{
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
struct QueueZoneValidation
|
||||
{
|
||||
uint32_t id;
|
||||
};
|
||||
|
||||
struct QueueZoneColor
|
||||
{
|
||||
uint8_t r;
|
||||
uint8_t g;
|
||||
uint8_t b;
|
||||
};
|
||||
|
||||
struct QueueZoneValue
|
||||
{
|
||||
uint64_t value;
|
||||
};
|
||||
|
||||
struct QueueStringTransfer
|
||||
{
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueFrameMark
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t name; // ptr
|
||||
};
|
||||
|
||||
struct QueueFrameImage
|
||||
{
|
||||
uint32_t frame;
|
||||
uint16_t w;
|
||||
uint16_t h;
|
||||
uint8_t flip;
|
||||
};
|
||||
|
||||
struct QueueFrameImageFat : public QueueFrameImage
|
||||
{
|
||||
uint64_t image; // ptr
|
||||
};
|
||||
|
||||
struct QueueSourceLocation
|
||||
{
|
||||
uint64_t name;
|
||||
uint64_t function; // ptr
|
||||
uint64_t file; // ptr
|
||||
uint32_t line;
|
||||
uint8_t r;
|
||||
uint8_t g;
|
||||
uint8_t b;
|
||||
};
|
||||
|
||||
struct QueueZoneTextFat
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
enum class LockType : uint8_t
|
||||
{
|
||||
Lockable,
|
||||
SharedLockable
|
||||
};
|
||||
|
||||
struct QueueLockAnnounce
|
||||
{
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
uint64_t lckloc; // ptr
|
||||
LockType type;
|
||||
};
|
||||
|
||||
struct QueueLockTerminate
|
||||
{
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
struct QueueLockWait
|
||||
{
|
||||
uint64_t thread;
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
struct QueueLockObtain
|
||||
{
|
||||
uint64_t thread;
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
struct QueueLockRelease
|
||||
{
|
||||
uint64_t thread;
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
struct QueueLockMark
|
||||
{
|
||||
uint64_t thread;
|
||||
uint32_t id;
|
||||
uint64_t srcloc; // ptr
|
||||
};
|
||||
|
||||
struct QueueLockName
|
||||
{
|
||||
uint32_t id;
|
||||
};
|
||||
|
||||
struct QueueLockNameFat : public QueueLockName
|
||||
{
|
||||
uint64_t name; // ptr
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
enum class PlotDataType : uint8_t
|
||||
{
|
||||
Float,
|
||||
Double,
|
||||
Int
|
||||
};
|
||||
|
||||
struct QueuePlotData
|
||||
{
|
||||
uint64_t name; // ptr
|
||||
int64_t time;
|
||||
PlotDataType type;
|
||||
union
|
||||
{
|
||||
double d;
|
||||
float f;
|
||||
int64_t i;
|
||||
} data;
|
||||
};
|
||||
|
||||
struct QueueMessage
|
||||
{
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
struct QueueMessageColor : public QueueMessage
|
||||
{
|
||||
uint8_t r;
|
||||
uint8_t g;
|
||||
uint8_t b;
|
||||
};
|
||||
|
||||
struct QueueMessageLiteral : public QueueMessage
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
};
|
||||
|
||||
struct QueueMessageColorLiteral : public QueueMessageColor
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
};
|
||||
|
||||
struct QueueMessageFat : public QueueMessage
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
struct QueueMessageColorFat : public QueueMessageColor
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
// Don't change order, only add new entries at the end, this is also used on trace dumps!
|
||||
enum class GpuContextType : uint8_t
|
||||
{
|
||||
Invalid,
|
||||
OpenGl,
|
||||
Vulkan,
|
||||
OpenCL,
|
||||
Direct3D12,
|
||||
Direct3D11
|
||||
};
|
||||
|
||||
enum GpuContextFlags : uint8_t
|
||||
{
|
||||
GpuContextCalibration = 1 << 0
|
||||
};
|
||||
|
||||
struct QueueGpuNewContext
|
||||
{
|
||||
int64_t cpuTime;
|
||||
int64_t gpuTime;
|
||||
uint64_t thread;
|
||||
float period;
|
||||
uint8_t context;
|
||||
GpuContextFlags flags;
|
||||
GpuContextType type;
|
||||
};
|
||||
|
||||
struct QueueGpuZoneBeginLean
|
||||
{
|
||||
int64_t cpuTime;
|
||||
uint64_t thread;
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuZoneBegin : public QueueGpuZoneBeginLean
|
||||
{
|
||||
uint64_t srcloc;
|
||||
};
|
||||
|
||||
struct QueueGpuZoneEnd
|
||||
{
|
||||
int64_t cpuTime;
|
||||
uint64_t thread;
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuTime
|
||||
{
|
||||
int64_t gpuTime;
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuCalibration
|
||||
{
|
||||
int64_t gpuTime;
|
||||
int64_t cpuTime;
|
||||
int64_t cpuDelta;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuContextName
|
||||
{
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuContextNameFat : public QueueGpuContextName
|
||||
{
|
||||
uint64_t ptr;
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
struct QueueMemNamePayload
|
||||
{
|
||||
uint64_t name;
|
||||
};
|
||||
|
||||
struct QueueMemAlloc
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t thread;
|
||||
uint64_t ptr;
|
||||
char size[6];
|
||||
};
|
||||
|
||||
struct QueueMemFree
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t thread;
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueCallstackFat
|
||||
{
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueCallstackAllocFat
|
||||
{
|
||||
uint64_t ptr;
|
||||
uint64_t nativePtr;
|
||||
};
|
||||
|
||||
struct QueueCallstackSample
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t thread;
|
||||
};
|
||||
|
||||
struct QueueCallstackSampleFat : public QueueCallstackSample
|
||||
{
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueCallstackFrameSize
|
||||
{
|
||||
uint64_t ptr;
|
||||
uint8_t size;
|
||||
};
|
||||
|
||||
struct QueueCallstackFrame
|
||||
{
|
||||
uint32_t line;
|
||||
uint64_t symAddr;
|
||||
uint32_t symLen;
|
||||
};
|
||||
|
||||
struct QueueSymbolInformation
|
||||
{
|
||||
uint32_t line;
|
||||
uint64_t symAddr;
|
||||
};
|
||||
|
||||
struct QueueCodeInformation
|
||||
{
|
||||
uint64_t ptr;
|
||||
uint32_t line;
|
||||
};
|
||||
|
||||
struct QueueCrashReport
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t text; // ptr
|
||||
};
|
||||
|
||||
struct QueueSysTime
|
||||
{
|
||||
int64_t time;
|
||||
float sysTime;
|
||||
};
|
||||
|
||||
struct QueueContextSwitch
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t oldThread;
|
||||
uint64_t newThread;
|
||||
uint8_t cpu;
|
||||
uint8_t reason;
|
||||
uint8_t state;
|
||||
};
|
||||
|
||||
struct QueueThreadWakeup
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t thread;
|
||||
};
|
||||
|
||||
struct QueueTidToPid
|
||||
{
|
||||
uint64_t tid;
|
||||
uint64_t pid;
|
||||
};
|
||||
|
||||
enum class PlotFormatType : uint8_t
|
||||
{
|
||||
Number,
|
||||
Memory,
|
||||
Percentage
|
||||
};
|
||||
|
||||
struct QueuePlotConfig
|
||||
{
|
||||
uint64_t name; // ptr
|
||||
uint8_t type;
|
||||
};
|
||||
|
||||
struct QueueParamSetup
|
||||
{
|
||||
uint32_t idx;
|
||||
uint64_t name; // ptr
|
||||
uint8_t isBool;
|
||||
int32_t val;
|
||||
};
|
||||
|
||||
struct QueueCpuTopology
|
||||
{
|
||||
uint32_t package;
|
||||
uint32_t core;
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueHeader
|
||||
{
|
||||
union
|
||||
{
|
||||
QueueType type;
|
||||
uint8_t idx;
|
||||
};
|
||||
};
|
||||
|
||||
struct QueueItem
|
||||
{
|
||||
QueueHeader hdr;
|
||||
union
|
||||
{
|
||||
QueueThreadContext threadCtx;
|
||||
QueueZoneBegin zoneBegin;
|
||||
QueueZoneBeginLean zoneBeginLean;
|
||||
QueueZoneEnd zoneEnd;
|
||||
QueueZoneValidation zoneValidation;
|
||||
QueueZoneColor zoneColor;
|
||||
QueueZoneValue zoneValue;
|
||||
QueueStringTransfer stringTransfer;
|
||||
QueueFrameMark frameMark;
|
||||
QueueFrameImage frameImage;
|
||||
QueueFrameImageFat frameImageFat;
|
||||
QueueSourceLocation srcloc;
|
||||
QueueZoneTextFat zoneTextFat;
|
||||
QueueLockAnnounce lockAnnounce;
|
||||
QueueLockTerminate lockTerminate;
|
||||
QueueLockWait lockWait;
|
||||
QueueLockObtain lockObtain;
|
||||
QueueLockRelease lockRelease;
|
||||
QueueLockMark lockMark;
|
||||
QueueLockName lockName;
|
||||
QueueLockNameFat lockNameFat;
|
||||
QueuePlotData plotData;
|
||||
QueueMessage message;
|
||||
QueueMessageColor messageColor;
|
||||
QueueMessageLiteral messageLiteral;
|
||||
QueueMessageColorLiteral messageColorLiteral;
|
||||
QueueMessageFat messageFat;
|
||||
QueueMessageColorFat messageColorFat;
|
||||
QueueGpuNewContext gpuNewContext;
|
||||
QueueGpuZoneBegin gpuZoneBegin;
|
||||
QueueGpuZoneBeginLean gpuZoneBeginLean;
|
||||
QueueGpuZoneEnd gpuZoneEnd;
|
||||
QueueGpuTime gpuTime;
|
||||
QueueGpuCalibration gpuCalibration;
|
||||
QueueGpuContextName gpuContextName;
|
||||
QueueGpuContextNameFat gpuContextNameFat;
|
||||
QueueMemAlloc memAlloc;
|
||||
QueueMemFree memFree;
|
||||
QueueMemNamePayload memName;
|
||||
QueueCallstackFat callstackFat;
|
||||
QueueCallstackAllocFat callstackAllocFat;
|
||||
QueueCallstackSample callstackSample;
|
||||
QueueCallstackSampleFat callstackSampleFat;
|
||||
QueueCallstackFrameSize callstackFrameSize;
|
||||
QueueCallstackFrame callstackFrame;
|
||||
QueueSymbolInformation symbolInformation;
|
||||
QueueCodeInformation codeInformation;
|
||||
QueueCrashReport crashReport;
|
||||
QueueSysTime sysTime;
|
||||
QueueContextSwitch contextSwitch;
|
||||
QueueThreadWakeup threadWakeup;
|
||||
QueueTidToPid tidToPid;
|
||||
QueuePlotConfig plotConfig;
|
||||
QueueParamSetup paramSetup;
|
||||
QueueCpuTopology cpuTopology;
|
||||
};
|
||||
};
|
||||
#pragma pack()
|
||||
|
||||
|
||||
enum { QueueItemSize = sizeof( QueueItem ) };
|
||||
|
||||
static constexpr size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ), // zone text
|
||||
sizeof( QueueHeader ), // zone name
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColor ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ), // app info
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location, callstack
|
||||
sizeof( QueueHeader ), // callstack memory
|
||||
sizeof( QueueHeader ), // callstack
|
||||
sizeof( QueueHeader ), // callstack alloc
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackSample ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueFrameImage ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneEnd ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockWait ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockObtain ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockRelease ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockWait ), // shared
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockObtain ), // shared
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockRelease ), // shared
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockName ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // named
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemFree ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // named
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location, callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial, callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location, callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), // serial
|
||||
sizeof( QueueHeader ) + sizeof( QueuePlotData ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueContextSwitch ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueThreadWakeup ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuTime ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuContextName ),
|
||||
// above items must be first
|
||||
sizeof( QueueHeader ), // terminate
|
||||
sizeof( QueueHeader ), // keep alive
|
||||
sizeof( QueueHeader ) + sizeof( QueueThreadContext ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ),
|
||||
sizeof( QueueHeader ), // crash
|
||||
sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneValidation ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneColor ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneValue ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // continuous frames
|
||||
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // start
|
||||
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // end
|
||||
sizeof( QueueHeader ) + sizeof( QueueSourceLocation ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockTerminate ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockMark ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueSymbolInformation ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCodeInformation ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueSysTime ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueTidToPid ),
|
||||
sizeof( QueueHeader ) + sizeof( QueuePlotConfig ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueParamSetup ),
|
||||
sizeof( QueueHeader ), // server query acknowledgement
|
||||
sizeof( QueueHeader ), // source code not available
|
||||
sizeof( QueueHeader ) + sizeof( QueueCpuTopology ),
|
||||
sizeof( QueueHeader ), // single string data
|
||||
sizeof( QueueHeader ), // second string data
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ),
|
||||
// keep all QueueStringTransfer below
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // plot name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // allocated source location payload
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack payload
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack alloc payload
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // frame name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // frame image data
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external thread name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // symbol code
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // source code
|
||||
};
|
||||
|
||||
static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" );
|
||||
static_assert( sizeof( QueueDataSize ) / sizeof( size_t ) == (uint8_t)QueueType::NUM_TYPES, "QueueDataSize mismatch" );
|
||||
static_assert( sizeof( void* ) <= sizeof( uint64_t ), "Pointer size > 8 bytes" );
|
||||
static_assert( sizeof( void* ) == sizeof( uintptr_t ), "Pointer size != uintptr_t" );
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,749 @@
|
|||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
#include <new>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "TracyAlloc.hpp"
|
||||
#include "TracySocket.hpp"
|
||||
#include "TracySystem.hpp"
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifndef NOMINMAX
|
||||
# define NOMINMAX
|
||||
# endif
|
||||
# include <winsock2.h>
|
||||
# include <ws2tcpip.h>
|
||||
# ifdef _MSC_VER
|
||||
# pragma warning(disable:4244)
|
||||
# pragma warning(disable:4267)
|
||||
# endif
|
||||
# define poll WSAPoll
|
||||
#else
|
||||
# include <arpa/inet.h>
|
||||
# include <sys/socket.h>
|
||||
# include <sys/param.h>
|
||||
# include <errno.h>
|
||||
# include <fcntl.h>
|
||||
# include <netinet/in.h>
|
||||
# include <netdb.h>
|
||||
# include <unistd.h>
|
||||
# include <poll.h>
|
||||
#endif
|
||||
|
||||
#ifndef MSG_NOSIGNAL
|
||||
# define MSG_NOSIGNAL 0
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#ifdef _WIN32
|
||||
typedef SOCKET socket_t;
|
||||
#else
|
||||
typedef int socket_t;
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
struct __wsinit
|
||||
{
|
||||
__wsinit()
|
||||
{
|
||||
WSADATA wsaData;
|
||||
if( WSAStartup( MAKEWORD( 2, 2 ), &wsaData ) != 0 )
|
||||
{
|
||||
fprintf( stderr, "Cannot init winsock.\n" );
|
||||
exit( 1 );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
void InitWinSock()
|
||||
{
|
||||
static __wsinit init;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
enum { BufSize = 128 * 1024 };
|
||||
|
||||
Socket::Socket()
|
||||
: m_buf( (char*)tracy_malloc( BufSize ) )
|
||||
, m_bufPtr( nullptr )
|
||||
, m_sock( -1 )
|
||||
, m_bufLeft( 0 )
|
||||
, m_ptr( nullptr )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
InitWinSock();
|
||||
#endif
|
||||
}
|
||||
|
||||
Socket::Socket( int sock )
|
||||
: m_buf( (char*)tracy_malloc( BufSize ) )
|
||||
, m_bufPtr( nullptr )
|
||||
, m_sock( sock )
|
||||
, m_bufLeft( 0 )
|
||||
, m_ptr( nullptr )
|
||||
{
|
||||
}
|
||||
|
||||
Socket::~Socket()
|
||||
{
|
||||
tracy_free( m_buf );
|
||||
if( m_sock.load( std::memory_order_relaxed ) != -1 )
|
||||
{
|
||||
Close();
|
||||
}
|
||||
if( m_ptr )
|
||||
{
|
||||
freeaddrinfo( m_res );
|
||||
#ifdef _WIN32
|
||||
closesocket( m_connSock );
|
||||
#else
|
||||
close( m_connSock );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
bool Socket::Connect( const char* addr, uint16_t port )
|
||||
{
|
||||
assert( !IsValid() );
|
||||
|
||||
if( m_ptr )
|
||||
{
|
||||
const auto c = connect( m_connSock, m_ptr->ai_addr, m_ptr->ai_addrlen );
|
||||
if( c == -1 )
|
||||
{
|
||||
#if defined _WIN32
|
||||
const auto err = WSAGetLastError();
|
||||
if( err == WSAEALREADY || err == WSAEINPROGRESS ) return false;
|
||||
if( err != WSAEISCONN )
|
||||
{
|
||||
freeaddrinfo( m_res );
|
||||
closesocket( m_connSock );
|
||||
m_ptr = nullptr;
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
const auto err = errno;
|
||||
if( err == EALREADY || err == EINPROGRESS ) return false;
|
||||
if( err != EISCONN )
|
||||
{
|
||||
freeaddrinfo( m_res );
|
||||
close( m_connSock );
|
||||
m_ptr = nullptr;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined _WIN32
|
||||
u_long nonblocking = 0;
|
||||
ioctlsocket( m_connSock, FIONBIO, &nonblocking );
|
||||
#else
|
||||
int flags = fcntl( m_connSock, F_GETFL, 0 );
|
||||
fcntl( m_connSock, F_SETFL, flags & ~O_NONBLOCK );
|
||||
#endif
|
||||
m_sock.store( m_connSock, std::memory_order_relaxed );
|
||||
freeaddrinfo( m_res );
|
||||
m_ptr = nullptr;
|
||||
return true;
|
||||
}
|
||||
|
||||
struct addrinfo hints;
|
||||
struct addrinfo *res, *ptr;
|
||||
|
||||
memset( &hints, 0, sizeof( hints ) );
|
||||
hints.ai_family = AF_UNSPEC;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
|
||||
char portbuf[32];
|
||||
sprintf( portbuf, "%" PRIu16, port );
|
||||
|
||||
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
|
||||
int sock = 0;
|
||||
for( ptr = res; ptr; ptr = ptr->ai_next )
|
||||
{
|
||||
if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue;
|
||||
#if defined __APPLE__
|
||||
int val = 1;
|
||||
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
|
||||
#endif
|
||||
#if defined _WIN32
|
||||
u_long nonblocking = 1;
|
||||
ioctlsocket( sock, FIONBIO, &nonblocking );
|
||||
#else
|
||||
int flags = fcntl( sock, F_GETFL, 0 );
|
||||
fcntl( sock, F_SETFL, flags | O_NONBLOCK );
|
||||
#endif
|
||||
if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == 0 )
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined _WIN32
|
||||
const auto err = WSAGetLastError();
|
||||
if( err != WSAEWOULDBLOCK )
|
||||
{
|
||||
closesocket( sock );
|
||||
continue;
|
||||
}
|
||||
#else
|
||||
if( errno != EINPROGRESS )
|
||||
{
|
||||
close( sock );
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
m_res = res;
|
||||
m_ptr = ptr;
|
||||
m_connSock = sock;
|
||||
return false;
|
||||
}
|
||||
freeaddrinfo( res );
|
||||
if( !ptr ) return false;
|
||||
|
||||
#if defined _WIN32
|
||||
u_long nonblocking = 0;
|
||||
ioctlsocket( sock, FIONBIO, &nonblocking );
|
||||
#else
|
||||
int flags = fcntl( sock, F_GETFL, 0 );
|
||||
fcntl( sock, F_SETFL, flags & ~O_NONBLOCK );
|
||||
#endif
|
||||
|
||||
m_sock.store( sock, std::memory_order_relaxed );
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Socket::ConnectBlocking( const char* addr, uint16_t port )
|
||||
{
|
||||
assert( !IsValid() );
|
||||
assert( !m_ptr );
|
||||
|
||||
struct addrinfo hints;
|
||||
struct addrinfo *res, *ptr;
|
||||
|
||||
memset( &hints, 0, sizeof( hints ) );
|
||||
hints.ai_family = AF_UNSPEC;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
|
||||
char portbuf[32];
|
||||
sprintf( portbuf, "%" PRIu16, port );
|
||||
|
||||
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
|
||||
int sock = 0;
|
||||
for( ptr = res; ptr; ptr = ptr->ai_next )
|
||||
{
|
||||
if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue;
|
||||
#if defined __APPLE__
|
||||
int val = 1;
|
||||
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
|
||||
#endif
|
||||
if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == -1 )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
closesocket( sock );
|
||||
#else
|
||||
close( sock );
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
freeaddrinfo( res );
|
||||
if( !ptr ) return false;
|
||||
|
||||
m_sock.store( sock, std::memory_order_relaxed );
|
||||
return true;
|
||||
}
|
||||
|
||||
void Socket::Close()
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
assert( sock != -1 );
|
||||
#ifdef _WIN32
|
||||
closesocket( sock );
|
||||
#else
|
||||
close( sock );
|
||||
#endif
|
||||
m_sock.store( -1, std::memory_order_relaxed );
|
||||
}
|
||||
|
||||
int Socket::Send( const void* _buf, int len )
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
auto buf = (const char*)_buf;
|
||||
assert( sock != -1 );
|
||||
auto start = buf;
|
||||
while( len > 0 )
|
||||
{
|
||||
auto ret = send( sock, buf, len, MSG_NOSIGNAL );
|
||||
if( ret == -1 ) return -1;
|
||||
len -= ret;
|
||||
buf += ret;
|
||||
}
|
||||
return int( buf - start );
|
||||
}
|
||||
|
||||
int Socket::GetSendBufSize()
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
int bufSize;
|
||||
#if defined _WIN32
|
||||
int sz = sizeof( bufSize );
|
||||
getsockopt( sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz );
|
||||
#else
|
||||
socklen_t sz = sizeof( bufSize );
|
||||
getsockopt( sock, SOL_SOCKET, SO_SNDBUF, &bufSize, &sz );
|
||||
#endif
|
||||
return bufSize;
|
||||
}
|
||||
|
||||
int Socket::RecvBuffered( void* buf, int len, int timeout )
|
||||
{
|
||||
if( len <= m_bufLeft )
|
||||
{
|
||||
memcpy( buf, m_bufPtr, len );
|
||||
m_bufPtr += len;
|
||||
m_bufLeft -= len;
|
||||
return len;
|
||||
}
|
||||
|
||||
if( m_bufLeft > 0 )
|
||||
{
|
||||
memcpy( buf, m_bufPtr, m_bufLeft );
|
||||
const auto ret = m_bufLeft;
|
||||
m_bufLeft = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
if( len >= BufSize ) return Recv( buf, len, timeout );
|
||||
|
||||
m_bufLeft = Recv( m_buf, BufSize, timeout );
|
||||
if( m_bufLeft <= 0 ) return m_bufLeft;
|
||||
|
||||
const auto sz = len < m_bufLeft ? len : m_bufLeft;
|
||||
memcpy( buf, m_buf, sz );
|
||||
m_bufPtr = m_buf + sz;
|
||||
m_bufLeft -= sz;
|
||||
return sz;
|
||||
}
|
||||
|
||||
int Socket::Recv( void* _buf, int len, int timeout )
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
auto buf = (char*)_buf;
|
||||
|
||||
struct pollfd fd;
|
||||
fd.fd = (socket_t)sock;
|
||||
fd.events = POLLIN;
|
||||
|
||||
if( poll( &fd, 1, timeout ) > 0 )
|
||||
{
|
||||
return recv( sock, buf, len, 0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
int Socket::ReadUpTo( void* _buf, int len, int timeout )
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
auto buf = (char*)_buf;
|
||||
|
||||
int rd = 0;
|
||||
while( len > 0 )
|
||||
{
|
||||
const auto res = recv( sock, buf, len, 0 );
|
||||
if( res == 0 ) break;
|
||||
if( res == -1 ) return -1;
|
||||
len -= res;
|
||||
rd += res;
|
||||
buf += res;
|
||||
}
|
||||
return rd;
|
||||
}
|
||||
|
||||
bool Socket::Read( void* buf, int len, int timeout )
|
||||
{
|
||||
auto cbuf = (char*)buf;
|
||||
while( len > 0 )
|
||||
{
|
||||
if( !ReadImpl( cbuf, len, timeout ) ) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Socket::ReadImpl( char*& buf, int& len, int timeout )
|
||||
{
|
||||
const auto sz = RecvBuffered( buf, len, timeout );
|
||||
switch( sz )
|
||||
{
|
||||
case 0:
|
||||
return false;
|
||||
case -1:
|
||||
#ifdef _WIN32
|
||||
{
|
||||
auto err = WSAGetLastError();
|
||||
if( err == WSAECONNABORTED || err == WSAECONNRESET ) return false;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
len -= sz;
|
||||
buf += sz;
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Socket::ReadRaw( void* _buf, int len, int timeout )
|
||||
{
|
||||
auto buf = (char*)_buf;
|
||||
while( len > 0 )
|
||||
{
|
||||
const auto sz = Recv( buf, len, timeout );
|
||||
if( sz <= 0 ) return false;
|
||||
len -= sz;
|
||||
buf += sz;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Socket::HasData()
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
if( m_bufLeft > 0 ) return true;
|
||||
|
||||
struct pollfd fd;
|
||||
fd.fd = (socket_t)sock;
|
||||
fd.events = POLLIN;
|
||||
|
||||
return poll( &fd, 1, 0 ) > 0;
|
||||
}
|
||||
|
||||
bool Socket::IsValid() const
|
||||
{
|
||||
return m_sock.load( std::memory_order_relaxed ) >= 0;
|
||||
}
|
||||
|
||||
|
||||
ListenSocket::ListenSocket()
|
||||
: m_sock( -1 )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
InitWinSock();
|
||||
#endif
|
||||
}
|
||||
|
||||
ListenSocket::~ListenSocket()
|
||||
{
|
||||
if( m_sock != -1 ) Close();
|
||||
}
|
||||
|
||||
static int addrinfo_and_socket_for_family( uint16_t port, int ai_family, struct addrinfo** res )
|
||||
{
|
||||
struct addrinfo hints;
|
||||
memset( &hints, 0, sizeof( hints ) );
|
||||
hints.ai_family = ai_family;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
#ifndef TRACY_ONLY_LOCALHOST
|
||||
const char* onlyLocalhost = GetEnvVar( "TRACY_ONLY_LOCALHOST" );
|
||||
if( !onlyLocalhost || onlyLocalhost[0] != '1' )
|
||||
{
|
||||
hints.ai_flags = AI_PASSIVE;
|
||||
}
|
||||
#endif
|
||||
char portbuf[32];
|
||||
sprintf( portbuf, "%" PRIu16, port );
|
||||
if( getaddrinfo( nullptr, portbuf, &hints, res ) != 0 ) return -1;
|
||||
int sock = socket( (*res)->ai_family, (*res)->ai_socktype, (*res)->ai_protocol );
|
||||
if (sock == -1) freeaddrinfo( *res );
|
||||
return sock;
|
||||
}
|
||||
|
||||
bool ListenSocket::Listen( uint16_t port, int backlog )
|
||||
{
|
||||
assert( m_sock == -1 );
|
||||
|
||||
struct addrinfo* res = nullptr;
|
||||
|
||||
#if !defined TRACY_ONLY_IPV4 && !defined TRACY_ONLY_LOCALHOST
|
||||
const char* onlyIPv4 = GetEnvVar( "TRACY_ONLY_IPV4" );
|
||||
if( !onlyIPv4 || onlyIPv4[0] != '1' )
|
||||
{
|
||||
m_sock = addrinfo_and_socket_for_family( port, AF_INET6, &res );
|
||||
}
|
||||
#endif
|
||||
if (m_sock == -1)
|
||||
{
|
||||
// IPV6 protocol may not be available/is disabled. Try to create a socket
|
||||
// with the IPV4 protocol
|
||||
m_sock = addrinfo_and_socket_for_family( port, AF_INET, &res );
|
||||
if( m_sock == -1 ) return false;
|
||||
}
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
unsigned long val = 0;
|
||||
setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) );
|
||||
#elif defined BSD
|
||||
int val = 0;
|
||||
setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) );
|
||||
val = 1;
|
||||
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) );
|
||||
#else
|
||||
int val = 1;
|
||||
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) );
|
||||
#endif
|
||||
if( bind( m_sock, res->ai_addr, res->ai_addrlen ) == -1 ) { freeaddrinfo( res ); Close(); return false; }
|
||||
if( listen( m_sock, backlog ) == -1 ) { freeaddrinfo( res ); Close(); return false; }
|
||||
freeaddrinfo( res );
|
||||
return true;
|
||||
}
|
||||
|
||||
Socket* ListenSocket::Accept()
|
||||
{
|
||||
struct sockaddr_storage remote;
|
||||
socklen_t sz = sizeof( remote );
|
||||
|
||||
struct pollfd fd;
|
||||
fd.fd = (socket_t)m_sock;
|
||||
fd.events = POLLIN;
|
||||
|
||||
if( poll( &fd, 1, 10 ) > 0 )
|
||||
{
|
||||
int sock = accept( m_sock, (sockaddr*)&remote, &sz);
|
||||
if( sock == -1 ) return nullptr;
|
||||
|
||||
#if defined __APPLE__
|
||||
int val = 1;
|
||||
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
|
||||
#endif
|
||||
|
||||
auto ptr = (Socket*)tracy_malloc( sizeof( Socket ) );
|
||||
new(ptr) Socket( sock );
|
||||
return ptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void ListenSocket::Close()
|
||||
{
|
||||
assert( m_sock != -1 );
|
||||
#ifdef _WIN32
|
||||
closesocket( m_sock );
|
||||
#else
|
||||
close( m_sock );
|
||||
#endif
|
||||
m_sock = -1;
|
||||
}
|
||||
|
||||
UdpBroadcast::UdpBroadcast()
|
||||
: m_sock( -1 )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
InitWinSock();
|
||||
#endif
|
||||
}
|
||||
|
||||
UdpBroadcast::~UdpBroadcast()
|
||||
{
|
||||
if( m_sock != -1 ) Close();
|
||||
}
|
||||
|
||||
bool UdpBroadcast::Open( const char* addr, uint16_t port )
|
||||
{
|
||||
assert( m_sock == -1 );
|
||||
|
||||
struct addrinfo hints;
|
||||
struct addrinfo *res, *ptr;
|
||||
|
||||
memset( &hints, 0, sizeof( hints ) );
|
||||
hints.ai_family = AF_INET;
|
||||
hints.ai_socktype = SOCK_DGRAM;
|
||||
|
||||
char portbuf[32];
|
||||
sprintf( portbuf, "%" PRIu16, port );
|
||||
|
||||
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
|
||||
int sock = 0;
|
||||
for( ptr = res; ptr; ptr = ptr->ai_next )
|
||||
{
|
||||
if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue;
|
||||
#if defined __APPLE__
|
||||
int val = 1;
|
||||
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
|
||||
#endif
|
||||
#if defined _WIN32
|
||||
unsigned long broadcast = 1;
|
||||
if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 )
|
||||
#else
|
||||
int broadcast = 1;
|
||||
if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof( broadcast ) ) == -1 )
|
||||
#endif
|
||||
{
|
||||
#ifdef _WIN32
|
||||
closesocket( sock );
|
||||
#else
|
||||
close( sock );
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
freeaddrinfo( res );
|
||||
if( !ptr ) return false;
|
||||
|
||||
m_sock = sock;
|
||||
inet_pton( AF_INET, addr, &m_addr );
|
||||
return true;
|
||||
}
|
||||
|
||||
void UdpBroadcast::Close()
|
||||
{
|
||||
assert( m_sock != -1 );
|
||||
#ifdef _WIN32
|
||||
closesocket( m_sock );
|
||||
#else
|
||||
close( m_sock );
|
||||
#endif
|
||||
m_sock = -1;
|
||||
}
|
||||
|
||||
int UdpBroadcast::Send( uint16_t port, const void* data, int len )
|
||||
{
|
||||
assert( m_sock != -1 );
|
||||
struct sockaddr_in addr;
|
||||
addr.sin_family = AF_INET;
|
||||
addr.sin_port = htons( port );
|
||||
addr.sin_addr.s_addr = m_addr;
|
||||
return sendto( m_sock, (const char*)data, len, MSG_NOSIGNAL, (sockaddr*)&addr, sizeof( addr ) );
|
||||
}
|
||||
|
||||
IpAddress::IpAddress()
|
||||
: m_number( 0 )
|
||||
{
|
||||
*m_text = '\0';
|
||||
}
|
||||
|
||||
IpAddress::~IpAddress()
|
||||
{
|
||||
}
|
||||
|
||||
void IpAddress::Set( const struct sockaddr& addr )
|
||||
{
|
||||
#if defined _WIN32 && ( !defined NTDDI_WIN10 || NTDDI_VERSION < NTDDI_WIN10 )
|
||||
struct sockaddr_in tmp;
|
||||
memcpy( &tmp, &addr, sizeof( tmp ) );
|
||||
auto ai = &tmp;
|
||||
#else
|
||||
auto ai = (const struct sockaddr_in*)&addr;
|
||||
#endif
|
||||
inet_ntop( AF_INET, &ai->sin_addr, m_text, 17 );
|
||||
m_number = ai->sin_addr.s_addr;
|
||||
}
|
||||
|
||||
UdpListen::UdpListen()
|
||||
: m_sock( -1 )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
InitWinSock();
|
||||
#endif
|
||||
}
|
||||
|
||||
UdpListen::~UdpListen()
|
||||
{
|
||||
if( m_sock != -1 ) Close();
|
||||
}
|
||||
|
||||
bool UdpListen::Listen( uint16_t port )
|
||||
{
|
||||
assert( m_sock == -1 );
|
||||
|
||||
int sock;
|
||||
if( ( sock = socket( AF_INET, SOCK_DGRAM, 0 ) ) == -1 ) return false;
|
||||
|
||||
#if defined __APPLE__
|
||||
int val = 1;
|
||||
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
|
||||
#endif
|
||||
#if defined _WIN32
|
||||
unsigned long reuse = 1;
|
||||
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) );
|
||||
#else
|
||||
int reuse = 1;
|
||||
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) );
|
||||
#endif
|
||||
#if defined _WIN32
|
||||
unsigned long broadcast = 1;
|
||||
if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 )
|
||||
#else
|
||||
int broadcast = 1;
|
||||
if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof( broadcast ) ) == -1 )
|
||||
#endif
|
||||
{
|
||||
#ifdef _WIN32
|
||||
closesocket( sock );
|
||||
#else
|
||||
close( sock );
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
struct sockaddr_in addr;
|
||||
addr.sin_family = AF_INET;
|
||||
addr.sin_port = htons( port );
|
||||
addr.sin_addr.s_addr = INADDR_ANY;
|
||||
|
||||
if( bind( sock, (sockaddr*)&addr, sizeof( addr ) ) == -1 )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
closesocket( sock );
|
||||
#else
|
||||
close( sock );
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
m_sock = sock;
|
||||
return true;
|
||||
}
|
||||
|
||||
void UdpListen::Close()
|
||||
{
|
||||
assert( m_sock != -1 );
|
||||
#ifdef _WIN32
|
||||
closesocket( m_sock );
|
||||
#else
|
||||
close( m_sock );
|
||||
#endif
|
||||
m_sock = -1;
|
||||
}
|
||||
|
||||
const char* UdpListen::Read( size_t& len, IpAddress& addr, int timeout )
|
||||
{
|
||||
static char buf[2048];
|
||||
|
||||
struct pollfd fd;
|
||||
fd.fd = (socket_t)m_sock;
|
||||
fd.events = POLLIN;
|
||||
if( poll( &fd, 1, timeout ) <= 0 ) return nullptr;
|
||||
|
||||
sockaddr sa;
|
||||
socklen_t salen = sizeof( struct sockaddr );
|
||||
len = (size_t)recvfrom( m_sock, buf, 2048, 0, &sa, &salen );
|
||||
addr.Set( sa );
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,156 @@
|
|||
#ifndef __TRACYSOCKET_HPP__
|
||||
#define __TRACYSOCKET_HPP__
|
||||
|
||||
#include <atomic>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "TracyForceInline.hpp"
|
||||
|
||||
struct addrinfo;
|
||||
struct sockaddr;
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#ifdef _WIN32
|
||||
void InitWinSock();
|
||||
#endif
|
||||
|
||||
class Socket
|
||||
{
|
||||
public:
|
||||
Socket();
|
||||
Socket( int sock );
|
||||
~Socket();
|
||||
|
||||
bool Connect( const char* addr, uint16_t port );
|
||||
bool ConnectBlocking( const char* addr, uint16_t port );
|
||||
void Close();
|
||||
|
||||
int Send( const void* buf, int len );
|
||||
int GetSendBufSize();
|
||||
|
||||
int ReadUpTo( void* buf, int len, int timeout );
|
||||
bool Read( void* buf, int len, int timeout );
|
||||
|
||||
template<typename ShouldExit>
|
||||
bool Read( void* buf, int len, int timeout, ShouldExit exitCb )
|
||||
{
|
||||
auto cbuf = (char*)buf;
|
||||
while( len > 0 )
|
||||
{
|
||||
if( exitCb() ) return false;
|
||||
if( !ReadImpl( cbuf, len, timeout ) ) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ReadRaw( void* buf, int len, int timeout );
|
||||
bool HasData();
|
||||
bool IsValid() const;
|
||||
|
||||
Socket( const Socket& ) = delete;
|
||||
Socket( Socket&& ) = delete;
|
||||
Socket& operator=( const Socket& ) = delete;
|
||||
Socket& operator=( Socket&& ) = delete;
|
||||
|
||||
private:
|
||||
int RecvBuffered( void* buf, int len, int timeout );
|
||||
int Recv( void* buf, int len, int timeout );
|
||||
|
||||
bool ReadImpl( char*& buf, int& len, int timeout );
|
||||
|
||||
char* m_buf;
|
||||
char* m_bufPtr;
|
||||
std::atomic<int> m_sock;
|
||||
int m_bufLeft;
|
||||
|
||||
struct addrinfo *m_res;
|
||||
struct addrinfo *m_ptr;
|
||||
int m_connSock;
|
||||
};
|
||||
|
||||
class ListenSocket
|
||||
{
|
||||
public:
|
||||
ListenSocket();
|
||||
~ListenSocket();
|
||||
|
||||
bool Listen( uint16_t port, int backlog );
|
||||
Socket* Accept();
|
||||
void Close();
|
||||
|
||||
ListenSocket( const ListenSocket& ) = delete;
|
||||
ListenSocket( ListenSocket&& ) = delete;
|
||||
ListenSocket& operator=( const ListenSocket& ) = delete;
|
||||
ListenSocket& operator=( ListenSocket&& ) = delete;
|
||||
|
||||
private:
|
||||
int m_sock;
|
||||
};
|
||||
|
||||
class UdpBroadcast
|
||||
{
|
||||
public:
|
||||
UdpBroadcast();
|
||||
~UdpBroadcast();
|
||||
|
||||
bool Open( const char* addr, uint16_t port );
|
||||
void Close();
|
||||
|
||||
int Send( uint16_t port, const void* data, int len );
|
||||
|
||||
UdpBroadcast( const UdpBroadcast& ) = delete;
|
||||
UdpBroadcast( UdpBroadcast&& ) = delete;
|
||||
UdpBroadcast& operator=( const UdpBroadcast& ) = delete;
|
||||
UdpBroadcast& operator=( UdpBroadcast&& ) = delete;
|
||||
|
||||
private:
|
||||
int m_sock;
|
||||
uint32_t m_addr;
|
||||
};
|
||||
|
||||
class IpAddress
|
||||
{
|
||||
public:
|
||||
IpAddress();
|
||||
~IpAddress();
|
||||
|
||||
void Set( const struct sockaddr& addr );
|
||||
|
||||
uint32_t GetNumber() const { return m_number; }
|
||||
const char* GetText() const { return m_text; }
|
||||
|
||||
IpAddress( const IpAddress& ) = delete;
|
||||
IpAddress( IpAddress&& ) = delete;
|
||||
IpAddress& operator=( const IpAddress& ) = delete;
|
||||
IpAddress& operator=( IpAddress&& ) = delete;
|
||||
|
||||
private:
|
||||
uint32_t m_number;
|
||||
char m_text[17];
|
||||
};
|
||||
|
||||
class UdpListen
|
||||
{
|
||||
public:
|
||||
UdpListen();
|
||||
~UdpListen();
|
||||
|
||||
bool Listen( uint16_t port );
|
||||
void Close();
|
||||
|
||||
const char* Read( size_t& len, IpAddress& addr, int timeout );
|
||||
|
||||
UdpListen( const UdpListen& ) = delete;
|
||||
UdpListen( UdpListen&& ) = delete;
|
||||
UdpListen& operator=( const UdpListen& ) = delete;
|
||||
UdpListen& operator=( UdpListen&& ) = delete;
|
||||
|
||||
private:
|
||||
int m_sock;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,286 @@
|
|||
#if defined _MSC_VER || defined __CYGWIN__ || defined _WIN32
|
||||
# ifndef WIN32_LEAN_AND_MEAN
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
# endif
|
||||
# ifndef NOMINMAX
|
||||
# define NOMINMAX
|
||||
# endif
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(disable:4996)
|
||||
#endif
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# include <windows.h>
|
||||
# include <malloc.h>
|
||||
#else
|
||||
# include <pthread.h>
|
||||
# include <string.h>
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
# ifdef __ANDROID__
|
||||
# include <sys/types.h>
|
||||
# else
|
||||
# include <sys/syscall.h>
|
||||
# endif
|
||||
# include <fcntl.h>
|
||||
#elif defined __FreeBSD__
|
||||
# include <sys/thr.h>
|
||||
#elif defined __NetBSD__ || defined __DragonFly__
|
||||
# include <sys/lwp.h>
|
||||
#endif
|
||||
|
||||
#ifdef __MINGW32__
|
||||
# define __STDC_FORMAT_MACROS
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "TracySystem.hpp"
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
extern "C" typedef HRESULT (WINAPI *t_SetThreadDescription)( HANDLE, PCWSTR );
|
||||
extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* );
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
# include <atomic>
|
||||
# include "TracyAlloc.hpp"
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
TRACY_API uint64_t GetThreadHandleImpl()
|
||||
{
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
|
||||
return uint64_t( GetCurrentThreadId() );
|
||||
#elif defined __APPLE__
|
||||
uint64_t id;
|
||||
pthread_threadid_np( pthread_self(), &id );
|
||||
return id;
|
||||
#elif defined __ANDROID__
|
||||
return (uint64_t)gettid();
|
||||
#elif defined __linux__
|
||||
return (uint64_t)syscall( SYS_gettid );
|
||||
#elif defined __FreeBSD__
|
||||
long id;
|
||||
thr_self( &id );
|
||||
return id;
|
||||
#elif defined __NetBSD__
|
||||
return _lwp_self();
|
||||
#elif defined __DragonFly__
|
||||
return lwp_gettid();
|
||||
#elif defined __OpenBSD__
|
||||
return getthrid();
|
||||
#else
|
||||
static_assert( sizeof( decltype( pthread_self() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
|
||||
return uint64_t( pthread_self() );
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
struct ThreadNameData
|
||||
{
|
||||
uint64_t id;
|
||||
const char* name;
|
||||
ThreadNameData* next;
|
||||
};
|
||||
std::atomic<ThreadNameData*>& GetThreadNameData();
|
||||
TRACY_API void InitRPMallocThread();
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma pack( push, 8 )
|
||||
struct THREADNAME_INFO
|
||||
{
|
||||
DWORD dwType;
|
||||
LPCSTR szName;
|
||||
DWORD dwThreadID;
|
||||
DWORD dwFlags;
|
||||
};
|
||||
# pragma pack(pop)
|
||||
|
||||
void ThreadNameMsvcMagic( const THREADNAME_INFO& info )
|
||||
{
|
||||
__try
|
||||
{
|
||||
RaiseException( 0x406D1388, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info );
|
||||
}
|
||||
__except(EXCEPTION_EXECUTE_HANDLER)
|
||||
{
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
TRACY_API void SetThreadName( const char* name )
|
||||
{
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
static auto _SetThreadDescription = (t_SetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "SetThreadDescription" );
|
||||
if( _SetThreadDescription )
|
||||
{
|
||||
wchar_t buf[256];
|
||||
mbstowcs( buf, name, 256 );
|
||||
_SetThreadDescription( GetCurrentThread(), buf );
|
||||
}
|
||||
else
|
||||
{
|
||||
# if defined _MSC_VER
|
||||
THREADNAME_INFO info;
|
||||
info.dwType = 0x1000;
|
||||
info.szName = name;
|
||||
info.dwThreadID = GetCurrentThreadId();
|
||||
info.dwFlags = 0;
|
||||
ThreadNameMsvcMagic( info );
|
||||
# endif
|
||||
}
|
||||
#elif defined _GNU_SOURCE && !defined __EMSCRIPTEN__ && !defined __CYGWIN__
|
||||
{
|
||||
const auto sz = strlen( name );
|
||||
if( sz <= 15 )
|
||||
{
|
||||
pthread_setname_np( pthread_self(), name );
|
||||
}
|
||||
else
|
||||
{
|
||||
char buf[16];
|
||||
memcpy( buf, name, 15 );
|
||||
buf[15] = '\0';
|
||||
pthread_setname_np( pthread_self(), buf );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef TRACY_ENABLE
|
||||
{
|
||||
InitRPMallocThread();
|
||||
const auto sz = strlen( name );
|
||||
char* buf = (char*)tracy_malloc( sz+1 );
|
||||
memcpy( buf, name, sz );
|
||||
buf[sz] = '\0';
|
||||
auto data = (ThreadNameData*)tracy_malloc( sizeof( ThreadNameData ) );
|
||||
data->id = detail::GetThreadHandleImpl();
|
||||
data->name = buf;
|
||||
data->next = GetThreadNameData().load( std::memory_order_relaxed );
|
||||
while( !GetThreadNameData().compare_exchange_weak( data->next, data, std::memory_order_release, std::memory_order_relaxed ) ) {}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
TRACY_API const char* GetThreadName( uint64_t id )
|
||||
{
|
||||
static char buf[256];
|
||||
#ifdef TRACY_ENABLE
|
||||
auto ptr = GetThreadNameData().load( std::memory_order_relaxed );
|
||||
while( ptr )
|
||||
{
|
||||
if( ptr->id == id )
|
||||
{
|
||||
return ptr->name;
|
||||
}
|
||||
ptr = ptr->next;
|
||||
}
|
||||
#else
|
||||
# if defined _WIN32 || defined __CYGWIN__
|
||||
static auto _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" );
|
||||
if( _GetThreadDescription )
|
||||
{
|
||||
auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id );
|
||||
if( hnd != 0 )
|
||||
{
|
||||
PWSTR tmp;
|
||||
_GetThreadDescription( hnd, &tmp );
|
||||
auto ret = wcstombs( buf, tmp, 256 );
|
||||
CloseHandle( hnd );
|
||||
if( ret != 0 )
|
||||
{
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
}
|
||||
# elif defined __linux__
|
||||
int cs, fd;
|
||||
char path[32];
|
||||
# ifdef __ANDROID__
|
||||
int tid = gettid();
|
||||
# else
|
||||
int tid = (int) syscall( SYS_gettid );
|
||||
# endif
|
||||
snprintf( path, sizeof( path ), "/proc/self/task/%d/comm", tid );
|
||||
sprintf( buf, "%" PRIu64, id );
|
||||
# ifndef __ANDROID__
|
||||
pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &cs );
|
||||
# endif
|
||||
if ( ( fd = open( path, O_RDONLY ) ) > 0) {
|
||||
int len = read( fd, buf, 255 );
|
||||
if( len > 0 )
|
||||
{
|
||||
buf[len] = 0;
|
||||
if( len > 1 && buf[len-1] == '\n' )
|
||||
{
|
||||
buf[len-1] = 0;
|
||||
}
|
||||
}
|
||||
close( fd );
|
||||
}
|
||||
# ifndef __ANDROID__
|
||||
pthread_setcancelstate( cs, 0 );
|
||||
# endif
|
||||
return buf;
|
||||
# endif
|
||||
#endif
|
||||
sprintf( buf, "%" PRIu64, id );
|
||||
return buf;
|
||||
}
|
||||
|
||||
TRACY_API const char* GetEnvVar( const char* name )
|
||||
{
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
// unfortunately getenv() on Windows is just fundamentally broken. It caches the entire
|
||||
// environment block once on startup, then never refreshes it again. If any environment
|
||||
// strings are added or modified after startup of the CRT, those changes will not be
|
||||
// seen by getenv(). This removes the possibility of an app using this SDK from
|
||||
// programmatically setting any of the behaviour controlling envvars here.
|
||||
//
|
||||
// To work around this, we'll instead go directly to the Win32 environment strings APIs
|
||||
// to get the current value.
|
||||
static char buffer[1024];
|
||||
DWORD const kBufferSize = DWORD(sizeof(buffer) / sizeof(buffer[0]));
|
||||
DWORD count = GetEnvironmentVariableA(name, buffer, kBufferSize);
|
||||
|
||||
if( count == 0 )
|
||||
return nullptr;
|
||||
|
||||
if( count >= kBufferSize )
|
||||
{
|
||||
char* buf = reinterpret_cast<char*>(_alloca(count + 1));
|
||||
count = GetEnvironmentVariableA(name, buf, count + 1);
|
||||
memcpy(buffer, buf, kBufferSize);
|
||||
buffer[kBufferSize - 1] = 0;
|
||||
}
|
||||
|
||||
return buffer;
|
||||
#else
|
||||
return getenv(name);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
TRACY_API void ___tracy_set_thread_name( const char* name ) { tracy::SetThreadName( name ); }
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,32 @@
|
|||
#ifndef __TRACYSYSTEM_HPP__
|
||||
#define __TRACYSYSTEM_HPP__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "TracyApi.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
TRACY_API uint64_t GetThreadHandleImpl();
|
||||
}
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
TRACY_API uint64_t GetThreadHandle();
|
||||
#else
|
||||
static inline uint64_t GetThreadHandle()
|
||||
{
|
||||
return detail::GetThreadHandleImpl();
|
||||
}
|
||||
#endif
|
||||
|
||||
TRACY_API void SetThreadName( const char* name );
|
||||
TRACY_API const char* GetThreadName( uint64_t id );
|
||||
|
||||
TRACY_API const char* GetEnvVar(const char* name);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,18 @@
|
|||
# Extract the actual list of source files from a sibling Visual Studio project.
|
||||
|
||||
# Ensure these are simply-substituted variables, without changing their values.
|
||||
SRC := $(SRC)
|
||||
SRC2 := $(SRC2)
|
||||
SRC3 := $(SRC3)
|
||||
|
||||
# Paths here are relative to the directory in which make was invoked, not to
|
||||
# this file, so ../win32/$(PROJECT).vcxproj refers to the Visual Studio project
|
||||
# of whichever tool is including this makefile fragment.
|
||||
|
||||
BASE := $(shell egrep 'ClCompile.*cpp"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
|
||||
BASE2 := $(shell egrep 'ClCompile.*c"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
|
||||
|
||||
# The tool-specific makefile may request that certain files be omitted.
|
||||
SRC += $(filter-out $(FILTER),$(BASE))
|
||||
SRC2 += $(filter-out $(FILTER),$(BASE2))
|
||||
SRC3 += $(filter-out $(FILTER),$(BASE3))
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,777 @@
|
|||
/*
|
||||
* LZ4 - Fast LZ compression algorithm
|
||||
* Header File
|
||||
* Copyright (C) 2011-present, Yann Collet.
|
||||
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- LZ4 homepage : http://www.lz4.org
|
||||
- LZ4 source repository : https://github.com/lz4/lz4
|
||||
*/
|
||||
|
||||
#ifndef TRACY_LZ4_H_2983827168210
|
||||
#define TRACY_LZ4_H_2983827168210
|
||||
|
||||
/* --- Dependency --- */
|
||||
#include <stddef.h> /* size_t */
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
/**
|
||||
Introduction
|
||||
|
||||
LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core,
|
||||
scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
|
||||
multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
|
||||
|
||||
The LZ4 compression library provides in-memory compression and decompression functions.
|
||||
It gives full buffer control to user.
|
||||
Compression can be done in:
|
||||
- a single step (described as Simple Functions)
|
||||
- a single step, reusing a context (described in Advanced Functions)
|
||||
- unbounded multiple steps (described as Streaming compression)
|
||||
|
||||
lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
|
||||
Decompressing such a compressed block requires additional metadata.
|
||||
Exact metadata depends on exact decompression function.
|
||||
For the typical case of LZ4_decompress_safe(),
|
||||
metadata includes block's compressed size, and maximum bound of decompressed size.
|
||||
Each application is free to encode and pass such metadata in whichever way it wants.
|
||||
|
||||
lz4.h only handle blocks, it can not generate Frames.
|
||||
|
||||
Blocks are different from Frames (doc/lz4_Frame_format.md).
|
||||
Frames bundle both blocks and metadata in a specified manner.
|
||||
Embedding metadata is required for compressed data to be self-contained and portable.
|
||||
Frame format is delivered through a companion API, declared in lz4frame.h.
|
||||
The `lz4` CLI can only manage frames.
|
||||
*/
|
||||
|
||||
/*^***************************************************************
|
||||
* Export parameters
|
||||
*****************************************************************/
|
||||
/*
|
||||
* LZ4_DLL_EXPORT :
|
||||
* Enable exporting of functions when building a Windows DLL
|
||||
* LZ4LIB_VISIBILITY :
|
||||
* Control library symbols visibility.
|
||||
*/
|
||||
#ifndef LZ4LIB_VISIBILITY
|
||||
# if defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
|
||||
# else
|
||||
# define LZ4LIB_VISIBILITY
|
||||
# endif
|
||||
#endif
|
||||
#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
|
||||
# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
|
||||
#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
|
||||
# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
||||
#else
|
||||
# define LZ4LIB_API LZ4LIB_VISIBILITY
|
||||
#endif
|
||||
|
||||
/*------ Version ------*/
|
||||
#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
|
||||
#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */
|
||||
#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
|
||||
|
||||
#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
|
||||
|
||||
#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
|
||||
#define LZ4_QUOTE(str) #str
|
||||
#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
|
||||
#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version */
|
||||
LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version */
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Tuning parameter
|
||||
**************************************/
|
||||
/*!
|
||||
* LZ4_MEMORY_USAGE :
|
||||
* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
|
||||
* Increasing memory usage improves compression ratio.
|
||||
* Reduced memory usage may improve speed, thanks to better cache locality.
|
||||
* Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
|
||||
*/
|
||||
#ifndef LZ4_MEMORY_USAGE
|
||||
# define LZ4_MEMORY_USAGE 14
|
||||
#endif
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Simple Functions
|
||||
**************************************/
|
||||
/*! LZ4_compress_default() :
|
||||
* Compresses 'srcSize' bytes from buffer 'src'
|
||||
* into already allocated 'dst' buffer of size 'dstCapacity'.
|
||||
* Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
|
||||
* It also runs faster, so it's a recommended setting.
|
||||
* If the function cannot compress 'src' into a more limited 'dst' budget,
|
||||
* compression stops *immediately*, and the function result is zero.
|
||||
* In which case, 'dst' content is undefined (invalid).
|
||||
* srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
|
||||
* dstCapacity : size of buffer 'dst' (which must be already allocated)
|
||||
* @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
|
||||
* or 0 if compression fails
|
||||
* Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
|
||||
|
||||
/*! LZ4_decompress_safe() :
|
||||
* compressedSize : is the exact complete size of the compressed block.
|
||||
* dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
|
||||
* @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
|
||||
* If destination buffer is not large enough, decoding will stop and output an error code (negative value).
|
||||
* If the source stream is detected malformed, the function will stop decoding and return a negative result.
|
||||
* Note 1 : This function is protected against malicious data packets :
|
||||
* it will never writes outside 'dst' buffer, nor read outside 'source' buffer,
|
||||
* even if the compressed block is maliciously modified to order the decoder to do these actions.
|
||||
* In such case, the decoder stops immediately, and considers the compressed block malformed.
|
||||
* Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
|
||||
* The implementation is free to send / store / derive this information in whichever way is most beneficial.
|
||||
* If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Advanced Functions
|
||||
**************************************/
|
||||
#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
|
||||
#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
|
||||
|
||||
/*! LZ4_compressBound() :
|
||||
Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
|
||||
This function is primarily useful for memory allocation purposes (destination buffer size).
|
||||
Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
|
||||
Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
|
||||
inputSize : max supported value is LZ4_MAX_INPUT_SIZE
|
||||
return : maximum output size in a "worst case" scenario
|
||||
or 0, if input size is incorrect (too large or negative)
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compressBound(int inputSize);
|
||||
|
||||
/*! LZ4_compress_fast() :
|
||||
Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
|
||||
The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
|
||||
It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
|
||||
An acceleration value of "1" is the same as regular LZ4_compress_default()
|
||||
Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
|
||||
Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
|
||||
|
||||
|
||||
/*! LZ4_compress_fast_extState() :
|
||||
* Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
|
||||
* Use LZ4_sizeofState() to know how much memory must be allocated,
|
||||
* and allocate it on 8-bytes boundaries (using `malloc()` typically).
|
||||
* Then, provide this buffer as `void* state` to compression function.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_sizeofState(void);
|
||||
LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
|
||||
|
||||
|
||||
/*! LZ4_compress_destSize() :
|
||||
* Reverse the logic : compresses as much data as possible from 'src' buffer
|
||||
* into already allocated buffer 'dst', of size >= 'targetDestSize'.
|
||||
* This function either compresses the entire 'src' content into 'dst' if it's large enough,
|
||||
* or fill 'dst' buffer completely with as much data as possible from 'src'.
|
||||
* note: acceleration parameter is fixed to "default".
|
||||
*
|
||||
* *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'.
|
||||
* New value is necessarily <= input value.
|
||||
* @return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
|
||||
* or 0 if compression fails.
|
||||
*
|
||||
* Note : from v1.8.2 to v1.9.1, this function had a bug (fixed un v1.9.2+):
|
||||
* the produced compressed content could, in specific circumstances,
|
||||
* require to be decompressed into a destination buffer larger
|
||||
* by at least 1 byte than the content to decompress.
|
||||
* If an application uses `LZ4_compress_destSize()`,
|
||||
* it's highly recommended to update liblz4 to v1.9.2 or better.
|
||||
* If this can't be done or ensured,
|
||||
* the receiving decompression function should provide
|
||||
* a dstCapacity which is > decompressedSize, by at least 1 byte.
|
||||
* See https://github.com/lz4/lz4/issues/859 for details
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
|
||||
|
||||
|
||||
/*! LZ4_decompress_safe_partial() :
|
||||
* Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
|
||||
* into destination buffer 'dst' of size 'dstCapacity'.
|
||||
* Up to 'targetOutputSize' bytes will be decoded.
|
||||
* The function stops decoding on reaching this objective.
|
||||
* This can be useful to boost performance
|
||||
* whenever only the beginning of a block is required.
|
||||
*
|
||||
* @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
|
||||
* If source stream is detected malformed, function returns a negative result.
|
||||
*
|
||||
* Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
|
||||
*
|
||||
* Note 2 : targetOutputSize must be <= dstCapacity
|
||||
*
|
||||
* Note 3 : this function effectively stops decoding on reaching targetOutputSize,
|
||||
* so dstCapacity is kind of redundant.
|
||||
* This is because in older versions of this function,
|
||||
* decoding operation would still write complete sequences.
|
||||
* Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
|
||||
* it could write more bytes, though only up to dstCapacity.
|
||||
* Some "margin" used to be required for this operation to work properly.
|
||||
* Thankfully, this is no longer necessary.
|
||||
* The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
|
||||
*
|
||||
* Note 4 : If srcSize is the exact size of the block,
|
||||
* then targetOutputSize can be any value,
|
||||
* including larger than the block's decompressed size.
|
||||
* The function will, at most, generate block's decompressed size.
|
||||
*
|
||||
* Note 5 : If srcSize is _larger_ than block's compressed size,
|
||||
* then targetOutputSize **MUST** be <= block's decompressed size.
|
||||
* Otherwise, *silent corruption will occur*.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
|
||||
|
||||
|
||||
/*-*********************************************
|
||||
* Streaming Compression Functions
|
||||
***********************************************/
|
||||
typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */
|
||||
|
||||
LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
|
||||
LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr);
|
||||
|
||||
/*! LZ4_resetStream_fast() : v1.9.0+
|
||||
* Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
|
||||
* (e.g., LZ4_compress_fast_continue()).
|
||||
*
|
||||
* An LZ4_stream_t must be initialized once before usage.
|
||||
* This is automatically done when created by LZ4_createStream().
|
||||
* However, should the LZ4_stream_t be simply declared on stack (for example),
|
||||
* it's necessary to initialize it first, using LZ4_initStream().
|
||||
*
|
||||
* After init, start any new stream with LZ4_resetStream_fast().
|
||||
* A same LZ4_stream_t can be re-used multiple times consecutively
|
||||
* and compress multiple streams,
|
||||
* provided that it starts each new stream with LZ4_resetStream_fast().
|
||||
*
|
||||
* LZ4_resetStream_fast() is much faster than LZ4_initStream(),
|
||||
* but is not compatible with memory regions containing garbage data.
|
||||
*
|
||||
* Note: it's only useful to call LZ4_resetStream_fast()
|
||||
* in the context of streaming compression.
|
||||
* The *extState* functions perform their own resets.
|
||||
* Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
|
||||
*/
|
||||
LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
|
||||
|
||||
/*! LZ4_loadDict() :
|
||||
* Use this function to reference a static dictionary into LZ4_stream_t.
|
||||
* The dictionary must remain available during compression.
|
||||
* LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
|
||||
* The same dictionary will have to be loaded on decompression side for successful decoding.
|
||||
* Dictionary are useful for better compression of small data (KB range).
|
||||
* While LZ4 accept any input as dictionary,
|
||||
* results are generally better when using Zstandard's Dictionary Builder.
|
||||
* Loading a size of 0 is allowed, and is the same as reset.
|
||||
* @return : loaded dictionary size, in bytes (necessarily <= 64 KB)
|
||||
*/
|
||||
LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
|
||||
|
||||
/*! LZ4_compress_fast_continue() :
|
||||
* Compress 'src' content using data from previously compressed blocks, for better compression ratio.
|
||||
* 'dst' buffer must be already allocated.
|
||||
* If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
|
||||
*
|
||||
* @return : size of compressed block
|
||||
* or 0 if there is an error (typically, cannot fit into 'dst').
|
||||
*
|
||||
* Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
|
||||
* Each block has precise boundaries.
|
||||
* Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
|
||||
* It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
|
||||
*
|
||||
* Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory !
|
||||
*
|
||||
* Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
|
||||
* Make sure that buffers are separated, by at least one byte.
|
||||
* This construction ensures that each block only depends on previous block.
|
||||
*
|
||||
* Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
|
||||
*
|
||||
* Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
|
||||
|
||||
/*! LZ4_saveDict() :
|
||||
* If last 64KB data cannot be guaranteed to remain available at its current memory location,
|
||||
* save it into a safer place (char* safeBuffer).
|
||||
* This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
|
||||
* but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
|
||||
* @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
|
||||
|
||||
|
||||
/*-**********************************************
|
||||
* Streaming Decompression Functions
|
||||
* Bufferless synchronous API
|
||||
************************************************/
|
||||
typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */
|
||||
|
||||
/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
|
||||
* creation / destruction of streaming decompression tracking context.
|
||||
* A tracking context can be re-used multiple times.
|
||||
*/
|
||||
LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
|
||||
LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
|
||||
|
||||
/*! LZ4_setStreamDecode() :
|
||||
* An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
|
||||
* Use this function to start decompression of a new stream of blocks.
|
||||
* A dictionary can optionally be set. Use NULL or size 0 for a reset order.
|
||||
* Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
|
||||
* @return : 1 if OK, 0 if error
|
||||
*/
|
||||
LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
|
||||
|
||||
/*! LZ4_decoderRingBufferSize() : v1.8.2+
|
||||
* Note : in a ring buffer scenario (optional),
|
||||
* blocks are presumed decompressed next to each other
|
||||
* up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
|
||||
* at which stage it resumes from beginning of ring buffer.
|
||||
* When setting such a ring buffer for streaming decompression,
|
||||
* provides the minimum size of this ring buffer
|
||||
* to be compatible with any source respecting maxBlockSize condition.
|
||||
* @return : minimum ring buffer size,
|
||||
* or 0 if there is an error (invalid maxBlockSize).
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
|
||||
#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */
|
||||
|
||||
/*! LZ4_decompress_*_continue() :
|
||||
* These decoding functions allow decompression of consecutive blocks in "streaming" mode.
|
||||
* A block is an unsplittable entity, it must be presented entirely to a decompression function.
|
||||
* Decompression functions only accepts one block at a time.
|
||||
* The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
|
||||
* If less than 64KB of data has been decoded, all the data must be present.
|
||||
*
|
||||
* Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
|
||||
* - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
|
||||
* maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
|
||||
* In which case, encoding and decoding buffers do not need to be synchronized.
|
||||
* Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
|
||||
* - Synchronized mode :
|
||||
* Decompression buffer size is _exactly_ the same as compression buffer size,
|
||||
* and follows exactly same update rule (block boundaries at same positions),
|
||||
* and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
|
||||
* _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
|
||||
* - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
|
||||
* In which case, encoding and decoding buffers do not need to be synchronized,
|
||||
* and encoding ring buffer can have any size, including small ones ( < 64 KB).
|
||||
*
|
||||
* Whenever these conditions are not possible,
|
||||
* save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
|
||||
* then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
|
||||
|
||||
|
||||
/*! LZ4_decompress_*_usingDict() :
|
||||
* These decoding functions work the same as
|
||||
* a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
|
||||
* They are stand-alone, and don't need an LZ4_streamDecode_t structure.
|
||||
* Dictionary is presumed stable : it must remain accessible and unmodified during decompression.
|
||||
* Performance tip : Decompression speed can be substantially increased
|
||||
* when dst == dictStart + dictSize.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
|
||||
|
||||
}
|
||||
|
||||
#endif /* LZ4_H_2983827168210 */
|
||||
|
||||
|
||||
/*^*************************************
|
||||
* !!!!!! STATIC LINKING ONLY !!!!!!
|
||||
***************************************/
|
||||
|
||||
/*-****************************************************************************
|
||||
* Experimental section
|
||||
*
|
||||
* Symbols declared in this section must be considered unstable. Their
|
||||
* signatures or semantics may change, or they may be removed altogether in the
|
||||
* future. They are therefore only safe to depend on when the caller is
|
||||
* statically linked against the library.
|
||||
*
|
||||
* To protect against unsafe usage, not only are the declarations guarded,
|
||||
* the definitions are hidden by default
|
||||
* when building LZ4 as a shared/dynamic library.
|
||||
*
|
||||
* In order to access these declarations,
|
||||
* define LZ4_STATIC_LINKING_ONLY in your application
|
||||
* before including LZ4's headers.
|
||||
*
|
||||
* In order to make their implementations accessible dynamically, you must
|
||||
* define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
|
||||
******************************************************************************/
|
||||
|
||||
#ifdef LZ4_STATIC_LINKING_ONLY
|
||||
|
||||
#ifndef TRACY_LZ4_STATIC_3504398509
|
||||
#define TRACY_LZ4_STATIC_3504398509
|
||||
|
||||
#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
|
||||
#define LZ4LIB_STATIC_API LZ4LIB_API
|
||||
#else
|
||||
#define LZ4LIB_STATIC_API
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
/*! LZ4_compress_fast_extState_fastReset() :
|
||||
* A variant of LZ4_compress_fast_extState().
|
||||
*
|
||||
* Using this variant avoids an expensive initialization step.
|
||||
* It is only safe to call if the state buffer is known to be correctly initialized already
|
||||
* (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized").
|
||||
* From a high level, the difference is that
|
||||
* this function initializes the provided state with a call to something like LZ4_resetStream_fast()
|
||||
* while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
|
||||
*/
|
||||
LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
|
||||
|
||||
/*! LZ4_attach_dictionary() :
|
||||
* This is an experimental API that allows
|
||||
* efficient use of a static dictionary many times.
|
||||
*
|
||||
* Rather than re-loading the dictionary buffer into a working context before
|
||||
* each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
|
||||
* working LZ4_stream_t, this function introduces a no-copy setup mechanism,
|
||||
* in which the working stream references the dictionary stream in-place.
|
||||
*
|
||||
* Several assumptions are made about the state of the dictionary stream.
|
||||
* Currently, only streams which have been prepared by LZ4_loadDict() should
|
||||
* be expected to work.
|
||||
*
|
||||
* Alternatively, the provided dictionaryStream may be NULL,
|
||||
* in which case any existing dictionary stream is unset.
|
||||
*
|
||||
* If a dictionary is provided, it replaces any pre-existing stream history.
|
||||
* The dictionary contents are the only history that can be referenced and
|
||||
* logically immediately precede the data compressed in the first subsequent
|
||||
* compression call.
|
||||
*
|
||||
* The dictionary will only remain attached to the working stream through the
|
||||
* first compression call, at the end of which it is cleared. The dictionary
|
||||
* stream (and source buffer) must remain in-place / accessible / unchanged
|
||||
* through the completion of the first compression call on the stream.
|
||||
*/
|
||||
LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
|
||||
|
||||
|
||||
/*! In-place compression and decompression
|
||||
*
|
||||
* It's possible to have input and output sharing the same buffer,
|
||||
* for highly contrained memory environments.
|
||||
* In both cases, it requires input to lay at the end of the buffer,
|
||||
* and decompression to start at beginning of the buffer.
|
||||
* Buffer size must feature some margin, hence be larger than final size.
|
||||
*
|
||||
* |<------------------------buffer--------------------------------->|
|
||||
* |<-----------compressed data--------->|
|
||||
* |<-----------decompressed size------------------>|
|
||||
* |<----margin---->|
|
||||
*
|
||||
* This technique is more useful for decompression,
|
||||
* since decompressed size is typically larger,
|
||||
* and margin is short.
|
||||
*
|
||||
* In-place decompression will work inside any buffer
|
||||
* which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
|
||||
* This presumes that decompressedSize > compressedSize.
|
||||
* Otherwise, it means compression actually expanded data,
|
||||
* and it would be more efficient to store such data with a flag indicating it's not compressed.
|
||||
* This can happen when data is not compressible (already compressed, or encrypted).
|
||||
*
|
||||
* For in-place compression, margin is larger, as it must be able to cope with both
|
||||
* history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
|
||||
* and data expansion, which can happen when input is not compressible.
|
||||
* As a consequence, buffer size requirements are much higher,
|
||||
* and memory savings offered by in-place compression are more limited.
|
||||
*
|
||||
* There are ways to limit this cost for compression :
|
||||
* - Reduce history size, by modifying LZ4_DISTANCE_MAX.
|
||||
* Note that it is a compile-time constant, so all compressions will apply this limit.
|
||||
* Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
|
||||
* so it's a reasonable trick when inputs are known to be small.
|
||||
* - Require the compressor to deliver a "maximum compressed size".
|
||||
* This is the `dstCapacity` parameter in `LZ4_compress*()`.
|
||||
* When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
|
||||
* in which case, the return code will be 0 (zero).
|
||||
* The caller must be ready for these cases to happen,
|
||||
* and typically design a backup scheme to send data uncompressed.
|
||||
* The combination of both techniques can significantly reduce
|
||||
* the amount of margin required for in-place compression.
|
||||
*
|
||||
* In-place compression can work in any buffer
|
||||
* which size is >= (maxCompressedSize)
|
||||
* with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
|
||||
* LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
|
||||
* so it's possible to reduce memory requirements by playing with them.
|
||||
*/
|
||||
|
||||
#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32)
|
||||
#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
|
||||
|
||||
#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */
|
||||
# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
|
||||
#endif
|
||||
|
||||
#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
|
||||
#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
|
||||
|
||||
}
|
||||
|
||||
#endif /* LZ4_STATIC_3504398509 */
|
||||
#endif /* LZ4_STATIC_LINKING_ONLY */
|
||||
|
||||
#ifndef TRACY_LZ4_H_98237428734687
|
||||
#define TRACY_LZ4_H_98237428734687
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
/*-************************************************************
|
||||
* Private Definitions
|
||||
**************************************************************
|
||||
* Do not use these definitions directly.
|
||||
* They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
|
||||
* Accessing members will expose user code to API and/or ABI break in future versions of the library.
|
||||
**************************************************************/
|
||||
#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
|
||||
#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
|
||||
#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
|
||||
|
||||
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
|
||||
typedef int8_t LZ4_i8;
|
||||
typedef uint8_t LZ4_byte;
|
||||
typedef uint16_t LZ4_u16;
|
||||
typedef uint32_t LZ4_u32;
|
||||
#else
|
||||
typedef signed char LZ4_i8;
|
||||
typedef unsigned char LZ4_byte;
|
||||
typedef unsigned short LZ4_u16;
|
||||
typedef unsigned int LZ4_u32;
|
||||
#endif
|
||||
|
||||
typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
|
||||
struct LZ4_stream_t_internal {
|
||||
LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
|
||||
LZ4_u32 currentOffset;
|
||||
LZ4_u32 tableType;
|
||||
const LZ4_byte* dictionary;
|
||||
const LZ4_stream_t_internal* dictCtx;
|
||||
LZ4_u32 dictSize;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
const LZ4_byte* externalDict;
|
||||
size_t extDictSize;
|
||||
const LZ4_byte* prefixEnd;
|
||||
size_t prefixSize;
|
||||
} LZ4_streamDecode_t_internal;
|
||||
|
||||
|
||||
/*! LZ4_stream_t :
|
||||
* Do not use below internal definitions directly !
|
||||
* Declare or allocate an LZ4_stream_t instead.
|
||||
* LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
|
||||
* The structure definition can be convenient for static allocation
|
||||
* (on stack, or as part of larger structure).
|
||||
* Init this structure with LZ4_initStream() before first use.
|
||||
* note : only use this definition in association with static linking !
|
||||
* this definition is not API/ABI safe, and may change in future versions.
|
||||
*/
|
||||
#define LZ4_STREAMSIZE 16416 /* static size, for inter-version compatibility */
|
||||
#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*))
|
||||
union LZ4_stream_u {
|
||||
void* table[LZ4_STREAMSIZE_VOIDP];
|
||||
LZ4_stream_t_internal internal_donotuse;
|
||||
}; /* previously typedef'd to LZ4_stream_t */
|
||||
|
||||
|
||||
/*! LZ4_initStream() : v1.9.0+
|
||||
* An LZ4_stream_t structure must be initialized at least once.
|
||||
* This is automatically done when invoking LZ4_createStream(),
|
||||
* but it's not when the structure is simply declared on stack (for example).
|
||||
*
|
||||
* Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
|
||||
* It can also initialize any arbitrary buffer of sufficient size,
|
||||
* and will @return a pointer of proper type upon initialization.
|
||||
*
|
||||
* Note : initialization fails if size and alignment conditions are not respected.
|
||||
* In which case, the function will @return NULL.
|
||||
* Note2: An LZ4_stream_t structure guarantees correct alignment and size.
|
||||
* Note3: Before v1.9.0, use LZ4_resetStream() instead
|
||||
*/
|
||||
LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size);
|
||||
|
||||
|
||||
/*! LZ4_streamDecode_t :
|
||||
* information structure to track an LZ4 stream during decompression.
|
||||
* init this structure using LZ4_setStreamDecode() before first use.
|
||||
* note : only use in association with static linking !
|
||||
* this definition is not API/ABI safe,
|
||||
* and may change in a future version !
|
||||
*/
|
||||
#define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ )
|
||||
#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
|
||||
union LZ4_streamDecode_u {
|
||||
unsigned long long table[LZ4_STREAMDECODESIZE_U64];
|
||||
LZ4_streamDecode_t_internal internal_donotuse;
|
||||
} ; /* previously typedef'd to LZ4_streamDecode_t */
|
||||
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Obsolete Functions
|
||||
**************************************/
|
||||
|
||||
/*! Deprecation warnings
|
||||
*
|
||||
* Deprecated functions make the compiler generate a warning when invoked.
|
||||
* This is meant to invite users to update their source code.
|
||||
* Should deprecation warnings be a problem, it is generally possible to disable them,
|
||||
* typically with -Wno-deprecated-declarations for gcc
|
||||
* or _CRT_SECURE_NO_WARNINGS in Visual.
|
||||
*
|
||||
* Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
|
||||
* before including the header file.
|
||||
*/
|
||||
#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
|
||||
# define LZ4_DEPRECATED(message) /* disable deprecation warnings */
|
||||
#else
|
||||
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
||||
# define LZ4_DEPRECATED(message) [[deprecated(message)]]
|
||||
# elif defined(_MSC_VER)
|
||||
# define LZ4_DEPRECATED(message) __declspec(deprecated(message))
|
||||
# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
|
||||
# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
|
||||
# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
|
||||
# define LZ4_DEPRECATED(message) __attribute__((deprecated))
|
||||
# else
|
||||
# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
|
||||
# define LZ4_DEPRECATED(message) /* disabled */
|
||||
# endif
|
||||
#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
|
||||
|
||||
/*! Obsolete compression functions (since v1.7.3) */
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
|
||||
/*! Obsolete decompression functions (since v1.8.0) */
|
||||
LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
|
||||
LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
|
||||
|
||||
/* Obsolete streaming functions (since v1.7.0)
|
||||
* degraded functionality; do not use!
|
||||
*
|
||||
* In order to perform streaming compression, these functions depended on data
|
||||
* that is no longer tracked in the state. They have been preserved as well as
|
||||
* possible: using them will still produce a correct output. However, they don't
|
||||
* actually retain any history between compression calls. The compression ratio
|
||||
* achieved will therefore be no better than compressing each chunk
|
||||
* independently.
|
||||
*/
|
||||
LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
|
||||
LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void);
|
||||
LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer);
|
||||
LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
|
||||
|
||||
/*! Obsolete streaming decoding functions (since v1.7.0) */
|
||||
LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
|
||||
LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
|
||||
|
||||
/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
|
||||
* These functions used to be faster than LZ4_decompress_safe(),
|
||||
* but this is no longer the case. They are now slower.
|
||||
* This is because LZ4_decompress_fast() doesn't know the input size,
|
||||
* and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
|
||||
* On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
|
||||
* As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
|
||||
*
|
||||
* The last remaining LZ4_decompress_fast() specificity is that
|
||||
* it can decompress a block without knowing its compressed size.
|
||||
* Such functionality can be achieved in a more secure manner
|
||||
* by employing LZ4_decompress_safe_partial().
|
||||
*
|
||||
* Parameters:
|
||||
* originalSize : is the uncompressed size to regenerate.
|
||||
* `dst` must be already allocated, its size must be >= 'originalSize' bytes.
|
||||
* @return : number of bytes read from source buffer (== compressed size).
|
||||
* The function expects to finish at block's end exactly.
|
||||
* If the source stream is detected malformed, the function stops decoding and returns a negative result.
|
||||
* note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer.
|
||||
* However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds.
|
||||
* Also, since match offsets are not validated, match reads from 'src' may underflow too.
|
||||
* These issues never happen if input (compressed) data is correct.
|
||||
* But they may happen if input data is invalid (error or intentional tampering).
|
||||
* As a consequence, use these functions in trusted environments with trusted data **only**.
|
||||
*/
|
||||
LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
|
||||
LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
|
||||
LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
|
||||
LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
|
||||
LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead")
|
||||
LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
|
||||
|
||||
/*! LZ4_resetStream() :
|
||||
* An LZ4_stream_t structure must be initialized at least once.
|
||||
* This is done with LZ4_initStream(), or LZ4_resetStream().
|
||||
* Consider switching to LZ4_initStream(),
|
||||
* invoking LZ4_resetStream() will trigger deprecation warnings in the future.
|
||||
*/
|
||||
LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
|
||||
|
||||
}
|
||||
|
||||
#endif /* LZ4_H_98237428734687 */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,405 @@
|
|||
/*
|
||||
LZ4 HC - High Compression Mode of LZ4
|
||||
Header File
|
||||
Copyright (C) 2011-2017, Yann Collet.
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- LZ4 source repository : https://github.com/lz4/lz4
|
||||
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
|
||||
*/
|
||||
#ifndef TRACY_LZ4_HC_H_19834876238432
|
||||
#define TRACY_LZ4_HC_H_19834876238432
|
||||
|
||||
/* --- Dependency --- */
|
||||
/* note : lz4hc requires lz4.h/lz4.c for compilation */
|
||||
#include "tracy_lz4.hpp" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
|
||||
|
||||
|
||||
/* --- Useful constants --- */
|
||||
#define LZ4HC_CLEVEL_MIN 3
|
||||
#define LZ4HC_CLEVEL_DEFAULT 9
|
||||
#define LZ4HC_CLEVEL_OPT_MIN 10
|
||||
#define LZ4HC_CLEVEL_MAX 12
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
/*-************************************
|
||||
* Block Compression
|
||||
**************************************/
|
||||
/*! LZ4_compress_HC() :
|
||||
* Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm.
|
||||
* `dst` must be already allocated.
|
||||
* Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
|
||||
* Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
|
||||
* `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work.
|
||||
* Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
|
||||
* @return : the number of bytes written into 'dst'
|
||||
* or 0 if compression fails.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
|
||||
|
||||
|
||||
/* Note :
|
||||
* Decompression functions are provided within "lz4.h" (BSD license)
|
||||
*/
|
||||
|
||||
|
||||
/*! LZ4_compress_HC_extStateHC() :
|
||||
* Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
|
||||
* `state` size is provided by LZ4_sizeofStateHC().
|
||||
* Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly).
|
||||
*/
|
||||
LZ4LIB_API int LZ4_sizeofStateHC(void);
|
||||
LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
|
||||
|
||||
|
||||
/*! LZ4_compress_HC_destSize() : v1.9.0+
|
||||
* Will compress as much data as possible from `src`
|
||||
* to fit into `targetDstSize` budget.
|
||||
* Result is provided in 2 parts :
|
||||
* @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
|
||||
* or 0 if compression fails.
|
||||
* `srcSizePtr` : on success, *srcSizePtr is updated to indicate how much bytes were read from `src`
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC,
|
||||
const char* src, char* dst,
|
||||
int* srcSizePtr, int targetDstSize,
|
||||
int compressionLevel);
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Streaming Compression
|
||||
* Bufferless synchronous API
|
||||
**************************************/
|
||||
typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */
|
||||
|
||||
/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
|
||||
* These functions create and release memory for LZ4 HC streaming state.
|
||||
* Newly created states are automatically initialized.
|
||||
* A same state can be used multiple times consecutively,
|
||||
* starting with LZ4_resetStreamHC_fast() to start a new stream of blocks.
|
||||
*/
|
||||
LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
|
||||
LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
|
||||
|
||||
/*
|
||||
These functions compress data in successive blocks of any size,
|
||||
using previous blocks as dictionary, to improve compression ratio.
|
||||
One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
|
||||
There is an exception for ring buffers, which can be smaller than 64 KB.
|
||||
Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue().
|
||||
|
||||
Before starting compression, state must be allocated and properly initialized.
|
||||
LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT.
|
||||
|
||||
Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream)
|
||||
or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental).
|
||||
LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once,
|
||||
which is automatically the case when state is created using LZ4_createStreamHC().
|
||||
|
||||
After reset, a first "fictional block" can be designated as initial dictionary,
|
||||
using LZ4_loadDictHC() (Optional).
|
||||
|
||||
Invoke LZ4_compress_HC_continue() to compress each successive block.
|
||||
The number of blocks is unlimited.
|
||||
Previous input blocks, including initial dictionary when present,
|
||||
must remain accessible and unmodified during compression.
|
||||
|
||||
It's allowed to update compression level anytime between blocks,
|
||||
using LZ4_setCompressionLevel() (experimental).
|
||||
|
||||
'dst' buffer should be sized to handle worst case scenarios
|
||||
(see LZ4_compressBound(), it ensures compression success).
|
||||
In case of failure, the API does not guarantee recovery,
|
||||
so the state _must_ be reset.
|
||||
To ensure compression success
|
||||
whenever `dst` buffer size cannot be made >= LZ4_compressBound(),
|
||||
consider using LZ4_compress_HC_continue_destSize().
|
||||
|
||||
Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks,
|
||||
it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC().
|
||||
Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB)
|
||||
|
||||
After completing a streaming compression,
|
||||
it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state,
|
||||
just by resetting it, using LZ4_resetStreamHC_fast().
|
||||
*/
|
||||
|
||||
LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel); /* v1.9.0+ */
|
||||
LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
|
||||
|
||||
LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr,
|
||||
const char* src, char* dst,
|
||||
int srcSize, int maxDstSize);
|
||||
|
||||
/*! LZ4_compress_HC_continue_destSize() : v1.9.0+
|
||||
* Similar to LZ4_compress_HC_continue(),
|
||||
* but will read as much data as possible from `src`
|
||||
* to fit into `targetDstSize` budget.
|
||||
* Result is provided into 2 parts :
|
||||
* @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
|
||||
* or 0 if compression fails.
|
||||
* `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how much bytes were read from `src`.
|
||||
* Note that this function may not consume the entire input.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
|
||||
const char* src, char* dst,
|
||||
int* srcSizePtr, int targetDstSize);
|
||||
|
||||
LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
|
||||
|
||||
|
||||
|
||||
/*^**********************************************
|
||||
* !!!!!! STATIC LINKING ONLY !!!!!!
|
||||
***********************************************/
|
||||
|
||||
/*-******************************************************************
|
||||
* PRIVATE DEFINITIONS :
|
||||
* Do not use these definitions directly.
|
||||
* They are merely exposed to allow static allocation of `LZ4_streamHC_t`.
|
||||
* Declare an `LZ4_streamHC_t` directly, rather than any type below.
|
||||
* Even then, only do so in the context of static linking, as definitions may change between versions.
|
||||
********************************************************************/
|
||||
|
||||
#define LZ4HC_DICTIONARY_LOGSIZE 16
|
||||
#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
|
||||
#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
|
||||
|
||||
#define LZ4HC_HASH_LOG 15
|
||||
#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
|
||||
#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
|
||||
|
||||
|
||||
typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
|
||||
struct LZ4HC_CCtx_internal
|
||||
{
|
||||
LZ4_u32 hashTable[LZ4HC_HASHTABLESIZE];
|
||||
LZ4_u16 chainTable[LZ4HC_MAXD];
|
||||
const LZ4_byte* end; /* next block here to continue on current prefix */
|
||||
const LZ4_byte* base; /* All index relative to this position */
|
||||
const LZ4_byte* dictBase; /* alternate base for extDict */
|
||||
LZ4_u32 dictLimit; /* below that point, need extDict */
|
||||
LZ4_u32 lowLimit; /* below that point, no more dict */
|
||||
LZ4_u32 nextToUpdate; /* index from which to continue dictionary update */
|
||||
short compressionLevel;
|
||||
LZ4_i8 favorDecSpeed; /* favor decompression speed if this flag set,
|
||||
otherwise, favor compression ratio */
|
||||
LZ4_i8 dirty; /* stream has to be fully reset if this flag is set */
|
||||
const LZ4HC_CCtx_internal* dictCtx;
|
||||
};
|
||||
|
||||
|
||||
/* Do not use these definitions directly !
|
||||
* Declare or allocate an LZ4_streamHC_t instead.
|
||||
*/
|
||||
#define LZ4_STREAMHCSIZE 262200 /* static size, for inter-version compatibility */
|
||||
#define LZ4_STREAMHCSIZE_VOIDP (LZ4_STREAMHCSIZE / sizeof(void*))
|
||||
union LZ4_streamHC_u {
|
||||
void* table[LZ4_STREAMHCSIZE_VOIDP];
|
||||
LZ4HC_CCtx_internal internal_donotuse;
|
||||
}; /* previously typedef'd to LZ4_streamHC_t */
|
||||
|
||||
/* LZ4_streamHC_t :
|
||||
* This structure allows static allocation of LZ4 HC streaming state.
|
||||
* This can be used to allocate statically, on state, or as part of a larger structure.
|
||||
*
|
||||
* Such state **must** be initialized using LZ4_initStreamHC() before first use.
|
||||
*
|
||||
* Note that invoking LZ4_initStreamHC() is not required when
|
||||
* the state was created using LZ4_createStreamHC() (which is recommended).
|
||||
* Using the normal builder, a newly created state is automatically initialized.
|
||||
*
|
||||
* Static allocation shall only be used in combination with static linking.
|
||||
*/
|
||||
|
||||
/* LZ4_initStreamHC() : v1.9.0+
|
||||
* Required before first use of a statically allocated LZ4_streamHC_t.
|
||||
* Before v1.9.0 : use LZ4_resetStreamHC() instead
|
||||
*/
|
||||
LZ4LIB_API LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size);
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Deprecated Functions
|
||||
**************************************/
|
||||
/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
|
||||
|
||||
/* deprecated compression functions */
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC (const char* source, char* dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
|
||||
/* Obsolete streaming functions; degraded functionality; do not use!
|
||||
*
|
||||
* In order to perform streaming compression, these functions depended on data
|
||||
* that is no longer tracked in the state. They have been preserved as well as
|
||||
* possible: using them will still produce a correct output. However, use of
|
||||
* LZ4_slideInputBufferHC() will truncate the history of the stream, rather
|
||||
* than preserve a window-sized chunk of history.
|
||||
*/
|
||||
LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API void* LZ4_createHC (const char* inputBuffer);
|
||||
LZ4_DEPRECATED("use LZ4_saveDictHC() instead") LZ4LIB_API char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
|
||||
LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") LZ4LIB_API int LZ4_freeHC (void* LZ4HC_Data);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API int LZ4_sizeofStreamStateHC(void);
|
||||
LZ4_DEPRECATED("use LZ4_initStreamHC() instead") LZ4LIB_API int LZ4_resetStreamStateHC(void* state, char* inputBuffer);
|
||||
|
||||
|
||||
/* LZ4_resetStreamHC() is now replaced by LZ4_initStreamHC().
|
||||
* The intention is to emphasize the difference with LZ4_resetStreamHC_fast(),
|
||||
* which is now the recommended function to start a new stream of blocks,
|
||||
* but cannot be used to initialize a memory segment containing arbitrary garbage data.
|
||||
*
|
||||
* It is recommended to switch to LZ4_initStreamHC().
|
||||
* LZ4_resetStreamHC() will generate deprecation warnings in a future version.
|
||||
*/
|
||||
LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
|
||||
|
||||
}
|
||||
|
||||
#endif /* LZ4_HC_H_19834876238432 */
|
||||
|
||||
|
||||
/*-**************************************************
|
||||
* !!!!! STATIC LINKING ONLY !!!!!
|
||||
* Following definitions are considered experimental.
|
||||
* They should not be linked from DLL,
|
||||
* as there is no guarantee of API stability yet.
|
||||
* Prototypes will be promoted to "stable" status
|
||||
* after successfull usage in real-life scenarios.
|
||||
***************************************************/
|
||||
#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */
|
||||
#ifndef TRACY_LZ4_HC_SLO_098092834
|
||||
#define TRACY_LZ4_HC_SLO_098092834
|
||||
|
||||
#define LZ4_STATIC_LINKING_ONLY /* LZ4LIB_STATIC_API */
|
||||
#include "tracy_lz4.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
/*! LZ4_setCompressionLevel() : v1.8.0+ (experimental)
|
||||
* It's possible to change compression level
|
||||
* between successive invocations of LZ4_compress_HC_continue*()
|
||||
* for dynamic adaptation.
|
||||
*/
|
||||
LZ4LIB_STATIC_API void LZ4_setCompressionLevel(
|
||||
LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
|
||||
|
||||
/*! LZ4_favorDecompressionSpeed() : v1.8.2+ (experimental)
|
||||
* Opt. Parser will favor decompression speed over compression ratio.
|
||||
* Only applicable to levels >= LZ4HC_CLEVEL_OPT_MIN.
|
||||
*/
|
||||
LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed(
|
||||
LZ4_streamHC_t* LZ4_streamHCPtr, int favor);
|
||||
|
||||
/*! LZ4_resetStreamHC_fast() : v1.9.0+
|
||||
* When an LZ4_streamHC_t is known to be in a internally coherent state,
|
||||
* it can often be prepared for a new compression with almost no work, only
|
||||
* sometimes falling back to the full, expensive reset that is always required
|
||||
* when the stream is in an indeterminate state (i.e., the reset performed by
|
||||
* LZ4_resetStreamHC()).
|
||||
*
|
||||
* LZ4_streamHCs are guaranteed to be in a valid state when:
|
||||
* - returned from LZ4_createStreamHC()
|
||||
* - reset by LZ4_resetStreamHC()
|
||||
* - memset(stream, 0, sizeof(LZ4_streamHC_t))
|
||||
* - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast()
|
||||
* - the stream was in a valid state and was then used in any compression call
|
||||
* that returned success
|
||||
* - the stream was in an indeterminate state and was used in a compression
|
||||
* call that fully reset the state (LZ4_compress_HC_extStateHC()) and that
|
||||
* returned success
|
||||
*
|
||||
* Note:
|
||||
* A stream that was last used in a compression call that returned an error
|
||||
* may be passed to this function. However, it will be fully reset, which will
|
||||
* clear any existing history and settings from the context.
|
||||
*/
|
||||
LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast(
|
||||
LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
|
||||
|
||||
/*! LZ4_compress_HC_extStateHC_fastReset() :
|
||||
* A variant of LZ4_compress_HC_extStateHC().
|
||||
*
|
||||
* Using this variant avoids an expensive initialization step. It is only safe
|
||||
* to call if the state buffer is known to be correctly initialized already
|
||||
* (see above comment on LZ4_resetStreamHC_fast() for a definition of
|
||||
* "correctly initialized"). From a high level, the difference is that this
|
||||
* function initializes the provided state with a call to
|
||||
* LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a
|
||||
* call to LZ4_resetStreamHC().
|
||||
*/
|
||||
LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset (
|
||||
void* state,
|
||||
const char* src, char* dst,
|
||||
int srcSize, int dstCapacity,
|
||||
int compressionLevel);
|
||||
|
||||
/*! LZ4_attach_HC_dictionary() :
|
||||
* This is an experimental API that allows for the efficient use of a
|
||||
* static dictionary many times.
|
||||
*
|
||||
* Rather than re-loading the dictionary buffer into a working context before
|
||||
* each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a
|
||||
* working LZ4_streamHC_t, this function introduces a no-copy setup mechanism,
|
||||
* in which the working stream references the dictionary stream in-place.
|
||||
*
|
||||
* Several assumptions are made about the state of the dictionary stream.
|
||||
* Currently, only streams which have been prepared by LZ4_loadDictHC() should
|
||||
* be expected to work.
|
||||
*
|
||||
* Alternatively, the provided dictionary stream pointer may be NULL, in which
|
||||
* case any existing dictionary stream is unset.
|
||||
*
|
||||
* A dictionary should only be attached to a stream without any history (i.e.,
|
||||
* a stream that has just been reset).
|
||||
*
|
||||
* The dictionary will remain attached to the working stream only for the
|
||||
* current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the
|
||||
* dictionary context association from the working stream. The dictionary
|
||||
* stream (and source buffer) must remain in-place / accessible / unchanged
|
||||
* through the lifetime of the stream session.
|
||||
*/
|
||||
LZ4LIB_STATIC_API void LZ4_attach_HC_dictionary(
|
||||
LZ4_streamHC_t *working_stream,
|
||||
const LZ4_streamHC_t *dictionary_stream);
|
||||
|
||||
}
|
||||
|
||||
#endif /* LZ4_HC_SLO_098092834 */
|
||||
#endif /* LZ4_HC_STATIC_LINKING_ONLY */
|
|
@ -0,0 +1,13 @@
|
|||
ARCH := $(shell uname -m)
|
||||
|
||||
ifeq (0,$(shell $(CC) --version | grep clang && echo 1 || echo 0))
|
||||
CFLAGS += -s
|
||||
else
|
||||
LDFLAGS := -s
|
||||
endif
|
||||
|
||||
ifneq (,$(filter $(ARCH),aarch64 arm64))
|
||||
CFLAGS += -mcpu=native
|
||||
else
|
||||
CFLAGS += -march=native
|
||||
endif
|
|
@ -0,0 +1,71 @@
|
|||
# Common code needed by most Tracy Unix Makefiles.
|
||||
|
||||
# Ensure these are simply-substituted variables, without changing their values.
|
||||
LIBS := $(LIBS)
|
||||
|
||||
# Tracy does not use TBB directly, but the implementation of parallel algorithms
|
||||
# in some versions of libstdc++ depends on TBB. When it does, you must
|
||||
# explicitly link against -ltbb.
|
||||
#
|
||||
# Some distributions have pgk-config files for TBB, others don't.
|
||||
ifeq (0,$(shell pkg-config --libs tbb >/dev/null 2>&1; echo $$?))
|
||||
LIBS += $(shell pkg-config --libs tbb)
|
||||
else ifeq (0,$(shell ld -ltbb -o /dev/null 2>/dev/null; echo $$?))
|
||||
LIBS += -ltbb
|
||||
endif
|
||||
|
||||
OBJDIRBASE := obj/$(BUILD)
|
||||
OBJDIR := $(OBJDIRBASE)/o/o/o
|
||||
|
||||
OBJ := $(addprefix $(OBJDIR)/,$(SRC:%.cpp=%.o))
|
||||
OBJ2 := $(addprefix $(OBJDIR)/,$(SRC2:%.c=%.o))
|
||||
OBJ3 := $(addprefix $(OBJDIR)/,$(SRC3:%.m=%.o))
|
||||
|
||||
all: $(IMAGE)
|
||||
|
||||
$(OBJDIR)/%.o: %.cpp
|
||||
$(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
$(OBJDIR)/%.d : %.cpp
|
||||
@echo Resolving dependencies of $<
|
||||
@mkdir -p $(@D)
|
||||
@$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \
|
||||
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \
|
||||
rm -f $@.$$$$
|
||||
|
||||
$(OBJDIR)/%.o: %.c
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
$(OBJDIR)/%.d : %.c
|
||||
@echo Resolving dependencies of $<
|
||||
@mkdir -p $(@D)
|
||||
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
|
||||
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \
|
||||
rm -f $@.$$$$
|
||||
|
||||
$(OBJDIR)/%.o: %.m
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
$(OBJDIR)/%.d : %.m
|
||||
@echo Resolving dependencies of $<
|
||||
@mkdir -p $(@D)
|
||||
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
|
||||
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.m=.o) $@ : ,g' < $@.$$$$ > $@; \
|
||||
rm -f $@.$$$$
|
||||
|
||||
ifeq (yes,$(SHARED_LIBRARY))
|
||||
$(IMAGE): $(OBJ) $(OBJ2)
|
||||
$(CXX) $(CXXFLAGS) $(LDFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(LIBS) -shared -o $@
|
||||
else
|
||||
$(IMAGE): $(OBJ) $(OBJ2) $(OBJ3)
|
||||
$(CXX) $(CXXFLAGS) $(LDFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(OBJ3) $(LIBS) -o $@
|
||||
endif
|
||||
|
||||
ifneq "$(MAKECMDGOALS)" "clean"
|
||||
-include $(addprefix $(OBJDIR)/,$(SRC:.cpp=.d)) $(addprefix $(OBJDIR)/,$(SRC2:.c=.d)) $(addprefix $(OBJDIR)/,$(SRC3:.m=.d))
|
||||
endif
|
||||
|
||||
clean:
|
||||
rm -rf $(OBJDIRBASE) $(IMAGE)*
|
||||
|
||||
.PHONY: clean all
|
|
@ -0,0 +1,311 @@
|
|||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../../server/TracyFileRead.hpp"
|
||||
#include "../../server/TracyWorker.hpp"
|
||||
#include "../../getopt/getopt.h"
|
||||
|
||||
void print_usage_exit(int e)
|
||||
{
|
||||
fprintf(stderr, "Extract statistics from a trace to a CSV format\n");
|
||||
fprintf(stderr, "Usage:\n");
|
||||
fprintf(stderr, " extract [OPTION...] <trace file>\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " -h, --help Print usage\n");
|
||||
fprintf(stderr, " -f, --filter arg Filter zone names (default: "")\n");
|
||||
fprintf(stderr, " -s, --sep arg CSV separator (default: ,)\n");
|
||||
fprintf(stderr, " -c, --case Case sensitive filtering\n");
|
||||
fprintf(stderr, " -e, --self Get self times\n");
|
||||
fprintf(stderr, " -u, --unwrap Report each zone event\n");
|
||||
|
||||
exit(e);
|
||||
}
|
||||
|
||||
struct Args {
|
||||
const char* filter;
|
||||
const char* separator;
|
||||
const char* trace_file;
|
||||
bool case_sensitive;
|
||||
bool self_time;
|
||||
bool unwrap;
|
||||
};
|
||||
|
||||
Args parse_args(int argc, char** argv)
|
||||
{
|
||||
if (argc == 1)
|
||||
{
|
||||
print_usage_exit(1);
|
||||
}
|
||||
|
||||
Args args = { "", ",", "", false, false, false };
|
||||
|
||||
struct option long_opts[] = {
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
{ "filter", optional_argument, NULL, 'f' },
|
||||
{ "sep", optional_argument, NULL, 's' },
|
||||
{ "case", no_argument, NULL, 'c' },
|
||||
{ "self", no_argument, NULL, 'e' },
|
||||
{ "unwrap", no_argument, NULL, 'u' },
|
||||
{ NULL, 0, NULL, 0 }
|
||||
};
|
||||
|
||||
int c;
|
||||
while ((c = getopt_long(argc, argv, "hf:s:ceu", long_opts, NULL)) != -1)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case 'h':
|
||||
print_usage_exit(0);
|
||||
break;
|
||||
case 'f':
|
||||
args.filter = optarg;
|
||||
break;
|
||||
case 's':
|
||||
args.separator = optarg;
|
||||
break;
|
||||
case 'c':
|
||||
args.case_sensitive = true;
|
||||
break;
|
||||
case 'e':
|
||||
args.self_time = true;
|
||||
break;
|
||||
case 'u':
|
||||
args.unwrap = true;
|
||||
break;
|
||||
default:
|
||||
print_usage_exit(1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (argc != optind + 1)
|
||||
{
|
||||
print_usage_exit(1);
|
||||
}
|
||||
|
||||
args.trace_file = argv[optind];
|
||||
|
||||
return args;
|
||||
}
|
||||
|
||||
bool is_substring(
|
||||
const char* term,
|
||||
const char* s,
|
||||
bool case_sensitive = false
|
||||
){
|
||||
auto new_term = std::string(term);
|
||||
auto new_s = std::string(s);
|
||||
|
||||
if (!case_sensitive) {
|
||||
std::transform(
|
||||
new_term.begin(),
|
||||
new_term.end(),
|
||||
new_term.begin(),
|
||||
[](unsigned char c){ return std::tolower(c); }
|
||||
);
|
||||
|
||||
std::transform(
|
||||
new_s.begin(),
|
||||
new_s.end(),
|
||||
new_s.begin(),
|
||||
[](unsigned char c){ return std::tolower(c); }
|
||||
);
|
||||
}
|
||||
|
||||
return new_s.find(new_term) != std::string::npos;
|
||||
}
|
||||
|
||||
const char* get_name(int32_t id, const tracy::Worker& worker)
|
||||
{
|
||||
auto& srcloc = worker.GetSourceLocation(id);
|
||||
return worker.GetString(srcloc.name.active ? srcloc.name : srcloc.function);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::string join(const T& v, const char* sep) {
|
||||
std::ostringstream s;
|
||||
for (const auto& i : v) {
|
||||
if (&i != &v[0]) {
|
||||
s << sep;
|
||||
}
|
||||
s << i;
|
||||
}
|
||||
return s.str();
|
||||
}
|
||||
|
||||
// From TracyView.cpp
|
||||
int64_t GetZoneChildTimeFast(
|
||||
const tracy::Worker& worker,
|
||||
const tracy::ZoneEvent& zone
|
||||
){
|
||||
int64_t time = 0;
|
||||
if( zone.HasChildren() )
|
||||
{
|
||||
auto& children = worker.GetZoneChildren( zone.Child() );
|
||||
if( children.is_magic() )
|
||||
{
|
||||
auto& vec = *(tracy::Vector<tracy::ZoneEvent>*)&children;
|
||||
for( auto& v : vec )
|
||||
{
|
||||
assert( v.IsEndValid() );
|
||||
time += v.End() - v.Start();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for( auto& v : children )
|
||||
{
|
||||
assert( v->IsEndValid() );
|
||||
time += v->End() - v->Start();
|
||||
}
|
||||
}
|
||||
}
|
||||
return time;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
if (!AttachConsole(ATTACH_PARENT_PROCESS))
|
||||
{
|
||||
AllocConsole();
|
||||
SetConsoleMode(GetStdHandle(STD_OUTPUT_HANDLE), 0x07);
|
||||
}
|
||||
#endif
|
||||
|
||||
Args args = parse_args(argc, argv);
|
||||
|
||||
auto f = std::unique_ptr<tracy::FileRead>(
|
||||
tracy::FileRead::Open(args.trace_file)
|
||||
);
|
||||
if (!f)
|
||||
{
|
||||
fprintf(stderr, "Could not open file %s\n", args.trace_file);
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto worker = tracy::Worker(*f);
|
||||
|
||||
while (!worker.AreSourceLocationZonesReady())
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
}
|
||||
|
||||
auto& slz = worker.GetSourceLocationZones();
|
||||
tracy::Vector<decltype(slz.begin())> slz_selected;
|
||||
slz_selected.reserve(slz.size());
|
||||
|
||||
uint32_t total_cnt = 0;
|
||||
for(auto it = slz.begin(); it != slz.end(); ++it)
|
||||
{
|
||||
if(it->second.total != 0)
|
||||
{
|
||||
++total_cnt;
|
||||
if(args.filter[0] == '\0')
|
||||
{
|
||||
slz_selected.push_back_no_space_check(it);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto name = get_name(it->first, worker);
|
||||
if(is_substring(args.filter, name, args.case_sensitive))
|
||||
{
|
||||
slz_selected.push_back_no_space_check(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<const char*> columns;
|
||||
if (args.unwrap)
|
||||
{
|
||||
columns = {
|
||||
"name", "src_file", "src_line", "ns_since_start", "exec_time_ns"
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
columns = {
|
||||
"name", "src_file", "src_line", "total_ns", "total_perc",
|
||||
"counts", "mean_ns", "min_ns", "max_ns", "std_ns"
|
||||
};
|
||||
}
|
||||
std::string header = join(columns, args.separator);
|
||||
printf("%s\n", header.data());
|
||||
|
||||
const auto last_time = worker.GetLastTime();
|
||||
for(auto& it : slz_selected)
|
||||
{
|
||||
std::vector<std::string> values(columns.size());
|
||||
|
||||
values[0] = get_name(it->first, worker);
|
||||
|
||||
const auto& srcloc = worker.GetSourceLocation(it->first);
|
||||
values[1] = worker.GetString(srcloc.file);
|
||||
values[2] = std::to_string(srcloc.line);
|
||||
|
||||
const auto& zone_data = it->second;
|
||||
|
||||
if (args.unwrap)
|
||||
{
|
||||
int i = 0;
|
||||
for (const auto& zone_thread_data : zone_data.zones) {
|
||||
const auto zone_event = zone_thread_data.Zone();
|
||||
const auto start = zone_event->Start();
|
||||
const auto end = zone_event->End();
|
||||
|
||||
values[3] = std::to_string(start);
|
||||
|
||||
auto timespan = end - start;
|
||||
if (args.self_time) {
|
||||
timespan -= GetZoneChildTimeFast(worker, *zone_event);
|
||||
}
|
||||
values[4] = std::to_string(timespan);
|
||||
|
||||
std::string row = join(values, args.separator);
|
||||
printf("%s\n", row.data());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto time = args.self_time ? zone_data.selfTotal : zone_data.total;
|
||||
values[3] = std::to_string(time);
|
||||
values[4] = std::to_string(100. * time / last_time);
|
||||
|
||||
values[5] = std::to_string(zone_data.zones.size());
|
||||
|
||||
const auto avg = (args.self_time ? zone_data.selfTotal : zone_data.total)
|
||||
/ zone_data.zones.size();
|
||||
values[6] = std::to_string(avg);
|
||||
|
||||
const auto tmin = args.self_time ? zone_data.selfMin : zone_data.min;
|
||||
const auto tmax = args.self_time ? zone_data.selfMax : zone_data.max;
|
||||
values[7] = std::to_string(tmin);
|
||||
values[8] = std::to_string(tmax);
|
||||
|
||||
const auto sz = zone_data.zones.size();
|
||||
const auto ss = zone_data.sumSq
|
||||
- 2. * zone_data.total * avg
|
||||
+ avg * avg * sz;
|
||||
const auto std = sqrt(ss / (sz - 1));
|
||||
values[9] = std::to_string(std);
|
||||
|
||||
std::string row = join(values, args.separator);
|
||||
printf("%s\n", row.data());
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,370 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
id="svg8"
|
||||
version="1.1"
|
||||
viewBox="0 0 139.17125 37.041668"
|
||||
height="140"
|
||||
width="526.00159">
|
||||
<defs
|
||||
id="defs2">
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="marker6660"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path6658" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="marker6158"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path6156" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="Arrow1Send"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.2,0,0,-0.2,-1.2,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path4694" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="marker5984"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path5982" />
|
||||
</marker>
|
||||
<marker
|
||||
orient="auto"
|
||||
refY="0"
|
||||
refX="0"
|
||||
id="marker5482"
|
||||
style="overflow:visible">
|
||||
<path
|
||||
id="path5480"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)" />
|
||||
</marker>
|
||||
<marker
|
||||
orient="auto"
|
||||
refY="0"
|
||||
refX="0"
|
||||
id="marker5472"
|
||||
style="overflow:visible">
|
||||
<path
|
||||
id="path5470"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="marker5378"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path5376" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="marker5308"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path5306" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="Arrow1Mend"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path4688" />
|
||||
</marker>
|
||||
<marker
|
||||
orient="auto"
|
||||
refY="0"
|
||||
refX="0"
|
||||
id="marker5170"
|
||||
style="overflow:visible">
|
||||
<path
|
||||
id="path5168"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
transform="matrix(-0.8,0,0,-0.8,-10,0)" />
|
||||
</marker>
|
||||
<marker
|
||||
orient="auto"
|
||||
refY="0"
|
||||
refX="0"
|
||||
id="marker4963"
|
||||
style="overflow:visible">
|
||||
<path
|
||||
id="path4961"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
transform="matrix(-0.8,0,0,-0.8,-10,0)" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="marker6158-2"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path6156-2" />
|
||||
</marker>
|
||||
</defs>
|
||||
<metadata
|
||||
id="metadata5">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
transform="translate(-18.388332,-17.864582)"
|
||||
id="layer1">
|
||||
<g
|
||||
id="g4666">
|
||||
<rect
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect4607"
|
||||
width="17.197916"
|
||||
height="6.614583"
|
||||
x="18.520834"
|
||||
y="20.510416" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458302px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="20.968229"
|
||||
y="24.869841"
|
||||
id="text4611"><tspan
|
||||
id="tspan4609"
|
||||
x="20.968229"
|
||||
y="24.869841"
|
||||
style="stroke-width:0.26458332px">Thread 1</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g4661">
|
||||
<rect
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect4607-4"
|
||||
width="17.197916"
|
||||
height="6.6145835"
|
||||
x="18.520834"
|
||||
y="32.416668" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458349px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="20.979254"
|
||||
y="36.776093"
|
||||
id="text4611-8"><tspan
|
||||
id="tspan4609-9"
|
||||
x="20.979254"
|
||||
y="36.776093"
|
||||
style="stroke-width:0.26458332px">Thread 2</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g4671">
|
||||
<rect
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect4607-8"
|
||||
width="17.197916"
|
||||
height="6.6145835"
|
||||
x="18.520832"
|
||||
y="44.322918" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458349px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="20.951002"
|
||||
y="48.682343"
|
||||
id="text4611-89"><tspan
|
||||
id="tspan4609-6"
|
||||
x="20.951002"
|
||||
y="48.682343"
|
||||
style="stroke-width:0.26458332px">Thread 3</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g5096">
|
||||
<ellipse
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="path4644"
|
||||
cx="67.775978"
|
||||
cy="36.3787"
|
||||
rx="10.583333"
|
||||
ry="4.6302085" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="67.733261"
|
||||
y="35.623535"
|
||||
id="text4648"><tspan
|
||||
id="tspan4646"
|
||||
x="67.733261"
|
||||
y="35.623535"
|
||||
style="text-align:center;text-anchor:middle;stroke-width:0.26458332px">Tracy</tspan><tspan
|
||||
x="67.733261"
|
||||
y="39.151314"
|
||||
style="text-align:center;text-anchor:middle;stroke-width:0.26458332px"
|
||||
id="tspan4650">client</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
id="path4673"
|
||||
d="m 37.041666,24.479166 19.84375,7.937502"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker5472)" />
|
||||
<path
|
||||
id="path4675"
|
||||
d="m 37.041666,46.968751 19.84375,-6.614584"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker5482)" />
|
||||
<path
|
||||
id="path4677"
|
||||
d="M 37.041667,36.385417 H 55.5625"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker5378)" />
|
||||
<path
|
||||
id="path5059"
|
||||
d="M 84.666667,17.864582 V 54.90625"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26458332;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:1.05833327, 2.11666654;stroke-dashoffset:0;stroke-opacity:1" />
|
||||
<g
|
||||
id="g5106">
|
||||
<ellipse
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="path4644-1"
|
||||
cx="101.98283"
|
||||
cy="36.56768"
|
||||
rx="10.583333"
|
||||
ry="4.6302085" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="101.90772"
|
||||
y="35.812515"
|
||||
id="text4648-2"><tspan
|
||||
id="tspan4646-5"
|
||||
x="101.90772"
|
||||
y="35.812515"
|
||||
style="text-align:center;text-anchor:middle;stroke-width:0.26458332px">Tracy</tspan><tspan
|
||||
x="101.90772"
|
||||
y="39.340294"
|
||||
style="text-align:center;text-anchor:middle;stroke-width:0.26458332px"
|
||||
id="tspan4650-1">server</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
id="path5108"
|
||||
d="M 79.375,37.708333 H 89.958333"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker6660)" />
|
||||
<path
|
||||
id="path5110"
|
||||
d="M 89.958333,35.0625 H 79.375"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker5308)" />
|
||||
<g
|
||||
transform="translate(-2.64619,-1.3704153)"
|
||||
id="g6152">
|
||||
<ellipse
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
|
||||
id="path6114"
|
||||
cx="128.98439"
|
||||
cy="33.692333"
|
||||
rx="4.6302085"
|
||||
ry="1.2756696" />
|
||||
<path
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
|
||||
d="m 124.36251,41.677042 c -0.004,0.01582 -0.007,0.03168 -0.008,0.04754 5.3e-4,0.704384 2.07327,1.275328 4.62995,1.275373 2.55689,3.5e-5 4.62988,-0.570931 4.63048,-1.275373 -10e-4,-0.01585 -0.003,-0.03171 -0.006,-0.04754"
|
||||
id="path6114-1" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 124.35417,33.739583 v 8.021022"
|
||||
id="path6138" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 133.61458,33.739582 v 8.051744"
|
||||
id="path6140" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458302px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="126.89217"
|
||||
y="39.409225"
|
||||
id="text6144"><tspan
|
||||
id="tspan6142"
|
||||
x="126.89217"
|
||||
y="39.409225"
|
||||
style="stroke-width:0.26458332px">DB</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
id="path6154"
|
||||
d="m 113.77082,36.385418 h 6.61459"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker6158)" />
|
||||
<g
|
||||
transform="translate(2.6458333,1.2715659e-6)"
|
||||
id="g6241">
|
||||
<rect
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
|
||||
id="rect6232"
|
||||
width="17.197916"
|
||||
height="5.291667"
|
||||
x="137.58333"
|
||||
y="33.739582" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458302px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="140.8851"
|
||||
y="37.164005"
|
||||
id="text6236"><tspan
|
||||
id="tspan6234"
|
||||
x="140.8851"
|
||||
y="37.164005"
|
||||
style="stroke-width:0.26458332px">Display</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
id="path6154-3"
|
||||
d="m 132.29166,36.385417 h 6.61459"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker6158-2)" />
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 15 KiB |
Binary file not shown.
After Width: | Height: | Size: 17 KiB |
Binary file not shown.
After Width: | Height: | Size: 261 KiB |
Binary file not shown.
After Width: | Height: | Size: 156 KiB |
|
@ -0,0 +1,14 @@
|
|||
cmake_minimum_required(VERSION 3.0)
|
||||
|
||||
project(OpenCLVectorAdd)
|
||||
|
||||
find_package(OpenCL REQUIRED)
|
||||
|
||||
add_executable(OpenCLVectorAdd OpenCLVectorAdd.cpp)
|
||||
|
||||
add_library(TracyClient STATIC ../../TracyClient.cpp
|
||||
../../TracyOpenCL.hpp)
|
||||
target_include_directories(TracyClient PUBLIC ../../)
|
||||
target_compile_definitions(TracyClient PUBLIC TRACY_ENABLE=1)
|
||||
|
||||
target_link_libraries(OpenCLVectorAdd PUBLIC OpenCL::OpenCL TracyClient)
|
|
@ -0,0 +1,190 @@
|
|||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include <Tracy.hpp>
|
||||
#include <TracyOpenCL.hpp>
|
||||
|
||||
#define CL_ASSERT(err) \
|
||||
if((err) != CL_SUCCESS) \
|
||||
{ \
|
||||
std::cerr << "OpenCL Call Returned " << err << std::endl; \
|
||||
assert(false); \
|
||||
}
|
||||
|
||||
const char kernelSource[] =
|
||||
" void __kernel vectorAdd(global float* C, global float* A, global float* B, int N) "
|
||||
" { "
|
||||
" int i = get_global_id(0); "
|
||||
" if (i < N) { "
|
||||
" C[i] = A[i] + B[i]; "
|
||||
" } "
|
||||
" } ";
|
||||
|
||||
int main()
|
||||
{
|
||||
cl_platform_id platform;
|
||||
cl_device_id device;
|
||||
cl_context context;
|
||||
cl_command_queue commandQueue;
|
||||
cl_kernel vectorAddKernel;
|
||||
cl_program program;
|
||||
cl_int err;
|
||||
cl_mem bufferA, bufferB, bufferC;
|
||||
|
||||
TracyCLCtx tracyCLCtx;
|
||||
|
||||
{
|
||||
ZoneScopedN("OpenCL Init");
|
||||
|
||||
cl_uint numPlatforms = 0;
|
||||
CL_ASSERT(clGetPlatformIDs(0, nullptr, &numPlatforms));
|
||||
|
||||
if (numPlatforms == 0)
|
||||
{
|
||||
std::cerr << "Cannot find OpenCL platform to run this application" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
CL_ASSERT(clGetPlatformIDs(1, &platform, nullptr));
|
||||
|
||||
size_t platformNameBufferSize = 0;
|
||||
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, nullptr, &platformNameBufferSize));
|
||||
std::string platformName(platformNameBufferSize, '\0');
|
||||
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, platformNameBufferSize, &platformName[0], nullptr));
|
||||
|
||||
std::cout << "OpenCL Platform: " << platformName << std::endl;
|
||||
|
||||
CL_ASSERT(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr));
|
||||
size_t deviceNameBufferSize = 0;
|
||||
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, 0, nullptr, &deviceNameBufferSize));
|
||||
std::string deviceName(deviceNameBufferSize, '\0');
|
||||
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, deviceNameBufferSize, &deviceName[0], nullptr));
|
||||
|
||||
std::cout << "OpenCL Device: " << deviceName << std::endl;
|
||||
|
||||
err = CL_SUCCESS;
|
||||
context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
size_t kernelSourceLength = sizeof(kernelSource);
|
||||
const char* kernelSourceArray = { kernelSource };
|
||||
program = clCreateProgramWithSource(context, 1, &kernelSourceArray, &kernelSourceLength, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
if (clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr) != CL_SUCCESS)
|
||||
{
|
||||
size_t programBuildLogBufferSize = 0;
|
||||
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &programBuildLogBufferSize));
|
||||
std::string programBuildLog(programBuildLogBufferSize, '\0');
|
||||
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, programBuildLogBufferSize, &programBuildLog[0], nullptr));
|
||||
std::clog << programBuildLog << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
vectorAddKernel = clCreateKernel(program, "vectorAdd", &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
commandQueue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
|
||||
CL_ASSERT(err);
|
||||
}
|
||||
|
||||
tracyCLCtx = TracyCLContext(context, device);
|
||||
|
||||
size_t N = 10 * 1024 * 1024 / sizeof(float); // 10MB of floats
|
||||
std::vector<float> hostA, hostB, hostC;
|
||||
|
||||
{
|
||||
ZoneScopedN("Host Data Init");
|
||||
hostA.resize(N);
|
||||
hostB.resize(N);
|
||||
hostC.resize(N);
|
||||
|
||||
std::iota(std::begin(hostA), std::end(hostA), 0);
|
||||
std::iota(std::begin(hostB), std::end(hostB), 0);
|
||||
}
|
||||
|
||||
{
|
||||
ZoneScopedN("Host to Device Memory Copy");
|
||||
|
||||
bufferA = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
bufferB = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
bufferC = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
cl_event writeBufferAEvent, writeBufferBEvent;
|
||||
{
|
||||
ZoneScopedN("Write Buffer A");
|
||||
TracyCLZoneS(tracyCLCtx, "Write BufferA", 5);
|
||||
|
||||
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferA, CL_TRUE, 0, N * sizeof(float), hostA.data(), 0, nullptr, &writeBufferAEvent));
|
||||
|
||||
TracyCLZoneSetEvent(writeBufferAEvent);
|
||||
}
|
||||
{
|
||||
ZoneScopedN("Write Buffer B");
|
||||
TracyCLZone(tracyCLCtx, "Write BufferB");
|
||||
|
||||
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferB, CL_TRUE, 0, N * sizeof(float), hostB.data(), 0, nullptr, &writeBufferBEvent));
|
||||
|
||||
TracyCLZoneSetEvent(writeBufferBEvent);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 10; ++i)
|
||||
{
|
||||
ZoneScopedN("VectorAdd Kernel Launch");
|
||||
TracyCLZoneC(tracyCLCtx, "VectorAdd Kernel", tracy::Color::Blue4);
|
||||
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 0, sizeof(cl_mem), &bufferC));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 1, sizeof(cl_mem), &bufferA));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 2, sizeof(cl_mem), &bufferB));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 3, sizeof(int), &static_cast<int>(N)));
|
||||
|
||||
cl_event vectorAddKernelEvent;
|
||||
CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent));
|
||||
|
||||
CL_ASSERT(clWaitForEvents(1, &vectorAddKernelEvent));
|
||||
|
||||
TracyCLZoneSetEvent(vectorAddKernelEvent);
|
||||
|
||||
cl_ulong kernelStartTime, kernelEndTime;
|
||||
CL_ASSERT(clGetEventProfilingInfo(vectorAddKernelEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernelStartTime, nullptr));
|
||||
CL_ASSERT(clGetEventProfilingInfo(vectorAddKernelEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernelEndTime, nullptr));
|
||||
std::cout << "VectorAdd Kernel Elapsed: " << ((kernelEndTime - kernelStartTime) / 1000) << " us" << std::endl;
|
||||
}
|
||||
|
||||
{
|
||||
ZoneScopedN("Device to Host Memory Copy");
|
||||
TracyCLZone(tracyCLCtx, "Read Buffer C");
|
||||
|
||||
cl_event readbufferCEvent;
|
||||
CL_ASSERT(clEnqueueReadBuffer(commandQueue, bufferC, CL_TRUE, 0, N * sizeof(float), hostC.data(), 0, nullptr, &readbufferCEvent));
|
||||
TracyCLZoneSetEvent(readbufferCEvent);
|
||||
}
|
||||
|
||||
CL_ASSERT(clFinish(commandQueue));
|
||||
|
||||
TracyCLCollect(tracyCLCtx);
|
||||
|
||||
{
|
||||
ZoneScopedN("Checking results");
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
{
|
||||
assert(hostC[i] == hostA[i] + hostB[i]);
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Results are correct!" << std::endl;
|
||||
|
||||
TracyCLDestroy(tracyCLCtx);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
Windows/Compiled*Shader.h
|
|
@ -0,0 +1,4 @@
|
|||
https://github.com/aras-p/ToyPathTracer
|
||||
|
||||
Modified to render only 10 frames. Client part requires 12 GB, server part
|
||||
requires 6.4 GB.
|
|
@ -0,0 +1,33 @@
|
|||
|
||||
#if defined(__APPLE__) && !defined(__METAL_VERSION__)
|
||||
#include <TargetConditionals.h>
|
||||
#endif
|
||||
|
||||
#define kBackbufferWidth 1280
|
||||
#define kBackbufferHeight 720
|
||||
|
||||
#if defined(__EMSCRIPTEN__)
|
||||
#define CPU_CAN_DO_SIMD 0
|
||||
#define CPU_CAN_DO_THREADS 0
|
||||
#else
|
||||
#define CPU_CAN_DO_SIMD 1
|
||||
#define CPU_CAN_DO_THREADS 1
|
||||
#endif
|
||||
|
||||
|
||||
#define DO_SAMPLES_PER_PIXEL 4
|
||||
#define DO_ANIMATE_SMOOTHING 0.9f
|
||||
#define DO_LIGHT_SAMPLING 1
|
||||
#define DO_MITSUBA_COMPARE 0
|
||||
|
||||
// Should path tracing be done on the GPU with a compute shader?
|
||||
#define DO_COMPUTE_GPU 0
|
||||
#define kCSGroupSizeX 8
|
||||
#define kCSGroupSizeY 8
|
||||
#define kCSMaxObjects 64
|
||||
|
||||
// Should float3 struct use SSE/NEON?
|
||||
#define DO_FLOAT3_WITH_SIMD (!(DO_COMPUTE_GPU) && CPU_CAN_DO_SIMD && 1)
|
||||
|
||||
// Should HitSpheres function use SSE/NEON?
|
||||
#define DO_HIT_SPHERES_SIMD (CPU_CAN_DO_SIMD && 1)
|
|
@ -0,0 +1,192 @@
|
|||
#pragma once
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define VM_INLINE __forceinline
|
||||
#else
|
||||
#define VM_INLINE __attribute__((unused, always_inline, nodebug)) inline
|
||||
#endif
|
||||
|
||||
#define kSimdWidth 4
|
||||
|
||||
#if !defined(__arm__) && !defined(__arm64__) && !defined(__EMSCRIPTEN__)
|
||||
|
||||
// ---- SSE implementation
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define SHUFFLE4(V, X,Y,Z,W) float4(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(W,Z,Y,X)))
|
||||
|
||||
struct float4
|
||||
{
|
||||
VM_INLINE float4() {}
|
||||
VM_INLINE explicit float4(const float *p) { m = _mm_loadu_ps(p); }
|
||||
VM_INLINE explicit float4(float x, float y, float z, float w) { m = _mm_set_ps(w, z, y, x); }
|
||||
VM_INLINE explicit float4(float v) { m = _mm_set_ps1(v); }
|
||||
VM_INLINE explicit float4(__m128 v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return _mm_cvtss_f32(m); }
|
||||
VM_INLINE float getY() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))); }
|
||||
VM_INLINE float getZ() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))); }
|
||||
VM_INLINE float getW() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(3, 3, 3, 3))); }
|
||||
|
||||
__m128 m;
|
||||
};
|
||||
|
||||
typedef float4 bool4;
|
||||
|
||||
VM_INLINE float4 operator+ (float4 a, float4 b) { a.m = _mm_add_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float4 operator- (float4 a, float4 b) { a.m = _mm_sub_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float4 operator* (float4 a, float4 b) { a.m = _mm_mul_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator==(float4 a, float4 b) { a.m = _mm_cmpeq_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator!=(float4 a, float4 b) { a.m = _mm_cmpneq_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator< (float4 a, float4 b) { a.m = _mm_cmplt_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator> (float4 a, float4 b) { a.m = _mm_cmpgt_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator<=(float4 a, float4 b) { a.m = _mm_cmple_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator>=(float4 a, float4 b) { a.m = _mm_cmpge_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator&(bool4 a, bool4 b) { a.m = _mm_and_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator|(bool4 a, bool4 b) { a.m = _mm_or_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float4 operator- (float4 a) { a.m = _mm_xor_ps(a.m, _mm_set1_ps(-0.0f)); return a; }
|
||||
VM_INLINE float4 min(float4 a, float4 b) { a.m = _mm_min_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float4 max(float4 a, float4 b) { a.m = _mm_max_ps(a.m, b.m); return a; }
|
||||
|
||||
VM_INLINE float hmin(float4 v)
|
||||
{
|
||||
v = min(v, SHUFFLE4(v, 2, 3, 0, 0));
|
||||
v = min(v, SHUFFLE4(v, 1, 0, 0, 0));
|
||||
return v.getX();
|
||||
}
|
||||
|
||||
// Returns a 4-bit code where bit0..bit3 is X..W
|
||||
VM_INLINE unsigned mask(float4 v) { return _mm_movemask_ps(v.m); }
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool4 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool4 v) { return mask(v) == 15; }
|
||||
|
||||
// "select", i.e. hibit(cond) ? b : a
|
||||
// on SSE4.1 and up this can be done easily via "blend" instruction;
|
||||
// on older SSEs has to do a bunch of hoops, see
|
||||
// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/
|
||||
|
||||
VM_INLINE float4 select(float4 a, float4 b, bool4 cond)
|
||||
{
|
||||
#if defined(__SSE4_1__) || defined(_MSC_VER) // on windows assume we always have SSE4.1
|
||||
a.m = _mm_blendv_ps(a.m, b.m, cond.m);
|
||||
#else
|
||||
__m128 d = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(cond.m), 31));
|
||||
a.m = _mm_or_ps(_mm_and_ps(d, b.m), _mm_andnot_ps(d, a.m));
|
||||
#endif
|
||||
return a;
|
||||
}
|
||||
VM_INLINE __m128i select(__m128i a, __m128i b, bool4 cond)
|
||||
{
|
||||
#if defined(__SSE4_1__) || defined(_MSC_VER) // on windows assume we always have SSE4.1
|
||||
return _mm_blendv_epi8(a, b, _mm_castps_si128(cond.m));
|
||||
#else
|
||||
__m128i d = _mm_srai_epi32(_mm_castps_si128(cond.m), 31);
|
||||
return _mm_or_si128(_mm_and_si128(d, b), _mm_andnot_si128(d, a));
|
||||
#endif
|
||||
}
|
||||
|
||||
VM_INLINE float4 sqrtf(float4 v) { return float4(_mm_sqrt_ps(v.m)); }
|
||||
|
||||
#elif !defined(__EMSCRIPTEN__)
|
||||
|
||||
// ---- NEON implementation
|
||||
|
||||
#define USE_NEON 1
|
||||
#include <arm_neon.h>
|
||||
|
||||
struct float4
|
||||
{
|
||||
VM_INLINE float4() {}
|
||||
VM_INLINE explicit float4(const float *p) { m = vld1q_f32(p); }
|
||||
VM_INLINE explicit float4(float x, float y, float z, float w) { float v[4] = {x, y, z, w}; m = vld1q_f32(v); }
|
||||
VM_INLINE explicit float4(float v) { m = vdupq_n_f32(v); }
|
||||
VM_INLINE explicit float4(float32x4_t v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); }
|
||||
VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); }
|
||||
VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); }
|
||||
VM_INLINE float getW() const { return vgetq_lane_f32(m, 3); }
|
||||
|
||||
float32x4_t m;
|
||||
};
|
||||
|
||||
typedef float4 bool4;
|
||||
|
||||
VM_INLINE float4 operator+ (float4 a, float4 b) { a.m = vaddq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE float4 operator- (float4 a, float4 b) { a.m = vsubq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE float4 operator* (float4 a, float4 b) { a.m = vmulq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator==(float4 a, float4 b) { a.m = vceqq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator!=(float4 a, float4 b) { a.m = a.m = vmvnq_u32(vceqq_f32(a.m, b.m)); return a; }
|
||||
VM_INLINE bool4 operator< (float4 a, float4 b) { a.m = vcltq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator> (float4 a, float4 b) { a.m = vcgtq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator<=(float4 a, float4 b) { a.m = vcleq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator>=(float4 a, float4 b) { a.m = vcgeq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator&(bool4 a, bool4 b) { a.m = vandq_u32(a.m, b.m); return a; }
|
||||
VM_INLINE bool4 operator|(bool4 a, bool4 b) { a.m = vorrq_u32(a.m, b.m); return a; }
|
||||
VM_INLINE float4 operator- (float4 a) { a.m = vnegq_f32(a.m); return a; }
|
||||
VM_INLINE float4 min(float4 a, float4 b) { a.m = vminq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE float4 max(float4 a, float4 b) { a.m = vmaxq_f32(a.m, b.m); return a; }
|
||||
|
||||
VM_INLINE float hmin(float4 v)
|
||||
{
|
||||
float32x2_t minOfHalfs = vpmin_f32(vget_low_f32(v.m), vget_high_f32(v.m));
|
||||
float32x2_t minOfMinOfHalfs = vpmin_f32(minOfHalfs, minOfHalfs);
|
||||
return vget_lane_f32(minOfMinOfHalfs, 0);
|
||||
}
|
||||
|
||||
// Returns a 4-bit code where bit0..bit3 is X..W
|
||||
VM_INLINE unsigned mask(float4 v)
|
||||
{
|
||||
static const uint32x4_t movemask = { 1, 2, 4, 8 };
|
||||
static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
|
||||
uint32x4_t t0 = vreinterpretq_u32_f32(v.m);
|
||||
uint32x4_t t1 = vtstq_u32(t0, highbit);
|
||||
uint32x4_t t2 = vandq_u32(t1, movemask);
|
||||
uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
|
||||
return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1);
|
||||
}
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool4 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool4 v) { return mask(v) == 15; }
|
||||
|
||||
// "select", i.e. hibit(cond) ? b : a
|
||||
// on SSE4.1 and up this can be done easily via "blend" instruction;
|
||||
// on older SSEs has to do a bunch of hoops, see
|
||||
// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/
|
||||
|
||||
VM_INLINE float4 select(float4 a, float4 b, bool4 cond)
|
||||
{
|
||||
a.m = vbslq_f32(cond.m, b.m, a.m);
|
||||
return a;
|
||||
}
|
||||
VM_INLINE int32x4_t select(int32x4_t a, int32x4_t b, bool4 cond)
|
||||
{
|
||||
return vbslq_f32(cond.m, b, a);
|
||||
}
|
||||
|
||||
VM_INLINE float4 sqrtf(float4 v)
|
||||
{
|
||||
float32x4_t V = v.m;
|
||||
float32x4_t S0 = vrsqrteq_f32(V);
|
||||
float32x4_t P0 = vmulq_f32( V, S0 );
|
||||
float32x4_t R0 = vrsqrtsq_f32( P0, S0 );
|
||||
float32x4_t S1 = vmulq_f32( S0, R0 );
|
||||
float32x4_t P1 = vmulq_f32( V, S1 );
|
||||
float32x4_t R1 = vrsqrtsq_f32( P1, S1 );
|
||||
float32x4_t S2 = vmulq_f32( S1, R1 );
|
||||
float32x4_t P2 = vmulq_f32( V, S2 );
|
||||
float32x4_t R2 = vrsqrtsq_f32( P2, S2 );
|
||||
float32x4_t S3 = vmulq_f32( S2, R2 );
|
||||
return float4(vmulq_f32(V, S3));
|
||||
}
|
||||
|
||||
VM_INLINE float4 splatX(float32x4_t v) { return float4(vdupq_lane_f32(vget_low_f32(v), 0)); }
|
||||
VM_INLINE float4 splatY(float32x4_t v) { return float4(vdupq_lane_f32(vget_low_f32(v), 1)); }
|
||||
VM_INLINE float4 splatZ(float32x4_t v) { return float4(vdupq_lane_f32(vget_high_f32(v), 0)); }
|
||||
VM_INLINE float4 splatW(float32x4_t v) { return float4(vdupq_lane_f32(vget_high_f32(v), 1)); }
|
||||
|
||||
#endif
|
|
@ -0,0 +1,203 @@
|
|||
#include "Maths.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
static uint32_t XorShift32(uint32_t& state)
|
||||
{
|
||||
uint32_t x = state;
|
||||
x ^= x << 13;
|
||||
x ^= x >> 17;
|
||||
x ^= x << 15;
|
||||
state = x;
|
||||
return x;
|
||||
}
|
||||
|
||||
float RandomFloat01(uint32_t& state)
|
||||
{
|
||||
return (XorShift32(state) & 0xFFFFFF) / 16777216.0f;
|
||||
}
|
||||
|
||||
float3 RandomInUnitDisk(uint32_t& state)
|
||||
{
|
||||
float3 p;
|
||||
do
|
||||
{
|
||||
p = 2.0 * float3(RandomFloat01(state),RandomFloat01(state),0) - float3(1,1,0);
|
||||
} while (dot(p,p) >= 1.0);
|
||||
return p;
|
||||
}
|
||||
|
||||
float3 RandomInUnitSphere(uint32_t& state)
|
||||
{
|
||||
float3 p;
|
||||
do {
|
||||
p = 2.0*float3(RandomFloat01(state),RandomFloat01(state),RandomFloat01(state)) - float3(1,1,1);
|
||||
} while (sqLength(p) >= 1.0);
|
||||
return p;
|
||||
}
|
||||
|
||||
float3 RandomUnitVector(uint32_t& state)
|
||||
{
|
||||
float z = RandomFloat01(state) * 2.0f - 1.0f;
|
||||
float a = RandomFloat01(state) * 2.0f * kPI;
|
||||
float r = sqrtf(1.0f - z * z);
|
||||
float x = r * cosf(a);
|
||||
float y = r * sinf(a);
|
||||
return float3(x, y, z);
|
||||
}
|
||||
|
||||
|
||||
int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit)
|
||||
{
|
||||
#if DO_HIT_SPHERES_SIMD
|
||||
float4 hitT = float4(tMax);
|
||||
#if USE_NEON
|
||||
int32x4_t id = vdupq_n_s32(-1);
|
||||
#else
|
||||
__m128i id = _mm_set1_epi32(-1);
|
||||
#endif
|
||||
|
||||
#if DO_FLOAT3_WITH_SIMD && !USE_NEON
|
||||
float4 rOrigX = SHUFFLE4(r.orig, 0, 0, 0, 0);
|
||||
float4 rOrigY = SHUFFLE4(r.orig, 1, 1, 1, 1);
|
||||
float4 rOrigZ = SHUFFLE4(r.orig, 2, 2, 2, 2);
|
||||
float4 rDirX = SHUFFLE4(r.dir, 0, 0, 0, 0);
|
||||
float4 rDirY = SHUFFLE4(r.dir, 1, 1, 1, 1);
|
||||
float4 rDirZ = SHUFFLE4(r.dir, 2, 2, 2, 2);
|
||||
#elif DO_FLOAT3_WITH_SIMD
|
||||
float4 rOrigX = splatX(r.orig.m);
|
||||
float4 rOrigY = splatY(r.orig.m);
|
||||
float4 rOrigZ = splatZ(r.orig.m);
|
||||
float4 rDirX = splatX(r.dir.m);
|
||||
float4 rDirY = splatY(r.dir.m);
|
||||
float4 rDirZ = splatZ(r.dir.m);
|
||||
#else
|
||||
float4 rOrigX = float4(r.orig.x);
|
||||
float4 rOrigY = float4(r.orig.y);
|
||||
float4 rOrigZ = float4(r.orig.z);
|
||||
float4 rDirX = float4(r.dir.x);
|
||||
float4 rDirY = float4(r.dir.y);
|
||||
float4 rDirZ = float4(r.dir.z);
|
||||
#endif
|
||||
float4 tMin4 = float4(tMin);
|
||||
#if USE_NEON
|
||||
int32x4_t curId = vcombine_u32(vcreate_u32(0ULL | (1ULL<<32)), vcreate_u32(2ULL | (3ULL<<32)));
|
||||
#else
|
||||
__m128i curId = _mm_set_epi32(3, 2, 1, 0);
|
||||
#endif
|
||||
// process 4 spheres at once
|
||||
for (int i = 0; i < spheres.simdCount; i += kSimdWidth)
|
||||
{
|
||||
// load data for 4 spheres
|
||||
float4 sCenterX = float4(spheres.centerX + i);
|
||||
float4 sCenterY = float4(spheres.centerY + i);
|
||||
float4 sCenterZ = float4(spheres.centerZ + i);
|
||||
float4 sSqRadius = float4(spheres.sqRadius + i);
|
||||
// note: we flip this vector and calculate -b (nb) since that happens to be slightly preferable computationally
|
||||
float4 coX = sCenterX - rOrigX;
|
||||
float4 coY = sCenterY - rOrigY;
|
||||
float4 coZ = sCenterZ - rOrigZ;
|
||||
float4 nb = coX * rDirX + coY * rDirY + coZ * rDirZ;
|
||||
float4 c = coX * coX + coY * coY + coZ * coZ - sSqRadius;
|
||||
float4 discr = nb * nb - c;
|
||||
bool4 discrPos = discr > float4(0.0f);
|
||||
// if ray hits any of the 4 spheres
|
||||
if (any(discrPos))
|
||||
{
|
||||
float4 discrSq = sqrtf(discr);
|
||||
|
||||
// ray could hit spheres at t0 & t1
|
||||
float4 t0 = nb - discrSq;
|
||||
float4 t1 = nb + discrSq;
|
||||
|
||||
float4 t = select(t1, t0, t0 > tMin4); // if t0 is above min, take it (since it's the earlier hit); else try t1.
|
||||
bool4 msk = discrPos & (t > tMin4) & (t < hitT);
|
||||
// if hit, take it
|
||||
id = select(id, curId, msk);
|
||||
hitT = select(hitT, t, msk);
|
||||
}
|
||||
#if USE_NEON
|
||||
curId = vaddq_s32(curId, vdupq_n_s32(kSimdWidth));
|
||||
#else
|
||||
curId = _mm_add_epi32(curId, _mm_set1_epi32(kSimdWidth));
|
||||
#endif
|
||||
}
|
||||
// now we have up to 4 hits, find and return closest one
|
||||
float minT = hmin(hitT);
|
||||
if (minT < tMax) // any actual hits?
|
||||
{
|
||||
int minMask = mask(hitT == float4(minT));
|
||||
if (minMask != 0)
|
||||
{
|
||||
int id_scalar[4];
|
||||
float hitT_scalar[4];
|
||||
#if USE_NEON
|
||||
vst1q_s32(id_scalar, id);
|
||||
vst1q_f32(hitT_scalar, hitT.m);
|
||||
#else
|
||||
_mm_storeu_si128((__m128i *)id_scalar, id);
|
||||
_mm_storeu_ps(hitT_scalar, hitT.m);
|
||||
#endif
|
||||
|
||||
// In general, you would do this with a bit scan (first set/trailing zero count).
|
||||
// But who cares, it's only 16 options.
|
||||
static const int laneId[16] =
|
||||
{
|
||||
0, 0, 1, 0, // 00xx
|
||||
2, 0, 1, 0, // 01xx
|
||||
3, 0, 1, 0, // 10xx
|
||||
2, 0, 1, 0, // 11xx
|
||||
};
|
||||
|
||||
int lane = laneId[minMask];
|
||||
int hitId = id_scalar[lane];
|
||||
float finalHitT = hitT_scalar[lane];
|
||||
|
||||
outHit.pos = r.pointAt(finalHitT);
|
||||
outHit.normal = (outHit.pos - float3(spheres.centerX[hitId], spheres.centerY[hitId], spheres.centerZ[hitId])) * spheres.invRadius[hitId];
|
||||
outHit.t = finalHitT;
|
||||
return hitId;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
|
||||
#else // #if DO_HIT_SPHERES_SIMD
|
||||
|
||||
float hitT = tMax;
|
||||
int id = -1;
|
||||
for (int i = 0; i < spheres.count; ++i)
|
||||
{
|
||||
float coX = spheres.centerX[i] - r.orig.getX();
|
||||
float coY = spheres.centerY[i] - r.orig.getY();
|
||||
float coZ = spheres.centerZ[i] - r.orig.getZ();
|
||||
float nb = coX * r.dir.getX() + coY * r.dir.getY() + coZ * r.dir.getZ();
|
||||
float c = coX * coX + coY * coY + coZ * coZ - spheres.sqRadius[i];
|
||||
float discr = nb * nb - c;
|
||||
if (discr > 0)
|
||||
{
|
||||
float discrSq = sqrtf(discr);
|
||||
|
||||
// Try earlier t
|
||||
float t = nb - discrSq;
|
||||
if (t <= tMin) // before min, try later t!
|
||||
t = nb + discrSq;
|
||||
|
||||
if (t > tMin && t < hitT)
|
||||
{
|
||||
id = i;
|
||||
hitT = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (id != -1)
|
||||
{
|
||||
outHit.pos = r.pointAt(hitT);
|
||||
outHit.normal = (outHit.pos - float3(spheres.centerX[id], spheres.centerY[id], spheres.centerZ[id])) * spheres.invRadius[id];
|
||||
outHit.t = hitT;
|
||||
return id;
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
#endif // #else of #if DO_HIT_SPHERES_SIMD
|
||||
}
|
|
@ -0,0 +1,436 @@
|
|||
#pragma once
|
||||
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include "Config.h"
|
||||
#include "MathSimd.h"
|
||||
|
||||
#define kPI 3.1415926f
|
||||
|
||||
// SSE/SIMD vector largely based on http://www.codersnotes.com/notes/maths-lib-2016/
|
||||
#if DO_FLOAT3_WITH_SIMD
|
||||
|
||||
|
||||
#if !defined(__arm__) && !defined(__arm64__)
|
||||
|
||||
// ---- SSE implementation
|
||||
|
||||
// SHUFFLE3(v, 0,1,2) leaves the vector unchanged (v.xyz).
|
||||
// SHUFFLE3(v, 0,0,0) splats the X (v.xxx).
|
||||
#define SHUFFLE3(V, X,Y,Z) float3(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(Z,Z,Y,X)))
|
||||
|
||||
struct float3
|
||||
{
|
||||
VM_INLINE float3() {}
|
||||
VM_INLINE explicit float3(const float *p) { m = _mm_set_ps(p[2], p[2], p[1], p[0]); }
|
||||
VM_INLINE explicit float3(float x, float y, float z) { m = _mm_set_ps(z, z, y, x); }
|
||||
VM_INLINE explicit float3(float v) { m = _mm_set1_ps(v); }
|
||||
VM_INLINE explicit float3(__m128 v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return _mm_cvtss_f32(m); }
|
||||
VM_INLINE float getY() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))); }
|
||||
VM_INLINE float getZ() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))); }
|
||||
|
||||
VM_INLINE float3 yzx() const { return SHUFFLE3(*this, 1, 2, 0); }
|
||||
VM_INLINE float3 zxy() const { return SHUFFLE3(*this, 2, 0, 1); }
|
||||
|
||||
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
|
||||
|
||||
void setX(float x)
|
||||
{
|
||||
m = _mm_move_ss(m, _mm_set_ss(x));
|
||||
}
|
||||
void setY(float y)
|
||||
{
|
||||
__m128 t = _mm_move_ss(m, _mm_set_ss(y));
|
||||
t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 2, 0, 0));
|
||||
m = _mm_move_ss(t, m);
|
||||
}
|
||||
void setZ(float z)
|
||||
{
|
||||
__m128 t = _mm_move_ss(m, _mm_set_ss(z));
|
||||
t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 0, 1, 0));
|
||||
m = _mm_move_ss(t, m);
|
||||
}
|
||||
|
||||
__m128 m;
|
||||
};
|
||||
|
||||
typedef float3 bool3;
|
||||
|
||||
VM_INLINE float3 operator+ (float3 a, float3 b) { a.m = _mm_add_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float3 operator- (float3 a, float3 b) { a.m = _mm_sub_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float3 operator* (float3 a, float3 b) { a.m = _mm_mul_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float3 operator/ (float3 a, float3 b) { a.m = _mm_div_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float3 operator* (float3 a, float b) { a.m = _mm_mul_ps(a.m, _mm_set1_ps(b)); return a; }
|
||||
VM_INLINE float3 operator/ (float3 a, float b) { a.m = _mm_div_ps(a.m, _mm_set1_ps(b)); return a; }
|
||||
VM_INLINE float3 operator* (float a, float3 b) { b.m = _mm_mul_ps(_mm_set1_ps(a), b.m); return b; }
|
||||
VM_INLINE float3 operator/ (float a, float3 b) { b.m = _mm_div_ps(_mm_set1_ps(a), b.m); return b; }
|
||||
VM_INLINE float3& operator+= (float3 &a, float3 b) { a = a + b; return a; }
|
||||
VM_INLINE float3& operator-= (float3 &a, float3 b) { a = a - b; return a; }
|
||||
VM_INLINE float3& operator*= (float3 &a, float3 b) { a = a * b; return a; }
|
||||
VM_INLINE float3& operator/= (float3 &a, float3 b) { a = a / b; return a; }
|
||||
VM_INLINE float3& operator*= (float3 &a, float b) { a = a * b; return a; }
|
||||
VM_INLINE float3& operator/= (float3 &a, float b) { a = a / b; return a; }
|
||||
VM_INLINE bool3 operator==(float3 a, float3 b) { a.m = _mm_cmpeq_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool3 operator!=(float3 a, float3 b) { a.m = _mm_cmpneq_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool3 operator< (float3 a, float3 b) { a.m = _mm_cmplt_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool3 operator> (float3 a, float3 b) { a.m = _mm_cmpgt_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool3 operator<=(float3 a, float3 b) { a.m = _mm_cmple_ps(a.m, b.m); return a; }
|
||||
VM_INLINE bool3 operator>=(float3 a, float3 b) { a.m = _mm_cmpge_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float3 min(float3 a, float3 b) { a.m = _mm_min_ps(a.m, b.m); return a; }
|
||||
VM_INLINE float3 max(float3 a, float3 b) { a.m = _mm_max_ps(a.m, b.m); return a; }
|
||||
|
||||
VM_INLINE float3 operator- (float3 a) { return float3(_mm_setzero_ps()) - a; }
|
||||
|
||||
VM_INLINE float hmin(float3 v)
|
||||
{
|
||||
v = min(v, SHUFFLE3(v, 1, 0, 2));
|
||||
return min(v, SHUFFLE3(v, 2, 0, 1)).getX();
|
||||
}
|
||||
VM_INLINE float hmax(float3 v)
|
||||
{
|
||||
v = max(v, SHUFFLE3(v, 1, 0, 2));
|
||||
return max(v, SHUFFLE3(v, 2, 0, 1)).getX();
|
||||
}
|
||||
|
||||
VM_INLINE float3 cross(float3 a, float3 b)
|
||||
{
|
||||
// x <- a.y*b.z - a.z*b.y
|
||||
// y <- a.z*b.x - a.x*b.z
|
||||
// z <- a.x*b.y - a.y*b.x
|
||||
// We can save a shuffle by grouping it in this wacky order:
|
||||
return (a.zxy()*b - a*b.zxy()).zxy();
|
||||
}
|
||||
|
||||
// Returns a 3-bit code where bit0..bit2 is X..Z
|
||||
VM_INLINE unsigned mask(float3 v) { return _mm_movemask_ps(v.m) & 7; }
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool3 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool3 v) { return mask(v) == 7; }
|
||||
|
||||
VM_INLINE float3 clamp(float3 t, float3 a, float3 b) { return min(max(t, a), b); }
|
||||
VM_INLINE float sum(float3 v) { return v.getX() + v.getY() + v.getZ(); }
|
||||
VM_INLINE float dot(float3 a, float3 b) { return sum(a*b); }
|
||||
|
||||
#else // #if !defined(__arm__) && !defined(__arm64__)
|
||||
|
||||
// ---- NEON implementation
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
struct float3
|
||||
{
|
||||
VM_INLINE float3() {}
|
||||
VM_INLINE explicit float3(const float *p) { float v[4] = {p[0], p[1], p[2], 0}; m = vld1q_f32(v); }
|
||||
VM_INLINE explicit float3(float x, float y, float z) { float v[4] = {x, y, z, 0}; m = vld1q_f32(v); }
|
||||
VM_INLINE explicit float3(float v) { m = vdupq_n_f32(v); }
|
||||
VM_INLINE explicit float3(float32x4_t v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); }
|
||||
VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); }
|
||||
VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); }
|
||||
|
||||
VM_INLINE float3 yzx() const
|
||||
{
|
||||
float32x2_t low = vget_low_f32(m);
|
||||
float32x4_t yzx = vcombine_f32(vext_f32(low, vget_high_f32(m), 1), low);
|
||||
return float3(yzx);
|
||||
}
|
||||
VM_INLINE float3 zxy() const
|
||||
{
|
||||
float32x4_t p = m;
|
||||
p = vuzpq_f32(vreinterpretq_f32_s32(vextq_s32(vreinterpretq_s32_f32(p), vreinterpretq_s32_f32(p), 1)), p).val[1];
|
||||
return float3(p);
|
||||
}
|
||||
|
||||
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
|
||||
|
||||
void setX(float x)
|
||||
{
|
||||
m = vsetq_lane_f32(x, m, 0);
|
||||
}
|
||||
void setY(float y)
|
||||
{
|
||||
m = vsetq_lane_f32(y, m, 1);
|
||||
}
|
||||
void setZ(float z)
|
||||
{
|
||||
m = vsetq_lane_f32(z, m, 2);
|
||||
}
|
||||
|
||||
float32x4_t m;
|
||||
};
|
||||
|
||||
typedef float3 bool3;
|
||||
|
||||
VM_INLINE float32x4_t rcp_2(float32x4_t v)
|
||||
{
|
||||
float32x4_t e = vrecpeq_f32(v);
|
||||
e = vmulq_f32(vrecpsq_f32(e, v), e);
|
||||
e = vmulq_f32(vrecpsq_f32(e, v), e);
|
||||
return e;
|
||||
}
|
||||
|
||||
VM_INLINE float3 operator+ (float3 a, float3 b) { a.m = vaddq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE float3 operator- (float3 a, float3 b) { a.m = vsubq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE float3 operator* (float3 a, float3 b) { a.m = vmulq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE float3 operator/ (float3 a, float3 b) { float32x4_t recip = rcp_2(b.m); a.m = vmulq_f32(a.m, recip); return a; }
|
||||
VM_INLINE float3 operator* (float3 a, float b) { a.m = vmulq_f32(a.m, vdupq_n_f32(b)); return a; }
|
||||
VM_INLINE float3 operator/ (float3 a, float b) { float32x4_t recip = rcp_2(vdupq_n_f32(b)); a.m = vmulq_f32(a.m, recip); return a; }
|
||||
VM_INLINE float3 operator* (float a, float3 b) { b.m = vmulq_f32(vdupq_n_f32(a), b.m); return b; }
|
||||
VM_INLINE float3 operator/ (float a, float3 b) { float32x4_t recip = rcp_2(b.m); b.m = vmulq_f32(vdupq_n_f32(a), recip); return b; }
|
||||
VM_INLINE float3& operator+= (float3 &a, float3 b) { a = a + b; return a; }
|
||||
VM_INLINE float3& operator-= (float3 &a, float3 b) { a = a - b; return a; }
|
||||
VM_INLINE float3& operator*= (float3 &a, float3 b) { a = a * b; return a; }
|
||||
VM_INLINE float3& operator/= (float3 &a, float3 b) { a = a / b; return a; }
|
||||
VM_INLINE float3& operator*= (float3 &a, float b) { a = a * b; return a; }
|
||||
VM_INLINE float3& operator/= (float3 &a, float b) { a = a / b; return a; }
|
||||
VM_INLINE bool3 operator==(float3 a, float3 b) { a.m = vceqq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE bool3 operator!=(float3 a, float3 b) { a.m = vmvnq_u32(vceqq_f32(a.m, b.m)); return a; }
|
||||
VM_INLINE bool3 operator< (float3 a, float3 b) { a.m = vcltq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE bool3 operator> (float3 a, float3 b) { a.m = vcgtq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE bool3 operator<=(float3 a, float3 b) { a.m = vcleq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE bool3 operator>=(float3 a, float3 b) { a.m = vcgeq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE float3 min(float3 a, float3 b) { a.m = vminq_f32(a.m, b.m); return a; }
|
||||
VM_INLINE float3 max(float3 a, float3 b) { a.m = vmaxq_f32(a.m, b.m); return a; }
|
||||
|
||||
VM_INLINE float3 operator- (float3 a) { a.m = vnegq_f32(a.m); return a; }
|
||||
|
||||
VM_INLINE float hmin(float3 v)
|
||||
{
|
||||
float32x2_t minOfHalfs = vpmin_f32(vget_low_f32(v.m), vget_high_f32(v.m));
|
||||
float32x2_t minOfMinOfHalfs = vpmin_f32(minOfHalfs, minOfHalfs);
|
||||
return vget_lane_f32(minOfMinOfHalfs, 0);
|
||||
}
|
||||
VM_INLINE float hmax(float3 v)
|
||||
{
|
||||
float32x2_t maxOfHalfs = vpmax_f32(vget_low_f32(v.m), vget_high_f32(v.m));
|
||||
float32x2_t maxOfMaxOfHalfs = vpmax_f32(maxOfHalfs, maxOfHalfs);
|
||||
return vget_lane_f32(maxOfMaxOfHalfs, 0);
|
||||
}
|
||||
|
||||
VM_INLINE float3 cross(float3 a, float3 b)
|
||||
{
|
||||
// x <- a.y*b.z - a.z*b.y
|
||||
// y <- a.z*b.x - a.x*b.z
|
||||
// z <- a.x*b.y - a.y*b.x
|
||||
// We can save a shuffle by grouping it in this wacky order:
|
||||
return (a.zxy()*b - a*b.zxy()).zxy();
|
||||
}
|
||||
|
||||
// Returns a 3-bit code where bit0..bit2 is X..Z
|
||||
VM_INLINE unsigned mask(float3 v)
|
||||
{
|
||||
static const uint32x4_t movemask = { 1, 2, 4, 8 };
|
||||
static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
|
||||
uint32x4_t t0 = vreinterpretq_u32_f32(v.m);
|
||||
uint32x4_t t1 = vtstq_u32(t0, highbit);
|
||||
uint32x4_t t2 = vandq_u32(t1, movemask);
|
||||
uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
|
||||
return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1);
|
||||
}
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool3 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool3 v) { return mask(v) == 7; }
|
||||
|
||||
VM_INLINE float3 clamp(float3 t, float3 a, float3 b) { return min(max(t, a), b); }
|
||||
VM_INLINE float sum(float3 v) { return v.getX() + v.getY() + v.getZ(); }
|
||||
VM_INLINE float dot(float3 a, float3 b) { return sum(a*b); }
|
||||
|
||||
|
||||
#endif // #else of #if !defined(__arm__) && !defined(__arm64__)
|
||||
|
||||
#else // #if DO_FLOAT3_WITH_SIMD
|
||||
|
||||
// ---- Simple scalar C implementation
|
||||
|
||||
|
||||
struct float3
|
||||
{
|
||||
float3() : x(0), y(0), z(0) {}
|
||||
float3(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {}
|
||||
|
||||
float3 operator-() const { return float3(-x, -y, -z); }
|
||||
float3& operator+=(const float3& o) { x+=o.x; y+=o.y; z+=o.z; return *this; }
|
||||
float3& operator-=(const float3& o) { x-=o.x; y-=o.y; z-=o.z; return *this; }
|
||||
float3& operator*=(const float3& o) { x*=o.x; y*=o.y; z*=o.z; return *this; }
|
||||
float3& operator*=(float o) { x*=o; y*=o; z*=o; return *this; }
|
||||
|
||||
VM_INLINE float getX() const { return x; }
|
||||
VM_INLINE float getY() const { return y; }
|
||||
VM_INLINE float getZ() const { return z; }
|
||||
VM_INLINE void setX(float x_) { x = x_; }
|
||||
VM_INLINE void setY(float y_) { y = y_; }
|
||||
VM_INLINE void setZ(float z_) { z = z_; }
|
||||
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
|
||||
|
||||
float x, y, z;
|
||||
};
|
||||
|
||||
VM_INLINE float3 operator+(const float3& a, const float3& b) { return float3(a.x+b.x,a.y+b.y,a.z+b.z); }
|
||||
VM_INLINE float3 operator-(const float3& a, const float3& b) { return float3(a.x-b.x,a.y-b.y,a.z-b.z); }
|
||||
VM_INLINE float3 operator*(const float3& a, const float3& b) { return float3(a.x*b.x,a.y*b.y,a.z*b.z); }
|
||||
VM_INLINE float3 operator*(const float3& a, float b) { return float3(a.x*b,a.y*b,a.z*b); }
|
||||
VM_INLINE float3 operator*(float a, const float3& b) { return float3(a*b.x,a*b.y,a*b.z); }
|
||||
VM_INLINE float dot(const float3& a, const float3& b) { return a.x*b.x+a.y*b.y+a.z*b.z; }
|
||||
VM_INLINE float3 cross(const float3& a, const float3& b)
|
||||
{
|
||||
return float3(
|
||||
a.y*b.z - a.z*b.y,
|
||||
-(a.x*b.z - a.z*b.x),
|
||||
a.x*b.y - a.y*b.x
|
||||
);
|
||||
}
|
||||
#endif // #else of #if DO_FLOAT3_WITH_SIMD
|
||||
|
||||
VM_INLINE float length(float3 v) { return sqrtf(dot(v, v)); }
|
||||
VM_INLINE float sqLength(float3 v) { return dot(v, v); }
|
||||
VM_INLINE float3 normalize(float3 v) { return v * (1.0f / length(v)); }
|
||||
VM_INLINE float3 lerp(float3 a, float3 b, float t) { return a + (b-a)*t; }
|
||||
|
||||
|
||||
inline void AssertUnit(float3 v)
|
||||
{
|
||||
assert(fabsf(sqLength(v) - 1.0f) < 0.01f);
|
||||
}
|
||||
|
||||
inline float3 reflect(float3 v, float3 n)
|
||||
{
|
||||
return v - 2*dot(v,n)*n;
|
||||
}
|
||||
|
||||
inline bool refract(float3 v, float3 n, float nint, float3& outRefracted)
|
||||
{
|
||||
AssertUnit(v);
|
||||
float dt = dot(v, n);
|
||||
float discr = 1.0f - nint*nint*(1-dt*dt);
|
||||
if (discr > 0)
|
||||
{
|
||||
outRefracted = nint * (v - n*dt) - n*sqrtf(discr);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
inline float schlick(float cosine, float ri)
|
||||
{
|
||||
float r0 = (1-ri) / (1+ri);
|
||||
r0 = r0*r0;
|
||||
return r0 + (1-r0)*powf(1-cosine, 5);
|
||||
}
|
||||
|
||||
struct Ray
|
||||
{
|
||||
Ray() {}
|
||||
Ray(float3 orig_, float3 dir_) : orig(orig_), dir(dir_) { AssertUnit(dir); }
|
||||
|
||||
float3 pointAt(float t) const { return orig + dir * t; }
|
||||
|
||||
float3 orig;
|
||||
float3 dir;
|
||||
};
|
||||
|
||||
|
||||
struct Hit
|
||||
{
|
||||
float3 pos;
|
||||
float3 normal;
|
||||
float t;
|
||||
};
|
||||
|
||||
|
||||
struct Sphere
|
||||
{
|
||||
Sphere() : radius(1.0f), invRadius(0.0f) {}
|
||||
Sphere(float3 center_, float radius_) : center(center_), radius(radius_), invRadius(0.0f) {}
|
||||
|
||||
void UpdateDerivedData() { invRadius = 1.0f/radius; }
|
||||
|
||||
float3 center;
|
||||
float radius;
|
||||
float invRadius;
|
||||
};
|
||||
|
||||
|
||||
// data for all spheres in a "structure of arrays" layout
|
||||
struct SpheresSoA
|
||||
{
|
||||
SpheresSoA(int c)
|
||||
{
|
||||
count = c;
|
||||
// we'll be processing spheres in kSimdWidth chunks, so make sure to allocate
|
||||
// enough space
|
||||
simdCount = (c + (kSimdWidth - 1)) / kSimdWidth * kSimdWidth;
|
||||
centerX = new float[simdCount];
|
||||
centerY = new float[simdCount];
|
||||
centerZ = new float[simdCount];
|
||||
sqRadius = new float[simdCount];
|
||||
invRadius = new float[simdCount];
|
||||
// set all data to "impossible sphere" state
|
||||
for (int i = count; i < simdCount; ++i)
|
||||
{
|
||||
centerX[i] = centerY[i] = centerZ[i] = 10000.0f;
|
||||
sqRadius[i] = 0.0f;
|
||||
invRadius[i] = 0.0f;
|
||||
}
|
||||
}
|
||||
~SpheresSoA()
|
||||
{
|
||||
delete[] centerX;
|
||||
delete[] centerY;
|
||||
delete[] centerZ;
|
||||
delete[] sqRadius;
|
||||
delete[] invRadius;
|
||||
}
|
||||
float* centerX;
|
||||
float* centerY;
|
||||
float* centerZ;
|
||||
float* sqRadius;
|
||||
float* invRadius;
|
||||
int simdCount;
|
||||
int count;
|
||||
};
|
||||
|
||||
|
||||
int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit);
|
||||
|
||||
float RandomFloat01(uint32_t& state);
|
||||
float3 RandomInUnitDisk(uint32_t& state);
|
||||
float3 RandomInUnitSphere(uint32_t& state);
|
||||
float3 RandomUnitVector(uint32_t& state);
|
||||
|
||||
struct Camera
|
||||
{
|
||||
Camera() {}
|
||||
// vfov is top to bottom in degrees
|
||||
Camera(const float3& lookFrom, const float3& lookAt, const float3& vup, float vfov, float aspect, float aperture, float focusDist)
|
||||
{
|
||||
lensRadius = aperture / 2;
|
||||
float theta = vfov*kPI/180;
|
||||
float halfHeight = tanf(theta/2);
|
||||
float halfWidth = aspect * halfHeight;
|
||||
origin = lookFrom;
|
||||
w = normalize(lookFrom - lookAt);
|
||||
u = normalize(cross(vup, w));
|
||||
v = cross(w, u);
|
||||
lowerLeftCorner = origin - halfWidth*focusDist*u - halfHeight*focusDist*v - focusDist*w;
|
||||
horizontal = 2*halfWidth*focusDist*u;
|
||||
vertical = 2*halfHeight*focusDist*v;
|
||||
}
|
||||
|
||||
Ray GetRay(float s, float t, uint32_t& state) const
|
||||
{
|
||||
float3 rd = lensRadius * RandomInUnitDisk(state);
|
||||
float3 offset = u * rd.getX() + v * rd.getY();
|
||||
return Ray(origin + offset, normalize(lowerLeftCorner + s*horizontal + t*vertical - origin - offset));
|
||||
}
|
||||
|
||||
float3 origin;
|
||||
float3 lowerLeftCorner;
|
||||
float3 horizontal;
|
||||
float3 vertical;
|
||||
float3 u, v, w;
|
||||
float lensRadius;
|
||||
};
|
||||
|
|
@ -0,0 +1,392 @@
|
|||
#include "Config.h"
|
||||
#include "Test.h"
|
||||
#include "Maths.h"
|
||||
#include <algorithm>
|
||||
#if CPU_CAN_DO_THREADS
|
||||
#include "enkiTS/TaskScheduler_c.h"
|
||||
#include <thread>
|
||||
#endif
|
||||
#include <atomic>
|
||||
|
||||
#include "../../../Tracy.hpp"
|
||||
|
||||
// 46 spheres (2 emissive) when enabled; 9 spheres (1 emissive) when disabled
|
||||
#define DO_BIG_SCENE 1
|
||||
|
||||
static Sphere s_Spheres[] =
|
||||
{
|
||||
{float3(0,-100.5,-1), 100},
|
||||
{float3(2,0,-1), 0.5f},
|
||||
{float3(0,0,-1), 0.5f},
|
||||
{float3(-2,0,-1), 0.5f},
|
||||
{float3(2,0,1), 0.5f},
|
||||
{float3(0,0,1), 0.5f},
|
||||
{float3(-2,0,1), 0.5f},
|
||||
{float3(0.5f,1,0.5f), 0.5f},
|
||||
{float3(-1.5f,1.5f,0.f), 0.3f},
|
||||
#if DO_BIG_SCENE
|
||||
{float3(4,0,-3), 0.5f}, {float3(3,0,-3), 0.5f}, {float3(2,0,-3), 0.5f}, {float3(1,0,-3), 0.5f}, {float3(0,0,-3), 0.5f}, {float3(-1,0,-3), 0.5f}, {float3(-2,0,-3), 0.5f}, {float3(-3,0,-3), 0.5f}, {float3(-4,0,-3), 0.5f},
|
||||
{float3(4,0,-4), 0.5f}, {float3(3,0,-4), 0.5f}, {float3(2,0,-4), 0.5f}, {float3(1,0,-4), 0.5f}, {float3(0,0,-4), 0.5f}, {float3(-1,0,-4), 0.5f}, {float3(-2,0,-4), 0.5f}, {float3(-3,0,-4), 0.5f}, {float3(-4,0,-4), 0.5f},
|
||||
{float3(4,0,-5), 0.5f}, {float3(3,0,-5), 0.5f}, {float3(2,0,-5), 0.5f}, {float3(1,0,-5), 0.5f}, {float3(0,0,-5), 0.5f}, {float3(-1,0,-5), 0.5f}, {float3(-2,0,-5), 0.5f}, {float3(-3,0,-5), 0.5f}, {float3(-4,0,-5), 0.5f},
|
||||
{float3(4,0,-6), 0.5f}, {float3(3,0,-6), 0.5f}, {float3(2,0,-6), 0.5f}, {float3(1,0,-6), 0.5f}, {float3(0,0,-6), 0.5f}, {float3(-1,0,-6), 0.5f}, {float3(-2,0,-6), 0.5f}, {float3(-3,0,-6), 0.5f}, {float3(-4,0,-6), 0.5f},
|
||||
{float3(1.5f,1.5f,-2), 0.3f},
|
||||
#endif // #if DO_BIG_SCENE
|
||||
};
|
||||
const int kSphereCount = sizeof(s_Spheres) / sizeof(s_Spheres[0]);
|
||||
|
||||
static SpheresSoA s_SpheresSoA(kSphereCount);
|
||||
|
||||
struct Material
|
||||
{
|
||||
enum Type { Lambert, Metal, Dielectric };
|
||||
Type type;
|
||||
float3 albedo;
|
||||
float3 emissive;
|
||||
float roughness;
|
||||
float ri;
|
||||
};
|
||||
|
||||
static Material s_SphereMats[kSphereCount] =
|
||||
{
|
||||
{ Material::Lambert, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Lambert, float3(0.8f, 0.4f, 0.4f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Lambert, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Metal, float3(0.4f, 0.4f, 0.8f), float3(0,0,0), 0, 0 },
|
||||
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0, 0 },
|
||||
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0.2f, 0 },
|
||||
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0.6f, 0 },
|
||||
{ Material::Dielectric, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 1.5f },
|
||||
{ Material::Lambert, float3(0.8f, 0.6f, 0.2f), float3(30,25,15), 0, 0 },
|
||||
#if DO_BIG_SCENE
|
||||
{ Material::Lambert, float3(0.1f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.2f, 0.2f, 0.2f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.3f, 0.3f, 0.3f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.5f, 0.5f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.6f, 0.6f, 0.6f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.7f, 0.7f, 0.7f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.9f, 0.9f, 0.9f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Metal, float3(0.1f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.2f, 0.2f, 0.2f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.3f, 0.3f, 0.3f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.5f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.6f, 0.6f, 0.6f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.7f, 0.7f, 0.7f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.9f, 0.9f, 0.9f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Metal, float3(0.8f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.5f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.4f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.1f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.1f, 0.8f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Lambert, float3(0.8f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.5f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.4f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.1f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.1f, 0.8f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Lambert, float3(0.1f, 0.2f, 0.5f), float3(3,10,20), 0, 0 },
|
||||
#endif
|
||||
};
|
||||
|
||||
static int s_EmissiveSpheres[kSphereCount];
|
||||
static int s_EmissiveSphereCount;
|
||||
|
||||
static Camera s_Cam;
|
||||
|
||||
const float kMinT = 0.001f;
|
||||
const float kMaxT = 1.0e7f;
|
||||
const int kMaxDepth = 10;
|
||||
|
||||
|
||||
bool HitWorld(const Ray& r, float tMin, float tMax, Hit& outHit, int& outID)
|
||||
{
|
||||
outID = HitSpheres(r, s_SpheresSoA, tMin, tMax, outHit);
|
||||
return outID != -1;
|
||||
}
|
||||
|
||||
|
||||
static bool Scatter(const Material& mat, const Ray& r_in, const Hit& rec, float3& attenuation, Ray& scattered, float3& outLightE, int& inoutRayCount, uint32_t& state)
|
||||
{
|
||||
ZoneScoped;
|
||||
outLightE = float3(0,0,0);
|
||||
if (mat.type == Material::Lambert)
|
||||
{
|
||||
// random point on unit sphere that is tangent to the hit point
|
||||
float3 target = rec.pos + rec.normal + RandomUnitVector(state);
|
||||
scattered = Ray(rec.pos, normalize(target - rec.pos));
|
||||
attenuation = mat.albedo;
|
||||
|
||||
// sample lights
|
||||
#if DO_LIGHT_SAMPLING
|
||||
for (int j = 0; j < s_EmissiveSphereCount; ++j)
|
||||
{
|
||||
int i = s_EmissiveSpheres[j];
|
||||
const Material& smat = s_SphereMats[i];
|
||||
if (&mat == &smat)
|
||||
continue; // skip self
|
||||
const Sphere& s = s_Spheres[i];
|
||||
|
||||
// create a random direction towards sphere
|
||||
// coord system for sampling: sw, su, sv
|
||||
float3 sw = normalize(s.center - rec.pos);
|
||||
float3 su = normalize(cross(fabs(sw.getX())>0.01f ? float3(0,1,0):float3(1,0,0), sw));
|
||||
float3 sv = cross(sw, su);
|
||||
// sample sphere by solid angle
|
||||
float cosAMax = sqrtf(1.0f - s.radius*s.radius / sqLength(rec.pos-s.center));
|
||||
float eps1 = RandomFloat01(state), eps2 = RandomFloat01(state);
|
||||
float cosA = 1.0f - eps1 + eps1 * cosAMax;
|
||||
float sinA = sqrtf(1.0f - cosA*cosA);
|
||||
float phi = 2 * kPI * eps2;
|
||||
float3 l = su * (cosf(phi) * sinA) + sv * (sinf(phi) * sinA) + sw * cosA;
|
||||
//l = normalize(l); // NOTE(fg): This is already normalized, by construction.
|
||||
|
||||
// shoot shadow ray
|
||||
Hit lightHit;
|
||||
int hitID;
|
||||
++inoutRayCount;
|
||||
if (HitWorld(Ray(rec.pos, l), kMinT, kMaxT, lightHit, hitID) && hitID == i)
|
||||
{
|
||||
float omega = 2 * kPI * (1-cosAMax);
|
||||
|
||||
float3 rdir = r_in.dir;
|
||||
AssertUnit(rdir);
|
||||
float3 nl = dot(rec.normal, rdir) < 0 ? rec.normal : -rec.normal;
|
||||
outLightE += (mat.albedo * smat.emissive) * (std::max(0.0f, dot(l, nl)) * omega / kPI);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
else if (mat.type == Material::Metal)
|
||||
{
|
||||
AssertUnit(r_in.dir); AssertUnit(rec.normal);
|
||||
float3 refl = reflect(r_in.dir, rec.normal);
|
||||
// reflected ray, and random inside of sphere based on roughness
|
||||
float roughness = mat.roughness;
|
||||
#if DO_MITSUBA_COMPARE
|
||||
roughness = 0; // until we get better BRDF for metals
|
||||
#endif
|
||||
scattered = Ray(rec.pos, normalize(refl + roughness*RandomInUnitSphere(state)));
|
||||
attenuation = mat.albedo;
|
||||
return dot(scattered.dir, rec.normal) > 0;
|
||||
}
|
||||
else if (mat.type == Material::Dielectric)
|
||||
{
|
||||
AssertUnit(r_in.dir); AssertUnit(rec.normal);
|
||||
float3 outwardN;
|
||||
float3 rdir = r_in.dir;
|
||||
float3 refl = reflect(rdir, rec.normal);
|
||||
float nint;
|
||||
attenuation = float3(1,1,1);
|
||||
float3 refr;
|
||||
float reflProb;
|
||||
float cosine;
|
||||
if (dot(rdir, rec.normal) > 0)
|
||||
{
|
||||
outwardN = -rec.normal;
|
||||
nint = mat.ri;
|
||||
cosine = mat.ri * dot(rdir, rec.normal);
|
||||
}
|
||||
else
|
||||
{
|
||||
outwardN = rec.normal;
|
||||
nint = 1.0f / mat.ri;
|
||||
cosine = -dot(rdir, rec.normal);
|
||||
}
|
||||
if (refract(rdir, outwardN, nint, refr))
|
||||
{
|
||||
reflProb = schlick(cosine, mat.ri);
|
||||
}
|
||||
else
|
||||
{
|
||||
reflProb = 1;
|
||||
}
|
||||
if (RandomFloat01(state) < reflProb)
|
||||
scattered = Ray(rec.pos, normalize(refl));
|
||||
else
|
||||
scattered = Ray(rec.pos, normalize(refr));
|
||||
}
|
||||
else
|
||||
{
|
||||
attenuation = float3(1,0,1);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static float3 Trace(const Ray& r, int depth, int& inoutRayCount, uint32_t& state, bool doMaterialE = true)
|
||||
{
|
||||
ZoneScoped;
|
||||
Hit rec;
|
||||
int id = 0;
|
||||
++inoutRayCount;
|
||||
if (HitWorld(r, kMinT, kMaxT, rec, id))
|
||||
{
|
||||
Ray scattered;
|
||||
float3 attenuation;
|
||||
float3 lightE;
|
||||
const Material& mat = s_SphereMats[id];
|
||||
float3 matE = mat.emissive;
|
||||
if (depth < kMaxDepth && Scatter(mat, r, rec, attenuation, scattered, lightE, inoutRayCount, state))
|
||||
{
|
||||
#if DO_LIGHT_SAMPLING
|
||||
if (!doMaterialE) matE = float3(0,0,0); // don't add material emission if told so
|
||||
// dor Lambert materials, we just did explicit light (emissive) sampling and already
|
||||
// for their contribution, so if next ray bounce hits the light again, don't add
|
||||
// emission
|
||||
doMaterialE = (mat.type != Material::Lambert);
|
||||
#endif
|
||||
return matE + lightE + attenuation * Trace(scattered, depth+1, inoutRayCount, state, doMaterialE);
|
||||
}
|
||||
else
|
||||
{
|
||||
return matE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// sky
|
||||
#if DO_MITSUBA_COMPARE
|
||||
return float3(0.15f,0.21f,0.3f); // easier compare with Mitsuba's constant environment light
|
||||
#else
|
||||
float3 unitDir = r.dir;
|
||||
float t = 0.5f*(unitDir.getY() + 1.0f);
|
||||
return ((1.0f-t)*float3(1.0f, 1.0f, 1.0f) + t*float3(0.5f, 0.7f, 1.0f)) * 0.3f;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#if CPU_CAN_DO_THREADS
|
||||
static enkiTaskScheduler* g_TS;
|
||||
#endif
|
||||
|
||||
void InitializeTest()
|
||||
{
|
||||
ZoneScoped;
|
||||
#if CPU_CAN_DO_THREADS
|
||||
g_TS = enkiNewTaskScheduler();
|
||||
enkiInitTaskSchedulerNumThreads(g_TS, std::max<int>( 2, std::thread::hardware_concurrency() - 2));
|
||||
#endif
|
||||
}
|
||||
|
||||
void ShutdownTest()
|
||||
{
|
||||
ZoneScoped;
|
||||
#if CPU_CAN_DO_THREADS
|
||||
enkiDeleteTaskScheduler(g_TS);
|
||||
#endif
|
||||
}
|
||||
|
||||
struct JobData
|
||||
{
|
||||
float time;
|
||||
int frameCount;
|
||||
int screenWidth, screenHeight;
|
||||
float* backbuffer;
|
||||
Camera* cam;
|
||||
std::atomic<int> rayCount;
|
||||
unsigned testFlags;
|
||||
};
|
||||
|
||||
static void TraceRowJob(uint32_t start, uint32_t end, uint32_t threadnum, void* data_)
|
||||
{
|
||||
ZoneScoped;
|
||||
JobData& data = *(JobData*)data_;
|
||||
float* backbuffer = data.backbuffer + start * data.screenWidth * 4;
|
||||
float invWidth = 1.0f / data.screenWidth;
|
||||
float invHeight = 1.0f / data.screenHeight;
|
||||
float lerpFac = float(data.frameCount) / float(data.frameCount+1);
|
||||
if (data.testFlags & kFlagAnimate)
|
||||
lerpFac *= DO_ANIMATE_SMOOTHING;
|
||||
if (!(data.testFlags & kFlagProgressive))
|
||||
lerpFac = 0;
|
||||
int rayCount = 0;
|
||||
for (uint32_t y = start; y < end; ++y)
|
||||
{
|
||||
uint32_t state = (y * 9781 + data.frameCount * 6271) | 1;
|
||||
for (int x = 0; x < data.screenWidth; ++x)
|
||||
{
|
||||
float3 col(0, 0, 0);
|
||||
for (int s = 0; s < DO_SAMPLES_PER_PIXEL; s++)
|
||||
{
|
||||
float u = float(x + RandomFloat01(state)) * invWidth;
|
||||
float v = float(y + RandomFloat01(state)) * invHeight;
|
||||
Ray r = data.cam->GetRay(u, v, state);
|
||||
col += Trace(r, 0, rayCount, state);
|
||||
}
|
||||
col *= 1.0f / float(DO_SAMPLES_PER_PIXEL);
|
||||
|
||||
float3 prev(backbuffer[0], backbuffer[1], backbuffer[2]);
|
||||
col = prev * lerpFac + col * (1-lerpFac);
|
||||
col.store(backbuffer);
|
||||
backbuffer += 4;
|
||||
}
|
||||
}
|
||||
data.rayCount += rayCount;
|
||||
}
|
||||
|
||||
void UpdateTest(float time, int frameCount, int screenWidth, int screenHeight, unsigned testFlags)
|
||||
{
|
||||
ZoneScoped;
|
||||
if (testFlags & kFlagAnimate)
|
||||
{
|
||||
s_Spheres[1].center.setY(cosf(time) + 1.0f);
|
||||
s_Spheres[8].center.setZ(sinf(time)*0.3f);
|
||||
}
|
||||
float3 lookfrom(0, 2, 3);
|
||||
float3 lookat(0, 0, 0);
|
||||
float distToFocus = 3;
|
||||
#if DO_MITSUBA_COMPARE
|
||||
float aperture = 0.0f;
|
||||
#else
|
||||
float aperture = 0.1f;
|
||||
#endif
|
||||
#if DO_BIG_SCENE
|
||||
aperture *= 0.2f;
|
||||
#endif
|
||||
|
||||
s_EmissiveSphereCount = 0;
|
||||
for (int i = 0; i < kSphereCount; ++i)
|
||||
{
|
||||
Sphere& s = s_Spheres[i];
|
||||
s.UpdateDerivedData();
|
||||
s_SpheresSoA.centerX[i] = s.center.getX();
|
||||
s_SpheresSoA.centerY[i] = s.center.getY();
|
||||
s_SpheresSoA.centerZ[i] = s.center.getZ();
|
||||
s_SpheresSoA.sqRadius[i] = s.radius * s.radius;
|
||||
s_SpheresSoA.invRadius[i] = s.invRadius;
|
||||
|
||||
// Remember IDs of emissive spheres (light sources)
|
||||
const Material& smat = s_SphereMats[i];
|
||||
if (smat.emissive.getX() > 0 || smat.emissive.getY() > 0 || smat.emissive.getZ() > 0)
|
||||
{
|
||||
s_EmissiveSpheres[s_EmissiveSphereCount] = i;
|
||||
s_EmissiveSphereCount++;
|
||||
}
|
||||
}
|
||||
|
||||
s_Cam = Camera(lookfrom, lookat, float3(0, 1, 0), 60, float(screenWidth) / float(screenHeight), aperture, distToFocus);
|
||||
}
|
||||
|
||||
void DrawTest(float time, int frameCount, int screenWidth, int screenHeight, float* backbuffer, int& outRayCount, unsigned testFlags)
|
||||
{
|
||||
ZoneScoped;
|
||||
JobData args;
|
||||
args.time = time;
|
||||
args.frameCount = frameCount;
|
||||
args.screenWidth = screenWidth;
|
||||
args.screenHeight = screenHeight;
|
||||
args.backbuffer = backbuffer;
|
||||
args.cam = &s_Cam;
|
||||
args.testFlags = testFlags;
|
||||
args.rayCount = 0;
|
||||
|
||||
#if CPU_CAN_DO_THREADS
|
||||
enkiTaskSet* task = enkiCreateTaskSet(g_TS, TraceRowJob);
|
||||
bool threaded = true;
|
||||
enkiAddTaskSetToPipeMinRange(g_TS, task, &args, screenHeight, threaded ? 4 : screenHeight);
|
||||
enkiWaitForTaskSet(g_TS, task);
|
||||
enkiDeleteTaskSet(task);
|
||||
#else
|
||||
TraceRowJob(0, screenHeight, 0, &args);
|
||||
#endif
|
||||
|
||||
outRayCount = args.rayCount;
|
||||
}
|
||||
|
||||
void GetObjectCount(int& outCount, int& outObjectSize, int& outMaterialSize, int& outCamSize)
|
||||
{
|
||||
ZoneScoped;
|
||||
outCount = kSphereCount;
|
||||
outObjectSize = sizeof(Sphere);
|
||||
outMaterialSize = sizeof(Material);
|
||||
outCamSize = sizeof(Camera);
|
||||
}
|
||||
|
||||
void GetSceneDesc(void* outObjects, void* outMaterials, void* outCam, void* outEmissives, int* outEmissiveCount)
|
||||
{
|
||||
ZoneScoped;
|
||||
memcpy(outObjects, s_Spheres, kSphereCount * sizeof(s_Spheres[0]));
|
||||
memcpy(outMaterials, s_SphereMats, kSphereCount * sizeof(s_SphereMats[0]));
|
||||
memcpy(outCam, &s_Cam, sizeof(s_Cam));
|
||||
memcpy(outEmissives, s_EmissiveSpheres, s_EmissiveSphereCount * sizeof(s_EmissiveSpheres[0]));
|
||||
*outEmissiveCount = s_EmissiveSphereCount;
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
enum TestFlags
|
||||
{
|
||||
kFlagAnimate = (1 << 0),
|
||||
kFlagProgressive = (1 << 1),
|
||||
};
|
||||
|
||||
void InitializeTest();
|
||||
void ShutdownTest();
|
||||
|
||||
void UpdateTest(float time, int frameCount, int screenWidth, int screenHeight, unsigned testFlags);
|
||||
void DrawTest(float time, int frameCount, int screenWidth, int screenHeight, float* backbuffer, int& outRayCount, unsigned testFlags);
|
||||
|
||||
void GetObjectCount(int& outCount, int& outObjectSize, int& outMaterialSize, int& outCamSize);
|
||||
void GetSceneDesc(void* outObjects, void* outMaterials, void* outCam, void* outEmissives, int* outEmissiveCount);
|
|
@ -0,0 +1,79 @@
|
|||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <Windows.h>
|
||||
#undef GetObject
|
||||
#include <intrin.h>
|
||||
|
||||
extern "C" void _ReadWriteBarrier();
|
||||
#pragma intrinsic(_ReadWriteBarrier)
|
||||
#pragma intrinsic(_InterlockedCompareExchange)
|
||||
#pragma intrinsic(_InterlockedExchangeAdd)
|
||||
|
||||
// Memory Barriers to prevent CPU and Compiler re-ordering
|
||||
#define BASE_MEMORYBARRIER_ACQUIRE() _ReadWriteBarrier()
|
||||
#define BASE_MEMORYBARRIER_RELEASE() _ReadWriteBarrier()
|
||||
#define BASE_ALIGN(x) __declspec( align( x ) )
|
||||
|
||||
#else
|
||||
#define BASE_MEMORYBARRIER_ACQUIRE() __asm__ __volatile__("": : :"memory")
|
||||
#define BASE_MEMORYBARRIER_RELEASE() __asm__ __volatile__("": : :"memory")
|
||||
#define BASE_ALIGN(x) __attribute__ ((aligned( x )))
|
||||
#endif
|
||||
|
||||
namespace enki
|
||||
{
|
||||
// Atomically performs: if( *pDest == compareWith ) { *pDest = swapTo; }
|
||||
// returns old *pDest (so if successfull, returns compareWith)
|
||||
inline uint32_t AtomicCompareAndSwap( volatile uint32_t* pDest, uint32_t swapTo, uint32_t compareWith )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// assumes two's complement - unsigned / signed conversion leads to same bit pattern
|
||||
return _InterlockedCompareExchange( (volatile long*)pDest,swapTo, compareWith );
|
||||
#else
|
||||
return __sync_val_compare_and_swap( pDest, compareWith, swapTo );
|
||||
#endif
|
||||
}
|
||||
|
||||
inline uint64_t AtomicCompareAndSwap( volatile uint64_t* pDest, uint64_t swapTo, uint64_t compareWith )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// assumes two's complement - unsigned / signed conversion leads to same bit pattern
|
||||
return _InterlockedCompareExchange64( (__int64 volatile*)pDest, swapTo, compareWith );
|
||||
#else
|
||||
return __sync_val_compare_and_swap( pDest, compareWith, swapTo );
|
||||
#endif
|
||||
}
|
||||
|
||||
// Atomically performs: tmp = *pDest; *pDest += value; return tmp;
|
||||
inline int32_t AtomicAdd( volatile int32_t* pDest, int32_t value )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return _InterlockedExchangeAdd( (long*)pDest, value );
|
||||
#else
|
||||
return __sync_fetch_and_add( pDest, value );
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
240
3rdparty/tracy/examples/ToyPathTracer/Source/enkiTS/LockLessMultiReadPipe.h
vendored
Normal file
240
3rdparty/tracy/examples/ToyPathTracer/Source/enkiTS/LockLessMultiReadPipe.h
vendored
Normal file
|
@ -0,0 +1,240 @@
|
|||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "Atomics.h"
|
||||
#include <string.h>
|
||||
|
||||
|
||||
namespace enki
|
||||
{
|
||||
// LockLessMultiReadPipe - Single writer, multiple reader thread safe pipe using (semi) lockless programming
|
||||
// Readers can only read from the back of the pipe
|
||||
// The single writer can write to the front of the pipe, and read from both ends (a writer can be a reader)
|
||||
// for many of the principles used here, see http://msdn.microsoft.com/en-us/library/windows/desktop/ee418650(v=vs.85).aspx
|
||||
// Note: using log2 sizes so we do not need to clamp (multi-operation)
|
||||
// T is the contained type
|
||||
// Note this is not true lockless as the use of flags as a form of lock state.
|
||||
template<uint8_t cSizeLog2, typename T> class LockLessMultiReadPipe
|
||||
{
|
||||
public:
|
||||
LockLessMultiReadPipe();
|
||||
~LockLessMultiReadPipe() {}
|
||||
|
||||
// ReaderTryReadBack returns false if we were unable to read
|
||||
// This is thread safe for both multiple readers and the writer
|
||||
bool ReaderTryReadBack( T* pOut );
|
||||
|
||||
// WriterTryReadFront returns false if we were unable to read
|
||||
// This is thread safe for the single writer, but should not be called by readers
|
||||
bool WriterTryReadFront( T* pOut );
|
||||
|
||||
// WriterTryWriteFront returns false if we were unable to write
|
||||
// This is thread safe for the single writer, but should not be called by readers
|
||||
bool WriterTryWriteFront( const T& in );
|
||||
|
||||
// IsPipeEmpty() is a utility function, not intended for general use
|
||||
// Should only be used very prudently.
|
||||
bool IsPipeEmpty() const
|
||||
{
|
||||
return 0 == m_WriteIndex - m_ReadCount;
|
||||
}
|
||||
|
||||
void Clear()
|
||||
{
|
||||
m_WriteIndex = 0;
|
||||
m_ReadIndex = 0;
|
||||
m_ReadCount = 0;
|
||||
memset( (void*)m_Flags, 0, sizeof( m_Flags ) );
|
||||
}
|
||||
|
||||
private:
|
||||
const static uint32_t ms_cSize = ( 1 << cSizeLog2 );
|
||||
const static uint32_t ms_cIndexMask = ms_cSize - 1;
|
||||
const static uint32_t FLAG_INVALID = 0xFFFFFFFF; // 32bit for CAS
|
||||
const static uint32_t FLAG_CAN_WRITE = 0x00000000; // 32bit for CAS
|
||||
const static uint32_t FLAG_CAN_READ = 0x11111111; // 32bit for CAS
|
||||
|
||||
T m_Buffer[ ms_cSize ];
|
||||
|
||||
// read and write indexes allow fast access to the pipe, but actual access
|
||||
// controlled by the access flags.
|
||||
volatile uint32_t BASE_ALIGN(4) m_WriteIndex;
|
||||
volatile uint32_t BASE_ALIGN(4) m_ReadCount;
|
||||
volatile uint32_t m_Flags[ ms_cSize ];
|
||||
volatile uint32_t BASE_ALIGN(4) m_ReadIndex;
|
||||
};
|
||||
|
||||
template<uint8_t cSizeLog2, typename T> inline
|
||||
LockLessMultiReadPipe<cSizeLog2,T>::LockLessMultiReadPipe()
|
||||
: m_WriteIndex(0)
|
||||
, m_ReadIndex(0)
|
||||
, m_ReadCount(0)
|
||||
{
|
||||
assert( cSizeLog2 < 32 );
|
||||
memset( (void*)m_Flags, 0, sizeof( m_Flags ) );
|
||||
}
|
||||
|
||||
template<uint8_t cSizeLog2, typename T> inline
|
||||
bool LockLessMultiReadPipe<cSizeLog2,T>::ReaderTryReadBack( T* pOut )
|
||||
{
|
||||
|
||||
uint32_t actualReadIndex;
|
||||
|
||||
uint32_t readCount = m_ReadCount;
|
||||
|
||||
// We get hold of read index for consistency,
|
||||
// and do first pass starting at read count
|
||||
uint32_t readIndexToUse = readCount;
|
||||
|
||||
|
||||
while(true)
|
||||
{
|
||||
|
||||
uint32_t writeIndex = m_WriteIndex;
|
||||
// power of two sizes ensures we can use a simple calc without modulus
|
||||
uint32_t numInPipe = writeIndex - readCount;
|
||||
if( 0 == numInPipe )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if( readIndexToUse >= writeIndex )
|
||||
{
|
||||
// move back to start
|
||||
readIndexToUse = m_ReadIndex;
|
||||
}
|
||||
|
||||
|
||||
// power of two sizes ensures we can perform AND for a modulus
|
||||
actualReadIndex = readIndexToUse & ms_cIndexMask;
|
||||
|
||||
// Multiple potential readers mean we should check if the data is valid,
|
||||
// using an atomic compare exchange
|
||||
uint32_t previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ );
|
||||
if( FLAG_CAN_READ == previous )
|
||||
{
|
||||
break;
|
||||
}
|
||||
++readIndexToUse;
|
||||
|
||||
//update known readcount
|
||||
readCount = m_ReadCount;
|
||||
}
|
||||
|
||||
// we update the read index using an atomic add, as we've only read one piece of data.
|
||||
// this ensure consistency of the read index, and the above loop ensures readers
|
||||
// only read from unread data
|
||||
AtomicAdd( (volatile int32_t*)&m_ReadCount, 1 );
|
||||
|
||||
BASE_MEMORYBARRIER_ACQUIRE();
|
||||
// now read data, ensuring we do so after above reads & CAS
|
||||
*pOut = m_Buffer[ actualReadIndex ];
|
||||
|
||||
m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<uint8_t cSizeLog2, typename T> inline
|
||||
bool LockLessMultiReadPipe<cSizeLog2,T>::WriterTryReadFront( T* pOut )
|
||||
{
|
||||
uint32_t writeIndex = m_WriteIndex;
|
||||
uint32_t frontReadIndex = writeIndex;
|
||||
|
||||
// Multiple potential readers mean we should check if the data is valid,
|
||||
// using an atomic compare exchange - which acts as a form of lock (so not quite lockless really).
|
||||
uint32_t previous = FLAG_INVALID;
|
||||
uint32_t actualReadIndex = 0;
|
||||
while( true )
|
||||
{
|
||||
// power of two sizes ensures we can use a simple calc without modulus
|
||||
uint32_t readCount = m_ReadCount;
|
||||
uint32_t numInPipe = writeIndex - readCount;
|
||||
if( 0 == numInPipe || 0 == frontReadIndex )
|
||||
{
|
||||
// frontReadIndex can get to 0 here if that item was just being read by another thread.
|
||||
m_ReadIndex = readCount;
|
||||
return false;
|
||||
}
|
||||
--frontReadIndex;
|
||||
actualReadIndex = frontReadIndex & ms_cIndexMask;
|
||||
previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ );
|
||||
if( FLAG_CAN_READ == previous )
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if( m_ReadIndex >= frontReadIndex )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// now read data, ensuring we do so after above reads & CAS
|
||||
*pOut = m_Buffer[ actualReadIndex ];
|
||||
|
||||
m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
|
||||
|
||||
BASE_MEMORYBARRIER_RELEASE();
|
||||
|
||||
// 32-bit aligned stores are atomic, and writer owns the write index
|
||||
// we only move one back as this is as many as we have read, not where we have read from.
|
||||
--m_WriteIndex;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template<uint8_t cSizeLog2, typename T> inline
|
||||
bool LockLessMultiReadPipe<cSizeLog2,T>::WriterTryWriteFront( const T& in )
|
||||
{
|
||||
// The writer 'owns' the write index, and readers can only reduce
|
||||
// the amount of data in the pipe.
|
||||
// We get hold of both values for consistency and to reduce false sharing
|
||||
// impacting more than one access
|
||||
uint32_t writeIndex = m_WriteIndex;
|
||||
|
||||
|
||||
// power of two sizes ensures we can perform AND for a modulus
|
||||
uint32_t actualWriteIndex = writeIndex & ms_cIndexMask;
|
||||
|
||||
// a reader may still be reading this item, as there are multiple readers
|
||||
if( m_Flags[ actualWriteIndex ] != FLAG_CAN_WRITE )
|
||||
{
|
||||
return false; // still being read, so have caught up with tail.
|
||||
}
|
||||
|
||||
|
||||
// as we are the only writer we can update the data without atomics
|
||||
// whilst the write index has not been updated
|
||||
m_Buffer[ actualWriteIndex ] = in;
|
||||
m_Flags[ actualWriteIndex ] = FLAG_CAN_READ;
|
||||
|
||||
// We need to ensure the above writes occur prior to updating the write index,
|
||||
// otherwise another thread might read before it's finished
|
||||
BASE_MEMORYBARRIER_RELEASE();
|
||||
|
||||
// 32-bit aligned stores are atomic, and the writer controls the write index
|
||||
++writeIndex;
|
||||
m_WriteIndex = writeIndex;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,437 @@
|
|||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "TaskScheduler.h"
|
||||
#include "LockLessMultiReadPipe.h"
|
||||
|
||||
|
||||
|
||||
using namespace enki;
|
||||
|
||||
|
||||
static const uint32_t PIPESIZE_LOG2 = 8;
|
||||
static const uint32_t SPIN_COUNT = 100;
|
||||
static const uint32_t SPIN_BACKOFF_MULTIPLIER = 10;
|
||||
static const uint32_t MAX_NUM_INITIAL_PARTITIONS = 8;
|
||||
|
||||
// each software thread gets it's own copy of gtl_threadNum, so this is safe to use as a static variable
|
||||
static THREAD_LOCAL uint32_t gtl_threadNum = 0;
|
||||
|
||||
namespace enki
|
||||
{
|
||||
struct SubTaskSet
|
||||
{
|
||||
ITaskSet* pTask;
|
||||
TaskSetPartition partition;
|
||||
};
|
||||
|
||||
// we derive class TaskPipe rather than typedef to get forward declaration working easily
|
||||
class TaskPipe : public LockLessMultiReadPipe<PIPESIZE_LOG2,enki::SubTaskSet> {};
|
||||
|
||||
struct ThreadArgs
|
||||
{
|
||||
uint32_t threadNum;
|
||||
TaskScheduler* pTaskScheduler;
|
||||
};
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
SubTaskSet SplitTask( SubTaskSet& subTask_, uint32_t rangeToSplit_ )
|
||||
{
|
||||
SubTaskSet splitTask = subTask_;
|
||||
uint32_t rangeLeft = subTask_.partition.end - subTask_.partition.start;
|
||||
|
||||
if( rangeToSplit_ > rangeLeft )
|
||||
{
|
||||
rangeToSplit_ = rangeLeft;
|
||||
}
|
||||
splitTask.partition.end = subTask_.partition.start + rangeToSplit_;
|
||||
subTask_.partition.start = splitTask.partition.end;
|
||||
return splitTask;
|
||||
}
|
||||
|
||||
#if defined _WIN32
|
||||
#if defined _M_IX86 || defined _M_X64
|
||||
#pragma intrinsic(_mm_pause)
|
||||
inline void Pause() { _mm_pause(); }
|
||||
#endif
|
||||
#elif defined __i386__ || defined __x86_64__
|
||||
inline void Pause() { __asm__ __volatile__("pause;"); }
|
||||
#else
|
||||
inline void Pause() { ;} // may have NOP or yield equiv
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static void SafeCallback(ProfilerCallbackFunc func_, uint32_t threadnum_)
|
||||
{
|
||||
if( func_ )
|
||||
{
|
||||
func_(threadnum_);
|
||||
}
|
||||
}
|
||||
|
||||
ProfilerCallbacks* TaskScheduler::GetProfilerCallbacks()
|
||||
{
|
||||
return &m_ProfilerCallbacks;
|
||||
}
|
||||
|
||||
THREADFUNC_DECL TaskScheduler::TaskingThreadFunction( void* pArgs )
|
||||
{
|
||||
ThreadArgs args = *(ThreadArgs*)pArgs;
|
||||
uint32_t threadNum = args.threadNum;
|
||||
TaskScheduler* pTS = args.pTaskScheduler;
|
||||
gtl_threadNum = threadNum;
|
||||
|
||||
SafeCallback( pTS->m_ProfilerCallbacks.threadStart, threadNum );
|
||||
|
||||
uint32_t spinCount = 0;
|
||||
uint32_t hintPipeToCheck_io = threadNum + 1; // does not need to be clamped.
|
||||
while( pTS->m_bRunning )
|
||||
{
|
||||
if(!pTS->TryRunTask( threadNum, hintPipeToCheck_io ) )
|
||||
{
|
||||
// no tasks, will spin then wait
|
||||
++spinCount;
|
||||
if( spinCount > SPIN_COUNT )
|
||||
{
|
||||
pTS->WaitForTasks( threadNum );
|
||||
spinCount = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t spinBackoffCount = spinCount * SPIN_BACKOFF_MULTIPLIER;
|
||||
while( spinBackoffCount )
|
||||
{
|
||||
Pause();
|
||||
--spinBackoffCount;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
spinCount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
AtomicAdd( &pTS->m_NumThreadsRunning, -1 );
|
||||
SafeCallback( pTS->m_ProfilerCallbacks.threadStop, threadNum );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void TaskScheduler::StartThreads()
|
||||
{
|
||||
if( m_bHaveThreads )
|
||||
{
|
||||
return;
|
||||
}
|
||||
m_bRunning = true;
|
||||
|
||||
SemaphoreCreate( m_NewTaskSemaphore );
|
||||
|
||||
// we create one less thread than m_NumThreads as the main thread counts as one
|
||||
m_pThreadNumStore = new ThreadArgs[m_NumThreads];
|
||||
m_pThreadIDs = new threadid_t[m_NumThreads];
|
||||
m_pThreadNumStore[0].threadNum = 0;
|
||||
m_pThreadNumStore[0].pTaskScheduler = this;
|
||||
m_pThreadIDs[0] = 0;
|
||||
m_NumThreadsWaiting = 0;
|
||||
m_NumThreadsRunning = 1;// acount for main thread
|
||||
for( uint32_t thread = 1; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
m_pThreadNumStore[thread].threadNum = thread;
|
||||
m_pThreadNumStore[thread].pTaskScheduler = this;
|
||||
ThreadCreate( &m_pThreadIDs[thread], TaskingThreadFunction, &m_pThreadNumStore[thread] );
|
||||
++m_NumThreadsRunning;
|
||||
}
|
||||
|
||||
// ensure we have sufficient tasks to equally fill either all threads including main
|
||||
// or just the threads we've launched, this is outside the firstinit as we want to be able
|
||||
// to runtime change it
|
||||
if( 1 == m_NumThreads )
|
||||
{
|
||||
m_NumPartitions = 1;
|
||||
m_NumInitialPartitions = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_NumPartitions = m_NumThreads * (m_NumThreads - 1);
|
||||
m_NumInitialPartitions = m_NumThreads - 1;
|
||||
if( m_NumInitialPartitions > MAX_NUM_INITIAL_PARTITIONS )
|
||||
{
|
||||
m_NumInitialPartitions = MAX_NUM_INITIAL_PARTITIONS;
|
||||
}
|
||||
}
|
||||
|
||||
m_bHaveThreads = true;
|
||||
}
|
||||
|
||||
void TaskScheduler::StopThreads( bool bWait_ )
|
||||
{
|
||||
if( m_bHaveThreads )
|
||||
{
|
||||
// wait for them threads quit before deleting data
|
||||
m_bRunning = false;
|
||||
while( bWait_ && m_NumThreadsRunning > 1 )
|
||||
{
|
||||
// keep firing event to ensure all threads pick up state of m_bRunning
|
||||
SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsRunning );
|
||||
}
|
||||
|
||||
for( uint32_t thread = 1; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
ThreadTerminate( m_pThreadIDs[thread] );
|
||||
}
|
||||
|
||||
m_NumThreads = 0;
|
||||
delete[] m_pThreadNumStore;
|
||||
delete[] m_pThreadIDs;
|
||||
m_pThreadNumStore = 0;
|
||||
m_pThreadIDs = 0;
|
||||
SemaphoreClose( m_NewTaskSemaphore );
|
||||
|
||||
m_bHaveThreads = false;
|
||||
m_NumThreadsWaiting = 0;
|
||||
m_NumThreadsRunning = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool TaskScheduler::TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ )
|
||||
{
|
||||
// check for tasks
|
||||
SubTaskSet subTask;
|
||||
bool bHaveTask = m_pPipesPerThread[ threadNum ].WriterTryReadFront( &subTask );
|
||||
|
||||
uint32_t threadToCheck = hintPipeToCheck_io_;
|
||||
uint32_t checkCount = 0;
|
||||
while( !bHaveTask && checkCount < m_NumThreads )
|
||||
{
|
||||
threadToCheck = ( hintPipeToCheck_io_ + checkCount ) % m_NumThreads;
|
||||
if( threadToCheck != threadNum )
|
||||
{
|
||||
bHaveTask = m_pPipesPerThread[ threadToCheck ].ReaderTryReadBack( &subTask );
|
||||
}
|
||||
++checkCount;
|
||||
}
|
||||
|
||||
if( bHaveTask )
|
||||
{
|
||||
// update hint, will preserve value unless actually got task from another thread.
|
||||
hintPipeToCheck_io_ = threadToCheck;
|
||||
|
||||
uint32_t partitionSize = subTask.partition.end - subTask.partition.start;
|
||||
if( subTask.pTask->m_RangeToRun < partitionSize )
|
||||
{
|
||||
SubTaskSet taskToRun = SplitTask( subTask, subTask.pTask->m_RangeToRun );
|
||||
SplitAndAddTask( gtl_threadNum, subTask, subTask.pTask->m_RangeToRun, 0 );
|
||||
taskToRun.pTask->ExecuteRange( taskToRun.partition, threadNum );
|
||||
AtomicAdd( &taskToRun.pTask->m_RunningCount, -1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
// the task has already been divided up by AddTaskSetToPipe, so just run it
|
||||
subTask.pTask->ExecuteRange( subTask.partition, threadNum );
|
||||
AtomicAdd( &subTask.pTask->m_RunningCount, -1 );
|
||||
}
|
||||
}
|
||||
|
||||
return bHaveTask;
|
||||
|
||||
}
|
||||
|
||||
void TaskScheduler::WaitForTasks( uint32_t threadNum )
|
||||
{
|
||||
// We incrememt the number of threads waiting here in order
|
||||
// to ensure that the check for tasks occurs after the increment
|
||||
// to prevent a task being added after a check, then the thread waiting.
|
||||
// This will occasionally result in threads being mistakenly awoken,
|
||||
// but they will then go back to sleep.
|
||||
AtomicAdd( &m_NumThreadsWaiting, 1 );
|
||||
|
||||
bool bHaveTasks = false;
|
||||
for( uint32_t thread = 0; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
if( !m_pPipesPerThread[ thread ].IsPipeEmpty() )
|
||||
{
|
||||
bHaveTasks = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( !bHaveTasks )
|
||||
{
|
||||
SafeCallback( m_ProfilerCallbacks.waitStart, threadNum );
|
||||
SemaphoreWait( m_NewTaskSemaphore );
|
||||
SafeCallback( m_ProfilerCallbacks.waitStop, threadNum );
|
||||
}
|
||||
|
||||
int32_t prev = AtomicAdd( &m_NumThreadsWaiting, -1 );
|
||||
assert( prev != 0 );
|
||||
}
|
||||
|
||||
void TaskScheduler::WakeThreads()
|
||||
{
|
||||
SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsWaiting );
|
||||
}
|
||||
|
||||
void TaskScheduler::SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_,
|
||||
uint32_t rangeToSplit_, int32_t runningCountOffset_ )
|
||||
{
|
||||
int32_t numAdded = 0;
|
||||
while( subTask_.partition.start != subTask_.partition.end )
|
||||
{
|
||||
SubTaskSet taskToAdd = SplitTask( subTask_, rangeToSplit_ );
|
||||
|
||||
// add the partition to the pipe
|
||||
++numAdded;
|
||||
if( !m_pPipesPerThread[ gtl_threadNum ].WriterTryWriteFront( taskToAdd ) )
|
||||
{
|
||||
if( numAdded > 1 )
|
||||
{
|
||||
WakeThreads();
|
||||
}
|
||||
// alter range to run the appropriate fraction
|
||||
if( taskToAdd.pTask->m_RangeToRun < rangeToSplit_ )
|
||||
{
|
||||
taskToAdd.partition.end = taskToAdd.partition.start + taskToAdd.pTask->m_RangeToRun;
|
||||
subTask_.partition.start = taskToAdd.partition.end;
|
||||
}
|
||||
taskToAdd.pTask->ExecuteRange( taskToAdd.partition, threadNum_ );
|
||||
--numAdded;
|
||||
}
|
||||
}
|
||||
|
||||
// increment running count by number added
|
||||
AtomicAdd( &subTask_.pTask->m_RunningCount, numAdded + runningCountOffset_ );
|
||||
|
||||
WakeThreads();
|
||||
}
|
||||
|
||||
void TaskScheduler::AddTaskSetToPipe( ITaskSet* pTaskSet )
|
||||
{
|
||||
// set running count to -1 to guarantee it won't be found complete until all subtasks added
|
||||
pTaskSet->m_RunningCount = -1;
|
||||
|
||||
// divide task up and add to pipe
|
||||
pTaskSet->m_RangeToRun = pTaskSet->m_SetSize / m_NumPartitions;
|
||||
if( pTaskSet->m_RangeToRun < pTaskSet->m_MinRange ) { pTaskSet->m_RangeToRun = pTaskSet->m_MinRange; }
|
||||
|
||||
uint32_t rangeToSplit = pTaskSet->m_SetSize / m_NumInitialPartitions;
|
||||
if( rangeToSplit < pTaskSet->m_MinRange ) { rangeToSplit = pTaskSet->m_MinRange; }
|
||||
|
||||
SubTaskSet subTask;
|
||||
subTask.pTask = pTaskSet;
|
||||
subTask.partition.start = 0;
|
||||
subTask.partition.end = pTaskSet->m_SetSize;
|
||||
SplitAndAddTask( gtl_threadNum, subTask, rangeToSplit, 1 );
|
||||
}
|
||||
|
||||
void TaskScheduler::WaitforTaskSet( const ITaskSet* pTaskSet )
|
||||
{
|
||||
uint32_t hintPipeToCheck_io = gtl_threadNum + 1; // does not need to be clamped.
|
||||
if( pTaskSet )
|
||||
{
|
||||
while( pTaskSet->m_RunningCount )
|
||||
{
|
||||
TryRunTask( gtl_threadNum, hintPipeToCheck_io );
|
||||
// should add a spin then wait for task completion event.
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
TryRunTask( gtl_threadNum, hintPipeToCheck_io );
|
||||
}
|
||||
}
|
||||
|
||||
void TaskScheduler::WaitforAll()
|
||||
{
|
||||
bool bHaveTasks = true;
|
||||
uint32_t hintPipeToCheck_io = gtl_threadNum + 1; // does not need to be clamped.
|
||||
int32_t threadsRunning = m_NumThreadsRunning - 1;
|
||||
while( bHaveTasks || m_NumThreadsWaiting < threadsRunning )
|
||||
{
|
||||
TryRunTask( gtl_threadNum, hintPipeToCheck_io );
|
||||
bHaveTasks = false;
|
||||
for( uint32_t thread = 0; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
if( !m_pPipesPerThread[ thread ].IsPipeEmpty() )
|
||||
{
|
||||
bHaveTasks = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TaskScheduler::WaitforAllAndShutdown()
|
||||
{
|
||||
WaitforAll();
|
||||
StopThreads(true);
|
||||
delete[] m_pPipesPerThread;
|
||||
m_pPipesPerThread = 0;
|
||||
}
|
||||
|
||||
uint32_t TaskScheduler::GetNumTaskThreads() const
|
||||
{
|
||||
return m_NumThreads;
|
||||
}
|
||||
|
||||
TaskScheduler::TaskScheduler()
|
||||
: m_pPipesPerThread(NULL)
|
||||
, m_NumThreads(0)
|
||||
, m_pThreadNumStore(NULL)
|
||||
, m_pThreadIDs(NULL)
|
||||
, m_bRunning(false)
|
||||
, m_NumThreadsRunning(0)
|
||||
, m_NumThreadsWaiting(0)
|
||||
, m_NumPartitions(0)
|
||||
, m_bHaveThreads(false)
|
||||
{
|
||||
memset(&m_ProfilerCallbacks, 0, sizeof(m_ProfilerCallbacks));
|
||||
}
|
||||
|
||||
TaskScheduler::~TaskScheduler()
|
||||
{
|
||||
StopThreads( true ); // Stops threads, waiting for them.
|
||||
|
||||
delete[] m_pPipesPerThread;
|
||||
m_pPipesPerThread = 0;
|
||||
}
|
||||
|
||||
void TaskScheduler::Initialize( uint32_t numThreads_ )
|
||||
{
|
||||
assert( numThreads_ );
|
||||
StopThreads( true ); // Stops threads, waiting for them.
|
||||
delete[] m_pPipesPerThread;
|
||||
|
||||
m_NumThreads = numThreads_;
|
||||
|
||||
m_pPipesPerThread = new TaskPipe[ m_NumThreads ];
|
||||
|
||||
StartThreads();
|
||||
}
|
||||
|
||||
void TaskScheduler::Initialize()
|
||||
{
|
||||
Initialize( GetNumHardwareThreads() );
|
||||
}
|
|
@ -0,0 +1,177 @@
|
|||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include "Threads.h"
|
||||
|
||||
namespace enki
|
||||
{
|
||||
|
||||
struct TaskSetPartition
|
||||
{
|
||||
uint32_t start;
|
||||
uint32_t end;
|
||||
};
|
||||
|
||||
class TaskScheduler;
|
||||
class TaskPipe;
|
||||
struct ThreadArgs;
|
||||
struct SubTaskSet;
|
||||
|
||||
// Subclass ITaskSet to create tasks.
|
||||
// TaskSets can be re-used, but check
|
||||
class ITaskSet
|
||||
{
|
||||
public:
|
||||
ITaskSet()
|
||||
: m_SetSize(1)
|
||||
, m_MinRange(1)
|
||||
, m_RunningCount(0)
|
||||
, m_RangeToRun(1)
|
||||
{}
|
||||
|
||||
ITaskSet( uint32_t setSize_ )
|
||||
: m_SetSize( setSize_ )
|
||||
, m_MinRange(1)
|
||||
, m_RunningCount(0)
|
||||
, m_RangeToRun(1)
|
||||
{}
|
||||
|
||||
ITaskSet( uint32_t setSize_, uint32_t minRange_ )
|
||||
: m_SetSize( setSize_ )
|
||||
, m_MinRange( minRange_ )
|
||||
, m_RunningCount(0)
|
||||
, m_RangeToRun(minRange_)
|
||||
{}
|
||||
|
||||
// Execute range should be overloaded to process tasks. It will be called with a
|
||||
// range_ where range.start >= 0; range.start < range.end; and range.end < m_SetSize;
|
||||
// The range values should be mapped so that linearly processing them in order is cache friendly
|
||||
// i.e. neighbouring values should be close together.
|
||||
// threadnum should not be used for changing processing of data, it's intended purpose
|
||||
// is to allow per-thread data buckets for output.
|
||||
virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum ) = 0;
|
||||
|
||||
// Size of set - usually the number of data items to be processed, see ExecuteRange. Defaults to 1
|
||||
uint32_t m_SetSize;
|
||||
|
||||
// Minimum size of of TaskSetPartition range when splitting a task set into partitions.
|
||||
// This should be set to a value which results in computation effort of at least 10k
|
||||
// clock cycles to minimize tast scheduler overhead.
|
||||
// NOTE: The last partition will be smaller than m_MinRange if m_SetSize is not a multiple
|
||||
// of m_MinRange.
|
||||
// Also known as grain size in literature.
|
||||
uint32_t m_MinRange;
|
||||
|
||||
bool GetIsComplete()
|
||||
{
|
||||
return 0 == m_RunningCount;
|
||||
}
|
||||
private:
|
||||
friend class TaskScheduler;
|
||||
volatile int32_t m_RunningCount;
|
||||
uint32_t m_RangeToRun;
|
||||
};
|
||||
|
||||
// TaskScheduler implements several callbacks intended for profilers
|
||||
typedef void (*ProfilerCallbackFunc)( uint32_t threadnum_ );
|
||||
struct ProfilerCallbacks
|
||||
{
|
||||
ProfilerCallbackFunc threadStart;
|
||||
ProfilerCallbackFunc threadStop;
|
||||
ProfilerCallbackFunc waitStart;
|
||||
ProfilerCallbackFunc waitStop;
|
||||
};
|
||||
|
||||
class TaskScheduler
|
||||
{
|
||||
public:
|
||||
TaskScheduler();
|
||||
~TaskScheduler();
|
||||
|
||||
// Call either Initialize() or Initialize( numThreads_ ) before adding tasks.
|
||||
|
||||
// Initialize() will create GetNumHardwareThreads()-1 threads, which is
|
||||
// sufficient to fill the system when including the main thread.
|
||||
// Initialize can be called multiple times - it will wait for completion
|
||||
// before re-initializing.
|
||||
void Initialize();
|
||||
|
||||
// Initialize( numThreads_ ) - numThreads_ (must be > 0)
|
||||
// will create numThreads_-1 threads, as thread 0 is
|
||||
// the thread on which the initialize was called.
|
||||
void Initialize( uint32_t numThreads_ );
|
||||
|
||||
|
||||
// Adds the TaskSet to pipe and returns if the pipe is not full.
|
||||
// If the pipe is full, pTaskSet is run.
|
||||
// should only be called from main thread, or within a task
|
||||
void AddTaskSetToPipe( ITaskSet* pTaskSet );
|
||||
|
||||
// Runs the TaskSets in pipe until true == pTaskSet->GetIsComplete();
|
||||
// should only be called from thread which created the taskscheduler , or within a task
|
||||
// if called with 0 it will try to run tasks, and return if none available.
|
||||
void WaitforTaskSet( const ITaskSet* pTaskSet );
|
||||
|
||||
// Waits for all task sets to complete - not guaranteed to work unless we know we
|
||||
// are in a situation where tasks aren't being continuosly added.
|
||||
void WaitforAll();
|
||||
|
||||
// Waits for all task sets to complete and shutdown threads - not guaranteed to work unless we know we
|
||||
// are in a situation where tasks aren't being continuosly added.
|
||||
void WaitforAllAndShutdown();
|
||||
|
||||
// Returns the number of threads created for running tasks + 1
|
||||
// to account for the main thread.
|
||||
uint32_t GetNumTaskThreads() const;
|
||||
|
||||
// Returns the ProfilerCallbacks structure so that it can be modified to
|
||||
// set the callbacks.
|
||||
ProfilerCallbacks* GetProfilerCallbacks();
|
||||
|
||||
private:
|
||||
static THREADFUNC_DECL TaskingThreadFunction( void* pArgs );
|
||||
void WaitForTasks( uint32_t threadNum );
|
||||
bool TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ );
|
||||
void StartThreads();
|
||||
void StopThreads( bool bWait_ );
|
||||
void SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_,
|
||||
uint32_t rangeToSplit_, int32_t runningCountOffset_ );
|
||||
void WakeThreads();
|
||||
|
||||
TaskPipe* m_pPipesPerThread;
|
||||
|
||||
uint32_t m_NumThreads;
|
||||
ThreadArgs* m_pThreadNumStore;
|
||||
threadid_t* m_pThreadIDs;
|
||||
volatile bool m_bRunning;
|
||||
volatile int32_t m_NumThreadsRunning;
|
||||
volatile int32_t m_NumThreadsWaiting;
|
||||
uint32_t m_NumPartitions;
|
||||
uint32_t m_NumInitialPartitions;
|
||||
semaphoreid_t m_NewTaskSemaphore;
|
||||
bool m_bHaveThreads;
|
||||
ProfilerCallbacks m_ProfilerCallbacks;
|
||||
|
||||
TaskScheduler( const TaskScheduler& nocopy );
|
||||
TaskScheduler& operator=( const TaskScheduler& nocopy );
|
||||
};
|
||||
|
||||
}
|
|
@ -0,0 +1,122 @@
|
|||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#include "TaskScheduler_c.h"
|
||||
#include "TaskScheduler.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
using namespace enki;
|
||||
|
||||
struct enkiTaskScheduler : TaskScheduler
|
||||
{
|
||||
};
|
||||
|
||||
struct enkiTaskSet : ITaskSet
|
||||
{
|
||||
enkiTaskSet( enkiTaskExecuteRange taskFun_ ) : taskFun(taskFun_), pArgs(NULL) {}
|
||||
|
||||
virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum )
|
||||
{
|
||||
taskFun( range.start, range.end, threadnum, pArgs );
|
||||
}
|
||||
|
||||
enkiTaskExecuteRange taskFun;
|
||||
void* pArgs;
|
||||
};
|
||||
|
||||
enkiTaskScheduler* enkiNewTaskScheduler()
|
||||
{
|
||||
enkiTaskScheduler* pETS = new enkiTaskScheduler();
|
||||
return pETS;
|
||||
}
|
||||
|
||||
void enkiInitTaskScheduler( enkiTaskScheduler* pETS_ )
|
||||
{
|
||||
pETS_->Initialize();
|
||||
}
|
||||
|
||||
void enkiInitTaskSchedulerNumThreads( enkiTaskScheduler* pETS_, uint32_t numThreads_ )
|
||||
{
|
||||
pETS_->Initialize( numThreads_ );
|
||||
}
|
||||
|
||||
void enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ )
|
||||
{
|
||||
delete pETS_;
|
||||
}
|
||||
|
||||
enkiTaskSet* enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_ )
|
||||
{
|
||||
return new enkiTaskSet( taskFunc_ );
|
||||
}
|
||||
|
||||
void enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ )
|
||||
{
|
||||
delete pTaskSet_;
|
||||
}
|
||||
|
||||
void enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_, void* pArgs_, uint32_t setSize_ )
|
||||
{
|
||||
assert( pTaskSet_ );
|
||||
assert( pTaskSet_->taskFun );
|
||||
|
||||
pTaskSet_->m_SetSize = setSize_;
|
||||
pTaskSet_->pArgs = pArgs_;
|
||||
pETS_->AddTaskSetToPipe( pTaskSet_ );
|
||||
}
|
||||
|
||||
void enkiAddTaskSetToPipeMinRange(enkiTaskScheduler * pETS_, enkiTaskSet * pTaskSet_, void * pArgs_, uint32_t setSize_, uint32_t minRange_)
|
||||
{
|
||||
assert( pTaskSet_ );
|
||||
assert( pTaskSet_->taskFun );
|
||||
|
||||
pTaskSet_->m_SetSize = setSize_;
|
||||
pTaskSet_->m_MinRange = minRange_;
|
||||
pTaskSet_->pArgs = pArgs_;
|
||||
pETS_->AddTaskSetToPipe( pTaskSet_ );
|
||||
}
|
||||
|
||||
int enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ )
|
||||
{
|
||||
assert( pTaskSet_ );
|
||||
return ( pTaskSet_->GetIsComplete() ) ? 1 : 0;
|
||||
}
|
||||
|
||||
void enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ )
|
||||
{
|
||||
pETS_->WaitforTaskSet( pTaskSet_ );
|
||||
}
|
||||
|
||||
void enkiWaitForAll( enkiTaskScheduler* pETS_ )
|
||||
{
|
||||
pETS_->WaitforAll();
|
||||
}
|
||||
|
||||
|
||||
uint32_t enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ )
|
||||
{
|
||||
return pETS_->GetNumTaskThreads();
|
||||
}
|
||||
|
||||
enkiProfilerCallbacks* enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ )
|
||||
{
|
||||
assert( sizeof(enkiProfilerCallbacks) == sizeof(enki::ProfilerCallbacks) );
|
||||
return (enkiProfilerCallbacks*)pETS_->GetProfilerCallbacks();
|
||||
}
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct enkiTaskScheduler enkiTaskScheduler;
|
||||
typedef struct enkiTaskSet enkiTaskSet;
|
||||
|
||||
typedef void (* enkiTaskExecuteRange)( uint32_t start_, uint32_t end, uint32_t threadnum_, void* pArgs_ );
|
||||
|
||||
|
||||
// Create a new task scheduler
|
||||
enkiTaskScheduler* enkiNewTaskScheduler();
|
||||
|
||||
// Initialize task scheduler - will create GetNumHardwareThreads()-1 threads, which is
|
||||
// sufficient to fill the system when including the main thread.
|
||||
// Initialize can be called multiple times - it will wait for completion
|
||||
// before re-initializing.
|
||||
void enkiInitTaskScheduler( enkiTaskScheduler* pETS_ );
|
||||
|
||||
// Initialize a task scheduler with numThreads_ (must be > 0)
|
||||
// will create numThreads_-1 threads, as thread 0 is
|
||||
// the thread on which the initialize was called.
|
||||
void enkiInitTaskSchedulerNumThreads( enkiTaskScheduler* pETS_, uint32_t numThreads_ );
|
||||
|
||||
|
||||
// Delete a task scheduler
|
||||
void enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ );
|
||||
|
||||
// Create a task set.
|
||||
enkiTaskSet* enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_ );
|
||||
|
||||
// Delete a task set.
|
||||
void enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ );
|
||||
|
||||
// Schedule the task
|
||||
void enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
|
||||
void* pArgs_, uint32_t setSize_ );
|
||||
|
||||
// Schedule the task with a minimum range.
|
||||
// This should be set to a value which results in computation effort of at least 10k
|
||||
// clock cycles to minimize tast scheduler overhead.
|
||||
// NOTE: The last partition will be smaller than m_MinRange if m_SetSize is not a multiple
|
||||
// of m_MinRange.
|
||||
// Also known as grain size in literature.
|
||||
void enkiAddTaskSetToPipeMinRange( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
|
||||
void* pArgs_, uint32_t setSize_, uint32_t minRange_ );
|
||||
|
||||
|
||||
// Check if TaskSet is complete. Doesn't wait. Returns 1 if complete, 0 if not.
|
||||
int enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );
|
||||
|
||||
|
||||
// Wait for a given task.
|
||||
// should only be called from thread which created the taskscheduler , or within a task
|
||||
// if called with 0 it will try to run tasks, and return if none available.
|
||||
void enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );
|
||||
|
||||
|
||||
// Waits for all task sets to complete - not guaranteed to work unless we know we
|
||||
// are in a situation where tasks aren't being continuosly added.
|
||||
void enkiWaitForAll( enkiTaskScheduler* pETS_ );
|
||||
|
||||
|
||||
// get number of threads
|
||||
uint32_t enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ );
|
||||
|
||||
// TaskScheduler implements several callbacks intended for profilers
|
||||
typedef void (*enkiProfilerCallbackFunc)( uint32_t threadnum_ );
|
||||
struct enkiProfilerCallbacks
|
||||
{
|
||||
enkiProfilerCallbackFunc threadStart;
|
||||
enkiProfilerCallbackFunc threadStop;
|
||||
enkiProfilerCallbackFunc waitStart;
|
||||
enkiProfilerCallbackFunc waitStop;
|
||||
};
|
||||
|
||||
// Get the callback structure so it can be set
|
||||
struct enkiProfilerCallbacks* enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,210 @@
|
|||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include "Atomics.h"
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <Windows.h>
|
||||
|
||||
#define THREADFUNC_DECL DWORD WINAPI
|
||||
#define THREAD_LOCAL __declspec( thread )
|
||||
|
||||
namespace enki
|
||||
{
|
||||
typedef HANDLE threadid_t;
|
||||
|
||||
// declare the thread start function as:
|
||||
// THREADFUNC_DECL MyThreadStart( void* pArg );
|
||||
inline bool ThreadCreate( threadid_t* returnid, DWORD ( WINAPI *StartFunc) (void* ), void* pArg )
|
||||
{
|
||||
// posix equiv pthread_create
|
||||
DWORD threadid;
|
||||
*returnid = CreateThread( 0, 0, StartFunc, pArg, 0, &threadid );
|
||||
return *returnid != NULL;
|
||||
}
|
||||
|
||||
inline bool ThreadTerminate( threadid_t threadid )
|
||||
{
|
||||
// posix equiv pthread_cancel
|
||||
return CloseHandle( threadid ) == 0;
|
||||
}
|
||||
|
||||
inline uint32_t GetNumHardwareThreads()
|
||||
{
|
||||
SYSTEM_INFO sysInfo;
|
||||
GetSystemInfo(&sysInfo);
|
||||
return sysInfo.dwNumberOfProcessors;
|
||||
}
|
||||
}
|
||||
|
||||
#else // posix
|
||||
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#define THREADFUNC_DECL void*
|
||||
#define THREAD_LOCAL __thread
|
||||
|
||||
namespace enki
|
||||
{
|
||||
typedef pthread_t threadid_t;
|
||||
|
||||
// declare the thread start function as:
|
||||
// THREADFUNC_DECL MyThreadStart( void* pArg );
|
||||
inline bool ThreadCreate( threadid_t* returnid, void* ( *StartFunc) (void* ), void* pArg )
|
||||
{
|
||||
// posix equiv pthread_create
|
||||
int32_t retval = pthread_create( returnid, NULL, StartFunc, pArg );
|
||||
|
||||
return retval == 0;
|
||||
}
|
||||
|
||||
inline bool ThreadTerminate( threadid_t threadid )
|
||||
{
|
||||
// posix equiv pthread_cancel
|
||||
return pthread_cancel( threadid ) == 0;
|
||||
}
|
||||
|
||||
inline uint32_t GetNumHardwareThreads()
|
||||
{
|
||||
return (uint32_t)sysconf( _SC_NPROCESSORS_ONLN );
|
||||
}
|
||||
}
|
||||
|
||||
#endif // posix
|
||||
|
||||
|
||||
// Semaphore implementation
|
||||
#ifdef _WIN32
|
||||
|
||||
namespace enki
|
||||
{
|
||||
struct semaphoreid_t
|
||||
{
|
||||
HANDLE sem;
|
||||
};
|
||||
|
||||
inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
semaphoreid.sem = CreateSemaphore(NULL, 0, MAXLONG, NULL );
|
||||
}
|
||||
|
||||
inline void SemaphoreClose( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
CloseHandle( semaphoreid.sem );
|
||||
}
|
||||
|
||||
inline void SemaphoreWait( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
DWORD retval = WaitForSingleObject( semaphoreid.sem, INFINITE );
|
||||
|
||||
assert( retval != WAIT_FAILED );
|
||||
}
|
||||
|
||||
inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
|
||||
{
|
||||
if( countWaiting )
|
||||
{
|
||||
ReleaseSemaphore( semaphoreid.sem, countWaiting, NULL );
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(__MACH__)
|
||||
|
||||
// OS X does not have POSIX semaphores
|
||||
// see https://developer.apple.com/library/content/documentation/Darwin/Conceptual/KernelProgramming/synchronization/synchronization.html
|
||||
#include <mach/mach.h>
|
||||
|
||||
namespace enki
|
||||
{
|
||||
|
||||
struct semaphoreid_t
|
||||
{
|
||||
semaphore_t sem;
|
||||
};
|
||||
|
||||
inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
semaphore_create( mach_task_self(), &semaphoreid.sem, SYNC_POLICY_FIFO, 0 );
|
||||
}
|
||||
|
||||
inline void SemaphoreClose( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
semaphore_destroy( mach_task_self(), semaphoreid.sem );
|
||||
}
|
||||
|
||||
inline void SemaphoreWait( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
semaphore_wait( semaphoreid.sem );
|
||||
}
|
||||
|
||||
inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
|
||||
{
|
||||
while( countWaiting-- > 0 )
|
||||
{
|
||||
semaphore_signal( semaphoreid.sem );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else // POSIX
|
||||
|
||||
#include <semaphore.h>
|
||||
|
||||
namespace enki
|
||||
{
|
||||
|
||||
struct semaphoreid_t
|
||||
{
|
||||
sem_t sem;
|
||||
};
|
||||
|
||||
inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
int err = sem_init( &semaphoreid.sem, 0, 0 );
|
||||
assert( err == 0 );
|
||||
}
|
||||
|
||||
inline void SemaphoreClose( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
sem_destroy( &semaphoreid.sem );
|
||||
}
|
||||
|
||||
inline void SemaphoreWait( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
int err = sem_wait( &semaphoreid.sem );
|
||||
assert( err == 0 );
|
||||
}
|
||||
|
||||
inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
|
||||
{
|
||||
while( countWaiting-- > 0 )
|
||||
{
|
||||
sem_post( &semaphoreid.sem );
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
@ -0,0 +1,395 @@
|
|||
#include "../Source/Config.h"
|
||||
|
||||
inline uint RNG(inout uint state)
|
||||
{
|
||||
uint x = state;
|
||||
x ^= x << 13;
|
||||
x ^= x >> 17;
|
||||
x ^= x << 15;
|
||||
state = x;
|
||||
return x;
|
||||
}
|
||||
|
||||
float RandomFloat01(inout uint state)
|
||||
{
|
||||
return (RNG(state) & 0xFFFFFF) / 16777216.0f;
|
||||
}
|
||||
|
||||
float3 RandomInUnitDisk(inout uint state)
|
||||
{
|
||||
float a = RandomFloat01(state) * 2.0f * 3.1415926f;
|
||||
float2 xy = float2(cos(a), sin(a));
|
||||
xy *= sqrt(RandomFloat01(state));
|
||||
return float3(xy, 0);
|
||||
}
|
||||
float3 RandomInUnitSphere(inout uint state)
|
||||
{
|
||||
float z = RandomFloat01(state) * 2.0f - 1.0f;
|
||||
float t = RandomFloat01(state) * 2.0f * 3.1415926f;
|
||||
float r = sqrt(max(0.0, 1.0f - z * z));
|
||||
float x = r * cos(t);
|
||||
float y = r * sin(t);
|
||||
float3 res = float3(x, y, z);
|
||||
res *= pow(RandomFloat01(state), 1.0 / 3.0);
|
||||
return res;
|
||||
}
|
||||
float3 RandomUnitVector(inout uint state)
|
||||
{
|
||||
float z = RandomFloat01(state) * 2.0f - 1.0f;
|
||||
float a = RandomFloat01(state) * 2.0f * 3.1415926f;
|
||||
float r = sqrt(1.0f - z * z);
|
||||
float x = r * cos(a);
|
||||
float y = r * sin(a);
|
||||
return float3(x, y, z);
|
||||
}
|
||||
|
||||
|
||||
|
||||
struct Ray
|
||||
{
|
||||
float3 orig;
|
||||
float3 dir;
|
||||
};
|
||||
Ray MakeRay(float3 orig_, float3 dir_) { Ray r; r.orig = orig_; r.dir = dir_; return r; }
|
||||
float3 RayPointAt(Ray r, float t) { return r.orig + r.dir * t; }
|
||||
|
||||
|
||||
inline bool refract(float3 v, float3 n, float nint, out float3 outRefracted)
|
||||
{
|
||||
float dt = dot(v, n);
|
||||
float discr = 1.0f - nint * nint*(1 - dt * dt);
|
||||
if (discr > 0)
|
||||
{
|
||||
outRefracted = nint * (v - n * dt) - n * sqrt(discr);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
inline float schlick(float cosine, float ri)
|
||||
{
|
||||
float r0 = (1 - ri) / (1 + ri);
|
||||
r0 = r0 * r0;
|
||||
// note: saturate to guard against possible tiny negative numbers
|
||||
return r0 + (1 - r0)*pow(saturate(1 - cosine), 5);
|
||||
}
|
||||
|
||||
struct Hit
|
||||
{
|
||||
float3 pos;
|
||||
float3 normal;
|
||||
float t;
|
||||
};
|
||||
|
||||
struct Sphere
|
||||
{
|
||||
float3 center;
|
||||
float radius;
|
||||
float invRadius;
|
||||
};
|
||||
|
||||
#define MatLambert 0
|
||||
#define MatMetal 1
|
||||
#define MatDielectric 2
|
||||
|
||||
struct Material
|
||||
{
|
||||
int type;
|
||||
float3 albedo;
|
||||
float3 emissive;
|
||||
float roughness;
|
||||
float ri;
|
||||
};
|
||||
|
||||
groupshared Sphere s_GroupSpheres[kCSMaxObjects];
|
||||
groupshared Material s_GroupMaterials[kCSMaxObjects];
|
||||
groupshared int s_GroupEmissives[kCSMaxObjects];
|
||||
|
||||
|
||||
struct Camera
|
||||
{
|
||||
float3 origin;
|
||||
float3 lowerLeftCorner;
|
||||
float3 horizontal;
|
||||
float3 vertical;
|
||||
float3 u, v, w;
|
||||
float lensRadius;
|
||||
};
|
||||
|
||||
Ray CameraGetRay(Camera cam, float s, float t, inout uint state)
|
||||
{
|
||||
float3 rd = cam.lensRadius * RandomInUnitDisk(state);
|
||||
float3 offset = cam.u * rd.x + cam.v * rd.y;
|
||||
return MakeRay(cam.origin + offset, normalize(cam.lowerLeftCorner + s * cam.horizontal + t * cam.vertical - cam.origin - offset));
|
||||
}
|
||||
|
||||
|
||||
int HitSpheres(Ray r, int sphereCount, float tMin, float tMax, inout Hit outHit)
|
||||
{
|
||||
float hitT = tMax;
|
||||
int id = -1;
|
||||
for (int i = 0; i < sphereCount; ++i)
|
||||
{
|
||||
Sphere s = s_GroupSpheres[i];
|
||||
float3 co = s.center - r.orig;
|
||||
float nb = dot(co, r.dir);
|
||||
float c = dot(co, co) - s.radius*s.radius;
|
||||
float discr = nb * nb - c;
|
||||
if (discr > 0)
|
||||
{
|
||||
float discrSq = sqrt(discr);
|
||||
|
||||
// Try earlier t
|
||||
float t = nb - discrSq;
|
||||
if (t <= tMin) // before min, try later t!
|
||||
t = nb + discrSq;
|
||||
|
||||
if (t > tMin && t < hitT)
|
||||
{
|
||||
id = i;
|
||||
hitT = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (id != -1)
|
||||
{
|
||||
outHit.pos = RayPointAt(r, hitT);
|
||||
outHit.normal = (outHit.pos - s_GroupSpheres[id].center) * s_GroupSpheres[id].invRadius;
|
||||
outHit.t = hitT;
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
struct Params
|
||||
{
|
||||
Camera cam;
|
||||
int sphereCount;
|
||||
int screenWidth;
|
||||
int screenHeight;
|
||||
int frames;
|
||||
float invWidth;
|
||||
float invHeight;
|
||||
float lerpFac;
|
||||
int emissiveCount;
|
||||
};
|
||||
|
||||
|
||||
#define kMinT 0.001f
|
||||
#define kMaxT 1.0e7f
|
||||
#define kMaxDepth 10
|
||||
|
||||
|
||||
static int HitWorld(int sphereCount, Ray r, float tMin, float tMax, inout Hit outHit)
|
||||
{
|
||||
return HitSpheres(r, sphereCount, tMin, tMax, outHit);
|
||||
}
|
||||
|
||||
|
||||
static bool Scatter(int sphereCount, int emissiveCount, int matID, Ray r_in, Hit rec, out float3 attenuation, out Ray scattered, out float3 outLightE, inout int inoutRayCount, inout uint state)
|
||||
{
|
||||
outLightE = float3(0, 0, 0);
|
||||
Material mat = s_GroupMaterials[matID];
|
||||
if (mat.type == MatLambert)
|
||||
{
|
||||
// random point on unit sphere that is tangent to the hit point
|
||||
float3 target = rec.pos + rec.normal + RandomUnitVector(state);
|
||||
scattered = MakeRay(rec.pos, normalize(target - rec.pos));
|
||||
attenuation = mat.albedo;
|
||||
|
||||
// sample lights
|
||||
#if DO_LIGHT_SAMPLING
|
||||
for (int j = 0; j < emissiveCount; ++j)
|
||||
{
|
||||
int i = s_GroupEmissives[j];
|
||||
if (matID == i)
|
||||
continue; // skip self
|
||||
Material smat = s_GroupMaterials[i];
|
||||
Sphere s = s_GroupSpheres[i];
|
||||
|
||||
// create a random direction towards sphere
|
||||
// coord system for sampling: sw, su, sv
|
||||
float3 sw = normalize(s.center - rec.pos);
|
||||
float3 su = normalize(cross(abs(sw.x)>0.01f ? float3(0, 1, 0) : float3(1, 0, 0), sw));
|
||||
float3 sv = cross(sw, su);
|
||||
// sample sphere by solid angle
|
||||
float cosAMax = sqrt(1.0f - s.radius*s.radius / dot(rec.pos - s.center, rec.pos - s.center));
|
||||
float eps1 = RandomFloat01(state), eps2 = RandomFloat01(state);
|
||||
float cosA = 1.0f - eps1 + eps1 * cosAMax;
|
||||
float sinA = sqrt(1.0f - cosA * cosA);
|
||||
float phi = 2 * 3.1415926 * eps2;
|
||||
float3 l = su * cos(phi) * sinA + sv * sin(phi) * sinA + sw * cosA;
|
||||
|
||||
// shoot shadow ray
|
||||
Hit lightHit;
|
||||
++inoutRayCount;
|
||||
int hitID = HitWorld(sphereCount, MakeRay(rec.pos, l), kMinT, kMaxT, lightHit);
|
||||
if (hitID == i)
|
||||
{
|
||||
float omega = 2 * 3.1415926 * (1 - cosAMax);
|
||||
|
||||
float3 rdir = r_in.dir;
|
||||
float3 nl = dot(rec.normal, rdir) < 0 ? rec.normal : -rec.normal;
|
||||
outLightE += (mat.albedo * smat.emissive) * (max(0.0f, dot(l, nl)) * omega / 3.1415926);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
else if (mat.type == MatMetal)
|
||||
{
|
||||
float3 refl = reflect(r_in.dir, rec.normal);
|
||||
// reflected ray, and random inside of sphere based on roughness
|
||||
float roughness = mat.roughness;
|
||||
#if DO_MITSUBA_COMPARE
|
||||
roughness = 0; // until we get better BRDF for metals
|
||||
#endif
|
||||
scattered = MakeRay(rec.pos, normalize(refl + roughness*RandomInUnitSphere(state)));
|
||||
attenuation = mat.albedo;
|
||||
return dot(scattered.dir, rec.normal) > 0;
|
||||
}
|
||||
else if (mat.type == MatDielectric)
|
||||
{
|
||||
float3 outwardN;
|
||||
float3 rdir = r_in.dir;
|
||||
float3 refl = reflect(rdir, rec.normal);
|
||||
float nint;
|
||||
attenuation = float3(1, 1, 1);
|
||||
float3 refr;
|
||||
float reflProb;
|
||||
float cosine;
|
||||
if (dot(rdir, rec.normal) > 0)
|
||||
{
|
||||
outwardN = -rec.normal;
|
||||
nint = mat.ri;
|
||||
cosine = mat.ri * dot(rdir, rec.normal);
|
||||
}
|
||||
else
|
||||
{
|
||||
outwardN = rec.normal;
|
||||
nint = 1.0f / mat.ri;
|
||||
cosine = -dot(rdir, rec.normal);
|
||||
}
|
||||
if (refract(rdir, outwardN, nint, refr))
|
||||
{
|
||||
reflProb = schlick(cosine, mat.ri);
|
||||
}
|
||||
else
|
||||
{
|
||||
reflProb = 1;
|
||||
}
|
||||
if (RandomFloat01(state) < reflProb)
|
||||
scattered = MakeRay(rec.pos, normalize(refl));
|
||||
else
|
||||
scattered = MakeRay(rec.pos, normalize(refr));
|
||||
}
|
||||
else
|
||||
{
|
||||
attenuation = float3(1, 0, 1);
|
||||
scattered = MakeRay(float3(0,0,0), float3(0, 0, 1));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static float3 Trace(int sphereCount, int emissiveCount, Ray r, inout int inoutRayCount, inout uint state)
|
||||
{
|
||||
float3 col = 0;
|
||||
float3 curAtten = 1;
|
||||
bool doMaterialE = true;
|
||||
// GPUs don't support recursion, so do tracing iterations in a loop up to max depth
|
||||
for (int depth = 0; depth < kMaxDepth; ++depth)
|
||||
{
|
||||
Hit rec;
|
||||
++inoutRayCount;
|
||||
int id = HitWorld(sphereCount, r, kMinT, kMaxT, rec);
|
||||
if (id >= 0)
|
||||
{
|
||||
Ray scattered;
|
||||
float3 attenuation;
|
||||
float3 lightE;
|
||||
Material mat = s_GroupMaterials[id];
|
||||
float3 matE = mat.emissive;
|
||||
if (Scatter(sphereCount, emissiveCount, id, r, rec, attenuation, scattered, lightE, inoutRayCount, state))
|
||||
{
|
||||
#if DO_LIGHT_SAMPLING
|
||||
if (!doMaterialE) matE = 0;
|
||||
doMaterialE = (mat.type != MatLambert);
|
||||
#endif
|
||||
col += curAtten * (matE + lightE);
|
||||
curAtten *= attenuation;
|
||||
r = scattered;
|
||||
}
|
||||
else
|
||||
{
|
||||
col += curAtten * matE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// sky
|
||||
#if DO_MITSUBA_COMPARE
|
||||
col += curAtten * float3(0.15f, 0.21f, 0.3f); // easier compare with Mitsuba's constant environment light
|
||||
#else
|
||||
float3 unitDir = r.dir;
|
||||
float t = 0.5f*(unitDir.y + 1.0f);
|
||||
float3 skyCol = ((1.0f - t)*float3(1.0f, 1.0f, 1.0f) + t * float3(0.5f, 0.7f, 1.0f)) * 0.3f;
|
||||
col += curAtten * skyCol;
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
}
|
||||
return col;
|
||||
}
|
||||
|
||||
Texture2D srcImage : register(t0);
|
||||
RWTexture2D<float4> dstImage : register(u0);
|
||||
StructuredBuffer<Sphere> g_Spheres : register(t1);
|
||||
StructuredBuffer<Material> g_Materials : register(t2);
|
||||
StructuredBuffer<Params> g_Params : register(t3);
|
||||
StructuredBuffer<int> g_Emissives : register(t4);
|
||||
RWByteAddressBuffer g_OutRayCount : register(u1);
|
||||
|
||||
[numthreads(kCSGroupSizeX, kCSGroupSizeY, 1)]
|
||||
void main(uint3 gid : SV_DispatchThreadID, uint3 tid : SV_GroupThreadID)
|
||||
{
|
||||
// First, move scene data (spheres, materials, emissive indices) into group shared
|
||||
// memory. Do this in parallel; each thread in group copies its own chunk of data.
|
||||
uint threadID = tid.y * kCSGroupSizeX + tid.x;
|
||||
uint groupSize = kCSGroupSizeX * kCSGroupSizeY;
|
||||
uint objCount = g_Params[0].sphereCount;
|
||||
uint myObjCount = (objCount + groupSize - 1) / groupSize;
|
||||
uint myObjStart = threadID * myObjCount;
|
||||
for (uint io = myObjStart; io < myObjStart + myObjCount; ++io)
|
||||
{
|
||||
if (io < objCount)
|
||||
{
|
||||
s_GroupSpheres[io] = g_Spheres[io];
|
||||
s_GroupMaterials[io] = g_Materials[io];
|
||||
}
|
||||
if (io < g_Params[0].emissiveCount)
|
||||
{
|
||||
s_GroupEmissives[io] = g_Emissives[io];
|
||||
}
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
||||
int rayCount = 0;
|
||||
float3 col = 0;
|
||||
Params params = g_Params[0];
|
||||
uint rngState = (gid.x * 1973 + gid.y * 9277 + params.frames * 26699) | 1;
|
||||
for (int s = 0; s < DO_SAMPLES_PER_PIXEL; s++)
|
||||
{
|
||||
float u = float(gid.x + RandomFloat01(rngState)) * params.invWidth;
|
||||
float v = float(gid.y + RandomFloat01(rngState)) * params.invHeight;
|
||||
Ray r = CameraGetRay(params.cam, u, v, rngState);
|
||||
col += Trace(params.sphereCount, params.emissiveCount, r, rayCount, rngState);
|
||||
}
|
||||
col *= 1.0f / float(DO_SAMPLES_PER_PIXEL);
|
||||
|
||||
float3 prev = srcImage.Load(int3(gid.xy,0)).rgb;
|
||||
col = lerp(col, prev, params.lerpFac);
|
||||
dstImage[gid.xy] = float4(col, 1);
|
||||
|
||||
g_OutRayCount.InterlockedAdd(0, rayCount);
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
float3 LinearToSRGB(float3 rgb)
|
||||
{
|
||||
rgb = max(rgb, float3(0, 0, 0));
|
||||
return max(1.055 * pow(rgb, 0.416666667) - 0.055, 0.0);
|
||||
}
|
||||
|
||||
Texture2D tex : register(t0);
|
||||
SamplerState smp : register(s0);
|
||||
|
||||
float4 main(float2 uv : TEXCOORD0) : SV_Target
|
||||
{
|
||||
float3 col = tex.Sample(smp, uv).rgb;
|
||||
col = LinearToSRGB(col);
|
||||
return float4(col, 1.0f);
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 16
|
||||
VisualStudioVersion = 16.0.30907.101
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestCpu", "TestCpu.vcxproj", "{4F84B756-87F5-4B92-827B-DA087DAE1900}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Debug|x86 = Debug|x86
|
||||
Release|x64 = Release|x64
|
||||
Release|x86 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x64.Build.0 = Debug|x64
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x86.ActiveCfg = Debug|Win32
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x86.Build.0 = Debug|Win32
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x64.ActiveCfg = Release|x64
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x64.Build.0 = Release|x64
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x86.ActiveCfg = Release|Win32
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x86.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {067FB780-37B8-465E-AD7E-E7B238B9C04F}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,245 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<VCProjectVersion>15.0</VCProjectVersion>
|
||||
<ProjectGuid>{4F84B756-87F5-4B92-827B-DA087DAE1900}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>TestCpu</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<CallingConvention>VectorCall</CallingConvention>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>TRACY_ENABLE;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<CallingConvention>VectorCall</CallingConvention>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<ExceptionHandling>false</ExceptionHandling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<CallingConvention>VectorCall</CallingConvention>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>TRACY_ENABLE;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<ExceptionHandling>false</ExceptionHandling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<CallingConvention>VectorCall</CallingConvention>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\TracyClient.cpp" />
|
||||
<ClCompile Include="..\Source\enkiTS\TaskScheduler.cpp" />
|
||||
<ClCompile Include="..\Source\enkiTS\TaskScheduler_c.cpp" />
|
||||
<ClCompile Include="..\Source\Maths.cpp" />
|
||||
<ClCompile Include="..\Source\Test.cpp" />
|
||||
<ClCompile Include="TestWin.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\Source\Config.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\Atomics.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\LockLessMultiReadPipe.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\TaskScheduler.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\TaskScheduler_c.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\Threads.h" />
|
||||
<ClInclude Include="..\Source\Maths.h" />
|
||||
<ClInclude Include="..\Source\MathSimd.h" />
|
||||
<ClInclude Include="..\Source\Test.h" />
|
||||
<ClInclude Include="..\Source\stb_image.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\.editorconfig" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<FxCompile Include="ComputeShader.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_CSBytecode</VariableName>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledComputeShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_CSBytecode</VariableName>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledComputeShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_CSBytecode</VariableName>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledComputeShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_CSBytecode</VariableName>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledComputeShader.h</HeaderFileOutput>
|
||||
</FxCompile>
|
||||
<FxCompile Include="PixelShader.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Pixel</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Pixel</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Pixel</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Pixel</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledPixelShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledPixelShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledPixelShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledPixelShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_PSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_PSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_PSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_PSBytecode</VariableName>
|
||||
</FxCompile>
|
||||
<FxCompile Include="VertexShader.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Vertex</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Vertex</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Vertex</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Vertex</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledVertexShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledVertexShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledVertexShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledVertexShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_VSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_VSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_VSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_VSBytecode</VariableName>
|
||||
</FxCompile>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,67 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<ClCompile Include="TestWin.cpp" />
|
||||
<ClCompile Include="..\Source\Test.cpp">
|
||||
<Filter>Source</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Source\enkiTS\TaskScheduler.cpp">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Source\enkiTS\TaskScheduler_c.cpp">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Source\Maths.cpp">
|
||||
<Filter>Source</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\TracyClient.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="Source">
|
||||
<UniqueIdentifier>{5f19f217-c1c7-4eeb-be61-8b986fee9375}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Source\enkiTS">
|
||||
<UniqueIdentifier>{38c448a8-1dcc-4116-9410-a9f8d068caff}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\Source\Test.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\stb_image.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\Atomics.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\LockLessMultiReadPipe.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\TaskScheduler.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\TaskScheduler_c.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\Threads.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\Maths.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\Config.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\MathSimd.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\.editorconfig" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<FxCompile Include="VertexShader.hlsl" />
|
||||
<FxCompile Include="PixelShader.hlsl" />
|
||||
<FxCompile Include="ComputeShader.hlsl" />
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@ -0,0 +1,567 @@
|
|||
#include <stdint.h>
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#include <d3d11_1.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include "../Source/Config.h"
|
||||
#include "../Source/Maths.h"
|
||||
#include "../Source/Test.h"
|
||||
#include "CompiledVertexShader.h"
|
||||
#include "CompiledPixelShader.h"
|
||||
|
||||
#include "../../../Tracy.hpp"
|
||||
#include "../../../TracyD3D11.hpp"
|
||||
|
||||
static HINSTANCE g_HInstance;
|
||||
static HWND g_Wnd;
|
||||
|
||||
ATOM MyRegisterClass(HINSTANCE hInstance);
|
||||
BOOL InitInstance(HINSTANCE, int);
|
||||
LRESULT CALLBACK WndProc(HWND, UINT, WPARAM, LPARAM);
|
||||
INT_PTR CALLBACK About(HWND, UINT, WPARAM, LPARAM);
|
||||
|
||||
static HRESULT InitD3DDevice();
|
||||
static void ShutdownD3DDevice();
|
||||
static void RenderFrame();
|
||||
|
||||
static float* g_Backbuffer;
|
||||
|
||||
static D3D_FEATURE_LEVEL g_D3D11FeatureLevel = D3D_FEATURE_LEVEL_11_0;
|
||||
static ID3D11Device* g_D3D11Device = nullptr;
|
||||
static ID3D11DeviceContext* g_D3D11Ctx = nullptr;
|
||||
static IDXGISwapChain* g_D3D11SwapChain = nullptr;
|
||||
static ID3D11RenderTargetView* g_D3D11RenderTarget = nullptr;
|
||||
static ID3D11VertexShader* g_VertexShader;
|
||||
static ID3D11PixelShader* g_PixelShader;
|
||||
static ID3D11Texture2D *g_BackbufferTexture, *g_BackbufferTexture2;
|
||||
static ID3D11ShaderResourceView *g_BackbufferSRV, *g_BackbufferSRV2;
|
||||
static ID3D11UnorderedAccessView *g_BackbufferUAV, *g_BackbufferUAV2;
|
||||
static ID3D11SamplerState* g_SamplerLinear;
|
||||
static ID3D11RasterizerState* g_RasterState;
|
||||
static int g_BackbufferIndex;
|
||||
static tracy::D3D11Ctx *g_tracyCtx;
|
||||
|
||||
|
||||
#if DO_COMPUTE_GPU
|
||||
#include "CompiledComputeShader.h"
|
||||
struct ComputeParams
|
||||
{
|
||||
Camera cam;
|
||||
int sphereCount;
|
||||
int screenWidth;
|
||||
int screenHeight;
|
||||
int frames;
|
||||
float invWidth;
|
||||
float invHeight;
|
||||
float lerpFac;
|
||||
int emissiveCount;
|
||||
};
|
||||
static ID3D11ComputeShader* g_ComputeShader;
|
||||
static ID3D11Buffer* g_DataSpheres; static ID3D11ShaderResourceView* g_SRVSpheres;
|
||||
static ID3D11Buffer* g_DataMaterials; static ID3D11ShaderResourceView* g_SRVMaterials;
|
||||
static ID3D11Buffer* g_DataParams; static ID3D11ShaderResourceView* g_SRVParams;
|
||||
static ID3D11Buffer* g_DataEmissives; static ID3D11ShaderResourceView* g_SRVEmissives;
|
||||
static ID3D11Buffer* g_DataCounter; static ID3D11UnorderedAccessView* g_UAVCounter;
|
||||
static int g_SphereCount, g_ObjSize, g_MatSize;
|
||||
static ID3D11Query *g_QueryBegin, *g_QueryEnd, *g_QueryDisjoint;
|
||||
#endif // #if DO_COMPUTE_GPU
|
||||
|
||||
int APIENTRY wWinMain(_In_ HINSTANCE hInstance, _In_opt_ HINSTANCE, _In_ LPWSTR, _In_ int nCmdShow)
|
||||
{
|
||||
g_Backbuffer = new float[kBackbufferWidth * kBackbufferHeight * 4];
|
||||
memset(g_Backbuffer, 0, kBackbufferWidth * kBackbufferHeight * 4 * sizeof(g_Backbuffer[0]));
|
||||
|
||||
InitializeTest();
|
||||
|
||||
MyRegisterClass(hInstance);
|
||||
if (!InitInstance (hInstance, nCmdShow))
|
||||
{
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (FAILED(InitD3DDevice()))
|
||||
{
|
||||
ShutdownD3DDevice();
|
||||
return 0;
|
||||
}
|
||||
|
||||
g_D3D11Device->CreateVertexShader(g_VSBytecode, ARRAYSIZE(g_VSBytecode), NULL, &g_VertexShader);
|
||||
g_D3D11Device->CreatePixelShader(g_PSBytecode, ARRAYSIZE(g_PSBytecode), NULL, &g_PixelShader);
|
||||
#if DO_COMPUTE_GPU
|
||||
g_D3D11Device->CreateComputeShader(g_CSBytecode, ARRAYSIZE(g_CSBytecode), NULL, &g_ComputeShader);
|
||||
#endif
|
||||
|
||||
D3D11_TEXTURE2D_DESC texDesc = {};
|
||||
texDesc.Width = kBackbufferWidth;
|
||||
texDesc.Height = kBackbufferHeight;
|
||||
texDesc.MipLevels = 1;
|
||||
texDesc.ArraySize = 1;
|
||||
texDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
texDesc.SampleDesc.Count = 1;
|
||||
texDesc.SampleDesc.Quality = 0;
|
||||
#if DO_COMPUTE_GPU
|
||||
texDesc.Usage = D3D11_USAGE_DEFAULT;
|
||||
texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
|
||||
texDesc.CPUAccessFlags = 0;
|
||||
#else
|
||||
texDesc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
texDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
#endif
|
||||
texDesc.MiscFlags = 0;
|
||||
g_D3D11Device->CreateTexture2D(&texDesc, NULL, &g_BackbufferTexture);
|
||||
g_D3D11Device->CreateTexture2D(&texDesc, NULL, &g_BackbufferTexture2);
|
||||
|
||||
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
|
||||
srvDesc.Format = texDesc.Format;
|
||||
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
|
||||
srvDesc.Texture2D.MipLevels = 1;
|
||||
srvDesc.Texture2D.MostDetailedMip = 0;
|
||||
g_D3D11Device->CreateShaderResourceView(g_BackbufferTexture, &srvDesc, &g_BackbufferSRV);
|
||||
g_D3D11Device->CreateShaderResourceView(g_BackbufferTexture2, &srvDesc, &g_BackbufferSRV2);
|
||||
|
||||
D3D11_SAMPLER_DESC smpDesc = {};
|
||||
smpDesc.Filter = D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT;
|
||||
smpDesc.AddressU = smpDesc.AddressV = smpDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
|
||||
g_D3D11Device->CreateSamplerState(&smpDesc, &g_SamplerLinear);
|
||||
|
||||
D3D11_RASTERIZER_DESC rasterDesc = {};
|
||||
rasterDesc.FillMode = D3D11_FILL_SOLID;
|
||||
rasterDesc.CullMode = D3D11_CULL_NONE;
|
||||
g_D3D11Device->CreateRasterizerState(&rasterDesc, &g_RasterState);
|
||||
|
||||
#if DO_COMPUTE_GPU
|
||||
D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
|
||||
|
||||
int camSize;
|
||||
GetObjectCount(g_SphereCount, g_ObjSize, g_MatSize, camSize);
|
||||
assert(g_ObjSize == 20);
|
||||
assert(g_MatSize == 36);
|
||||
assert(camSize == 88);
|
||||
D3D11_BUFFER_DESC bdesc = {};
|
||||
bdesc.ByteWidth = g_SphereCount * g_ObjSize;
|
||||
bdesc.Usage = D3D11_USAGE_DEFAULT;
|
||||
bdesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
bdesc.CPUAccessFlags = 0;
|
||||
bdesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
bdesc.StructureByteStride = g_ObjSize;
|
||||
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataSpheres);
|
||||
srvDesc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
|
||||
srvDesc.Buffer.FirstElement = 0;
|
||||
srvDesc.Buffer.NumElements = g_SphereCount;
|
||||
g_D3D11Device->CreateShaderResourceView(g_DataSpheres, &srvDesc, &g_SRVSpheres);
|
||||
|
||||
bdesc.ByteWidth = g_SphereCount * g_MatSize;
|
||||
bdesc.StructureByteStride = g_MatSize;
|
||||
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataMaterials);
|
||||
srvDesc.Buffer.NumElements = g_SphereCount;
|
||||
g_D3D11Device->CreateShaderResourceView(g_DataMaterials, &srvDesc, &g_SRVMaterials);
|
||||
|
||||
bdesc.ByteWidth = sizeof(ComputeParams);
|
||||
bdesc.StructureByteStride = sizeof(ComputeParams);
|
||||
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataParams);
|
||||
srvDesc.Buffer.NumElements = 1;
|
||||
g_D3D11Device->CreateShaderResourceView(g_DataParams, &srvDesc, &g_SRVParams);
|
||||
|
||||
bdesc.ByteWidth = g_SphereCount * 4;
|
||||
bdesc.StructureByteStride = 4;
|
||||
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataEmissives);
|
||||
srvDesc.Buffer.NumElements = g_SphereCount;
|
||||
g_D3D11Device->CreateShaderResourceView(g_DataEmissives, &srvDesc, &g_SRVEmissives);
|
||||
|
||||
bdesc.ByteWidth = 4;
|
||||
bdesc.BindFlags |= D3D11_BIND_UNORDERED_ACCESS;
|
||||
bdesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
|
||||
bdesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
|
||||
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataCounter);
|
||||
uavDesc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
uavDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
|
||||
uavDesc.Buffer.FirstElement = 0;
|
||||
uavDesc.Buffer.NumElements = 1;
|
||||
uavDesc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
|
||||
g_D3D11Device->CreateUnorderedAccessView(g_DataCounter, &uavDesc, &g_UAVCounter);
|
||||
|
||||
uavDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D;
|
||||
uavDesc.Texture2D.MipSlice = 0;
|
||||
g_D3D11Device->CreateUnorderedAccessView(g_BackbufferTexture, &uavDesc, &g_BackbufferUAV);
|
||||
g_D3D11Device->CreateUnorderedAccessView(g_BackbufferTexture2, &uavDesc, &g_BackbufferUAV2);
|
||||
|
||||
D3D11_QUERY_DESC qDesc = {};
|
||||
qDesc.Query = D3D11_QUERY_TIMESTAMP;
|
||||
g_D3D11Device->CreateQuery(&qDesc, &g_QueryBegin);
|
||||
g_D3D11Device->CreateQuery(&qDesc, &g_QueryEnd);
|
||||
qDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
|
||||
g_D3D11Device->CreateQuery(&qDesc, &g_QueryDisjoint);
|
||||
#endif // #if DO_COMPUTE_GPU
|
||||
|
||||
|
||||
static int framesLeft = 10;
|
||||
|
||||
// Main message loop
|
||||
MSG msg = { 0 };
|
||||
while (msg.message != WM_QUIT)
|
||||
{
|
||||
if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE))
|
||||
{
|
||||
TranslateMessage(&msg);
|
||||
DispatchMessage(&msg);
|
||||
}
|
||||
else
|
||||
{
|
||||
RenderFrame();
|
||||
TracyD3D11Collect(g_tracyCtx);
|
||||
if( --framesLeft == 0 ) break;
|
||||
}
|
||||
}
|
||||
|
||||
ShutdownTest();
|
||||
ShutdownD3DDevice();
|
||||
|
||||
return (int) msg.wParam;
|
||||
}
|
||||
|
||||
|
||||
ATOM MyRegisterClass(HINSTANCE hInstance)
|
||||
{
|
||||
ZoneScoped;
|
||||
|
||||
WNDCLASSEXW wcex;
|
||||
memset(&wcex, 0, sizeof(wcex));
|
||||
wcex.cbSize = sizeof(WNDCLASSEX);
|
||||
wcex.style = CS_HREDRAW | CS_VREDRAW;
|
||||
wcex.lpfnWndProc = WndProc;
|
||||
wcex.cbClsExtra = 0;
|
||||
wcex.cbWndExtra = 0;
|
||||
wcex.hInstance = hInstance;
|
||||
wcex.hCursor = LoadCursor(nullptr, IDC_ARROW);
|
||||
wcex.hbrBackground = (HBRUSH)(COLOR_WINDOW+1);
|
||||
wcex.lpszClassName = L"TestClass";
|
||||
return RegisterClassExW(&wcex);
|
||||
}
|
||||
|
||||
BOOL InitInstance(HINSTANCE hInstance, int nCmdShow)
|
||||
{
|
||||
ZoneScoped;
|
||||
|
||||
g_HInstance = hInstance;
|
||||
RECT rc = { 0, 0, kBackbufferWidth, kBackbufferHeight };
|
||||
DWORD style = WS_OVERLAPPED | WS_CAPTION | WS_SYSMENU | WS_MINIMIZEBOX;
|
||||
AdjustWindowRect(&rc, style, FALSE);
|
||||
HWND hWnd = CreateWindowW(L"TestClass", L"Test", style, CW_USEDEFAULT, CW_USEDEFAULT, rc.right-rc.left, rc.bottom-rc.top, nullptr, nullptr, hInstance, nullptr);
|
||||
if (!hWnd)
|
||||
return FALSE;
|
||||
g_Wnd = hWnd;
|
||||
ShowWindow(hWnd, nCmdShow);
|
||||
UpdateWindow(hWnd);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static uint64_t s_Time;
|
||||
static int s_Count;
|
||||
static char s_Buffer[200];
|
||||
static unsigned s_Flags = kFlagProgressive;
|
||||
static int s_FrameCount = 0;
|
||||
|
||||
|
||||
static void RenderFrame()
|
||||
{
|
||||
ZoneScoped;
|
||||
TracyD3D11Zone(g_tracyCtx, "RenderFrame");
|
||||
|
||||
LARGE_INTEGER time1;
|
||||
|
||||
#if DO_COMPUTE_GPU
|
||||
QueryPerformanceCounter(&time1);
|
||||
float t = float(clock()) / CLOCKS_PER_SEC;
|
||||
UpdateTest(t, s_FrameCount, kBackbufferWidth, kBackbufferHeight, s_Flags);
|
||||
|
||||
g_BackbufferIndex = 1 - g_BackbufferIndex;
|
||||
void* dataSpheres = alloca(g_SphereCount * g_ObjSize);
|
||||
void* dataMaterials = alloca(g_SphereCount * g_MatSize);
|
||||
void* dataEmissives = alloca(g_SphereCount * 4);
|
||||
ComputeParams dataParams;
|
||||
GetSceneDesc(dataSpheres, dataMaterials, &dataParams.cam, dataEmissives, &dataParams.emissiveCount);
|
||||
|
||||
dataParams.sphereCount = g_SphereCount;
|
||||
dataParams.screenWidth = kBackbufferWidth;
|
||||
dataParams.screenHeight = kBackbufferHeight;
|
||||
dataParams.frames = s_FrameCount;
|
||||
dataParams.invWidth = 1.0f / kBackbufferWidth;
|
||||
dataParams.invHeight = 1.0f / kBackbufferHeight;
|
||||
float lerpFac = float(s_FrameCount) / float(s_FrameCount + 1);
|
||||
if (s_Flags & kFlagAnimate)
|
||||
lerpFac *= DO_ANIMATE_SMOOTHING;
|
||||
if (!(s_Flags & kFlagProgressive))
|
||||
lerpFac = 0;
|
||||
dataParams.lerpFac = lerpFac;
|
||||
|
||||
g_D3D11Ctx->UpdateSubresource(g_DataSpheres, 0, NULL, dataSpheres, 0, 0);
|
||||
g_D3D11Ctx->UpdateSubresource(g_DataMaterials, 0, NULL, dataMaterials, 0, 0);
|
||||
g_D3D11Ctx->UpdateSubresource(g_DataParams, 0, NULL, &dataParams, 0, 0);
|
||||
g_D3D11Ctx->UpdateSubresource(g_DataEmissives, 0, NULL, dataEmissives, 0, 0);
|
||||
|
||||
ID3D11ShaderResourceView* srvs[] = {
|
||||
g_BackbufferIndex == 0 ? g_BackbufferSRV2 : g_BackbufferSRV,
|
||||
g_SRVSpheres,
|
||||
g_SRVMaterials,
|
||||
g_SRVParams,
|
||||
g_SRVEmissives
|
||||
};
|
||||
g_D3D11Ctx->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs);
|
||||
ID3D11UnorderedAccessView* uavs[] = {
|
||||
g_BackbufferIndex == 0 ? g_BackbufferUAV : g_BackbufferUAV2,
|
||||
g_UAVCounter
|
||||
};
|
||||
g_D3D11Ctx->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, NULL);
|
||||
g_D3D11Ctx->CSSetShader(g_ComputeShader, NULL, 0);
|
||||
g_D3D11Ctx->Begin(g_QueryDisjoint);
|
||||
g_D3D11Ctx->End(g_QueryBegin);
|
||||
g_D3D11Ctx->Dispatch(kBackbufferWidth/kCSGroupSizeX, kBackbufferHeight/kCSGroupSizeY, 1);
|
||||
g_D3D11Ctx->End(g_QueryEnd);
|
||||
uavs[0] = NULL;
|
||||
g_D3D11Ctx->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, NULL);
|
||||
++s_FrameCount;
|
||||
|
||||
#else
|
||||
QueryPerformanceCounter(&time1);
|
||||
float t = float(clock()) / CLOCKS_PER_SEC;
|
||||
static size_t s_RayCounter = 0;
|
||||
int rayCount;
|
||||
UpdateTest(t, s_FrameCount, kBackbufferWidth, kBackbufferHeight, s_Flags);
|
||||
DrawTest(t, s_FrameCount, kBackbufferWidth, kBackbufferHeight, g_Backbuffer, rayCount, s_Flags);
|
||||
s_FrameCount++;
|
||||
s_RayCounter += rayCount;
|
||||
LARGE_INTEGER time2;
|
||||
QueryPerformanceCounter(&time2);
|
||||
uint64_t dt = time2.QuadPart - time1.QuadPart;
|
||||
++s_Count;
|
||||
s_Time += dt;
|
||||
if (s_Count > 10)
|
||||
{
|
||||
LARGE_INTEGER frequency;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
|
||||
double s = double(s_Time) / double(frequency.QuadPart) / s_Count;
|
||||
sprintf_s(s_Buffer, sizeof(s_Buffer), "%.2fms (%.1f FPS) %.1fMrays/s %.2fMrays/frame frames %i\n", s * 1000.0f, 1.f / s, s_RayCounter / s_Count / s * 1.0e-6f, s_RayCounter / s_Count * 1.0e-6f, s_FrameCount);
|
||||
SetWindowTextA(g_Wnd, s_Buffer);
|
||||
OutputDebugStringA(s_Buffer);
|
||||
s_Count = 0;
|
||||
s_Time = 0;
|
||||
s_RayCounter = 0;
|
||||
}
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE mapped;
|
||||
g_D3D11Ctx->Map(g_BackbufferTexture, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
|
||||
const uint8_t* src = (const uint8_t*)g_Backbuffer;
|
||||
uint8_t* dst = (uint8_t*)mapped.pData;
|
||||
for (int y = 0; y < kBackbufferHeight; ++y)
|
||||
{
|
||||
memcpy(dst, src, kBackbufferWidth * 16);
|
||||
src += kBackbufferWidth * 16;
|
||||
dst += mapped.RowPitch;
|
||||
}
|
||||
g_D3D11Ctx->Unmap(g_BackbufferTexture, 0);
|
||||
#endif
|
||||
|
||||
g_D3D11Ctx->VSSetShader(g_VertexShader, NULL, 0);
|
||||
g_D3D11Ctx->PSSetShader(g_PixelShader, NULL, 0);
|
||||
g_D3D11Ctx->PSSetShaderResources(0, 1, g_BackbufferIndex == 0 ? &g_BackbufferSRV : &g_BackbufferSRV2);
|
||||
g_D3D11Ctx->PSSetSamplers(0, 1, &g_SamplerLinear);
|
||||
g_D3D11Ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
g_D3D11Ctx->RSSetState(g_RasterState);
|
||||
g_D3D11Ctx->Draw(3, 0);
|
||||
g_D3D11SwapChain->Present(0, 0);
|
||||
|
||||
FrameMark;
|
||||
|
||||
#if DO_COMPUTE_GPU
|
||||
g_D3D11Ctx->End(g_QueryDisjoint);
|
||||
|
||||
// get GPU times
|
||||
while (g_D3D11Ctx->GetData(g_QueryDisjoint, NULL, 0, 0) == S_FALSE) { Sleep(0); }
|
||||
D3D10_QUERY_DATA_TIMESTAMP_DISJOINT tsDisjoint;
|
||||
g_D3D11Ctx->GetData(g_QueryDisjoint, &tsDisjoint, sizeof(tsDisjoint), 0);
|
||||
if (!tsDisjoint.Disjoint)
|
||||
{
|
||||
UINT64 tsBegin, tsEnd;
|
||||
// Note: on some GPUs/drivers, even when the disjoint query above already said "yeah I have data",
|
||||
// might still not return "I have data" for timestamp queries before it.
|
||||
while (g_D3D11Ctx->GetData(g_QueryBegin, &tsBegin, sizeof(tsBegin), 0) == S_FALSE) { Sleep(0); }
|
||||
while (g_D3D11Ctx->GetData(g_QueryEnd, &tsEnd, sizeof(tsEnd), 0) == S_FALSE) { Sleep(0); }
|
||||
|
||||
float s = float(tsEnd - tsBegin) / float(tsDisjoint.Frequency);
|
||||
|
||||
static uint64_t s_RayCounter;
|
||||
D3D11_MAPPED_SUBRESOURCE mapped;
|
||||
g_D3D11Ctx->Map(g_DataCounter, 0, D3D11_MAP_READ, 0, &mapped);
|
||||
s_RayCounter += *(const int*)mapped.pData;
|
||||
g_D3D11Ctx->Unmap(g_DataCounter, 0);
|
||||
int zeroCount = 0;
|
||||
g_D3D11Ctx->UpdateSubresource(g_DataCounter, 0, NULL, &zeroCount, 0, 0);
|
||||
|
||||
static float s_Time;
|
||||
++s_Count;
|
||||
s_Time += s;
|
||||
if (s_Count > 150)
|
||||
{
|
||||
s = s_Time / s_Count;
|
||||
sprintf_s(s_Buffer, sizeof(s_Buffer), "%.2fms (%.1f FPS) %.1fMrays/s %.2fMrays/frame frames %i\n", s * 1000.0f, 1.f / s, s_RayCounter / s_Count / s * 1.0e-6f, s_RayCounter / s_Count * 1.0e-6f, s_FrameCount);
|
||||
SetWindowTextA(g_Wnd, s_Buffer);
|
||||
s_Count = 0;
|
||||
s_Time = 0;
|
||||
s_RayCounter = 0;
|
||||
}
|
||||
|
||||
}
|
||||
#endif // #if DO_COMPUTE_GPU
|
||||
}
|
||||
|
||||
|
||||
LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
|
||||
{
|
||||
switch (message)
|
||||
{
|
||||
case WM_PAINT:
|
||||
{
|
||||
PAINTSTRUCT ps;
|
||||
HDC hdc = BeginPaint(hWnd, &ps);
|
||||
EndPaint(hWnd, &ps);
|
||||
}
|
||||
break;
|
||||
case WM_DESTROY:
|
||||
PostQuitMessage(0);
|
||||
break;
|
||||
case WM_CHAR:
|
||||
if (wParam == 'a')
|
||||
s_Flags = s_Flags ^ kFlagAnimate;
|
||||
if (wParam == 'p')
|
||||
{
|
||||
s_Flags = s_Flags ^ kFlagProgressive;
|
||||
s_FrameCount = 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return DefWindowProc(hWnd, message, wParam, lParam);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static HRESULT InitD3DDevice()
|
||||
{
|
||||
ZoneScoped;
|
||||
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
RECT rc;
|
||||
GetClientRect(g_Wnd, &rc);
|
||||
UINT width = rc.right - rc.left;
|
||||
UINT height = rc.bottom - rc.top;
|
||||
|
||||
UINT createDeviceFlags = 0;
|
||||
#ifdef _DEBUG
|
||||
createDeviceFlags |= D3D11_CREATE_DEVICE_DEBUG;
|
||||
#endif
|
||||
|
||||
D3D_FEATURE_LEVEL featureLevels[] =
|
||||
{
|
||||
D3D_FEATURE_LEVEL_11_0,
|
||||
};
|
||||
UINT numFeatureLevels = ARRAYSIZE(featureLevels);
|
||||
hr = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, createDeviceFlags, featureLevels, numFeatureLevels, D3D11_SDK_VERSION, &g_D3D11Device, &g_D3D11FeatureLevel, &g_D3D11Ctx);
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
// Get DXGI factory
|
||||
IDXGIFactory1* dxgiFactory = nullptr;
|
||||
{
|
||||
IDXGIDevice* dxgiDevice = nullptr;
|
||||
hr = g_D3D11Device->QueryInterface(__uuidof(IDXGIDevice), reinterpret_cast<void**>(&dxgiDevice));
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
IDXGIAdapter* adapter = nullptr;
|
||||
hr = dxgiDevice->GetAdapter(&adapter);
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
hr = adapter->GetParent(__uuidof(IDXGIFactory1), reinterpret_cast<void**>(&dxgiFactory));
|
||||
adapter->Release();
|
||||
}
|
||||
dxgiDevice->Release();
|
||||
}
|
||||
}
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
// Create swap chain
|
||||
DXGI_SWAP_CHAIN_DESC sd;
|
||||
ZeroMemory(&sd, sizeof(sd));
|
||||
sd.BufferCount = 1;
|
||||
sd.BufferDesc.Width = width;
|
||||
sd.BufferDesc.Height = height;
|
||||
sd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
sd.BufferDesc.RefreshRate.Numerator = 60;
|
||||
sd.BufferDesc.RefreshRate.Denominator = 1;
|
||||
sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
||||
sd.OutputWindow = g_Wnd;
|
||||
sd.SampleDesc.Count = 1;
|
||||
sd.SampleDesc.Quality = 0;
|
||||
sd.Windowed = TRUE;
|
||||
hr = dxgiFactory->CreateSwapChain(g_D3D11Device, &sd, &g_D3D11SwapChain);
|
||||
|
||||
// Prevent Alt-Enter
|
||||
dxgiFactory->MakeWindowAssociation(g_Wnd, DXGI_MWA_NO_ALT_ENTER);
|
||||
dxgiFactory->Release();
|
||||
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
// RTV
|
||||
ID3D11Texture2D* pBackBuffer = nullptr;
|
||||
hr = g_D3D11SwapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), reinterpret_cast<void**>(&pBackBuffer));
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
hr = g_D3D11Device->CreateRenderTargetView(pBackBuffer, nullptr, &g_D3D11RenderTarget);
|
||||
pBackBuffer->Release();
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
g_D3D11Ctx->OMSetRenderTargets(1, &g_D3D11RenderTarget, nullptr);
|
||||
|
||||
// Viewport
|
||||
D3D11_VIEWPORT vp;
|
||||
vp.Width = (float)width;
|
||||
vp.Height = (float)height;
|
||||
vp.MinDepth = 0.0f;
|
||||
vp.MaxDepth = 1.0f;
|
||||
vp.TopLeftX = 0;
|
||||
vp.TopLeftY = 0;
|
||||
g_D3D11Ctx->RSSetViewports(1, &vp);
|
||||
|
||||
g_tracyCtx = TracyD3D11Context(g_D3D11Device, g_D3D11Ctx);
|
||||
const char* tracyD3D11CtxName = "D3D11";
|
||||
TracyD3D11ContextName(g_tracyCtx, tracyD3D11CtxName, (uint16_t)strlen(tracyD3D11CtxName));
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
static void ShutdownD3DDevice()
|
||||
{
|
||||
ZoneScoped;
|
||||
|
||||
if (g_tracyCtx) TracyD3D11Destroy(g_tracyCtx);
|
||||
|
||||
if (g_D3D11Ctx) g_D3D11Ctx->ClearState();
|
||||
|
||||
if (g_D3D11RenderTarget) g_D3D11RenderTarget->Release();
|
||||
if (g_D3D11SwapChain) g_D3D11SwapChain->Release();
|
||||
if (g_D3D11Ctx) g_D3D11Ctx->Release();
|
||||
if (g_D3D11Device) g_D3D11Device->Release();
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
struct vs2ps
|
||||
{
|
||||
float2 uv : TEXCOORD0;
|
||||
float4 pos : SV_Position;
|
||||
};
|
||||
|
||||
vs2ps main(uint vid : SV_VertexID)
|
||||
{
|
||||
vs2ps o;
|
||||
o.uv = float2((vid << 1) & 2, vid & 2);
|
||||
o.pos = float4(o.uv * float2(2, 2) + float2(-1, -1), 0, 1);
|
||||
return o;
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org>
|
|
@ -0,0 +1,77 @@
|
|||
#include <algorithm>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
|
||||
inline float sqrtfast( float v )
|
||||
{
|
||||
union
|
||||
{
|
||||
int i;
|
||||
float f;
|
||||
} u;
|
||||
|
||||
u.f = v;
|
||||
u.i -= 1 << 23;
|
||||
u.i >>= 1;
|
||||
u.i += 1 << 29;
|
||||
return u.f;
|
||||
}
|
||||
|
||||
inline float linear2sRGB( float v )
|
||||
{
|
||||
float s1 = sqrtfast( v );
|
||||
float s2 = sqrtfast( s1 );
|
||||
float s3 = sqrtfast( s2 );
|
||||
return 0.585122381f * s1 + 0.783140355f * s2 - 0.368262736f * s3;
|
||||
}
|
||||
|
||||
int lerp( int v0, int v1, float t )
|
||||
{
|
||||
return int( ( 1-t ) * v0 + t * v1 );
|
||||
}
|
||||
|
||||
inline float sRGB2linear( float v )
|
||||
{
|
||||
return v * ( v * ( v * 0.305306011f + 0.682171111f ) + 0.012522878f );
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int c0 = std::min( 255, int( sRGB2linear( 1.f ) * 255 ) );
|
||||
int c1 = std::min( 255, int( sRGB2linear( 0x44 / 255.f ) * 255 ) );
|
||||
|
||||
int s0 = std::min( 255, int( sRGB2linear( 1.f ) * 255 * 0.5 ) );
|
||||
int s1 = std::min( 255, int( sRGB2linear( 0x44 / 255.f ) * 255 * 0.5 ) );
|
||||
|
||||
float target = 80.f;
|
||||
|
||||
uint32_t t[256];
|
||||
memset( t, 0, sizeof( uint32_t ) * 256 );
|
||||
|
||||
for( int i=1; i<128; i++ )
|
||||
{
|
||||
float m = (i-1) / target;
|
||||
int l0 = std::min( 255, lerp( s0, c0, m ) );
|
||||
int l1 = std::min( 255, lerp( s1, c1, m ) );
|
||||
int g0 = std::min( 255, int( linear2sRGB( l0/255.f ) * 255 ) );
|
||||
int g1 = std::min( 255, int( linear2sRGB( l1/255.f ) * 255 ) );
|
||||
g0 = l0;
|
||||
g1 = l1;
|
||||
t[i] = 0xFF000000 | ( g1 << 16 ) | ( g0 << 8 ) | g1;
|
||||
t[uint8_t(-i)] = 0xFF000000 | ( g1 << 16 ) | ( g1 << 8 ) | g0;
|
||||
}
|
||||
|
||||
printf( "uint32_t MemDecayColor[256] = {\n" );
|
||||
for( int i=0; i<256; i += 8 )
|
||||
{
|
||||
printf( " " );
|
||||
for( int j=i; j<i+8; j++ )
|
||||
{
|
||||
printf( " 0x%X,", t[j] );
|
||||
}
|
||||
printf( "\n" );
|
||||
}
|
||||
printf( "};\n" );
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
for( int i=0; i<255*3+1; i++ )
|
||||
{
|
||||
// replace 4 with 2 for ARM NEON table
|
||||
uint32_t range = ( 4 << 16 ) / ( 1+i );
|
||||
if( range > 0xFFFF ) range = 0xFFFF;
|
||||
if( i % 16 == 15 )
|
||||
{
|
||||
printf( "0x%04x,\n", range );
|
||||
}
|
||||
else
|
||||
{
|
||||
printf( "0x%04x, ", range );
|
||||
}
|
||||
}
|
||||
printf( "\n" );
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
static const uint8_t IndexTable[4] = { 1, 3, 2, 0 };
|
||||
|
||||
int convert( int v )
|
||||
{
|
||||
int v0 = v & 0x3;
|
||||
int v1 = ( v >> 2 ) & 0x3;
|
||||
int v2 = ( v >> 4 ) & 0x3;
|
||||
int v3 = ( v >> 6 );
|
||||
|
||||
int t0 = IndexTable[v0];
|
||||
int t1 = IndexTable[v1];
|
||||
int t2 = IndexTable[v2];
|
||||
int t3 = IndexTable[v3];
|
||||
|
||||
return t0 | ( t1 << 2 ) | ( t2 << 4 ) | ( t3 << 6 );
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
for( int i=0; i<256; i++ )
|
||||
{
|
||||
if( i % 16 == 15 )
|
||||
{
|
||||
printf( "%i,\n", convert( i ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
printf( "%i,\t", convert( i ) );
|
||||
}
|
||||
}
|
||||
printf( "\n" );
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
// g++ identify.cpp -lpthread ../common/tracy_lz4.cpp ../zstd/common/*.c ../zstd/decompress/*.c
|
||||
|
||||
#include <memory>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "../server/TracyFileRead.hpp"
|
||||
#include "../server/TracyVersion.hpp"
|
||||
|
||||
static const uint8_t FileHeader[8] { 't', 'r', 'a', 'c', 'y', tracy::Version::Major, tracy::Version::Minor, tracy::Version::Patch };
|
||||
enum { FileHeaderMagic = 5 };
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
if( argc != 2 )
|
||||
{
|
||||
fprintf( stderr, "Usage: %s trace\n", argv[0] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
std::unique_ptr<tracy::FileRead> f( tracy::FileRead::Open( argv[1] ) );
|
||||
if( !f )
|
||||
{
|
||||
fprintf( stderr, "%s: Cannot open!\n", argv[1] );
|
||||
return -2;
|
||||
}
|
||||
|
||||
uint8_t hdr[8];
|
||||
f->Read( hdr, sizeof( hdr ) );
|
||||
if( memcmp( FileHeader, hdr, FileHeaderMagic ) != 0 )
|
||||
{
|
||||
fprintf( stderr, "%s: Bad header!\n", argv[1] );
|
||||
return -3;
|
||||
}
|
||||
|
||||
printf( "%s: %i.%i.%i\n", argv[1], hdr[FileHeaderMagic], hdr[FileHeaderMagic+1], hdr[FileHeaderMagic+2] );
|
||||
}
|
||||
catch( const tracy::NotTracyDump& )
|
||||
{
|
||||
fprintf( stderr, "%s: Not a tracy dump!\n", argv[1] );
|
||||
return -4;
|
||||
}
|
||||
catch( const tracy::FileReadError& )
|
||||
{
|
||||
fprintf( stderr, "%s: File read error!\n", argv[1] );
|
||||
return -5;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/sh
|
||||
|
||||
rm -rf tracy-build
|
||||
mkdir tracy-build
|
||||
|
||||
if [ ! -f vswhere.exe ]; then
|
||||
wget https://github.com/microsoft/vswhere/releases/download/2.8.4/vswhere.exe
|
||||
fi
|
||||
|
||||
MSVC=`./vswhere.exe -property installationPath -version '[16.0,16.999]' | head -n 1`
|
||||
MSVC=`wslpath "$MSVC" | tr -d '\r'`
|
||||
MSBUILD=$MSVC/MSBuild/Current/Bin/MSBuild.exe
|
||||
|
||||
for i in capture csvexport import-chrome update; do
|
||||
echo $i...
|
||||
"$MSBUILD" ../$i/build/win32/$i.sln /t:Clean /p:Configuration=Release /p:Platform=x64 /noconsolelogger /nologo -m
|
||||
"$MSBUILD" ../$i/build/win32/$i.sln /t:Build /p:Configuration=Release /p:Platform=x64 /noconsolelogger /nologo -m
|
||||
cp ../$i/build/win32/x64/Release/$i.exe tracy-build/
|
||||
done
|
||||
|
||||
echo profiler...
|
||||
"$MSBUILD" ../profiler/build/win32/Tracy.sln /t:Clean /p:Configuration=Release /p:Platform=x64 /noconsolelogger /nologo -m
|
||||
"$MSBUILD" ../profiler/build/win32/Tracy.sln /t:Build /p:Configuration=Release /p:Platform=x64 /noconsolelogger /nologo -m
|
||||
cp ../profiler/build/win32/x64/Release/Tracy.exe tracy-build/
|
|
@ -0,0 +1,24 @@
|
|||
#include <stdio.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
//int a = 16, b = 44, s = 4;
|
||||
//int av = 12, bv = 6, cv = 3;
|
||||
|
||||
//int a = 32, b = 48, s = 16;
|
||||
//int av = 12, bv = 6, cv = 3;
|
||||
|
||||
int a = 48, b = 64, s = 16;
|
||||
int av = 48, bv = 32, cv = 24;
|
||||
|
||||
printf( "int TrTbl[] = { " );
|
||||
int first = 1;
|
||||
for( int i=0; i<256; i+=s )
|
||||
{
|
||||
if( first ) first = 0; else printf( ", " );
|
||||
if( i < a ) printf( "%i", av );
|
||||
else if( i < b ) printf( "%i", bv );
|
||||
else printf( "%i", cv );
|
||||
}
|
||||
printf( " };\n" );
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
#include <fcntl.h>
|
||||
#include <poll.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <dlfcn.h>
|
||||
|
||||
enum { BufSize = 64*1024 };
|
||||
|
||||
typedef int (*open_t)( const char*, int, ... );
|
||||
typedef void (*exit_t)( int );
|
||||
typedef int (*poll_t)( struct pollfd*, nfds_t, int timeout );
|
||||
typedef int (*nanosleep_t)( const struct timespec*, struct timespec* );
|
||||
typedef ssize_t (*read_t)( int, void*, size_t );
|
||||
typedef ssize_t (*write_t)( int, const void*, size_t );
|
||||
|
||||
void _start()
|
||||
{
|
||||
void* libc = dlopen( "libc.so", RTLD_LAZY );
|
||||
|
||||
open_t sym_open = dlsym( libc, "open" );
|
||||
exit_t sym_exit = dlsym( libc, "exit" );
|
||||
poll_t sym_poll = dlsym( libc, "poll" );
|
||||
nanosleep_t sym_nanosleep = dlsym( libc, "nanosleep" );
|
||||
read_t sym_read = dlsym( libc, "read" );
|
||||
write_t sym_write = dlsym( libc, "write" );
|
||||
|
||||
char buf[BufSize];
|
||||
|
||||
int kernelFd = sym_open( "/sys/kernel/debug/tracing/trace_pipe", O_RDONLY );
|
||||
if( kernelFd < 0 ) sym_exit( 0 );
|
||||
|
||||
struct pollfd pfd_in;
|
||||
pfd_in.fd = kernelFd;
|
||||
pfd_in.events = POLLIN | POLLERR;
|
||||
|
||||
struct pollfd pfd_out;
|
||||
pfd_out.fd = STDOUT_FILENO;
|
||||
pfd_out.events = POLLERR;
|
||||
|
||||
struct timespec sleepTime;
|
||||
sleepTime.tv_sec = 0;
|
||||
sleepTime.tv_nsec = 1000 * 1000 * 10;
|
||||
|
||||
// While the pipe is open (no POLLERR on the output fd)
|
||||
while( sym_poll( &pfd_out, 1, 0) <= 0 )
|
||||
{
|
||||
// If there is neither data (POLLIN) nor an error (POLLERR) on
|
||||
// the read fd, sleep. This implements a blocking read without relying
|
||||
// on the Linux kernel's implementation of blocking reads which causes
|
||||
// a large number of context switches.
|
||||
if( sym_poll( &pfd_in, 1, 0 ) <= 0 ) {
|
||||
sym_nanosleep( &sleepTime, NULL );
|
||||
continue; // go back to the while condition polling the output fd
|
||||
}
|
||||
const ssize_t rd = sym_read( kernelFd, buf, BufSize );
|
||||
if( rd <= 0 ) break;
|
||||
sym_write( STDOUT_FILENO, buf, rd );
|
||||
}
|
||||
|
||||
sym_exit( 0 );
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
#include <stdint.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
struct AsmDesc
|
||||
{
|
||||
uint8_t type;
|
||||
uint16_t width;
|
||||
};
|
||||
|
||||
struct AsmVar
|
||||
{
|
||||
int descNum;
|
||||
AsmDesc desc[5];
|
||||
int isaSet;
|
||||
float tp;
|
||||
int port, uops, minlat, maxlat;
|
||||
bool minbound, maxbound;
|
||||
};
|
||||
|
||||
struct AsmOp
|
||||
{
|
||||
int id;
|
||||
int descId;
|
||||
int numVariants;
|
||||
const AsmVar*const* variant;
|
||||
};
|
||||
|
||||
struct MicroArchitecture
|
||||
{
|
||||
int numOps;
|
||||
const AsmOp*const* ops;
|
||||
};
|
||||
|
||||
extern const char* MicroArchitectureList[];
|
||||
extern const char* PortList[];
|
||||
extern const char* OpsList[];
|
||||
extern const char* OpDescList[];
|
||||
extern const char* IsaList[];
|
||||
extern const MicroArchitecture* const MicroArchitectureData[];
|
||||
|
||||
extern int OpsNum;
|
||||
extern int MicroArchitectureNum;
|
||||
|
||||
};
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue