Skip to content

Commit

Permalink
Remove unnecessary branches/panics from Query accesses (bevyengine#6461)
Browse files Browse the repository at this point in the history
# Objective
Supersedes bevyengine#6452. Upon inspection of the [generated assembly](https://gist.github.com/james7132/c2740c6941b80d7912f1e8888e223cbb#file-original-s) of a [simple Bevy binary](https://gist.github.com/james7132/c2740c6941b80d7912f1e8888e223cbb#file-source-rs) compiled with `cargo rustc --release -- --emit asm`, it's apparent that there are multiple unnecessary branches in the generated assembly:

```assembly
.LBB5_5:
	cmpq	%r10, %r11
	je	.LBB5_15
	movq	(%r11), %rcx
	movq	328(%r15), %rdx
	cmpq	%rdx, %rcx
	jae	.LBB5_14
	movq	312(%r15), %rdi
	leaq	(%rcx,%rcx,2), %rcx
	shlq	$5, %rcx
	movq	336(%r12), %rdx
	movq	64(%rdi,%rcx), %rax
	cmpq	%rdx, %rax
	jbe	.LBB5_4
	leaq	(%rdi,%rcx), %rsi
	movq	48(%rsi), %rbp
	shlq	$4, %rdx
	cmpq	$0, (%rbp,%rdx)
	je	.LBB5_4
	movq	344(%r12), %rbx
	cmpq	%rbx, %rax
	jbe	.LBB5_4
	shlq	$4, %rbx
	cmpq	$0, (%rbp,%rbx)
	je	.LBB5_4
	addq	$8, %r11
	movq	88(%rdi,%rcx), %rcx
	testq	%rcx, %rcx
	je	.LBB5_5
	movq	(%rsi), %rax
	movq	8(%rbp,%rdx), %rdx
	leaq	(%rdx,%rdx,4), %rdi
	shlq	$4, %rdi
	movq	32(%rax,%rdi), %rdx
	movq	56(%rax,%rdi), %r8
	movq	8(%rbp,%rbx), %rbp
	leaq	(%rbp,%rbp,4), %rbp
	shlq	$4, %rbp
	movq	32(%rax,%rbp), %r9
	xorl	%ebp, %ebp
	jmp	.LBB5_13
	.p2align	4, 0x90
```

Almost every one of the instructions starting with `j` is a potential branch, which can significantly slow down accesses. Of these, two jump targets are referenced frequently yet should never actually be reached:

```asm
.LBB5_14:
	leaq	__unnamed_2(%rip), %r8
	callq	_ZN4core9panicking18panic_bounds_check17h70367088e72af65aE
	ud2
.LBB5_4:
	callq	_ZN8bevy_ecs5query25debug_checked_unreachable17h0855ff520ceaea77E
	ud2
	.seh_endproc
```

These correspond to calls into the panic paths for out-of-bounds indexing of `Tables` and for `debug_checked_unreachable`. Both should be inlined and optimized out, but are not.

## Solution
Make `debug_checked_unreachable` a macro to forcibly inline either `unreachable!()` in debug builds or `std::hint::unreachable_unchecked()` in release mode. Replace the `Tables` and `Archetype` index accesses with `get(id).unwrap_or_else(|| debug_checked_unreachable!())` to assume that the table or archetype provided exists.

This has no external breaking change of any kind.

The equivalent section of code with these changes removes most of the conditional jump instructions:

```asm
.LBB5_5:
	movss	(%rbx,%rbp,4), %xmm0
	movl	%r14d, 4(%r8,%rbp,8)
	addss	(%rdi,%rbp,4), %xmm0
	movss	%xmm0, (%rdi,%rbp,4)
	incq	%rbp
.LBB5_1:
	cmpq	%rdx, %rbp
	jne	.LBB5_5
	.p2align	4, 0x90
.LBB5_2:
	cmpq	%rcx, %rax
	je	.LBB5_6
	movq	(%rax), %rdx
	addq	$8, %rax
	movq	312(%rsi), %rbp
	leaq	(%rdx,%rdx,2), %rbx
	shlq	$5, %rbx
	movq	88(%rbp,%rbx), %rdx
	testq	%rdx, %rdx
	je	.LBB5_2
	leaq	(%rbx,%rbp), %r8
	movq	336(%r15), %rdi
	movq	344(%r15), %r9
	movq	48(%rbp,%rbx), %r10
	shlq	$4, %rdi
	movq	(%r8), %rbx
	movq	8(%r10,%rdi), %rdi
	leaq	(%rdi,%rdi,4), %rbp
	shlq	$4, %rbp
	movq	32(%rbx,%rbp), %rdi
	movq	56(%rbx,%rbp), %r8
	shlq	$4, %r9
	movq	8(%r10,%r9), %rbp
	leaq	(%rbp,%rbp,4), %rbp
	shlq	$4, %rbp
	movq	32(%rbx,%rbp), %rbx
	xorl	%ebp, %ebp
	jmp	.LBB5_5
.LBB5_6:
	addq	$40, %rsp
	popq	%rbx
	popq	%rbp
	popq	%rdi
	popq	%rsi
	popq	%r14
	popq	%r15
	retq
	.seh_endproc

```

## Performance

Microbenchmarks results:

<details>

```
group                                                    main                                     no-panic-query
-----                                                    ----                                     --------------
busy_systems/01x_entities_03_systems                     1.20     42.4±2.66µs        ? ?/sec      1.00     35.3±1.68µs        ? ?/sec
busy_systems/01x_entities_06_systems                     1.32     83.8±3.50µs        ? ?/sec      1.00     63.6±1.72µs        ? ?/sec
busy_systems/01x_entities_09_systems                     1.15    113.3±8.90µs        ? ?/sec      1.00     98.2±6.15µs        ? ?/sec
busy_systems/01x_entities_12_systems                     1.27   160.8±32.44µs        ? ?/sec      1.00    126.6±4.70µs        ? ?/sec
busy_systems/01x_entities_15_systems                     1.12    179.6±3.71µs        ? ?/sec      1.00   160.3±11.03µs        ? ?/sec
busy_systems/02x_entities_03_systems                     1.18     76.8±3.14µs        ? ?/sec      1.00     65.2±3.17µs        ? ?/sec
busy_systems/02x_entities_06_systems                     1.16    144.6±6.10µs        ? ?/sec      1.00    124.5±5.14µs        ? ?/sec
busy_systems/02x_entities_09_systems                     1.19    215.3±9.18µs        ? ?/sec      1.00    181.5±5.67µs        ? ?/sec
busy_systems/02x_entities_12_systems                     1.20    266.7±8.33µs        ? ?/sec      1.00    222.0±9.53µs        ? ?/sec
busy_systems/02x_entities_15_systems                     1.23   338.8±10.53µs        ? ?/sec      1.00    276.3±6.94µs        ? ?/sec
busy_systems/03x_entities_03_systems                     1.43    113.5±5.06µs        ? ?/sec      1.00     79.6±1.49µs        ? ?/sec
busy_systems/03x_entities_06_systems                     1.38   217.3±12.67µs        ? ?/sec      1.00    157.5±3.07µs        ? ?/sec
busy_systems/03x_entities_09_systems                     1.23   308.8±24.75µs        ? ?/sec      1.00    251.6±8.93µs        ? ?/sec
busy_systems/03x_entities_12_systems                     1.05   347.7±12.43µs        ? ?/sec      1.00   330.6±11.43µs        ? ?/sec
busy_systems/03x_entities_15_systems                     1.13   455.5±13.88µs        ? ?/sec      1.00   401.7±17.29µs        ? ?/sec
busy_systems/04x_entities_03_systems                     1.24    144.7±5.89µs        ? ?/sec      1.00    116.9±6.29µs        ? ?/sec
busy_systems/04x_entities_06_systems                     1.24   282.8±21.40µs        ? ?/sec      1.00   228.6±21.31µs        ? ?/sec
busy_systems/04x_entities_09_systems                     1.35   431.8±14.10µs        ? ?/sec      1.00    319.6±9.83µs        ? ?/sec
busy_systems/04x_entities_12_systems                     1.16   493.8±22.87µs        ? ?/sec      1.00   424.9±15.24µs        ? ?/sec
busy_systems/04x_entities_15_systems                     1.10   587.5±23.25µs        ? ?/sec      1.00   531.7±16.32µs        ? ?/sec
busy_systems/05x_entities_03_systems                     1.14    148.2±9.61µs        ? ?/sec      1.00    129.5±4.32µs        ? ?/sec
busy_systems/05x_entities_06_systems                     1.31   359.7±17.46µs        ? ?/sec      1.00   273.6±10.55µs        ? ?/sec
busy_systems/05x_entities_09_systems                     1.22   473.5±23.11µs        ? ?/sec      1.00   389.3±13.62µs        ? ?/sec
busy_systems/05x_entities_12_systems                     1.05   562.9±20.76µs        ? ?/sec      1.00   536.5±24.35µs        ? ?/sec
busy_systems/05x_entities_15_systems                     1.23   818.5±28.70µs        ? ?/sec      1.00   666.6±45.87µs        ? ?/sec
contrived/01x_entities_03_systems                        1.27     27.5±0.49µs        ? ?/sec      1.00     21.6±1.71µs        ? ?/sec
contrived/01x_entities_06_systems                        1.22     49.9±1.18µs        ? ?/sec      1.00     40.7±2.62µs        ? ?/sec
contrived/01x_entities_09_systems                        1.30     72.3±2.39µs        ? ?/sec      1.00     55.4±2.60µs        ? ?/sec
contrived/01x_entities_12_systems                        1.28     94.3±9.44µs        ? ?/sec      1.00     73.7±3.62µs        ? ?/sec
contrived/01x_entities_15_systems                        1.25    118.0±2.43µs        ? ?/sec      1.00     94.1±3.99µs        ? ?/sec
contrived/02x_entities_03_systems                        1.23     41.6±1.71µs        ? ?/sec      1.00     33.7±2.30µs        ? ?/sec
contrived/02x_entities_06_systems                        1.19     78.6±2.63µs        ? ?/sec      1.00     65.9±2.35µs        ? ?/sec
contrived/02x_entities_09_systems                        1.28    113.6±3.60µs        ? ?/sec      1.00     88.6±3.60µs        ? ?/sec
contrived/02x_entities_12_systems                        1.20    146.4±5.75µs        ? ?/sec      1.00    121.7±3.35µs        ? ?/sec
contrived/02x_entities_15_systems                        1.23    178.5±4.86µs        ? ?/sec      1.00    145.7±4.00µs        ? ?/sec
contrived/03x_entities_03_systems                        1.42     58.3±2.77µs        ? ?/sec      1.00     41.1±1.54µs        ? ?/sec
contrived/03x_entities_06_systems                        1.32    108.5±7.30µs        ? ?/sec      1.00     82.4±4.86µs        ? ?/sec
contrived/03x_entities_09_systems                        1.23    153.7±4.61µs        ? ?/sec      1.00    125.0±4.76µs        ? ?/sec
contrived/03x_entities_12_systems                        1.18    197.5±5.12µs        ? ?/sec      1.00    166.8±8.14µs        ? ?/sec
contrived/03x_entities_15_systems                        1.23    238.8±6.38µs        ? ?/sec      1.00    194.6±4.55µs        ? ?/sec
contrived/04x_entities_03_systems                        1.34     66.4±3.42µs        ? ?/sec      1.00     49.5±1.98µs        ? ?/sec
contrived/04x_entities_06_systems                        1.27    134.3±4.86µs        ? ?/sec      1.00    105.8±3.58µs        ? ?/sec
contrived/04x_entities_09_systems                        1.26    193.2±3.83µs        ? ?/sec      1.00    153.0±5.60µs        ? ?/sec
contrived/04x_entities_12_systems                        1.16    237.1±5.78µs        ? ?/sec      1.00   204.9±18.77µs        ? ?/sec
contrived/04x_entities_15_systems                        1.17    289.2±4.76µs        ? ?/sec      1.00    246.3±8.57µs        ? ?/sec
contrived/05x_entities_03_systems                        1.26     80.4±2.90µs        ? ?/sec      1.00     63.7±3.07µs        ? ?/sec
contrived/05x_entities_06_systems                        1.27   161.6±13.47µs        ? ?/sec      1.00    127.2±5.59µs        ? ?/sec
contrived/05x_entities_09_systems                        1.22    228.0±7.76µs        ? ?/sec      1.00    186.2±7.68µs        ? ?/sec
contrived/05x_entities_12_systems                        1.20    289.5±6.21µs        ? ?/sec      1.00    241.8±7.52µs        ? ?/sec
contrived/05x_entities_15_systems                        1.18   357.3±11.24µs        ? ?/sec      1.00    302.7±7.21µs        ? ?/sec
heavy_compute/base                                       1.01    302.4±3.52µs        ? ?/sec      1.00    300.2±3.40µs        ? ?/sec
iter_fragmented/base                                     1.00    348.1±7.51ns        ? ?/sec      1.01    351.9±8.32ns        ? ?/sec
iter_fragmented/foreach                                  1.03   239.8±23.78ns        ? ?/sec      1.00   233.8±18.12ns        ? ?/sec
iter_fragmented/foreach_wide                             1.00      3.9±0.13µs        ? ?/sec      1.02      4.0±0.22µs        ? ?/sec
iter_fragmented/wide                                     1.18      4.6±0.15µs        ? ?/sec      1.00      3.9±0.10µs        ? ?/sec
iter_fragmented_sparse/base                              1.02      8.1±0.15ns        ? ?/sec      1.00      7.9±0.56ns        ? ?/sec
iter_fragmented_sparse/foreach                           1.00      7.8±0.22ns        ? ?/sec      1.01      7.9±0.62ns        ? ?/sec
iter_fragmented_sparse/foreach_wide                      1.00     37.2±1.17ns        ? ?/sec      1.10     40.9±0.95ns        ? ?/sec
iter_fragmented_sparse/wide                              1.09     48.4±2.13ns        ? ?/sec      1.00    44.5±18.34ns        ? ?/sec
iter_simple/base                                         1.02      8.4±0.10µs        ? ?/sec      1.00      8.2±0.14µs        ? ?/sec
iter_simple/foreach                                      1.01      8.3±0.07µs        ? ?/sec      1.00      8.2±0.09µs        ? ?/sec
iter_simple/foreach_sparse_set                           1.00     25.3±0.32µs        ? ?/sec      1.02     25.7±0.42µs        ? ?/sec
iter_simple/foreach_wide                                 1.03     41.1±0.94µs        ? ?/sec      1.00     39.9±0.41µs        ? ?/sec
iter_simple/foreach_wide_sparse_set                      1.05    123.6±2.05µs        ? ?/sec      1.00    118.1±2.78µs        ? ?/sec
iter_simple/sparse_set                                   1.14     30.5±1.40µs        ? ?/sec      1.00     26.9±0.64µs        ? ?/sec
iter_simple/system                                       1.01      8.4±0.25µs        ? ?/sec      1.00      8.4±0.11µs        ? ?/sec
iter_simple/wide                                         1.18     48.2±0.62µs        ? ?/sec      1.00     40.7±0.38µs        ? ?/sec
iter_simple/wide_sparse_set                              1.12   140.8±21.56µs        ? ?/sec      1.00    126.0±2.30µs        ? ?/sec
query_get/50000_entities_sparse                          1.17    378.6±7.60µs        ? ?/sec      1.00   324.1±23.17µs        ? ?/sec
query_get/50000_entities_table                           1.08   330.9±10.90µs        ? ?/sec      1.00    306.8±4.98µs        ? ?/sec
query_get_component/50000_entities_sparse                1.00   976.7±19.55µs        ? ?/sec      1.00   979.8±35.87µs        ? ?/sec
query_get_component/50000_entities_table                 1.00  1029.0±15.11µs        ? ?/sec      1.05  1080.0±59.18µs        ? ?/sec
query_get_component_simple/system                        1.13   839.7±14.18µs        ? ?/sec      1.00   742.8±10.72µs        ? ?/sec
query_get_component_simple/unchecked                     1.01   909.0±15.17µs        ? ?/sec      1.00   898.0±13.56µs        ? ?/sec
query_get_many_10/50000_calls_sparse                     1.04      5.5±0.54ms        ? ?/sec      1.00      5.3±0.67ms        ? ?/sec
query_get_many_10/50000_calls_table                      1.01      4.9±0.49ms        ? ?/sec      1.00      4.8±0.45ms        ? ?/sec
query_get_many_2/50000_calls_sparse                      1.28  848.4±210.89µs        ? ?/sec      1.00   664.8±47.69µs        ? ?/sec
query_get_many_2/50000_calls_table                       1.05   779.0±73.85µs        ? ?/sec      1.00   739.2±83.02µs        ? ?/sec
query_get_many_5/50000_calls_sparse                      1.05      2.4±0.37ms        ? ?/sec      1.00      2.3±0.33ms        ? ?/sec
query_get_many_5/50000_calls_table                       1.00  1939.9±75.22µs        ? ?/sec      1.04      2.0±0.19ms        ? ?/sec
run_criteria/yes_using_query/001_systems                 1.00      3.7±0.38µs        ? ?/sec      1.30      4.9±0.14µs        ? ?/sec
run_criteria/yes_using_query/006_systems                 1.00      8.9±0.40µs        ? ?/sec      1.17     10.3±0.57µs        ? ?/sec
run_criteria/yes_using_query/011_systems                 1.00     13.9±0.49µs        ? ?/sec      1.08     15.0±0.89µs        ? ?/sec
run_criteria/yes_using_query/016_systems                 1.00     18.8±0.74µs        ? ?/sec      1.00     18.8±1.43µs        ? ?/sec
run_criteria/yes_using_query/021_systems                 1.07     24.1±0.87µs        ? ?/sec      1.00     22.6±1.58µs        ? ?/sec
run_criteria/yes_using_query/026_systems                 1.04     27.9±0.62µs        ? ?/sec      1.00     26.8±1.71µs        ? ?/sec
run_criteria/yes_using_query/031_systems                 1.09     33.3±1.03µs        ? ?/sec      1.00     30.5±2.18µs        ? ?/sec
run_criteria/yes_using_query/036_systems                 1.14     38.7±0.80µs        ? ?/sec      1.00     33.9±1.75µs        ? ?/sec
run_criteria/yes_using_query/041_systems                 1.18     43.7±1.07µs        ? ?/sec      1.00     37.0±2.39µs        ? ?/sec
run_criteria/yes_using_query/046_systems                 1.14     47.6±1.16µs        ? ?/sec      1.00     41.9±2.09µs        ? ?/sec
run_criteria/yes_using_query/051_systems                 1.17     52.9±2.04µs        ? ?/sec      1.00     45.3±1.75µs        ? ?/sec
run_criteria/yes_using_query/056_systems                 1.25     59.2±2.38µs        ? ?/sec      1.00     47.2±2.01µs        ? ?/sec
run_criteria/yes_using_query/061_systems                 1.28    66.1±15.84µs        ? ?/sec      1.00     51.5±2.47µs        ? ?/sec
run_criteria/yes_using_query/066_systems                 1.28     70.2±2.57µs        ? ?/sec      1.00     54.7±2.58µs        ? ?/sec
run_criteria/yes_using_query/071_systems                 1.30     75.5±2.27µs        ? ?/sec      1.00     58.2±3.31µs        ? ?/sec
run_criteria/yes_using_query/076_systems                 1.26     81.5±2.66µs        ? ?/sec      1.00     64.5±3.13µs        ? ?/sec
run_criteria/yes_using_query/081_systems                 1.29     89.7±2.58µs        ? ?/sec      1.00     69.3±3.47µs        ? ?/sec
run_criteria/yes_using_query/086_systems                 1.33     95.6±3.39µs        ? ?/sec      1.00     71.8±3.48µs        ? ?/sec
run_criteria/yes_using_query/091_systems                 1.25    102.0±3.67µs        ? ?/sec      1.00     81.4±4.82µs        ? ?/sec
run_criteria/yes_using_query/096_systems                 1.33    111.7±3.29µs        ? ?/sec      1.00     83.8±4.15µs        ? ?/sec
run_criteria/yes_using_query/101_systems                 1.29   113.2±12.04µs        ? ?/sec      1.00     87.7±5.15µs        ? ?/sec
world_query_for_each/50000_entities_sparse               1.00     47.4±0.51µs        ? ?/sec      1.00     47.3±0.33µs        ? ?/sec
world_query_for_each/50000_entities_table                1.00     27.2±0.50µs        ? ?/sec      1.00     27.2±0.17µs        ? ?/sec
world_query_get/50000_entities_sparse_wide               1.09    210.5±1.78µs        ? ?/sec      1.00    192.5±2.61µs        ? ?/sec
world_query_get/50000_entities_table                     1.00    127.7±2.09µs        ? ?/sec      1.07    136.2±5.95µs        ? ?/sec
world_query_get/50000_entities_table_wide                1.00    209.8±2.37µs        ? ?/sec      1.15    240.6±2.04µs        ? ?/sec
world_query_iter/50000_entities_sparse                   1.00     54.2±0.36µs        ? ?/sec      1.01     54.7±0.61µs        ? ?/sec
world_query_iter/50000_entities_table                    1.00     27.2±0.31µs        ? ?/sec      1.00     27.3±0.64µs        ? ?/sec
```
</details>

NOTE: This PR includes a change to enable LTO on our benchmarks to get a "fully optimized" baseline for our benchmarks. Both the main and the current PR's results were with LTO enabled.
  • Loading branch information
james7132 authored and ItsDoot committed Feb 1, 2023
1 parent bbd81db commit 79eb719
Show file tree
Hide file tree
Showing 8 changed files with 102 additions and 50 deletions.
3 changes: 3 additions & 0 deletions Cargo.toml
Expand Up @@ -1610,6 +1610,9 @@ target_sdk_version = 31
icon = "@mipmap/ic_launcher"
label = "Bevy Example"

[profile.release]
lto = true

[profile.wasm-release]
inherits = "release"
opt-level = "z"
Expand Down
4 changes: 4 additions & 0 deletions benches/Cargo.toml
Expand Up @@ -17,6 +17,10 @@ bevy_reflect = { path = "../crates/bevy_reflect" }
bevy_tasks = { path = "../crates/bevy_tasks" }
bevy_utils = { path = "../crates/bevy_utils" }

[profile.release]
opt-level = 3
lto = true

[[bench]]
name = "ecs"
path = "benches/bevy_ecs/benches.rs"
Expand Down
38 changes: 16 additions & 22 deletions crates/bevy_ecs/src/query/fetch.rs
Expand Up @@ -3,7 +3,7 @@ use crate::{
change_detection::Ticks,
component::{Component, ComponentId, ComponentStorage, ComponentTicks, StorageType},
entity::Entity,
query::{debug_checked_unreachable, Access, FilteredAccess},
query::{Access, DebugCheckedUnwrap, FilteredAccess},
storage::{ComponentSparseSet, Table},
world::{Mut, World},
};
Expand Down Expand Up @@ -552,7 +552,7 @@ unsafe impl<T: Component> WorldQuery for &T {
.storages()
.sparse_sets
.get(component_id)
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
}),
}
}
Expand Down Expand Up @@ -585,7 +585,7 @@ unsafe impl<T: Component> WorldQuery for &T {
fetch.table_components = Some(
table
.get_column(component_id)
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
.get_data_slice()
.into(),
);
Expand All @@ -600,14 +600,14 @@ unsafe impl<T: Component> WorldQuery for &T {
match T::Storage::STORAGE_TYPE {
StorageType::Table => fetch
.table_components
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
.get(table_row)
.deref(),
StorageType::SparseSet => fetch
.sparse_set
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
.get(entity)
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
.deref(),
}
}
Expand Down Expand Up @@ -696,7 +696,7 @@ unsafe impl<'__w, T: Component> WorldQuery for &'__w mut T {
.storages()
.sparse_sets
.get(component_id)
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
}),
last_change_tick,
change_tick,
Expand Down Expand Up @@ -730,9 +730,7 @@ unsafe impl<'__w, T: Component> WorldQuery for &'__w mut T {
&component_id: &ComponentId,
table: &'w Table,
) {
let column = table
.get_column(component_id)
.unwrap_or_else(|| debug_checked_unreachable());
let column = table.get_column(component_id).debug_checked_unwrap();
fetch.table_data = Some((
column.get_data_slice().into(),
column.get_ticks_slice().into(),
Expand All @@ -747,9 +745,7 @@ unsafe impl<'__w, T: Component> WorldQuery for &'__w mut T {
) -> Self::Item<'w> {
match T::Storage::STORAGE_TYPE {
StorageType::Table => {
let (table_components, table_ticks) = fetch
.table_data
.unwrap_or_else(|| debug_checked_unreachable());
let (table_components, table_ticks) = fetch.table_data.debug_checked_unwrap();
Mut {
value: table_components.get(table_row).deref_mut(),
ticks: Ticks {
Expand All @@ -762,9 +758,9 @@ unsafe impl<'__w, T: Component> WorldQuery for &'__w mut T {
StorageType::SparseSet => {
let (component, component_ticks) = fetch
.sparse_set
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
.get_with_ticks(entity)
.unwrap_or_else(|| debug_checked_unreachable());
.debug_checked_unwrap();
Mut {
value: component.assert_unique().deref_mut(),
ticks: Ticks {
Expand Down Expand Up @@ -1038,7 +1034,7 @@ unsafe impl<T: Component> WorldQuery for ChangeTrackers<T> {
.storages()
.sparse_sets
.get(component_id)
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
}),
marker: PhantomData,
last_change_tick,
Expand Down Expand Up @@ -1077,7 +1073,7 @@ unsafe impl<T: Component> WorldQuery for ChangeTrackers<T> {
fetch.table_ticks = Some(
table
.get_column(id)
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
.get_ticks_slice()
.into(),
);
Expand All @@ -1092,9 +1088,7 @@ unsafe impl<T: Component> WorldQuery for ChangeTrackers<T> {
match T::Storage::STORAGE_TYPE {
StorageType::Table => ChangeTrackers {
component_ticks: {
let table_ticks = fetch
.table_ticks
.unwrap_or_else(|| debug_checked_unreachable());
let table_ticks = fetch.table_ticks.debug_checked_unwrap();
table_ticks.get(table_row).read()
},
marker: PhantomData,
Expand All @@ -1104,9 +1098,9 @@ unsafe impl<T: Component> WorldQuery for ChangeTrackers<T> {
StorageType::SparseSet => ChangeTrackers {
component_ticks: *fetch
.sparse_set
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
.get_ticks(entity)
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
.get(),
marker: PhantomData,
last_change_tick: fetch.last_change_tick,
Expand Down
12 changes: 6 additions & 6 deletions crates/bevy_ecs/src/query/filter.rs
Expand Up @@ -2,7 +2,7 @@ use crate::{
archetype::{Archetype, ArchetypeComponentId},
component::{Component, ComponentId, ComponentStorage, ComponentTicks, StorageType},
entity::Entity,
query::{debug_checked_unreachable, Access, FilteredAccess, WorldQuery},
query::{Access, DebugCheckedUnwrap, FilteredAccess, WorldQuery},
storage::{ComponentSparseSet, Table},
world::World,
};
Expand Down Expand Up @@ -439,7 +439,7 @@ macro_rules! impl_tick_filter {
world.storages()
.sparse_sets
.get(id)
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
}),
marker: PhantomData,
last_change_tick,
Expand Down Expand Up @@ -476,7 +476,7 @@ macro_rules! impl_tick_filter {
) {
fetch.table_ticks = Some(
table.get_column(component_id)
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
.get_ticks_slice()
.into()
);
Expand Down Expand Up @@ -504,7 +504,7 @@ macro_rules! impl_tick_filter {
StorageType::Table => {
$is_detected(&*(
fetch.table_ticks
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
.get(table_row))
.deref(),
fetch.last_change_tick,
Expand All @@ -514,9 +514,9 @@ macro_rules! impl_tick_filter {
StorageType::SparseSet => {
let ticks = &*fetch
.sparse_set
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
.get_ticks(entity)
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
.get();
$is_detected(ticks, fetch.last_change_tick, fetch.change_tick)
}
Expand Down
15 changes: 9 additions & 6 deletions crates/bevy_ecs/src/query/iter.rs
Expand Up @@ -2,7 +2,7 @@ use crate::{
archetype::{ArchetypeEntity, ArchetypeId, Archetypes},
entity::{Entities, Entity},
prelude::World,
query::{ArchetypeFilter, QueryState, WorldQuery},
query::{ArchetypeFilter, DebugCheckedUnwrap, QueryState, WorldQuery},
storage::{TableId, Tables},
};
use std::{borrow::Borrow, iter::FusedIterator, marker::PhantomData, mem::MaybeUninit};
Expand Down Expand Up @@ -153,8 +153,11 @@ where
continue;
}

let archetype = &self.archetypes[location.archetype_id];
let table = &self.tables[archetype.table_id()];
let archetype = self
.archetypes
.get(location.archetype_id)
.debug_checked_unwrap();
let table = self.tables.get(archetype.table_id()).debug_checked_unwrap();

// SAFETY: `archetype` is from the world that `fetch/filter` were created for,
// `fetch_state`/`filter_state` are the states that `fetch/filter` were initialized with
Expand Down Expand Up @@ -586,7 +589,7 @@ impl<'w, 's, Q: WorldQuery, F: ReadOnlyWorldQuery> QueryIterationCursor<'w, 's,
// we are on the beginning of the query, or finished processing a table, so skip to the next
if self.current_index == self.current_len {
let table_id = self.table_id_iter.next()?;
let table = &tables[*table_id];
let table = tables.get(*table_id).debug_checked_unwrap();
// SAFETY: `table` is from the world that `fetch/filter` were created for,
// `fetch_state`/`filter_state` are the states that `fetch/filter` were initialized with
Q::set_table(&mut self.fetch, &query_state.fetch_state, table);
Expand Down Expand Up @@ -616,10 +619,10 @@ impl<'w, 's, Q: WorldQuery, F: ReadOnlyWorldQuery> QueryIterationCursor<'w, 's,
loop {
if self.current_index == self.current_len {
let archetype_id = self.archetype_id_iter.next()?;
let archetype = &archetypes[*archetype_id];
let archetype = archetypes.get(*archetype_id).debug_checked_unwrap();
// SAFETY: `archetype` and `tables` are from the world that `fetch/filter` were created for,
// `fetch_state`/`filter_state` are the states that `fetch/filter` were initialized with
let table = &tables[archetype.table_id()];
let table = tables.get(archetype.table_id()).debug_checked_unwrap();
Q::set_archetype(&mut self.fetch, &query_state.fetch_state, archetype, table);
F::set_archetype(
&mut self.filter,
Expand Down
48 changes: 43 additions & 5 deletions crates/bevy_ecs/src/query/mod.rs
Expand Up @@ -10,11 +10,49 @@ pub use filter::*;
pub use iter::*;
pub use state::*;

#[allow(unreachable_code)]
pub(crate) unsafe fn debug_checked_unreachable() -> ! {
#[cfg(debug_assertions)]
unreachable!();
std::hint::unreachable_unchecked();
/// A debug-checked version of [`Option::unwrap_unchecked`]. Panics when
/// unwrapping a `None` or `Err` value in debug mode, but is equivalent to
/// `Option::unwrap_unchecked` or `Result::unwrap_unchecked` in release mode.
// NOTE(review): only `Option<T>` impls are visible next to this trait; confirm
// a `Result` impl exists before relying on the `Err`/`Ok` wording.
pub(crate) trait DebugCheckedUnwrap {
/// The type of the value produced by unwrapping.
type Item;
/// Unwraps `self` and returns the contained value.
///
/// # Panics
/// Panics if the value is `None` or `Err`, only in debug mode.
///
/// # Safety
/// This must never be called on a `None` or `Err` value. This can
/// only be called on `Some` or `Ok` values.
unsafe fn debug_checked_unwrap(self) -> Self::Item;
}

// These two impls are explicitly split to ensure that the unreachable! macro
// does not cause inlining to fail when compiling in release mode.
/// Debug-build implementation: unwrapping a `None` panics instead of invoking
/// undefined behavior, so violations of the safety contract surface early.
#[cfg(debug_assertions)]
impl<T> DebugCheckedUnwrap for Option<T> {
    type Item = T;

    /// Returns the contained `Some` value.
    ///
    /// # Panics
    /// Panics via `unreachable!()` if `self` is `None`. Thanks to
    /// `#[track_caller]`, the reported panic location is the call site that
    /// broke the contract rather than this helper.
    ///
    /// # Safety
    /// The caller must guarantee that `self` is `Some`.
    #[inline(always)]
    #[track_caller]
    unsafe fn debug_checked_unwrap(self) -> Self::Item {
        match self {
            Some(inner) => inner,
            // Only reachable if the caller violated the safety contract.
            None => unreachable!(),
        }
    }
}

/// Release-build implementation: the `None` case is declared unreachable to
/// the optimizer, so no check is performed.
#[cfg(not(debug_assertions))]
impl<T> DebugCheckedUnwrap for Option<T> {
    type Item = T;

    /// Returns the contained `Some` value without checking for `None`.
    ///
    /// # Safety
    /// The caller must guarantee that `self` is `Some`; calling this on
    /// `None` is undefined behavior.
    #[inline(always)]
    unsafe fn debug_checked_unwrap(self) -> Self::Item {
        match self {
            Some(value) => value,
            // SAFETY: the caller guarantees `self` is `Some`, so this arm is never taken.
            None => std::hint::unreachable_unchecked(),
        }
    }
}

#[cfg(test)]
Expand Down
28 changes: 19 additions & 9 deletions crates/bevy_ecs/src/query/state.rs
Expand Up @@ -3,7 +3,9 @@ use crate::{
component::ComponentId,
entity::Entity,
prelude::FromWorld,
query::{Access, FilteredAccess, QueryCombinationIter, QueryIter, WorldQuery},
query::{
Access, DebugCheckedUnwrap, FilteredAccess, QueryCombinationIter, QueryIter, WorldQuery,
},
storage::TableId,
world::{World, WorldId},
};
Expand Down Expand Up @@ -409,11 +411,18 @@ impl<Q: WorldQuery, F: ReadOnlyWorldQuery> QueryState<Q, F> {
{
return Err(QueryEntityError::QueryDoesNotMatch(entity));
}
let archetype = &world.archetypes[location.archetype_id];
let archetype = world
.archetypes
.get(location.archetype_id)
.debug_checked_unwrap();
let mut fetch = Q::init_fetch(world, &self.fetch_state, last_change_tick, change_tick);
let mut filter = F::init_fetch(world, &self.filter_state, last_change_tick, change_tick);

let table = &world.storages().tables[archetype.table_id()];
let table = world
.storages()
.tables
.get(archetype.table_id())
.debug_checked_unwrap();
Q::set_archetype(&mut fetch, &self.fetch_state, archetype, table);
F::set_archetype(&mut filter, &self.filter_state, archetype, table);

Expand Down Expand Up @@ -930,7 +939,7 @@ impl<Q: WorldQuery, F: ReadOnlyWorldQuery> QueryState<Q, F> {
let tables = &world.storages().tables;
if Q::IS_DENSE && F::IS_DENSE {
for table_id in &self.matched_table_ids {
let table = &tables[*table_id];
let table = tables.get(*table_id).debug_checked_unwrap();
Q::set_table(&mut fetch, &self.fetch_state, table);
F::set_table(&mut filter, &self.filter_state, table);

Expand All @@ -946,8 +955,8 @@ impl<Q: WorldQuery, F: ReadOnlyWorldQuery> QueryState<Q, F> {
} else {
let archetypes = &world.archetypes;
for archetype_id in &self.matched_archetype_ids {
let archetype = &archetypes[*archetype_id];
let table = &tables[archetype.table_id()];
let archetype = archetypes.get(*archetype_id).debug_checked_unwrap();
let table = tables.get(archetype.table_id()).debug_checked_unwrap();
Q::set_archetype(&mut fetch, &self.fetch_state, archetype, table);
F::set_archetype(&mut filter, &self.filter_state, archetype, table);

Expand Down Expand Up @@ -1025,7 +1034,7 @@ impl<Q: WorldQuery, F: ReadOnlyWorldQuery> QueryState<Q, F> {
change_tick,
);
let tables = &world.storages().tables;
let table = &tables[*table_id];
let table = tables.get(*table_id).debug_checked_unwrap();
let entities = table.entities();
Q::set_table(&mut fetch, &self.fetch_state, table);
F::set_table(&mut filter, &self.filter_state, table);
Expand Down Expand Up @@ -1076,8 +1085,9 @@ impl<Q: WorldQuery, F: ReadOnlyWorldQuery> QueryState<Q, F> {
change_tick,
);
let tables = &world.storages().tables;
let archetype = &world.archetypes[*archetype_id];
let table = &tables[archetype.table_id()];
let archetype =
world.archetypes.get(*archetype_id).debug_checked_unwrap();
let table = tables.get(archetype.table_id()).debug_checked_unwrap();
Q::set_archetype(&mut fetch, &self.fetch_state, archetype, table);
F::set_archetype(&mut filter, &self.filter_state, archetype, table);

Expand Down
4 changes: 2 additions & 2 deletions crates/bevy_ecs/src/storage/table.rs
@@ -1,7 +1,7 @@
use crate::{
component::{ComponentId, ComponentInfo, ComponentTicks, Components},
entity::Entity,
query::debug_checked_unreachable,
query::DebugCheckedUnwrap,
storage::{blob_vec::BlobVec, SparseSet},
};
use bevy_ptr::{OwningPtr, Ptr, PtrMut};
Expand Down Expand Up @@ -386,7 +386,7 @@ impl Table {
for (component_id, column) in self.columns.iter_mut() {
new_table
.get_column_mut(*component_id)
.unwrap_or_else(|| debug_checked_unreachable())
.debug_checked_unwrap()
.initialize_from_unchecked(column, row, new_row);
}
TableMoveResult {
Expand Down

0 comments on commit 79eb719

Please sign in to comment.