From 9e481ec9911be4b07e050a4ad6f719587f34ac54 Mon Sep 17 00:00:00 2001 From: James Liu Date: Mon, 24 Oct 2022 13:22:05 +0000 Subject: [PATCH] Skip empty archetypes and tables when iterating over queries (#4724) # Objective Speed up queries that are fragmented over many empty archetypes and tables. ## Solution Add an early-out to check if the table or archetype is empty before iterating over it. This adds an extra branch for every archetype matched, but skips setting the archetype/table to the underlying state and any iteration over it. This may not be worth it for the default `Query::iter` and maybe even the `Query::for_each` implementations, but this definitely avoids scheduling unnecessary tasks in the `Query::par_for_each` case. Ideally, `matched_archetypes` should only contain archetypes where there's actually work to do, but this would add an `O(n)` flat cost to every call to `update_archetypes` that scales with the number of matched archetypes. TODO: Benchmark --- benches/benches/bevy_ecs/empty_archetypes.rs | 253 +++++++++++++++++++ crates/bevy_ecs/src/query/state.rs | 8 + 2 files changed, 261 insertions(+) create mode 100644 benches/benches/bevy_ecs/empty_archetypes.rs diff --git a/benches/benches/bevy_ecs/empty_archetypes.rs b/benches/benches/bevy_ecs/empty_archetypes.rs new file mode 100644 index 0000000000000..0db82700e691e --- /dev/null +++ b/benches/benches/bevy_ecs/empty_archetypes.rs @@ -0,0 +1,253 @@ +use bevy_ecs::{ + component::Component, + prelude::*, + schedule::{Stage, SystemStage}, + world::World, +}; +use bevy_tasks::{ComputeTaskPool, TaskPool}; +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; + +criterion_group!(benches, empty_archetypes); +criterion_main!(benches); + +#[derive(Component)] +struct A(f32); + +fn iter( + query: Query<( + &A<0>, + &A<1>, + &A<2>, + &A<3>, + &A<4>, + &A<5>, + &A<6>, + &A<7>, + &A<8>, + &A<9>, + &A<10>, + &A<11>, + &A<12>, + )>, +) { + for comp in query.iter() { 
black_box(comp); + } +} + +fn for_each( + query: Query<( + &A<0>, + &A<1>, + &A<2>, + &A<3>, + &A<4>, + &A<5>, + &A<6>, + &A<7>, + &A<8>, + &A<9>, + &A<10>, + &A<11>, + &A<12>, + )>, +) { + query.for_each(|comp| { + black_box(comp); + }); +} + +fn par_for_each( + task_pool: Res, + query: Query<( + &A<0>, + &A<1>, + &A<2>, + &A<3>, + &A<4>, + &A<5>, + &A<6>, + &A<7>, + &A<8>, + &A<9>, + &A<10>, + &A<11>, + &A<12>, + )>, +) { + query.par_for_each(&*task_pool, 64, |comp| { + black_box(comp); + }); +} + +fn setup(parallel: bool, setup: impl FnOnce(&mut SystemStage)) -> (World, SystemStage) { + let mut world = World::new(); + let mut stage = SystemStage::parallel(); + if parallel { + world.insert_resource(ComputeTaskPool(TaskPool::default())); + } + setup(&mut stage); + (world, stage) +} + +/// create `count` entities with distinct archetypes +fn add_archetypes(world: &mut World, count: u16) { + for i in 0..count { + let mut e = world.spawn(); + e.insert(A::<0>(1.0)); + e.insert(A::<1>(1.0)); + e.insert(A::<2>(1.0)); + e.insert(A::<3>(1.0)); + e.insert(A::<4>(1.0)); + e.insert(A::<5>(1.0)); + e.insert(A::<6>(1.0)); + e.insert(A::<7>(1.0)); + e.insert(A::<8>(1.0)); + e.insert(A::<9>(1.0)); + e.insert(A::<10>(1.0)); + e.insert(A::<11>(1.0)); + e.insert(A::<12>(1.0)); + if i & 1 << 1 != 0 { + e.insert(A::<13>(1.0)); + } + if i & 1 << 2 != 0 { + e.insert(A::<14>(1.0)); + } + if i & 1 << 3 != 0 { + e.insert(A::<15>(1.0)); + } + if i & 1 << 4 != 0 { + e.insert(A::<16>(1.0)); + } + if i & 1 << 5 != 0 { + e.insert(A::<18>(1.0)); + } + if i & 1 << 6 != 0 { + e.insert(A::<19>(1.0)); + } + if i & 1 << 7 != 0 { + e.insert(A::<20>(1.0)); + } + if i & 1 << 8 != 0 { + e.insert(A::<21>(1.0)); + } + if i & 1 << 9 != 0 { + e.insert(A::<22>(1.0)); + } + if i & 1 << 10 != 0 { + e.insert(A::<23>(1.0)); + } + if i & 1 << 11 != 0 { + e.insert(A::<24>(1.0)); + } + if i & 1 << 12 != 0 { + e.insert(A::<25>(1.0)); + } + if i & 1 << 13 != 0 { + e.insert(A::<26>(1.0)); + } + if i & 1 << 14 != 0 { + 
e.insert(A::<27>(1.0)); + } + if i & 1 << 15 != 0 { + e.insert(A::<28>(1.0)); + } + } +} + +fn empty_archetypes(criterion: &mut Criterion) { + let mut group = criterion.benchmark_group("empty_archetypes"); + for archetype_count in [10, 100, 500, 1000, 2000, 5000, 10000] { + let (mut world, mut stage) = setup(true, |stage| { + stage.add_system(iter); + }); + add_archetypes(&mut world, archetype_count); + world.clear_entities(); + let mut e = world.spawn(); + e.insert(A::<0>(1.0)); + e.insert(A::<1>(1.0)); + e.insert(A::<2>(1.0)); + e.insert(A::<3>(1.0)); + e.insert(A::<4>(1.0)); + e.insert(A::<5>(1.0)); + e.insert(A::<6>(1.0)); + e.insert(A::<7>(1.0)); + e.insert(A::<8>(1.0)); + e.insert(A::<9>(1.0)); + e.insert(A::<10>(1.0)); + e.insert(A::<11>(1.0)); + e.insert(A::<12>(1.0)); + stage.run(&mut world); + group.bench_with_input( + BenchmarkId::new("iter", archetype_count), + &archetype_count, + |bencher, &_| { + bencher.iter(|| { + stage.run(&mut world); + }) + }, + ); + } + for archetype_count in [10, 100, 500, 1000, 2000, 5000, 10000] { + let (mut world, mut stage) = setup(true, |stage| { + stage.add_system(for_each); + }); + add_archetypes(&mut world, archetype_count); + world.clear_entities(); + let mut e = world.spawn(); + e.insert(A::<0>(1.0)); + e.insert(A::<1>(1.0)); + e.insert(A::<2>(1.0)); + e.insert(A::<3>(1.0)); + e.insert(A::<4>(1.0)); + e.insert(A::<5>(1.0)); + e.insert(A::<6>(1.0)); + e.insert(A::<7>(1.0)); + e.insert(A::<8>(1.0)); + e.insert(A::<9>(1.0)); + e.insert(A::<10>(1.0)); + e.insert(A::<11>(1.0)); + e.insert(A::<12>(1.0)); + stage.run(&mut world); + group.bench_with_input( + BenchmarkId::new("for_each", archetype_count), + &archetype_count, + |bencher, &_| { + bencher.iter(|| { + stage.run(&mut world); + }) + }, + ); + } + for archetype_count in [10, 100, 500, 1000, 2000, 5000, 10000] { + let (mut world, mut stage) = setup(true, |stage| { + stage.add_system(par_for_each); + }); + add_archetypes(&mut world, archetype_count); + 
world.clear_entities(); + let mut e = world.spawn(); + e.insert(A::<0>(1.0)); + e.insert(A::<1>(1.0)); + e.insert(A::<2>(1.0)); + e.insert(A::<3>(1.0)); + e.insert(A::<4>(1.0)); + e.insert(A::<5>(1.0)); + e.insert(A::<6>(1.0)); + e.insert(A::<7>(1.0)); + e.insert(A::<8>(1.0)); + e.insert(A::<9>(1.0)); + e.insert(A::<10>(1.0)); + e.insert(A::<11>(1.0)); + e.insert(A::<12>(1.0)); + stage.run(&mut world); + group.bench_with_input( + BenchmarkId::new("par_for_each", archetype_count), + &archetype_count, + |bencher, &_| { + bencher.iter(|| { + stage.run(&mut world); + }) + }, + ); + } +} diff --git a/crates/bevy_ecs/src/query/state.rs b/crates/bevy_ecs/src/query/state.rs index 586df9de05d5d..8be0d0c80b419 100644 --- a/crates/bevy_ecs/src/query/state.rs +++ b/crates/bevy_ecs/src/query/state.rs @@ -983,6 +983,10 @@ impl QueryState { let tables = &world.storages().tables; for table_id in &self.matched_table_ids { let table = &tables[*table_id]; + if table.is_empty() { + continue; + } + let mut offset = 0; while offset < table.entity_count() { let func = func.clone(); @@ -1030,6 +1034,10 @@ impl QueryState { for archetype_id in &self.matched_archetype_ids { let mut offset = 0; let archetype = &archetypes[*archetype_id]; + if archetype.is_empty() { + continue; + } + while offset < archetype.len() { let func = func.clone(); let len = batch_size.min(archetype.len() - offset);