Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tracing: fix recursive register_callsite deadlock #2634

Merged
merged 5 commits into from Jun 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
59 changes: 46 additions & 13 deletions tracing/src/lib.rs
Expand Up @@ -988,8 +988,7 @@ pub mod __macro_support {
pub use crate::callsite::{Callsite, Registration};
use crate::{collect::Interest, Metadata};
use core::fmt;
use core::sync::atomic::{AtomicUsize, Ordering};
use tracing_core::Once;
use core::sync::atomic::{AtomicU8, Ordering};

/// Callsite implementation used by macro-generated code.
///
Expand All @@ -1003,9 +1002,9 @@ pub mod __macro_support {
where
T: 'static,
{
interest: AtomicUsize,
interest: AtomicU8,
register: AtomicU8,
meta: &'static Metadata<'static>,
register: Once,
registration: &'static Registration<T>,
}

Expand All @@ -1023,12 +1022,21 @@ pub mod __macro_support {
registration: &'static Registration<T>,
) -> Self {
Self {
interest: AtomicUsize::new(0xDEADFACED),
interest: AtomicU8::new(Self::INTEREST_EMPTY),
register: AtomicU8::new(Self::UNREGISTERED),
meta,
register: Once::new(),
registration,
}
}

const UNREGISTERED: u8 = 0;
const REGISTERING: u8 = 1;
const REGISTERED: u8 = 2;

const INTEREST_NEVER: u8 = 0;
const INTEREST_SOMETIMES: u8 = 1;
const INTEREST_ALWAYS: u8 = 2;
const INTEREST_EMPTY: u8 = 0xFF;
}

impl MacroCallsite<&'static dyn Callsite> {
Expand All @@ -1046,11 +1054,36 @@ pub mod __macro_support {
// This only happens once (or if the cached interest value was corrupted).
#[cold]
pub fn register(&'static self) -> Interest {
self.register
.call_once(|| crate::callsite::register(self.registration));
// Attempt to advance the registration state to `REGISTERING`...
match self.register.compare_exchange(
Self::UNREGISTERED,
Self::REGISTERING,
Ordering::AcqRel,
Ordering::Acquire,
) {
Ok(_) => {
// Okay, we advanced the state, try to register the callsite.
crate::callsite::register(self.registration);
self.register.store(Self::REGISTERED, Ordering::Release);
}
// Great, the callsite is already registered! Just load its
// previous cached interest.
Err(Self::REGISTERED) => {}
// Someone else is registering...
Err(_state) => {
debug_assert_eq!(
_state,
Self::REGISTERING,
"weird callsite registration state"
);
// Just hit `enabled` this time.
return Interest::sometimes();
}
}

match self.interest.load(Ordering::Relaxed) {
0 => Interest::never(),
2 => Interest::always(),
Self::INTEREST_NEVER => Interest::never(),
Self::INTEREST_ALWAYS => Interest::always(),
_ => Interest::sometimes(),
}
}
Expand All @@ -1067,9 +1100,9 @@ pub mod __macro_support {
#[inline]
pub fn interest(&'static self) -> Interest {
match self.interest.load(Ordering::Relaxed) {
0 => Interest::never(),
1 => Interest::sometimes(),
2 => Interest::always(),
Self::INTEREST_NEVER => Interest::never(),
Self::INTEREST_SOMETIMES => Interest::sometimes(),
Self::INTEREST_ALWAYS => Interest::always(),
_ => self.register(),
}
}
Expand Down
51 changes: 51 additions & 0 deletions tracing/tests/rebuild_interest_doesnt_deadlock.rs
@@ -0,0 +1,51 @@
use std::{sync::mpsc, thread, time::Duration};
use tracing::{
callsite,
collect::{self, Collect, Interest},
metadata::Metadata,
span, Event,
};

#[test]
fn rebuild_callsites_doesnt_deadlock() {
pub struct EvilCollector;

impl Collect for EvilCollector {
fn register_callsite(&self, meta: &'static Metadata<'static>) -> Interest {
tracing::info!(?meta, "registered a callsite");
Interest::always()
}

fn enabled(&self, _: &Metadata<'_>) -> bool {
true
}
fn new_span(&self, _: &span::Attributes<'_>) -> span::Id {
span::Id::from_u64(1)
}
fn record(&self, _: &span::Id, _: &span::Record<'_>) {}
fn record_follows_from(&self, _: &span::Id, _: &span::Id) {}
fn event(&self, _: &Event<'_>) {}
fn enter(&self, _: &span::Id) {}
fn exit(&self, _: &span::Id) {}
fn current_span(&self) -> tracing_core::span::Current {
unimplemented!()
}
}

collect::set_global_default(EvilCollector).unwrap();

// spawn a thread, and assert it doesn't hang...
let (tx, didnt_hang) = mpsc::channel();
let th = thread::spawn(move || {
tracing::info!("hello world!");
callsite::rebuild_interest_cache();
tx.send(()).unwrap();
});

didnt_hang
// Note: 60 seconds is *way* more than enough, but let's be generous in
// case of e.g. slow CI machines.
.recv_timeout(Duration::from_secs(60))
.expect("the thread must not have hung!");
th.join().expect("thread should join successfully");
}
49 changes: 49 additions & 0 deletions tracing/tests/register_callsite_doesnt_deadlock.rs
@@ -0,0 +1,49 @@
use std::{sync::mpsc, thread, time::Duration};
use tracing::{
collect::{self, Collect, Interest},
metadata::Metadata,
span, Event,
};

#[test]
fn register_callsite_doesnt_deadlock() {
pub struct EvilCollector;

impl Collect for EvilCollector {
fn register_callsite(&self, meta: &'static Metadata<'static>) -> Interest {
tracing::info!(?meta, "registered a callsite");
Interest::always()
}

fn enabled(&self, _: &Metadata<'_>) -> bool {
true
}
fn new_span(&self, _: &span::Attributes<'_>) -> span::Id {
span::Id::from_u64(1)
}
fn record(&self, _: &span::Id, _: &span::Record<'_>) {}
fn record_follows_from(&self, _: &span::Id, _: &span::Id) {}
fn event(&self, _: &Event<'_>) {}
fn enter(&self, _: &span::Id) {}
fn exit(&self, _: &span::Id) {}
fn current_span(&self) -> tracing_core::span::Current {
unimplemented!()
}
}

collect::set_global_default(EvilCollector).unwrap();

// spawn a thread, and assert it doesn't hang...
let (tx, didnt_hang) = mpsc::channel();
let th = thread::spawn(move || {
tracing::info!("hello world!");
tx.send(()).unwrap();
});

didnt_hang
// Note: 60 seconds is *way* more than enough, but let's be generous in
// case of e.g. slow CI machines.
.recv_timeout(Duration::from_secs(60))
.expect("the thread must not have hung!");
th.join().expect("thread should join successfully");
}