Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

priority and ring_hash LBs: fix interactions when using ring_hash under priority #29332

Merged
merged 29 commits into from Apr 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
0c6ccf1
refactor connection delay injection from client_lb_end2end_test
markdroth Apr 6, 2022
96119c0
fix build
markdroth Apr 6, 2022
6462019
fix build on older compilers
markdroth Apr 6, 2022
c9ea149
clang-format
markdroth Apr 6, 2022
58378ae
buildifier
markdroth Apr 6, 2022
033c31e
a bit of code cleanup
markdroth Apr 5, 2022
f80fcac
start failover time whenever the child reports CONNECTING, and don't …
markdroth Apr 6, 2022
44eaf30
clang-format
markdroth Apr 6, 2022
d1588cb
Merge remote-tracking branch 'upstream/master' into priority_lb_fix
markdroth Apr 6, 2022
2467440
rewrite test
markdroth Apr 7, 2022
75e26c2
simplify logic in priority policy
markdroth Apr 7, 2022
73ef75f
Merge remote-tracking branch 'upstream/master' into priority_lb_fix
markdroth Apr 7, 2022
830f049
clang-format
markdroth Apr 7, 2022
b245c89
switch to using a bit to indicate child healthiness
markdroth Apr 8, 2022
eb103bf
fix reversed comment
markdroth Apr 8, 2022
6c37cad
more changes in priority and ring_hash.
markdroth Apr 11, 2022
0c9f246
ring_hash: fix connectivity state seen by aggregation and picker
markdroth Apr 11, 2022
babfae8
fix obiwan error
markdroth Apr 11, 2022
636daa6
swap the order of ring_hash aggregation rules 3 and 4
markdroth Apr 11, 2022
aec7770
restore original test
markdroth Apr 11, 2022
ca1be91
refactor connection injector QueuedAttempt code
markdroth Apr 12, 2022
9a985a6
add test showing that ring_hash will continue connecting without picks
markdroth Apr 12, 2022
0014d11
clang-format
markdroth Apr 12, 2022
cc25c07
Merge remote-tracking branch 'upstream/master' into priority_lb_fix
markdroth Apr 12, 2022
93dbc5a
don't actually need seen_failure_since_ready_ anymore
markdroth Apr 13, 2022
79127fc
Merge remote-tracking branch 'upstream/master' into priority_lb_fix
markdroth Apr 13, 2022
ba54014
fix TSAN problem
markdroth Apr 14, 2022
4f8fcd9
address code review comments
markdroth Apr 14, 2022
9c99a8a
Merge remote-tracking branch 'upstream/master' into priority_lb_fix
markdroth Apr 14, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions build_autogenerated.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

128 changes: 72 additions & 56 deletions src/core/ext/filters/client_channel/lb_policy/priority/priority.cc
Expand Up @@ -104,7 +104,7 @@ class PriorityLb : public LoadBalancingPolicy {
bool ignore_reresolution_requests);
void ExitIdleLocked();
void ResetBackoffLocked();
void DeactivateLocked();
void MaybeDeactivateLocked();
void MaybeReactivateLocked();

void Orphan() override;
Expand Down Expand Up @@ -217,6 +217,8 @@ class PriorityLb : public LoadBalancingPolicy {
absl::Status connectivity_status_;
RefCountedPtr<RefCountedPicker> picker_wrapper_;

bool seen_ready_or_idle_since_transient_failure_ = true;

OrphanablePtr<DeactivationTimer> deactivation_timer_;
OrphanablePtr<FailoverTimer> failover_timer_;
};
Expand All @@ -225,14 +227,37 @@ class PriorityLb : public LoadBalancingPolicy {

void ShutdownLocked() override;

// Returns UINT32_MAX if child is not in current priority list.
// Returns the priority of the specified child name, or UINT32_MAX if
// the child is not in the current priority list.
uint32_t GetChildPriorityLocked(const std::string& child_name) const;

// Called when a child's connectivity state has changed.
// May propagate the update to the channel or trigger choosing a new
// priority.
void HandleChildConnectivityStateChangeLocked(ChildPriority* child);

// Deletes a child. Called when the child's deactivation timer fires.
void DeleteChild(ChildPriority* child);

void TryNextPriorityLocked(bool report_connecting);
void SelectPriorityLocked(uint32_t priority);
// Iterates through the list of priorities to choose one:
// - If the child for a priority doesn't exist, creates it.
// - If a child's failover timer is pending, returns without selecting
// a priority while we wait for the child to attempt to connect. In
// this case, if report_connecting is true, reports CONNECTING state to
// the channel.
// - If the child is connected, it will be used as the current priority.
// - Otherwise, continues on to the next child.
// Reports TRANSIENT_FAILURE to the channel if all children are not
// connected.
//
// This method is idempotent; it should yield the same result every
// time as a function of the state of the children.
void ChoosePriorityLocked(bool report_connecting);

// Sets the specified priority as the current priority.
// Deactivates any children at lower priorities.
// Returns the child's picker to the channel.
void SetCurrentPriorityLocked(uint32_t priority);

const Duration child_failover_timeout_;

Expand All @@ -246,6 +271,8 @@ class PriorityLb : public LoadBalancingPolicy {

bool update_in_progress_ = false;

// All children that currently exist.
// Some of these children may be in deactivated state.
std::map<std::string, OrphanablePtr<ChildPriority>> children_;
// The priority that is being used.
uint32_t current_priority_ = UINT32_MAX;
Expand Down Expand Up @@ -312,7 +339,7 @@ void PriorityLb::UpdateLocked(UpdateArgs args) {
current_child_from_before_update_ = children_[child_name].get();
// Unset current_priority_, since it was an index into the old
// config's priority list and may no longer be valid. It will be
// reset later by TryNextPriorityLocked(), but we unset it here in
// reset later by ChoosePriorityLocked(), but we unset it here in
// case updating any of our children triggers a state update.
current_priority_ = UINT32_MAX;
}
Expand All @@ -332,7 +359,7 @@ void PriorityLb::UpdateLocked(UpdateArgs args) {
auto config_it = config_->children().find(child_name);
if (config_it == config_->children().end()) {
// Existing child not found in new config. Deactivate it.
child->DeactivateLocked();
child->MaybeDeactivateLocked();
} else {
// Existing child found in new config. Update it.
child->UpdateLocked(config_it->second.config,
Expand All @@ -341,7 +368,7 @@ void PriorityLb::UpdateLocked(UpdateArgs args) {
}
update_in_progress_ = false;
// Try to get connected.
TryNextPriorityLocked(/*report_connecting=*/children_.empty());
ChoosePriorityLocked(/*report_connecting=*/children_.empty());
}

uint32_t PriorityLb::GetChildPriorityLocked(
Expand Down Expand Up @@ -380,11 +407,11 @@ void PriorityLb::HandleChildConnectivityStateChangeLocked(
} else {
// If it's no longer READY or IDLE, we should stop using it.
// We already started trying other priorities as a result of the
// update, but calling TryNextPriorityLocked() ensures that we will
// update, but calling ChoosePriorityLocked() ensures that we will
// properly select between CONNECTING and TRANSIENT_FAILURE as the
// new state to report to our parent.
current_child_from_before_update_ = nullptr;
TryNextPriorityLocked(/*report_connecting=*/true);
ChoosePriorityLocked(/*report_connecting=*/true);
}
return;
}
Expand All @@ -396,52 +423,26 @@ void PriorityLb::HandleChildConnectivityStateChangeLocked(
"priority %u",
this, child_priority, child->name().c_str(), current_priority_);
}
// Ignore priorities not in the current config.
if (child_priority == UINT32_MAX) return;
// Ignore lower-than-current priorities.
if (child_priority > current_priority_) return;
// If a child reports TRANSIENT_FAILURE, start trying the next priority.
// Note that even if this is for a higher-than-current priority, we
// may still need to create some children between this priority and
// the current one (e.g., if we got an update that inserted new
// priorities ahead of the current one).
if (child->connectivity_state() == GRPC_CHANNEL_TRANSIENT_FAILURE) {
TryNextPriorityLocked(
/*report_connecting=*/child_priority == current_priority_);
return;
}
// The update is for a higher-than-current priority (or for any
// priority if we don't have any current priority).
if (child_priority < current_priority_) {
// If the child reports READY or IDLE, switch to that priority.
// Otherwise, ignore the update.
if (child->connectivity_state() == GRPC_CHANNEL_READY ||
child->connectivity_state() == GRPC_CHANNEL_IDLE) {
SelectPriorityLocked(child_priority);
}
return;
}
// The current priority has returned a new picker, so pass it up to
// our parent.
channel_control_helper()->UpdateState(child->connectivity_state(),
child->connectivity_status(),
child->GetPicker());
// Unconditionally call ChoosePriorityLocked(). It should do the
// right thing based on the state of all children.
ChoosePriorityLocked(
/*report_connecting=*/child_priority == current_priority_);
}

void PriorityLb::DeleteChild(ChildPriority* child) {
// If this was the current child from before the most recent update,
// stop using it. We already started trying other priorities as a
// result of the update, but calling TryNextPriorityLocked() ensures that
// result of the update, but calling ChoosePriorityLocked() ensures that
// we will properly select between CONNECTING and TRANSIENT_FAILURE as the
// new state to report to our parent.
if (current_child_from_before_update_ == child) {
current_child_from_before_update_ = nullptr;
TryNextPriorityLocked(/*report_connecting=*/true);
ChoosePriorityLocked(/*report_connecting=*/true);
}
children_.erase(child->name());
}

void PriorityLb::TryNextPriorityLocked(bool report_connecting) {
void PriorityLb::ChoosePriorityLocked(bool report_connecting) {
current_priority_ = UINT32_MAX;
for (uint32_t priority = 0; priority < config_->priorities().size();
++priority) {
Expand Down Expand Up @@ -471,7 +472,7 @@ void PriorityLb::TryNextPriorityLocked(bool report_connecting) {
// If the child is in state READY or IDLE, switch to it.
if (child->connectivity_state() == GRPC_CHANNEL_READY ||
child->connectivity_state() == GRPC_CHANNEL_IDLE) {
SelectPriorityLocked(priority);
SetCurrentPriorityLocked(priority);
return;
}
// Child is not READY or IDLE.
Expand All @@ -491,6 +492,13 @@ void PriorityLb::TryNextPriorityLocked(bool report_connecting) {
return;
}
// Child has been failing for a while. Move on to the next priority.
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_priority_trace)) {
gpr_log(GPR_INFO,
"[priority_lb %p] skipping priority %u, child %s: state=%s, "
"failover timer not pending",
this, priority, child_name.c_str(),
ConnectivityStateName(child->connectivity_state()));
}
}
// If there are no more priorities to try, report TRANSIENT_FAILURE.
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_priority_trace)) {
Expand All @@ -506,7 +514,7 @@ void PriorityLb::TryNextPriorityLocked(bool report_connecting) {
absl::make_unique<TransientFailurePicker>(status));
}

void PriorityLb::SelectPriorityLocked(uint32_t priority) {
void PriorityLb::SetCurrentPriorityLocked(uint32_t priority) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_priority_trace)) {
gpr_log(GPR_INFO, "[priority_lb %p] selected priority %u, child %s", this,
priority, config_->priorities()[priority].c_str());
Expand All @@ -517,7 +525,7 @@ void PriorityLb::SelectPriorityLocked(uint32_t priority) {
for (uint32_t p = priority + 1; p < config_->priorities().size(); ++p) {
const std::string& child_name = config_->priorities()[p];
auto it = children_.find(child_name);
if (it != children_.end()) it->second->DeactivateLocked();
if (it != children_.end()) it->second->MaybeDeactivateLocked();
}
// Update picker.
auto& child = children_[config_->priorities()[priority]];
Expand Down Expand Up @@ -742,9 +750,6 @@ PriorityLb::ChildPriority::CreateChildPolicyLocked(
}

void PriorityLb::ChildPriority::ExitIdleLocked() {
if (connectivity_state_ == GRPC_CHANNEL_IDLE && failover_timer_ == nullptr) {
failover_timer_ = MakeOrphanable<FailoverTimer>(Ref());
}
child_policy_->ExitIdleLocked();
}

Expand All @@ -766,20 +771,31 @@ void PriorityLb::ChildPriority::OnConnectivityStateUpdateLocked(
connectivity_state_ = state;
connectivity_status_ = status;
picker_wrapper_ = MakeRefCounted<RefCountedPicker>(std::move(picker));
// If READY or IDLE or TRANSIENT_FAILURE, cancel failover timer.
if (state == GRPC_CHANNEL_READY || state == GRPC_CHANNEL_IDLE ||
state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
// If we transition to state CONNECTING and we've not seen
// TRANSIENT_FAILURE more recently than READY or IDLE, start failover
// timer if not already pending.
// In any other state, update seen_ready_or_idle_since_transient_failure_
// and cancel failover timer.
if (state == GRPC_CHANNEL_CONNECTING) {
if (seen_ready_or_idle_since_transient_failure_ &&
failover_timer_ == nullptr) {
failover_timer_ = MakeOrphanable<FailoverTimer>(Ref());
}
} else if (state == GRPC_CHANNEL_READY || state == GRPC_CHANNEL_IDLE) {
seen_ready_or_idle_since_transient_failure_ = true;
failover_timer_.reset();
} else if (state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
seen_ready_or_idle_since_transient_failure_ = false;
failover_timer_.reset();
}
// Notify the parent policy.
priority_policy_->HandleChildConnectivityStateChangeLocked(this);
}

void PriorityLb::ChildPriority::DeactivateLocked() {
// If already deactivated, don't do it again.
if (deactivation_timer_ != nullptr) return;
failover_timer_.reset();
deactivation_timer_ = MakeOrphanable<DeactivationTimer>(Ref());
void PriorityLb::ChildPriority::MaybeDeactivateLocked() {
if (deactivation_timer_ == nullptr) {
deactivation_timer_ = MakeOrphanable<DeactivationTimer>(Ref());
}
}

void PriorityLb::ChildPriority::MaybeReactivateLocked() {
Expand Down