Diff_tree_c_6414_65RC1
// SPDX-License-Identifier: GPL-2.0+
/*
 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
 *
 * Copyright IBM Corporation, 2008
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *	    Manfred Spraul <manfred@colorfullife.com>
 *	    Paul E. McKenney <paulmck@linux.ibm.com>
 *
 * Based on the original work by Paul McKenney <paulmck@linux.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *	Documentation/RCU
 */
#define pr_fmt(fmt) "rcu: " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate_wait.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/nmi.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/panic.h>
#include <linux/panic_notifier.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
#include <linux/wait.h>
#include <linux/kthread.h>
#include <uapi/linux/sched/types.h>
#include <linux/prefetch.h>
#include <linux/delay.h>
#include <linux/random.h>
#include <linux/trace_events.h>
#include <linux/suspend.h>
#include <linux/ftrace.h>
#include <linux/tick.h>
#include <linux/sysrq.h>
#include <linux/kprobes.h>
#include <linux/gfp.h>
#include <linux/oom.h>
#include <linux/smpboot.h>
#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/sched/isolation.h>
#include <linux/sched/clock.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/kasan.h>
#include <linux/context_tracking.h>
#include "../time/tick-internal.h"
#include "tree.h"
#include "rcu.h"

#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "rcutree."
/* Data structures. */

static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
	.gpwrap = true,
#ifdef CONFIG_RCU_NOCB_CPU
	.cblist.flags = SEGCBLIST_RCU_CORE,
#endif
};

static struct rcu_state rcu_state = {
	.level = { &rcu_state.node[0] },
	.gp_state = RCU_GP_IDLE,
	.gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT,
	.barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex),
	.barrier_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.barrier_lock),
	.name = RCU_NAME,
	.abbr = RCU_ABBR,
	.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
	.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),
	.ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED,
};
/* Dump rcu_node combining tree at boot to verify correct setup. */
static bool dump_tree;
module_param(dump_tree, bool, 0444);
/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
static bool use_softirq = !IS_ENABLED(CONFIG_PREEMPT_RT);
#ifndef CONFIG_PREEMPT_RT
module_param(use_softirq, bool, 0444);
#endif
/* Control rcu_node-tree auto-balancing at boot time. */
static bool rcu_fanout_exact;
module_param(rcu_fanout_exact, bool, 0444);
/* Increase (but not decrease) the RCU_FANOUT_LEAF at boot time. */
static int rcu_fanout_leaf = RCU_FANOUT_LEAF;
module_param(rcu_fanout_leaf, int, 0444);
int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
/* Number of rcu_nodes at specified level. */
int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
/*
 * The rcu_scheduler_active variable is initialized to the value
 * RCU_SCHEDULER_INACTIVE and transitions to RCU_SCHEDULER_INIT just before
 * the first task is spawned. So when this variable is RCU_SCHEDULER_INACTIVE,
 * RCU can assume that there is but one task, allowing RCU to (for example)
 * optimize synchronize_rcu() to a simple barrier(). When this variable
 * is RCU_SCHEDULER_INIT, RCU must actually do all the hard work required
 * to detect real grace periods. This variable is also used to suppress
 * boot-time false positives from lockdep-RCU error checking. Finally, it
 * transitions from RCU_SCHEDULER_INIT to RCU_SCHEDULER_RUNNING after RCU
 * is fully initialized, including all of its kthreads having been spawned.
 */
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
/*
 * The rcu_scheduler_fully_active variable transitions from zero to one
 * during the early_initcall() processing, which is after the scheduler
 * is capable of creating new tasks. So RCU processing (for example,
 * creating tasks for RCU priority boosting) must be delayed until after
 * rcu_scheduler_fully_active transitions from zero to one. We also
 * currently delay invocation of any RCU callbacks until after this point.
 *
 * It might later prove better for people registering RCU callbacks during
 * early boot to take responsibility for these callbacks, but one step at
 * a time.
 */
static int rcu_scheduler_fully_active __read_mostly;

static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
			      unsigned long gps, unsigned long flags);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
static void invoke_rcu_core(void);
static void rcu_report_exp_rdp(struct rcu_data *rdp);
static void sync_sched_exp_online_cleanup(int cpu);
static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp);
static bool rcu_rdp_cpu_online(struct rcu_data *rdp);
static bool rcu_init_invoked(void);
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
/*
 * rcuc/rcub/rcuop kthread realtime priority. The "rcuop" kthreads'
 * real-time priority (enabling/disabling) is additionally controlled by
 * the CONFIG_RCU_NOCB_CPU_CB_BOOST configuration option.
 */
static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
module_param(kthread_prio, int, 0444);

/* Delay in jiffies for grace-period initialization delays, debug only. */

static int gp_preinit_delay;
module_param(gp_preinit_delay, int, 0444);
static int gp_init_delay;
module_param(gp_init_delay, int, 0444);
static int gp_cleanup_delay;
module_param(gp_cleanup_delay, int, 0444);

// Add delay to rcu_read_unlock() for strict grace periods.
static int rcu_unlock_delay;
#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD
module_param(rcu_unlock_delay, int, 0444);
#endif
/*
 * This rcu parameter is runtime-read-only. It reflects
 * a minimum allowed number of objects which can be cached
 * per-CPU. Object size is equal to one page. This value
 * can be changed at boot time.
 */
static int rcu_min_cached_objs = 5;
module_param(rcu_min_cached_objs, int, 0444);

// A page shrinker can ask for pages to be freed to make them
// available for other parts of the system. This usually happens
// under low memory conditions, and in that case we should also
// defer page-cache filling for a short time period.
//
// The default value is 5 seconds, which is long enough to reduce
// interference with the shrinker while it asks other systems to
// drain their caches.
static int rcu_delay_page_cache_fill_msec = 5000;
module_param(rcu_delay_page_cache_fill_msec, int, 0444);
/* Retrieve RCU kthreads priority for rcutorture */
int rcu_get_gp_kthreads_prio(void)
{
	return kthread_prio;
}
EXPORT_SYMBOL_GPL(rcu_get_gp_kthreads_prio);

/*
 * Number of grace periods between delays, normalized by the duration of
 * the delay. The longer the delay, the more the grace periods between
 * each delay. The reason for this normalization is that it means that,
 * for non-zero delays, the overall slowdown of grace periods is constant
 * regardless of the duration of the delay. This arrangement balances
 * the need for long delays to increase some race probabilities with the
 * need for fast grace periods to increase other race probabilities.
 */
#define PER_RCU_NODE_PERIOD 3	/* Number of grace periods between delays for debugging. */
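/*
 * Illustrative arithmetic for the normalization above (an example, not a
 * statement about any particular call site): if a delay of d jiffies were
 * applied only once every PER_RCU_NODE_PERIOD * d grace periods, the average
 * added cost would be d / (PER_RCU_NODE_PERIOD * d) = 1 / PER_RCU_NODE_PERIOD
 * jiffies per grace period, independent of d, which is the constant slowdown
 * described above.
 */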
/*
 * Return true if an RCU grace period is in progress. The READ_ONCE()s
 * permit this function to be invoked without holding the root rcu_node
 * structure's ->lock, but of course results can be subject to change.
 */
static int rcu_gp_in_progress(void)
{
	return rcu_seq_state(rcu_seq_current(&rcu_state.gp_seq));
}
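/*
 * For illustration, assuming the usual rcu_seq layout (see rcu.h): the low
 * RCU_SEQ_CTR_SHIFT bits of ->gp_seq hold the grace-period phase and the
 * upper bits count grace periods, so rcu_seq_state() above returns nonzero
 * exactly when a grace period is in progress.
 */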
/*
 * Return the number of callbacks queued on the specified CPU.
 * Handles both the nocbs and normal cases.
 */
static long rcu_get_n_cbs_cpu(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);

	if (rcu_segcblist_is_enabled(&rdp->cblist))
		return rcu_segcblist_n_cbs(&rdp->cblist);
	return 0;
}

void rcu_softirq_qs(void)
{
	rcu_qs();
	rcu_preempt_deferred_qs(current);
	rcu_tasks_qs(current, false);
}
/*
 * Reset the current CPU's ->dynticks counter to indicate that the
 * newly onlined CPU is no longer in an extended quiescent state.
 * This will either leave the counter unchanged, or increment it
 * to the next non-quiescent value.
 *
 * The non-atomic test/increment sequence works because the upper bits
 * of the ->dynticks counter are manipulated only by the corresponding CPU,
 * or when the corresponding CPU is offline.
 */
static void rcu_dynticks_eqs_online(void)
{
	if (ct_dynticks() & RCU_DYNTICKS_IDX)
		return;
	ct_state_inc(RCU_DYNTICKS_IDX);
}

/*
 * Snapshot the ->dynticks counter with full ordering so as to allow
 * stable comparison of this counter with past and future snapshots.
 */
static int rcu_dynticks_snap(int cpu)
{
	smp_mb();  // Fundamental RCU ordering guarantee.
	return ct_dynticks_cpu_acquire(cpu);
}
/*
 * Return true if the snapshot returned from rcu_dynticks_snap()
 * indicates that RCU is in an extended quiescent state.
 */
static bool rcu_dynticks_in_eqs(int snap)
{
	return !(snap & RCU_DYNTICKS_IDX);
}

/*
 * Return true if the CPU corresponding to the specified rcu_data
 * structure has spent some time in an extended quiescent state since
 * rcu_dynticks_snap() returned the specified snapshot.
 */
static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
{
	return snap != rcu_dynticks_snap(rdp->cpu);
}

/*
 * Return true if the referenced integer is zero while the specified
 * CPU remains within a single extended quiescent state.
 */
bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
{
	int snap;

	// If not quiescent, force back to earlier extended quiescent state.
	snap = ct_dynticks_cpu(cpu) & ~RCU_DYNTICKS_IDX;
	smp_rmb(); // Order ->dynticks and *vp reads.
	if (READ_ONCE(*vp))
		return false;  // Non-zero, so report failure;
	smp_rmb(); // Order *vp read and ->dynticks re-read.

	// If still in the same extended quiescent state, we are good!
	return snap == ct_dynticks_cpu(cpu);
}
/*
 * Let the RCU core know that this CPU has gone through the scheduler,
 * which is a quiescent state. This is called when the need for a
 * quiescent state is urgent, so we burn an atomic operation and full
 * memory barriers to let the RCU core know about it, regardless of what
 * this CPU might (or might not) do in the near future.
 *
 * We inform the RCU core by emulating a zero-duration dyntick-idle period.
 *
 * The caller must have disabled interrupts and must not be idle.
 */
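/*
 * For illustration: adding 2 * RCU_DYNTICKS_IDX below leaves the
 * RCU_DYNTICKS_IDX bit (the "not in an extended quiescent state" indicator
 * tested by rcu_dynticks_in_eqs()) unchanged while still changing the
 * counter value, so any snapshot taken earlier now compares unequal,
 * exactly as if this CPU had passed through a momentary dyntick-idle period.
 */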
notrace void rcu_momentary_dyntick_idle(void)
{
	int seq;

	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
	seq = ct_state_inc(2 * RCU_DYNTICKS_IDX);
	/* It is illegal to call this from idle state. */
	WARN_ON_ONCE(!(seq & RCU_DYNTICKS_IDX));
	rcu_preempt_deferred_qs(current);
}
EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);

/**
 * rcu_is_cpu_rrupt_from_idle - see if 'interrupted' from idle
 *
 * If the current CPU is idle and running at a first-level (not nested)
 * interrupt, or directly from idle, return true.
 *
 * The caller must have at least disabled IRQs.
 */
static int rcu_is_cpu_rrupt_from_idle(void)
{
	long nesting;

	/*
	 * Usually called from the tick; but also used from smp_function_call()
	 * for expedited grace periods. This latter can result in running from
	 * the idle task, instead of an actual IPI.
	 */
	lockdep_assert_irqs_disabled();

	/* Check for counter underflows */
	RCU_LOCKDEP_WARN(ct_dynticks_nesting() < 0,
			 "RCU dynticks_nesting counter underflow!");
	RCU_LOCKDEP_WARN(ct_dynticks_nmi_nesting() <= 0,
			 "RCU dynticks_nmi_nesting counter underflow/zero!");

	/* Are we at first interrupt nesting level? */
	nesting = ct_dynticks_nmi_nesting();
	if (nesting > 1)
		return false;

	/*
	 * If we're not in an interrupt, we must be in the idle task!
	 */
	WARN_ON_ONCE(!nesting && !is_idle_task(current));

	/* Does CPU appear to be idle from an RCU standpoint? */
	return ct_dynticks_nesting() == 0;
}
#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)
				// Maximum callbacks per rcu_do_batch ...
#define DEFAULT_MAX_RCU_BLIMIT 10000 // ... even during callback flood.
static long blimit = DEFAULT_RCU_BLIMIT;
#define DEFAULT_RCU_QHIMARK 10000 // If this many pending, ignore blimit.
static long qhimark = DEFAULT_RCU_QHIMARK;
#define DEFAULT_RCU_QLOMARK 100   // Once only this many pending, use blimit.
static long qlowmark = DEFAULT_RCU_QLOMARK;
#define DEFAULT_RCU_QOVLD_MULT 2
#define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
static long qovld = DEFAULT_RCU_QOVLD; // If this many pending, hammer QS.
static long qovld_calc = -1;	  // No pre-initialization lock acquisitions!

module_param(blimit, long, 0444);
module_param(qhimark, long, 0444);
module_param(qlowmark, long, 0444);
module_param(qovld, long, 0444);

static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX;
static ulong jiffies_till_next_fqs = ULONG_MAX;
static bool rcu_kick_kthreads;
static int rcu_divisor = 7;
module_param(rcu_divisor, int, 0644);

/* Force an exit from rcu_do_batch() after 3 milliseconds. */
static long rcu_resched_ns = 3 * NSEC_PER_MSEC;
module_param(rcu_resched_ns, long, 0644);

/*
 * How long the grace period must be before we start recruiting
 * quiescent-state help from rcu_note_context_switch().
 */
static ulong jiffies_till_sched_qs = ULONG_MAX;
module_param(jiffies_till_sched_qs, ulong, 0444);
static ulong jiffies_to_sched_qs; /* See adjust_jiffies_till_sched_qs(). */
module_param(jiffies_to_sched_qs, ulong, 0444); /* Display only! */
/*
 * Make sure that we give the grace-period kthread time to detect any
 * idle CPUs before taking active measures to force quiescent states.
 * However, don't go below 100 milliseconds, adjusted upwards for really
 * large systems.
 */
static void adjust_jiffies_till_sched_qs(void)
{
	unsigned long j;

	/* If jiffies_till_sched_qs was specified, respect the request. */
	if (jiffies_till_sched_qs != ULONG_MAX) {
		WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs);
		return;
	}
	/* Otherwise, set to third fqs scan, but bound below on large system. */
	j = READ_ONCE(jiffies_till_first_fqs) +
		      2 * READ_ONCE(jiffies_till_next_fqs);
	if (j < HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV)
		j = HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
	pr_info("RCU calculated value of scheduler-enlistment delay is %ld jiffies.\n", j);
	WRITE_ONCE(jiffies_to_sched_qs, j);
}
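/*
 * Worked example (assuming HZ=1000, nr_cpu_ids=8, and a RCU_JIFFIES_FQS_DIV
 * of 256): with jiffies_till_first_fqs=1 and jiffies_till_next_fqs=1, the
 * third-scan estimate is 1 + 2 * 1 = 3 jiffies, which is below the floor of
 * 1000 / 10 + 8 / 256 = 100 jiffies, so jiffies_to_sched_qs becomes 100.
 */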
static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp)
{
	ulong j;
	int ret = kstrtoul(val, 0, &j);

	if (!ret) {
		WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j);
		adjust_jiffies_till_sched_qs();
	}
	return ret;
}

static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param *kp)
{
	ulong j;
	int ret = kstrtoul(val, 0, &j);

	if (!ret) {
		WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1));
		adjust_jiffies_till_sched_qs();
	}
	return ret;
}
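/*
 * Note the clamping difference above: both setters cap the value at HZ
 * jiffies, but only the "next" interval is forced up to at least one jiffy
 * (via "j ?: 1"). The "first" interval may be set to zero, which requests an
 * immediate first force-quiescent-state scan, as the strict-grace-period
 * default above suggests.
 */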
static const struct kernel_param_ops first_fqs_jiffies_ops = {
	.set = param_set_first_fqs_jiffies,
	.get = param_get_ulong,
};

static const struct kernel_param_ops next_fqs_jiffies_ops = {
	.set = param_set_next_fqs_jiffies,
	.get = param_get_ulong,
};

module_param_cb(jiffies_till_first_fqs, &first_fqs_jiffies_ops, &jiffies_till_first_fqs, 0644);
module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644);
module_param(rcu_kick_kthreads, bool, 0644);

static void force_qs_rnp(int (*f)(struct rcu_data *rdp));
static int rcu_pending(int user);
/*
 * Return the number of RCU GPs completed thus far for debug & stats.
 */
unsigned long rcu_get_gp_seq(void)
{
	return READ_ONCE(rcu_state.gp_seq);
}
EXPORT_SYMBOL_GPL(rcu_get_gp_seq);

/*
 * Return the number of RCU expedited batches completed thus far for
 * debug & stats. Odd numbers mean that a batch is in progress; even
 * numbers mean idle. The value returned will thus be roughly double
 * the cumulative batches since boot.
 */
unsigned long rcu_exp_batches_completed(void)
{
	return rcu_state.expedited_sequence;
}
EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
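/*
 * For illustration: a returned value of 2 * N means that N expedited batches
 * have completed and none is currently running, while 2 * N + 1 means that N
 * batches have completed and an (N+1)th is still in flight.
 */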
/*
 * Return the root node of the rcu_state structure.
 */
static struct rcu_node *rcu_get_root(void)
{
	return &rcu_state.node[0];
}

/*
 * Send along grace-period-related data for rcutorture diagnostics.
 */
void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
			    unsigned long *gp_seq)
{
	switch (test_type) {
	case RCU_FLAVOR:
		*flags = READ_ONCE(rcu_state.gp_flags);
		*gp_seq = rcu_seq_current(&rcu_state.gp_seq);
		break;
	default:
		break;
	}
}
EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK))
/*
 * An empty function that will trigger a reschedule on
 * IRQ tail once IRQs get re-enabled on userspace/guest resume.
 */
static void late_wakeup_func(struct irq_work *work)
{
}

static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) =
	IRQ_WORK_INIT(late_wakeup_func);

/*
 * If either:
 *
 * 1) the task is about to enter guest mode and $ARCH doesn't support KVM generic work
 * 2) the task is about to enter user mode and $ARCH doesn't support generic entry.
 *
 * In these cases the late RCU wake ups aren't supported in the resched loops and our
 * last resort is to fire a local irq_work that will trigger a reschedule once IRQs
 * get re-enabled again.
 */
noinstr void rcu_irq_work_resched(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

	if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU))
		return;

	if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU))
		return;

	instrumentation_begin();
	if (do_nocb_deferred_wakeup(rdp) && need_resched()) {
		irq_work_queue(this_cpu_ptr(&late_wakeup_work));
	}
	instrumentation_end();
}
#endif /* #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) */
#ifdef CONFIG_PROVE_RCU
/**
 * rcu_irq_exit_check_preempt - Validate that scheduling is possible
 */
void rcu_irq_exit_check_preempt(void)
{
	lockdep_assert_irqs_disabled();

	RCU_LOCKDEP_WARN(ct_dynticks_nesting() <= 0,
			 "RCU dynticks_nesting counter underflow/zero!");
	RCU_LOCKDEP_WARN(ct_dynticks_nmi_nesting() !=
			 DYNTICK_IRQ_NONIDLE,
			 "Bad RCU dynticks_nmi_nesting counter\n");
	RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
			 "RCU in extended quiescent state!");
}
#endif /* #ifdef CONFIG_PROVE_RCU */
#ifdef CONFIG_NO_HZ_FULL
/**
 * __rcu_irq_enter_check_tick - Enable scheduler tick on CPU if RCU needs it.
 *
 * The scheduler tick is not normally enabled when CPUs enter the kernel
 * from nohz_full userspace execution. After all, nohz_full userspace
 * execution is an RCU quiescent state and the time executing in the kernel
 * is quite short. Except of course when it isn't. And it is not hard to
 * cause a large system to spend tens of seconds or even minutes looping
 * in the kernel, which can cause a number of problems, including RCU CPU
 * stall warnings.
 *
 * Therefore, if a nohz_full CPU fails to report a quiescent state
 * in a timely manner, the RCU grace-period kthread sets that CPU's
 * ->rcu_urgent_qs flag with the expectation that the next interrupt or
 * exception will invoke this function, which will turn on the scheduler
 * tick, which will enable RCU to detect that CPU's quiescent states,
 * for example, due to cond_resched() calls in CONFIG_PREEMPT=n kernels.
 * The tick will be disabled once a quiescent state is reported for
 * this CPU.
 *
 * Of course, in carefully tuned systems, there might never be an
 * interrupt or exception. In that case, the RCU grace-period kthread
 * will eventually cause one to happen. However, in less carefully
 * controlled environments, this function allows RCU to get what it
 * needs without creating otherwise useless interruptions.
 */
void __rcu_irq_enter_check_tick(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

	// If we're here from NMI there's nothing to do.
	if (in_nmi())
		return;

	RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
			 "Illegal rcu_irq_enter_check_tick() from extended quiescent state");

	if (!tick_nohz_full_cpu(rdp->cpu) ||
	    !READ_ONCE(rdp->rcu_urgent_qs) ||
	    READ_ONCE(rdp->rcu_forced_tick)) {
		// RCU doesn't need nohz_full help from this CPU, or it is
		// already getting that help.
		return;
	}

	// We get here only when not in an extended quiescent state and
	// from interrupts (as opposed to NMIs). Therefore, (1) RCU is
	// already watching and (2) The fact that we are in an interrupt
	// handler and that the rcu_node lock is an irq-disabled lock
	// prevents self-deadlock. So we can safely recheck under the lock.
	// Note that the nohz_full state currently cannot change.
	raw_spin_lock_rcu_node(rdp->mynode);
	if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
		// A nohz_full CPU is in the kernel and RCU needs a
		// quiescent state. Turn on the tick!
		WRITE_ONCE(rdp->rcu_forced_tick, true);
		tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
	}
	raw_spin_unlock_rcu_node(rdp->mynode);
}
NOKPROBE_SYMBOL(__rcu_irq_enter_check_tick);
#endif /* CONFIG_NO_HZ_FULL */
/*
 * Check to see if any future non-offloaded RCU-related work will need
 * to be done by the current CPU, even if none need be done immediately,
 * returning 1 if so. This function is part of the RCU implementation;
 * it is -not- an exported member of the RCU API. This is used by
 * the idle-entry code to figure out whether it is safe to disable the
 * scheduler-clock interrupt.
 *
 * Just check whether or not this CPU has non-offloaded RCU callbacks
 * queued.
 */
int rcu_needs_cpu(void)
{
	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
		!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
}

/*
 * If any sort of urgency was applied to the current CPU (for example,
 * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order
 * to get to a quiescent state, disable it.
 */
static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
{
	raw_lockdep_assert_held_rcu_node(rdp->mynode);
	WRITE_ONCE(rdp->rcu_urgent_qs, false);
	WRITE_ONCE(rdp->rcu_need_heavy_qs, false);
	if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) {
		tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
		WRITE_ONCE(rdp->rcu_forced_tick, false);
	}
}
/**
 * rcu_is_watching - see if RCU thinks that the current CPU is not idle
 *
 * Return true if RCU is watching the running CPU, which means that this
 * CPU can safely enter RCU read-side critical sections. In other words,
 * if the current CPU is not in its idle loop or is in an interrupt or
 * NMI handler, return true.
 *
 * Make notrace because it can be called by the internal functions of
 * ftrace, and making this notrace removes unnecessary recursion calls.
 */
notrace bool rcu_is_watching(void)
{
	bool ret;

	preempt_disable_notrace();
	ret = !rcu_dynticks_curr_cpu_in_eqs();
	preempt_enable_notrace();
	return ret;
}
EXPORT_SYMBOL_GPL(rcu_is_watching);
/*
 * If a holdout task is actually running, request an urgent quiescent
 * state from its CPU. This is unsynchronized, so migrations can cause
 * the request to go to the wrong CPU. Which is OK, all that will happen
 * is that the CPU's next context switch will be a bit slower and next
 * time around this task will generate another request.
 */
void rcu_request_urgent_qs_task(struct task_struct *t)
{
	int cpu;

	barrier();
	cpu = task_cpu(t);
	if (!task_curr(t))
		return; /* This task is not running on that CPU. */
	smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
}
/*
 * When trying to report a quiescent state on behalf of some other CPU,
 * it is our responsibility to check for and handle potential overflow
 * of the rcu_node ->gp_seq counter with respect to the rcu_data counters.
 * After all, the CPU might be in deep idle state, and thus executing no
 * code whatsoever.
 */
static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
{
	raw_lockdep_assert_held_rcu_node(rnp);
	if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4,
			 rnp->gp_seq))
		WRITE_ONCE(rdp->gpwrap, true);
	if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq))
		rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4;
}
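/*
 * For illustration of the checks above, assuming ULONG_CMP_LT() is the usual
 * wrap-aware (modular) comparison: the first test fires only when rnp->gp_seq
 * has run more than ULONG_MAX / 4 counts ahead of this CPU's rdp->gp_seq,
 * which is taken as evidence that the CPU idled through a counter wrap, so
 * its ->gpwrap flag is set. The second test similarly drags ->rcu_iw_gp_seq
 * back within range of the rcu_node counter.
 */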