// SPDX-License-Identifier: GPL-2.0+
/*
 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
 *
 * Copyright IBM Corporation, 2008
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *	    Manfred Spraul <manfred@colorfullife.com>
 *	    Paul E. McKenney <paulmck@linux.ibm.com>
 *
 * Based on the original work by Paul McKenney <paulmck@linux.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *	Documentation/RCU
 */

#define pr_fmt(fmt) "rcu: " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate_wait.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/nmi.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/panic.h>
#include <linux/panic_notifier.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
#include <linux/wait.h>
#include <linux/kthread.h>
#include <uapi/linux/sched/types.h>
#include <linux/prefetch.h>
#include <linux/delay.h>
#include <linux/random.h>
#include <linux/trace_events.h>
#include <linux/suspend.h>
#include <linux/ftrace.h>
#include <linux/tick.h>
#include <linux/sysrq.h>
#include <linux/kprobes.h>
#include <linux/gfp.h>
#include <linux/oom.h>
#include <linux/smpboot.h>
#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/sched/isolation.h>
#include <linux/sched/clock.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/kasan.h>
#include <linux/context_tracking.h>
#include "../time/tick-internal.h"

#include "tree.h"
#include "rcu.h"

#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "rcutree."

/* Data structures. */

static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
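	/*
	 * Note: ->gpwrap starts out set, presumably so that this CPU's view
	 * of the grace-period sequence is treated as stale and resynchronized
	 * the first time it is compared against an rcu_node's ->gp_seq.
	 */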
	.gpwrap = true,
#ifdef CONFIG_RCU_NOCB_CPU
	.cblist.flags = SEGCBLIST_RCU_CORE,
#endif
};
static struct rcu_state rcu_state = {
	.level = { &rcu_state.node[0] },
	.gp_state = RCU_GP_IDLE,
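	/*
	 * Note: the initial ->gp_seq value sits a few hundred grace periods
	 * short of unsigned-long wraparound, apparently so that the
	 * sequence-counter wrap paths get exercised soon after boot.
	 */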
	.gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT,
	.barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex),
	.barrier_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.barrier_lock),
	.name = RCU_NAME,
	.abbr = RCU_ABBR,
	.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
	.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),
	.ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED,
};

/* Dump rcu_node combining tree at boot to verify correct setup. */
static bool dump_tree;
module_param(dump_tree, bool, 0444);
/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
static bool use_softirq = !IS_ENABLED(CONFIG_PREEMPT_RT);
#ifndef CONFIG_PREEMPT_RT
module_param(use_softirq, bool, 0444);
#endif
/* Control rcu_node-tree auto-balancing at boot time. */
static bool rcu_fanout_exact;
module_param(rcu_fanout_exact, bool, 0444);
/* Increase (but not decrease) the RCU_FANOUT_LEAF at boot time. */
static int rcu_fanout_leaf = RCU_FANOUT_LEAF;
module_param(rcu_fanout_leaf, int, 0444);
int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
/* Number of rcu_nodes at specified level. */
int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */

/*
 * The rcu_scheduler_active variable is initialized to the value
 * RCU_SCHEDULER_INACTIVE and transitions RCU_SCHEDULER_INIT just before the
 * first task is spawned. So when this variable is RCU_SCHEDULER_INACTIVE,
 * RCU can assume that there is but one task, allowing RCU to (for example)
 * optimize synchronize_rcu() to a simple barrier(). When this variable
 * is RCU_SCHEDULER_INIT, RCU must actually do all the hard work required
 * to detect real grace periods. This variable is also used to suppress
 * boot-time false positives from lockdep-RCU error checking. Finally, it
 * transitions from RCU_SCHEDULER_INIT to RCU_SCHEDULER_RUNNING after RCU
 * is fully initialized, including all of its kthreads having been spawned.
 */
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);

/*
 * The rcu_scheduler_fully_active variable transitions from zero to one
 * during the early_initcall() processing, which is after the scheduler
 * is capable of creating new tasks. So RCU processing (for example,
 * creating tasks for RCU priority boosting) must be delayed until after
 * rcu_scheduler_fully_active transitions from zero to one. We also
 * currently delay invocation of any RCU callbacks until after this point.
 *
 * It might later prove better for people registering RCU callbacks during
 * early boot to take responsibility for these callbacks, but one step at
 * a time.
 */
static int rcu_scheduler_fully_active __read_mostly;

static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
			      unsigned long gps, unsigned long flags);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
static void invoke_rcu_core(void);
static void rcu_report_exp_rdp(struct rcu_data *rdp);
static void sync_sched_exp_online_cleanup(int cpu);
static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp);
static bool rcu_rdp_cpu_online(struct rcu_data *rdp);
static bool rcu_init_invoked(void);
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);

/*
 * rcuc/rcub/rcuop kthread realtime priority. Whether the "rcuop"
 * kthreads' real-time priority is enabled or disabled is controlled by
 * the extra CONFIG_RCU_NOCB_CPU_CB_BOOST configuration.
 */
static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
module_param(kthread_prio, int, 0444);

/* Delay in jiffies for grace-period initialization delays, debug only. */

static int gp_preinit_delay;
module_param(gp_preinit_delay, int, 0444);
static int gp_init_delay;
module_param(gp_init_delay, int, 0444);
static int gp_cleanup_delay;
module_param(gp_cleanup_delay, int, 0444);

// Add delay to rcu_read_unlock() for strict grace periods.
static int rcu_unlock_delay;
#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD
module_param(rcu_unlock_delay, int, 0444);
#endif

/*
 * This rcu parameter is runtime-read-only. It reflects
 * a minimum allowed number of objects which can be cached
 * per-CPU. Object size is equal to one page. This value
 * can be changed at boot time.
 */
static int rcu_min_cached_objs = 5;
module_param(rcu_min_cached_objs, int, 0444);

// A page shrinker can ask for pages to be freed to make them
// available for other parts of the system. This usually happens
// under low memory conditions, and in that case we should also
// defer page-cache filling for a short time period.
//
// The default value is 5 seconds, which is long enough to reduce
// interference with the shrinker while it asks other systems to
// drain their caches.
static int rcu_delay_page_cache_fill_msec = 5000;
module_param(rcu_delay_page_cache_fill_msec, int, 0444);

/* Retrieve RCU kthreads priority for rcutorture */
int rcu_get_gp_kthreads_prio(void)
{
	return kthread_prio;
}
EXPORT_SYMBOL_GPL(rcu_get_gp_kthreads_prio);

/*
 * Number of grace periods between delays, normalized by the duration of
 * the delay. The longer the delay, the more the grace periods between
 * each delay. The reason for this normalization is that it means that,
 * for non-zero delays, the overall slowdown of grace periods is constant
 * regardless of the duration of the delay. This arrangement balances
 * the need for long delays to increase some race probabilities with the
 * need for fast grace periods to increase other race probabilities.
 */
#define PER_RCU_NODE_PERIOD 3	/* Number of grace periods between delays for debugging. */

/*
 * Return true if an RCU grace period is in progress. The READ_ONCE()s
 * permit this function to be invoked without holding the root rcu_node
 * structure's ->lock, but of course results can be subject to change.
 */
static int rcu_gp_in_progress(void)
{
	return rcu_seq_state(rcu_seq_current(&rcu_state.gp_seq));
}

/*
 * Return the number of callbacks queued on the specified CPU.
 * Handles both the nocbs and normal cases.
 */
static long rcu_get_n_cbs_cpu(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);

	if (rcu_segcblist_is_enabled(&rdp->cblist))
		return rcu_segcblist_n_cbs(&rdp->cblist);
	return 0;
}

void rcu_softirq_qs(void)
{
	rcu_qs();
	rcu_preempt_deferred_qs(current);
	rcu_tasks_qs(current, false);
}

/*
 * Reset the current CPU's ->dynticks counter to indicate that the
 * newly onlined CPU is no longer in an extended quiescent state.
 * This will either leave the counter unchanged, or increment it
 * to the next non-quiescent value.
 *
 * The non-atomic test/increment sequence works because the upper bits
 * of the ->dynticks counter are manipulated only by the corresponding CPU,
 * or when the corresponding CPU is offline.
 */
static void rcu_dynticks_eqs_online(void)
{
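	/* Counter already indicates non-idle (RCU is watching), so bail. */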
	if (ct_dynticks() & RCU_DYNTICKS_IDX)
		return;
	ct_state_inc(RCU_DYNTICKS_IDX);
}

/*
 * Snapshot the ->dynticks counter with full ordering so as to allow
 * stable comparison of this counter with past and future snapshots.
 */
static int rcu_dynticks_snap(int cpu)
{
	smp_mb(); // Fundamental RCU ordering guarantee.
	return ct_dynticks_cpu_acquire(cpu);
}

/*
 * Return true if the snapshot returned from rcu_dynticks_snap()
 * indicates that RCU is in an extended quiescent state.
 */
static bool rcu_dynticks_in_eqs(int snap)
{
	return !(snap & RCU_DYNTICKS_IDX);
}

/*
 * Return true if the CPU corresponding to the specified rcu_data
 * structure has spent some time in an extended quiescent state since
 * rcu_dynticks_snap() returned the specified snapshot.
 */
static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
{
	return snap != rcu_dynticks_snap(rdp->cpu);
}

/*
 * Return true if the referenced integer is zero while the specified
 * CPU remains within a single extended quiescent state.
 */
bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
{
	int snap;

	// If not quiescent, force back to earlier extended quiescent state.
	snap = ct_dynticks_cpu(cpu) & ~RCU_DYNTICKS_IDX;
	smp_rmb(); // Order ->dynticks and *vp reads.
	if (READ_ONCE(*vp))
		return false; // Non-zero, so report failure;
	smp_rmb(); // Order *vp read and ->dynticks re-read.

	// If still in the same extended quiescent state, we are good!
	return snap == ct_dynticks_cpu(cpu);
}

/*
 * Let the RCU core know that this CPU has gone through the scheduler,
 * which is a quiescent state. This is called when the need for a
 * quiescent state is urgent, so we burn an atomic operation and full
 * memory barriers to let the RCU core know about it, regardless of what
 * this CPU might (or might not) do in the near future.
 *
 * We inform the RCU core by emulating a zero-duration dyntick-idle period.
 *
 * The caller must have disabled interrupts and must not be idle.
 */
notrace void rcu_momentary_dyntick_idle(void)
{
	int seq;

	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
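	/*
	 * Advancing the counter by twice RCU_DYNTICKS_IDX leaves the
	 * RCU_DYNTICKS_IDX bit itself unchanged, which is what makes this
	 * look like a zero-duration dyntick-idle period to the grace-period
	 * machinery; the WARN below checks that the bit is indeed still set.
	 */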
	seq = ct_state_inc(2 * RCU_DYNTICKS_IDX);
	/* It is illegal to call this from idle state. */
	WARN_ON_ONCE(!(seq & RCU_DYNTICKS_IDX));
	rcu_preempt_deferred_qs(current);
}
EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);

/**
 * rcu_is_cpu_rrupt_from_idle - see if 'interrupted' from idle
 *
 * If the current CPU is idle and running at a first-level (not nested)
 * interrupt, or directly, from idle, return true.
 *
 * The caller must have at least disabled IRQs.
 */
static int rcu_is_cpu_rrupt_from_idle(void)
{
	long nesting;

	/*
	 * Usually called from the tick; but also used from smp_function_call()
	 * for expedited grace periods. This latter can result in running from
	 * the idle task, instead of an actual IPI.
	 */
	lockdep_assert_irqs_disabled();

	/* Check for counter underflows */
	RCU_LOCKDEP_WARN(ct_dynticks_nesting() < 0,
			 "RCU dynticks_nesting counter underflow!");
	RCU_LOCKDEP_WARN(ct_dynticks_nmi_nesting() <= 0,
			 "RCU dynticks_nmi_nesting counter underflow/zero!");

	/* Are we at first interrupt nesting level? */
	nesting = ct_dynticks_nmi_nesting();
	if (nesting > 1)
		return false;

	/*
	 * If we're not in an interrupt, we must be in the idle task!
	 */
	WARN_ON_ONCE(!nesting && !is_idle_task(current));

	/* Does CPU appear to be idle from an RCU standpoint? */
	return ct_dynticks_nesting() == 0;
}

#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)
	// Maximum callbacks per rcu_do_batch ...
#define DEFAULT_MAX_RCU_BLIMIT 10000 // ... even during callback flood.
static long blimit = DEFAULT_RCU_BLIMIT;
#define DEFAULT_RCU_QHIMARK 10000 // If this many pending, ignore blimit.
static long qhimark = DEFAULT_RCU_QHIMARK;
#define DEFAULT_RCU_QLOMARK 100 // Once only this many pending, use blimit.
static long qlowmark = DEFAULT_RCU_QLOMARK;
#define DEFAULT_RCU_QOVLD_MULT 2
#define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
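	// With the default qhimark this works out to 20000 pending callbacks.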
static long qovld = DEFAULT_RCU_QOVLD; // If this many pending, hammer QS.
static long qovld_calc = -1; // No pre-initialization lock acquisitions!

module_param(blimit, long, 0444);
module_param(qhimark, long, 0444);
module_param(qlowmark, long, 0444);
module_param(qovld, long, 0444);

static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX;
static ulong jiffies_till_next_fqs = ULONG_MAX;
static bool rcu_kick_kthreads;
static int rcu_divisor = 7;
module_param(rcu_divisor, int, 0644);

/* Force an exit from rcu_do_batch() after 3 milliseconds. */
static long rcu_resched_ns = 3 * NSEC_PER_MSEC;
module_param(rcu_resched_ns, long, 0644);

/*
 * How long the grace period must be before we start recruiting
 * quiescent-state help from rcu_note_context_switch().
 */
static ulong jiffies_till_sched_qs = ULONG_MAX;
module_param(jiffies_till_sched_qs, ulong, 0444);
static ulong jiffies_to_sched_qs; /* See adjust_jiffies_till_sched_qs(). */
module_param(jiffies_to_sched_qs, ulong, 0444); /* Display only! */

/*
 * Make sure that we give the grace-period kthread time to detect any
 * idle CPUs before taking active measures to force quiescent states.
 * However, don't go below 100 milliseconds, adjusted upwards for really
 * large systems.
 */
static void adjust_jiffies_till_sched_qs(void)
{
	unsigned long j;

	/* If jiffies_till_sched_qs was specified, respect the request. */
	if (jiffies_till_sched_qs != ULONG_MAX) {
		WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs);
		return;
	}
	/* Otherwise, set to third fqs scan, but bound below on large system. */
	j = READ_ONCE(jiffies_till_first_fqs) +
	    2 * READ_ONCE(jiffies_till_next_fqs);
	if (j < HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV)
		j = HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
	pr_info("RCU calculated value of scheduler-enlistment delay is %ld jiffies.\n", j);
	WRITE_ONCE(jiffies_to_sched_qs, j);
}

static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp)
{
	ulong j;
	int ret = kstrtoul(val, 0, &j);

	if (!ret) {
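		/* Cap the requested interval at HZ jiffies (about one second). */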
		WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j);
		adjust_jiffies_till_sched_qs();
	}
	return ret;
}

static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param *kp)
{
	ulong j;
	int ret = kstrtoul(val, 0, &j);

	if (!ret) {
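		/* As above, but "j ?: 1" also enforces a minimum of one jiffy. */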
		WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1));
		adjust_jiffies_till_sched_qs();
	}
	return ret;
}

static const struct kernel_param_ops first_fqs_jiffies_ops = {
	.set = param_set_first_fqs_jiffies,
	.get = param_get_ulong,
};

static const struct kernel_param_ops next_fqs_jiffies_ops = {
	.set = param_set_next_fqs_jiffies,
	.get = param_get_ulong,
};

module_param_cb(jiffies_till_first_fqs, &first_fqs_jiffies_ops, &jiffies_till_first_fqs, 0644);
module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644);
module_param(rcu_kick_kthreads, bool, 0644);

static void force_qs_rnp(int (*f)(struct rcu_data *rdp));
static int rcu_pending(int user);

/*
 * Return the number of RCU GPs completed thus far for debug & stats.
 */
unsigned long rcu_get_gp_seq(void)
{
	return READ_ONCE(rcu_state.gp_seq);
}
EXPORT_SYMBOL_GPL(rcu_get_gp_seq);

/*
 * Return the number of RCU expedited batches completed thus far for
 * debug & stats. Odd numbers mean that a batch is in progress, even
 * numbers mean idle. The value returned will thus be roughly double
 * the cumulative batches since boot.
 */
unsigned long rcu_exp_batches_completed(void)
{
	return rcu_state.expedited_sequence;
}
EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);

/*
 * Return the root node of the rcu_state structure.
 */
static struct rcu_node *rcu_get_root(void)
{
	return &rcu_state.node[0];
}

/*
 * Send along grace-period-related data for rcutorture diagnostics.
 */
void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
			    unsigned long *gp_seq)
{
	switch (test_type) {
	case RCU_FLAVOR:
		*flags = READ_ONCE(rcu_state.gp_flags);
		*gp_seq = rcu_seq_current(&rcu_state.gp_seq);
		break;
	default:
		break;
	}
}
EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);

#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK))
/*
 * An empty function that will trigger a reschedule on
 * IRQ tail once IRQs get re-enabled on userspace/guest resume.
 */
static void late_wakeup_func(struct irq_work *work)
{
}

static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) =
	IRQ_WORK_INIT(late_wakeup_func);

/*
 * If either:
 *
 * 1) the task is about to enter guest mode and $ARCH doesn't support KVM generic work
 * 2) the task is about to enter user mode and $ARCH doesn't support generic entry.
 *
 * In these cases the late RCU wakeups aren't supported in the resched loops and our
 * last resort is to fire a local irq_work that will trigger a reschedule once IRQs
 * get re-enabled again.
 */
noinstr void rcu_irq_work_resched(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

	if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU))
		return;

	if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU))
		return;

	instrumentation_begin();
	if (do_nocb_deferred_wakeup(rdp) && need_resched()) {
		irq_work_queue(this_cpu_ptr(&late_wakeup_work));
	}
	instrumentation_end();
}
#endif /* #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) */

#ifdef CONFIG_PROVE_RCU
/**
 * rcu_irq_exit_check_preempt - Validate that scheduling is possible
 */
void rcu_irq_exit_check_preempt(void)
{
	lockdep_assert_irqs_disabled();

	RCU_LOCKDEP_WARN(ct_dynticks_nesting() <= 0,
			 "RCU dynticks_nesting counter underflow/zero!");
	RCU_LOCKDEP_WARN(ct_dynticks_nmi_nesting() !=
			 DYNTICK_IRQ_NONIDLE,
			 "Bad RCU dynticks_nmi_nesting counter\n");
	RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
			 "RCU in extended quiescent state!");
}
#endif /* #ifdef CONFIG_PROVE_RCU */

#ifdef CONFIG_NO_HZ_FULL
/**
 * __rcu_irq_enter_check_tick - Enable scheduler tick on CPU if RCU needs it.
 *
 * The scheduler tick is not normally enabled when CPUs enter the kernel
 * from nohz_full userspace execution. After all, nohz_full userspace
 * execution is an RCU quiescent state and the time executing in the kernel
 * is quite short. Except of course when it isn't. And it is not hard to
 * cause a large system to spend tens of seconds or even minutes looping
 * in the kernel, which can cause a number of problems, including RCU CPU
 * stall warnings.
 *
 * Therefore, if a nohz_full CPU fails to report a quiescent state
 * in a timely manner, the RCU grace-period kthread sets that CPU's
 * ->rcu_urgent_qs flag with the expectation that the next interrupt or
 * exception will invoke this function, which will turn on the scheduler
 * tick, which will enable RCU to detect that CPU's quiescent states,
 * for example, due to cond_resched() calls in CONFIG_PREEMPT=n kernels.
 * The tick will be disabled once a quiescent state is reported for
 * this CPU.
 *
 * Of course, in carefully tuned systems, there might never be an
 * interrupt or exception. In that case, the RCU grace-period kthread
 * will eventually cause one to happen. However, in less carefully
 * controlled environments, this function allows RCU to get what it
 * needs without creating otherwise useless interruptions.
 */
void __rcu_irq_enter_check_tick(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

	// If we're here from NMI there's nothing to do.
	if (in_nmi())
		return;

	RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
			 "Illegal rcu_irq_enter_check_tick() from extended quiescent state");

	if (!tick_nohz_full_cpu(rdp->cpu) ||
	    !READ_ONCE(rdp->rcu_urgent_qs) ||
	    READ_ONCE(rdp->rcu_forced_tick)) {
		// RCU doesn't need nohz_full help from this CPU, or it is
		// already getting that help.
		return;
	}

	// We get here only when not in an extended quiescent state and
	// from interrupts (as opposed to NMIs). Therefore, (1) RCU is
	// already watching and (2) The fact that we are in an interrupt
	// handler and that the rcu_node lock is an irq-disabled lock
	// prevents self-deadlock. So we can safely recheck under the lock.
	// Note that the nohz_full state currently cannot change.
	raw_spin_lock_rcu_node(rdp->mynode);
	if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
		// A nohz_full CPU is in the kernel and RCU needs a
		// quiescent state. Turn on the tick!
		WRITE_ONCE(rdp->rcu_forced_tick, true);
		tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
	}
	raw_spin_unlock_rcu_node(rdp->mynode);
}
NOKPROBE_SYMBOL(__rcu_irq_enter_check_tick);
#endif /* CONFIG_NO_HZ_FULL */

/*
 * Check to see if any future non-offloaded RCU-related work will need
 * to be done by the current CPU, even if none need be done immediately,
 * returning 1 if so. This function is part of the RCU implementation;
 * it is -not- an exported member of the RCU API. This is used by
 * the idle-entry code to figure out whether it is safe to disable the
 * scheduler-clock interrupt.
 *
 * Just check whether or not this CPU has non-offloaded RCU callbacks
 * queued.
 */
int rcu_needs_cpu(void)
{
	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
		!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
}

/*
 * If any sort of urgency was applied to the current CPU (for example,
 * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order
 * to get to a quiescent state, disable it.
 */
static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
{
	raw_lockdep_assert_held_rcu_node(rdp->mynode);
	WRITE_ONCE(rdp->rcu_urgent_qs, false);
	WRITE_ONCE(rdp->rcu_need_heavy_qs, false);
	if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) {
		tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
		WRITE_ONCE(rdp->rcu_forced_tick, false);
	}
}

/**
 * rcu_is_watching - see if RCU thinks that the current CPU is not idle
 *
 * Return true if RCU is watching the running CPU, which means that this
 * CPU can safely enter RCU read-side critical sections. In other words,
 * if the current CPU is not in its idle loop or is in an interrupt or
 * NMI handler, return true.
 *
 * Make notrace because it can be called by the internal functions of
 * ftrace, and making this notrace removes unnecessary recursion calls.
 */
notrace bool rcu_is_watching(void)
{
	bool ret;

	preempt_disable_notrace();
	ret = !rcu_dynticks_curr_cpu_in_eqs();
	preempt_enable_notrace();
	return ret;
}
EXPORT_SYMBOL_GPL(rcu_is_watching);

/*
 * If a holdout task is actually running, request an urgent quiescent
 * state from its CPU. This is unsynchronized, so migrations can cause
 * the request to go to the wrong CPU. Which is OK, all that will happen
 * is that the CPU's next context switch will be a bit slower and next
 * time around this task will generate another request.
 */
void rcu_request_urgent_qs_task(struct task_struct *t)
{
	int cpu;

	barrier();
	cpu = task_cpu(t);
	if (!task_curr(t))
		return; /* This task is not running on that CPU. */
	smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
}

/*
 * When trying to report a quiescent state on behalf of some other CPU,
 * it is our responsibility to check for and handle potential overflow
 * of the rcu_node ->gp_seq counter with respect to the rcu_data counters.
 * After all, the CPU might be in deep idle state, and thus executing no
 * code whatsoever.
 */
static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
{
	raw_lockdep_assert_held_rcu_node(rnp);
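	/*
	 * If this CPU's recorded ->gp_seq has fallen more than a quarter of
	 * the counter space behind the rcu_node's ->gp_seq, assume the
	 * counter has wrapped relative to this CPU and flag it via ->gpwrap.
	 */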
	if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4,
			 rnp->gp_seq))
		WRITE_ONCE(rdp->gpwrap, true);
	if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq))
		rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4;
}