1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00
linux/arch/powerpc/include/asm/paravirt.h
Srikar Dronamraju ca3f969dcb powerpc/paravirt: Use is_kvm_guest() in vcpu_is_preempted()
If its a shared LPAR but not a KVM guest, then see if the vCPU is
related to the calling vCPU. On PowerVM, only cores can be preempted.
So if one vCPU is a non-preempted state, we can decipher that all
other vCPUs sharing the same core are in non-preempted state.

Performance results:

  $ perf stat -r 5 -a perf bench sched pipe -l 10000000 (lesser time is better)

  powerpc/next
       35,107,951.20 msec cpu-clock                 #  255.898 CPUs utilized            ( +-  0.31% )
          23,655,348      context-switches          #    0.674 K/sec                    ( +-  3.72% )
              14,465      cpu-migrations            #    0.000 K/sec                    ( +-  5.37% )
              82,463      page-faults               #    0.002 K/sec                    ( +-  8.40% )
   1,127,182,328,206      cycles                    #    0.032 GHz                      ( +-  1.60% )  (66.67%)
      78,587,300,622      stalled-cycles-frontend   #    6.97% frontend cycles idle     ( +-  0.08% )  (50.01%)
     654,124,218,432      stalled-cycles-backend    #   58.03% backend cycles idle      ( +-  1.74% )  (50.01%)
     834,013,059,242      instructions              #    0.74  insn per cycle
                                                    #    0.78  stalled cycles per insn  ( +-  0.73% )  (66.67%)
     132,911,454,387      branches                  #    3.786 M/sec                    ( +-  0.59% )  (50.00%)
       2,890,882,143      branch-misses             #    2.18% of all branches          ( +-  0.46% )  (50.00%)

             137.195 +- 0.419 seconds time elapsed  ( +-  0.31% )

  powerpc/next + patchset
       29,981,702.64 msec cpu-clock                 #  255.881 CPUs utilized            ( +-  1.30% )
          40,162,456      context-switches          #    0.001 M/sec                    ( +-  0.01% )
               1,110      cpu-migrations            #    0.000 K/sec                    ( +-  5.20% )
              62,616      page-faults               #    0.002 K/sec                    ( +-  3.93% )
   1,430,030,626,037      cycles                    #    0.048 GHz                      ( +-  1.41% )  (66.67%)
      83,202,707,288      stalled-cycles-frontend   #    5.82% frontend cycles idle     ( +-  0.75% )  (50.01%)
     744,556,088,520      stalled-cycles-backend    #   52.07% backend cycles idle      ( +-  1.39% )  (50.01%)
     940,138,418,674      instructions              #    0.66  insn per cycle
                                                    #    0.79  stalled cycles per insn  ( +-  0.51% )  (66.67%)
     146,452,852,283      branches                  #    4.885 M/sec                    ( +-  0.80% )  (50.00%)
       3,237,743,996      branch-misses             #    2.21% of all branches          ( +-  1.18% )  (50.01%)

              117.17 +- 1.52 seconds time elapsed  ( +-  1.30% )

This is around 14.6% improvement in performance.

Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Waiman Long <longman@redhat.com>
[mpe: Fold in performance results from cover letter]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201202050456.164005-5-srikar@linux.vnet.ibm.com
2020-12-04 01:01:22 +11:00

105 lines
2.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_PARAVIRT_H
#define _ASM_POWERPC_PARAVIRT_H
#include <linux/jump_label.h>
#include <asm/smp.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#include <asm/hvcall.h>
#endif
#ifdef CONFIG_PPC_SPLPAR
#include <asm/kvm_guest.h>
#include <asm/cputhreads.h>
DECLARE_STATIC_KEY_FALSE(shared_processor);
static inline bool is_shared_processor(void)
{
return static_branch_unlikely(&shared_processor);
}
/* If bit 0 is set, the cpu has been preempted */
static inline u32 yield_count_of(int cpu)
{
__be32 yield_count = READ_ONCE(lppaca_of(cpu).yield_count);
return be32_to_cpu(yield_count);
}
static inline void yield_to_preempted(int cpu, u32 yield_count)
{
plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
}
static inline void prod_cpu(int cpu)
{
plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
}
static inline void yield_to_any(void)
{
plpar_hcall_norets(H_CONFER, -1, 0);
}
#else
static inline bool is_shared_processor(void)
{
return false;
}
static inline u32 yield_count_of(int cpu)
{
return 0;
}
extern void ___bad_yield_to_preempted(void);
static inline void yield_to_preempted(int cpu, u32 yield_count)
{
___bad_yield_to_preempted(); /* This would be a bug */
}
extern void ___bad_yield_to_any(void);
static inline void yield_to_any(void)
{
___bad_yield_to_any(); /* This would be a bug */
}
extern void ___bad_prod_cpu(void);
static inline void prod_cpu(int cpu)
{
___bad_prod_cpu(); /* This would be a bug */
}
#endif
#define vcpu_is_preempted vcpu_is_preempted
static inline bool vcpu_is_preempted(int cpu)
{
if (!is_shared_processor())
return false;
#ifdef CONFIG_PPC_SPLPAR
if (!is_kvm_guest()) {
int first_cpu = cpu_first_thread_sibling(smp_processor_id());
/*
* Preemption can only happen at core granularity. This CPU
* is not preempted if one of the CPU of this core is not
* preempted.
*/
if (cpu_first_thread_sibling(cpu) == first_cpu)
return false;
}
#endif
if (yield_count_of(cpu) & 1)
return true;
return false;
}
static inline bool pv_is_native_spin_unlock(void)
{
return !is_shared_processor();
}
#endif /* _ASM_POWERPC_PARAVIRT_H */