linux/arch/arm/include/asm/arch_gicv3.h
Mark Rutland 8bf0a8048b arm64: add ARM64_HAS_GIC_PRIO_RELAXED_SYNC cpucap
When Priority Mask Hint Enable (PMHE) == 0b1, the GIC may use the PMR
value to determine whether to signal an IRQ to a PE, and consequently
after a change to the PMR value, a DSB SY may be required to ensure that
interrupts are signalled to a CPU in finite time. When PMHE == 0b0,
interrupts are always signalled to the relevant PE, and all masking
occurs locally, without requiring a DSB SY.
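
As an illustrative sketch (simplified; pmhe_in_use stands in for
however PMHE state is tracked and is not a real kernel symbol), the
sequence being described is:

  /* Unmask: allow the GIC to signal IRQs to this CPU again. */
  gic_write_pmr(GIC_PRIO_IRQON);

  /*
   * When PMHE == 0b1 the GIC itself consumes the PMR value, so a
   * DSB SY is needed to ensure pending interrupts are signalled in
   * finite time; when PMHE == 0b0 masking is purely local and the
   * barrier is unnecessary.
   */
  if (pmhe_in_use)
  	dsb(sy);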

Since commit:

  f226650494 ("arm64: Relax ICC_PMR_EL1 accesses when ICC_CTLR_EL1.PMHE is clear")

... we handle this dynamically: in most cases a static key is used to
determine whether to issue a DSB SY, but the entry code must read from
ICC_CTLR_EL1 as static keys aren't accessible from plain assembly.
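
For reference, the static key approach has roughly this shape (a
simplified sketch of the pre-patch pmr_sync() logic, not verbatim
kernel code):

  static __always_inline void pmr_sync(void)
  {
  	if (static_branch_unlikely(&gic_pmr_sync))
  		dsb(sy);
  }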

It would be much nicer to use an alternative instruction sequence for
the DSB, as this would avoid the need to read from ICC_CTLR_EL1 in the
entry code, and for most other code this will result in simpler code
generation with fewer instructions and fewer branches.

This patch adds a new ARM64_HAS_GIC_PRIO_RELAXED_SYNC cpucap which is
only set when ICC_CTLR_EL1.PMHE == 0b0 (and GIC priority masking is in
use). This allows us to replace the existing users of the
`gic_pmr_sync` static key with alternative sequences which default to a
DSB SY and are relaxed to a NOP when PMHE is not in use.
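
With the cpucap in place, the barrier can instead be patched at boot
via the alternatives framework. A sketch of the resulting helper
(simplified, not verbatim kernel code):

  static __always_inline void pmr_sync(void)
  {
  	/* Defaults to DSB SY; patched to a NOP when PMHE is not in use. */
  	asm volatile(ALTERNATIVE("dsb sy", "nop",
  				 ARM64_HAS_GIC_PRIO_RELAXED_SYNC));
  }

This leaves no ICC_CTLR_EL1 read and no branch on the fast path; the
decision is baked in once at boot.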

The entry assembly management of the PMR is slightly restructured to use
a branch (rather than multiple NOPs) when priority masking is not in
use. This is more in keeping with other alternatives in the entry
assembly, and permits the use of a separate alternative for the
PMHE-dependent DSB SY (and removal of the conditional branch this
currently requires). For consistency I've adjusted both the save and
restore paths.

According to bloat-o-meter, when building defconfig +
CONFIG_ARM64_PSEUDO_NMI=y this shrinks the kernel text by ~4KiB:

| add/remove: 4/2 grow/shrink: 42/310 up/down: 332/-5032 (-4700)

The resulting vmlinux is ~66KiB smaller, though the resulting Image size
is unchanged due to padding and alignment:

| [mark@lakrids:~/src/linux]% ls -al vmlinux-*
| -rwxr-xr-x 1 mark mark 137508344 Jan 17 14:11 vmlinux-after
| -rwxr-xr-x 1 mark mark 137575440 Jan 17 13:49 vmlinux-before
| [mark@lakrids:~/src/linux]% ls -al Image-*
| -rw-r--r-- 1 mark mark 38777344 Jan 17 14:11 Image-after
| -rw-r--r-- 1 mark mark 38777344 Jan 17 13:49 Image-before

Prior to this patch we did not verify the state of ICC_CTLR_EL1.PMHE on
secondary CPUs. As of this patch this is verified by the cpufeature code
when using GIC priority masking (i.e. when using pseudo-NMIs).
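
A sketch of how such a check can be expressed in the cpufeature code
(simplified; based on existing arm64/GICv3 definitions, but not
verbatim):

  static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry,
  					int scope)
  {
  	/*
  	 * Without priority masking this is irrelevant, and ICC_CTLR_EL1
  	 * may not even be accessible.
  	 */
  	if (!system_uses_irq_prio_masking())
  		return false;

  	/* Relaxed synchronization is only safe when PMHE == 0b0. */
  	return !(gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK);
  }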

Note that since commit:

  7e3a57fa6c ("arm64: Document ICC_CTLR_EL3.PMHE setting requirements")

... Documentation/arm64/booting.rst specifies:

|      - ICC_CTLR_EL3.PMHE (bit 6) must be set to the same value across
|        all CPUs the kernel is executing on, and must stay constant
|        for the lifetime of the kernel.

... so that should not adversely affect any compliant systems, and as
we'll only check for the absence of PMHE when using pseudo-NMIs, this
will only fire when such a mismatch would adversely affect the system.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20230130145429.903791-5-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2023-01-31 16:06:17 +00:00


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * arch/arm/include/asm/arch_gicv3.h
 *
 * Copyright (C) 2015 ARM Ltd.
 */
#ifndef __ASM_ARCH_GICV3_H
#define __ASM_ARCH_GICV3_H

#ifndef __ASSEMBLY__

#include <linux/io.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <asm/barrier.h>
#include <asm/cacheflush.h>
#include <asm/cp15.h>

#define ICC_EOIR1			__ACCESS_CP15(c12, 0, c12, 1)
#define ICC_DIR				__ACCESS_CP15(c12, 0, c11, 1)
#define ICC_IAR1			__ACCESS_CP15(c12, 0, c12, 0)
#define ICC_SGI1R			__ACCESS_CP15_64(0, c12)
#define ICC_PMR				__ACCESS_CP15(c4, 0, c6, 0)
#define ICC_CTLR			__ACCESS_CP15(c12, 0, c12, 4)
#define ICC_SRE				__ACCESS_CP15(c12, 0, c12, 5)
#define ICC_IGRPEN1			__ACCESS_CP15(c12, 0, c12, 7)
#define ICC_BPR1			__ACCESS_CP15(c12, 0, c12, 3)
#define ICC_RPR				__ACCESS_CP15(c12, 0, c11, 3)

#define __ICC_AP0Rx(x)			__ACCESS_CP15(c12, 0, c8, 4 | x)
#define ICC_AP0R0			__ICC_AP0Rx(0)
#define ICC_AP0R1			__ICC_AP0Rx(1)
#define ICC_AP0R2			__ICC_AP0Rx(2)
#define ICC_AP0R3			__ICC_AP0Rx(3)

#define __ICC_AP1Rx(x)			__ACCESS_CP15(c12, 0, c9, x)
#define ICC_AP1R0			__ICC_AP1Rx(0)
#define ICC_AP1R1			__ICC_AP1Rx(1)
#define ICC_AP1R2			__ICC_AP1Rx(2)
#define ICC_AP1R3			__ICC_AP1Rx(3)
#define CPUIF_MAP(a32, a64)			\
static inline void write_ ## a64(u32 val)	\
{						\
	write_sysreg(val, a32);			\
}						\
static inline u32 read_ ## a64(void)		\
{						\
	return read_sysreg(a32);		\
}

CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1)
CPUIF_MAP(ICC_PMR, ICC_PMR_EL1)
CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1)
CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1)
CPUIF_MAP(ICC_AP0R2, ICC_AP0R2_EL1)
CPUIF_MAP(ICC_AP0R3, ICC_AP0R3_EL1)
CPUIF_MAP(ICC_AP1R0, ICC_AP1R0_EL1)
CPUIF_MAP(ICC_AP1R1, ICC_AP1R1_EL1)
CPUIF_MAP(ICC_AP1R2, ICC_AP1R2_EL1)
CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1)

#define read_gicreg(r)			read_##r()
#define write_gicreg(v, r)		write_##r(v)
/* Low-level accessors */
static inline void gic_write_dir(u32 val)
{
	write_sysreg(val, ICC_DIR);
	isb();
}

static inline u32 gic_read_iar(void)
{
	u32 irqstat = read_sysreg(ICC_IAR1);

	dsb(sy);
	return irqstat;
}

static inline void gic_write_ctlr(u32 val)
{
	write_sysreg(val, ICC_CTLR);
	isb();
}

static inline u32 gic_read_ctlr(void)
{
	return read_sysreg(ICC_CTLR);
}

static inline void gic_write_grpen1(u32 val)
{
	write_sysreg(val, ICC_IGRPEN1);
	isb();
}

static inline void gic_write_sgi1r(u64 val)
{
	write_sysreg(val, ICC_SGI1R);
}

static inline u32 gic_read_sre(void)
{
	return read_sysreg(ICC_SRE);
}

static inline void gic_write_sre(u32 val)
{
	write_sysreg(val, ICC_SRE);
	isb();
}

static inline void gic_write_bpr1(u32 val)
{
	write_sysreg(val, ICC_BPR1);
}

static inline u32 gic_read_pmr(void)
{
	return read_sysreg(ICC_PMR);
}

static inline void gic_write_pmr(u32 val)
{
	write_sysreg(val, ICC_PMR);
}

static inline u32 gic_read_rpr(void)
{
	return read_sysreg(ICC_RPR);
}
/*
 * Even in 32bit systems that use LPAE, there is no guarantee that the I/O
 * interface provides true 64bit atomic accesses, so using strd/ldrd doesn't
 * make much sense.
 * Moreover, 64bit I/O emulation is extremely difficult to implement on
 * AArch32, since the syndrome register doesn't provide any information for
 * them.
 * Consequently, the following IO helpers use 32bit accesses.
 */
static inline void __gic_writeq_nonatomic(u64 val, volatile void __iomem *addr)
{
	writel_relaxed((u32)val, addr);
	writel_relaxed((u32)(val >> 32), addr + 4);
}

static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr)
{
	u64 val;

	val = readl_relaxed(addr);
	val |= (u64)readl_relaxed(addr + 4) << 32;
	return val;
}
#define gic_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))

/*
 * GICD_IROUTERn contain the affinity values associated with each interrupt.
 * The upper-word (aff3) will always be 0, so there is no need for a lock.
 */
#define gic_write_irouter(v, c)		__gic_writeq_nonatomic(v, c)

/*
 * GICR_TYPER is an ID register and doesn't need atomicity.
 */
#define gic_read_typer(c)		__gic_readq_nonatomic(c)

/*
 * GITS_BASER - hi and lo bits may be accessed independently.
 */
#define gits_read_baser(c)		__gic_readq_nonatomic(c)
#define gits_write_baser(v, c)		__gic_writeq_nonatomic(v, c)

/*
 * GICR_PENDBASER and GICR_PROPBASER are changed with LPIs disabled, so they
 * won't be in use during any updates and can be changed non-atomically.
 */
#define gicr_read_propbaser(c)		__gic_readq_nonatomic(c)
#define gicr_write_propbaser(v, c)	__gic_writeq_nonatomic(v, c)
#define gicr_read_pendbaser(c)		__gic_readq_nonatomic(c)
#define gicr_write_pendbaser(v, c)	__gic_writeq_nonatomic(v, c)

/*
 * GICR_xLPIR - only the lower bits are significant
 */
#define gic_read_lpir(c)		readl_relaxed(c)
#define gic_write_lpir(v, c)		writel_relaxed(lower_32_bits(v), c)
/*
 * GITS_TYPER is an ID register and doesn't need atomicity.
 */
#define gits_read_typer(c)		__gic_readq_nonatomic(c)

/*
 * GITS_CBASER - hi and lo bits may be accessed independently.
 */
#define gits_read_cbaser(c)		__gic_readq_nonatomic(c)
#define gits_write_cbaser(v, c)		__gic_writeq_nonatomic(v, c)

/*
 * GITS_CWRITER - hi and lo bits may be accessed independently.
 */
#define gits_write_cwriter(v, c)	__gic_writeq_nonatomic(v, c)

/*
 * GICR_VPROPBASER - hi and lo bits may be accessed independently.
 */
#define gicr_read_vpropbaser(c)		__gic_readq_nonatomic(c)
#define gicr_write_vpropbaser(v, c)	__gic_writeq_nonatomic(v, c)

/*
 * GICR_VPENDBASER - the Valid bit must be cleared before changing
 * anything else.
 */
static inline void gicr_write_vpendbaser(u64 val, void __iomem *addr)
{
	u32 tmp;

	tmp = readl_relaxed(addr + 4);
	if (tmp & (GICR_VPENDBASER_Valid >> 32)) {
		tmp &= ~(GICR_VPENDBASER_Valid >> 32);
		writel_relaxed(tmp, addr + 4);
	}

	/*
	 * Use the fact that __gic_writeq_nonatomic writes the second
	 * half of the 64bit quantity after the first.
	 */
	__gic_writeq_nonatomic(val, addr);
}

#define gicr_read_vpendbaser(c)		__gic_readq_nonatomic(c)
static inline bool gic_prio_masking_enabled(void)
{
	return false;
}

static inline void gic_pmr_mask_irqs(void)
{
	/* Should not get called. */
	WARN_ON_ONCE(true);
}

static inline void gic_arch_enable_irqs(void)
{
	/* Should not get called. */
	WARN_ON_ONCE(true);
}

static inline bool gic_has_relaxed_pmr_sync(void)
{
	return false;
}
#endif /* !__ASSEMBLY__ */
#endif /* !__ASM_ARCH_GICV3_H */