1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00
linux/drivers/platform/x86/intel/ifs/runtest.c
Linus Torvalds 59fff63cc2 platform-drivers-x86 for v6.7-1
Highlights:
  - asus-wmi:		Support for screenpad and solve brightness key
 			press duplication
  - int3472:		Eliminate the last use of deprecated GPIO functions
  - mlxbf-pmc:		New HW support
  - msi-ec:		Support new EC configurations
  - thinkpad_acpi:	Support reading aux MAC address during passthrough
  - wmi: 		Fixes & improvements
  - x86-android-tablets:	Detection fix and avoid use of GPIO private APIs
  - Debug & metrics interface improvements
  - Miscellaneous cleanups / fixes / improvements
 
 The following is an automated shortlog grouped by driver:
 
 acer-wmi:
  -  Remove void function return
 
 amd/hsmp:
  -  add support for metrics tbl
  -  create plat specific struct
  -  Fix iomem handling
  -  improve the error log
 
 amd/pmc:
  -  Add dump_custom_stb module parameter
  -  Add PMFW command id to support S2D force flush
  -  Handle overflow cases where the num_samples range is higher
  -  Use flex array when calling amd_pmc_stb_debugfs_open_v2()
 
 asus-wireless:
  -  Replace open coded acpi_match_acpi_device()
 
 asus-wmi:
  -  add support for ASUS screenpad
  -  Do not report brightness up/down keys when also reported by acpi_video
 
 gpiolib: acpi:
  -  Add a ignore interrupt quirk for Peaq C1010
  -  Check if a GPIO is listed in ignore_interrupt earlier
 
 hp-bioscfg:
  -  Annotate struct bios_args with __counted_by
 
 inspur-platform-profile:
  -  Add platform profile support
 
 int3472:
  -  Add new skl_int3472_fill_gpiod_lookup() helper
  -  Add new skl_int3472_gpiod_get_from_temp_lookup() helper
  -  Stop using gpiod_toggle_active_low()
  -  Switch to devm_get_gpiod()
 
 intel: bytcrc_pwrsrc:
  -  Convert to platform remove callback returning void
 
 intel/ifs:
  -  Add new CPU support
  -  Add new error code
  -  ARRAY BIST for Sierra Forest
  -  Gen2 scan image loading
  -  Gen2 Scan test support
  -  Metadata validation for start_chunk
  -  Refactor image loading code
  -  Store IFS generation number
  -  Validate image size
 
 intel_speed_select_if:
  -  Remove hardcoded map size
  -  Use devm_ioremap_resource
 
 intel/tpmi:
  -  Add debugfs support for read/write blocked
  -  Add defines to get version information
 
 intel-uncore-freq:
  -  Ignore minor version change
 
 ISST:
  -  Allow level 0 to be not present
  -  Ignore minor version change
  -  Use fuse enabled mask instead of allowed levels
 
 mellanox:
  -  Fix misspelling error in routine name
  -  Rename some init()/exit() functions for consistent naming
 
 mlxbf-bootctl:
  -  Convert to platform remove callback returning void
 
 mlxbf-pmc:
  -  Add support for BlueField-3
 
 mlxbf-tmfifo:
  -  Convert to platform remove callback returning void
 
 mlx-Convert to platform remove callback returning void:
  - mlx-Convert to platform remove callback returning void
 
 mlxreg-hotplug:
  -  Convert to platform remove callback returning void
 
 mlxreg-io:
  -  Convert to platform remove callback returning void
 
 mlxreg-lc:
  -  Convert to platform remove callback returning void
 
 msi-ec:
  -  Add more EC configs
  -  rename fn_super_swap
 
 nvsw-sn2201:
  -  Convert to platform remove callback returning void
 
 sel3350-Convert to platform remove callback returning void:
  - sel3350-Convert to platform remove callback returning void
 
 siemens: simatic-ipc-batt-apollolake:
  -  Convert to platform remove callback returning void
 
 siemens: simatic-ipc-batt:
  -  Convert to platform remove callback returning void
 
 siemens: simatic-ipc-batt-elkhartlake:
  -  Convert to platform remove callback returning void
 
 siemens: simatic-ipc-batt-f7188x:
  -  Convert to platform remove callback returning void
 
 siemens: simatic-ipc-batt:
  -  Simplify simatic_ipc_batt_remove()
 
 surface: acpi-notify:
  -  Convert to platform remove callback returning void
 
 surface: aggregator:
  -  Annotate struct ssam_event with __counted_by
 
 surface: aggregator-cdev:
  -  Convert to platform remove callback returning void
 
 surface: aggregator-registry:
  -  Convert to platform remove callback returning void
 
 surface: dtx:
  -  Convert to platform remove callback returning void
 
 surface: gpe:
  -  Convert to platform remove callback returning void
 
 surface: hotplug:
  -  Convert to platform remove callback returning void
 
 surface: surface3-wmi:
  -  Convert to platform remove callback returning void
 
 think-lmi:
  -  Add bulk save feature
  -  Replace kstrdup() + strreplace() with kstrdup_and_replace()
  -  Use strreplace() to replace a character by nul
 
 thinkpad_acpi:
  -  Add battery quirk for Thinkpad X120e
  -  replace deprecated strncpy with memcpy
  -  sysfs interface to auxmac
 
 tools/power/x86/intel-speed-select:
  -  Display error for core-power support
  -  Increase max CPUs in one request
  -  No TRL for non compute domains
  -  Sanitize integer arguments
  -  turbo-mode enable disable swapped
  -  Update help for TRL
  -  Use cgroup isolate for CPU 0
  -  v1.18 release
 
 wmi:
  -  Decouple probe deferring from wmi_block_list
  -  Decouple WMI device removal from wmi_block_list
  -  Fix opening of char device
  -  Fix probe failure when failing to register WMI devices
  -  Fix refcounting of WMI devices in legacy functions
 
 x86-android-tablets:
  -  Add a comment about x86_android_tablet_get_gpiod()
  -  Create a platform_device from module_init()
  -  Drop "linux,power-supply-name" from lenovo_yt3_bq25892_0_props[]
  -  Fix Lenovo Yoga Tablet 2 830F/L vs 1050F/L detection
  -  Remove invalid_aei_gpiochip from Peaq C1010
  -  Remove invalid_aei_gpiochip support
  -  Stop using gpiolib private APIs
  -  Use platform-device as gpio-keys parent
 
 xo15-ebook:
  -  Replace open coded acpi_match_acpi_device()
 
 Merges:
  -  Merge branch 'pdx86/platform-drivers-x86-int3472' into review-ilpo
  -  Merge branch 'pdx86/platform-drivers-x86-mellanox-init' into review-ilpo
  -  Merge remote-tracking branch 'intel-speed-select/intel-sst' into review-ilpo
  -  Merge remote-tracking branch 'pdx86/platform-drivers-x86-android-tablets' into review-hans
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQSCSUwRdwTNL2MhaBlZrE9hU+XOMQUCZT+lBwAKCRBZrE9hU+XO
 Mck0AQCFU7dYLCF4d1CXtHf1eZhSXLpYdhcO+C08JGGoM+MqSgD+Jyb9KJHk4pxE
 FvKG51I9neyAne9lvNrLodHRzxCYgAo=
 =duM8
 -----END PGP SIGNATURE-----

Merge tag 'platform-drivers-x86-v6.7-1' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86

Pull x86 platform driver updates from Ilpo Järvinen:

 - asus-wmi: Support for screenpad and solve brightness key press
   duplication

 - int3472: Eliminate the last use of deprecated GPIO functions

 - mlxbf-pmc: New HW support

 - msi-ec: Support new EC configurations

 - thinkpad_acpi: Support reading aux MAC address during passthrough

 - wmi: Fixes & improvements

 - x86-android-tablets: Detection fix and avoid use of GPIO private APIs

 - Debug & metrics interface improvements

 - Miscellaneous cleanups / fixes / improvements

* tag 'platform-drivers-x86-v6.7-1' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86: (80 commits)
  platform/x86: inspur-platform-profile: Add platform profile support
  platform/x86: thinkpad_acpi: Add battery quirk for Thinkpad X120e
  platform/x86: wmi: Decouple WMI device removal from wmi_block_list
  platform/x86: wmi: Fix opening of char device
  platform/x86: wmi: Fix probe failure when failing to register WMI devices
  platform/x86: wmi: Fix refcounting of WMI devices in legacy functions
  platform/x86: wmi: Decouple probe deferring from wmi_block_list
  platform/x86/amd/hsmp: Fix iomem handling
  platform/x86: asus-wmi: Do not report brightness up/down keys when also reported by acpi_video
  platform/x86: thinkpad_acpi: replace deprecated strncpy with memcpy
  tools/power/x86/intel-speed-select: v1.18 release
  tools/power/x86/intel-speed-select: Use cgroup isolate for CPU 0
  tools/power/x86/intel-speed-select: Increase max CPUs in one request
  tools/power/x86/intel-speed-select: Display error for core-power support
  tools/power/x86/intel-speed-select: No TRL for non compute domains
  tools/power/x86/intel-speed-select: turbo-mode enable disable swapped
  tools/power/x86/intel-speed-select: Update help for TRL
  tools/power/x86/intel-speed-select: Sanitize integer arguments
  platform/x86: acer-wmi: Remove void function return
  platform/x86/amd/pmc: Add dump_custom_stb module parameter
  ...
2023-10-31 17:53:00 -10:00

403 lines
11 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2022 Intel Corporation. */
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/nmi.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include "ifs.h"
/*
* Note all code and data in this file is protected by
* ifs_sem. On HT systems all threads on a core will
* execute together, but only the first thread on the
* core will update results of the test.
*/
#define CREATE_TRACE_POINTS
#include <trace/events/intel_ifs.h>
/*
 * Max retries on the same chunk. ifs_test_core() reloads this count whenever
 * the reported chunk number advances and gives up when it reaches zero.
 */
#define MAX_IFS_RETRIES 5
/*
 * Number of TSC cycles that a logical CPU will wait for the other
 * logical CPU on the core in the WRMSR(ACTIVATE_SCAN).
 * ifs_test_core() places this value in the delay field of the activate command.
 */
#define IFS_THREAD_WAIT 100000
/*
 * Values of the error_code field of the scan status read back from
 * MSR_SCAN_STATUS. Each value is also the index of its description
 * in scan_test_status[].
 */
enum ifs_status_err_code {
	IFS_NO_ERROR				= 0x0,
	IFS_OTHER_THREAD_COULD_NOT_JOIN		= 0x1,
	IFS_INTERRUPTED_BEFORE_RENDEZVOUS	= 0x2,
	IFS_POWER_MGMT_INADEQUATE_FOR_SCAN	= 0x3,
	IFS_INVALID_CHUNK_RANGE			= 0x4,
	IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS	= 0x5,
	IFS_CORE_NOT_CAPABLE_CURRENTLY		= 0x6,
	IFS_UNASSIGNED_ERROR_CODE		= 0x7,
	IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT	= 0x8,
	IFS_INTERRUPTED_DURING_EXECUTION	= 0x9,
	IFS_UNASSIGNED_ERROR_CODE_0xA		= 0xA,
	IFS_CORRUPTED_CHUNK			= 0xB,
};
/*
 * Human readable description for each enum ifs_status_err_code value,
 * indexed by the error code. Used by message_not_tested() when a scan
 * did not start.
 */
static const char * const scan_test_status[] = {
	[IFS_NO_ERROR] =
		"SCAN no error",
	[IFS_OTHER_THREAD_COULD_NOT_JOIN] =
		"Other thread could not join.",
	[IFS_INTERRUPTED_BEFORE_RENDEZVOUS] =
		"Interrupt occurred prior to SCAN coordination.",
	[IFS_POWER_MGMT_INADEQUATE_FOR_SCAN] =
		"Core Abort SCAN Response due to power management condition.",
	[IFS_INVALID_CHUNK_RANGE] =
		"Non valid chunks in the range",
	[IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS] =
		"Mismatch in arguments between threads T0/T1.",
	[IFS_CORE_NOT_CAPABLE_CURRENTLY] =
		"Core not capable of performing SCAN currently",
	[IFS_UNASSIGNED_ERROR_CODE] =
		"Unassigned error code 0x7",
	[IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT] =
		"Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently",
	[IFS_INTERRUPTED_DURING_EXECUTION] =
		"Interrupt occurred prior to SCAN start",
	[IFS_UNASSIGNED_ERROR_CODE_0xA] =
		"Unassigned error code 0xA",
	[IFS_CORRUPTED_CHUNK] =
		"Scan operation aborted due to corrupted image. Try reloading",
};
/*
 * Log, at info level, why a scan did not run to completion on the core
 * that @cpu belongs to.
 *
 * @dev:    device used for the log message
 * @cpu:    logical CPU whose SMT sibling mask is printed
 * @status: final scan status; error_code selects the message
 */
static void message_not_tested(struct device *dev, int cpu, union ifs_status status)
{
	const struct cpumask *siblings = cpu_smt_mask(cpu);
	unsigned int code = status.error_code;

	/* Codes covered by the description table */
	if (code < ARRAY_SIZE(scan_test_status)) {
		dev_info(dev, "CPU(s) %*pbl: SCAN operation did not start. %s\n",
			 cpumask_pr_args(siblings), scan_test_status[code]);
		return;
	}

	/* Codes set by ifs_test_core() itself rather than read from the MSR */
	if (code == IFS_SW_TIMEOUT) {
		dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n",
			 cpumask_pr_args(siblings));
		return;
	}

	if (code == IFS_SW_PARTIAL_COMPLETION) {
		dev_info(dev, "CPU(s) %*pbl: %s\n",
			 cpumask_pr_args(siblings),
			 "Not all scan chunks were executed. Maximum forward progress retries exceeded");
		return;
	}

	/* Anything else: dump the raw status word */
	dev_info(dev, "CPU(s) %*pbl: SCAN unknown status %llx\n",
		 cpumask_pr_args(siblings), status.data);
}
/*
 * Log, at error level, the reason(s) a scan test failed on the core that
 * @cpu belongs to. Both messages are printed when both error bits are set.
 *
 * @dev:    IFS device; also supplies the batch and image version to print
 * @cpu:    logical CPU whose SMT sibling mask is printed
 * @status: final scan status carrying control_error / signature_error
 */
static void message_fail(struct device *dev, int cpu, union ifs_status status)
{
	struct ifs_data *data = ifs_get_data(dev);
	const struct cpumask *siblings = cpu_smt_mask(cpu);

	/*
	 * control_error: the microcode ran into a problem loading the image
	 * from the reserved BIOS memory, or the image has been corrupted.
	 * Reloading the image may fix this issue.
	 */
	if (status.control_error)
		dev_err(dev, "CPU(s) %*pbl: could not execute from loaded scan image. Batch: %02x version: 0x%x\n",
			cpumask_pr_args(siblings), data->cur_batch, data->loaded_version);

	/*
	 * signature_error: the output from the scan chains does not match
	 * the expected signature. This might be a transient problem (e.g.
	 * a bit flip from an alpha particle or neutron). If it repeats on a
	 * subsequent test, it indicates an actual problem in the core
	 * being tested.
	 */
	if (status.signature_error)
		dev_err(dev, "CPU(s) %*pbl: test signature incorrect. Batch: %02x version: 0x%x\n",
			cpumask_pr_args(siblings), data->cur_batch, data->loaded_version);
}
static bool can_restart(union ifs_status status)
{
enum ifs_status_err_code err_code = status.error_code;
/* Signature for chunk is bad, or scan test failed */
if (status.signature_error || status.control_error)
return false;
switch (err_code) {
case IFS_NO_ERROR:
case IFS_OTHER_THREAD_COULD_NOT_JOIN:
case IFS_INTERRUPTED_BEFORE_RENDEZVOUS:
case IFS_POWER_MGMT_INADEQUATE_FOR_SCAN:
case IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT:
case IFS_INTERRUPTED_DURING_EXECUTION:
return true;
case IFS_INVALID_CHUNK_RANGE:
case IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS:
case IFS_CORE_NOT_CAPABLE_CURRENTLY:
case IFS_UNASSIGNED_ERROR_CODE:
case IFS_UNASSIGNED_ERROR_CODE_0xA:
case IFS_CORRUPTED_CHUNK:
break;
}
return false;
}
/*
* Execute the scan. Called "simultaneously" on all threads of a core
* at high priority using the stop_cpus mechanism.
*/
static int doscan(void *data)
{
int cpu = smp_processor_id();
u64 *msrs = data;
int first;
/* Only the first logical CPU on a core reports result */
first = cpumask_first(cpu_smt_mask(cpu));
/*
* This WRMSR will wait for other HT threads to also write
* to this MSR (at most for activate.delay cycles). Then it
* starts scan of each requested chunk. The core scan happens
* during the "execution" of the WRMSR. This instruction can
* take up to 200 milliseconds (in the case where all chunks
* are processed in a single pass) before it retires.
*/
wrmsrl(MSR_ACTIVATE_SCAN, msrs[0]);
if (cpu == first) {
/* Pass back the result of the scan */
rdmsrl(MSR_SCAN_STATUS, msrs[1]);
}
return 0;
}
/*
* Use stop_core_cpuslocked() to synchronize writing to MSR_ACTIVATE_SCAN
* on all threads of the core to be tested. Loop if necessary to complete
* run of all chunks. Include some defensive tests to make sure forward
* progress is made, and that the whole test completes in a reasonable time.
*/
static void ifs_test_core(int cpu, struct device *dev)
{
union ifs_scan activate;
union ifs_status status;
unsigned long timeout;
struct ifs_data *ifsd;
int to_start, to_stop;
int status_chunk;
u64 msrvals[2];
int retries;
ifsd = ifs_get_data(dev);
activate.gen0.rsvd = 0;
activate.delay = IFS_THREAD_WAIT;
activate.sigmce = 0;
to_start = 0;
to_stop = ifsd->valid_chunks - 1;
if (ifsd->generation) {
activate.gen2.start = to_start;
activate.gen2.stop = to_stop;
} else {
activate.gen0.start = to_start;
activate.gen0.stop = to_stop;
}
timeout = jiffies + HZ / 2;
retries = MAX_IFS_RETRIES;
while (to_start <= to_stop) {
if (time_after(jiffies, timeout)) {
status.error_code = IFS_SW_TIMEOUT;
break;
}
msrvals[0] = activate.data;
stop_core_cpuslocked(cpu, doscan, msrvals);
status.data = msrvals[1];
trace_ifs_status(cpu, to_start, to_stop, status.data);
/* Some cases can be retried, give up for others */
if (!can_restart(status))
break;
status_chunk = ifsd->generation ? status.gen2.chunk_num : status.gen0.chunk_num;
if (status_chunk == to_start) {
/* Check for forward progress */
if (--retries == 0) {
if (status.error_code == IFS_NO_ERROR)
status.error_code = IFS_SW_PARTIAL_COMPLETION;
break;
}
} else {
retries = MAX_IFS_RETRIES;
if (ifsd->generation)
activate.gen2.start = status_chunk;
else
activate.gen0.start = status_chunk;
to_start = status_chunk;
}
}
/* Update status for this core */
ifsd->scan_details = status.data;
if (status.control_error || status.signature_error) {
ifsd->status = SCAN_TEST_FAIL;
message_fail(dev, cpu, status);
} else if (status.error_code) {
ifsd->status = SCAN_NOT_TESTED;
message_not_tested(dev, cpu, status);
} else {
ifsd->status = SCAN_TEST_PASS;
}
}
/* Length of one busy-wait step in wait_for_sibling_cpu(), passed to ndelay() */
#define SPINUNIT 100 /* 100 nsec */
/*
 * Rendezvous counter for do_array_test(): each sibling increments it in
 * wait_for_sibling_cpu(); ifs_array_test_core() resets it to 0 before every run.
 */
static atomic_t array_cpus_out;
/*
 * Simplified cpu sibling rendezvous loop based on microcode loader __wait_for_cpus()
 *
 * @t:       counter shared by the siblings; incremented once on entry
 * @timeout: maximum time to spin, in nanoseconds
 *
 * Returns once every SMT sibling of the current CPU has incremented @t,
 * or when less than one SPINUNIT of @timeout remains.
 */
static void wait_for_sibling_cpu(atomic_t *t, long long timeout)
{
	int nr_siblings = cpumask_weight(cpu_smt_mask(smp_processor_id()));

	atomic_inc(t);

	while (atomic_read(t) < nr_siblings && timeout >= SPINUNIT) {
		ndelay(SPINUNIT);
		timeout -= SPINUNIT;
		/* Spinning here is intentional; keep the NMI watchdog quiet */
		touch_nmi_watchdog();
	}
}
/*
 * Per-thread callback for the array test, run on each sibling through
 * stop_core_cpuslocked().
 *
 * @data: union ifs_array command; written to MSR_ARRAY_BIST and then
 *        overwritten with the value read back from the same MSR.
 *
 * Always returns 0.
 */
static int do_array_test(void *data)
{
	union ifs_array *cmd = data;
	int this_cpu = smp_processor_id();

	/*
	 * Only one logical CPU on a core needs to trigger the Array test via MSR write.
	 */
	if (this_cpu == cpumask_first(cpu_smt_mask(this_cpu))) {
		wrmsrl(MSR_ARRAY_BIST, cmd->data);
		/* Pass back the result of the test */
		rdmsrl(MSR_ARRAY_BIST, cmd->data);
	}

	/* Tests complete faster if the sibling is spinning here */
	wait_for_sibling_cpu(&array_cpus_out, NSEC_PER_SEC);

	return 0;
}
/*
 * Run the array test on the core containing @cpu, re-issuing the command
 * until array_bitmask reads back as zero, a ctrl_result is reported, or
 * half a second has elapsed.
 *
 * @cpu: a logical CPU on the core to test
 * @dev: IFS device whose ifs_data receives ->scan_details and ->status
 */
static void ifs_array_test_core(int cpu, struct device *dev)
{
	struct ifs_data *ifsd = ifs_get_data(dev);
	union ifs_array command = {};
	unsigned long deadline;
	bool timed_out = false;

	/* Start with every bit of the array mask set */
	command.array_bitmask = ~0U;
	deadline = jiffies + HZ / 2;

	for (;;) {
		if (time_after(jiffies, deadline)) {
			timed_out = true;
			break;
		}

		/* Fresh rendezvous count for this pass */
		atomic_set(&array_cpus_out, 0);
		stop_core_cpuslocked(cpu, do_array_test, &command);

		/* Stop on a reported result, or once no array bits remain */
		if (command.ctrl_result || !command.array_bitmask)
			break;
	}

	ifsd->scan_details = command.data;

	if (command.ctrl_result)
		ifsd->status = SCAN_TEST_FAIL;
	else if (timed_out || command.array_bitmask)
		ifsd->status = SCAN_NOT_TESTED;
	else
		ifsd->status = SCAN_TEST_PASS;
}
/* Value do_array_test_gen1() writes to MSR_ARRAY_TRIGGER to start the test */
#define ARRAY_GEN1_TEST_ALL_ARRAYS 0x0ULL
/* Bit of the MSR_ARRAY_STATUS value that ifs_array_test_gen1() treats as failure */
#define ARRAY_GEN1_STATUS_FAIL 0x1ULL
static int do_array_test_gen1(void *status)
{
int cpu = smp_processor_id();
int first;
first = cpumask_first(cpu_smt_mask(cpu));
if (cpu == first) {
wrmsrl(MSR_ARRAY_TRIGGER, ARRAY_GEN1_TEST_ALL_ARRAYS);
rdmsrl(MSR_ARRAY_STATUS, *((u64 *)status));
}
return 0;
}
/*
 * Run the gen1 array test once on the core containing @cpu and record
 * the outcome.
 *
 * @cpu: a logical CPU on the core to test
 * @dev: IFS device whose ifs_data receives ->scan_details and ->status
 */
static void ifs_array_test_gen1(int cpu, struct device *dev)
{
	struct ifs_data *ifsd = ifs_get_data(dev);
	u64 result = 0;

	stop_core_cpuslocked(cpu, do_array_test_gen1, &result);

	ifsd->scan_details = result;
	ifsd->status = (result & ARRAY_GEN1_STATUS_FAIL) ? SCAN_TEST_FAIL
							 : SCAN_TEST_PASS;
}
/*
 * Initiate per core test. Dispatches to the scan test or one of the array
 * tests according to the test type of @dev; the selected routine runs the
 * test on @cpu and its siblings and stores the outcome in the device's
 * ifs_data.
 *
 * @cpu: logical CPU on the core to test; must be online
 * @dev: IFS device selecting the test type
 *
 * Returns 0 when a test routine was invoked, -EINVAL when @cpu is offline
 * or the test type is not recognised, and -EPERM for the scan test when
 * ifsd->loaded is not set.
 */
int do_core_test(int cpu, struct device *dev)
{
	const struct ifs_test_caps *test = ifs_get_test_caps(dev);
	struct ifs_data *ifsd = ifs_get_data(dev);
	int ret = 0;

	/* Prevent CPUs from being taken offline during the scan test */
	cpus_read_lock();

	if (!cpu_online(cpu)) {
		dev_info(dev, "cannot test on the offline cpu %d\n", cpu);
		ret = -EINVAL;
	} else if (test->test_num == IFS_TYPE_SAF) {
		if (ifsd->loaded)
			ifs_test_core(cpu, dev);
		else
			ret = -EPERM;
	} else if (test->test_num == IFS_TYPE_ARRAY_BIST) {
		if (ifsd->array_gen == ARRAY_GEN0)
			ifs_array_test_core(cpu, dev);
		else
			ifs_array_test_gen1(cpu, dev);
	} else {
		ret = -EINVAL;
	}

	cpus_read_unlock();

	return ret;
}