Enabling/Disabling vCPUs and Qemu 'coldplug' - Linaro-open-discussions - op-lists.linaro.org

3 Nov 2022

Hi Salil(s),
You mentioned 'cold plug behaves differently'.
From the last call you described 'coldplug' as starting Qemu with '-S' so it doesn't
actually run the guest, then adding vCPUs before releasing Qemu to run the guest. You said
CPUs added this way can't be disabled.
(am I right so far?)
This turns out to be a bit murkier than that. You can disable these vCPUs, but the first
call will fail. The reason is very simple: Qemu is sending a device-check for the first
call, not an eject-request.
Linux prints a warning for the spurious device-check because the CPU already exists and is
even online.
An example flow, with the below debug[0], is:
# qemu -S smp cpus=1,maxcpus=3,cores=3,threads=1,sockets=1 ${REST_OF_OPTIONS}
On the Qemu monitor:
| device_add driver=host-arm-cpu,core-id=1,id=cpu1
| cont
[Qemu boots the guest]
acpi_processor_add() is called twice during boot, once for vCPU0 and once for the vCPU
that was 'coldplugged' vCPU1.
On the Qemu monitor:
| device_del cpu1
[   56.427089] ACPI: XYZZY:ACPI_NOTIFY_DEVICE_CHECK on ACPI0007:1
[   56.428239] ACPI: XYZZY: acpi_scan_device_check() 1 | 1
[   56.429335] CPU: 1 PID: 105 Comm: kworker/u6:2 Not tainted
6.1.0-rc2-00028-g6eaecb5ffd26-dirty #14644
[   56.431043] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
[   56.432431] Workqueue: kacpi_hotplug acpi_hotplug_work_fn
[   56.433520] Call trace:
[   56.434015]  dump_backtrace.part.0+0xe0/0xf0
[   56.434875]  show_stack+0x18/0x40
[   56.435546]  dump_stack_lvl+0x68/0x84
[   56.436308]  dump_stack+0x18/0x34
[   56.436983]  acpi_device_hotplug+0x234/0x4e0
[   56.437847]  acpi_hotplug_work_fn+0x24/0x40
[   56.438695]  process_one_work+0x1d0/0x320
[   56.439515]  worker_thread+0x14c/0x444
[   56.440283]  kthread+0x10c/0x110
[   56.440935]  ret_from_fork+0x10/0x20
[   56.441680] acpi ACPI0007:01: Already enumerated
[ This is because Qemu is adding a CPU that already exists ]
A definition of madness is doing the same thing and expecting a different result.
On the Qemu monitor:
| device_del cpu1
[   67.723708] ACPI: XYZZY:ACPI_NOTIFY_EJECT_REQUEST on ACPI0007:1
[   67.771014] psci: CPU1 killed (polled 0 ms)
[   67.773437] XYZZY: acpi_processor_post_eject()
It looks like Qemu creates the device-check when you cold-plug the vCPU, but doesn't
deliver it, instead it delivers it _instead_ of the next notification.
Qemu v7.1.0 for doesn't do this for x86, nor does it deliver the spurious
ACPI_NOTIFY_DEVICE_CHECK early.
I'd suggest the arm64 changes are generating a ACPI_NOTIFY_DEVICE_CHECK when it shouldn't.
Thanks,
James
[0] Debug

diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index a4d58e0d1452..ec72f2d2a5fb 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -492,6 +496,8 @@ static void acpi_processor_post_eject(struct acpi_device *device)
        unsigned long long sta;
        acpi_status status;
+       pr_err("XYZZY: acpi_processor_post_eject()\n");
+
        if (!device)
                return;
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index d466c8195314..f9f4c7707886 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -467,43 +467,55 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
        u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
        bool hotplug_event = false;
+       adev = acpi_get_acpi_dev(handle);
+       if (!adev)
+               goto err;
+
        switch (type) {
        case ACPI_NOTIFY_BUS_CHECK:
                acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n");
+               pr_err("XYZZY:ACPI_NOTIFY_BUS_CHECK on %s:%s\n", acpi_device_hid(adev),
acpi_device_uid(adev));
                hotplug_event = true;
                break;
case ACPI_NOTIFY_DEVICE_CHECK:
                acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n");
+               pr_err("XYZZY:ACPI_NOTIFY_DEVICE_CHECK on %s:%s\n", acpi_device_hid(adev),
acpi_device_uid(adev));
                hotplug_event = true;
                break;
case ACPI_NOTIFY_DEVICE_WAKE:
                acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n");
+               pr_err("XYZZY:ACPI_NOTIFY_DEVICE_WAKE on %s:%s\n", acpi_device_hid(adev),
acpi_device_uid(adev));
                break;
case ACPI_NOTIFY_EJECT_REQUEST:
                acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n");
+               pr_err("XYZZY:ACPI_NOTIFY_EJECT_REQUEST on %s:%s\n",
acpi_device_hid(adev), acpi_device_uid(adev));
                hotplug_event = true;
                break;
case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
                acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n");
+               pr_err("XYZZY:ACPI_NOTIFY_DEVICE_CHECK_LIGHT on %s:%s\n",
acpi_device_hid(adev), acpi_device_uid(adev));
                /* TBD: Exactly what does 'light' mean? */
                break;
case ACPI_NOTIFY_FREQUENCY_MISMATCH:
                acpi_handle_err(handle, "Device cannot be configured due "
                                "to a frequency mismatch\n");
+               pr_err("XYZZY:ACPI_NOTIFY_FREQUENCY_MISMATCH on %s:%s\n",
acpi_device_hid(adev), acpi_device_uid(adev));
                break;
case ACPI_NOTIFY_BUS_MODE_MISMATCH:
                acpi_handle_err(handle, "Device cannot be configured due "
                                "to a bus mode mismatch\n");
+               pr_err("XYZZY:ACPI_NOTIFY_BUS_MODE_MISMATCH on %s:%s\n",
acpi_device_hid(adev), acpi_device_uid(adev));
                break;
case ACPI_NOTIFY_POWER_FAULT:
                acpi_handle_err(handle, "Device has suffered a power fault\n");
+               pr_err("XYZZY:ACPI_NOTIFY_POWER_FAULT on %s:%s\n", acpi_device_hid(adev),
acpi_device_uid(adev));
                break;
default:
@@ -511,10 +523,6 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
                break;
        }
-       adev = acpi_get_acpi_dev(handle);
-       if (!adev)
-               goto err;
-
        if (adev->dev.driver) {
                struct acpi_driver *driver = to_acpi_driver(adev->dev.driver);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 2d1a82aa1607..b2bc1c611a83 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -380,6 +380,8 @@ static int acpi_scan_device_check(struct acpi_device *adev)
 {
        int error;
+       pr_err("XYZZY: acpi_scan_device_check() %u | %u\n", adev->status.present,
adev->status.functional);
+
        acpi_bus_get_status(adev);
        if (adev->status.present || adev->status.functional) {
                /*
@@ -391,6 +393,7 @@ static int acpi_scan_device_check(struct acpi_device *adev)
                 * again).
                 */
                if (adev->handler) {
+                       dump_stack();
                        dev_warn(&adev->dev, "Already enumerated\n");
                        return -EALREADY;
                }