device.c (3a8d7c3a7d1bf7d1f2121c1f467d6b349b7bf807) device.c (802f25b6c2c0377c681dd1e4f799a648c3df50dd)
1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#define pr_fmt(fmt) "habanalabs: " fmt

--- 1366 unchanged lines hidden (view full) ---

1375 fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
1376
1377 mutex_lock(fd_lock);
1378 list_for_each_entry(hpriv, fd_list, dev_node)
1379 hpriv->hdev = NULL;
1380 mutex_unlock(fd_lock);
1381}
1382
1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#define pr_fmt(fmt) "habanalabs: " fmt

--- 1366 unchanged lines hidden (view full) ---

1375 fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
1376
1377 mutex_lock(fd_lock);
1378 list_for_each_entry(hpriv, fd_list, dev_node)
1379 hpriv->hdev = NULL;
1380 mutex_unlock(fd_lock);
1381}
1382
1383static void send_disable_pci_access(struct hl_device *hdev, u32 flags)
1384{
1385 /* If reset is due to heartbeat, device CPU is no responsive in
1386 * which case no point sending PCI disable message to it.
1387 */
1388 if ((flags & HL_DRV_RESET_HARD) &&
1389 !(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
1390 /* Disable PCI access from device F/W so he won't send
1391 * us additional interrupts. We disable MSI/MSI-X at
1392 * the halt_engines function and we can't have the F/W
1393 * sending us interrupts after that. We need to disable
1394 * the access here because if the device is marked
1395 * disable, the message won't be send. Also, in case
1396 * of heartbeat, the device CPU is marked as disable
1397 * so this message won't be sent
1398 */
1399 if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0)) {
1400 dev_warn(hdev->dev, "Failed to disable FW's PCI access\n");
1401 return;
1402 }
1403
1404 /* verify that last EQs are handled before disabled is set */
1405 if (hdev->cpu_queues_enable)
1406 synchronize_irq(pci_irq_vector(hdev->pdev,
1407 hdev->asic_prop.eq_interrupt_id));
1408 }
1409}
1410
1383static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
1384{
1385 u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
1386
1387 /* No consecutive mechanism when user context exists */
1388 if (hdev->is_compute_ctx_active)
1411static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
1412{
1413 u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
1414
1415 /* No consecutive mechanism when user context exists */
1416 if (hdev->is_compute_ctx_active)
1389 goto disable_pci;
1417 return;
1390
1391 /*
1392 * 'reset cause' is being updated here, because getting here
1393 * means that it's the 1st time and the last time we're here
1394 * ('in_reset' makes sure of it). This makes sure that
1395 * 'reset_cause' will continue holding its 1st recorded reason!
1396 */
1397 if (flags & HL_DRV_RESET_HEARTBEAT) {

--- 15 unchanged lines hidden (view full) ---

1413 * device is set to an unstable state.
1414 */
1415 if (hdev->reset_info.prev_reset_trigger != cur_reset_trigger) {
1416 hdev->reset_info.prev_reset_trigger = cur_reset_trigger;
1417 hdev->reset_info.reset_trigger_repeated = 0;
1418 } else {
1419 hdev->reset_info.reset_trigger_repeated = 1;
1420 }
1418
1419 /*
1420 * 'reset cause' is being updated here, because getting here
1421 * means that it's the 1st time and the last time we're here
1422 * ('in_reset' makes sure of it). This makes sure that
1423 * 'reset_cause' will continue holding its 1st recorded reason!
1424 */
1425 if (flags & HL_DRV_RESET_HEARTBEAT) {

--- 15 unchanged lines hidden (view full) ---

1441 * device is set to an unstable state.
1442 */
1443 if (hdev->reset_info.prev_reset_trigger != cur_reset_trigger) {
1444 hdev->reset_info.prev_reset_trigger = cur_reset_trigger;
1445 hdev->reset_info.reset_trigger_repeated = 0;
1446 } else {
1447 hdev->reset_info.reset_trigger_repeated = 1;
1448 }
1421
1422 /* If reset is due to heartbeat, device CPU is no responsive in
1423 * which case no point sending PCI disable message to it.
1424 *
1425 * If F/W is performing the reset, no need to send it a message to disable
1426 * PCI access
1427 */
1428
1429disable_pci:
1430 if ((flags & HL_DRV_RESET_HARD) &&
1431 !(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
1432 /* Disable PCI access from device F/W so he won't send
1433 * us additional interrupts. We disable MSI/MSI-X at
1434 * the halt_engines function and we can't have the F/W
1435 * sending us interrupts after that. We need to disable
1436 * the access here because if the device is marked
1437 * disable, the message won't be send. Also, in case
1438 * of heartbeat, the device CPU is marked as disable
1439 * so this message won't be sent
1440 */
1441 if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0))
1442 dev_warn(hdev->dev,
1443 "Failed to disable FW's PCI access\n");
1444 }
1445}
1446
1447/*
1448 * hl_device_reset - reset the device
1449 *
1450 * @hdev: pointer to habanalabs device structure
1451 * @flags: reset flags.
1452 *

--- 104 unchanged lines hidden (view full) ---

1557 }
1558 }
1559
1560 if (delay_reset)
1561 usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);
1562
1563escalate_reset_flow:
1564 handle_reset_trigger(hdev, flags);
1449}
1450
1451/*
1452 * hl_device_reset - reset the device
1453 *
1454 * @hdev: pointer to habanalabs device structure
1455 * @flags: reset flags.
1456 *

--- 104 unchanged lines hidden (view full) ---

1561 }
1562 }
1563
1564 if (delay_reset)
1565 usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);
1566
1567escalate_reset_flow:
1568 handle_reset_trigger(hdev, flags);
1569 send_disable_pci_access(hdev, flags);
1565
1566 /* This also blocks future CS/VM/JOB completion operations */
1567 hdev->disabled = true;
1568
1569 take_release_locks(hdev);
1570
1571 if (hard_reset)
1572 dev_info(hdev->dev, "Going to reset device\n");

--- 1092 unchanged lines hidden ---
1570
1571 /* This also blocks future CS/VM/JOB completion operations */
1572 hdev->disabled = true;
1573
1574 take_release_locks(hdev);
1575
1576 if (hard_reset)
1577 dev_info(hdev->dev, "Going to reset device\n");

--- 1092 unchanged lines hidden ---