/* * nvlink-core.c: * This driver manages the entire NVLINK system that the Tegra SOC is connected * to. The NVLINK core driver interfaces with the NVLINK endpoint drivers. Each * endpoint driver is responsible for the HW programming of 1 particular NVLINK * device. The core driver uses the endpoint drivers to manage the NVLINK * system. * * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. * * This program is distributed in the hope it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #define NVLINK_MODULE_NAME "nvlink-core" #define NVLINK_DEBUGFS_ROOT "nvlink" #define NVLINK_DEBUGFS_TESTS "tests" #define DEFAULT_LOOP_SLEEP_US 100 #define DEFAULT_LOOP_TIMEOUT_US 1000000 #define NVLINK_TRANSITION_HS_TIMEOUT_MS 2000 #define NVLINK_TRANSITION_SAFE_TIMEOUT_MS 5 struct nvlink_intranode_conn { struct nvlink_device *ndev0; struct nvlink_device *ndev1; }; struct topology { int slave_dev_id; int master_dev_id; int slave_link_id; int master_link_id; }; struct nvlink_core { struct nvlink_device *ndevs[NVLINK_MAX_DEVICES]; struct nvlink_link *nlinks[NVLINK_MAX_LINKS]; struct topology topology; struct nvlink_intranode_conn intranode_conn; struct mutex mutex; }; /* * We're exporting the NVLINK driver stack's logging APIs to the NVLINK kernel * test modules. The logging APIs use nvlink_log_mask. Therefore, we have to * export nvlink_log_mask along with the logging APIs. 
*/ u32 nvlink_log_mask = NVLINK_DEFAULT_LOG_MASK; EXPORT_SYMBOL(nvlink_log_mask); #ifdef CONFIG_DEBUG_FS /* This is the root debugfs directory for the entire NVLINK driver stack */ struct dentry *nvlink_debugfs_root; /* * This is the parent debugfs directory for NVLINK tests. We need to export this * symbol so that the NVLINK kernel test modules can create their debugfs nodes * under the correct path. */ struct dentry *nvlink_debugfs_tests; EXPORT_SYMBOL(nvlink_debugfs_tests); #endif /* CONFIG_DEBUG_FS */ static struct nvlink_core nvlink_core; static bool nvlink_is_single_lane_mode_supported( struct nvlink_intranode_conn *conn) { /* * Single-lane mode is supported on the connection * only when both of the nvlink devices support this feature. */ return (conn->ndev0->link.is_sl_supported && conn->ndev1->link.is_sl_supported); } int nvlink_get_init_state(struct nvlink_device *ndev, enum init_state *state) { int ret = 0; if (!ndev) { nvlink_err("Invalid device struct pointer"); return -EINVAL; } mutex_lock(&ndev->init_state_mutex); *state = ndev->init_state; mutex_unlock(&ndev->init_state_mutex); return ret; } EXPORT_SYMBOL(nvlink_get_init_state); int nvlink_set_init_state(struct nvlink_device *ndev, enum init_state state) { int ret = 0; if (!ndev) { nvlink_err("Invalid device struct pointer"); return -EINVAL; } if ((state >= NVLINK_INIT_STATE_INVALID) || (state < 0)) { nvlink_err("Invalid init state"); return -EINVAL; } mutex_lock(&ndev->init_state_mutex); ndev->init_state = state; mutex_unlock(&ndev->init_state_mutex); return ret; } EXPORT_SYMBOL(nvlink_set_init_state); void nvlink_print_topology(void) { struct topology *topology = NULL; mutex_lock(&nvlink_core.mutex); topology = &(nvlink_core.topology); if (topology->master_dev_id == -1) { nvlink_err("Topology information not present"); mutex_unlock(&nvlink_core.mutex); return; } nvlink_dbg("Master device ID: %d", topology->master_dev_id); nvlink_dbg("Slave device ID: %d", topology->slave_dev_id); nvlink_dbg("Master 
link ID: %d", topology->master_link_id); nvlink_dbg("Slave link ID: %d", topology->slave_link_id); if ((topology->master_dev_id == NVLINK_ENDPT_T19X) && (topology->slave_dev_id == NVLINK_ENDPT_T19X)) { nvlink_dbg("Tegra loopback topology detected"); } else if ((topology->master_dev_id == NVLINK_ENDPT_GV100) && (topology->slave_dev_id == NVLINK_ENDPT_T19X)) { nvlink_dbg("GV100 (master) connected to Tegra (slave) "); } mutex_unlock(&nvlink_core.mutex); } EXPORT_SYMBOL(nvlink_print_topology); /* * Record the topology information in core driver structures. * If the topology data is already available with core driver, * just verify that both the devices have same topology stored */ static int nvlink_update_topology(struct nvlink_device *ndev) { int ret = 0; struct topology *topology = NULL; struct nvlink_intranode_conn *intranode_conn = NULL; int local_dev_id = ndev->device_id; int local_link_id = ndev->link.link_id; int remote_dev_id = ndev->link.remote_dev_info.device_id; int remote_link_id = ndev->link.remote_dev_info.link_id; if ((local_dev_id >= NVLINK_MAX_DEVICES) || (remote_dev_id >= NVLINK_MAX_DEVICES)) { nvlink_err("Invalid device_id"); return -ENODEV; } if ((local_link_id >= NVLINK_MAX_LINKS) || (remote_link_id >= NVLINK_MAX_LINKS)) { nvlink_err("Invalid link_id"); return -ENODEV; } mutex_lock(&nvlink_core.mutex); topology = &(nvlink_core.topology); intranode_conn = &(nvlink_core.intranode_conn); /* * If ndev is the first device to register, we need to store the * topology; on consequent call from other device, we verify the * topology. We should check the topology information provided * from both the endpoints is same. This will prevent two devices * from registering as master. 
*/ if (topology->master_dev_id == -1) { nvlink_dbg("Storing the topology information with core driver"); if (ndev->is_master) { nvlink_dbg("Device %d is the master", ndev->device_id); topology->master_dev_id = local_dev_id; topology->master_link_id = local_link_id; topology->slave_dev_id = remote_dev_id; topology->slave_link_id = remote_link_id; } else { nvlink_dbg("Device %d is the slave", ndev->device_id); topology->master_dev_id = remote_dev_id; topology->master_link_id = remote_link_id; topology->slave_dev_id = local_dev_id; topology->slave_link_id = local_link_id; } nvlink_dbg("Topology stored in core driver structure"); } else { /* * Verify the topology information in ndev against the topology * information stored in core driver struct. */ if (ndev->is_master) { nvlink_dbg("Device %d is the master", ndev->device_id); if ((topology->master_dev_id != local_dev_id) || (topology->master_link_id != local_link_id) || (topology->slave_dev_id != remote_dev_id) || (topology->slave_link_id != remote_link_id)) { nvlink_err("Topology Mismatch!"); ret = -EINVAL; goto topology_err; } else { nvlink_dbg("Topology Match!"); } } else { nvlink_dbg("Device %d is the slave", ndev->device_id); if ((topology->master_dev_id != remote_dev_id) || (topology->master_link_id != remote_link_id) || (topology->slave_dev_id != local_dev_id) || (topology->slave_link_id != local_link_id)) { nvlink_err("Topology Mismatch!"); ret = -EINVAL; goto topology_err; } else { nvlink_dbg("Topology Match!"); } } } /* Check if topology is one of the below supported topologies - * 1. Tegra Loopback * 2. 
dGPU as master connected to Tegra as slave * * else report error */ if (topology->slave_dev_id == NVLINK_ENDPT_GV100) { nvlink_err("Topology with dGPU as slave is not supported!"); ret = -EINVAL; goto topology_err; } intranode_conn->ndev0 = nvlink_core.ndevs[topology->master_dev_id]; intranode_conn->ndev1 = nvlink_core.ndevs[topology->slave_dev_id]; goto success; topology_err: nvlink_core.ndevs[local_dev_id] = NULL; nvlink_core.ndevs[remote_dev_id] = NULL; nvlink_core.topology.master_link_id = -1; nvlink_core.topology.master_dev_id = -1; nvlink_core.topology.slave_link_id = -1; nvlink_core.topology.slave_dev_id = -1; success: mutex_unlock(&nvlink_core.mutex); return ret; } /* * This is a wrapper function for an ARM64 cache flush API. This API is used in * NVLINK kernel test modules. We've created this NVLINK wrapper because we * don't want to directly export the ARM64 API. We want to minimize the exposure * of this API outside of the kernel. By creating this NVLINK wrapper we're * trying to ensure that only NVLINK kernel test modules will use this API * outside of the kernel. 
*/ void __nvlink_dma_flush_area(const void *ptr, size_t size) { __dma_flush_area(ptr, size); } EXPORT_SYMBOL(__nvlink_dma_flush_area); int nvlink_register_device(struct nvlink_device *ndev) { int ret = 0; if (!ndev) { nvlink_err("Invalid device struct pointer"); return -EINVAL; } if (ndev->device_id >= NVLINK_MAX_DEVICES) { nvlink_err("Invalid device_id"); ret = -ENODEV; goto fail; } mutex_lock(&nvlink_core.mutex); /* Allow each device to register just once */ if (nvlink_core.ndevs[ndev->device_id] != NULL) { nvlink_err("Device %u has already registered with core driver", ndev->device_id); ret = -EINVAL; goto release_mutex; } mutex_init(&ndev->init_state_mutex); ret = nvlink_set_init_state(ndev, NVLINK_DEV_OFF); if (ret < 0) { nvlink_err("Error initializing init state to DEV_OFF"); mutex_destroy(&ndev->init_state_mutex); goto release_mutex; } nvlink_core.ndevs[ndev->device_id] = ndev; mutex_unlock(&nvlink_core.mutex); ret = nvlink_update_topology(ndev); if (ret < 0) { mutex_destroy(&ndev->init_state_mutex); goto fail; } nvlink_dbg("Device registration successful!"); goto success; release_mutex: mutex_unlock(&nvlink_core.mutex); fail: nvlink_err("Device registration failed!"); success: return ret; } EXPORT_SYMBOL(nvlink_register_device); int nvlink_register_link(struct nvlink_link *link) { int ret = 0; if (!link) { nvlink_err("Invalid link struct pointer"); return -EINVAL; } mutex_lock(&nvlink_core.mutex); if (link->link_id >= NVLINK_MAX_LINKS) { nvlink_err("Invalid link_id"); ret = -ENODEV; goto fail; } link->mode = NVLINK_LINK_OFF; link->is_connected = false; nvlink_core.nlinks[link->link_id] = link; goto success; fail: nvlink_err("Link register failed!"); success: mutex_unlock(&nvlink_core.mutex); return ret; } EXPORT_SYMBOL(nvlink_register_link); int nvlink_unregister_device(struct nvlink_device* ndev) { int ret = 0; if (!ndev) { nvlink_err("Invalid device struct pointer"); return -EINVAL; } mutex_lock(&nvlink_core.mutex); if (ndev->device_id >= 
NVLINK_MAX_DEVICES) { nvlink_err("Invalid device_id"); ret = -ENODEV; goto fail; } mutex_destroy(&ndev->init_state_mutex); nvlink_core.ndevs[ndev->device_id] = NULL; nvlink_core.topology.master_link_id = -1; nvlink_core.topology.master_dev_id = -1; nvlink_core.topology.slave_link_id = -1; nvlink_core.topology.slave_dev_id = -1; goto success; fail: nvlink_err("Device unregister failed!"); success: mutex_unlock(&nvlink_core.mutex); return ret; } EXPORT_SYMBOL(nvlink_unregister_device); int nvlink_unregister_link(struct nvlink_link *link) { int ret = 0; if (!link) { nvlink_err("Invalid link struct pointer"); return -EINVAL; } mutex_lock(&nvlink_core.mutex); if (link->link_id >= NVLINK_MAX_LINKS) { nvlink_err("Invalid link_id"); ret = -ENODEV; goto fail; } nvlink_core.nlinks[link->link_id] = NULL; goto success; fail: nvlink_err("Link unregister failed!"); success: mutex_unlock(&nvlink_core.mutex); return ret; } EXPORT_SYMBOL(nvlink_unregister_link); static int nvlink_poll_link_state(struct nvlink_device *ndev, u32 link_state, u32 timeout_ms) { u32 link_mode; u32 timeout_us = timeout_ms * 1000; link_mode = ndev->link.link_ops.get_link_mode(ndev); while (link_mode != link_state) { usleep_range(DEFAULT_LOOP_SLEEP_US, DEFAULT_LOOP_SLEEP_US * 2); timeout_us = timeout_us - DEFAULT_LOOP_SLEEP_US; if (timeout_us <= 0) { nvlink_err("Timeout occurred while polling on link"); return -ETIMEDOUT; } link_mode = ndev->link.link_ops.get_link_mode(ndev); } return 0; } static int nvlink_poll_tx_sublink_state(struct nvlink_device *ndev, u32 tx_sublink_state, u32 timeout_ms) { u32 sublink_mode; u32 timeout_us = timeout_ms * 1000; sublink_mode = ndev->link.link_ops.get_sublink_mode(ndev, false); while (sublink_mode != tx_sublink_state) { usleep_range(DEFAULT_LOOP_SLEEP_US, DEFAULT_LOOP_SLEEP_US * 2); timeout_us = timeout_us - DEFAULT_LOOP_SLEEP_US; if (timeout_us <= 0) { nvlink_err("Timeout while polling on Tx sublink"); return -ETIMEDOUT; } sublink_mode = 
ndev->link.link_ops.get_sublink_mode(ndev, false); } return 0; } static int nvlink_poll_rx_sublink_state(struct nvlink_device *ndev, u32 rx_sublink_state, u32 timeout_ms) { u32 sublink_mode; u32 timeout_us = timeout_ms * 1000; sublink_mode = ndev->link.link_ops.get_sublink_mode(ndev, true); while (sublink_mode != rx_sublink_state) { usleep_range(DEFAULT_LOOP_SLEEP_US, DEFAULT_LOOP_SLEEP_US * 2); timeout_us = timeout_us - DEFAULT_LOOP_SLEEP_US; if (timeout_us <= 0) { nvlink_err("Timeout while polling on Rx sublink"); return -ETIMEDOUT; } sublink_mode = ndev->link.link_ops.get_sublink_mode(ndev, true); } return 0; } static int nvlink_poll_sublink_state(struct nvlink_device *ndev0, u32 tx_sublink_state, struct nvlink_device *ndev1, u32 rx_sublink_state, u32 timeout_ms) { int status; status = nvlink_poll_tx_sublink_state(ndev0, tx_sublink_state, timeout_ms); if (status) { /* polling on tx sublink failed. skip any rx polling */ return status; } status = nvlink_poll_rx_sublink_state(ndev1, rx_sublink_state, timeout_ms); return status; } /* For a given link, check whether tx sublink mode is at the requested mode */ static bool nvlink_check_tx_sublink_mode(struct nvlink_device *ndev, u32 sublink_mode) { u32 curr_sublink_mode = NVLINK_TX_OFF; curr_sublink_mode = ndev->link.link_ops.get_sublink_mode(ndev, false); switch (sublink_mode) { case NVLINK_TX_OFF: if (curr_sublink_mode == NVLINK_TX_OFF) { nvlink_dbg("Tx sublink is in OFF mode"); return true; } break; case NVLINK_TX_SAFE: if (curr_sublink_mode == NVLINK_TX_SAFE) { nvlink_dbg("Tx sublink is in SAFE mode"); return true; } break; case NVLINK_TX_HS: if ((curr_sublink_mode == NVLINK_TX_SINGLE_LANE) || (curr_sublink_mode == NVLINK_TX_HS)) { nvlink_dbg("Tx sublink is in HS mode"); return true; } break; } /* return false for default case or the states are not matching */ return false; } /* For a given link, check whether rx sublink mode is at the requested mode */ static bool nvlink_check_rx_sublink_mode(struct nvlink_device 
*ndev, u32 sublink_mode) { u32 curr_sublink_mode = NVLINK_RX_OFF; curr_sublink_mode = ndev->link.link_ops.get_sublink_mode(ndev, true); switch (sublink_mode) { case NVLINK_RX_OFF: if (curr_sublink_mode == NVLINK_RX_OFF) { nvlink_dbg("Rx sublink is in OFF mode"); return true; } break; case NVLINK_RX_SAFE: if (curr_sublink_mode == NVLINK_RX_SAFE) { nvlink_dbg("Rx sublink is in SAFE mode"); return true; } break; case NVLINK_RX_HS: if ((curr_sublink_mode == NVLINK_RX_SINGLE_LANE) || (curr_sublink_mode == NVLINK_RX_HS)) { nvlink_dbg("Rx sublink is in HS mode"); return true; } break; } /* return false for default case or the states are not matching */ return false; } /* For the given link, check whether the link mode is at the requested mode */ static bool nvlink_check_link_mode(struct nvlink_device *ndev, u32 link_mode) { u32 curr_link_mode = NVLINK_LINK_OFF; curr_link_mode = ndev->link.link_ops.get_link_mode(ndev); if (link_mode == curr_link_mode) return true; else return false; } /* Check if the given intranode connection is in the specified mode */ static int nvlink_check_intranode_conn_mode( struct nvlink_intranode_conn *conn, u32 link_mode, bool *match) { struct nvlink_device *ndev0 = conn->ndev0; struct nvlink_device *ndev1 = conn->ndev1; int ret = 0; bool is_mode = false; switch (link_mode) { case NVLINK_LINK_OFF: /* Check if both links are OFF */ if (nvlink_check_link_mode(ndev0, NVLINK_LINK_OFF) && nvlink_check_link_mode(ndev1, NVLINK_LINK_OFF)) { *match = true; nvlink_dbg("Intranode connection is OFF"); return ret; } /* Check if one of the links is OFF */ if (nvlink_check_link_mode(ndev0, NVLINK_LINK_OFF) || nvlink_check_link_mode(ndev1, NVLINK_LINK_OFF)) { nvlink_err("Link is in bad state"); *match = false; return -ENOLINK; } nvlink_dbg("Link not OFF yet."); *match = false; break; case NVLINK_LINK_SAFE: /* Check if both links and sublinks are already in SAFE mode */ if (nvlink_check_link_mode(ndev0, NVLINK_LINK_SAFE) && nvlink_check_link_mode(ndev1, 
NVLINK_LINK_SAFE)) { is_mode = nvlink_check_tx_sublink_mode(ndev0, NVLINK_TX_SAFE) && nvlink_check_tx_sublink_mode(ndev1, NVLINK_TX_SAFE) && nvlink_check_rx_sublink_mode(ndev0, NVLINK_RX_SAFE) && nvlink_check_rx_sublink_mode(ndev1, NVLINK_RX_SAFE); if (!is_mode) { nvlink_err("Sublinks in bad state"); *match = false; return -ENOLINK; } *match = true; nvlink_dbg("Intranode connection in Safe mode"); return ret; } /* Check if one of the links in SAFE mode */ if (nvlink_check_link_mode(ndev0, NVLINK_LINK_SAFE) || nvlink_check_link_mode(ndev1, NVLINK_LINK_SAFE)) { nvlink_err("Link is in bad state"); *match = false; return -ENOLINK; } nvlink_dbg("Link is not in Safe mode"); *match = false; break; case NVLINK_LINK_HS: /* Check if both links and sublinks are in HS mode */ if (nvlink_check_link_mode(ndev0, NVLINK_LINK_HS) && nvlink_check_link_mode(ndev1, NVLINK_LINK_HS)) { is_mode = nvlink_check_tx_sublink_mode(ndev0, NVLINK_TX_HS) && nvlink_check_tx_sublink_mode(ndev1, NVLINK_TX_HS) && nvlink_check_rx_sublink_mode(ndev0, NVLINK_RX_HS) && nvlink_check_rx_sublink_mode(ndev1, NVLINK_RX_HS); if (!is_mode) { nvlink_err("Sublinks in bad state"); *match = false; return -ENOLINK; } *match = true; nvlink_dbg("Intranode connection in HS mode"); return ret; } /* Check if one of the links in HS mode */ if (nvlink_check_link_mode(ndev0, NVLINK_LINK_HS) || nvlink_check_link_mode(ndev1, NVLINK_LINK_HS)) { nvlink_err("Link is in bad state"); *match = false; return -ENOLINK; } nvlink_dbg("Link is not in High Speed mode"); *match = false; break; default: *match = false; } return ret; } /* * Get the intranode connection having ndev0 pointing to master device and * ndev1 to slave device. 
*/ static int nvlink_get_intranode_conn(struct nvlink_device *ndev, struct nvlink_intranode_conn *conn) { int ret = 0; if (!ndev || !conn) { nvlink_err("Invalid pointers passed as input"); return -EINVAL; } mutex_lock(&nvlink_core.mutex); if ((ndev->device_id == nvlink_core.topology.master_dev_id) || (ndev->device_id == nvlink_core.topology.slave_dev_id)) { *conn = nvlink_core.intranode_conn; } else { nvlink_err("Invalid Unregistered device ID"); ret = -EINVAL; } mutex_unlock(&nvlink_core.mutex); return ret; } /* * This function should help transition link and sublink mode from high speed * to safe on both the endpoints. It will also disable low power management * before transitioning out of high speed. */ int nvlink_transition_intranode_conn_hs_to_safe(struct nvlink_device *ndev) { int ret = 0; struct nvlink_intranode_conn conn; struct nvlink_device *ndev0 = NULL; struct nvlink_device *ndev1 = NULL; struct nvlink_link *link0 = NULL; struct nvlink_link *link1 = NULL; bool match = false; if (!ndev) { nvlink_err("Invalid device struct pointer"); return -EINVAL; } /* * Get the intranode conn to co-ordinate link state transition between * two endpoints. 
*/ ret = nvlink_get_intranode_conn(ndev, &conn); if (ret < 0) { nvlink_err("Error retrieving intranode connection information"); return ret; } ndev0 = conn.ndev0; ndev1 = conn.ndev1; link0 = &(ndev0->link); link1 = &(ndev1->link); /* Check if both the link and sublink state are SAFE for both ends */ ret = nvlink_check_intranode_conn_mode(&conn, NVLINK_LINK_SAFE, &match); /* Return if the links are in bad state or already in SAFE mode */ if (ret < 0) { nvlink_err("Can't transition to SAFE as link is in bad state"); return ret; } if (match) { nvlink_dbg("link is already in SAFE mode"); return ret; } if (nvlink_is_single_lane_mode_supported(&conn)) { /* Disable Single-Lane mode for device 0 */ ret = link0->link_ops.set_link_mode(ndev0, NVLINK_LINK_DISABLE_PM); if (ret) { nvlink_err("Failed to disable SL(1/8th) mode for dev0"); return ret; } /* Disable Single-Lane mode for device 1 */ ret = link1->link_ops.set_link_mode(ndev1, NVLINK_LINK_DISABLE_PM); if (ret) { nvlink_err("Failed to disable SL(1/8th) mode for dev1"); return ret; } } /* Move both ends to SWCFG */ link0->link_ops.set_link_mode(ndev0, NVLINK_LINK_SAFE); link1->link_ops.set_link_mode(ndev1, NVLINK_LINK_SAFE); /* Wait for the end0 to go to SWCFG */ ret = nvlink_poll_link_state(ndev0, NVLINK_LINK_SAFE, NVLINK_TRANSITION_SAFE_TIMEOUT_MS); if (ret < 0) { nvlink_err("Unable to set link in swcfg"); return ret; } /* Wait for the end1 to go to SWCFG */ ret = nvlink_poll_link_state(ndev1, NVLINK_LINK_SAFE, NVLINK_TRANSITION_SAFE_TIMEOUT_MS); if (ret < 0) { nvlink_err("Unable to set link in swcfg"); return ret; } /* Put TX sublink on end0 in SAFE Mode */ ret = link0->link_ops.set_sublink_mode(ndev0, false, NVLINK_TX_SAFE); if (ret < 0) { nvlink_err("Failed to set TX sublink mode to SAFE for ndev0"); return ret; } /* Put TX sublink on end1 in SAFE Mode */ ret = link1->link_ops.set_sublink_mode(ndev1, false, NVLINK_TX_SAFE); if (ret < 0) { nvlink_err("Failed to set TX sublink mode to SAFE for ndev1"); return ret; } 
/* wait for sublinks to go in SAFE Mode */ ret = nvlink_poll_sublink_state(ndev0, NVLINK_TX_SAFE, ndev1, NVLINK_RX_SAFE, NVLINK_TRANSITION_SAFE_TIMEOUT_MS); if (ret < 0) { nvlink_err("Unable to set sublinks in safe mode"); return ret; } ret = nvlink_poll_sublink_state(ndev1, NVLINK_TX_SAFE, ndev0, NVLINK_RX_SAFE, NVLINK_TRANSITION_SAFE_TIMEOUT_MS); if (ret < 0) { nvlink_err("Unable to set sublinks in safe mode"); return ret; } nvlink_dbg("Link in Safe mode!"); return ret; } EXPORT_SYMBOL(nvlink_transition_intranode_conn_hs_to_safe); /* After device is initialized, this function can be used to transition an * intranode connection from OFF to SAFE state. Note this function only changes * the state from off to safe on both endpoints; it does not perform any device * or link initialization. */ int nvlink_transition_intranode_conn_off_to_safe(struct nvlink_device *ndev) { int ret = 0; enum init_state init_state_ndev0 = NVLINK_DEV_OFF; enum init_state init_state_ndev1 = NVLINK_DEV_OFF; struct nvlink_intranode_conn conn; struct nvlink_device *ndev0 = NULL; struct nvlink_device *ndev1 = NULL; struct nvlink_link *link0 = NULL; struct nvlink_link *link1 = NULL; bool match = false; if (!ndev) { nvlink_err("Invalid device struct pointer"); return -EINVAL; } /* * Get the intranode conn to co-ordinate link state transition between * two endpoints. 
*/ ret = nvlink_get_intranode_conn(ndev, &conn); if (ret < 0) { nvlink_err("Error retrieving intranode connection information"); return ret; } ndev0 = conn.ndev0; ndev1 = conn.ndev1; link0 = &(ndev0->link); link1 = &(ndev1->link); /* Verify that the hardware is initialized before transition to safe */ ret = nvlink_get_init_state(ndev0, &init_state_ndev0); if (ret < 0) { nvlink_err("Error retrieving init state for ndev0"); return ret; } ret = nvlink_get_init_state(ndev1, &init_state_ndev1); if (ret < 0) { nvlink_err("Error retrieving init state for ndev1"); return ret; } if ((init_state_ndev0 != NVLINK_DEV_REG_INIT_DONE) || (init_state_ndev1 != NVLINK_DEV_REG_INIT_DONE)) { nvlink_err("Error: hardware is uninitialized"); return ret; } /* Check if link is already in SAFE mode */ ret = nvlink_check_intranode_conn_mode(&conn, NVLINK_LINK_SAFE, &match); /* Return if the links are in bad state or already in SAFE mode */ if (ret < 0) { nvlink_err("Can't transition to Safe as link is in bad state"); return ret; } if (match) { nvlink_dbg( "Exiting Safe transition as link is already in Safe"); return ret; } /* This function supports transition only from OFF mode */ ret = nvlink_check_intranode_conn_mode(&conn, NVLINK_LINK_OFF, &match); /* Return if the links are in bad state or not in OFF mode */ if (ret < 0) { nvlink_err("Can't transition to Safe as link is in bad state"); return ret; } if (!match) { nvlink_dbg( "Exiting Safe transition as link is not in OFF mode"); return ret; } /* Put the links in SAFE mode. 
*/ link0->link_ops.set_link_mode(ndev0, NVLINK_LINK_SAFE); link1->link_ops.set_link_mode(ndev1, NVLINK_LINK_SAFE); /* Wait for ndev0 to go in SWCFG mode */ ret = nvlink_poll_link_state(ndev0, NVLINK_LINK_SAFE, NVLINK_TRANSITION_SAFE_TIMEOUT_MS); if (ret < 0) { nvlink_err("Unable to set ndev0 end in SWCFG mode"); return ret; } /* Wait for ndev1 to go in SWCFG mode */ ret = nvlink_poll_link_state(ndev1, NVLINK_LINK_SAFE, NVLINK_TRANSITION_SAFE_TIMEOUT_MS); if (ret < 0) { nvlink_err("Unable to set ndev1 end in SWCFG mode"); return ret; } /* wait for sublinks to go in Safe Mode */ ret = nvlink_poll_sublink_state(ndev0, NVLINK_TX_SAFE, ndev1, NVLINK_RX_SAFE, NVLINK_TRANSITION_SAFE_TIMEOUT_MS); if (ret < 0) { nvlink_err("Unable to set sublinks in Safe mode"); return ret; } ret = nvlink_poll_sublink_state(ndev1, NVLINK_TX_SAFE, ndev0, NVLINK_RX_SAFE, NVLINK_TRANSITION_SAFE_TIMEOUT_MS); if (ret < 0) { nvlink_err("Unable to set sublinks in Safe mode"); return ret; } link0->is_connected = true; link1->is_connected = true; nvlink_dbg("Link in Safe mode!"); return ret; } EXPORT_SYMBOL(nvlink_transition_intranode_conn_off_to_safe); /* * This function trains the link from safe to high speed mode. It enables the * PRBS generator on both the endpoints before transitioning to high speed. Once * the link is in high speed mode, it enables low power management over link. */ int nvlink_train_intranode_conn_safe_to_hs(struct nvlink_device *ndev) { int ret = 0; struct nvlink_intranode_conn conn; struct nvlink_device *ndev0 = NULL; struct nvlink_device *ndev1 = NULL; struct nvlink_link *link0 = NULL; struct nvlink_link *link1 = NULL; bool match = false; if (!ndev) { nvlink_err("Invalid device struct pointer"); return -EINVAL; } /* * Get the intranode conn to co-ordinate link state transition between * two endpoints. 
*/ ret = nvlink_get_intranode_conn(ndev, &conn); if (ret < 0) { nvlink_err("Error retrieving intranode connection information"); return ret; } ndev0 = conn.ndev0; ndev1 = conn.ndev1; link0 = &(ndev0->link); link1 = &(ndev1->link); /* Check if both the link and sublink state are HS for both ends */ ret = nvlink_check_intranode_conn_mode(&conn, NVLINK_LINK_HS, &match); /* Return if the links are in bad state or already in HS mode */ if (ret < 0) { nvlink_err("Can't transition to HS as link is in bad state"); return ret; } if (match) { nvlink_dbg("Exiting HS transition as link is already in HS"); return ret; } /* We can train connection to HS only if the link is in Safe mode */ ret = nvlink_check_intranode_conn_mode(&conn, NVLINK_LINK_SAFE, &match); /* Return if the links are in bad state or not in Safe mode */ if (ret < 0) { nvlink_err("Can't transition to HS as link is in bad state"); return ret; } if (!match) { nvlink_err("Exiting HS transition as link is not in SAFE mode"); return ret; } /* Enable PRBS generator on both ends */ ret = link0->link_ops.set_sublink_mode(ndev0, false, NVLINK_TX_PRBS_EN); if (ret < 0) { nvlink_err("Failed to enable PRBS generator for ndev0"); return ret; } ret = link1->link_ops.set_sublink_mode(ndev1, false, NVLINK_TX_PRBS_EN); if (ret < 0) { nvlink_err("Failed to enable PRBS generator for ndev1"); return ret; } /* Put TX sublink on end0 in High Speed */ ret = link0->link_ops.set_sublink_mode(ndev0, false, NVLINK_TX_HS); if (ret < 0) { nvlink_err("Failed to set TX sublink mode to HS for ndev0"); return ret; } /* Put TX sublink on end1 in High Speed */ ret = link1->link_ops.set_sublink_mode(ndev1, false, NVLINK_TX_HS); if (ret < 0) { nvlink_err("Failed to set TX sublink mode to HS for ndev1"); return ret; } /* wait for sublinks to go in High Speed */ ret = nvlink_poll_sublink_state(ndev0, NVLINK_TX_HS, ndev1, NVLINK_RX_HS, NVLINK_TRANSITION_HS_TIMEOUT_MS); if (ret < 0) { nvlink_err("Unable to set sublinks in high speed mode"); return 
ret; } ret = nvlink_poll_sublink_state(ndev1, NVLINK_TX_HS, ndev0, NVLINK_RX_HS, NVLINK_TRANSITION_HS_TIMEOUT_MS); if (ret < 0) { nvlink_err("Unable to set sublinks in high speed mode"); return ret; } /* * Put only end0 in ACTIVE mode. The other end should automatically * go to Active mode. */ link0->link_ops.set_link_mode(ndev0, NVLINK_LINK_HS); /* Wait for other end to go in ACTIVE mode */ ret = nvlink_poll_link_state(ndev1, NVLINK_LINK_HS, NVLINK_TRANSITION_HS_TIMEOUT_MS); if (ret < 0) { nvlink_err("Unable to set links in high speed mode"); return ret; } if (nvlink_is_single_lane_mode_supported(&conn)) { /* Enable Single-Lane policy for device 0 */ ret = link0->link_ops.set_link_mode(ndev0, NVLINK_LINK_ENABLE_PM); if (ret) { nvlink_err("Error encountered while enabling " "Single-Lane mode policy for device 0"); return ret; } /* Enable Single-Lane policy for device 1 */ ret = link1->link_ops.set_link_mode(ndev1, NVLINK_LINK_ENABLE_PM); if (ret) { nvlink_err("Error encountered while enabling " "Single-Lane mode policy for device 1"); return ret; } } nvlink_dbg("Link in High Speed mode!"); return ret; } EXPORT_SYMBOL(nvlink_train_intranode_conn_safe_to_hs); int nvlink_transition_intranode_conn_safe_to_off(struct nvlink_device *ndev) { int ret = 0; struct nvlink_intranode_conn conn; struct nvlink_device *ndev0 = NULL; struct nvlink_device *ndev1 = NULL; if (!ndev) { nvlink_err("Invalid device struct pointer"); return -EINVAL; } ret = nvlink_get_intranode_conn(ndev, &conn); if (ret < 0) { nvlink_err("Error retrieving intranode connection information"); return ret; } ndev0 = conn.ndev0; ndev1 = conn.ndev1; ndev0->link.link_ops.set_link_mode(ndev0, NVLINK_LINK_DISABLE_ERR_DETECT); ndev1->link.link_ops.set_link_mode(ndev1, NVLINK_LINK_DISABLE_ERR_DETECT); /* Disable Lanes on both sides of the link */ ret = ndev0->link.link_ops.set_link_mode(ndev0, NVLINK_LINK_LANE_DISABLE); if (ret < 0) { nvlink_err("ndev0 set NVLINK_LINK_LANE_DISABLE failed"); goto fail; } ret = 
ndev1->link.link_ops.set_link_mode(ndev1, NVLINK_LINK_LANE_DISABLE); if (ret < 0) { nvlink_err("ndev1 set NVLINK_LINK_LANE_DISABLE failed"); goto fail; } /* Shutdown Lanes on both sides of the link */ ret = ndev0->link.link_ops.set_link_mode(ndev0, NVLINK_LINK_LANE_SHUTDOWN); if (ret < 0) { nvlink_err("ndev0 set NVLINK_LINK_LANE_SHUTDOWN failed"); goto fail; } ret = ndev1->link.link_ops.set_link_mode(ndev1, NVLINK_LINK_LANE_SHUTDOWN); if (ret < 0) { nvlink_err("ndev1 set NVLINK_LINK_LANE_SHUTDOWN failed"); goto fail; } ret = ndev0->link.link_ops.set_link_mode(ndev0, NVLINK_LINK_OFF); if (ret < 0) { nvlink_err("ndev0 set link mode to OFF failed"); goto fail; } /* set_link_mode(NVLINK_LINK_OFF) disables CAR. Make sure we are not * calling this twice for the same endpoint incase of loopback * topologies. */ if (ndev0 != ndev1) { ret = ndev1->link.link_ops.set_link_mode(ndev1, NVLINK_LINK_OFF); if (ret < 0) { nvlink_err("ndev1 set link mode to OFF failed"); goto fail; } } ret = nvlink_set_init_state(ndev0, NVLINK_DEV_OFF); if (ret < 0) goto fail; ret = nvlink_set_init_state(ndev1, NVLINK_DEV_OFF); if (ret < 0) goto fail; fail: return ret; } EXPORT_SYMBOL(nvlink_transition_intranode_conn_safe_to_off); /* * Initialize the device using different callbacks registered through * dev_ops and link_ops. 
At the end of this function, the device should * have the clocks, resets, uphy, minion, interrupts and memory interface * initialized and the endpoint should be ready for link state transition */ int nvlink_initialize_endpoint(struct nvlink_device *ndev) { int ret = 0; enum init_state init_state = NVLINK_DEV_OFF; if (!ndev) { nvlink_err("Invalid device struct pointer"); ret = -EINVAL; goto fail; } ret = nvlink_get_init_state(ndev, &init_state); if (ret < 0) goto fail; switch (init_state) { case NVLINK_DEV_OFF: ret = ndev->dev_ops.dev_early_init(ndev); if (ret < 0) goto fail; ret = nvlink_set_init_state(ndev, NVLINK_DEV_EARLY_INIT_DONE); if (ret < 0) goto fail; case NVLINK_DEV_EARLY_INIT_DONE: ret = ndev->link.link_ops.link_early_init(ndev); if (ret < 0) goto fail; ret = nvlink_set_init_state(ndev, NVLINK_LINK_EARLY_INIT_DONE); if (ret < 0) goto fail; case NVLINK_LINK_EARLY_INIT_DONE: ret = ndev->dev_ops.dev_interface_init(ndev); if (ret < 0) goto fail; ret = nvlink_set_init_state(ndev, NVLINK_DEV_INTERFACE_INIT_DONE); if (ret < 0) goto fail; case NVLINK_DEV_INTERFACE_INIT_DONE: ret = ndev->dev_ops.dev_reg_init(ndev); if (ret < 0) goto fail; ret = nvlink_set_init_state(ndev, NVLINK_DEV_REG_INIT_DONE); if (ret < 0) goto fail; case NVLINK_DEV_REG_INIT_DONE: nvlink_dbg("Device %u is initialized!", ndev->device_id); break; default: ret = -EINVAL; nvlink_err("Invalid device state!"); goto fail; } nvlink_dbg("Device initialization successful!"); goto success; fail: /* * TODO: Add code to undo the HW and interface init state if device * init fails. This code will follow the shutdown sequence. */ nvlink_err("Device initialization failed!"); success: return ret; } EXPORT_SYMBOL(nvlink_initialize_endpoint); /* * Setup the link and endpoint devices for data transfer over high speed * Only master device can call nvlink_enumerate to start data transfer over * nvlink. 
*/
/*
 * nvlink_enumerate() sequence:
 *   1. Under nvlink_core.mutex, check that @ndev is the master recorded in
 *      the core topology and look up the master and slave device structs.
 *   2. With the mutex released, initialize the slave endpoint, then the
 *      master endpoint.
 *   3. Transition the intranode connection from OFF to SAFE and then train
 *      it from SAFE to high speed.
 *
 * Returns 0 on success, -EINVAL if @ndev is NULL or is not the master,
 * -ENODATA if the topology IDs are out of range or either device is not
 * registered, otherwise the negative error code of the step that failed.
 */
int nvlink_enumerate(struct nvlink_device *ndev)
{
	int ret = 0;
	struct nvlink_device *master_dev = NULL;
	struct nvlink_device *slave_dev = NULL;
	struct topology *topology = NULL;

	if (!ndev) {
		nvlink_err("Invalid pointer to device struct");
		return -EINVAL;
	}

	mutex_lock(&nvlink_core.mutex);

	topology = &(nvlink_core.topology);
	if (ndev->device_id != topology->master_dev_id) {
		nvlink_err("Device is not master and cannot start enumeration");
		ret = -EINVAL;
		goto release_mutex;
	}

	/*
	 * The topology IDs are used as indices into nvlink_core.ndevs[] and
	 * nvlink_core_init() sets them to -1. Reject anything outside the
	 * array so an unset topology cannot cause an out-of-bounds read.
	 */
	if (topology->master_dev_id < 0 ||
	    topology->master_dev_id >= NVLINK_MAX_DEVICES ||
	    topology->slave_dev_id < 0 ||
	    topology->slave_dev_id >= NVLINK_MAX_DEVICES) {
		nvlink_err("Invalid master/slave device ID in topology");
		ret = -ENODATA;
		goto release_mutex;
	}

	master_dev = nvlink_core.ndevs[topology->master_dev_id];
	slave_dev = nvlink_core.ndevs[topology->slave_dev_id];
	if (!master_dev || !slave_dev) {
		nvlink_err("Slave or Master not registered with core driver");
		ret = -ENODATA;
		goto release_mutex;
	}

	mutex_unlock(&nvlink_core.mutex);

	/*
	 * Initialize the clocks, resets, minion, uphy, interrupts,
	 * memory interface on both the endpoints
	 */
	ret = nvlink_initialize_endpoint(slave_dev);
	if (ret < 0)
		goto fail;

	ret = nvlink_initialize_endpoint(master_dev);
	if (ret < 0)
		goto fail;

	ret = nvlink_transition_intranode_conn_off_to_safe(master_dev);
	if (ret < 0)
		goto fail;

	ret = nvlink_train_intranode_conn_safe_to_hs(master_dev);
	if (ret < 0)
		goto fail;

	nvlink_dbg("Nvlink enumerate successful!");
	goto success;

release_mutex:
	mutex_unlock(&nvlink_core.mutex);
fail:
	nvlink_err("Nvlink enumerate failed!");
success:
	return ret;
}
EXPORT_SYMBOL(nvlink_enumerate);

/*
 * Disable the device interface, transition the link to SAFE mode
 * and then to OFF. Only the master device is able to initiate nvlink shutdown.
*/ int nvlink_shutdown(struct nvlink_device *ndev) { int ret = 0; struct nvlink_device *master_dev = NULL; struct nvlink_device *slave_dev = NULL; struct topology *topology = NULL; enum init_state master_state = NVLINK_DEV_OFF; enum init_state slave_state = NVLINK_DEV_OFF; if (!ndev) { nvlink_err("Invalid pointer to device struct"); return -EINVAL; } mutex_lock(&nvlink_core.mutex); topology = &(nvlink_core.topology); if (ndev->device_id != topology->master_dev_id) { nvlink_err("Device is not master and cannot start shutdown"); ret = -EINVAL; goto release_mutex; } master_dev = nvlink_core.ndevs[topology->master_dev_id]; slave_dev = nvlink_core.ndevs[topology->slave_dev_id]; if (!master_dev || !slave_dev) { nvlink_err("Slave or Master not registered with core driver"); ret = -ENODATA; goto release_mutex; } mutex_unlock(&nvlink_core.mutex); ret = nvlink_get_init_state(master_dev, &master_state); if (ret < 0) { nvlink_err("Error retrieving init state for master"); goto fail; } ret = nvlink_get_init_state(slave_dev, &slave_state); if (ret < 0) { nvlink_err("Error retrieving init state for slave"); goto fail; } if (master_state == NVLINK_DEV_OFF || slave_state == NVLINK_DEV_OFF) { nvlink_dbg("master/slave device is off, link already shutdown"); return ret; } if (master_state != NVLINK_DEV_REG_INIT_DONE || slave_state != NVLINK_DEV_REG_INIT_DONE) { nvlink_err("nvlink not initialized and is struck in" " intermediate state"); ret = -EPERM; goto fail; } ret = master_dev->dev_ops.dev_interface_disable(master_dev); if (ret < 0) { nvlink_err("master_dev dev_interface_disable failed"); goto fail; } ret = slave_dev->dev_ops.dev_interface_disable(slave_dev); if (ret < 0) { nvlink_err("slave_dev dev_interface_disable failed"); goto fail; } ret = nvlink_transition_intranode_conn_hs_to_safe(master_dev); if (ret < 0) { nvlink_err("Transiting intranode conn to safe failed"); goto fail; } ret = nvlink_transition_intranode_conn_safe_to_off(master_dev); if (ret < 0) { nvlink_err("Turning 
off nvlink lane failed"); goto fail; } nvlink_dbg("Nvlink shutdown successful!"); goto success; release_mutex: mutex_unlock(&nvlink_core.mutex); fail: nvlink_err("nvlink shutdown failed"); success: return ret; } EXPORT_SYMBOL(nvlink_shutdown); #ifdef CONFIG_DEBUG_FS void nvlink_core_debugfs_init(void) { struct dentry *core_debugfs = NULL; struct dentry *debugfs_node = NULL; nvlink_debugfs_root = debugfs_create_dir(NVLINK_DEBUGFS_ROOT, NULL); if (!nvlink_debugfs_root) { nvlink_err("Failed to create NVLINK debugfs root directory"); goto fail; } nvlink_debugfs_tests = debugfs_create_dir(NVLINK_DEBUGFS_TESTS, nvlink_debugfs_root); if (!nvlink_debugfs_tests) { nvlink_err("Failed to create NVLINK tests debugfs directory"); goto fail; } core_debugfs = debugfs_create_dir(NVLINK_MODULE_NAME, nvlink_debugfs_root); if (!core_debugfs) { nvlink_err("Failed to create NVLINK core driver's debugfs directory"); goto fail; } debugfs_node = debugfs_create_u32("log_mask", S_IWUSR | S_IRUGO, core_debugfs, &nvlink_log_mask); if (!debugfs_node) { nvlink_err("Failed to create the log_mask debugfs file"); goto fail; } return; fail: nvlink_err("Failed to create debugfs nodes"); debugfs_remove_recursive(nvlink_debugfs_root); nvlink_debugfs_root = NULL; nvlink_debugfs_tests = NULL; } void nvlink_core_debugfs_deinit(void) { debugfs_remove_recursive(nvlink_debugfs_root); nvlink_debugfs_root = NULL; nvlink_debugfs_tests = NULL; } #endif /* CONFIG_DEBUG_FS */ /* * nvlink_core_init: * The NVLINK core driver init function is called after debugfs has been * initialized but before the NVLINK endpoint drivers probe. This is the perfect * time for the NVLINK core driver to initialize any variables/state. At this * point during the kernel boot we should have access to debugfs, but we don't * have to worry about race conditions due to endpoint driver nvlink_register_* * calls. 
*/
int __init nvlink_core_init(void)
{
	struct nvlink_core *core = &nvlink_core;
	struct topology *topo = &core->topology;
	int idx = 0;

	mutex_init(&core->mutex);
	mutex_lock(&core->mutex);

	/* Start with no registered devices and no registered links */
	for (idx = 0; idx < NVLINK_MAX_DEVICES; idx++)
		core->ndevs[idx] = NULL;

	for (idx = 0; idx < NVLINK_MAX_LINKS; idx++)
		core->nlinks[idx] = NULL;

	/*
	 * -1 marks every topology field as not set; nvlink_print_topology()
	 * treats master_dev_id == -1 as "topology information not present".
	 */
	topo->slave_dev_id = -1;
	topo->master_dev_id = -1;
	topo->slave_link_id = -1;
	topo->master_link_id = -1;

#ifdef CONFIG_DEBUG_FS
	nvlink_core_debugfs_init();
#endif /* CONFIG_DEBUG_FS */

	mutex_unlock(&core->mutex);

	return 0;
}
subsys_initcall(nvlink_core_init);

/*
 * nvlink_core_exit:
 * Tear down what nvlink_core_init() set up: remove the debugfs nodes (when
 * debugfs support is compiled in) and destroy the core mutex.
 */
void __exit nvlink_core_exit(void)
{
#ifdef CONFIG_DEBUG_FS
	nvlink_core_debugfs_deinit();
#endif /* CONFIG_DEBUG_FS */

	mutex_destroy(&nvlink_core.mutex);
}
module_exit(nvlink_core_exit);