1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2020 Mellanox Technologies Ltd */ 3 4 #include <linux/mlx5/driver.h> 5 #include <linux/mlx5/device.h> 6 #include "mlx5_core.h" 7 #include "dev.h" 8 #include "sf/vhca_event.h" 9 #include "sf/sf.h" 10 #include "sf/mlx5_ifc_vhca_event.h" 11 #include "ecpf.h" 12 #define CREATE_TRACE_POINTS 13 #include "diag/dev_tracepoint.h" 14 15 struct mlx5_sf_dev_table { 16 struct xarray devices; 17 unsigned int max_sfs; 18 phys_addr_t base_address; 19 u64 sf_bar_length; 20 struct notifier_block nb; 21 struct mutex table_lock; /* Serializes sf life cycle and vhca state change handler */ 22 struct workqueue_struct *active_wq; 23 struct work_struct work; 24 u8 stop_active_wq:1; 25 struct mlx5_core_dev *dev; 26 }; 27 28 static bool mlx5_sf_dev_supported(const struct mlx5_core_dev *dev) 29 { 30 return MLX5_CAP_GEN(dev, sf) && mlx5_vhca_event_supported(dev); 31 } 32 33 bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev) 34 { 35 struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; 36 37 return table && !xa_empty(&table->devices); 38 } 39 40 static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf) 41 { 42 struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev); 43 struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); 44 45 return sysfs_emit(buf, "%u\n", sf_dev->sfnum); 46 } 47 static DEVICE_ATTR_RO(sfnum); 48 49 static struct attribute *sf_device_attrs[] = { 50 &dev_attr_sfnum.attr, 51 NULL, 52 }; 53 54 static const struct attribute_group sf_attr_group = { 55 .attrs = sf_device_attrs, 56 }; 57 58 static const struct attribute_group *sf_attr_groups[2] = { 59 &sf_attr_group, 60 NULL 61 }; 62 63 static void mlx5_sf_dev_release(struct device *device) 64 { 65 struct auxiliary_device *adev = container_of(device, struct auxiliary_device, dev); 66 struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); 67 68 mlx5_adev_idx_free(adev->id); 69 kfree(sf_dev); 70 } 71 72 static void mlx5_sf_dev_remove(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev) 73 { 74 int id; 75 76 id = sf_dev->adev.id; 77 trace_mlx5_sf_dev_del(dev, sf_dev, id); 78 79 auxiliary_device_delete(&sf_dev->adev); 80 auxiliary_device_uninit(&sf_dev->adev); 81 } 82 83 static void mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u16 fn_id, u32 sfnum) 84 { 85 struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; 86 struct mlx5_sf_dev *sf_dev; 87 struct pci_dev *pdev; 88 int err; 89 int id; 90 91 id = mlx5_adev_idx_alloc(); 92 if (id < 0) { 93 err = id; 94 goto add_err; 95 } 96 97 sf_dev = kzalloc(sizeof(*sf_dev), GFP_KERNEL); 98 if (!sf_dev) { 99 mlx5_adev_idx_free(id); 100 err = -ENOMEM; 101 goto add_err; 102 } 103 pdev = dev->pdev; 104 sf_dev->adev.id = id; 105 sf_dev->adev.name = MLX5_SF_DEV_ID_NAME; 106 sf_dev->adev.dev.release = mlx5_sf_dev_release; 107 sf_dev->adev.dev.parent = &pdev->dev; 108 sf_dev->adev.dev.groups = sf_attr_groups; 109 sf_dev->sfnum = sfnum; 110 sf_dev->parent_mdev = dev; 111 sf_dev->fn_id = fn_id; 112 113 if (!table->max_sfs) { 114 mlx5_adev_idx_free(id); 115 kfree(sf_dev); 116 err = -EOPNOTSUPP; 117 goto add_err; 118 } 119 sf_dev->bar_base_addr = table->base_address + (sf_index * table->sf_bar_length); 120 121 trace_mlx5_sf_dev_add(dev, sf_dev, id); 122 123 err = auxiliary_device_init(&sf_dev->adev); 124 if (err) { 125 mlx5_adev_idx_free(id); 126 kfree(sf_dev); 127 goto add_err; 128 } 129 130 err = auxiliary_device_add(&sf_dev->adev); 131 if (err) { 132 put_device(&sf_dev->adev.dev); 133 goto add_err; 134 } 135 136 err = xa_insert(&table->devices, sf_index, sf_dev, GFP_KERNEL); 137 if (err) 138 goto xa_err; 139 return; 140 141 xa_err: 142 mlx5_sf_dev_remove(dev, sf_dev); 143 add_err: 144 mlx5_core_err(dev, "SF DEV: fail device add for index=%d sfnum=%d err=%d\n", 145 sf_index, sfnum, err); 146 } 147 148 static void mlx5_sf_dev_del(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev, u16 sf_index) 149 { 150 struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; 151 152 xa_erase(&table->devices, sf_index); 153 mlx5_sf_dev_remove(dev, sf_dev); 154 } 155 156 static int 157 mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_code, void *data) 158 { 159 struct mlx5_sf_dev_table *table = container_of(nb, struct mlx5_sf_dev_table, nb); 160 const struct mlx5_vhca_state_event *event = data; 161 struct mlx5_sf_dev *sf_dev; 162 u16 max_functions; 163 u16 sf_index; 164 u16 base_id; 165 166 max_functions = mlx5_sf_max_functions(table->dev); 167 if (!max_functions) 168 return 0; 169 170 base_id = MLX5_CAP_GEN(table->dev, sf_base_id); 171 if (event->function_id < base_id || event->function_id >= (base_id + max_functions)) 172 return 0; 173 174 sf_index = event->function_id - base_id; 175 mutex_lock(&table->table_lock); 176 sf_dev = xa_load(&table->devices, sf_index); 177 switch (event->new_vhca_state) { 178 case MLX5_VHCA_STATE_INVALID: 179 case MLX5_VHCA_STATE_ALLOCATED: 180 if (sf_dev) 181 mlx5_sf_dev_del(table->dev, sf_dev, sf_index); 182 break; 183 case MLX5_VHCA_STATE_TEARDOWN_REQUEST: 184 if (sf_dev) 185 mlx5_sf_dev_del(table->dev, sf_dev, sf_index); 186 else 187 mlx5_core_err(table->dev, 188 "SF DEV: teardown state for invalid dev index=%d fn_id=0x%x\n", 189 sf_index, event->sw_function_id); 190 break; 191 case MLX5_VHCA_STATE_ACTIVE: 192 if (!sf_dev) 193 mlx5_sf_dev_add(table->dev, sf_index, event->function_id, 194 event->sw_function_id); 195 break; 196 default: 197 break; 198 } 199 mutex_unlock(&table->table_lock); 200 return 0; 201 } 202 203 static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table) 204 { 205 struct mlx5_core_dev *dev = table->dev; 206 u16 max_functions; 207 u16 function_id; 208 int err = 0; 209 int i; 210 211 max_functions = mlx5_sf_max_functions(dev); 212 function_id = MLX5_CAP_GEN(dev, sf_base_id); 213 /* Arm the vhca context as the vhca event notifier */ 214 for (i = 0; i < max_functions; i++) { 215 err = mlx5_vhca_event_arm(dev, function_id); 216 if (err) 217 return err; 218 219 function_id++; 220 } 221 return 0; 222 } 223 224 static void mlx5_sf_dev_add_active_work(struct work_struct *work) 225 { 226 struct mlx5_sf_dev_table *table = container_of(work, struct mlx5_sf_dev_table, work); 227 u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; 228 struct mlx5_core_dev *dev = table->dev; 229 u16 max_functions; 230 u16 function_id; 231 u16 sw_func_id; 232 int err = 0; 233 u8 state; 234 int i; 235 236 max_functions = mlx5_sf_max_functions(dev); 237 function_id = MLX5_CAP_GEN(dev, sf_base_id); 238 for (i = 0; i < max_functions; i++, function_id++) { 239 if (table->stop_active_wq) 240 return; 241 err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out)); 242 if (err) 243 /* A failure of specific vhca doesn't mean others will 244 * fail as well. 245 */ 246 continue; 247 state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); 248 if (state != MLX5_VHCA_STATE_ACTIVE) 249 continue; 250 251 sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id); 252 mutex_lock(&table->table_lock); 253 /* Don't probe device which is already probe */ 254 if (!xa_load(&table->devices, i)) 255 mlx5_sf_dev_add(dev, i, function_id, sw_func_id); 256 /* There is a race where SF got inactive after the query 257 * above. e.g.: the query returns that the state of the 258 * SF is active, and after that the eswitch manager set it to 259 * inactive. 260 * This case cannot be managed in SW, since the probing of the 261 * SF is on one system, and the inactivation is on a different 262 * system. 263 * If the inactive is done after the SF perform init_hca(), 264 * the SF will fully probe and then removed. If it was 265 * done before init_hca(), the SF probe will fail. 266 */ 267 mutex_unlock(&table->table_lock); 268 } 269 } 270 271 /* In case SFs are generated externally, probe active SFs */ 272 static int mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table *table) 273 { 274 if (MLX5_CAP_GEN(table->dev, eswitch_manager)) 275 return 0; /* the table is local */ 276 277 /* Use a workqueue to probe active SFs, which are in large 278 * quantity and may take up to minutes to probe. 279 */ 280 table->active_wq = create_singlethread_workqueue("mlx5_active_sf"); 281 if (!table->active_wq) 282 return -ENOMEM; 283 INIT_WORK(&table->work, &mlx5_sf_dev_add_active_work); 284 queue_work(table->active_wq, &table->work); 285 return 0; 286 } 287 288 static void mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table *table) 289 { 290 if (table->active_wq) { 291 table->stop_active_wq = true; 292 destroy_workqueue(table->active_wq); 293 } 294 } 295 296 void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) 297 { 298 struct mlx5_sf_dev_table *table; 299 unsigned int max_sfs; 300 int err; 301 302 if (!mlx5_sf_dev_supported(dev) || !mlx5_vhca_event_supported(dev)) 303 return; 304 305 table = kzalloc(sizeof(*table), GFP_KERNEL); 306 if (!table) { 307 err = -ENOMEM; 308 goto table_err; 309 } 310 311 table->nb.notifier_call = mlx5_sf_dev_state_change_handler; 312 table->dev = dev; 313 if (MLX5_CAP_GEN(dev, max_num_sf)) 314 max_sfs = MLX5_CAP_GEN(dev, max_num_sf); 315 else 316 max_sfs = 1 << MLX5_CAP_GEN(dev, log_max_sf); 317 table->sf_bar_length = 1 << (MLX5_CAP_GEN(dev, log_min_sf_size) + 12); 318 table->base_address = pci_resource_start(dev->pdev, 2); 319 table->max_sfs = max_sfs; 320 xa_init(&table->devices); 321 mutex_init(&table->table_lock); 322 dev->priv.sf_dev_table = table; 323 324 err = mlx5_vhca_event_notifier_register(dev, &table->nb); 325 if (err) 326 goto vhca_err; 327 328 err = mlx5_sf_dev_queue_active_work(table); 329 if (err) 330 goto add_active_err; 331 332 err = mlx5_sf_dev_vhca_arm_all(table); 333 if (err) 334 goto arm_err; 335 mlx5_core_dbg(dev, "SF DEV: max sf devices=%d\n", max_sfs); 336 return; 337 338 arm_err: 339 mlx5_sf_dev_destroy_active_work(table); 340 add_active_err: 341 mlx5_vhca_event_notifier_unregister(dev, &table->nb); 342 vhca_err: 343 table->max_sfs = 0; 344 kfree(table); 345 dev->priv.sf_dev_table = NULL; 346 table_err: 347 mlx5_core_err(dev, "SF DEV table create err = %d\n", err); 348 } 349 350 static void mlx5_sf_dev_destroy_all(struct mlx5_sf_dev_table *table) 351 { 352 struct mlx5_sf_dev *sf_dev; 353 unsigned long index; 354 355 xa_for_each(&table->devices, index, sf_dev) { 356 xa_erase(&table->devices, index); 357 mlx5_sf_dev_remove(table->dev, sf_dev); 358 } 359 } 360 361 void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) 362 { 363 struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; 364 365 if (!table) 366 return; 367 368 mlx5_sf_dev_destroy_active_work(table); 369 mlx5_vhca_event_notifier_unregister(dev, &table->nb); 370 mutex_destroy(&table->table_lock); 371 372 /* Now that event handler is not running, it is safe to destroy 373 * the sf device without race. 374 */ 375 mlx5_sf_dev_destroy_all(table); 376 377 WARN_ON(!xa_empty(&table->devices)); 378 kfree(table); 379 dev->priv.sf_dev_table = NULL; 380 } 381