1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4  */
5 
6 /**
7  * DOC: Nitro Enclaves (NE) PCI device driver.
8  */
9 
10 #include <linux/delay.h>
11 #include <linux/device.h>
12 #include <linux/list.h>
13 #include <linux/module.h>
14 #include <linux/mutex.h>
15 #include <linux/nitro_enclaves.h>
16 #include <linux/pci.h>
17 #include <linux/types.h>
18 #include <linux/wait.h>
19 
20 #include "ne_misc_dev.h"
21 #include "ne_pci_dev.h"
22 
/**
 * NE_DEFAULT_TIMEOUT_MSECS - Default timeout to wait for a reply from
 *			      the NE PCI device.
 *
 * In this file the value bounds both the wait for a command reply and the
 * polling loop that waits for the device to report it is disabled.
 */
#define NE_DEFAULT_TIMEOUT_MSECS	(120000) /* 120 sec */
28 
/*
 * PCI IDs matched by this driver: the Amazon vendor ID paired with the NE
 * device ID. The table ends with a zeroed entry.
 */
static const struct pci_device_id ne_pci_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_NE) },
	{ 0, }
};

/* Publish the ID table as the module's "pci" device table. */
MODULE_DEVICE_TABLE(pci, ne_pci_ids);
35 
36 /**
37  * ne_submit_request() - Submit command request to the PCI device based on the
38  *			 command type.
39  * @pdev:		PCI device to send the command to.
40  * @cmd_type:		Command type of the request sent to the PCI device.
41  * @cmd_request:	Command request payload.
42  * @cmd_request_size:	Size of the command request payload.
43  *
44  * Context: Process context. This function is called with the ne_pci_dev mutex held.
45  */
46 static void ne_submit_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
47 			      void *cmd_request, size_t cmd_request_size)
48 {
49 	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
50 
51 	memcpy_toio(ne_pci_dev->iomem_base + NE_SEND_DATA, cmd_request, cmd_request_size);
52 
53 	iowrite32(cmd_type, ne_pci_dev->iomem_base + NE_COMMAND);
54 }
55 
56 /**
57  * ne_retrieve_reply() - Retrieve reply from the PCI device.
58  * @pdev:		PCI device to receive the reply from.
59  * @cmd_reply:		Command reply payload.
60  * @cmd_reply_size:	Size of the command reply payload.
61  *
62  * Context: Process context. This function is called with the ne_pci_dev mutex held.
63  */
64 static void ne_retrieve_reply(struct pci_dev *pdev, struct ne_pci_dev_cmd_reply *cmd_reply,
65 			      size_t cmd_reply_size)
66 {
67 	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
68 
69 	memcpy_fromio(cmd_reply, ne_pci_dev->iomem_base + NE_RECV_DATA, cmd_reply_size);
70 }
71 
72 /**
73  * ne_wait_for_reply() - Wait for a reply of a PCI device command.
74  * @pdev:	PCI device for which a reply is waited.
75  *
76  * Context: Process context. This function is called with the ne_pci_dev mutex held.
77  * Return:
78  * * 0 on success.
79  * * Negative return value on failure.
80  */
81 static int ne_wait_for_reply(struct pci_dev *pdev)
82 {
83 	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
84 	int rc = -EINVAL;
85 
86 	/*
87 	 * TODO: Update to _interruptible and handle interrupted wait event
88 	 * e.g. -ERESTARTSYS, incoming signals + update timeout, if needed.
89 	 */
90 	rc = wait_event_timeout(ne_pci_dev->cmd_reply_wait_q,
91 				atomic_read(&ne_pci_dev->cmd_reply_avail) != 0,
92 				msecs_to_jiffies(NE_DEFAULT_TIMEOUT_MSECS));
93 	if (!rc)
94 		return -ETIMEDOUT;
95 
96 	return 0;
97 }
98 
99 int ne_do_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
100 		  void *cmd_request, size_t cmd_request_size,
101 		  struct ne_pci_dev_cmd_reply *cmd_reply, size_t cmd_reply_size)
102 {
103 	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
104 	int rc = -EINVAL;
105 
106 	if (cmd_type <= INVALID_CMD || cmd_type >= MAX_CMD) {
107 		dev_err_ratelimited(&pdev->dev, "Invalid cmd type=%u\n", cmd_type);
108 
109 		return -EINVAL;
110 	}
111 
112 	if (!cmd_request) {
113 		dev_err_ratelimited(&pdev->dev, "Null cmd request for cmd type=%u\n",
114 				    cmd_type);
115 
116 		return -EINVAL;
117 	}
118 
119 	if (cmd_request_size > NE_SEND_DATA_SIZE) {
120 		dev_err_ratelimited(&pdev->dev, "Invalid req size=%zu for cmd type=%u\n",
121 				    cmd_request_size, cmd_type);
122 
123 		return -EINVAL;
124 	}
125 
126 	if (!cmd_reply) {
127 		dev_err_ratelimited(&pdev->dev, "Null cmd reply for cmd type=%u\n",
128 				    cmd_type);
129 
130 		return -EINVAL;
131 	}
132 
133 	if (cmd_reply_size > NE_RECV_DATA_SIZE) {
134 		dev_err_ratelimited(&pdev->dev, "Invalid reply size=%zu for cmd type=%u\n",
135 				    cmd_reply_size, cmd_type);
136 
137 		return -EINVAL;
138 	}
139 
140 	/*
141 	 * Use this mutex so that the PCI device handles one command request at
142 	 * a time.
143 	 */
144 	mutex_lock(&ne_pci_dev->pci_dev_mutex);
145 
146 	atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
147 
148 	ne_submit_request(pdev, cmd_type, cmd_request, cmd_request_size);
149 
150 	rc = ne_wait_for_reply(pdev);
151 	if (rc < 0) {
152 		dev_err_ratelimited(&pdev->dev, "Error in wait for reply for cmd type=%u [rc=%d]\n",
153 				    cmd_type, rc);
154 
155 		goto unlock_mutex;
156 	}
157 
158 	ne_retrieve_reply(pdev, cmd_reply, cmd_reply_size);
159 
160 	atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
161 
162 	if (cmd_reply->rc < 0) {
163 		rc = cmd_reply->rc;
164 
165 		dev_err_ratelimited(&pdev->dev, "Error in cmd process logic, cmd type=%u [rc=%d]\n",
166 				    cmd_type, rc);
167 
168 		goto unlock_mutex;
169 	}
170 
171 	rc = 0;
172 
173 unlock_mutex:
174 	mutex_unlock(&ne_pci_dev->pci_dev_mutex);
175 
176 	return rc;
177 }
178 
179 /**
180  * ne_reply_handler() - Interrupt handler for retrieving a reply matching a
181  *			request sent to the PCI device for enclave lifetime
182  *			management.
183  * @irq:	Received interrupt for a reply sent by the PCI device.
184  * @args:	PCI device private data structure.
185  *
186  * Context: Interrupt context.
187  * Return:
188  * * IRQ_HANDLED on handled interrupt.
189  */
190 static irqreturn_t ne_reply_handler(int irq, void *args)
191 {
192 	struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;
193 
194 	atomic_set(&ne_pci_dev->cmd_reply_avail, 1);
195 
196 	/* TODO: Update to _interruptible. */
197 	wake_up(&ne_pci_dev->cmd_reply_wait_q);
198 
199 	return IRQ_HANDLED;
200 }
201 
202 /**
203  * ne_event_work_handler() - Work queue handler for notifying enclaves on a
204  *			     state change received by the event interrupt
205  *			     handler.
206  * @work:	Item containing the NE PCI device for which an out-of-band event
207  *		was issued.
208  *
209  * An out-of-band event is being issued by the Nitro Hypervisor when at least
210  * one enclave is changing state without client interaction.
211  *
212  * Context: Work queue context.
213  */
214 static void ne_event_work_handler(struct work_struct *work)
215 {
216 	struct ne_pci_dev_cmd_reply cmd_reply = {};
217 	struct ne_enclave *ne_enclave = NULL;
218 	struct ne_pci_dev *ne_pci_dev =
219 		container_of(work, struct ne_pci_dev, notify_work);
220 	struct pci_dev *pdev = ne_pci_dev->pdev;
221 	int rc = -EINVAL;
222 	struct slot_info_req slot_info_req = {};
223 
224 	mutex_lock(&ne_pci_dev->enclaves_list_mutex);
225 
226 	/*
227 	 * Iterate over all enclaves registered for the Nitro Enclaves
228 	 * PCI device and determine for which enclave(s) the out-of-band event
229 	 * is corresponding to.
230 	 */
231 	list_for_each_entry(ne_enclave, &ne_pci_dev->enclaves_list, enclave_list_entry) {
232 		mutex_lock(&ne_enclave->enclave_info_mutex);
233 
234 		/*
235 		 * Enclaves that were never started cannot receive out-of-band
236 		 * events.
237 		 */
238 		if (ne_enclave->state != NE_STATE_RUNNING)
239 			goto unlock;
240 
241 		slot_info_req.slot_uid = ne_enclave->slot_uid;
242 
243 		rc = ne_do_request(pdev, SLOT_INFO,
244 				   &slot_info_req, sizeof(slot_info_req),
245 				   &cmd_reply, sizeof(cmd_reply));
246 		if (rc < 0)
247 			dev_err(&pdev->dev, "Error in slot info [rc=%d]\n", rc);
248 
249 		/* Notify enclave process that the enclave state changed. */
250 		if (ne_enclave->state != cmd_reply.state) {
251 			ne_enclave->state = cmd_reply.state;
252 
253 			ne_enclave->has_event = true;
254 
255 			wake_up_interruptible(&ne_enclave->eventq);
256 		}
257 
258 unlock:
259 		 mutex_unlock(&ne_enclave->enclave_info_mutex);
260 	}
261 
262 	mutex_unlock(&ne_pci_dev->enclaves_list_mutex);
263 }
264 
265 /**
266  * ne_event_handler() - Interrupt handler for PCI device out-of-band events.
267  *			This interrupt does not supply any data in the MMIO
268  *			region. It notifies a change in the state of any of
269  *			the launched enclaves.
270  * @irq:	Received interrupt for an out-of-band event.
271  * @args:	PCI device private data structure.
272  *
273  * Context: Interrupt context.
274  * Return:
275  * * IRQ_HANDLED on handled interrupt.
276  */
277 static irqreturn_t ne_event_handler(int irq, void *args)
278 {
279 	struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;
280 
281 	queue_work(ne_pci_dev->event_wq, &ne_pci_dev->notify_work);
282 
283 	return IRQ_HANDLED;
284 }
285 
286 /**
287  * ne_setup_msix() - Setup MSI-X vectors for the PCI device.
288  * @pdev:	PCI device to setup the MSI-X for.
289  *
290  * Context: Process context.
291  * Return:
292  * * 0 on success.
293  * * Negative return value on failure.
294  */
295 static int ne_setup_msix(struct pci_dev *pdev)
296 {
297 	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
298 	int nr_vecs = 0;
299 	int rc = -EINVAL;
300 
301 	nr_vecs = pci_msix_vec_count(pdev);
302 	if (nr_vecs < 0) {
303 		rc = nr_vecs;
304 
305 		dev_err(&pdev->dev, "Error in getting vec count [rc=%d]\n", rc);
306 
307 		return rc;
308 	}
309 
310 	rc = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
311 	if (rc < 0) {
312 		dev_err(&pdev->dev, "Error in alloc MSI-X vecs [rc=%d]\n", rc);
313 
314 		return rc;
315 	}
316 
317 	/*
318 	 * This IRQ gets triggered every time the PCI device responds to a
319 	 * command request. The reply is then retrieved, reading from the MMIO
320 	 * space of the PCI device.
321 	 */
322 	rc = request_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_reply_handler,
323 			 0, "enclave_cmd", ne_pci_dev);
324 	if (rc < 0) {
325 		dev_err(&pdev->dev, "Error in request irq reply [rc=%d]\n", rc);
326 
327 		goto free_irq_vectors;
328 	}
329 
330 	ne_pci_dev->event_wq = create_singlethread_workqueue("ne_pci_dev_wq");
331 	if (!ne_pci_dev->event_wq) {
332 		rc = -ENOMEM;
333 
334 		dev_err(&pdev->dev, "Cannot get wq for dev events [rc=%d]\n", rc);
335 
336 		goto free_reply_irq_vec;
337 	}
338 
339 	INIT_WORK(&ne_pci_dev->notify_work, ne_event_work_handler);
340 
341 	/*
342 	 * This IRQ gets triggered every time any enclave's state changes. Its
343 	 * handler then scans for the changes and propagates them to the user
344 	 * space.
345 	 */
346 	rc = request_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_event_handler,
347 			 0, "enclave_evt", ne_pci_dev);
348 	if (rc < 0) {
349 		dev_err(&pdev->dev, "Error in request irq event [rc=%d]\n", rc);
350 
351 		goto destroy_wq;
352 	}
353 
354 	return 0;
355 
356 destroy_wq:
357 	destroy_workqueue(ne_pci_dev->event_wq);
358 free_reply_irq_vec:
359 	free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
360 free_irq_vectors:
361 	pci_free_irq_vectors(pdev);
362 
363 	return rc;
364 }
365 
366 /**
367  * ne_teardown_msix() - Teardown MSI-X vectors for the PCI device.
368  * @pdev:	PCI device to teardown the MSI-X for.
369  *
370  * Context: Process context.
371  */
372 static void ne_teardown_msix(struct pci_dev *pdev)
373 {
374 	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
375 
376 	free_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_pci_dev);
377 
378 	flush_work(&ne_pci_dev->notify_work);
379 	flush_workqueue(ne_pci_dev->event_wq);
380 	destroy_workqueue(ne_pci_dev->event_wq);
381 
382 	free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
383 
384 	pci_free_irq_vectors(pdev);
385 }
386 
387 /**
388  * ne_pci_dev_enable() - Select the PCI device version and enable it.
389  * @pdev:	PCI device to select version for and then enable.
390  *
391  * Context: Process context.
392  * Return:
393  * * 0 on success.
394  * * Negative return value on failure.
395  */
396 static int ne_pci_dev_enable(struct pci_dev *pdev)
397 {
398 	u8 dev_enable_reply = 0;
399 	u16 dev_version_reply = 0;
400 	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
401 
402 	iowrite16(NE_VERSION_MAX, ne_pci_dev->iomem_base + NE_VERSION);
403 
404 	dev_version_reply = ioread16(ne_pci_dev->iomem_base + NE_VERSION);
405 	if (dev_version_reply != NE_VERSION_MAX) {
406 		dev_err(&pdev->dev, "Error in pci dev version cmd\n");
407 
408 		return -EIO;
409 	}
410 
411 	iowrite8(NE_ENABLE_ON, ne_pci_dev->iomem_base + NE_ENABLE);
412 
413 	dev_enable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
414 	if (dev_enable_reply != NE_ENABLE_ON) {
415 		dev_err(&pdev->dev, "Error in pci dev enable cmd\n");
416 
417 		return -EIO;
418 	}
419 
420 	return 0;
421 }
422 
423 /**
424  * ne_pci_dev_disable() - Disable the PCI device.
425  * @pdev:	PCI device to disable.
426  *
427  * Context: Process context.
428  */
429 static void ne_pci_dev_disable(struct pci_dev *pdev)
430 {
431 	u8 dev_disable_reply = 0;
432 	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
433 	const unsigned int sleep_time = 10; /* 10 ms */
434 	unsigned int sleep_time_count = 0;
435 
436 	iowrite8(NE_ENABLE_OFF, ne_pci_dev->iomem_base + NE_ENABLE);
437 
438 	/*
439 	 * Check for NE_ENABLE_OFF in a loop, to handle cases when the device
440 	 * state is not immediately set to disabled and going through a
441 	 * transitory state of disabling.
442 	 */
443 	while (sleep_time_count < NE_DEFAULT_TIMEOUT_MSECS) {
444 		dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
445 		if (dev_disable_reply == NE_ENABLE_OFF)
446 			return;
447 
448 		msleep_interruptible(sleep_time);
449 		sleep_time_count += sleep_time;
450 	}
451 
452 	dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
453 	if (dev_disable_reply != NE_ENABLE_OFF)
454 		dev_err(&pdev->dev, "Error in pci dev disable cmd\n");
455 }
456 
457 /**
458  * ne_pci_probe() - Probe function for the NE PCI device.
459  * @pdev:	PCI device to match with the NE PCI driver.
460  * @id :	PCI device id table associated with the NE PCI driver.
461  *
462  * Context: Process context.
463  * Return:
464  * * 0 on success.
465  * * Negative return value on failure.
466  */
467 static int ne_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
468 {
469 	struct ne_pci_dev *ne_pci_dev = NULL;
470 	int rc = -EINVAL;
471 
472 	ne_pci_dev = kzalloc(sizeof(*ne_pci_dev), GFP_KERNEL);
473 	if (!ne_pci_dev)
474 		return -ENOMEM;
475 
476 	rc = pci_enable_device(pdev);
477 	if (rc < 0) {
478 		dev_err(&pdev->dev, "Error in pci dev enable [rc=%d]\n", rc);
479 
480 		goto free_ne_pci_dev;
481 	}
482 
483 	rc = pci_request_regions_exclusive(pdev, "nitro_enclaves");
484 	if (rc < 0) {
485 		dev_err(&pdev->dev, "Error in pci request regions [rc=%d]\n", rc);
486 
487 		goto disable_pci_dev;
488 	}
489 
490 	ne_pci_dev->iomem_base = pci_iomap(pdev, PCI_BAR_NE, 0);
491 	if (!ne_pci_dev->iomem_base) {
492 		rc = -ENOMEM;
493 
494 		dev_err(&pdev->dev, "Error in pci iomap [rc=%d]\n", rc);
495 
496 		goto release_pci_regions;
497 	}
498 
499 	pci_set_drvdata(pdev, ne_pci_dev);
500 
501 	rc = ne_setup_msix(pdev);
502 	if (rc < 0) {
503 		dev_err(&pdev->dev, "Error in pci dev msix setup [rc=%d]\n", rc);
504 
505 		goto iounmap_pci_bar;
506 	}
507 
508 	ne_pci_dev_disable(pdev);
509 
510 	rc = ne_pci_dev_enable(pdev);
511 	if (rc < 0) {
512 		dev_err(&pdev->dev, "Error in ne_pci_dev enable [rc=%d]\n", rc);
513 
514 		goto teardown_msix;
515 	}
516 
517 	atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
518 	init_waitqueue_head(&ne_pci_dev->cmd_reply_wait_q);
519 	INIT_LIST_HEAD(&ne_pci_dev->enclaves_list);
520 	mutex_init(&ne_pci_dev->enclaves_list_mutex);
521 	mutex_init(&ne_pci_dev->pci_dev_mutex);
522 	ne_pci_dev->pdev = pdev;
523 
524 	ne_devs.ne_pci_dev = ne_pci_dev;
525 
526 	rc = misc_register(ne_devs.ne_misc_dev);
527 	if (rc < 0) {
528 		dev_err(&pdev->dev, "Error in misc dev register [rc=%d]\n", rc);
529 
530 		goto disable_ne_pci_dev;
531 	}
532 
533 	return 0;
534 
535 disable_ne_pci_dev:
536 	ne_devs.ne_pci_dev = NULL;
537 	ne_pci_dev_disable(pdev);
538 teardown_msix:
539 	ne_teardown_msix(pdev);
540 iounmap_pci_bar:
541 	pci_set_drvdata(pdev, NULL);
542 	pci_iounmap(pdev, ne_pci_dev->iomem_base);
543 release_pci_regions:
544 	pci_release_regions(pdev);
545 disable_pci_dev:
546 	pci_disable_device(pdev);
547 free_ne_pci_dev:
548 	kfree(ne_pci_dev);
549 
550 	return rc;
551 }
552 
553 /**
554  * ne_pci_remove() - Remove function for the NE PCI device.
555  * @pdev:	PCI device associated with the NE PCI driver.
556  *
557  * Context: Process context.
558  */
559 static void ne_pci_remove(struct pci_dev *pdev)
560 {
561 	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
562 
563 	misc_deregister(ne_devs.ne_misc_dev);
564 
565 	ne_devs.ne_pci_dev = NULL;
566 
567 	ne_pci_dev_disable(pdev);
568 
569 	ne_teardown_msix(pdev);
570 
571 	pci_set_drvdata(pdev, NULL);
572 
573 	pci_iounmap(pdev, ne_pci_dev->iomem_base);
574 
575 	pci_release_regions(pdev);
576 
577 	pci_disable_device(pdev);
578 
579 	kfree(ne_pci_dev);
580 }
581 
582 /**
583  * ne_pci_shutdown() - Shutdown function for the NE PCI device.
584  * @pdev:	PCI device associated with the NE PCI driver.
585  *
586  * Context: Process context.
587  */
588 static void ne_pci_shutdown(struct pci_dev *pdev)
589 {
590 	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
591 
592 	if (!ne_pci_dev)
593 		return;
594 
595 	misc_deregister(ne_devs.ne_misc_dev);
596 
597 	ne_devs.ne_pci_dev = NULL;
598 
599 	ne_pci_dev_disable(pdev);
600 
601 	ne_teardown_msix(pdev);
602 
603 	pci_set_drvdata(pdev, NULL);
604 
605 	pci_iounmap(pdev, ne_pci_dev->iomem_base);
606 
607 	pci_release_regions(pdev);
608 
609 	pci_disable_device(pdev);
610 
611 	kfree(ne_pci_dev);
612 }
613 
614 /*
615  * TODO: Add suspend / resume functions for power management w/ CONFIG_PM, if
616  * needed.
617  */
/*
 * NE PCI device driver.
 *
 * Ties the ID table to the probe, remove and shutdown callbacks defined in
 * this file. The symbol is not static.
 */
struct pci_driver ne_pci_driver = {
	.name		= "nitro_enclaves",
	.id_table	= ne_pci_ids,
	.probe		= ne_pci_probe,
	.remove		= ne_pci_remove,
	.shutdown	= ne_pci_shutdown,
};
626