xref: /openbmc/linux/drivers/firmware/qemu_fw_cfg.c (revision ea47eed33a3fe3d919e6e3cf4e4eb5507b817188)
1 /*
2  * drivers/firmware/qemu_fw_cfg.c
3  *
4  * Copyright 2015 Carnegie Mellon University
5  *
6  * Expose entries from QEMU's firmware configuration (fw_cfg) device in
7  * sysfs (read-only, under "/sys/firmware/qemu_fw_cfg/...").
8  *
9  * The fw_cfg device may be instantiated via either an ACPI node (on x86
10  * and select subsets of aarch64), a Device Tree node (on arm), or using
11  * a kernel module (or command line) parameter with the following syntax:
12  *
13  *      [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
14  * or
15  *      [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
16  *
17  * where:
18  *      <size>     := size of ioport or mmio range
19  *      <base>     := physical base address of ioport or mmio range
20  *      <ctrl_off> := (optional) offset of control register
21  *      <data_off> := (optional) offset of data register
22  *      <dma_off> := (optional) offset of dma register
23  *
24  * e.g.:
25  *      qemu_fw_cfg.ioport=12@0x510:0:1:4	(the default on x86)
26  * or
27  *      qemu_fw_cfg.mmio=16@0x9020000:8:0:16	(the default on arm)
28  */
29 
30 #include <linux/module.h>
31 #include <linux/platform_device.h>
32 #include <linux/acpi.h>
33 #include <linux/slab.h>
34 #include <linux/io.h>
35 #include <linux/ioport.h>
36 #include <uapi/linux/qemu_fw_cfg.h>
37 #include <linux/delay.h>
38 #include <linux/crash_dump.h>
39 #include <linux/crash_core.h>
40 
41 MODULE_AUTHOR("Gabriel L. Somlo <somlo@cmu.edu>");
42 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
43 MODULE_LICENSE("GPL");
44 
45 /* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
46 static u32 fw_cfg_rev;
47 
48 /* fw_cfg device i/o register addresses */
49 static bool fw_cfg_is_mmio;
50 static phys_addr_t fw_cfg_p_base;
51 static resource_size_t fw_cfg_p_size;
52 static void __iomem *fw_cfg_dev_base;
53 static void __iomem *fw_cfg_reg_ctrl;
54 static void __iomem *fw_cfg_reg_data;
55 static void __iomem *fw_cfg_reg_dma;
56 
57 /* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
58 static DEFINE_MUTEX(fw_cfg_dev_lock);
59 
60 /* pick appropriate endianness for selector key */
61 static void fw_cfg_sel_endianness(u16 key)
62 {
63 	if (fw_cfg_is_mmio)
64 		iowrite16be(key, fw_cfg_reg_ctrl);
65 	else
66 		iowrite16(key, fw_cfg_reg_ctrl);
67 }
68 
69 #ifdef CONFIG_CRASH_CORE
70 static inline bool fw_cfg_dma_enabled(void)
71 {
72 	return (fw_cfg_rev & FW_CFG_VERSION_DMA) && fw_cfg_reg_dma;
73 }
74 
75 /* qemu fw_cfg device is sync today, but spec says it may become async */
76 static void fw_cfg_wait_for_control(struct fw_cfg_dma_access *d)
77 {
78 	for (;;) {
79 		u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
80 
81 		/* do not reorder the read to d->control */
82 		rmb();
83 		if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
84 			return;
85 
86 		cpu_relax();
87 	}
88 }
89 
90 static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
91 {
92 	phys_addr_t dma;
93 	struct fw_cfg_dma_access *d = NULL;
94 	ssize_t ret = length;
95 
96 	d = kmalloc(sizeof(*d), GFP_KERNEL);
97 	if (!d) {
98 		ret = -ENOMEM;
99 		goto end;
100 	}
101 
102 	/* fw_cfg device does not need IOMMU protection, so use physical addresses */
103 	*d = (struct fw_cfg_dma_access) {
104 		.address = cpu_to_be64(address ? virt_to_phys(address) : 0),
105 		.length = cpu_to_be32(length),
106 		.control = cpu_to_be32(control)
107 	};
108 
109 	dma = virt_to_phys(d);
110 
111 	iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
112 	/* force memory to sync before notifying device via MMIO */
113 	wmb();
114 	iowrite32be(dma, fw_cfg_reg_dma + 4);
115 
116 	fw_cfg_wait_for_control(d);
117 
118 	if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
119 		ret = -EIO;
120 	}
121 
122 end:
123 	kfree(d);
124 
125 	return ret;
126 }
127 #endif
128 
129 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
130 static ssize_t fw_cfg_read_blob(u16 key,
131 				void *buf, loff_t pos, size_t count)
132 {
133 	u32 glk = -1U;
134 	acpi_status status;
135 
136 	/* If we have ACPI, ensure mutual exclusion against any potential
137 	 * device access by the firmware, e.g. via AML methods:
138 	 */
139 	status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
140 	if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
141 		/* Should never get here */
142 		WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
143 		memset(buf, 0, count);
144 		return -EINVAL;
145 	}
146 
147 	mutex_lock(&fw_cfg_dev_lock);
148 	fw_cfg_sel_endianness(key);
149 	while (pos-- > 0)
150 		ioread8(fw_cfg_reg_data);
151 	ioread8_rep(fw_cfg_reg_data, buf, count);
152 	mutex_unlock(&fw_cfg_dev_lock);
153 
154 	acpi_release_global_lock(glk);
155 	return count;
156 }
157 
158 #ifdef CONFIG_CRASH_CORE
159 /* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
160 static ssize_t fw_cfg_write_blob(u16 key,
161 				 void *buf, loff_t pos, size_t count)
162 {
163 	u32 glk = -1U;
164 	acpi_status status;
165 	ssize_t ret = count;
166 
167 	/* If we have ACPI, ensure mutual exclusion against any potential
168 	 * device access by the firmware, e.g. via AML methods:
169 	 */
170 	status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
171 	if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
172 		/* Should never get here */
173 		WARN(1, "%s: Failed to lock ACPI!\n", __func__);
174 		return -EINVAL;
175 	}
176 
177 	mutex_lock(&fw_cfg_dev_lock);
178 	if (pos == 0) {
179 		ret = fw_cfg_dma_transfer(buf, count, key << 16
180 					  | FW_CFG_DMA_CTL_SELECT
181 					  | FW_CFG_DMA_CTL_WRITE);
182 	} else {
183 		fw_cfg_sel_endianness(key);
184 		ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
185 		if (ret < 0)
186 			goto end;
187 		ret = fw_cfg_dma_transfer(buf, count, FW_CFG_DMA_CTL_WRITE);
188 	}
189 
190 end:
191 	mutex_unlock(&fw_cfg_dev_lock);
192 
193 	acpi_release_global_lock(glk);
194 
195 	return ret;
196 }
197 #endif /* CONFIG_CRASH_CORE */
198 
199 /* clean up fw_cfg device i/o */
200 static void fw_cfg_io_cleanup(void)
201 {
202 	if (fw_cfg_is_mmio) {
203 		iounmap(fw_cfg_dev_base);
204 		release_mem_region(fw_cfg_p_base, fw_cfg_p_size);
205 	} else {
206 		ioport_unmap(fw_cfg_dev_base);
207 		release_region(fw_cfg_p_base, fw_cfg_p_size);
208 	}
209 }
210 
211 /* arch-specific ctrl & data register offsets are not available in ACPI, DT; the defaults below apply unless FW_CFG_CTRL_OFF and FW_CFG_DATA_OFF are both already defined */
212 #if !(defined(FW_CFG_CTRL_OFF) && defined(FW_CFG_DATA_OFF))
213 # if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
214 #  define FW_CFG_CTRL_OFF 0x08
215 #  define FW_CFG_DATA_OFF 0x00
216 #  define FW_CFG_DMA_OFF 0x10
217 # elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m */
/* no FW_CFG_DMA_OFF in this branch: fw_cfg_do_platform_probe() then sets the DMA register only from a "dma" resource */
218 #  define FW_CFG_CTRL_OFF 0x00
219 #  define FW_CFG_DATA_OFF 0x02
220 # elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
221 #  define FW_CFG_CTRL_OFF 0x00
222 #  define FW_CFG_DATA_OFF 0x01
223 #  define FW_CFG_DMA_OFF 0x04
224 # else
225 #  error "QEMU FW_CFG not available on this architecture!"
226 # endif
227 #endif
228 
229 /* initialize fw_cfg device i/o from platform data */
230 static int fw_cfg_do_platform_probe(struct platform_device *pdev)
231 {
232 	char sig[FW_CFG_SIG_SIZE];
233 	struct resource *range, *ctrl, *data, *dma;
234 
235 	/* acquire i/o range details */
236 	fw_cfg_is_mmio = false;
237 	range = platform_get_resource(pdev, IORESOURCE_IO, 0);
238 	if (!range) {
239 		fw_cfg_is_mmio = true;
240 		range = platform_get_resource(pdev, IORESOURCE_MEM, 0);
241 		if (!range)
242 			return -EINVAL;
243 	}
244 	fw_cfg_p_base = range->start;
245 	fw_cfg_p_size = resource_size(range);
246 
247 	if (fw_cfg_is_mmio) {
248 		if (!request_mem_region(fw_cfg_p_base,
249 					fw_cfg_p_size, "fw_cfg_mem"))
250 			return -EBUSY;
251 		fw_cfg_dev_base = ioremap(fw_cfg_p_base, fw_cfg_p_size);
252 		if (!fw_cfg_dev_base) {
253 			release_mem_region(fw_cfg_p_base, fw_cfg_p_size);
254 			return -EFAULT;
255 		}
256 	} else {
257 		if (!request_region(fw_cfg_p_base,
258 				    fw_cfg_p_size, "fw_cfg_io"))
259 			return -EBUSY;
260 		fw_cfg_dev_base = ioport_map(fw_cfg_p_base, fw_cfg_p_size);
261 		if (!fw_cfg_dev_base) {
262 			release_region(fw_cfg_p_base, fw_cfg_p_size);
263 			return -EFAULT;
264 		}
265 	}
266 
267 	/* were custom register offsets provided (e.g. on the command line)? */
268 	ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
269 	data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
270 	dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
271 	if (ctrl && data) {
272 		fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
273 		fw_cfg_reg_data = fw_cfg_dev_base + data->start;
274 	} else {
275 		/* use architecture-specific offsets */
276 		fw_cfg_reg_ctrl = fw_cfg_dev_base + FW_CFG_CTRL_OFF;
277 		fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
278 	}
279 
280 	if (dma)
281 		fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
282 #ifdef FW_CFG_DMA_OFF
283 	else
284 		fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
285 #endif
286 
287 	/* verify fw_cfg device signature */
288 	if (fw_cfg_read_blob(FW_CFG_SIGNATURE, sig,
289 				0, FW_CFG_SIG_SIZE) < 0 ||
290 		memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
291 		fw_cfg_io_cleanup();
292 		return -ENODEV;
293 	}
294 
295 	return 0;
296 }
297 
298 static ssize_t fw_cfg_showrev(struct kobject *k, struct attribute *a, char *buf)
299 {
300 	return sprintf(buf, "%u\n", fw_cfg_rev);
301 }
302 
/*
 * "rev" attribute: owner-readable (S_IRUSR), shown by fw_cfg_showrev().
 * fw_cfg_sysfs_probe() adds it to the top-level directory with
 * sysfs_create_file().
 * NOTE(review): the anonymous struct looks like it mirrors the start of
 * struct kobj_attribute (attribute, then show) - confirm.
 */
303 static const struct {
304 	struct attribute attr;
305 	ssize_t (*show)(struct kobject *k, struct attribute *a, char *buf);
306 } fw_cfg_rev_attr = {
307 	.attr = { .name = "rev", .mode = S_IRUSR },
308 	.show = fw_cfg_showrev,
309 };
310 
311 /* fw_cfg_sysfs_entry type: one fw_cfg file registered under by_key/ */
312 struct fw_cfg_sysfs_entry {
313 	struct kobject kobj;	/* embedded kobject; to_entry() maps back from it */
314 	u32 size;	/* blob size in bytes, converted to CPU byte order */
315 	u16 select;	/* selector key, converted to CPU byte order */
316 	char name[FW_CFG_MAX_FILE_PATH];	/* file name copied from the directory entry */
317 	struct list_head list;	/* link in fw_cfg_entry_cache */
318 };
319 
320 #ifdef CONFIG_CRASH_CORE
321 static ssize_t fw_cfg_write_vmcoreinfo(const struct fw_cfg_file *f)
322 {
323 	static struct fw_cfg_vmcoreinfo *data;
324 	ssize_t ret;
325 
326 	data = kmalloc(sizeof(struct fw_cfg_vmcoreinfo), GFP_KERNEL);
327 	if (!data)
328 		return -ENOMEM;
329 
330 	*data = (struct fw_cfg_vmcoreinfo) {
331 		.guest_format = cpu_to_le16(FW_CFG_VMCOREINFO_FORMAT_ELF),
332 		.size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
333 		.paddr = cpu_to_le64(paddr_vmcoreinfo_note())
334 	};
335 	/* spare ourself reading host format support for now since we
336 	 * don't know what else to format - host may ignore ours
337 	 */
338 	ret = fw_cfg_write_blob(be16_to_cpu(f->select), data,
339 				0, sizeof(struct fw_cfg_vmcoreinfo));
340 
341 	kfree(data);
342 	return ret;
343 }
344 #endif /* CONFIG_CRASH_CORE */
345 
346 /* get fw_cfg_sysfs_entry from kobject member */
347 static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
348 {
349 	return container_of(kobj, struct fw_cfg_sysfs_entry, kobj);
350 }
351 
352 /* fw_cfg_sysfs_attribute type: a per-entry attribute plus its show method */
353 struct fw_cfg_sysfs_attribute {
354 	struct attribute attr;	/* embedded attribute; to_attr() maps back from it */
355 	ssize_t (*show)(struct fw_cfg_sysfs_entry *entry, char *buf);	/* called by fw_cfg_sysfs_attr_show() */
356 };
357 
358 /* get fw_cfg_sysfs_attribute from attribute member */
359 static inline struct fw_cfg_sysfs_attribute *to_attr(struct attribute *attr)
360 {
361 	return container_of(attr, struct fw_cfg_sysfs_attribute, attr);
362 }
363 
364 /* global cache of fw_cfg_sysfs_entry objects, linked through their 'list' member */
365 static LIST_HEAD(fw_cfg_entry_cache);
366 
367 /* kobjects removed lazily by kernel, mutual exclusion needed: taken around the list updates in cache_enlist()/cache_delist() */
368 static DEFINE_SPINLOCK(fw_cfg_cache_lock);
369 
370 static inline void fw_cfg_sysfs_cache_enlist(struct fw_cfg_sysfs_entry *entry)
371 {
372 	spin_lock(&fw_cfg_cache_lock);
373 	list_add_tail(&entry->list, &fw_cfg_entry_cache);
374 	spin_unlock(&fw_cfg_cache_lock);
375 }
376 
377 static inline void fw_cfg_sysfs_cache_delist(struct fw_cfg_sysfs_entry *entry)
378 {
379 	spin_lock(&fw_cfg_cache_lock);
380 	list_del(&entry->list);
381 	spin_unlock(&fw_cfg_cache_lock);
382 }
383 
384 static void fw_cfg_sysfs_cache_cleanup(void)
385 {
386 	struct fw_cfg_sysfs_entry *entry, *next;
387 
388 	list_for_each_entry_safe(entry, next, &fw_cfg_entry_cache, list) {
389 		/* will end up invoking fw_cfg_sysfs_cache_delist()
390 		 * via each object's release() method (i.e. destructor)
391 		 */
392 		kobject_put(&entry->kobj);
393 	}
394 }
395 
396 /* default_attrs: per-entry attributes and show methods */
397 
/*
 * FW_CFG_SYSFS_ATTR(x) defines fw_cfg_sysfs_attr_x: an attribute named "x"
 * with mode S_IRUSR whose show method is fw_cfg_sysfs_show_x(), which must
 * therefore be declared before the macro is used.
 */
398 #define FW_CFG_SYSFS_ATTR(_attr) \
399 struct fw_cfg_sysfs_attribute fw_cfg_sysfs_attr_##_attr = { \
400 	.attr = { .name = __stringify(_attr), .mode = S_IRUSR }, \
401 	.show = fw_cfg_sysfs_show_##_attr, \
402 }
403 
404 static ssize_t fw_cfg_sysfs_show_size(struct fw_cfg_sysfs_entry *e, char *buf)
405 {
406 	return sprintf(buf, "%u\n", e->size);
407 }
408 
409 static ssize_t fw_cfg_sysfs_show_key(struct fw_cfg_sysfs_entry *e, char *buf)
410 {
411 	return sprintf(buf, "%u\n", e->select);
412 }
413 
414 static ssize_t fw_cfg_sysfs_show_name(struct fw_cfg_sysfs_entry *e, char *buf)
415 {
416 	return sprintf(buf, "%s\n", e->name);
417 }
418 
/* instantiate the per-entry "size", "key" and "name" attributes */
419 static FW_CFG_SYSFS_ATTR(size);
420 static FW_CFG_SYSFS_ATTR(key);
421 static FW_CFG_SYSFS_ATTR(name);
422 
/* NULL-terminated list used as default_attrs of fw_cfg_sysfs_entry_ktype */
423 static struct attribute *fw_cfg_sysfs_entry_attrs[] = {
424 	&fw_cfg_sysfs_attr_size.attr,
425 	&fw_cfg_sysfs_attr_key.attr,
426 	&fw_cfg_sysfs_attr_name.attr,
427 	NULL,
428 };
429 
430 /* sysfs_ops: find fw_cfg_[entry, attribute] and call appropriate show method */
431 static ssize_t fw_cfg_sysfs_attr_show(struct kobject *kobj, struct attribute *a,
432 				      char *buf)
433 {
434 	struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
435 	struct fw_cfg_sysfs_attribute *attr = to_attr(a);
436 
437 	return attr->show(entry, buf);
438 }
439 
/* sysfs_ops for fw_cfg entries: only .show is set, no .store is provided */
440 static const struct sysfs_ops fw_cfg_sysfs_attr_ops = {
441 	.show = fw_cfg_sysfs_attr_show,
442 };
443 
444 /* release: destructor, to be called via kobject_put() */
445 static void fw_cfg_sysfs_release_entry(struct kobject *kobj)
446 {
447 	struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
448 
449 	fw_cfg_sysfs_cache_delist(entry);
450 	kfree(entry);
451 }
452 
453 /* kobj_type: ties together all properties required to register an entry */
454 static struct kobj_type fw_cfg_sysfs_entry_ktype = {
455 	.default_attrs = fw_cfg_sysfs_entry_attrs,	/* size, key, name */
456 	.sysfs_ops = &fw_cfg_sysfs_attr_ops,	/* dispatches to each attribute's show() */
457 	.release = fw_cfg_sysfs_release_entry,	/* delists and frees the entry */
458 };
459 
460 /* raw-read method and attribute */
461 static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, struct kobject *kobj,
462 				     struct bin_attribute *bin_attr,
463 				     char *buf, loff_t pos, size_t count)
464 {
465 	struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
466 
467 	if (pos > entry->size)
468 		return -EINVAL;
469 
470 	if (count > entry->size - pos)
471 		count = entry->size - pos;
472 
473 	return fw_cfg_read_blob(entry->select, buf, pos, count);
474 }
475 
/* owner-readable binary attribute "raw", served by fw_cfg_sysfs_read_raw(); fw_cfg_register_file() adds it to every entry */
476 static struct bin_attribute fw_cfg_sysfs_attr_raw = {
477 	.attr = { .name = "raw", .mode = S_IRUSR },
478 	.read = fw_cfg_sysfs_read_raw,
479 };
480 
481 /*
482  * Create a kset subdirectory matching each '/' delimited dirname token
483  * in 'name', starting with sysfs kset/folder 'dir'; At the end, create
484  * a symlink directed at the given 'target'.
485  * NOTE: We do this on a best-effort basis, since 'name' is not guaranteed
486  * to be a well-behaved path name. Whenever a symlink vs. kset directory
487  * name collision occurs, the kernel will issue big scary warnings while
488  * refusing to add the offending link or directory. We follow up with our
489  * own, slightly less scary error messages explaining the situation :)
490  */
491 static int fw_cfg_build_symlink(struct kset *dir,
492 				struct kobject *target, const char *name)
493 {
494 	int ret;
495 	struct kset *subdir;
496 	struct kobject *ko;
497 	char *name_copy, *p, *tok;
498 
499 	if (!dir || !target || !name || !*name)
500 		return -EINVAL;
501 
502 	/* clone a copy of name for parsing */
503 	name_copy = p = kstrdup(name, GFP_KERNEL);
504 	if (!name_copy)
505 		return -ENOMEM;
506 
507 	/* create folders for each dirname token, then symlink for basename */
508 	while ((tok = strsep(&p, "/")) && *tok) {
509 
510 		/* last (basename) token? If so, add symlink here */
511 		if (!p || !*p) {
512 			ret = sysfs_create_link(&dir->kobj, target, tok);
513 			break;
514 		}
515 
516 		/* does the current dir contain an item named after tok ? */
517 		ko = kset_find_obj(dir, tok);
518 		if (ko) {
519 			/* drop reference added by kset_find_obj */
520 			kobject_put(ko);
521 
522 			/* ko MUST be a kset - we're about to use it as one ! */
523 			if (ko->ktype != dir->kobj.ktype) {
524 				ret = -EINVAL;
525 				break;
526 			}
527 
528 			/* descend into already existing subdirectory */
529 			dir = to_kset(ko);
530 		} else {
531 			/* create new subdirectory kset */
532 			subdir = kzalloc(sizeof(struct kset), GFP_KERNEL);
533 			if (!subdir) {
534 				ret = -ENOMEM;
535 				break;
536 			}
537 			subdir->kobj.kset = dir;
538 			subdir->kobj.ktype = dir->kobj.ktype;
539 			ret = kobject_set_name(&subdir->kobj, "%s", tok);
540 			if (ret) {
541 				kfree(subdir);
542 				break;
543 			}
544 			ret = kset_register(subdir);
545 			if (ret) {
546 				kfree(subdir);
547 				break;
548 			}
549 
550 			/* descend into newly created subdirectory */
551 			dir = subdir;
552 		}
553 	}
554 
555 	/* we're done with cloned copy of name */
556 	kfree(name_copy);
557 	return ret;
558 }
559 
560 /* recursively unregister fw_cfg/by_name/ kset directory tree */
561 static void fw_cfg_kset_unregister_recursive(struct kset *kset)
562 {
563 	struct kobject *k, *next;
564 
565 	list_for_each_entry_safe(k, next, &kset->list, entry)
566 		/* all set members are ksets too, but check just in case... */
567 		if (k->ktype == kset->kobj.ktype)
568 			fw_cfg_kset_unregister_recursive(to_kset(k));
569 
570 	/* symlinks are cleanly and automatically removed with the directory */
571 	kset_unregister(kset);
572 }
573 
574 /* kobjects & kset representing top-level, by_key, and by_name folders */
575 static struct kobject *fw_cfg_top_ko;	/* "qemu_fw_cfg" under firmware_kobj, created in fw_cfg_sysfs_init() */
576 static struct kobject *fw_cfg_sel_ko;	/* "by_key"; a non-NULL value also makes fw_cfg_sysfs_probe() return -EBUSY */
577 static struct kset *fw_cfg_fname_kset;	/* "by_name", root of the symlink tree */
578 
579 /* register an individual fw_cfg file */
580 static int fw_cfg_register_file(const struct fw_cfg_file *f)
581 {
582 	int err;
583 	struct fw_cfg_sysfs_entry *entry;
584 
585 #ifdef CONFIG_CRASH_CORE
586 	if (fw_cfg_dma_enabled() &&
587 		strcmp(f->name, FW_CFG_VMCOREINFO_FILENAME) == 0 &&
588 		!is_kdump_kernel()) {
589 		if (fw_cfg_write_vmcoreinfo(f) < 0)
590 			pr_warn("fw_cfg: failed to write vmcoreinfo");
591 	}
592 #endif
593 
594 	/* allocate new entry */
595 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
596 	if (!entry)
597 		return -ENOMEM;
598 
599 	/* set file entry information */
600 	entry->size = be32_to_cpu(f->size);
601 	entry->select = be16_to_cpu(f->select);
602 	memcpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH);
603 
604 	/* register entry under "/sys/firmware/qemu_fw_cfg/by_key/" */
605 	err = kobject_init_and_add(&entry->kobj, &fw_cfg_sysfs_entry_ktype,
606 				   fw_cfg_sel_ko, "%d", entry->select);
607 	if (err)
608 		goto err_register;
609 
610 	/* add raw binary content access */
611 	err = sysfs_create_bin_file(&entry->kobj, &fw_cfg_sysfs_attr_raw);
612 	if (err)
613 		goto err_add_raw;
614 
615 	/* try adding "/sys/firmware/qemu_fw_cfg/by_name/" symlink */
616 	fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->name);
617 
618 	/* success, add entry to global cache */
619 	fw_cfg_sysfs_cache_enlist(entry);
620 	return 0;
621 
622 err_add_raw:
623 	kobject_del(&entry->kobj);
624 err_register:
625 	kfree(entry);
626 	return err;
627 }
628 
629 /* iterate over all fw_cfg directory entries, registering each one */
630 static int fw_cfg_register_dir_entries(void)
631 {
632 	int ret = 0;
633 	__be32 files_count;
634 	u32 count, i;
635 	struct fw_cfg_file *dir;
636 	size_t dir_size;
637 
638 	ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, &files_count,
639 			0, sizeof(files_count));
640 	if (ret < 0)
641 		return ret;
642 
643 	count = be32_to_cpu(files_count);
644 	dir_size = count * sizeof(struct fw_cfg_file);
645 
646 	dir = kmalloc(dir_size, GFP_KERNEL);
647 	if (!dir)
648 		return -ENOMEM;
649 
650 	ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, dir,
651 			sizeof(files_count), dir_size);
652 	if (ret < 0)
653 		goto end;
654 
655 	for (i = 0; i < count; i++) {
656 		ret = fw_cfg_register_file(&dir[i]);
657 		if (ret)
658 			break;
659 	}
660 
661 end:
662 	kfree(dir);
663 	return ret;
664 }
665 
666 /* unregister top-level or by_key folder: kobject_del() then kobject_put(); used on the kobjects obtained from kobject_create_and_add() */
667 static inline void fw_cfg_kobj_cleanup(struct kobject *kobj)
668 {
669 	kobject_del(kobj);
670 	kobject_put(kobj);
671 }
672 
673 static int fw_cfg_sysfs_probe(struct platform_device *pdev)
674 {
675 	int err;
676 	__le32 rev;
677 
678 	/* NOTE: If we supported multiple fw_cfg devices, we'd first create
679 	 * a subdirectory named after e.g. pdev->id, then hang per-device
680 	 * by_key (and by_name) subdirectories underneath it. However, only
681 	 * one fw_cfg device exist system-wide, so if one was already found
682 	 * earlier, we might as well stop here.
683 	 */
684 	if (fw_cfg_sel_ko)
685 		return -EBUSY;
686 
687 	/* create by_key and by_name subdirs of /sys/firmware/qemu_fw_cfg/ */
688 	err = -ENOMEM;
689 	fw_cfg_sel_ko = kobject_create_and_add("by_key", fw_cfg_top_ko);
690 	if (!fw_cfg_sel_ko)
691 		goto err_sel;
692 	fw_cfg_fname_kset = kset_create_and_add("by_name", NULL, fw_cfg_top_ko);
693 	if (!fw_cfg_fname_kset)
694 		goto err_name;
695 
696 	/* initialize fw_cfg device i/o from platform data */
697 	err = fw_cfg_do_platform_probe(pdev);
698 	if (err)
699 		goto err_probe;
700 
701 	/* get revision number, add matching top-level attribute */
702 	err = fw_cfg_read_blob(FW_CFG_ID, &rev, 0, sizeof(rev));
703 	if (err < 0)
704 		goto err_probe;
705 
706 	fw_cfg_rev = le32_to_cpu(rev);
707 	err = sysfs_create_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
708 	if (err)
709 		goto err_rev;
710 
711 	/* process fw_cfg file directory entry, registering each file */
712 	err = fw_cfg_register_dir_entries();
713 	if (err)
714 		goto err_dir;
715 
716 	/* success */
717 	pr_debug("fw_cfg: loaded.\n");
718 	return 0;
719 
720 err_dir:
721 	fw_cfg_sysfs_cache_cleanup();
722 	sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
723 err_rev:
724 	fw_cfg_io_cleanup();
725 err_probe:
726 	fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset);
727 err_name:
728 	fw_cfg_kobj_cleanup(fw_cfg_sel_ko);
729 err_sel:
730 	return err;
731 }
732 
733 static int fw_cfg_sysfs_remove(struct platform_device *pdev)
734 {
735 	pr_debug("fw_cfg: unloading.\n");
736 	fw_cfg_sysfs_cache_cleanup();
737 	sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
738 	fw_cfg_io_cleanup();
739 	fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset);
740 	fw_cfg_kobj_cleanup(fw_cfg_sel_ko);
741 	return 0;
742 }
743 
/* Device Tree match table: nodes compatible with "qemu,fw-cfg-mmio" */
744 static const struct of_device_id fw_cfg_sysfs_mmio_match[] = {
745 	{ .compatible = "qemu,fw-cfg-mmio", },
746 	{},	/* terminating empty entry */
747 };
748 MODULE_DEVICE_TABLE(of, fw_cfg_sysfs_mmio_match);
749 
750 #ifdef CONFIG_ACPI
/* ACPI match table (only built with CONFIG_ACPI): matches FW_CFG_ACPI_DEVICE_ID */
751 static const struct acpi_device_id fw_cfg_sysfs_acpi_match[] = {
752 	{ FW_CFG_ACPI_DEVICE_ID, },
753 	{},	/* terminating empty entry */
754 };
755 MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_acpi_match);
756 #endif
757 
/* platform driver named "fw_cfg"; the same name is used for the device registered by fw_cfg_cmdline_set() */
758 static struct platform_driver fw_cfg_sysfs_driver = {
759 	.probe = fw_cfg_sysfs_probe,
760 	.remove = fw_cfg_sysfs_remove,
761 	.driver = {
762 		.name = "fw_cfg",
763 		.of_match_table = fw_cfg_sysfs_mmio_match,
764 		.acpi_match_table = ACPI_PTR(fw_cfg_sysfs_acpi_match),
765 	},
766 };
767 
768 #ifdef CONFIG_FW_CFG_SYSFS_CMDLINE
769 
/* platform device created from the ioport=/mmio= parameter by fw_cfg_cmdline_set(); NULL until one is given */
770 static struct platform_device *fw_cfg_cmdline_dev;
771 
772 /* this probably belongs in e.g. include/linux/types.h,
773  * but right now we are the only ones doing it...
774  */
775 #ifdef CONFIG_PHYS_ADDR_T_64BIT
776 #define __PHYS_ADDR_PREFIX "ll"
777 #else
778 #define __PHYS_ADDR_PREFIX ""
779 #endif
780 
781 /* use special scanf/printf modifier for phys_addr_t, resource_size_t */
/* scan format for "@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]"; each %n records how many characters were consumed up to that point */
782 #define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
783 			 ":%" __PHYS_ADDR_PREFIX "i" \
784 			 ":%" __PHYS_ADDR_PREFIX "i%n" \
785 			 ":%" __PHYS_ADDR_PREFIX "i%n"
786 
/* print formats used by fw_cfg_cmdline_get() for 1, 3 and 4 resources: "<size>@<base>" plus the optional offsets */
787 #define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
788 			 "0x%" __PHYS_ADDR_PREFIX "x"
789 
790 #define PH_ADDR_PR_3_FMT PH_ADDR_PR_1_FMT \
791 			 ":%" __PHYS_ADDR_PREFIX "u" \
792 			 ":%" __PHYS_ADDR_PREFIX "u"
793 
794 #define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
795 			 ":%" __PHYS_ADDR_PREFIX "u"
796 
797 static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
798 {
799 	struct resource res[4] = {};
800 	char *str;
801 	phys_addr_t base;
802 	resource_size_t size, ctrl_off, data_off, dma_off;
803 	int processed, consumed = 0;
804 
805 	/* only one fw_cfg device can exist system-wide, so if one
806 	 * was processed on the command line already, we might as
807 	 * well stop here.
808 	 */
809 	if (fw_cfg_cmdline_dev) {
810 		/* avoid leaking previously registered device */
811 		platform_device_unregister(fw_cfg_cmdline_dev);
812 		return -EINVAL;
813 	}
814 
815 	/* consume "<size>" portion of command line argument */
816 	size = memparse(arg, &str);
817 
818 	/* get "@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]" chunks */
819 	processed = sscanf(str, PH_ADDR_SCAN_FMT,
820 			   &base, &consumed,
821 			   &ctrl_off, &data_off, &consumed,
822 			   &dma_off, &consumed);
823 
824 	/* sscanf() must process precisely 1, 3 or 4 chunks:
825 	 * <base> is mandatory, optionally followed by <ctrl_off>
826 	 * and <data_off>, and <dma_off>;
827 	 * there must be no extra characters after the last chunk,
828 	 * so str[consumed] must be '\0'.
829 	 */
830 	if (str[consumed] ||
831 	    (processed != 1 && processed != 3 && processed != 4))
832 		return -EINVAL;
833 
834 	res[0].start = base;
835 	res[0].end = base + size - 1;
836 	res[0].flags = !strcmp(kp->name, "mmio") ? IORESOURCE_MEM :
837 						   IORESOURCE_IO;
838 
839 	/* insert register offsets, if provided */
840 	if (processed > 1) {
841 		res[1].name = "ctrl";
842 		res[1].start = ctrl_off;
843 		res[1].flags = IORESOURCE_REG;
844 		res[2].name = "data";
845 		res[2].start = data_off;
846 		res[2].flags = IORESOURCE_REG;
847 	}
848 	if (processed > 3) {
849 		res[3].name = "dma";
850 		res[3].start = dma_off;
851 		res[3].flags = IORESOURCE_REG;
852 	}
853 
854 	/* "processed" happens to nicely match the number of resources
855 	 * we need to pass in to this platform device.
856 	 */
857 	fw_cfg_cmdline_dev = platform_device_register_simple("fw_cfg",
858 					PLATFORM_DEVID_NONE, res, processed);
859 
860 	return PTR_ERR_OR_ZERO(fw_cfg_cmdline_dev);
861 }
862 
863 static int fw_cfg_cmdline_get(char *buf, const struct kernel_param *kp)
864 {
865 	/* stay silent if device was not configured via the command
866 	 * line, or if the parameter name (ioport/mmio) doesn't match
867 	 * the device setting
868 	 */
869 	if (!fw_cfg_cmdline_dev ||
870 	    (!strcmp(kp->name, "mmio") ^
871 	     (fw_cfg_cmdline_dev->resource[0].flags == IORESOURCE_MEM)))
872 		return 0;
873 
874 	switch (fw_cfg_cmdline_dev->num_resources) {
875 	case 1:
876 		return snprintf(buf, PAGE_SIZE, PH_ADDR_PR_1_FMT,
877 				resource_size(&fw_cfg_cmdline_dev->resource[0]),
878 				fw_cfg_cmdline_dev->resource[0].start);
879 	case 3:
880 		return snprintf(buf, PAGE_SIZE, PH_ADDR_PR_3_FMT,
881 				resource_size(&fw_cfg_cmdline_dev->resource[0]),
882 				fw_cfg_cmdline_dev->resource[0].start,
883 				fw_cfg_cmdline_dev->resource[1].start,
884 				fw_cfg_cmdline_dev->resource[2].start);
885 	case 4:
886 		return snprintf(buf, PAGE_SIZE, PH_ADDR_PR_4_FMT,
887 				resource_size(&fw_cfg_cmdline_dev->resource[0]),
888 				fw_cfg_cmdline_dev->resource[0].start,
889 				fw_cfg_cmdline_dev->resource[1].start,
890 				fw_cfg_cmdline_dev->resource[2].start,
891 				fw_cfg_cmdline_dev->resource[3].start);
892 	}
893 
894 	/* Should never get here */
895 	WARN(1, "Unexpected number of resources: %d\n",
896 		fw_cfg_cmdline_dev->num_resources);
897 	return 0;
898 }
899 
/* set/get handlers shared by the "ioport" and "mmio" parameters; they tell the two apart through kp->name */
900 static const struct kernel_param_ops fw_cfg_cmdline_param_ops = {
901 	.set = fw_cfg_cmdline_set,
902 	.get = fw_cfg_cmdline_get,
903 };
904 
/* both parameters are owner-readable (S_IRUSR) and take no private argument (NULL) */
905 device_param_cb(ioport, &fw_cfg_cmdline_param_ops, NULL, S_IRUSR);
906 device_param_cb(mmio, &fw_cfg_cmdline_param_ops, NULL, S_IRUSR);
907 
908 #endif /* CONFIG_FW_CFG_SYSFS_CMDLINE */
909 
910 static int __init fw_cfg_sysfs_init(void)
911 {
912 	int ret;
913 
914 	/* create /sys/firmware/qemu_fw_cfg/ top level directory */
915 	fw_cfg_top_ko = kobject_create_and_add("qemu_fw_cfg", firmware_kobj);
916 	if (!fw_cfg_top_ko)
917 		return -ENOMEM;
918 
919 	ret = platform_driver_register(&fw_cfg_sysfs_driver);
920 	if (ret)
921 		fw_cfg_kobj_cleanup(fw_cfg_top_ko);
922 
923 	return ret;
924 }
925 
926 static void __exit fw_cfg_sysfs_exit(void)
927 {
928 	platform_driver_unregister(&fw_cfg_sysfs_driver);
929 
930 #ifdef CONFIG_FW_CFG_SYSFS_CMDLINE
931 	platform_device_unregister(fw_cfg_cmdline_dev);
932 #endif
933 
934 	/* clean up /sys/firmware/qemu_fw_cfg/ */
935 	fw_cfg_kobj_cleanup(fw_cfg_top_ko);
936 }
937 
938 module_init(fw_cfg_sysfs_init);
939 module_exit(fw_cfg_sysfs_exit);
940