/*
 * Coherency fabric (Aurora) support for Armada 370 and XP platforms.
 *
 * Copyright (C) 2012 Marvell
 *
 * Yehuda Yitschak <yehuday@marvell.com>
 * Gregory Clement <gregory.clement@free-electrons.com>
 * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
 *
 * This file is licensed under the terms of the GNU General Public
 * License version 2.  This program is licensed "as is" without any
 * warranty of any kind, whether express or implied.
 *
 * The Armada 370 and Armada XP SoCs have a coherency fabric which is
 * responsible for ensuring hardware coherency between all CPUs and between
 * CPUs and I/O masters. This file initializes the coherency fabric and
 * supplies basic routines for configuring and controlling hardware coherency.
 */

#define pr_fmt(fmt) "mvebu-coherency: " fmt

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/of_address.h>
#include <linux/io.h>
#include <linux/smp.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/mbus.h>
#include <linux/clk.h>
#include <linux/pci.h>
#include <asm/smp_plat.h>
#include <asm/cacheflush.h>
#include <asm/mach/map.h>
#include "armada-370-xp.h"
#include "coherency.h"
#include "mvebu-soc-id.h"

unsigned long coherency_phys_base;
void __iomem *coherency_base;
static void __iomem *coherency_cpu_base;

/* Coherency fabric registers */
#define COHERENCY_FABRIC_CFG_OFFSET		   0x4

#define IO_SYNC_BARRIER_CTL_OFFSET		   0x0

enum {
	COHERENCY_FABRIC_TYPE_NONE,
	COHERENCY_FABRIC_TYPE_ARMADA_370_XP,
	COHERENCY_FABRIC_TYPE_ARMADA_375,
	COHERENCY_FABRIC_TYPE_ARMADA_380,
};

static const struct of_device_id of_coherency_table[] = {
	{.compatible = "marvell,coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_370_XP },
	{.compatible = "marvell,armada-375-coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_375 },
	{.compatible = "marvell,armada-380-coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_380 },
	{ /* end of list */ },
};

/* Functions defined in coherency_ll.S */
int ll_enable_coherency(void);
void ll_add_cpu_to_smp_group(void);

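/*
 * set_cpu_coherent - add the calling CPU to the SMP group and enable
 * coherency for it, using the low-level helpers implemented in
 * coherency_ll.S. Returns 0 on success, or 1 when the coherency
 * fabric has not been initialized from the Device Tree.
 */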
int set_cpu_coherent(void)
{
	if (!coherency_base) {
		pr_warn("Can't make current CPU cache coherent.\n");
		pr_warn("Coherency fabric is not initialized\n");
		return 1;
	}

	ll_add_cpu_to_smp_group();
	return ll_enable_coherency();
}

/*
 * The below code implements the I/O coherency workaround on Armada
 * 375. This workaround consists of using the two channels of the
 * first XOR engine to trigger an XOR transaction that serves as the
 * I/O coherency barrier.
 */

static void __iomem *xor_base, *xor_high_base;
static dma_addr_t coherency_wa_buf_phys[CONFIG_NR_CPUS];
static void *coherency_wa_buf[CONFIG_NR_CPUS];
static bool coherency_wa_enabled;

#define XOR_CONFIG(chan)            (0x10 + ((chan) * 4))
#define XOR_ACTIVATION(chan)        (0x20 + ((chan) * 4))
#define WINDOW_BAR_ENABLE(chan)     (0x240 + ((chan) << 2))
#define WINDOW_BASE(w)              (0x250 + ((w) << 2))
#define WINDOW_SIZE(w)              (0x270 + ((w) << 2))
#define WINDOW_REMAP_HIGH(w)        (0x290 + ((w) << 2))
#define WINDOW_OVERRIDE_CTRL(chan)  (0x2A0 + ((chan) << 2))
#define XOR_DEST_POINTER(chan)      (0x2B0 + ((chan) * 4))
#define XOR_BLOCK_SIZE(chan)        (0x2C0 + ((chan) * 4))
#define XOR_INIT_VALUE_LOW          0x2E0
#define XOR_INIT_VALUE_HIGH         0x2E4

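/*
 * Trigger a dummy transaction on the XOR channel dedicated to this
 * CPU and wait for its completion. The channels are configured in
 * armada_375_coherency_init_wa() as memset engines with an init
 * value of zero, so the first word of the per-CPU buffer going back
 * to zero signals that the transaction (and therefore the barrier)
 * has completed.
 */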
static inline void mvebu_hwcc_armada375_sync_io_barrier_wa(void)
{
	int idx = smp_processor_id();

	/* Write '1' to the first word of the buffer */
	writel(0x1, coherency_wa_buf[idx]);

	/* Wait until the engine is idle */
	while ((readl(xor_base + XOR_ACTIVATION(idx)) >> 4) & 0x3)
		;

	dmb();

	/* Trigger channel */
	writel(0x1, xor_base + XOR_ACTIVATION(idx));

	/* Poll the data until it is cleared by the XOR transaction */
	while (readl(coherency_wa_buf[idx]))
		;
}

static void __init armada_375_coherency_init_wa(void)
{
	const struct mbus_dram_target_info *dram;
	struct device_node *xor_node;
	struct property *xor_status;
	struct clk *xor_clk;
	u32 win_enable = 0;
	int i;

	pr_warn("enabling coherency workaround for Armada 375 Z1, one XOR engine disabled\n");

	/*
	 * Since the workaround uses one XOR engine, we grab a
	 * reference to its Device Tree node first.
	 */
	xor_node = of_find_compatible_node(NULL, NULL, "marvell,orion-xor");
	BUG_ON(!xor_node);

	/*
	 * Then we mark it as disabled so that the real XOR driver
	 * will not use it.
	 */
	xor_status = kzalloc(sizeof(struct property), GFP_KERNEL);
	BUG_ON(!xor_status);

	xor_status->value = kstrdup("disabled", GFP_KERNEL);
	BUG_ON(!xor_status->value);

	/* Device Tree string property lengths include the trailing NUL */
	xor_status->length = sizeof("disabled");
	xor_status->name = kstrdup("status", GFP_KERNEL);
	BUG_ON(!xor_status->name);

	of_update_property(xor_node, xor_status);

	/*
	 * And we remap the registers, get the clock, and do the
	 * initial configuration of the XOR engine.
	 */
	xor_base = of_iomap(xor_node, 0);
	xor_high_base = of_iomap(xor_node, 1);
	BUG_ON(!xor_base || !xor_high_base);

	xor_clk = of_clk_get_by_name(xor_node, NULL);
	/* of_clk_get_by_name() returns an ERR_PTR on failure, never NULL */
	BUG_ON(IS_ERR(xor_clk));

	clk_prepare_enable(xor_clk);

	dram = mv_mbus_dram_info();

	for (i = 0; i < 8; i++) {
		writel(0, xor_base + WINDOW_BASE(i));
		writel(0, xor_base + WINDOW_SIZE(i));
		if (i < 4)
			writel(0, xor_base + WINDOW_REMAP_HIGH(i));
	}

	for (i = 0; i < dram->num_cs; i++) {
		const struct mbus_dram_window *cs = dram->cs + i;

		writel((cs->base & 0xffff0000) |
		       (cs->mbus_attr << 8) |
		       dram->mbus_dram_target_id, xor_base + WINDOW_BASE(i));
		writel((cs->size - 1) & 0xffff0000, xor_base + WINDOW_SIZE(i));

		win_enable |= (1 << i);
		win_enable |= 3 << (16 + (2 * i));
	}

	writel(win_enable, xor_base + WINDOW_BAR_ENABLE(0));
	writel(win_enable, xor_base + WINDOW_BAR_ENABLE(1));
	writel(0, xor_base + WINDOW_OVERRIDE_CTRL(0));
	writel(0, xor_base + WINDOW_OVERRIDE_CTRL(1));

	for (i = 0; i < CONFIG_NR_CPUS; i++) {
		coherency_wa_buf[i] = kzalloc(PAGE_SIZE, GFP_KERNEL);
		BUG_ON(!coherency_wa_buf[i]);

		/*
		 * We can't use the DMA mapping API, since we don't
		 * have a valid 'struct device' pointer
		 */
		coherency_wa_buf_phys[i] =
			virt_to_phys(coherency_wa_buf[i]);
		BUG_ON(!coherency_wa_buf_phys[i]);

		/*
		 * Configure the XOR engine for memset operation, with
		 * a 128-byte block size
		 */
		writel(0x444, xor_base + XOR_CONFIG(i));
		writel(128, xor_base + XOR_BLOCK_SIZE(i));
		writel(coherency_wa_buf_phys[i],
		       xor_base + XOR_DEST_POINTER(i));
	}

	writel(0x0, xor_base + XOR_INIT_VALUE_LOW);
	writel(0x0, xor_base + XOR_INIT_VALUE_HIGH);

	coherency_wa_enabled = true;
}

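/*
 * Ensure that writes performed by DMA masters have reached memory
 * and are visible to the CPUs: either through the I/O sync barrier
 * register of the coherency fabric, or through the XOR-based
 * workaround on Armada 375 Z1.
 */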
static inline void mvebu_hwcc_sync_io_barrier(void)
{
	if (coherency_wa_enabled) {
		mvebu_hwcc_armada375_sync_io_barrier_wa();
		return;
	}

	writel(0x1, coherency_cpu_base + IO_SYNC_BARRIER_CTL_OFFSET);
	while (readl(coherency_cpu_base + IO_SYNC_BARRIER_CTL_OFFSET) & 0x1)
		;
}

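/*
 * With hardware I/O coherency the caches stay consistent, so these
 * hooks perform no cache maintenance; they only issue the sync
 * barrier before the CPU consumes data written by a device, i.e. for
 * every direction other than DMA_TO_DEVICE.
 */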
static dma_addr_t mvebu_hwcc_dma_map_page(struct device *dev, struct page *page,
				  unsigned long offset, size_t size,
				  enum dma_data_direction dir,
				  struct dma_attrs *attrs)
{
	if (dir != DMA_TO_DEVICE)
		mvebu_hwcc_sync_io_barrier();
	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
}

static void mvebu_hwcc_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
			      size_t size, enum dma_data_direction dir,
			      struct dma_attrs *attrs)
{
	if (dir != DMA_TO_DEVICE)
		mvebu_hwcc_sync_io_barrier();
}

static void mvebu_hwcc_dma_sync(struct device *dev, dma_addr_t dma_handle,
			size_t size, enum dma_data_direction dir)
{
	if (dir != DMA_TO_DEVICE)
		mvebu_hwcc_sync_io_barrier();
}

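/*
 * DMA operations for hardware-coherent devices: allocation and
 * scatter-gather handling are delegated to the generic ARM helpers
 * (arm_dma_map_sg dispatches back to the map_page hook above), while
 * the page mapping and single-buffer sync entries only issue the I/O
 * sync barrier.
 */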
static struct dma_map_ops mvebu_hwcc_dma_ops = {
	.alloc			= arm_dma_alloc,
	.free			= arm_dma_free,
	.mmap			= arm_dma_mmap,
	.map_page		= mvebu_hwcc_dma_map_page,
	.unmap_page		= mvebu_hwcc_dma_unmap_page,
	.get_sgtable		= arm_dma_get_sgtable,
	.map_sg			= arm_dma_map_sg,
	.unmap_sg		= arm_dma_unmap_sg,
	.sync_single_for_cpu	= mvebu_hwcc_dma_sync,
	.sync_single_for_device	= mvebu_hwcc_dma_sync,
	.sync_sg_for_cpu	= arm_dma_sync_sg_for_cpu,
	.sync_sg_for_device	= arm_dma_sync_sg_for_device,
	.set_dma_mask		= arm_dma_set_mask,
};

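/*
 * Bus notifier installing the hardware-coherent DMA operations on
 * every newly added device, so that drivers pick them up
 * transparently through the DMA mapping API.
 */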
static int mvebu_hwcc_notifier(struct notifier_block *nb,
			       unsigned long event, void *__dev)
{
	struct device *dev = __dev;

	if (event != BUS_NOTIFY_ADD_DEVICE)
		return NOTIFY_DONE;
	set_dma_ops(dev, &mvebu_hwcc_dma_ops);

	return NOTIFY_OK;
}

static struct notifier_block mvebu_hwcc_nb = {
	.notifier_call = mvebu_hwcc_notifier,
};

static struct notifier_block mvebu_hwcc_pci_nb = {
	.notifier_call = mvebu_hwcc_notifier,
};

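/*
 * Armada 370/XP: map both register ranges of the coherency fabric
 * and add the boot CPU to it. The physical base is also recorded,
 * since the secondary CPU startup code reads it before joining the
 * fabric.
 */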
static void __init armada_370_coherency_init(struct device_node *np)
{
	struct resource res;

	of_address_to_resource(np, 0, &res);
	coherency_phys_base = res.start;
	/*
	 * Ensure secondary CPUs will see the updated value,
	 * which they read before they join the coherency
	 * fabric, and therefore before they are coherent with
	 * the boot CPU cache.
	 */
	sync_cache_w(&coherency_phys_base);
	coherency_base = of_iomap(np, 0);
	coherency_cpu_base = of_iomap(np, 1);
	set_cpu_coherent();
}

/*
 * This ioremap hook is used on Armada 375/38x to ensure that PCIe
 * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This
 * is needed as a workaround for a deadlock issue between the PCIe
 * interface and the cache controller.
 */
static void __iomem *
armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
			      unsigned int mtype, void *caller)
{
	struct resource pcie_mem;

	mvebu_mbus_get_pcie_mem_aperture(&pcie_mem);

	if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end)
		mtype = MT_UNCACHED;

	return __arm_ioremap_caller(phys_addr, size, mtype, caller);
}

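/*
 * Armada 375/38x: map the CPU coherency registers and install the
 * PCIe ioremap workaround above. Unlike on Armada 370/XP, CPUs do
 * not need to be explicitly added to the fabric here.
 */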
static void __init armada_375_380_coherency_init(struct device_node *np)
{
	struct device_node *cache_dn;

	coherency_cpu_base = of_iomap(np, 0);
	arch_ioremap_caller = armada_pcie_wa_ioremap_caller;

	/*
	 * Add the PL310 property "arm,io-coherent". This makes sure
	 * the outer sync operation is not used, which allows us to
	 * work around the system erratum that causes deadlocks when
	 * doing PCIe in an SMP situation on Armada 375 and Armada
	 * 38x.
	 */
	for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") {
		struct property *p;

		p = kzalloc(sizeof(*p), GFP_KERNEL);
		p->name = kstrdup("arm,io-coherent", GFP_KERNEL);
		of_add_property(cache_dn, p);
	}
}

static int coherency_type(void)
{
	struct device_node *np;
	const struct of_device_id *match;
	int type;

	/*
	 * The coherency fabric is needed:
	 * - For coherency between processors on Armada XP, so only
	 *   when SMP is enabled.
	 * - For coherency between the processor and I/O devices, but
	 *   this coherency requires many prerequisites (write
	 *   allocate cache policy, shareable pages, SMP bit set) that
	 *   are only met in SMP situations.
	 *
	 * Note that this means that on Armada 370, there is currently
	 * no way to use hardware I/O coherency, because even when
	 * CONFIG_SMP is enabled, is_smp() returns false due to the
	 * Armada 370 being a single-core processor. To lift this
	 * limitation, we would have to find a way to make the cache
	 * policy set to write-allocate (on all Armada SoCs), and to
	 * set the shareable attribute in page tables (on all Armada
	 * SoCs except the Armada 370). Unfortunately, such decisions
	 * are taken very early in the kernel boot process, at a point
	 * where we don't know yet on which SoC we are running.
	 */
	if (!is_smp())
		return COHERENCY_FABRIC_TYPE_NONE;

	np = of_find_matching_node_and_match(NULL, of_coherency_table, &match);
	if (!np)
		return COHERENCY_FABRIC_TYPE_NONE;

	type = (int) match->data;

	of_node_put(np);

	return type;
}


int coherency_available(void)
{
	return coherency_type() != COHERENCY_FABRIC_TYPE_NONE;
}

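/*
 * Probe the coherency fabric type from the Device Tree and perform
 * the SoC-specific part of the initialization.
 */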
int __init coherency_init(void)
{
	int type = coherency_type();
	struct device_node *np;

	np = of_find_matching_node(NULL, of_coherency_table);

	if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP)
		armada_370_coherency_init(np);
	else if (type == COHERENCY_FABRIC_TYPE_ARMADA_375 ||
		 type == COHERENCY_FABRIC_TYPE_ARMADA_380)
		armada_375_380_coherency_init(np);

	of_node_put(np);

	return 0;
}

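/*
 * Late initialization: by now the SoC ID is known, so the Armada 375
 * Z1 workaround can be enabled if required, before registering the
 * bus notifier that installs the hardware-coherent DMA operations on
 * platform devices.
 */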
static int __init coherency_late_init(void)
{
	int type = coherency_type();

	if (type == COHERENCY_FABRIC_TYPE_NONE)
		return 0;

	if (type == COHERENCY_FABRIC_TYPE_ARMADA_375) {
		u32 dev, rev;

		if (mvebu_get_soc_id(&dev, &rev) == 0 &&
		    rev == ARMADA_375_Z1_REV)
			armada_375_coherency_init_wa();
	}

	bus_register_notifier(&platform_bus_type, &mvebu_hwcc_nb);

	return 0;
}

postcore_initcall(coherency_late_init);

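/*
 * The same notifier is registered on the PCI bus type, so that PCI
 * devices also get the hardware-coherent DMA operations.
 */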
#if IS_ENABLED(CONFIG_PCI)
static int __init coherency_pci_init(void)
{
	if (coherency_available())
		bus_register_notifier(&pci_bus_type, &mvebu_hwcc_pci_nb);
	return 0;
}

arch_initcall(coherency_pci_init);
#endif
458