/*
 * Coherency fabric (Aurora) support for Armada 370 and XP platforms.
 *
 * Copyright (C) 2012 Marvell
 *
 * Yehuda Yitschak <yehuday@marvell.com>
 * Gregory Clement <gregory.clement@free-electrons.com>
 * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
 *
 * This file is licensed under the terms of the GNU General Public
 * License version 2. This program is licensed "as is" without any
 * warranty of any kind, whether express or implied.
 *
 * The Armada 370 and Armada XP SoCs have a coherency fabric which is
 * responsible for ensuring hardware coherency between all CPUs and between
 * CPUs and I/O masters. This file initializes the coherency fabric and
 * supplies basic routines for configuring and controlling hardware coherency.
 */

#define pr_fmt(fmt) "mvebu-coherency: " fmt

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/of_address.h>
#include <linux/io.h>
#include <linux/smp.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/mbus.h>
#include <linux/clk.h>
#include <linux/pci.h>
#include <asm/smp_plat.h>
#include <asm/cacheflush.h>
#include <asm/mach/map.h>
#include "armada-370-xp.h"
#include "coherency.h"
#include "mvebu-soc-id.h"

unsigned long coherency_phys_base;
void __iomem *coherency_base;
static void __iomem *coherency_cpu_base;

/* Coherency fabric registers */
#define COHERENCY_FABRIC_CFG_OFFSET	0x4

#define IO_SYNC_BARRIER_CTL_OFFSET	0x0

enum {
	COHERENCY_FABRIC_TYPE_NONE,
	COHERENCY_FABRIC_TYPE_ARMADA_370_XP,
	COHERENCY_FABRIC_TYPE_ARMADA_375,
	COHERENCY_FABRIC_TYPE_ARMADA_380,
};

static struct of_device_id of_coherency_table[] = {
	{.compatible = "marvell,coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_370_XP },
	{.compatible = "marvell,armada-375-coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_375 },
	{.compatible = "marvell,armada-380-coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_380 },
	{ /* end of list */ },
};

/* Functions defined in coherency_ll.S */
int ll_enable_coherency(void);
void ll_add_cpu_to_smp_group(void);

int set_cpu_coherent(void)
{
	if (!coherency_base) {
		pr_warn("Can't make current CPU cache coherent.\n");
		pr_warn("Coherency fabric is not initialized\n");
		return 1;
	}

	ll_add_cpu_to_smp_group();
	return ll_enable_coherency();
}

/*
 * The code below implements the I/O coherency workaround on Armada
 * 375. This workaround consists of using the two channels of the
 * first XOR engine to trigger a XOR transaction that serves as the
 * I/O coherency barrier.
 */
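/*
 * Each CPU gets its own XOR channel and a per-CPU scratch buffer.
 * The barrier sequence (see mvebu_hwcc_armada375_sync_io_barrier_wa()
 * below) is: write a marker word into the buffer, wait for the
 * channel to become idle, trigger a one-block memset-to-zero
 * operation on the buffer, then poll until the marker has been
 * overwritten. The completion of this dummy DMA transaction is what
 * acts as the I/O coherency barrier.
 */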
static void __iomem *xor_base, *xor_high_base;
static dma_addr_t coherency_wa_buf_phys[CONFIG_NR_CPUS];
static void *coherency_wa_buf[CONFIG_NR_CPUS];
static bool coherency_wa_enabled;

#define XOR_CONFIG(chan)		(0x10 + ((chan) * 4))
#define XOR_ACTIVATION(chan)		(0x20 + ((chan) * 4))
#define WINDOW_BAR_ENABLE(chan)		(0x240 + ((chan) << 2))
#define WINDOW_BASE(w)			(0x250 + ((w) << 2))
#define WINDOW_SIZE(w)			(0x270 + ((w) << 2))
#define WINDOW_REMAP_HIGH(w)		(0x290 + ((w) << 2))
#define WINDOW_OVERRIDE_CTRL(chan)	(0x2A0 + ((chan) << 2))
#define XOR_DEST_POINTER(chan)		(0x2B0 + ((chan) * 4))
#define XOR_BLOCK_SIZE(chan)		(0x2C0 + ((chan) * 4))
#define XOR_INIT_VALUE_LOW		0x2E0
#define XOR_INIT_VALUE_HIGH		0x2E4

static inline void mvebu_hwcc_armada375_sync_io_barrier_wa(void)
{
	int idx = smp_processor_id();

	/* Write '1' to the first word of the buffer */
	writel(0x1, coherency_wa_buf[idx]);

	/* Wait until the engine is idle */
	while ((readl(xor_base + XOR_ACTIVATION(idx)) >> 4) & 0x3)
		;

	dmb();

	/* Trigger channel */
	writel(0x1, xor_base + XOR_ACTIVATION(idx));

	/* Poll the data until it is cleared by the XOR transaction */
	while (readl(coherency_wa_buf[idx]))
		;
}
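/*
 * Note that the channel is selected with smp_processor_id(), so the
 * sequence above assumes the caller cannot migrate to another CPU
 * between the marker write and the final poll; the DMA mapping hooks
 * further down rely on being called from contexts where that holds.
 */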
static void __init armada_375_coherency_init_wa(void)
{
	const struct mbus_dram_target_info *dram;
	struct device_node *xor_node;
	struct property *xor_status;
	struct clk *xor_clk;
	u32 win_enable = 0;
	int i;

	pr_warn("enabling coherency workaround for Armada 375 Z1, one XOR engine disabled\n");

	/*
	 * Since the workaround uses one XOR engine, we grab a
	 * reference to its Device Tree node first.
	 */
	xor_node = of_find_compatible_node(NULL, NULL, "marvell,orion-xor");
	BUG_ON(!xor_node);

	/*
	 * Then we mark it as disabled so that the real XOR driver
	 * will not use it.
	 */
	xor_status = kzalloc(sizeof(struct property), GFP_KERNEL);
	BUG_ON(!xor_status);

	xor_status->value = kstrdup("disabled", GFP_KERNEL);
	BUG_ON(!xor_status->value);

	xor_status->length = 8;
	xor_status->name = kstrdup("status", GFP_KERNEL);
	BUG_ON(!xor_status->name);

	of_update_property(xor_node, xor_status);

	/*
	 * And we remap the registers, get the clock, and do the
	 * initial configuration of the XOR engine.
	 */
	xor_base = of_iomap(xor_node, 0);
	xor_high_base = of_iomap(xor_node, 1);

	/* of_clk_get_by_name() returns an ERR_PTR on failure, not NULL */
	xor_clk = of_clk_get_by_name(xor_node, NULL);
	BUG_ON(IS_ERR(xor_clk));

	clk_prepare_enable(xor_clk);

	dram = mv_mbus_dram_info();

	for (i = 0; i < 8; i++) {
		writel(0, xor_base + WINDOW_BASE(i));
		writel(0, xor_base + WINDOW_SIZE(i));
		if (i < 4)
			writel(0, xor_base + WINDOW_REMAP_HIGH(i));
	}

	for (i = 0; i < dram->num_cs; i++) {
		const struct mbus_dram_window *cs = dram->cs + i;
		writel((cs->base & 0xffff0000) |
		       (cs->mbus_attr << 8) |
		       dram->mbus_dram_target_id, xor_base + WINDOW_BASE(i));
		writel((cs->size - 1) & 0xffff0000, xor_base + WINDOW_SIZE(i));

		win_enable |= (1 << i);
		win_enable |= 3 << (16 + (2 * i));
	}

	writel(win_enable, xor_base + WINDOW_BAR_ENABLE(0));
	writel(win_enable, xor_base + WINDOW_BAR_ENABLE(1));
	writel(0, xor_base + WINDOW_OVERRIDE_CTRL(0));
	writel(0, xor_base + WINDOW_OVERRIDE_CTRL(1));

	for (i = 0; i < CONFIG_NR_CPUS; i++) {
		coherency_wa_buf[i] = kzalloc(PAGE_SIZE, GFP_KERNEL);
		BUG_ON(!coherency_wa_buf[i]);

		/*
		 * We can't use the DMA mapping API, since we don't
		 * have a valid 'struct device' pointer
		 */
		coherency_wa_buf_phys[i] =
			virt_to_phys(coherency_wa_buf[i]);
		BUG_ON(!coherency_wa_buf_phys[i]);

		/*
		 * Configure the XOR engine for memset operation, with
		 * a 128-byte block size
		 */
		writel(0x444, xor_base + XOR_CONFIG(i));
		writel(128, xor_base + XOR_BLOCK_SIZE(i));
		writel(coherency_wa_buf_phys[i],
		       xor_base + XOR_DEST_POINTER(i));
	}

	writel(0x0, xor_base + XOR_INIT_VALUE_LOW);
	writel(0x0, xor_base + XOR_INIT_VALUE_HIGH);

	coherency_wa_enabled = true;
}
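/*
 * On Armada 375 Z1 the XOR-based workaround above is used. On all
 * other platforms with a coherency fabric, the fabric itself provides
 * a dedicated I/O sync barrier register: writing 1 triggers the
 * barrier, and the bit is polled until the hardware clears it, which
 * indicates the barrier has completed.
 */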
static inline void mvebu_hwcc_sync_io_barrier(void)
{
	if (coherency_wa_enabled) {
		mvebu_hwcc_armada375_sync_io_barrier_wa();
		return;
	}

	writel(0x1, coherency_cpu_base + IO_SYNC_BARRIER_CTL_OFFSET);
	while (readl(coherency_cpu_base + IO_SYNC_BARRIER_CTL_OFFSET) & 0x1)
		;
}

static dma_addr_t mvebu_hwcc_dma_map_page(struct device *dev, struct page *page,
					  unsigned long offset, size_t size,
					  enum dma_data_direction dir,
					  struct dma_attrs *attrs)
{
	if (dir != DMA_TO_DEVICE)
		mvebu_hwcc_sync_io_barrier();
	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
}

static void mvebu_hwcc_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
				      size_t size, enum dma_data_direction dir,
				      struct dma_attrs *attrs)
{
	if (dir != DMA_TO_DEVICE)
		mvebu_hwcc_sync_io_barrier();
}

static void mvebu_hwcc_dma_sync(struct device *dev, dma_addr_t dma_handle,
				size_t size, enum dma_data_direction dir)
{
	if (dir != DMA_TO_DEVICE)
		mvebu_hwcc_sync_io_barrier();
}

static struct dma_map_ops mvebu_hwcc_dma_ops = {
	.alloc = arm_dma_alloc,
	.free = arm_dma_free,
	.mmap = arm_dma_mmap,
	.map_page = mvebu_hwcc_dma_map_page,
	.unmap_page = mvebu_hwcc_dma_unmap_page,
	.get_sgtable = arm_dma_get_sgtable,
	.map_sg = arm_dma_map_sg,
	.unmap_sg = arm_dma_unmap_sg,
	.sync_single_for_cpu = mvebu_hwcc_dma_sync,
	.sync_single_for_device = mvebu_hwcc_dma_sync,
	.sync_sg_for_cpu = arm_dma_sync_sg_for_cpu,
	.sync_sg_for_device = arm_dma_sync_sg_for_device,
	.set_dma_mask = arm_dma_set_mask,
};

static int mvebu_hwcc_notifier(struct notifier_block *nb,
			       unsigned long event, void *__dev)
{
	struct device *dev = __dev;

	if (event != BUS_NOTIFY_ADD_DEVICE)
		return NOTIFY_DONE;
	set_dma_ops(dev, &mvebu_hwcc_dma_ops);

	return NOTIFY_OK;
}

static struct notifier_block mvebu_hwcc_nb = {
	.notifier_call = mvebu_hwcc_notifier,
};

static struct notifier_block mvebu_hwcc_pci_nb = {
	.notifier_call = mvebu_hwcc_notifier,
};
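/*
 * The hwcc DMA operations are installed through bus notifiers rather
 * than by individual drivers: every device subsequently added to the
 * platform bus (and, when CONFIG_PCI is enabled, the PCI bus) gets
 * mvebu_hwcc_dma_ops. These ops reuse the generic arm_dma_* helpers
 * but replace the map/unmap/sync hooks with versions that skip CPU
 * cache maintenance (the fabric keeps the caches coherent) and
 * instead issue an I/O sync barrier whenever data may flow from a
 * DMA master to the CPU (dir != DMA_TO_DEVICE). Drivers need no
 * special code; for example (illustrative only):
 *
 *	dma_addr = dma_map_page(dev, page, 0, len, DMA_FROM_DEVICE);
 *	... device writes into the buffer ...
 *	dma_unmap_page(dev, dma_addr, len, DMA_FROM_DEVICE);
 *
 * The unmap lands in mvebu_hwcc_dma_unmap_page() above, which issues
 * the I/O sync barrier before the CPU reads the buffer.
 */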
static void __init armada_370_coherency_init(struct device_node *np)
{
	struct resource res;

	of_address_to_resource(np, 0, &res);
	coherency_phys_base = res.start;
	/*
	 * Ensure secondary CPUs will see the updated value,
	 * which they read before they join the coherency
	 * fabric, and therefore before they are coherent with
	 * the boot CPU cache.
	 */
	sync_cache_w(&coherency_phys_base);
	coherency_base = of_iomap(np, 0);
	coherency_cpu_base = of_iomap(np, 1);
	set_cpu_coherent();
}

/*
 * This ioremap hook is used on Armada 375/38x to ensure that PCIe
 * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This
 * is needed as a workaround for a deadlock issue between the PCIe
 * interface and the cache controller.
 */
static void __iomem *
armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
			      unsigned int mtype, void *caller)
{
	struct resource pcie_mem;

	mvebu_mbus_get_pcie_mem_aperture(&pcie_mem);

	if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end)
		mtype = MT_UNCACHED;

	return __arm_ioremap_caller(phys_addr, size, mtype, caller);
}

static void __init armada_375_380_coherency_init(struct device_node *np)
{
	struct device_node *cache_dn;

	coherency_cpu_base = of_iomap(np, 0);
	arch_ioremap_caller = armada_pcie_wa_ioremap_caller;

	/*
	 * Add the PL310 property "arm,io-coherent". This makes sure the
	 * outer sync operation is not used, which works around the
	 * system erratum that causes deadlocks when doing PCIe in an
	 * SMP situation on Armada 375 and Armada 38x.
	 */
	for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") {
		struct property *p;

		p = kzalloc(sizeof(*p), GFP_KERNEL);
		p->name = kstrdup("arm,io-coherent", GFP_KERNEL);
		of_add_property(cache_dn, p);
	}
}

static int coherency_type(void)
{
	struct device_node *np;
	const struct of_device_id *match;

	np = of_find_matching_node_and_match(NULL, of_coherency_table, &match);
	if (np) {
		int type = (int) match->data;

		/* Armada 370/XP coherency works in both UP and SMP */
		if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP)
			return type;

		/* Armada 375 coherency works only on SMP */
		else if (type == COHERENCY_FABRIC_TYPE_ARMADA_375 && is_smp())
			return type;

		/* Armada 380 coherency works only on SMP */
		else if (type == COHERENCY_FABRIC_TYPE_ARMADA_380 && is_smp())
			return type;
	}

	return COHERENCY_FABRIC_TYPE_NONE;
}

int coherency_available(void)
{
	return coherency_type() != COHERENCY_FABRIC_TYPE_NONE;
}

int __init coherency_init(void)
{
	int type = coherency_type();
	struct device_node *np;

	np = of_find_matching_node(NULL, of_coherency_table);

	if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP)
		armada_370_coherency_init(np);
	else if (type == COHERENCY_FABRIC_TYPE_ARMADA_375 ||
		 type == COHERENCY_FABRIC_TYPE_ARMADA_380)
		armada_375_380_coherency_init(np);

	return 0;
}

static int __init coherency_late_init(void)
{
	int type = coherency_type();

	if (type == COHERENCY_FABRIC_TYPE_NONE)
		return 0;

	if (type == COHERENCY_FABRIC_TYPE_ARMADA_375) {
		u32 dev, rev;

		if (mvebu_get_soc_id(&dev, &rev) == 0 &&
		    rev == ARMADA_375_Z1_REV)
			armada_375_coherency_init_wa();
	}

	bus_register_notifier(&platform_bus_type,
			      &mvebu_hwcc_nb);

	return 0;
}

postcore_initcall(coherency_late_init);

#if IS_ENABLED(CONFIG_PCI)
static int __init coherency_pci_init(void)
{
	if (coherency_available())
		bus_register_notifier(&pci_bus_type,
				      &mvebu_hwcc_pci_nb);
	return 0;
}

arch_initcall(coherency_pci_init);
#endif
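/*
 * Initcall ordering note: coherency_late_init() runs at postcore
 * level and coherency_pci_init() at arch level, both of which come
 * before the subsys- and device-level initcalls where drivers
 * typically register. The notifiers are therefore in place before
 * any DMA-capable device is probed, so no device should miss the
 * mvebu_hwcc_dma_ops assignment.
 */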