/*
 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/*
 * This file is conditionally built on x86_64 only. Otherwise weak symbol
 * versions of the functions exported from here are used.
 */

#include <linux/pci.h>
#include <asm/mtrr.h>
#include <asm/processor.h>

#include "qib.h"

/**
 * qib_enable_wc - enable write combining for MMIO writes to the device
 * @dd: qlogic_ib device
 *
 * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
 * write combining.
 */
int qib_enable_wc(struct qib_devdata *dd)
{
	int ret = 0;
	u64 pioaddr, piolen;
	unsigned bits;
	const unsigned long addr = pci_resource_start(dd->pcidev, 0);
	const size_t len = pci_resource_len(dd->pcidev, 0);

	/*
	 * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
	 * chip. Linux (possibly the hardware) requires it to be on a power
	 * of 2 address matching the length (which has to be a power of 2).
	 * For rev1, that means the base address, for rev2, it will be just
	 * the PIO buffers themselves.
	 * For chips with two sets of buffers, the calculations are
	 * somewhat more complicated; we need to sum, and the piobufbase
	 * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
	 * The buffers are still packed, so a single range covers both.
	 */
	if (dd->piobcnt2k && dd->piobcnt4k) {
		/* 2 sizes for chip */
		unsigned long pio2kbase, pio4kbase;

		pio2kbase = dd->piobufbase & 0xffffffffUL;
		pio4kbase = (dd->piobufbase >> 32) & 0xffffffffUL;
		if (pio2kbase < pio4kbase) {
			/* all current chips */
			pioaddr = addr + pio2kbase;
			piolen = pio4kbase - pio2kbase +
				dd->piobcnt4k * dd->align4k;
		} else {
			pioaddr = addr + pio4kbase;
			piolen = pio2kbase - pio4kbase +
				dd->piobcnt2k * dd->palign;
		}
	} else { /* single buffer size (2K, currently) */
		pioaddr = addr + dd->piobufbase;
		piolen = dd->piobcnt2k * dd->palign +
			dd->piobcnt4k * dd->align4k;
	}

	/*
	 * Find the lowest set bit of piolen, then round piolen up to the
	 * next power of 2 if it is not one already (MTRR ranges must be
	 * power-of-2 sized).
	 */
	for (bits = 0; !(piolen & (1ULL << bits)); bits++)
		/* do nothing */ ;

	if (piolen != (1ULL << bits)) {
		piolen >>= bits;
		while (piolen >>= 1)
			bits++;
		piolen = 1ULL << (bits + 1);
	}
	/*
	 * MTRR bases must be aligned to the region size; if pioaddr isn't,
	 * align it down and double the length, provided the aligned region
	 * still falls within BAR 0.
	 */
	if (pioaddr & (piolen - 1)) {
		u64 atmp;

		atmp = pioaddr & ~(piolen - 1);
		if (atmp < addr || (atmp + piolen) > (addr + len)) {
			qib_dev_err(dd,
				    "No way to align address/size (%llx/%llx), no WC mtrr\n",
				    (unsigned long long) atmp,
				    (unsigned long long) piolen << 1);
			ret = -ENODEV;
		} else {
			pioaddr = atmp;
			piolen <<= 1;
		}
	}

	if (!ret) {
		int cookie;

		cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0);
		if (cookie < 0) {
			qib_devinfo(dd->pcidev,
				    "mtrr_add() WC for PIO bufs failed (%d)\n",
				    cookie);
			ret = -EINVAL;
		} else {
			dd->wc_cookie = cookie;
			dd->wc_base = (unsigned long) pioaddr;
			dd->wc_len = (unsigned long) piolen;
		}
	}

	return ret;
}

/**
 * qib_disable_wc - disable write combining for MMIO writes to the device
 * @dd: qlogic_ib device
 */
void qib_disable_wc(struct qib_devdata *dd)
{
	if (dd->wc_cookie) {
		int r;

		r = mtrr_del(dd->wc_cookie, dd->wc_base,
			     dd->wc_len);
		if (r < 0)
			qib_devinfo(dd->pcidev,
				    "mtrr_del(%lx, %lx, %lx) failed: %d\n",
				    dd->wc_cookie, dd->wc_base,
				    dd->wc_len, r);
		dd->wc_cookie = 0; /* even on failure */
	}
}

/**
 * qib_unordered_wc - indicate whether write combining is ordered
 *
 * Because our performance depends on our ability to do write-combining MMIO
 * writes in the most efficient way, we need to know if we are on an Intel
 * or AMD x86_64 processor. AMD x86_64 processors flush WC buffers out in
 * the order completed, and so no special flushing is required to get
 * correct ordering. Intel processors, however, will flush write buffers
 * out in "random" orders, and so explicit ordering is needed at times.
 */
int qib_unordered_wc(void)
{
	return boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
}