1 /* 2 * Fast C2P (Chunky-to-Planar) Conversion 3 * 4 * Copyright (C) 2003-2008 Geert Uytterhoeven 5 * 6 * NOTES: 7 * - This code was inspired by Scout's C2P tutorial 8 * - It assumes to run on a big endian system 9 * 10 * This file is subject to the terms and conditions of the GNU General Public 11 * License. See the file COPYING in the main directory of this archive 12 * for more details. 13 */ 14 15 #include <linux/build_bug.h> 16 17 18 /* 19 * Basic transpose step 20 */ 21 22 static inline void _transp(u32 d[], unsigned int i1, unsigned int i2, 23 unsigned int shift, u32 mask) 24 { 25 u32 t = (d[i1] ^ (d[i2] >> shift)) & mask; 26 27 d[i1] ^= t; 28 d[i2] ^= t << shift; 29 } 30 31 32 static __always_inline u32 get_mask(unsigned int n) 33 { 34 switch (n) { 35 case 1: 36 return 0x55555555; 37 38 case 2: 39 return 0x33333333; 40 41 case 4: 42 return 0x0f0f0f0f; 43 44 case 8: 45 return 0x00ff00ff; 46 47 case 16: 48 return 0x0000ffff; 49 } 50 51 BUILD_BUG(); 52 return 0; 53 } 54 55 56 /* 57 * Transpose operations on 8 32-bit words 58 */ 59 60 static __always_inline void transp8(u32 d[], unsigned int n, unsigned int m) 61 { 62 u32 mask = get_mask(n); 63 64 switch (m) { 65 case 1: 66 /* First n x 1 block */ 67 _transp(d, 0, 1, n, mask); 68 /* Second n x 1 block */ 69 _transp(d, 2, 3, n, mask); 70 /* Third n x 1 block */ 71 _transp(d, 4, 5, n, mask); 72 /* Fourth n x 1 block */ 73 _transp(d, 6, 7, n, mask); 74 return; 75 76 case 2: 77 /* First n x 2 block */ 78 _transp(d, 0, 2, n, mask); 79 _transp(d, 1, 3, n, mask); 80 /* Second n x 2 block */ 81 _transp(d, 4, 6, n, mask); 82 _transp(d, 5, 7, n, mask); 83 return; 84 85 case 4: 86 /* Single n x 4 block */ 87 _transp(d, 0, 4, n, mask); 88 _transp(d, 1, 5, n, mask); 89 _transp(d, 2, 6, n, mask); 90 _transp(d, 3, 7, n, mask); 91 return; 92 } 93 94 BUILD_BUG(); 95 } 96 97 98 /* 99 * Transpose operations on 4 32-bit words 100 */ 101 102 static __always_inline void transp4(u32 d[], unsigned int n, unsigned int m) 103 { 104 u32 mask = get_mask(n); 105 106 switch (m) { 107 case 1: 108 /* First n x 1 block */ 109 _transp(d, 0, 1, n, mask); 110 /* Second n x 1 block */ 111 _transp(d, 2, 3, n, mask); 112 return; 113 114 case 2: 115 /* Single n x 2 block */ 116 _transp(d, 0, 2, n, mask); 117 _transp(d, 1, 3, n, mask); 118 return; 119 } 120 121 BUILD_BUG(); 122 } 123 124 125 /* 126 * Transpose operations on 4 32-bit words (reverse order) 127 */ 128 129 static __always_inline void transp4x(u32 d[], unsigned int n, unsigned int m) 130 { 131 u32 mask = get_mask(n); 132 133 switch (m) { 134 case 2: 135 /* Single n x 2 block */ 136 _transp(d, 2, 0, n, mask); 137 _transp(d, 3, 1, n, mask); 138 return; 139 } 140 141 BUILD_BUG(); 142 } 143 144 145 /* 146 * Compose two values, using a bitmask as decision value 147 * This is equivalent to (a & mask) | (b & ~mask) 148 */ 149 150 static inline u32 comp(u32 a, u32 b, u32 mask) 151 { 152 return ((a ^ b) & mask) ^ b; 153 } 154