1 /*
2  *  Generic BitBLT function for frame buffer with packed pixels of any depth.
3  *
4  *      Copyright (C)  June 1999 James Simmons
5  *
6  *  This file is subject to the terms and conditions of the GNU General Public
7  *  License.  See the file COPYING in the main directory of this archive for
8  *  more details.
9  *
10  * NOTES:
11  *
12  *    This function copies an image from system memory to video memory. The
13  *  image can be a bitmap where each 0 represents the background color and
14  *  each 1 represents the foreground color. Great for font handling. It can
15  *  also be a color image. This is determined by image->depth. The color image
16  *  must be laid out exactly in the same format as the framebuffer. Yes I know
17  *  there are cards with hardware that converts images of various depths to the
18  *  framebuffer depth. But not every card has this. All images must be rounded
19  *  up to the nearest byte. For example a bitmap 12 bits wide must be two
20  *  bytes wide.
21  *
22  *  Tony:
23  *  Incorporate mask tables similar to fbcon-cfb*.c in 2.4 API.  This speeds
24  *  up the code significantly.
25  *
26  *  Code for depths that are not multiples of BITS_PER_LONG is still kludgy;
27  *  such depths are still processed a bit at a time.
28  *
29  *  Also need to add code to deal with cards whose endianness differs from
30  *  that of the native CPU. I also need to deal with MSB position in the word.
31  */
32 #include <linux/module.h>
33 #include <linux/string.h>
34 #include <linux/fb.h>
35 #include <asm/types.h>
36 #include "fb_draw.h"
37 
/*
 * Debug tracing.  While DEBUG is defined, DPRINTK() forwards its arguments
 * to printk() at KERN_DEBUG level and prefixes every message with the name
 * of the calling function.  Without DEBUG it expands to nothing.
 */
#define DEBUG

#if defined(DEBUG)
#define DPRINTK(fmt, ...) printk(KERN_DEBUG "%s: " fmt, __func__, ##__VA_ARGS__)
#else
#define DPRINTK(fmt, ...)
#endif
45 
46 static const u32 cfb_tab8_be[] = {
47     0x00000000,0x000000ff,0x0000ff00,0x0000ffff,
48     0x00ff0000,0x00ff00ff,0x00ffff00,0x00ffffff,
49     0xff000000,0xff0000ff,0xff00ff00,0xff00ffff,
50     0xffff0000,0xffff00ff,0xffffff00,0xffffffff
51 };
52 
53 static const u32 cfb_tab8_le[] = {
54     0x00000000,0xff000000,0x00ff0000,0xffff0000,
55     0x0000ff00,0xff00ff00,0x00ffff00,0xffffff00,
56     0x000000ff,0xff0000ff,0x00ff00ff,0xffff00ff,
57     0x0000ffff,0xff00ffff,0x00ffffff,0xffffffff
58 };
59 
60 static const u32 cfb_tab16_be[] = {
61     0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff
62 };
63 
64 static const u32 cfb_tab16_le[] = {
65     0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff
66 };
67 
68 static const u32 cfb_tab32[] = {
69 	0x00000000, 0xffffffff
70 };
71 
/* 32-bit frame buffer accessors; both simply forward to the fb_*l() helpers. */
#define FB_WRITEL(val, addr)	fb_writel((val), (addr))
#define FB_READL(addr)		fb_readl((addr))
74 
75 static inline void color_imageblit(const struct fb_image *image,
76 				   struct fb_info *p, u8 __iomem *dst1,
77 				   u32 start_index,
78 				   u32 pitch_index)
79 {
80 	/* Draw the penguin */
81 	u32 __iomem *dst, *dst2;
82 	u32 color = 0, val, shift;
83 	int i, n, bpp = p->var.bits_per_pixel;
84 	u32 null_bits = 32 - bpp;
85 	u32 *palette = (u32 *) p->pseudo_palette;
86 	const u8 *src = image->data;
87 	u32 bswapmask = fb_compute_bswapmask(p);
88 
89 	dst2 = (u32 __iomem *) dst1;
90 	for (i = image->height; i--; ) {
91 		n = image->width;
92 		dst = (u32 __iomem *) dst1;
93 		shift = 0;
94 		val = 0;
95 
96 		if (start_index) {
97 			u32 start_mask = ~fb_shifted_pixels_mask_u32(p,
98 						start_index, bswapmask);
99 			val = FB_READL(dst) & start_mask;
100 			shift = start_index;
101 		}
102 		while (n--) {
103 			if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
104 			    p->fix.visual == FB_VISUAL_DIRECTCOLOR )
105 				color = palette[*src];
106 			else
107 				color = *src;
108 			color <<= FB_LEFT_POS(p, bpp);
109 			val |= FB_SHIFT_HIGH(p, color, shift ^ bswapmask);
110 			if (shift >= null_bits) {
111 				FB_WRITEL(val, dst++);
112 
113 				val = (shift == null_bits) ? 0 :
114 					FB_SHIFT_LOW(p, color, 32 - shift);
115 			}
116 			shift += bpp;
117 			shift &= (32 - 1);
118 			src++;
119 		}
120 		if (shift) {
121 			u32 end_mask = fb_shifted_pixels_mask_u32(p, shift,
122 						bswapmask);
123 
124 			FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
125 		}
126 		dst1 += p->fix.line_length;
127 		if (pitch_index) {
128 			dst2 += p->fix.line_length;
129 			dst1 = (u8 __iomem *)((long __force)dst2 & ~(sizeof(u32) - 1));
130 
131 			start_index += pitch_index;
132 			start_index &= 32 - 1;
133 		}
134 	}
135 }
136 
137 static inline void slow_imageblit(const struct fb_image *image, struct fb_info *p,
138 				  u8 __iomem *dst1, u32 fgcolor,
139 				  u32 bgcolor,
140 				  u32 start_index,
141 				  u32 pitch_index)
142 {
143 	u32 shift, color = 0, bpp = p->var.bits_per_pixel;
144 	u32 __iomem *dst, *dst2;
145 	u32 val, pitch = p->fix.line_length;
146 	u32 null_bits = 32 - bpp;
147 	u32 spitch = (image->width+7)/8;
148 	const u8 *src = image->data, *s;
149 	u32 i, j, l;
150 	u32 bswapmask = fb_compute_bswapmask(p);
151 
152 	dst2 = (u32 __iomem *) dst1;
153 	fgcolor <<= FB_LEFT_POS(p, bpp);
154 	bgcolor <<= FB_LEFT_POS(p, bpp);
155 
156 	for (i = image->height; i--; ) {
157 		shift = val = 0;
158 		l = 8;
159 		j = image->width;
160 		dst = (u32 __iomem *) dst1;
161 		s = src;
162 
163 		/* write leading bits */
164 		if (start_index) {
165 			u32 start_mask = ~fb_shifted_pixels_mask_u32(p,
166 						start_index, bswapmask);
167 			val = FB_READL(dst) & start_mask;
168 			shift = start_index;
169 		}
170 
171 		while (j--) {
172 			l--;
173 			color = (*s & (1 << l)) ? fgcolor : bgcolor;
174 			val |= FB_SHIFT_HIGH(p, color, shift ^ bswapmask);
175 
176 			/* Did the bitshift spill bits to the next long? */
177 			if (shift >= null_bits) {
178 				FB_WRITEL(val, dst++);
179 				val = (shift == null_bits) ? 0 :
180 					FB_SHIFT_LOW(p, color, 32 - shift);
181 			}
182 			shift += bpp;
183 			shift &= (32 - 1);
184 			if (!l) { l = 8; s++; }
185 		}
186 
187 		/* write trailing bits */
188  		if (shift) {
189 			u32 end_mask = fb_shifted_pixels_mask_u32(p, shift,
190 						bswapmask);
191 
192 			FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
193 		}
194 
195 		dst1 += pitch;
196 		src += spitch;
197 		if (pitch_index) {
198 			dst2 += pitch;
199 			dst1 = (u8 __iomem *)((long __force)dst2 & ~(sizeof(u32) - 1));
200 			start_index += pitch_index;
201 			start_index &= 32 - 1;
202 		}
203 
204 	}
205 }
206 
207 /*
208  * fast_imageblit - optimized monochrome color expansion
209  *
210  * Only if:  bits_per_pixel == 8, 16, or 32
211  *           image->width is divisible by pixel/dword (ppw);
212  *           fix->line_legth is divisible by 4;
213  *           beginning and end of a scanline is dword aligned
214  */
215 static inline void fast_imageblit(const struct fb_image *image, struct fb_info *p,
216 				  u8 __iomem *dst1, u32 fgcolor,
217 				  u32 bgcolor)
218 {
219 	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
220 	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
221 	u32 bit_mask, eorx, shift;
222 	const char *s = image->data, *src;
223 	u32 __iomem *dst;
224 	const u32 *tab = NULL;
225 	size_t tablen;
226 	u32 colortab[16];
227 	int i, j, k;
228 
229 	switch (bpp) {
230 	case 8:
231 		tab = fb_be_math(p) ? cfb_tab8_be : cfb_tab8_le;
232 		tablen = 16;
233 		break;
234 	case 16:
235 		tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le;
236 		tablen = 4;
237 		break;
238 	case 32:
239 		tab = cfb_tab32;
240 		tablen = 2;
241 		break;
242 	default:
243 		return;
244 	}
245 
246 	for (i = ppw-1; i--; ) {
247 		fgx <<= bpp;
248 		bgx <<= bpp;
249 		fgx |= fgcolor;
250 		bgx |= bgcolor;
251 	}
252 
253 	bit_mask = (1 << ppw) - 1;
254 	eorx = fgx ^ bgx;
255 	k = image->width/ppw;
256 
257 	for (i = 0; i < tablen; ++i)
258 		colortab[i] = (tab[i] & eorx) ^ bgx;
259 
260 	for (i = image->height; i--; ) {
261 		dst = (u32 __iomem *)dst1;
262 		shift = 8;
263 		src = s;
264 
265 		/*
266 		 * Manually unroll the per-line copying loop for better
267 		 * performance. This works until we processed the last
268 		 * completely filled source byte (inclusive).
269 		 */
270 		switch (ppw) {
271 		case 4: /* 8 bpp */
272 			for (j = k; j >= 2; j -= 2, ++src) {
273 				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
274 				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
275 			}
276 			break;
277 		case 2: /* 16 bpp */
278 			for (j = k; j >= 4; j -= 4, ++src) {
279 				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
280 				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
281 				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
282 				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
283 			}
284 			break;
285 		case 1: /* 32 bpp */
286 			for (j = k; j >= 8; j -= 8, ++src) {
287 				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
288 				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
289 				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
290 				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
291 				FB_WRITEL(colortab[(*src >> 3) & bit_mask], dst++);
292 				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
293 				FB_WRITEL(colortab[(*src >> 1) & bit_mask], dst++);
294 				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
295 			}
296 			break;
297 		}
298 
299 		/*
300 		 * For image widths that are not a multiple of 8, there
301 		 * are trailing pixels left on the current line. Print
302 		 * them as well.
303 		 */
304 		for (; j--; ) {
305 			shift -= ppw;
306 			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
307 			if (!shift) {
308 				shift = 8;
309 				++src;
310 			}
311 		}
312 
313 		dst1 += p->fix.line_length;
314 		s += spitch;
315 	}
316 }
317 
318 void cfb_imageblit(struct fb_info *p, const struct fb_image *image)
319 {
320 	u32 fgcolor, bgcolor, start_index, bitstart, pitch_index = 0;
321 	u32 bpl = sizeof(u32), bpp = p->var.bits_per_pixel;
322 	u32 width = image->width;
323 	u32 dx = image->dx, dy = image->dy;
324 	u8 __iomem *dst1;
325 
326 	if (p->state != FBINFO_STATE_RUNNING)
327 		return;
328 
329 	bitstart = (dy * p->fix.line_length * 8) + (dx * bpp);
330 	start_index = bitstart & (32 - 1);
331 	pitch_index = (p->fix.line_length & (bpl - 1)) * 8;
332 
333 	bitstart /= 8;
334 	bitstart &= ~(bpl - 1);
335 	dst1 = p->screen_base + bitstart;
336 
337 	if (p->fbops->fb_sync)
338 		p->fbops->fb_sync(p);
339 
340 	if (image->depth == 1) {
341 		if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
342 		    p->fix.visual == FB_VISUAL_DIRECTCOLOR) {
343 			fgcolor = ((u32*)(p->pseudo_palette))[image->fg_color];
344 			bgcolor = ((u32*)(p->pseudo_palette))[image->bg_color];
345 		} else {
346 			fgcolor = image->fg_color;
347 			bgcolor = image->bg_color;
348 		}
349 
350 		if (32 % bpp == 0 && !start_index && !pitch_index &&
351 		    ((width & (32/bpp-1)) == 0) &&
352 		    bpp >= 8 && bpp <= 32)
353 			fast_imageblit(image, p, dst1, fgcolor, bgcolor);
354 		else
355 			slow_imageblit(image, p, dst1, fgcolor, bgcolor,
356 					start_index, pitch_index);
357 	} else
358 		color_imageblit(image, p, dst1, start_index, pitch_index);
359 }
360 
361 EXPORT_SYMBOL(cfb_imageblit);
362 
363 MODULE_AUTHOR("James Simmons <jsimmons@users.sf.net>");
364 MODULE_DESCRIPTION("Generic software accelerated imaging drawing");
365 MODULE_LICENSE("GPL");
366 
367