15320918bSDave Airlie /* 25320918bSDave Airlie * Copyright (C) 2012 Red Hat 35320918bSDave Airlie * based in parts on udlfb.c: 45320918bSDave Airlie * Copyright (C) 2009 Roberto De Ioris <roberto@unbit.it> 55320918bSDave Airlie * Copyright (C) 2009 Jaya Kumar <jayakumar.lkml@gmail.com> 65320918bSDave Airlie * Copyright (C) 2009 Bernie Thompson <bernie@plugable.com> 75320918bSDave Airlie * 85320918bSDave Airlie * This file is subject to the terms and conditions of the GNU General Public 95320918bSDave Airlie * License v2. See the file COPYING in the main directory of this archive for 105320918bSDave Airlie * more details. 115320918bSDave Airlie */ 125320918bSDave Airlie 135320918bSDave Airlie #include <linux/module.h> 145320918bSDave Airlie #include <linux/slab.h> 155320918bSDave Airlie #include <linux/fb.h> 165320918bSDave Airlie #include <linux/prefetch.h> 175320918bSDave Airlie 18760285e7SDavid Howells #include <drm/drmP.h> 195320918bSDave Airlie #include "udl_drv.h" 205320918bSDave Airlie 215320918bSDave Airlie #define MAX_CMD_PIXELS 255 225320918bSDave Airlie 235320918bSDave Airlie #define RLX_HEADER_BYTES 7 245320918bSDave Airlie #define MIN_RLX_PIX_BYTES 4 255320918bSDave Airlie #define MIN_RLX_CMD_BYTES (RLX_HEADER_BYTES + MIN_RLX_PIX_BYTES) 265320918bSDave Airlie 275320918bSDave Airlie #define RLE_HEADER_BYTES 6 285320918bSDave Airlie #define MIN_RLE_PIX_BYTES 3 295320918bSDave Airlie #define MIN_RLE_CMD_BYTES (RLE_HEADER_BYTES + MIN_RLE_PIX_BYTES) 305320918bSDave Airlie 315320918bSDave Airlie #define RAW_HEADER_BYTES 6 325320918bSDave Airlie #define MIN_RAW_PIX_BYTES 2 335320918bSDave Airlie #define MIN_RAW_CMD_BYTES (RAW_HEADER_BYTES + MIN_RAW_PIX_BYTES) 345320918bSDave Airlie 355320918bSDave Airlie /* 365320918bSDave Airlie * Trims identical data from front and back of line 375320918bSDave Airlie * Sets new front buffer address and width 385320918bSDave Airlie * And returns byte count of identical pixels 395320918bSDave Airlie * Assumes CPU natural alignment (unsigned long) 405320918bSDave Airlie * for back and front buffer ptrs and width 415320918bSDave Airlie */ 425320918bSDave Airlie #if 0 435320918bSDave Airlie static int udl_trim_hline(const u8 *bback, const u8 **bfront, int *width_bytes) 445320918bSDave Airlie { 455320918bSDave Airlie int j, k; 465320918bSDave Airlie const unsigned long *back = (const unsigned long *) bback; 475320918bSDave Airlie const unsigned long *front = (const unsigned long *) *bfront; 485320918bSDave Airlie const int width = *width_bytes / sizeof(unsigned long); 495320918bSDave Airlie int identical = width; 505320918bSDave Airlie int start = width; 515320918bSDave Airlie int end = width; 525320918bSDave Airlie 535320918bSDave Airlie prefetch((void *) front); 545320918bSDave Airlie prefetch((void *) back); 555320918bSDave Airlie 565320918bSDave Airlie for (j = 0; j < width; j++) { 575320918bSDave Airlie if (back[j] != front[j]) { 585320918bSDave Airlie start = j; 595320918bSDave Airlie break; 605320918bSDave Airlie } 615320918bSDave Airlie } 625320918bSDave Airlie 635320918bSDave Airlie for (k = width - 1; k > j; k--) { 645320918bSDave Airlie if (back[k] != front[k]) { 655320918bSDave Airlie end = k+1; 665320918bSDave Airlie break; 675320918bSDave Airlie } 685320918bSDave Airlie } 695320918bSDave Airlie 705320918bSDave Airlie identical = start + (width - end); 715320918bSDave Airlie *bfront = (u8 *) &front[start]; 725320918bSDave Airlie *width_bytes = (end - start) * sizeof(unsigned long); 735320918bSDave Airlie 745320918bSDave Airlie return identical * sizeof(unsigned long); 755320918bSDave Airlie } 765320918bSDave Airlie #endif 775320918bSDave Airlie 78e90a4ea5SChris Wilson static inline u16 pixel32_to_be16(const uint32_t pixel) 795320918bSDave Airlie { 80e90a4ea5SChris Wilson return (((pixel >> 3) & 0x001f) | 81e90a4ea5SChris Wilson ((pixel >> 5) & 0x07e0) | 82e90a4ea5SChris Wilson ((pixel >> 8) & 0xf800)); 83e90a4ea5SChris Wilson } 845320918bSDave Airlie 8586584444SHaixia Shi static inline u16 get_pixel_val16(const uint8_t *pixel, int bpp) 86e90a4ea5SChris Wilson { 8786584444SHaixia Shi u16 pixel_val16 = 0; 88e90a4ea5SChris Wilson if (bpp == 2) 8986584444SHaixia Shi pixel_val16 = *(const uint16_t *)pixel; 9086584444SHaixia Shi else if (bpp == 4) 9186584444SHaixia Shi pixel_val16 = pixel32_to_be16(*(const uint32_t *)pixel); 9286584444SHaixia Shi return pixel_val16; 935320918bSDave Airlie } 945320918bSDave Airlie 955320918bSDave Airlie /* 965320918bSDave Airlie * Render a command stream for an encoded horizontal line segment of pixels. 975320918bSDave Airlie * 985320918bSDave Airlie * A command buffer holds several commands. 995320918bSDave Airlie * It always begins with a fresh command header 1005320918bSDave Airlie * (the protocol doesn't require this, but we enforce it to allow 1015320918bSDave Airlie * multiple buffers to be potentially encoded and sent in parallel). 1025320918bSDave Airlie * A single command encodes one contiguous horizontal line of pixels 1035320918bSDave Airlie * 1045320918bSDave Airlie * The function relies on the client to do all allocation, so that 1055320918bSDave Airlie * rendering can be done directly to output buffers (e.g. USB URBs). 1065320918bSDave Airlie * The function fills the supplied command buffer, providing information 1075320918bSDave Airlie * on where it left off, so the client may call in again with additional 1085320918bSDave Airlie * buffers if the line will take several buffers to complete. 1095320918bSDave Airlie * 1105320918bSDave Airlie * A single command can transmit a maximum of 256 pixels, 1115320918bSDave Airlie * regardless of the compression ratio (protocol design limit). 1125320918bSDave Airlie * To the hardware, 0 for a size byte means 256 1135320918bSDave Airlie * 1145320918bSDave Airlie * Rather than 256 pixel commands which are either rl or raw encoded, 1155320918bSDave Airlie * the rlx command simply assumes alternating raw and rl spans within one cmd. 1165320918bSDave Airlie * This has a slightly larger header overhead, but produces more even results. 1175320918bSDave Airlie * It also processes all data (read and write) in a single pass. 1185320918bSDave Airlie * Performance benchmarks of common cases show it having just slightly better 1195320918bSDave Airlie * compression than 256 pixel raw or rle commands, with similar CPU consumpion. 1205320918bSDave Airlie * But for very rl friendly data, will compress not quite as well. 1215320918bSDave Airlie */ 1225320918bSDave Airlie static void udl_compress_hline16( 1235320918bSDave Airlie const u8 **pixel_start_ptr, 1245320918bSDave Airlie const u8 *const pixel_end, 1255320918bSDave Airlie uint32_t *device_address_ptr, 1265320918bSDave Airlie uint8_t **command_buffer_ptr, 1275320918bSDave Airlie const uint8_t *const cmd_buffer_end, int bpp) 1285320918bSDave Airlie { 1295320918bSDave Airlie const u8 *pixel = *pixel_start_ptr; 1305320918bSDave Airlie uint32_t dev_addr = *device_address_ptr; 1315320918bSDave Airlie uint8_t *cmd = *command_buffer_ptr; 1325320918bSDave Airlie 1335320918bSDave Airlie while ((pixel_end > pixel) && 1345320918bSDave Airlie (cmd_buffer_end - MIN_RLX_CMD_BYTES > cmd)) { 13574401b1dSSachin Kamat uint8_t *raw_pixels_count_byte = NULL; 13674401b1dSSachin Kamat uint8_t *cmd_pixels_count_byte = NULL; 13774401b1dSSachin Kamat const u8 *raw_pixel_start = NULL; 13874401b1dSSachin Kamat const u8 *cmd_pixel_start, *cmd_pixel_end = NULL; 13986584444SHaixia Shi uint16_t pixel_val16; 1405320918bSDave Airlie 1415320918bSDave Airlie prefetchw((void *) cmd); /* pull in one cache line at least */ 1425320918bSDave Airlie 1435320918bSDave Airlie *cmd++ = 0xaf; 1445320918bSDave Airlie *cmd++ = 0x6b; 1455320918bSDave Airlie *cmd++ = (uint8_t) ((dev_addr >> 16) & 0xFF); 1465320918bSDave Airlie *cmd++ = (uint8_t) ((dev_addr >> 8) & 0xFF); 1475320918bSDave Airlie *cmd++ = (uint8_t) ((dev_addr) & 0xFF); 1485320918bSDave Airlie 1495320918bSDave Airlie cmd_pixels_count_byte = cmd++; /* we'll know this later */ 1505320918bSDave Airlie cmd_pixel_start = pixel; 1515320918bSDave Airlie 1525320918bSDave Airlie raw_pixels_count_byte = cmd++; /* we'll know this later */ 1535320918bSDave Airlie raw_pixel_start = pixel; 1545320918bSDave Airlie 1555320918bSDave Airlie cmd_pixel_end = pixel + (min(MAX_CMD_PIXELS + 1, 1565320918bSDave Airlie min((int)(pixel_end - pixel) / bpp, 1575320918bSDave Airlie (int)(cmd_buffer_end - cmd) / 2))) * bpp; 1585320918bSDave Airlie 1595320918bSDave Airlie prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp); 16086584444SHaixia Shi pixel_val16 = get_pixel_val16(pixel, bpp); 1615320918bSDave Airlie 1625320918bSDave Airlie while (pixel < cmd_pixel_end) { 163e90a4ea5SChris Wilson const u8 *const start = pixel; 16486584444SHaixia Shi const uint16_t repeating_pixel_val16 = pixel_val16; 1655320918bSDave Airlie 16686584444SHaixia Shi *(uint16_t *)cmd = cpu_to_be16(pixel_val16); 1675320918bSDave Airlie 1685320918bSDave Airlie cmd += 2; 1695320918bSDave Airlie pixel += bpp; 1705320918bSDave Airlie 17186584444SHaixia Shi while (pixel < cmd_pixel_end) { 17286584444SHaixia Shi pixel_val16 = get_pixel_val16(pixel, bpp); 17386584444SHaixia Shi if (pixel_val16 != repeating_pixel_val16) 17486584444SHaixia Shi break; 17586584444SHaixia Shi pixel += bpp; 17686584444SHaixia Shi } 17786584444SHaixia Shi 17886584444SHaixia Shi if (unlikely(pixel > start + bpp)) { 1795320918bSDave Airlie /* go back and fill in raw pixel count */ 180e90a4ea5SChris Wilson *raw_pixels_count_byte = (((start - 1815320918bSDave Airlie raw_pixel_start) / bpp) + 1) & 0xFF; 1825320918bSDave Airlie 1835320918bSDave Airlie /* immediately after raw data is repeat byte */ 184e90a4ea5SChris Wilson *cmd++ = (((pixel - start) / bpp) - 1) & 0xFF; 1855320918bSDave Airlie 1865320918bSDave Airlie /* Then start another raw pixel span */ 1875320918bSDave Airlie raw_pixel_start = pixel; 1885320918bSDave Airlie raw_pixels_count_byte = cmd++; 1895320918bSDave Airlie } 1905320918bSDave Airlie } 1915320918bSDave Airlie 1925320918bSDave Airlie if (pixel > raw_pixel_start) { 1935320918bSDave Airlie /* finalize last RAW span */ 1945320918bSDave Airlie *raw_pixels_count_byte = ((pixel-raw_pixel_start) / bpp) & 0xFF; 1955320918bSDave Airlie } 1965320918bSDave Airlie 1975320918bSDave Airlie *cmd_pixels_count_byte = ((pixel - cmd_pixel_start) / bpp) & 0xFF; 1985320918bSDave Airlie dev_addr += ((pixel - cmd_pixel_start) / bpp) * 2; 1995320918bSDave Airlie } 2005320918bSDave Airlie 2015320918bSDave Airlie if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) { 2025320918bSDave Airlie /* Fill leftover bytes with no-ops */ 2035320918bSDave Airlie if (cmd_buffer_end > cmd) 2045320918bSDave Airlie memset(cmd, 0xAF, cmd_buffer_end - cmd); 2055320918bSDave Airlie cmd = (uint8_t *) cmd_buffer_end; 2065320918bSDave Airlie } 2075320918bSDave Airlie 2085320918bSDave Airlie *command_buffer_ptr = cmd; 2095320918bSDave Airlie *pixel_start_ptr = pixel; 2105320918bSDave Airlie *device_address_ptr = dev_addr; 2115320918bSDave Airlie 2125320918bSDave Airlie return; 2135320918bSDave Airlie } 2145320918bSDave Airlie 2155320918bSDave Airlie /* 2165320918bSDave Airlie * There are 3 copies of every pixel: The front buffer that the fbdev 2175320918bSDave Airlie * client renders to, the actual framebuffer across the USB bus in hardware 2185320918bSDave Airlie * (that we can only write to, slowly, and can never read), and (optionally) 2195320918bSDave Airlie * our shadow copy that tracks what's been sent to that hardware buffer. 2205320918bSDave Airlie */ 2215320918bSDave Airlie int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, 2225320918bSDave Airlie const char *front, char **urb_buf_ptr, 2233916e1d7SDave Airlie u32 byte_offset, u32 device_byte_offset, 2243916e1d7SDave Airlie u32 byte_width, 2255320918bSDave Airlie int *ident_ptr, int *sent_ptr) 2265320918bSDave Airlie { 2275320918bSDave Airlie const u8 *line_start, *line_end, *next_pixel; 2283916e1d7SDave Airlie u32 base16 = 0 + (device_byte_offset / bpp) * 2; 2295320918bSDave Airlie struct urb *urb = *urb_ptr; 2305320918bSDave Airlie u8 *cmd = *urb_buf_ptr; 2315320918bSDave Airlie u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; 2325320918bSDave Airlie 233e90a4ea5SChris Wilson BUG_ON(!(bpp == 2 || bpp == 4)); 234e90a4ea5SChris Wilson 2355320918bSDave Airlie line_start = (u8 *) (front + byte_offset); 2365320918bSDave Airlie next_pixel = line_start; 2375320918bSDave Airlie line_end = next_pixel + byte_width; 2385320918bSDave Airlie 2395320918bSDave Airlie while (next_pixel < line_end) { 2405320918bSDave Airlie 2415320918bSDave Airlie udl_compress_hline16(&next_pixel, 2425320918bSDave Airlie line_end, &base16, 2435320918bSDave Airlie (u8 **) &cmd, (u8 *) cmd_end, bpp); 2445320918bSDave Airlie 2455320918bSDave Airlie if (cmd >= cmd_end) { 2465320918bSDave Airlie int len = cmd - (u8 *) urb->transfer_buffer; 2475320918bSDave Airlie if (udl_submit_urb(dev, urb, len)) 2485320918bSDave Airlie return 1; /* lost pixels is set */ 2495320918bSDave Airlie *sent_ptr += len; 2505320918bSDave Airlie urb = udl_get_urb(dev); 2515320918bSDave Airlie if (!urb) 2525320918bSDave Airlie return 1; /* lost_pixels is set */ 2535320918bSDave Airlie *urb_ptr = urb; 2545320918bSDave Airlie cmd = urb->transfer_buffer; 2555320918bSDave Airlie cmd_end = &cmd[urb->transfer_buffer_length]; 2565320918bSDave Airlie } 2575320918bSDave Airlie } 2585320918bSDave Airlie 2595320918bSDave Airlie *urb_buf_ptr = cmd; 2605320918bSDave Airlie 2615320918bSDave Airlie return 0; 2625320918bSDave Airlie } 2635320918bSDave Airlie 264