15320918bSDave Airlie /* 25320918bSDave Airlie * Copyright (C) 2012 Red Hat 35320918bSDave Airlie * based in parts on udlfb.c: 45320918bSDave Airlie * Copyright (C) 2009 Roberto De Ioris <roberto@unbit.it> 55320918bSDave Airlie * Copyright (C) 2009 Jaya Kumar <jayakumar.lkml@gmail.com> 65320918bSDave Airlie * Copyright (C) 2009 Bernie Thompson <bernie@plugable.com> 75320918bSDave Airlie * 85320918bSDave Airlie * This file is subject to the terms and conditions of the GNU General Public 95320918bSDave Airlie * License v2. See the file COPYING in the main directory of this archive for 105320918bSDave Airlie * more details. 115320918bSDave Airlie */ 125320918bSDave Airlie 135320918bSDave Airlie #include <linux/module.h> 145320918bSDave Airlie #include <linux/slab.h> 155320918bSDave Airlie #include <linux/fb.h> 165320918bSDave Airlie #include <linux/prefetch.h> 170c45b36fSJonathan Neuschäfer #include <asm/unaligned.h> 185320918bSDave Airlie 19760285e7SDavid Howells #include <drm/drmP.h> 205320918bSDave Airlie #include "udl_drv.h" 215320918bSDave Airlie 225320918bSDave Airlie #define MAX_CMD_PIXELS 255 235320918bSDave Airlie 245320918bSDave Airlie #define RLX_HEADER_BYTES 7 255320918bSDave Airlie #define MIN_RLX_PIX_BYTES 4 265320918bSDave Airlie #define MIN_RLX_CMD_BYTES (RLX_HEADER_BYTES + MIN_RLX_PIX_BYTES) 275320918bSDave Airlie 285320918bSDave Airlie #define RLE_HEADER_BYTES 6 295320918bSDave Airlie #define MIN_RLE_PIX_BYTES 3 305320918bSDave Airlie #define MIN_RLE_CMD_BYTES (RLE_HEADER_BYTES + MIN_RLE_PIX_BYTES) 315320918bSDave Airlie 325320918bSDave Airlie #define RAW_HEADER_BYTES 6 335320918bSDave Airlie #define MIN_RAW_PIX_BYTES 2 345320918bSDave Airlie #define MIN_RAW_CMD_BYTES (RAW_HEADER_BYTES + MIN_RAW_PIX_BYTES) 355320918bSDave Airlie 365320918bSDave Airlie /* 375320918bSDave Airlie * Trims identical data from front and back of line 385320918bSDave Airlie * Sets new front buffer address and width 395320918bSDave Airlie * And returns byte count of identical pixels 405320918bSDave Airlie * Assumes CPU natural alignment (unsigned long) 415320918bSDave Airlie * for back and front buffer ptrs and width 425320918bSDave Airlie */ 435320918bSDave Airlie #if 0 445320918bSDave Airlie static int udl_trim_hline(const u8 *bback, const u8 **bfront, int *width_bytes) 455320918bSDave Airlie { 465320918bSDave Airlie int j, k; 475320918bSDave Airlie const unsigned long *back = (const unsigned long *) bback; 485320918bSDave Airlie const unsigned long *front = (const unsigned long *) *bfront; 495320918bSDave Airlie const int width = *width_bytes / sizeof(unsigned long); 505320918bSDave Airlie int identical = width; 515320918bSDave Airlie int start = width; 525320918bSDave Airlie int end = width; 535320918bSDave Airlie 545320918bSDave Airlie prefetch((void *) front); 555320918bSDave Airlie prefetch((void *) back); 565320918bSDave Airlie 575320918bSDave Airlie for (j = 0; j < width; j++) { 585320918bSDave Airlie if (back[j] != front[j]) { 595320918bSDave Airlie start = j; 605320918bSDave Airlie break; 615320918bSDave Airlie } 625320918bSDave Airlie } 635320918bSDave Airlie 645320918bSDave Airlie for (k = width - 1; k > j; k--) { 655320918bSDave Airlie if (back[k] != front[k]) { 665320918bSDave Airlie end = k+1; 675320918bSDave Airlie break; 685320918bSDave Airlie } 695320918bSDave Airlie } 705320918bSDave Airlie 715320918bSDave Airlie identical = start + (width - end); 725320918bSDave Airlie *bfront = (u8 *) &front[start]; 735320918bSDave Airlie *width_bytes = (end - start) * sizeof(unsigned long); 745320918bSDave Airlie 755320918bSDave Airlie return identical * sizeof(unsigned long); 765320918bSDave Airlie } 775320918bSDave Airlie #endif 785320918bSDave Airlie 79e90a4ea5SChris Wilson static inline u16 pixel32_to_be16(const uint32_t pixel) 805320918bSDave Airlie { 81e90a4ea5SChris Wilson return (((pixel >> 3) & 0x001f) | 82e90a4ea5SChris Wilson ((pixel >> 5) & 0x07e0) | 83e90a4ea5SChris Wilson ((pixel >> 8) & 0xf800)); 84e90a4ea5SChris Wilson } 855320918bSDave Airlie 8691ba11fbSMikulas Patocka static inline u16 get_pixel_val16(const uint8_t *pixel, int log_bpp) 87e90a4ea5SChris Wilson { 8891ba11fbSMikulas Patocka u16 pixel_val16; 8991ba11fbSMikulas Patocka if (log_bpp == 1) 9086584444SHaixia Shi pixel_val16 = *(const uint16_t *)pixel; 9191ba11fbSMikulas Patocka else 9286584444SHaixia Shi pixel_val16 = pixel32_to_be16(*(const uint32_t *)pixel); 9386584444SHaixia Shi return pixel_val16; 945320918bSDave Airlie } 955320918bSDave Airlie 965320918bSDave Airlie /* 975320918bSDave Airlie * Render a command stream for an encoded horizontal line segment of pixels. 985320918bSDave Airlie * 995320918bSDave Airlie * A command buffer holds several commands. 1005320918bSDave Airlie * It always begins with a fresh command header 1015320918bSDave Airlie * (the protocol doesn't require this, but we enforce it to allow 1025320918bSDave Airlie * multiple buffers to be potentially encoded and sent in parallel). 1035320918bSDave Airlie * A single command encodes one contiguous horizontal line of pixels 1045320918bSDave Airlie * 1055320918bSDave Airlie * The function relies on the client to do all allocation, so that 1065320918bSDave Airlie * rendering can be done directly to output buffers (e.g. USB URBs). 1075320918bSDave Airlie * The function fills the supplied command buffer, providing information 1085320918bSDave Airlie * on where it left off, so the client may call in again with additional 1095320918bSDave Airlie * buffers if the line will take several buffers to complete. 1105320918bSDave Airlie * 1115320918bSDave Airlie * A single command can transmit a maximum of 256 pixels, 1125320918bSDave Airlie * regardless of the compression ratio (protocol design limit). 1135320918bSDave Airlie * To the hardware, 0 for a size byte means 256 1145320918bSDave Airlie * 1155320918bSDave Airlie * Rather than 256 pixel commands which are either rl or raw encoded, 1165320918bSDave Airlie * the rlx command simply assumes alternating raw and rl spans within one cmd. 1175320918bSDave Airlie * This has a slightly larger header overhead, but produces more even results. 1185320918bSDave Airlie * It also processes all data (read and write) in a single pass. 1195320918bSDave Airlie * Performance benchmarks of common cases show it having just slightly better 1205320918bSDave Airlie * compression than 256 pixel raw or rle commands, with similar CPU consumpion. 1215320918bSDave Airlie * But for very rl friendly data, will compress not quite as well. 1225320918bSDave Airlie */ 1235320918bSDave Airlie static void udl_compress_hline16( 1245320918bSDave Airlie const u8 **pixel_start_ptr, 1255320918bSDave Airlie const u8 *const pixel_end, 1265320918bSDave Airlie uint32_t *device_address_ptr, 1275320918bSDave Airlie uint8_t **command_buffer_ptr, 12891ba11fbSMikulas Patocka const uint8_t *const cmd_buffer_end, int log_bpp) 1295320918bSDave Airlie { 13091ba11fbSMikulas Patocka const int bpp = 1 << log_bpp; 1315320918bSDave Airlie const u8 *pixel = *pixel_start_ptr; 1325320918bSDave Airlie uint32_t dev_addr = *device_address_ptr; 1335320918bSDave Airlie uint8_t *cmd = *command_buffer_ptr; 1345320918bSDave Airlie 1355320918bSDave Airlie while ((pixel_end > pixel) && 1365320918bSDave Airlie (cmd_buffer_end - MIN_RLX_CMD_BYTES > cmd)) { 13774401b1dSSachin Kamat uint8_t *raw_pixels_count_byte = NULL; 13874401b1dSSachin Kamat uint8_t *cmd_pixels_count_byte = NULL; 13974401b1dSSachin Kamat const u8 *raw_pixel_start = NULL; 14074401b1dSSachin Kamat const u8 *cmd_pixel_start, *cmd_pixel_end = NULL; 14186584444SHaixia Shi uint16_t pixel_val16; 1425320918bSDave Airlie 1435320918bSDave Airlie prefetchw((void *) cmd); /* pull in one cache line at least */ 1445320918bSDave Airlie 1455320918bSDave Airlie *cmd++ = 0xaf; 1465320918bSDave Airlie *cmd++ = 0x6b; 1475320918bSDave Airlie *cmd++ = (uint8_t) ((dev_addr >> 16) & 0xFF); 1485320918bSDave Airlie *cmd++ = (uint8_t) ((dev_addr >> 8) & 0xFF); 1495320918bSDave Airlie *cmd++ = (uint8_t) ((dev_addr) & 0xFF); 1505320918bSDave Airlie 1515320918bSDave Airlie cmd_pixels_count_byte = cmd++; /* we'll know this later */ 1525320918bSDave Airlie cmd_pixel_start = pixel; 1535320918bSDave Airlie 1545320918bSDave Airlie raw_pixels_count_byte = cmd++; /* we'll know this later */ 1555320918bSDave Airlie raw_pixel_start = pixel; 1565320918bSDave Airlie 15791ba11fbSMikulas Patocka cmd_pixel_end = pixel + (min3(MAX_CMD_PIXELS + 1UL, 15891ba11fbSMikulas Patocka (unsigned long)(pixel_end - pixel) >> log_bpp, 15991ba11fbSMikulas Patocka (unsigned long)(cmd_buffer_end - 1 - cmd) / 2) << log_bpp); 1605320918bSDave Airlie 16199ec9e77SMikulas Patocka prefetch_range((void *) pixel, cmd_pixel_end - pixel); 16291ba11fbSMikulas Patocka pixel_val16 = get_pixel_val16(pixel, log_bpp); 1635320918bSDave Airlie 1645320918bSDave Airlie while (pixel < cmd_pixel_end) { 165e90a4ea5SChris Wilson const u8 *const start = pixel; 16686584444SHaixia Shi const uint16_t repeating_pixel_val16 = pixel_val16; 1675320918bSDave Airlie 1680c45b36fSJonathan Neuschäfer put_unaligned_be16(pixel_val16, cmd); 1695320918bSDave Airlie 1705320918bSDave Airlie cmd += 2; 1715320918bSDave Airlie pixel += bpp; 1725320918bSDave Airlie 17386584444SHaixia Shi while (pixel < cmd_pixel_end) { 17491ba11fbSMikulas Patocka pixel_val16 = get_pixel_val16(pixel, log_bpp); 17586584444SHaixia Shi if (pixel_val16 != repeating_pixel_val16) 17686584444SHaixia Shi break; 17786584444SHaixia Shi pixel += bpp; 17886584444SHaixia Shi } 17986584444SHaixia Shi 18086584444SHaixia Shi if (unlikely(pixel > start + bpp)) { 1815320918bSDave Airlie /* go back and fill in raw pixel count */ 182e90a4ea5SChris Wilson *raw_pixels_count_byte = (((start - 18391ba11fbSMikulas Patocka raw_pixel_start) >> log_bpp) + 1) & 0xFF; 1845320918bSDave Airlie 1855320918bSDave Airlie /* immediately after raw data is repeat byte */ 18691ba11fbSMikulas Patocka *cmd++ = (((pixel - start) >> log_bpp) - 1) & 0xFF; 1875320918bSDave Airlie 1885320918bSDave Airlie /* Then start another raw pixel span */ 1895320918bSDave Airlie raw_pixel_start = pixel; 1905320918bSDave Airlie raw_pixels_count_byte = cmd++; 1915320918bSDave Airlie } 1925320918bSDave Airlie } 1935320918bSDave Airlie 1945320918bSDave Airlie if (pixel > raw_pixel_start) { 1955320918bSDave Airlie /* finalize last RAW span */ 19691ba11fbSMikulas Patocka *raw_pixels_count_byte = ((pixel - raw_pixel_start) >> log_bpp) & 0xFF; 19799ec9e77SMikulas Patocka } else { 19899ec9e77SMikulas Patocka /* undo unused byte */ 19999ec9e77SMikulas Patocka cmd--; 2005320918bSDave Airlie } 2015320918bSDave Airlie 20291ba11fbSMikulas Patocka *cmd_pixels_count_byte = ((pixel - cmd_pixel_start) >> log_bpp) & 0xFF; 20391ba11fbSMikulas Patocka dev_addr += ((pixel - cmd_pixel_start) >> log_bpp) * 2; 2045320918bSDave Airlie } 2055320918bSDave Airlie 2065320918bSDave Airlie if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) { 2075320918bSDave Airlie /* Fill leftover bytes with no-ops */ 2085320918bSDave Airlie if (cmd_buffer_end > cmd) 2095320918bSDave Airlie memset(cmd, 0xAF, cmd_buffer_end - cmd); 2105320918bSDave Airlie cmd = (uint8_t *) cmd_buffer_end; 2115320918bSDave Airlie } 2125320918bSDave Airlie 2135320918bSDave Airlie *command_buffer_ptr = cmd; 2145320918bSDave Airlie *pixel_start_ptr = pixel; 2155320918bSDave Airlie *device_address_ptr = dev_addr; 2165320918bSDave Airlie 2175320918bSDave Airlie return; 2185320918bSDave Airlie } 2195320918bSDave Airlie 2205320918bSDave Airlie /* 2215320918bSDave Airlie * There are 3 copies of every pixel: The front buffer that the fbdev 2225320918bSDave Airlie * client renders to, the actual framebuffer across the USB bus in hardware 2235320918bSDave Airlie * (that we can only write to, slowly, and can never read), and (optionally) 2245320918bSDave Airlie * our shadow copy that tracks what's been sent to that hardware buffer. 2255320918bSDave Airlie */ 22691ba11fbSMikulas Patocka int udl_render_hline(struct drm_device *dev, int log_bpp, struct urb **urb_ptr, 2275320918bSDave Airlie const char *front, char **urb_buf_ptr, 2283916e1d7SDave Airlie u32 byte_offset, u32 device_byte_offset, 2293916e1d7SDave Airlie u32 byte_width, 2305320918bSDave Airlie int *ident_ptr, int *sent_ptr) 2315320918bSDave Airlie { 2325320918bSDave Airlie const u8 *line_start, *line_end, *next_pixel; 23391ba11fbSMikulas Patocka u32 base16 = 0 + (device_byte_offset >> log_bpp) * 2; 2345320918bSDave Airlie struct urb *urb = *urb_ptr; 2355320918bSDave Airlie u8 *cmd = *urb_buf_ptr; 2365320918bSDave Airlie u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; 2375320918bSDave Airlie 23891ba11fbSMikulas Patocka BUG_ON(!(log_bpp == 1 || log_bpp == 2)); 239e90a4ea5SChris Wilson 2405320918bSDave Airlie line_start = (u8 *) (front + byte_offset); 2415320918bSDave Airlie next_pixel = line_start; 2425320918bSDave Airlie line_end = next_pixel + byte_width; 2435320918bSDave Airlie 2445320918bSDave Airlie while (next_pixel < line_end) { 2455320918bSDave Airlie 2465320918bSDave Airlie udl_compress_hline16(&next_pixel, 2475320918bSDave Airlie line_end, &base16, 24891ba11fbSMikulas Patocka (u8 **) &cmd, (u8 *) cmd_end, log_bpp); 2495320918bSDave Airlie 2505320918bSDave Airlie if (cmd >= cmd_end) { 2515320918bSDave Airlie int len = cmd - (u8 *) urb->transfer_buffer; 2525320918bSDave Airlie if (udl_submit_urb(dev, urb, len)) 2535320918bSDave Airlie return 1; /* lost pixels is set */ 2545320918bSDave Airlie *sent_ptr += len; 2555320918bSDave Airlie urb = udl_get_urb(dev); 2565320918bSDave Airlie if (!urb) 2575320918bSDave Airlie return 1; /* lost_pixels is set */ 2585320918bSDave Airlie *urb_ptr = urb; 2595320918bSDave Airlie cmd = urb->transfer_buffer; 2605320918bSDave Airlie cmd_end = &cmd[urb->transfer_buffer_length]; 2615320918bSDave Airlie } 2625320918bSDave Airlie } 2635320918bSDave Airlie 2645320918bSDave Airlie *urb_buf_ptr = cmd; 2655320918bSDave Airlie 2665320918bSDave Airlie return 0; 2675320918bSDave Airlie } 2685320918bSDave Airlie 269