/*
* Copyright (C) 2012 Red Hat
* based in parts on udlfb.c:
* Copyright (C) 2009 Roberto De Ioris <roberto@unbit.it>
* Copyright (C) 2009 Jaya Kumar <jayakumar.lkml@gmail.com>
* Copyright (C) 2009 Bernie Thompson <bernie@plugable.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License v2. See the file COPYING in the main directory of this archive for
* more details.
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/fb.h>
#include <linux/prefetch.h>
#include <drm/drmP.h>
#include "udl_drv.h"
/*
 * Size constants for the command stream produced by the line encoder.
 *
 * MAX_CMD_PIXELS + 1 (i.e. 256) is the upper bound on the number of pixels
 * that the encoder packs into a single command.
 */
#define MAX_CMD_PIXELS 255
/*
 * RLX command header as emitted by udl_compress_hline16(): 0xaf, 0x6b,
 * three device address bytes, the command pixel count and the first raw
 * span count.
 */
#define RLX_HEADER_BYTES 7
/* NOTE(review): presumably the smallest useful RLX payload - confirm */
#define MIN_RLX_PIX_BYTES 4
/*
 * The encoder only starts a new command while more than this many bytes
 * are left in the command buffer; otherwise the remainder is padded.
 */
#define MIN_RLX_CMD_BYTES (RLX_HEADER_BYTES + MIN_RLX_PIX_BYTES)
/*
 * RLE and RAW command sizes. NOTE(review): not referenced by the code
 * visible in this file - presumably kept for the other command encodings.
 */
#define RLE_HEADER_BYTES 6
#define MIN_RLE_PIX_BYTES 3
#define MIN_RLE_CMD_BYTES (RLE_HEADER_BYTES + MIN_RLE_PIX_BYTES)
#define RAW_HEADER_BYTES 6
#define MIN_RAW_PIX_BYTES 2
#define MIN_RAW_CMD_BYTES (RAW_HEADER_BYTES + MIN_RAW_PIX_BYTES)
/*
 * Trims identical data from the front and back of a line.
 *
 * The back line and the front line are compared one unsigned long at a
 * time. On return *bfront points at the first word that differs and
 * *width_bytes holds the size in bytes of the span from that word up to
 * and including the last word that differs (0 if the lines are identical).
 *
 * @bback:       start of the back-buffer line
 * @bfront:      in: start of the front-buffer line, out: start of the
 *               differing span
 * @width_bytes: in: line width in bytes, out: width of the differing span
 *
 * Returns the byte count of the identical words that were trimmed.
 *
 * Assumes CPU natural alignment (unsigned long) for the back and front
 * buffer pointers and for the width: trailing bytes that do not fill a
 * whole unsigned long are not compared.
 *
 * Currently compiled out.
 */
#if 0
static int udl_trim_hline(const u8 *bback, const u8 **bfront, int *width_bytes)
{
	int j, k;
	const unsigned long *back = (const unsigned long *) bback;
	const unsigned long *front = (const unsigned long *) *bfront;
	const int width = *width_bytes / sizeof(unsigned long);
	int identical;
	int start = width;
	int end = width;

	prefetch((void *) front);
	prefetch((void *) back);

	/* first word that differs, scanning forwards */
	for (j = 0; j < width; j++) {
		if (back[j] != front[j]) {
			start = j;
			break;
		}
	}

	if (start < width) {
		/*
		 * Word 'start' is known to differ, so the span is at least
		 * that one word. Without this default, a line whose only
		 * difference is at 'start' would keep end == width and none
		 * of the identical trailing words would be trimmed.
		 */
		end = start + 1;

		/* last word that differs, scanning backwards */
		for (k = width - 1; k > start; k--) {
			if (back[k] != front[k]) {
				end = k + 1;
				break;
			}
		}
	}

	identical = start + (width - end);
	*bfront = (u8 *) &front[start];
	*width_bytes = (end - start) * sizeof(unsigned long);

	return identical * sizeof(unsigned long);
}
#endif
/*
 * Pack a 32-bit pixel into 16 bits by keeping the top 5 bits of byte 0,
 * the top 6 bits of byte 1 and the top 5 bits of byte 2 (byte 3 is
 * dropped). NOTE(review): presumably XRGB8888 -> RGB565 - confirm.
 *
 * The result is in CPU byte order; the caller is responsible for any
 * byte swapping.
 */
static inline u16 pixel32_to_be16(const uint32_t pixel)
{
	const uint32_t low5 = (pixel >> 3) & 0x001f;	/* bits 3..7   -> 0..4   */
	const uint32_t mid6 = (pixel >> 5) & 0x07e0;	/* bits 10..15 -> 5..10  */
	const uint32_t top5 = (pixel >> 8) & 0xf800;	/* bits 19..23 -> 11..15 */

	return top5 | mid6 | low5;
}
/*
 * Check whether the pixel stored at @pixel has the value @repeat.
 *
 * @pixel:  address of the pixel to load
 * @repeat: value to compare against
 * @bpp:    bytes per pixel; 2 loads a 16-bit value, anything else loads
 *          a 32-bit value
 *
 * Returns true when the loaded value equals @repeat.
 */
static bool pixel_repeats(const void *pixel, const uint32_t repeat, int bpp)
{
	uint32_t current;

	if (bpp == 2)
		current = *(const uint16_t *)pixel;
	else
		current = *(const uint32_t *)pixel;

	return current == repeat;
}
/*
 * Render a command stream for an encoded horizontal line segment of pixels.
 *
 * A command buffer holds several commands.
 * It always begins with a fresh command header
 * (the protocol doesn't require this, but we enforce it to allow
 * multiple buffers to be potentially encoded and sent in parallel).
 * A single command encodes one contiguous horizontal line of pixels
 *
 * The function relies on the client to do all allocation, so that
 * rendering can be done directly to output buffers (e.g. USB URBs).
 * The function fills the supplied command buffer, providing information
 * on where it left off, so the client may call in again with additional
 * buffers if the line will take several buffers to complete.
 *
 * A single command can transmit a maximum of 256 pixels,
 * regardless of the compression ratio (protocol design limit).
 * To the hardware, 0 for a size byte means 256
 *
 * Rather than 256 pixel commands which are either rl or raw encoded,
 * the rlx command simply assumes alternating raw and rl spans within one cmd.
 * This has a slightly larger header overhead, but produces more even results.
 * It also processes all data (read and write) in a single pass.
 * Performance benchmarks of common cases show it having just slightly better
 * compression than 256 pixel raw or rle commands, with similar CPU consumption.
 * But for very rl friendly data, will compress not quite as well.
 *
 * @pixel_start_ptr:    in: first source pixel to encode,
 *                      out: first source pixel that was not encoded
 * @pixel_end:          end of the source pixels (exclusive); the distance
 *                      from *pixel_start_ptr is expected to be a whole
 *                      number of pixels
 * @device_address_ptr: in/out: device address of the next pixel; advanced
 *                      by two bytes for every pixel encoded
 * @command_buffer_ptr: in/out: write position in the command buffer
 * @cmd_buffer_end:     end of the command buffer (exclusive)
 * @bpp:                bytes per source pixel; 2 emits the 16-bit value
 *                      as is, otherwise a 32-bit pixel is read and packed
 *                      with pixel32_to_be16()
 *
 * When no more than MIN_RLX_CMD_BYTES are left in the command buffer the
 * remainder is padded with 0xAF no-ops and *command_buffer_ptr is set to
 * @cmd_buffer_end, which tells the caller that the buffer is full.
 */
static void udl_compress_hline16(
	const u8 **pixel_start_ptr,
	const u8 *const pixel_end,
	uint32_t *device_address_ptr,
	uint8_t **command_buffer_ptr,
	const uint8_t *const cmd_buffer_end, int bpp)
{
	const u8 *pixel = *pixel_start_ptr;
	uint32_t dev_addr = *device_address_ptr;
	uint8_t *cmd = *command_buffer_ptr;

	while ((pixel_end > pixel) &&
	       (cmd_buffer_end - MIN_RLX_CMD_BYTES > cmd)) {
		uint8_t *raw_pixels_count_byte = NULL;
		uint8_t *cmd_pixels_count_byte = NULL;
		const u8 *raw_pixel_start = NULL;
		const u8 *cmd_pixel_start, *cmd_pixel_end = NULL;

		prefetchw((void *) cmd); /* pull in one cache line at least */

		/* command header: marker bytes followed by the device address */
		*cmd++ = 0xaf;
		*cmd++ = 0x6b;
		*cmd++ = (uint8_t) ((dev_addr >> 16) & 0xFF);
		*cmd++ = (uint8_t) ((dev_addr >> 8) & 0xFF);
		*cmd++ = (uint8_t) ((dev_addr) & 0xFF);

		cmd_pixels_count_byte = cmd++; /* we'll know this later */
		cmd_pixel_start = pixel;

		raw_pixels_count_byte = cmd++; /* we'll know this later */
		raw_pixel_start = pixel;

		/*
		 * Pixels for this command: limited by the protocol maximum,
		 * by the pixels left in the line and by the buffer space
		 * (two output bytes per pixel in the worst case).
		 */
		cmd_pixel_end = pixel + (min(MAX_CMD_PIXELS + 1,
			min((int)(pixel_end - pixel) / bpp,
			    (int)(cmd_buffer_end - cmd) / 2))) * bpp;

		/*
		 * Both pointers are u8 *, so their difference already is a
		 * byte count; scaling it by bpp again would prefetch past
		 * the span that is about to be read.
		 */
		prefetch_range((void *) pixel, cmd_pixel_end - pixel);

		while (pixel < cmd_pixel_end) {
			const u8 *const start = pixel;
			u32 repeating_pixel;
			uint16_t pixel_val16;

			if (bpp == 2) {
				repeating_pixel = *(uint16_t *)pixel;
				pixel_val16 = (uint16_t) repeating_pixel;
			} else {
				repeating_pixel = *(uint32_t *)pixel;
				pixel_val16 = pixel32_to_be16(repeating_pixel);
			}

			/*
			 * Emit the pixel big-endian one byte at a time: after
			 * the 7 byte header and the single count/repeat bytes
			 * cmd is not guaranteed to be 2-byte aligned.
			 */
			*cmd++ = (uint8_t) (pixel_val16 >> 8);
			*cmd++ = (uint8_t) (pixel_val16 & 0xFF);
			pixel += bpp;

			if (unlikely((pixel < cmd_pixel_end) &&
				     (pixel_repeats(pixel, repeating_pixel, bpp)))) {
				/* go back and fill in raw pixel count */
				*raw_pixels_count_byte = (((start -
						raw_pixel_start) / bpp) + 1) & 0xFF;

				while ((pixel < cmd_pixel_end) &&
				       (pixel_repeats(pixel, repeating_pixel, bpp))) {
					pixel += bpp;
				}

				/* immediately after raw data is repeat byte */
				*cmd++ = (((pixel - start) / bpp) - 1) & 0xFF;

				/* Then start another raw pixel span */
				raw_pixel_start = pixel;
				raw_pixels_count_byte = cmd++;
			}
		}

		if (pixel > raw_pixel_start) {
			/* finalize last RAW span */
			*raw_pixels_count_byte = ((pixel-raw_pixel_start) / bpp) & 0xFF;
		} else {
			/*
			 * The command ended on a repeat span, so the count
			 * byte reserved for a following raw span was never
			 * filled in. Turn it into a 0xAF no-op rather than
			 * sending whatever the buffer happened to contain.
			 */
			*raw_pixels_count_byte = 0xAF;
		}

		*cmd_pixels_count_byte = ((pixel - cmd_pixel_start) / bpp) & 0xFF;
		dev_addr += ((pixel - cmd_pixel_start) / bpp) * 2;
	}

	if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) {
		/* Fill leftover bytes with no-ops */
		if (cmd_buffer_end > cmd)
			memset(cmd, 0xAF, cmd_buffer_end - cmd);
		cmd = (uint8_t *) cmd_buffer_end;
	}

	*command_buffer_ptr = cmd;
	*pixel_start_ptr = pixel;
	*device_address_ptr = dev_addr;
}
/*
 * There are 3 copies of every pixel: The front buffer that the fbdev
 * client renders to, the actual framebuffer across the USB bus in hardware
 * (that we can only write to, slowly, and can never read), and (optionally)
 * our shadow copy that tracks what's been sent to that hardware buffer.
 *
 * Encode one horizontal span of the front buffer into the current URB,
 * submitting it and fetching a fresh one whenever it fills up.
 *
 * @dev:                device passed on to udl_submit_urb()/udl_get_urb()
 * @bpp:                bytes per front-buffer pixel, must be 2 or 4
 * @urb_ptr:            in/out: URB currently being filled
 * @front:              base address of the front buffer
 * @urb_buf_ptr:        in/out: write position inside the current URB buffer
 * @byte_offset:        offset of the span from @front, in bytes
 * @device_byte_offset: offset of the span on the device side, in
 *                      front-buffer bytes (converted to 2 bytes per pixel)
 * @byte_width:         width of the span in bytes
 * @ident_ptr:          not used by this function
 * @sent_ptr:           incremented by the length of every URB submitted
 *
 * Returns 0 on success, or 1 if submitting the URB or obtaining a new one
 * failed. On failure *urb_buf_ptr is left untouched.
 */
int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr,
		     const char *front, char **urb_buf_ptr,
		     u32 byte_offset, u32 device_byte_offset,
		     u32 byte_width,
		     int *ident_ptr, int *sent_ptr)
{
	const u8 *line_start, *line_end, *next_pixel;
	u32 base16;
	struct urb *urb = *urb_ptr;
	u8 *cmd = *urb_buf_ptr;
	u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length;

	/* validate bpp before it is used as a divisor below */
	BUG_ON(!(bpp == 2 || bpp == 4));

	/* the device stores 2 bytes per pixel */
	base16 = 0 + (device_byte_offset / bpp) * 2;

	line_start = (const u8 *) (front + byte_offset);
	next_pixel = line_start;
	/*
	 * Only whole pixels can be encoded. A trailing partial pixel could
	 * never be consumed by udl_compress_hline16(), so the loop below
	 * would never reach line_end.
	 */
	line_end = next_pixel + (byte_width / bpp) * bpp;

	while (next_pixel < line_end) {
		udl_compress_hline16(&next_pixel,
				     line_end, &base16,
				     (u8 **) &cmd, (u8 *) cmd_end, bpp);

		/* the compressor sets cmd to cmd_end once the URB is full */
		if (cmd >= cmd_end) {
			int len = cmd - (u8 *) urb->transfer_buffer;

			if (udl_submit_urb(dev, urb, len))
				return 1; /* lost pixels is set */
			*sent_ptr += len;

			urb = udl_get_urb(dev);
			if (!urb)
				return 1; /* lost_pixels is set */
			*urb_ptr = urb;
			cmd = urb->transfer_buffer;
			cmd_end = &cmd[urb->transfer_buffer_length];
		}
	}

	*urb_buf_ptr = cmd;

	return 0;
}