From c23ef285fc67f5fd2d7faac4d14d8b8c5a2b23ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guido=20G=C3=BCnther?= Date: Fri, 2 Aug 2019 12:55:18 +0200 Subject: [PATCH 01/10] drm/imx: Drop unused imx-ipuv3-crtc.o build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 3d1df96ad468 ("drm/imx: merge imx-drm-core and ipuv3-crtc in one module") the former contents of imx-ipuv3-crtc.o are built via imxdrm-objs. So there's no need to keep an extra entry with a non existing config value (CONFIG_DRM_IMX_IPUV3). Fixes: 3d1df96ad468 ("drm/imx: merge imx-drm-core and ipuv3-crtc in one module") Signed-off-by: Guido Günther Reviewed-by: Fabio Estevam Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/imx/Makefile b/drivers/gpu/drm/imx/Makefile index ab6c83caceb7..21cdcc2faabc 100644 --- a/drivers/gpu/drm/imx/Makefile +++ b/drivers/gpu/drm/imx/Makefile @@ -8,5 +8,4 @@ obj-$(CONFIG_DRM_IMX_PARALLEL_DISPLAY) += parallel-display.o obj-$(CONFIG_DRM_IMX_TVE) += imx-tve.o obj-$(CONFIG_DRM_IMX_LDB) += imx-ldb.o -obj-$(CONFIG_DRM_IMX_IPUV3) += imx-ipuv3-crtc.o obj-$(CONFIG_DRM_IMX_HDMI) += dw_hdmi-imx.o From a59957172b0c608a19aaadb77d65387f9a542c85 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 29 Jul 2019 14:22:10 +0200 Subject: [PATCH 02/10] gpu: ipu-v3: enable remaining 32-bit RGB V4L2 pixel formats Support is already implemented for the corresponding DRM formats, just hook up the remaining V4L2 pixel formats. Signed-off-by: Philipp Zabel Reviewed-by: Marco Felsch --- drivers/gpu/ipu-v3/ipu-common.c | 16 ++++++++++------ drivers/gpu/ipu-v3/ipu-cpmem.c | 26 +++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 6386e2fe2ff7..ee2a025e54cf 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -113,13 +113,17 @@ enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat) case V4L2_PIX_FMT_NV16: case V4L2_PIX_FMT_NV61: return IPUV3_COLORSPACE_YUV; - case V4L2_PIX_FMT_XRGB32: - case V4L2_PIX_FMT_XBGR32: - case V4L2_PIX_FMT_RGB32: - case V4L2_PIX_FMT_BGR32: - case V4L2_PIX_FMT_RGB24: - case V4L2_PIX_FMT_BGR24: case V4L2_PIX_FMT_RGB565: + case V4L2_PIX_FMT_BGR24: + case V4L2_PIX_FMT_RGB24: + case V4L2_PIX_FMT_ABGR32: + case V4L2_PIX_FMT_XBGR32: + case V4L2_PIX_FMT_BGRA32: + case V4L2_PIX_FMT_BGRX32: + case V4L2_PIX_FMT_RGBA32: + case V4L2_PIX_FMT_RGBX32: + case V4L2_PIX_FMT_ARGB32: + case V4L2_PIX_FMT_XRGB32: return IPUV3_COLORSPACE_RGB; default: return IPUV3_COLORSPACE_UNKNOWN; diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c index be1226ce28cd..a1c85d1521f5 100644 --- a/drivers/gpu/ipu-v3/ipu-cpmem.c +++ b/drivers/gpu/ipu-v3/ipu-cpmem.c @@ -182,9 +182,27 @@ static int v4l2_pix_fmt_to_drm_fourcc(u32 pixelformat) case V4L2_PIX_FMT_RGB32: /* R G B A <=> [32:0] A:B:G:R */ return DRM_FORMAT_XBGR8888; + case V4L2_PIX_FMT_ABGR32: + /* B G R A <=> [32:0] A:R:G:B */ + return DRM_FORMAT_ARGB8888; case V4L2_PIX_FMT_XBGR32: /* B G R X <=> [32:0] X:R:G:B */ return DRM_FORMAT_XRGB8888; + case V4L2_PIX_FMT_BGRA32: + /* A B G R <=> [32:0] R:G:B:A */ + return DRM_FORMAT_RGBA8888; + case V4L2_PIX_FMT_BGRX32: + /* X B G R <=> [32:0] R:G:B:X */ + return DRM_FORMAT_RGBX8888; + case V4L2_PIX_FMT_RGBA32: + /* R G B A <=> [32:0] A:B:G:R */ + return DRM_FORMAT_ABGR8888; + case V4L2_PIX_FMT_RGBX32: + /* R G B X <=> [32:0] X:B:G:R */ + return DRM_FORMAT_XBGR8888; + case V4L2_PIX_FMT_ARGB32: + /* A R G B <=> [32:0] B:G:R:A */ + return DRM_FORMAT_BGRA8888; case V4L2_PIX_FMT_XRGB32: /* X R G B <=> [32:0] B:G:R:X */ return DRM_FORMAT_BGRX8888; @@ -823,8 +841,14 @@ int ipu_cpmem_set_image(struct ipuv3_channel *ch, struct ipu_image *image) break; case V4L2_PIX_FMT_RGB32: case V4L2_PIX_FMT_BGR32: - case V4L2_PIX_FMT_XRGB32: + case V4L2_PIX_FMT_ABGR32: case V4L2_PIX_FMT_XBGR32: + case V4L2_PIX_FMT_BGRA32: + case V4L2_PIX_FMT_BGRX32: + case V4L2_PIX_FMT_RGBA32: + case V4L2_PIX_FMT_RGBX32: + case V4L2_PIX_FMT_ARGB32: + case V4L2_PIX_FMT_XRGB32: offset = image->rect.left * 4 + image->rect.top * pix->bytesperline; break; From 9b75651f41a05f1aee9006e2c6fa9f505e5287bc Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 29 Jul 2019 14:29:31 +0200 Subject: [PATCH 03/10] gpu: ipu-v3: image-convert: enable V4L2_PIX_FMT_BGRX32 and _RGBX32 Enable image converter support for V4L2_PIX_FMT_BGRX32 and V4L2_PIX_FMT_RGBX32 pixel formats. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 9d25db6924b3..5be7fff2cf33 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -251,6 +251,12 @@ static const struct ipu_image_pixfmt image_convert_formats[] = { }, { .fourcc = V4L2_PIX_FMT_XBGR32, .bpp = 32, + }, { + .fourcc = V4L2_PIX_FMT_BGRX32, + .bpp = 32, + }, { + .fourcc = V4L2_PIX_FMT_RGBX32, + .bpp = 32, }, { .fourcc = V4L2_PIX_FMT_YUYV, .bpp = 16, From ca84b1b860407109b4b5f3802907f13bbef5d0fd Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 13 Aug 2019 18:49:26 +0200 Subject: [PATCH 04/10] gpu: ipu-v3: image-convert: move output seam valid interval calculation into find_best_seam This reduces code duplication and allows to apply the following modifications in a single place. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 34 +++++++++----------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 5be7fff2cf33..75850fd0db7e 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -448,12 +448,10 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx, #define round_closest(x, y) round_down((x) + (y)/2, (y)) /* - * Find the best aligned seam position in the inverval [out_start, out_end]. + * Find the best aligned seam position for the given column / row index. * Rotation and image offsets are out of scope. * - * @out_start: start of inverval, must be within 1024 pixels / lines - * of out_end - * @out_end: end of interval, smaller than or equal to out_edge + * @index: column / row index, used to calculate valid interval * @in_edge: input right / bottom edge * @out_edge: output right / bottom edge * @in_align: input alignment, either horizontal 8-byte line start address @@ -469,8 +467,7 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx, * @_out_seam: aligned output seam position return value */ static void find_best_seam(struct ipu_image_convert_ctx *ctx, - unsigned int out_start, - unsigned int out_end, + unsigned int index, unsigned int in_edge, unsigned int out_edge, unsigned int in_align, @@ -488,6 +485,13 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx, unsigned int out_seam = 0; unsigned int in_seam = 0; unsigned int min_diff = UINT_MAX; + unsigned int out_start; + unsigned int out_end; + + /* Start within 1024 pixels of the right / bottom edge */ + out_start = max_t(int, 0, out_edge - 1024); + /* End before having to add more columns to the left / rows above */ + out_end = min_t(unsigned int, out_edge, index * 1024); /* * Output tiles must start at a multiple of 8 bytes horizontally and @@ -718,8 +722,6 @@ static void find_seams(struct ipu_image_convert_ctx *ctx, !(ctx->rot_mode & IPU_ROT_BIT_HFLIP); bool allow_out_overshoot = (col < in->num_cols - 1) && !(ctx->rot_mode & IPU_ROT_BIT_HFLIP); - unsigned int out_start; - unsigned int out_end; unsigned int in_left; unsigned int out_left; @@ -728,12 +730,7 @@ static void find_seams(struct ipu_image_convert_ctx *ctx, * horizontally. */ - /* Start within 1024 pixels of the right edge */ - out_start = max_t(int, 0, out_right - 1024); - /* End before having to add more columns to the left */ - out_end = min_t(unsigned int, out_right, col * 1024); - - find_best_seam(ctx, out_start, out_end, + find_best_seam(ctx, col, in_right, out_right, in_left_align, out_left_align, allow_in_overshoot ? 1 : 8 /* burst length */, @@ -768,17 +765,10 @@ static void find_seams(struct ipu_image_convert_ctx *ctx, for (row = in->num_rows - 1; row > 0; row--) { bool allow_overshoot = row < in->num_rows - 1; - unsigned int out_start; - unsigned int out_end; unsigned int in_top; unsigned int out_top; - /* Start within 1024 lines of the bottom edge */ - out_start = max_t(int, 0, out_bottom - 1024); - /* End before having to add more rows above */ - out_end = min_t(unsigned int, out_bottom, row * 1024); - - find_best_seam(ctx, out_start, out_end, + find_best_seam(ctx, row, in_bottom, out_bottom, in_top_align, out_top_align, 1, allow_overshoot ? 1 : out_height_align, From 82c3e948cc0fe7e01b39d437f7a58b197ebb422d Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 13 Aug 2019 14:30:25 +0200 Subject: [PATCH 05/10] gpu: ipu-v3: image-convert: fix output seam valid interval This fixes a failure to determine any seam if the output size is exactly 1024 multiplied by the number of tiles in a given direction. In that case an empty interval out_start == out_end is being passed to find_best_seam, which looks for a seam out_start <= x < out_end. Also reduce the interval for all but the left column / top row, to avoid returning position 0 as best fit. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 75850fd0db7e..40d450886e1d 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -489,9 +489,9 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx, unsigned int out_end; /* Start within 1024 pixels of the right / bottom edge */ - out_start = max_t(int, 0, out_edge - 1024); + out_start = max_t(int, index * out_align, out_edge - 1024); /* End before having to add more columns to the left / rows above */ - out_end = min_t(unsigned int, out_edge, index * 1024); + out_end = min_t(unsigned int, out_edge, index * 1024 + 1); /* * Output tiles must start at a multiple of 8 bytes horizontally and From 2e67a553e965b32a648c85fbe1c667fa440d6c92 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 13 Aug 2019 13:42:58 +0200 Subject: [PATCH 06/10] gpu: ipu-v3: image-convert: limit input seam position to hardware requirements Limit the input seam position to an interval that guarantees the tile size does not exceed 1024 pixels after the IC downsizing section and that space is left for the next tile. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 30 ++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 40d450886e1d..a279d803ed91 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -487,12 +487,23 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx, unsigned int min_diff = UINT_MAX; unsigned int out_start; unsigned int out_end; + unsigned int in_start; + unsigned int in_end; /* Start within 1024 pixels of the right / bottom edge */ out_start = max_t(int, index * out_align, out_edge - 1024); /* End before having to add more columns to the left / rows above */ out_end = min_t(unsigned int, out_edge, index * 1024 + 1); + /* + * Limit input seam position to make sure that the downsized input tile + * to the right or bottom does not exceed 1024 pixels. + */ + in_start = max_t(int, index * in_align, + in_edge - (1024 << downsize_coeff)); + in_end = min_t(unsigned int, in_edge, + index * (1024 << downsize_coeff) + 1); + /* * Output tiles must start at a multiple of 8 bytes horizontally and * possibly at an even line horizontally depending on the pixel format. @@ -502,6 +513,7 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx, for (out_pos = out_start; out_pos < out_end; out_pos += out_align) { unsigned int in_pos; unsigned int in_pos_aligned; + unsigned int in_pos_rounded; unsigned int abs_diff; /* @@ -522,9 +534,16 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx, * start the input tile at, 19.13 fixed point. */ in_pos_aligned = round_closest(in_pos, 8192U * in_align); + /* Convert 19.13 fixed point to integer */ + in_pos_rounded = in_pos_aligned / 8192U; + + if (in_pos_rounded < in_start) + continue; + if (in_pos_rounded >= in_end) + break; if ((in_burst > 1) && - (in_edge - in_pos_aligned / 8192U) % in_burst) + (in_edge - in_pos_rounded) % in_burst) continue; if (in_pos < in_pos_aligned) @@ -533,19 +552,18 @@ static void find_best_seam(struct ipu_image_convert_ctx *ctx, abs_diff = in_pos - in_pos_aligned; if (abs_diff < min_diff) { - in_seam = in_pos_aligned; + in_seam = in_pos_rounded; out_seam = out_pos; min_diff = abs_diff; } } *_out_seam = out_seam; - /* Convert 19.13 fixed point to integer seam position */ - *_in_seam = DIV_ROUND_CLOSEST(in_seam, 8192U); + *_in_seam = in_seam; - dev_dbg(dev, "%s: out_seam %u(%u) in [%u, %u], in_seam %u(%u) diff %u.%03u\n", + dev_dbg(dev, "%s: out_seam %u(%u) in [%u, %u], in_seam %u(%u) in [%u, %u] diff %u.%03u\n", __func__, out_seam, out_align, out_start, out_end, - *_in_seam, in_align, min_diff / 8192, + in_seam, in_align, in_start, in_end, min_diff / 8192, DIV_ROUND_CLOSEST(min_diff % 8192 * 1000, 8192)); } From de2564c70fcc1dfee89fff92eccba5a711a921cc Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 13 Aug 2019 13:42:58 +0200 Subject: [PATCH 07/10] gpu: ipu-v3: image-convert: fix image downsize coefficients and tiling calculation This patch effectively reverts commit 912bbf7e9ca4 ("gpu: ipu-v3: image-convert: Fix image downsize coefficients") and replaces it with a different solution based on the preceding patches. The previous fix tried to solve the problem of intermediate tile size between IC downsizing and main processing sections not being limited to 1024 pixels by downsizing the input image to a smaller intermediate size in the downsizing box filter. This causes unnecessary blurring, especially for scaling factors close to 1. Now that the seam position calculation makes sure that the 1024 pixel intermediate tile size limit is not exceeded, calculate the number of tiles from the maximum of intermediate size and output size and avoid unnecessary downsizing. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 49 ++++++++++++++++---------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index a279d803ed91..a3375eec35ce 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -382,8 +382,11 @@ static inline int num_stripes(int dim) /* * Calculate downsizing coefficients, which are the same for all tiles, - * and bilinear resizing coefficients, which are used to find the best - * seam positions. + * and initial bilinear resizing coefficients, which are used to find the + * best seam positions. + * Also determine the number of tiles necessary to guarantee that no tile + * is larger than 1024 pixels in either dimension at the output and between + * IC downsizing and main processing sections. */ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx, struct ipu_image *in, @@ -397,6 +400,8 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx, u32 resized_height = out->rect.height; u32 resize_coeff_h; u32 resize_coeff_v; + u32 cols; + u32 rows; if (ipu_rot_mode_is_irt(ctx->rot_mode)) { resized_width = out->rect.height; @@ -407,14 +412,12 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx, if (WARN_ON(resized_width == 0 || resized_height == 0)) return -EINVAL; - while (downsized_width > 1024 || - downsized_width >= resized_width * 2) { + while (downsized_width >= resized_width * 2) { downsized_width >>= 1; downsize_coeff_h++; } - while (downsized_height > 1024 || - downsized_height >= resized_height * 2) { + while (downsized_height >= resized_height * 2) { downsized_height >>= 1; downsize_coeff_v++; } @@ -428,10 +431,18 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx, resize_coeff_h = 8192 * (downsized_width - 1) / (resized_width - 1); resize_coeff_v = 8192 * (downsized_height - 1) / (resized_height - 1); + /* + * Both the output of the IC downsizing section before being passed to + * the IC main processing section and the final output of the IC main + * processing section must be <= 1024 pixels in both dimensions. + */ + cols = num_stripes(max_t(u32, downsized_width, resized_width)); + rows = num_stripes(max_t(u32, downsized_height, resized_height)); + dev_dbg(ctx->chan->priv->ipu->dev, "%s: hscale: >>%u, *8192/%u vscale: >>%u, *8192/%u, %ux%u tiles\n", __func__, downsize_coeff_h, resize_coeff_h, downsize_coeff_v, - resize_coeff_v, ctx->in.num_cols, ctx->in.num_rows); + resize_coeff_v, cols, rows); if (downsize_coeff_h > 2 || downsize_coeff_v > 2 || resize_coeff_h > 0x3fff || resize_coeff_v > 0x3fff) @@ -441,6 +452,8 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx, ctx->downsize_coeff_v = downsize_coeff_v; ctx->image_resize_coeff_h = resize_coeff_h; ctx->image_resize_coeff_v = resize_coeff_v; + ctx->in.num_cols = cols; + ctx->in.num_rows = rows; return 0; } @@ -2038,22 +2051,26 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, ctx->chan = chan; init_completion(&ctx->aborted); + ctx->rot_mode = rot_mode; + + /* Sets ctx->in.num_rows/cols as well */ + ret = calc_image_resize_coefficients(ctx, in, out); + if (ret) + goto out_free; + s_image = &ctx->in; d_image = &ctx->out; /* set tiling and rotation */ - d_image->num_rows = num_stripes(out->pix.height); - d_image->num_cols = num_stripes(out->pix.width); if (ipu_rot_mode_is_irt(rot_mode)) { - s_image->num_rows = d_image->num_cols; - s_image->num_cols = d_image->num_rows; + d_image->num_rows = s_image->num_cols; + d_image->num_cols = s_image->num_rows; } else { - s_image->num_rows = d_image->num_rows; - s_image->num_cols = d_image->num_cols; + d_image->num_rows = s_image->num_rows; + d_image->num_cols = s_image->num_cols; } ctx->num_tiles = d_image->num_cols * d_image->num_rows; - ctx->rot_mode = rot_mode; ret = fill_image(ctx, s_image, in, IMAGE_CONVERT_IN); if (ret) @@ -2062,10 +2079,6 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, if (ret) goto out_free; - ret = calc_image_resize_coefficients(ctx, in, out); - if (ret) - goto out_free; - calc_out_tile_map(ctx); find_seams(ctx, s_image, d_image); From 5fb8b650cc1170f5e7a0f1a4ab3bb2042c007102 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 13 Aug 2019 14:39:50 +0200 Subject: [PATCH 08/10] gpu: ipu-v3: image-convert: bail on invalid tile sizes If we managed to create tiles sized 0x0 because of a bug in the seam calculation, return with an error message instead of letting the driver run into a division by zero later. Also check for tile sizes that are larger than supported by the hardware. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 27 +++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index a3375eec35ce..699c9b45683c 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -836,13 +836,21 @@ static void find_seams(struct ipu_image_convert_ctx *ctx, in_bottom, flipped_out_top, out_bottom); } -static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx, - struct ipu_image_convert_image *image) +static int calc_tile_dimensions(struct ipu_image_convert_ctx *ctx, + struct ipu_image_convert_image *image) { struct ipu_image_convert_chan *chan = ctx->chan; struct ipu_image_convert_priv *priv = chan->priv; + unsigned int max_width = 1024; + unsigned int max_height = 1024; unsigned int i; + if (image->type == IMAGE_CONVERT_IN) { + /* Up to 4096x4096 input tile size */ + max_width <<= ctx->downsize_coeff_h; + max_height <<= ctx->downsize_coeff_v; + } + for (i = 0; i < ctx->num_tiles; i++) { struct ipu_image_tile *tile; const unsigned int row = i / image->num_cols; @@ -872,7 +880,17 @@ static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx, image->type == IMAGE_CONVERT_IN ? "Input" : "Output", row, col, tile->width, tile->height, tile->left, tile->top); + + if (!tile->width || tile->width > max_width || + !tile->height || tile->height > max_height) { + dev_err(priv->ipu->dev, "invalid %s tile size: %ux%u\n", + image->type == IMAGE_CONVERT_IN ? "input" : + "output", tile->width, tile->height); + return -EINVAL; + } } + + return 0; } /* @@ -2083,7 +2101,10 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, find_seams(ctx, s_image, d_image); - calc_tile_dimensions(ctx, s_image); + ret = calc_tile_dimensions(ctx, s_image); + if (ret) + goto out_free; + ret = calc_tile_offsets(ctx, s_image); if (ret) goto out_free; From fbefb84dd142c7d75232ae9d446ae36829c2dc97 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 14 Aug 2019 10:50:13 +0200 Subject: [PATCH 09/10] gpu: ipu-v3: image-convert: move tile burst alignment out of loop Burst aligned input and output width can be calculated once per column, instead of repeatedly for each tile in the column. The same goes for input and output height per row. Also don't round up the same values repeatedly. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 80 ++++++++++++++------------ 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 699c9b45683c..1a466611ad48 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -1121,6 +1121,7 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx) !(ctx->rot_mode & IPU_ROT_BIT_HFLIP); u32 resized_width; u32 resize_coeff_h; + u32 in_width; tile_idx = col; in_tile = &ctx->in.tile[tile_idx]; @@ -1138,33 +1139,35 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx) dev_dbg(priv->ipu->dev, "%s: column %u hscale: *8192/%u\n", __func__, col, resize_coeff_h); + /* + * With the horizontal scaling factor known, round up resized + * width (output width or height) to burst size. + */ + resized_width = round_up(resized_width, 8); + + /* + * Calculate input width from the last accessed input pixel + * given resized width and scaling coefficients. Round up to + * burst size. + */ + last_output = resized_width - 1; + if (closest) + last_output++; + in_width = round_up( + (DIV_ROUND_UP(last_output * resize_coeff_h, 8192) + 1) + << ctx->downsize_coeff_h, 8); for (row = 0; row < ctx->in.num_rows; row++) { tile_idx = row * ctx->in.num_cols + col; in_tile = &ctx->in.tile[tile_idx]; out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]]; - /* - * With the horizontal scaling factor known, round up - * resized width (output width or height) to burst size. - */ if (ipu_rot_mode_is_irt(ctx->rot_mode)) - out_tile->height = round_up(resized_width, 8); + out_tile->height = resized_width; else - out_tile->width = round_up(resized_width, 8); + out_tile->width = resized_width; - /* - * Calculate input width from the last accessed input - * pixel given resized width and scaling coefficients. - * Round up to burst size. - */ - last_output = round_up(resized_width, 8) - 1; - if (closest) - last_output++; - in_tile->width = round_up( - (DIV_ROUND_UP(last_output * resize_coeff_h, - 8192) + 1) - << ctx->downsize_coeff_h, 8); + in_tile->width = in_width; } ctx->resize_coeffs_h[col] = resize_coeff_h; @@ -1175,6 +1178,7 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx) !(ctx->rot_mode & IPU_ROT_BIT_VFLIP); u32 resized_height; u32 resize_coeff_v; + u32 in_height; tile_idx = row * ctx->in.num_cols; in_tile = &ctx->in.tile[tile_idx]; @@ -1192,33 +1196,35 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx) dev_dbg(priv->ipu->dev, "%s: row %u vscale: *8192/%u\n", __func__, row, resize_coeff_v); + /* + * With the vertical scaling factor known, round up resized + * height (output width or height) to IDMAC limitations. + */ + resized_height = round_up(resized_height, 2); + + /* + * Calculate input width from the last accessed input pixel + * given resized height and scaling coefficients. Align to + * IDMAC restrictions. + */ + last_output = resized_height - 1; + if (closest) + last_output++; + in_height = round_up( + (DIV_ROUND_UP(last_output * resize_coeff_v, 8192) + 1) + << ctx->downsize_coeff_v, 2); + for (col = 0; col < ctx->in.num_cols; col++) { tile_idx = row * ctx->in.num_cols + col; in_tile = &ctx->in.tile[tile_idx]; out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]]; - /* - * With the vertical scaling factor known, round up - * resized height (output width or height) to IDMAC - * limitations. - */ if (ipu_rot_mode_is_irt(ctx->rot_mode)) - out_tile->width = round_up(resized_height, 2); + out_tile->width = resized_height; else - out_tile->height = round_up(resized_height, 2); + out_tile->height = resized_height; - /* - * Calculate input width from the last accessed input - * pixel given resized height and scaling coefficients. - * Align to IDMAC restrictions. - */ - last_output = round_up(resized_height, 2) - 1; - if (closest) - last_output++; - in_tile->height = round_up( - (DIV_ROUND_UP(last_output * resize_coeff_v, - 8192) + 1) - << ctx->downsize_coeff_v, 2); + in_tile->height = in_height; } ctx->resize_coeffs_v[row] = resize_coeff_v; From 4d24376370fbfc87231d54434a683f2913abcce4 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 14 Aug 2019 10:53:30 +0200 Subject: [PATCH 10/10] gpu: ipu-v3: image-convert: only sample into the next tile if necessary The first pixel of the next tile is only sampled by the hardware if the fractional input position corresponding to the last written output pixel is not an integer position. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 1a466611ad48..eeca50d9a1ee 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -1151,7 +1151,7 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx) * burst size. */ last_output = resized_width - 1; - if (closest) + if (closest && ((last_output * resize_coeff_h) % 8192)) last_output++; in_width = round_up( (DIV_ROUND_UP(last_output * resize_coeff_h, 8192) + 1) @@ -1208,7 +1208,7 @@ static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx) * IDMAC restrictions. */ last_output = resized_height - 1; - if (closest) + if (closest && ((last_output * resize_coeff_v) % 8192)) last_output++; in_height = round_up( (DIV_ROUND_UP(last_output * resize_coeff_v, 8192) + 1)