TW_ROTATION: add flag to handle hardware-rotated display panels

* TW_ROTATION implements this feature at the level of calls to the
  libpixelflinger API. It is the counterpart of ro.sf.hwrotation, which
  serves the same purpose for surfaceflinger in LineageOS.
* A brute-force approach was previously attempted via the
  BOARD_HAS_FLIPPED_SCREEN makefile flag. That code iterated over the
  active display surface in a double-buffered setup, and performed a
  "smart" memcpy from the UI drawing surface (gr_draw) onto the display
  surface. The problem was that, without heavy loop optimizations, that
  code could never have scaled to 90 and 270 degree rotation.
  I tried it, and you could literally watch the for loop progress with
  the naked eye while the display surface was updating.
* That code is now gone, but BOARD_HAS_FLIPPED_SCREEN := true is still
  supported (it now means TW_ROTATION := 180).
* This patch relies on the assumption that it is impractically difficult
  to rotate whole framebuffer display surfaces in a portable way, i.e.
  one that does not depend on the graphics backend (adf, fbdev, drm,
  overlay etc). Therefore, it identifies the rendering primitives that
  the TWRP graphics stack exposes to the GUI application above, and
  implements hwrotation inside each of those calls instead:
    - gr_line(), gr_fill() - 2D geometric shapes (lines, rectangles)
    - gr_blit() - graphical image resources
    - gr_ttf_textExWH() - font rendering
    - gr_fb_width(), gr_fb_height() - framebuffer resolution
* The gist is to keep the backend and framebuffer (dimensions, row size
  etc) unchanged (because making changes there is asking for trouble),
  but present an altogether different reality to the calling API,
  according to the compile-time constant TW_ROTATION.
* All (x, y) API coordinates and shapes are transformed before being
  actually rendered as (x_disp, y_disp) display coordinates.
* With TW_ROTATION := 90 or 270 you can turn a landscape device into
  a portrait one, because the GUI is fooled by the swapped dimensions
  reported by gr_fb_width() and gr_fb_height() and renders the UI as
  if for a different device.
* For blit and text rendering operations, figuring out the transformed
  coordinates in display space is not enough, as the surfaces that are
  to be rendered have to be rotated themselves. This is handled by
  allocating an intermediary rotated surface on each rendering
  operation. That is not ideal, so the code with the intermediary
  surface is compiled out for the TW_ROTATION := 0 case.
* This is still not as bad as rotating the whole framebuffer though, and
  on a msm8976 device the performance hit is not even noticeable (for
  software rendering).
* Currently there is no attempt to tie TW_ROTATION to the
  { RECOVERY_TOUCHSCREEN_SWAP_XY, RECOVERY_TOUCHSCREEN_FLIP_X,
  RECOVERY_TOUCHSCREEN_FLIP_Y } settings.

Change-Id: Ic8966ad5360c8a499649fdb16e242286640fd992
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
diff --git a/minuitwrp/Android.mk b/minuitwrp/Android.mk
index 0f8aae6..58634e3 100644
--- a/minuitwrp/Android.mk
+++ b/minuitwrp/Android.mk
@@ -145,8 +145,18 @@
     LOCAL_CFLAGS += -DTW_FBIOPAN
 endif
 
-ifeq ($(BOARD_HAS_FLIPPED_SCREEN), true)
-LOCAL_CFLAGS += -DBOARD_HAS_FLIPPED_SCREEN
+ifneq ($(TW_ROTATION),)
+  ifeq (,$(filter 0 90 180 270, $(TW_ROTATION)))
+    $(error TW_ROTATION must be set to 0, 90, 180 or 270. Currently set to $(TW_ROTATION))
+  endif
+  LOCAL_CFLAGS += -DTW_ROTATION=$(TW_ROTATION)
+else
+  # Support for old flag
+  ifeq ($(BOARD_HAS_FLIPPED_SCREEN), true)
+    LOCAL_CFLAGS += -DTW_ROTATION=180
+  else
+    LOCAL_CFLAGS += -DTW_ROTATION=0
+  endif
 endif
 
 ifeq ($(TW_IGNORE_MAJOR_AXIS_0), true)
diff --git a/minuitwrp/graphics.cpp b/minuitwrp/graphics.cpp
index d914eef..0abcb0c 100644
--- a/minuitwrp/graphics.cpp
+++ b/minuitwrp/graphics.cpp
@@ -35,6 +35,8 @@
 #include "../gui/placement.h"
 #include "minui.h"
 #include "graphics.h"
+// For std::min and std::max
+#include <algorithm>
 
 struct GRFont {
     GRSurface* texture;
@@ -105,32 +107,51 @@
         else if (placement == BOTTOM_LEFT || placement == BOTTOM_RIGHT)
             y -= measured_height;
     }
-    return gr_ttf_textExWH(gl, x, y + y_scale, s, vfont, measured_width + x, -1);
+    return gr_ttf_textExWH(gl, x, y + y_scale, s, vfont, measured_width + x, -1, gr_draw);
 }
 
 void gr_clip(int x, int y, int w, int h)
 {
     GGLContext *gl = gr_context;
-    gl->scissor(gl, x, y, w, h);
+    int x0_disp, y0_disp, x1_disp, y1_disp;
+    int l_disp, r_disp, t_disp, b_disp;
+
+    x0_disp = ROTATION_X_DISP(x, y, gr_draw);
+    y0_disp = ROTATION_Y_DISP(x, y, gr_draw);
+    x1_disp = ROTATION_X_DISP(x + w, y + h, gr_draw);
+    y1_disp = ROTATION_Y_DISP(x + w, y + h, gr_draw);
+    l_disp = std::min(x0_disp, x1_disp);
+    r_disp = std::max(x0_disp, x1_disp);
+    t_disp = std::min(y0_disp, y1_disp);
+    b_disp = std::max(y0_disp, y1_disp);
+    gl->scissor(gl, l_disp, t_disp, r_disp, b_disp);
     gl->enable(gl, GGL_SCISSOR_TEST);
 }
 
 void gr_noclip()
 {
     GGLContext *gl = gr_context;
-    gl->scissor(gl, 0, 0, gr_fb_width(), gr_fb_height());
+    gl->scissor(gl, 0, 0,
+                gr_draw->width - 2 * overscan_offset_x,
+                gr_draw->height - 2 * overscan_offset_y);
     gl->disable(gl, GGL_SCISSOR_TEST);
 }
 
 void gr_line(int x0, int y0, int x1, int y1, int width)
 {
     GGLContext *gl = gr_context;
+    int x0_disp, y0_disp, x1_disp, y1_disp;
+
+    x0_disp = ROTATION_X_DISP(x0, y0, gr_draw);
+    y0_disp = ROTATION_Y_DISP(x0, y0, gr_draw);
+    x1_disp = ROTATION_X_DISP(x1, y1, gr_draw);
+    y1_disp = ROTATION_Y_DISP(x1, y1, gr_draw);
 
     if(gr_is_curr_clr_opaque)
         gl->disable(gl, GGL_BLEND);
 
-    const int coords0[2] = { x0 << 4, y0 << 4 };
-    const int coords1[2] = { x1 << 4, y1 << 4 };
+    const int coords0[2] = { x0_disp << 4, y0_disp << 4 };
+    const int coords1[2] = { x1_disp << 4, y1_disp << 4 };
     gl->linex(gl, coords0, coords1, width << 4);
 
     if(gr_is_curr_clr_opaque)
@@ -218,17 +239,29 @@
 void gr_fill(int x, int y, int w, int h)
 {
     GGLContext *gl = gr_context;
+    int x0_disp, y0_disp, x1_disp, y1_disp;
+    int l_disp, r_disp, t_disp, b_disp;
 
     if(gr_is_curr_clr_opaque)
         gl->disable(gl, GGL_BLEND);
 
-    gl->recti(gl, x, y, x + w, y + h);
+    x0_disp = ROTATION_X_DISP(x, y, gr_draw);
+    y0_disp = ROTATION_Y_DISP(x, y, gr_draw);
+    x1_disp = ROTATION_X_DISP(x + w, y + h, gr_draw);
+    y1_disp = ROTATION_Y_DISP(x + w, y + h, gr_draw);
+    l_disp = std::min(x0_disp, x1_disp);
+    r_disp = std::max(x0_disp, x1_disp);
+    t_disp = std::min(y0_disp, y1_disp);
+    b_disp = std::max(y0_disp, y1_disp);
+
+    gl->recti(gl, l_disp, t_disp, r_disp, b_disp);
 
     if(gr_is_curr_clr_opaque)
         gl->enable(gl, GGL_BLEND);
 }
 
-void gr_blit(gr_surface source, int sx, int sy, int w, int h, int dx, int dy) {
+void gr_blit(gr_surface source, int sx, int sy, int w, int h, int dx, int dy)
+{
     if (gr_context == NULL) {
         return;
     }
@@ -239,15 +272,50 @@
     if(surface->format == GGL_PIXEL_FORMAT_RGBX_8888)
         gl->disable(gl, GGL_BLEND);
 
+    int dx0_disp, dy0_disp, dx1_disp, dy1_disp;
+    int l_disp, r_disp, t_disp, b_disp;
+
+    // Figuring out display coordinates works for TW_ROTATION == 0 too,
+    // and isn't as expensive as allocating and rotating another surface,
+    // so we do this anyway.
+    dx0_disp = ROTATION_X_DISP(dx, dy, gr_draw);
+    dy0_disp = ROTATION_Y_DISP(dx, dy, gr_draw);
+    dx1_disp = ROTATION_X_DISP(dx + w, dy + h, gr_draw);
+    dy1_disp = ROTATION_Y_DISP(dx + w, dy + h, gr_draw);
+    l_disp = std::min(dx0_disp, dx1_disp);
+    r_disp = std::max(dx0_disp, dx1_disp);
+    t_disp = std::min(dy0_disp, dy1_disp);
+    b_disp = std::max(dy0_disp, dy1_disp);
+
+#if TW_ROTATION != 0
+    // Do not perform relatively expensive operation if not needed
+    GGLSurface surface_rotated;
+    surface_rotated.version = sizeof(surface_rotated);
+    // TW_ROTATION == 0 cannot occur inside this #if TW_ROTATION != 0 block, so
+    // testing for 180 alone separates the same-dimensions case from 90/270.
+    surface_rotated.width   = (TW_ROTATION == 180) ? surface->width  : surface->height;
+    surface_rotated.height  = (TW_ROTATION == 180) ? surface->height : surface->width;
+    surface_rotated.stride  = surface_rotated.width;
+    surface_rotated.format  = surface->format;
+    surface_rotated.data    = (GGLubyte*) malloc(surface_rotated.stride * surface_rotated.height * 4);
+    surface_ROTATION_transform((gr_surface) &surface_rotated, (const gr_surface) surface, 4);
+
+    gl->bindTexture(gl, &surface_rotated);
+#else
     gl->bindTexture(gl, surface);
+#endif
     gl->texEnvi(gl, GGL_TEXTURE_ENV, GGL_TEXTURE_ENV_MODE, GGL_REPLACE);
     gl->texGeni(gl, GGL_S, GGL_TEXTURE_GEN_MODE, GGL_ONE_TO_ONE);
     gl->texGeni(gl, GGL_T, GGL_TEXTURE_GEN_MODE, GGL_ONE_TO_ONE);
     gl->enable(gl, GGL_TEXTURE_2D);
-    gl->texCoord2i(gl, sx - dx, sy - dy);
-    gl->recti(gl, dx, dy, dx + w, dy + h);
+    gl->texCoord2i(gl, sx - l_disp, sy - t_disp);
+    gl->recti(gl, l_disp, t_disp, r_disp, b_disp);
     gl->disable(gl, GGL_TEXTURE_2D);
 
+#if TW_ROTATION != 0
+    free(surface_rotated.data);
+#endif
+
     if(surface->format == GGL_PIXEL_FORMAT_RGBX_8888)
         gl->enable(gl, GGL_BLEND);
 }
@@ -369,12 +437,16 @@
 
 int gr_fb_width(void)
 {
-    return gr_draw->width - 2*overscan_offset_x;
+    return (TW_ROTATION == 0 || TW_ROTATION == 180) ?
+            gr_draw->width  - 2 * overscan_offset_x :
+            gr_draw->height - 2 * overscan_offset_y;
 }
 
 int gr_fb_height(void)
 {
-    return gr_draw->height - 2*overscan_offset_y;
+    return (TW_ROTATION == 0 || TW_ROTATION == 180) ?
+            gr_draw->height - 2 * overscan_offset_y :
+            gr_draw->width  - 2 * overscan_offset_x;
 }
 
 void gr_fb_blank(bool blank)
diff --git a/minuitwrp/graphics_fbdev.cpp b/minuitwrp/graphics_fbdev.cpp
index 8cf85f5..fc5fcf3 100644
--- a/minuitwrp/graphics_fbdev.cpp
+++ b/minuitwrp/graphics_fbdev.cpp
@@ -293,7 +293,6 @@
         ucfb_vaddr[idx + 2] = tmp;
     }
 #endif
-#ifndef BOARD_HAS_FLIPPED_SCREEN
     if (double_buffered) {
         // Copy from the in-memory surface to the framebuffer.
         memcpy(gr_framebuffer[1-displayed_buffer].data, gr_draw->data,
@@ -304,32 +303,6 @@
         memcpy(gr_framebuffer[0].data, gr_draw->data,
                gr_draw->height * gr_draw->row_bytes);
     }
-#else
-    int gr_active_fb = 0;
-    if (double_buffered)
-        gr_active_fb = 1-displayed_buffer;
-
-    /* flip buffer 180 degrees for devices with physically inverted screens */
-    unsigned int row_pixels = gr_draw->row_bytes / gr_framebuffer[0].pixel_bytes;
-    if (gr_framebuffer[0].pixel_bytes == 4) {
-        for (unsigned int y = 0; y < gr_draw->height; ++y) {
-            uint32_t* dst = reinterpret_cast<uint32_t*>(gr_framebuffer[gr_active_fb].data) + y * row_pixels;
-            uint32_t* src = reinterpret_cast<uint32_t*>(gr_draw->data) + (gr_draw->height - y - 1) * row_pixels + gr_draw->width;
-            for (unsigned int x = 0; x < gr_draw->width; ++x)
-                *(dst++) = *(--src);
-        }
-    } else {
-        for (unsigned int y = 0; y < gr_draw->height; ++y) {
-            uint16_t* dst = reinterpret_cast<uint16_t*>(gr_framebuffer[gr_active_fb].data) + y * row_pixels;
-            uint16_t* src = reinterpret_cast<uint16_t*>(gr_draw->data) + (gr_draw->height - y - 1) * row_pixels + gr_draw->width;
-            for (unsigned int x = 0; x < gr_draw->width; ++x)
-                 *(dst++) = *(--src);
-        }
-    }
-
-    if (double_buffered)
-        set_displayed_framebuffer(1-displayed_buffer);
-#endif
     return gr_draw;
 }
 
diff --git a/minuitwrp/graphics_overlay.cpp b/minuitwrp/graphics_overlay.cpp
index b4efae4..4dff7f4 100644
--- a/minuitwrp/graphics_overlay.cpp
+++ b/minuitwrp/graphics_overlay.cpp
@@ -323,9 +323,13 @@
             overlayL.dst_rect.w = gr_fb.width;
             overlayL.dst_rect.h = gr_fb.height;
             overlayL.alpha = 0xFF;
-#ifdef BOARD_HAS_FLIPPED_SCREEN
-            overlayL.flags = MDP_ROT_180;
-#endif
+            // If this worked, life would have been so much easier
+            //switch (TW_ROTATION) {
+                //case   0:  overlayL.flags = MDP_ROT_NOP; break;
+                //case  90:  overlayL.flags = MDP_ROT_90;  break;
+                //case 180:  overlayL.flags = MDP_ROT_180; break;
+                //case 270:  overlayL.flags = MDP_ROT_270; break;
+            //}
             overlayL.transp_mask = MDP_TRANSP_NOP;
             overlayL.id = MSMFB_NEW_REQUEST;
             ret = ioctl(fd, MSMFB_OVERLAY_SET, &overlayL);
@@ -363,9 +367,13 @@
             overlayL.dst_rect.w = lWidth;
             overlayL.dst_rect.h = height;
             overlayL.alpha = 0xFF;
-#ifdef BOARD_HAS_FLIPPED_SCREEN
-            overlayL.flags = MDP_ROT_180;
-#endif
+            // If this worked, life would have been so much easier
+            //switch (TW_ROTATION) {
+                //case   0:  overlayL.flags = MDP_ROT_NOP; break;
+                //case  90:  overlayL.flags = MDP_ROT_90;  break;
+                //case 180:  overlayL.flags = MDP_ROT_180; break;
+                //case 270:  overlayL.flags = MDP_ROT_270; break;
+            //}
             overlayL.transp_mask = MDP_TRANSP_NOP;
             overlayL.id = MSMFB_NEW_REQUEST;
             ret = ioctl(fd, MSMFB_OVERLAY_SET, &overlayL);
@@ -393,11 +401,14 @@
             overlayR.dst_rect.w = rWidth;
             overlayR.dst_rect.h = height;
             overlayR.alpha = 0xFF;
-#ifdef BOARD_HAS_FLIPPED_SCREEN
-            overlayR.flags = MDSS_MDP_RIGHT_MIXER | MDP_ROT_180;
-#else
             overlayR.flags = MDSS_MDP_RIGHT_MIXER;
-#endif
+            // If this worked, life would have been so much easier
+            //switch (TW_ROTATION) {
+                //case   0:  overlayR.flags |= MDP_ROT_NOP; break;
+                //case  90:  overlayR.flags |= MDP_ROT_90;  break;
+                //case 180:  overlayR.flags |= MDP_ROT_180; break;
+                //case 270:  overlayR.flags |= MDP_ROT_270; break;
+            //}
             overlayR.transp_mask = MDP_TRANSP_NOP;
             overlayR.id = MSMFB_NEW_REQUEST;
             ret = ioctl(fd, MSMFB_OVERLAY_SET, &overlayR);
diff --git a/minuitwrp/graphics_utils.cpp b/minuitwrp/graphics_utils.cpp
index 67c836e..c591e53 100644
--- a/minuitwrp/graphics_utils.cpp
+++ b/minuitwrp/graphics_utils.cpp
@@ -19,6 +19,7 @@
 #include <png.h>
 #include <pixelflinger/pixelflinger.h>
 #include <linux/fb.h>
+#include <string.h>
 
 #include "minui.h"
 
@@ -121,3 +122,47 @@
         fclose(fp);
     return res;
 }
+
+#define MATRIX_ELEMENT(matrix, row, col, row_size, elem_size) \
+    (((uint8_t*) (matrix)) + (((row) * (elem_size)) * (row_size)) + ((col) * (elem_size)))
+
+#define DO_MATRIX_ROTATION(bits_per_pixel, bytes_per_pixel)                   \
+{                                                                             \
+    for (size_t y = 0; y < src->height; y++) {                                \
+        for (size_t x = 0; x < src->width; x++) {                             \
+            /* output pointer in dst->data */                                 \
+            uint##bits_per_pixel##_t       *op;                               \
+            /* input pointer from src->data */                                \
+            const uint##bits_per_pixel##_t *ip;                               \
+            /* Display coordinates (in dst) corresponding to (x, y) in src */ \
+            size_t x_disp = ROTATION_X_DISP(x, y, dst);                     \
+            size_t y_disp = ROTATION_Y_DISP(x, y, dst);                     \
+                                                                              \
+            ip = (const uint##bits_per_pixel##_t*)                            \
+                 MATRIX_ELEMENT(src->data, y, x,                              \
+                                src->stride, bytes_per_pixel);                \
+            op = (uint##bits_per_pixel##_t*)                                  \
+                 MATRIX_ELEMENT(dst->data, y_disp, x_disp,                    \
+                                dst->stride, bytes_per_pixel);                \
+            *op = *ip;                                                        \
+        }                                                                     \
+    }                                                                         \
+}
+
+void surface_ROTATION_transform(gr_surface dst_ptr, const gr_surface src_ptr,
+                                  size_t num_bytes_per_pixel)
+{
+    GGLSurface *dst = (GGLSurface*) dst_ptr;
+    const GGLSurface *src = (GGLSurface*) src_ptr;
+
+    /* Handle duplicated code via a macro.
+     * This is currently used for rotating surfaces of graphical resources
+     * (32-bit pixel format) and of font glyphs (8-bit pixel format).
+     * If you need to add handling of other pixel formats feel free to do so.
+     */
+    if (num_bytes_per_pixel == 4) {
+        DO_MATRIX_ROTATION(32, 4);
+    } else if (num_bytes_per_pixel == 1) {
+        DO_MATRIX_ROTATION(8, 1);
+    }
+}
diff --git a/minuitwrp/minui.h b/minuitwrp/minui.h
index 018f327..2c462e7 100644
--- a/minuitwrp/minui.h
+++ b/minuitwrp/minui.h
@@ -58,7 +58,8 @@
 void *gr_ttf_loadFont(const char *filename, int size, int dpi);
 void *gr_ttf_scaleFont(void *font, int max_width, int measured_width);
 void gr_ttf_freeFont(void *font);
-int gr_ttf_textExWH(void *context, int x, int y, const char *s, void *pFont, int max_width, int max_height);
+int gr_ttf_textExWH(void *context, int x, int y, const char *s, void *pFont,
+                    int max_width, int max_height, const gr_surface gr_draw);
 int gr_ttf_measureEx(const char *s, void *font);
 int gr_ttf_maxExW(const char *s, void *font, int max_width);
 int gr_ttf_getMaxFontHeight(void *font);
@@ -73,6 +74,21 @@
 // Functions in graphics_utils.c
 int gr_save_screenshot(const char *dest);
 
+// Transform minuitwrp API coordinates into display coordinates,
+// for panels that are hardware-mounted in a rotated manner.
+#define ROTATION_X_DISP(x, y, surface) \
+    ((TW_ROTATION ==   0) ? (x) : \
+     (TW_ROTATION ==  90) ? ((surface)->width - (y) - 1) : \
+     (TW_ROTATION == 180) ? ((surface)->width - (x) - 1) : \
+     (TW_ROTATION == 270) ? (y) : -1)
+#define ROTATION_Y_DISP(x, y, surface) \
+    ((TW_ROTATION ==   0) ? (y) : \
+     (TW_ROTATION ==  90) ? (x) : \
+     (TW_ROTATION == 180) ? ((surface)->height - (y) - 1) : \
+     (TW_ROTATION == 270) ? ((surface)->height - (x) - 1) : -1)
+
+void surface_ROTATION_transform(gr_surface dst_ptr, const gr_surface src_ptr, size_t num_bytes_per_pixel);
+
 // input event structure, include <linux/input.h> for the definition.
 // see http://www.mjmwired.net/kernel/Documentation/input/ for info.
 struct input_event;
diff --git a/minuitwrp/truetype.cpp b/minuitwrp/truetype.cpp
index 3e5f707..0416b0e 100644
--- a/minuitwrp/truetype.cpp
+++ b/minuitwrp/truetype.cpp
@@ -14,6 +14,8 @@
 
 #include <pixelflinger/pixelflinger.h>
 #include <pthread.h>
+// For std::min and std::max
+#include <algorithm>
 
 #define STRING_CACHE_MAX_ENTRIES 400
 #define STRING_CACHE_TRUNCATE_ENTRIES 150
@@ -697,10 +699,14 @@
     return max_bytes;
 }
 
-int gr_ttf_textExWH(void *context, int x, int y, const char *s, void *pFont, int max_width, int max_height)
+int gr_ttf_textExWH(void *context, int x, int y,
+                    const char *s, void *pFont,
+                    int max_width, int max_height,
+                    const gr_surface gr_draw_surface)
 {
     GGLContext *gl = (GGLContext *)context;
     TrueTypeFont *font = (TrueTypeFont *)pFont;
+    const GRSurface *gr_draw = (const GRSurface*) gr_draw_surface;
 
     // not actualy max width, but max_width + x
     if(max_width != -1)
@@ -719,6 +725,21 @@
         return -1;
     }
 
+#if TW_ROTATION != 0
+    // Do not perform relatively expensive operation if not needed
+    GGLSurface string_surface_rotated;
+    string_surface_rotated.version = sizeof(string_surface_rotated);
+    // TW_ROTATION == 0 cannot occur inside this #if TW_ROTATION != 0 block, so
+    // testing for 180 alone separates the same-dimensions case from 90/270.
+    string_surface_rotated.width   = (TW_ROTATION == 180) ? e->surface.width  : e->surface.height;
+    string_surface_rotated.height  = (TW_ROTATION == 180) ? e->surface.height : e->surface.width;
+    string_surface_rotated.stride  = string_surface_rotated.width;
+    string_surface_rotated.format  = e->surface.format;
+    // e->surface.format is GGL_PIXEL_FORMAT_A_8 (grayscale)
+    string_surface_rotated.data    = (GGLubyte*) malloc(string_surface_rotated.stride * string_surface_rotated.height * 1);
+    surface_ROTATION_transform((gr_surface) &string_surface_rotated, (const gr_surface) &e->surface, 1);
+#endif
+
     int y_bottom = y + e->surface.height;
     int res = e->rendered_bytes;
 
@@ -732,16 +753,39 @@
         }
     }
 
+    // Figuring out display coordinates works for TW_ROTATION == 0 too,
+    // and isn't as expensive as allocating and rotating another surface,
+    // so we do this anyway.
+    int x0_disp, y0_disp, x1_disp, y1_disp;
+    int l_disp, r_disp, t_disp, b_disp;
+
+    x0_disp = ROTATION_X_DISP(x, y, gr_draw);
+    y0_disp = ROTATION_Y_DISP(x, y, gr_draw);
+    x1_disp = ROTATION_X_DISP(x + e->surface.width, y_bottom, gr_draw);
+    y1_disp = ROTATION_Y_DISP(x + e->surface.width, y_bottom, gr_draw);
+    l_disp = std::min(x0_disp, x1_disp);
+    r_disp = std::max(x0_disp, x1_disp);
+    t_disp = std::min(y0_disp, y1_disp);
+    b_disp = std::max(y0_disp, y1_disp);
+
+#if TW_ROTATION != 0
+    gl->bindTexture(gl, &string_surface_rotated);
+#else
     gl->bindTexture(gl, &e->surface);
+#endif
     gl->texEnvi(gl, GGL_TEXTURE_ENV, GGL_TEXTURE_ENV_MODE, GGL_REPLACE);
     gl->texGeni(gl, GGL_S, GGL_TEXTURE_GEN_MODE, GGL_ONE_TO_ONE);
     gl->texGeni(gl, GGL_T, GGL_TEXTURE_GEN_MODE, GGL_ONE_TO_ONE);
 
     gl->enable(gl, GGL_TEXTURE_2D);
-    gl->texCoord2i(gl, -x, -y);
-    gl->recti(gl, x, y, x + e->surface.width, y_bottom);
+    gl->texCoord2i(gl, -l_disp, -t_disp);
+    gl->recti(gl, l_disp, t_disp, r_disp, b_disp);
     gl->disable(gl, GGL_TEXTURE_2D);
 
+#if TW_ROTATION != 0
+    free(string_surface_rotated.data);
+#endif
+
     pthread_mutex_unlock(&font->mutex);
     return res;
 }