blob: 5e52e8f443151eeec655192c590e3901d85b6f73 [file] [log] [blame]
bigbiff bigbiffe60683a2013-02-22 20:55:50 -05001/* Align/Truncate a string in a given screen width
2 Copyright (C) 2009-2010 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
bigbiff7b4c7a62015-01-01 19:44:14 -05005 it under the terms of the GNU Lesser General Public License as published by
6 the Free Software Foundation, either version 2.1 of the License, or
bigbiff bigbiffe60683a2013-02-22 20:55:50 -05007 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17/* Written by Pádraig Brady. */
18
19#include <config.h>
20
21#include <stdlib.h>
22#include <string.h>
23#include <stdio.h>
24#include <stdbool.h>
25#include <limits.h>
bigbiff7b4c7a62015-01-01 19:44:14 -050026#include <ctype.h>
bigbiff bigbiffe60683a2013-02-22 20:55:50 -050027
28#include "c.h"
29#include "mbsalign.h"
30#include "widechar.h"
31
bigbiff bigbiffe60683a2013-02-22 20:55:50 -050032#ifdef HAVE_WIDECHAR
/* wc_ensure_printable(), defined further below, replaces non printable
   wide chars.  Note \t and \n etc. are non printable.
   It returns true if a replacement was made, false otherwise.  */
36
/*
 * Counts the number of screen cells needed to print the multibyte string
 * @buf once control and non-printable chars have been replaced by a
 * four-character \x?? hex sequence. See mbs_safe_encode().
 *
 * At most @bufsz bytes of @buf are examined and scanning also stops at the
 * first NUL byte. A NULL @buf or a zero @bufsz gives zero cells and zero
 * bytes. A multibyte sequence that is cut off by @bufsz is handled like an
 * invalid sequence, i.e. byte by byte.
 *
 * Returns: number of cells; if @sz is not NULL it receives the number of
 * bytes of the encoded string (terminating NUL not included).
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
	const char *p = buf;
	size_t left = buf ? bufsz : 0;	/* bytes we may still look at */
	size_t width = 0, bytes = 0;
#ifdef HAVE_WIDECHAR
	mbstate_t st;

	memset(&st, 0, sizeof(st));
#endif

	while (left > 0 && *p) {
		size_t len = 1;		/* input bytes consumed this round */

		if (iscntrl((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			/* never let mbrtowc() read beyond the caller's limit */
			size_t max = left < MB_CUR_MAX ? left : MB_CUR_MAX;

			len = mbrtowc(&wc, p, max, &st);
			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/*
				 * Invalid or incomplete sequence; the conversion
				 * state is unusable now, so start afresh and
				 * account for this single byte only.
				 */
				memset(&st, 0, sizeof(st));
				len = 1;
				if (isprint((unsigned char) *p))
					width += 1, bytes += 1;
				else
					width += 4, bytes += 4;

			} else if (!iswprint(wc)) {
				width += len * 4;	/* hex encode whole sequence */
				bytes += len * 4;
			} else {
				int cells = wcwidth(wc);

				/* a negative result must not wrap the unsigned sum */
				if (cells > 0)
					width += cells;	/* number of cells */
				bytes += len;		/* number of bytes */
			}
		}
#else
		else if (!isprint((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
		} else {
			width++, bytes++;
		}
#endif
		p += len;
		left -= len;	/* len <= left always holds here */
	}

	if (sz)
		*sz = bytes;
	return width;
}
99
/*
 * Convenience wrapper around mbs_safe_nwidth() for a NUL-terminated
 * string. Returns the number of cells, or 0 for a NULL or empty @s.
 */
size_t mbs_safe_width(const char *s)
{
	if (s != NULL && *s != '\0')
		return mbs_safe_nwidth(s, strlen(s), NULL);

	return 0;
}
106
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequence.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes.
 *
 * If @width is not NULL it receives the number of cells of the encoded
 * string; it is left untouched when the function fails.
 *
 * Returns: @buf (NUL-terminated) on success, NULL if @s is NULL or empty
 * or if @buf is NULL.
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf)
{
	const char *p = s;
	char *r = buf;
	size_t cells = 0;
	size_t sz = s ? strlen(s) : 0;
#ifdef HAVE_WIDECHAR
	mbstate_t st;
#endif

	if (!sz || !buf)
		return NULL;

#ifdef HAVE_WIDECHAR
	memset(&st, 0, sizeof(st));
#endif

	while (*p) {
		if (iscntrl((unsigned char) *p)) {
			/* sprintf() stores 4 chars + NUL; the NUL is overwritten
			 * by the next write or by the final terminator */
			sprintf(r, "\\x%02x", (unsigned char) *p);
			r += 4;
			cells += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				/* conversion state is unusable after an error */
				memset(&st, 0, sizeof(st));
				len = 1;
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 */
				if (!isprint((unsigned char) *p)) {
					sprintf(r, "\\x%02x", (unsigned char) *p);
					r += 4;
					cells += 4;
				} else {
					cells++;
					*r++ = *p;
				}
			} else if (!iswprint(wc)) {
				size_t i;

				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					sprintf(r, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					cells += 4;
				}
			} else {
				int w = wcwidth(wc);

				memcpy(r, p, len);
				r += len;
				/* a negative result must not wrap the unsigned sum */
				if (w > 0)
					cells += w;
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			sprintf(r, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			cells += 4;
		} else {
			*r++ = *p++;
			cells++;
		}
#endif
	}

	*r = '\0';

	if (width)
		*width = cells;
	return buf;
}
189
/*
 * Returns the buffer size needed to hold the encoded form of a string of
 * @bytes bytes: four output bytes per input byte plus one more byte.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	const size_t per_byte = 4;	/* length of one \x?? sequence */

	return per_byte * bytes + 1;
}
194
/*
 * Allocates a buffer of mbs_safe_encode_size(strlen(s)) bytes with malloc()
 * and lets mbs_safe_encode_to_buffer() fill it with the encoded copy of @s
 * (control and non-printable chars replaced with \x?? hex sequence).
 *
 * Returns: the allocated string, or NULL if @s is NULL or empty or if the
 * allocation fails.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	char *out;
	size_t len;

	if (s == NULL)
		return NULL;

	len = strlen(s);
	if (len == 0)
		return NULL;

	out = malloc(mbs_safe_encode_size(len));

	return out ? mbs_safe_encode_to_buffer(s, width, out) : NULL;
}
212
/* Overwrite every wide char of the NUL terminated string WCHARS that
   iswprint() rejects with U+FFFD.
   Return true if at least one char was overwritten, false otherwise.  */

static bool
wc_ensure_printable (wchar_t *wchars)
{
  bool any_replaced = false;
  wchar_t *cur;

  for (cur = wchars; *cur != L'\0'; cur++)
    {
      if (iswprint ((wint_t) *cur))
        continue;

      *cur = 0xFFFD; /* L'\uFFFD' (replacement char) */
      any_replaced = true;
    }

  return any_replaced;
}
229
/* Cut the wide string WC so that it occupies at most WIDTH cells.
 * A char for which wcwidth() reports -1 is overwritten with U+FFFD
 * and counted as one cell.
 * Returns the number of cells the shortened string uses. */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
  size_t used = 0;
  wchar_t *cur;

  for (cur = wc; *cur != L'\0'; cur++)
    {
      int w = wcwidth (*cur);

      if (w == -1) /* non printable */
        {
          *cur = 0xFFFD; /* L'\uFFFD' (replacement char) */
          w = 1;
        }

      if (used + w > width)
        break; /* this char no longer fits */

      used += w;
    }

  *cur = L'\0';
  return used;
}
255
/* FIXME: move this function to gnulib as it's missing on:
   OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS  */

/* Sum the wcwidth() of at most N wide chars of S, stopping early at a
   NUL.  Return the sum, or -1 if a char is non printable or the sum
   would exceed INT_MAX.  */

static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
  int total = 0;
  size_t i;

  for (i = 0; i < n && s[i] != L'\0'; i++)
    {
      int w = wcwidth (s[i]);

      /* non printable char, or adding W would overflow */
      if (w == -1 || total > INT_MAX - w)
        return -1;

      total += w;
    }

  return total;
}
276#endif
277
/* Truncate multi-byte string @str in place so that it needs at most
 * @width cells; in @width returns the number of cells really used.
 *
 * With wide char support the string is converted to wide chars,
 * truncated by wc_truncate() and converted back. If the string cannot
 * be converted to wide chars, or memory cannot be allocated, @str and
 * @width are left unmodified. Without wide char support one byte is
 * one cell.
 *
 * Returns number of bytes of the new string @str, or (size_t) -1 if
 * the conversion back to a multi-byte string fails.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
	ssize_t bytes = strlen(str);
#ifdef HAVE_WIDECHAR
	ssize_t sz = mbstowcs(NULL, str, 0);
	wchar_t *wcs = NULL;

	if (sz == (ssize_t) -1)
		goto done;

	/*
	 * Zero-filled on purpose: mbstowcs() below stores at most sz wide
	 * chars and then does not write a terminator, while wc_truncate()
	 * walks the array up to the terminating L'\0'.
	 */
	wcs = calloc(sz + 1, sizeof(*wcs));
	if (!wcs)
		goto done;

	if (!mbstowcs(wcs, str, sz))
		goto done;
	*width = wc_truncate(wcs, *width);
	/* the original length limits how much is written back into str */
	bytes = wcstombs(str, wcs, bytes);
done:
	free(wcs);
#else
	if (*width < (size_t) bytes)
		bytes = *width;
	else
		*width = bytes;		/* shorter than the limit: report real size */
#endif
	if (bytes >= 0)
		str[bytes] = '\0';
	return bytes;
}
311
/* Store up to N_SPACES blanks at DEST, but never touch anything
   past DEST_END.  The output is always NUL terminated, and the
   address of that terminating NUL is the return value.  */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces)
{
  /* FIXME: Should we pad with "figure space" (\u2007)
     if non ascii data present?  */
  size_t room = dest < dest_end ? (size_t) (dest_end - dest) : 0;
  size_t n_fill = n_spaces < room ? n_spaces : room;

  memset (dest, ' ', n_fill);
  dest[n_fill] = '\0';
  return dest + n_fill;
}
327
328/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
329 characters; write the result into the DEST_SIZE-byte buffer, DEST.
330 ALIGNMENT specifies whether to left- or right-justify or to center.
331 If SRC requires more than *WIDTH columns, truncate it to fit.
332 When centering, the number of trailing spaces may be one less than the
333 number of leading spaces. The FLAGS parameter is unused at present.
334 Return the length in bytes required for the final result, not counting
335 the trailing NUL. A return value of DEST_SIZE or larger means there
336 wasn't enough space. DEST will be NUL terminated in any case.
337 Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
338 or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
339 Update *WIDTH to indicate how many columns were used before padding. */
340
341size_t
342mbsalign (const char *src, char *dest, size_t dest_size,
343 size_t *width, mbs_align_t align, int flags)
344{
345 size_t ret = -1;
346 size_t src_size = strlen (src) + 1;
347 char *newstr = NULL;
348 wchar_t *str_wc = NULL;
349 const char *str_to_print = src;
350 size_t n_cols = src_size - 1;
351 size_t n_used_bytes = n_cols; /* Not including NUL */
bigbiff7b4c7a62015-01-01 19:44:14 -0500352 size_t n_spaces = 0, space_left;
bigbiff bigbiffe60683a2013-02-22 20:55:50 -0500353 bool conversion = false;
354 bool wc_enabled = false;
355
356#ifdef HAVE_WIDECHAR
357 /* In multi-byte locales convert to wide characters
358 to allow easy truncation. Also determine number
359 of screen columns used. */
360 if (MB_CUR_MAX > 1)
361 {
362 size_t src_chars = mbstowcs (NULL, src, 0);
363 if (src_chars == (size_t) -1)
364 {
365 if (flags & MBA_UNIBYTE_FALLBACK)
366 goto mbsalign_unibyte;
367 else
368 goto mbsalign_cleanup;
369 }
370 src_chars += 1; /* make space for NUL */
371 str_wc = malloc (src_chars * sizeof (wchar_t));
372 if (str_wc == NULL)
373 {
374 if (flags & MBA_UNIBYTE_FALLBACK)
375 goto mbsalign_unibyte;
376 else
377 goto mbsalign_cleanup;
378 }
379 if (mbstowcs (str_wc, src, src_chars) != 0)
380 {
381 str_wc[src_chars - 1] = L'\0';
382 wc_enabled = true;
383 conversion = wc_ensure_printable (str_wc);
384 n_cols = rpl_wcswidth (str_wc, src_chars);
385 }
386 }
387
388 /* If we transformed or need to truncate the source string
389 then create a modified copy of it. */
390 if (wc_enabled && (conversion || (n_cols > *width)))
391 {
392 if (conversion)
393 {
394 /* May have increased the size by converting
395 \t to \uFFFD for example. */
396 src_size = wcstombs(NULL, str_wc, 0) + 1;
397 }
398 newstr = malloc (src_size);
399 if (newstr == NULL)
400 {
401 if (flags & MBA_UNIBYTE_FALLBACK)
402 goto mbsalign_unibyte;
403 else
404 goto mbsalign_cleanup;
405 }
406 str_to_print = newstr;
407 n_cols = wc_truncate (str_wc, *width);
408 n_used_bytes = wcstombs (newstr, str_wc, src_size);
409 }
410#endif
411
412mbsalign_unibyte:
413
414 if (n_cols > *width) /* Unibyte truncation required. */
415 {
416 n_cols = *width;
417 n_used_bytes = n_cols;
418 }
419
420 if (*width > n_cols) /* Padding required. */
421 n_spaces = *width - n_cols;
422
423 /* indicate to caller how many cells needed (not including padding). */
424 *width = n_cols;
425
426 /* indicate to caller how many bytes needed (not including NUL). */
427 ret = n_used_bytes + (n_spaces * 1);
428
429 /* Write as much NUL terminated output to DEST as possible. */
430 if (dest_size != 0)
431 {
432 char *dest_end = dest + dest_size - 1;
bigbiff7b4c7a62015-01-01 19:44:14 -0500433 size_t start_spaces;
434 size_t end_spaces;
bigbiff bigbiffe60683a2013-02-22 20:55:50 -0500435
436 switch (align)
437 {
438 case MBS_ALIGN_CENTER:
439 start_spaces = n_spaces / 2 + n_spaces % 2;
440 end_spaces = n_spaces / 2;
441 break;
442 case MBS_ALIGN_LEFT:
443 start_spaces = 0;
444 end_spaces = n_spaces;
445 break;
446 case MBS_ALIGN_RIGHT:
447 start_spaces = n_spaces;
448 end_spaces = 0;
449 break;
450 default:
451 abort();
452 }
453
454 dest = mbs_align_pad (dest, dest_end, start_spaces);
bigbiff7b4c7a62015-01-01 19:44:14 -0500455 space_left = dest_end - dest;
456 dest = memcpy (dest, str_to_print, min (n_used_bytes, space_left));
bigbiff bigbiffe60683a2013-02-22 20:55:50 -0500457 mbs_align_pad (dest, dest_end, end_spaces);
458 }
459
460mbsalign_cleanup:
461
462 free (str_wc);
463 free (newstr);
464
465 return ret;
466}