Blame - libblkid/encode.c - android_bootable_recovery

blob: ff57be4cb3b080b4da1a5542dcdd46d0f7d7ff8a [file] [log] [blame]

bigbiff bigbiff	e60683a	2013-02-22 20:55:50 -0500	[diff] [blame]	1
				2	/*
				3	* encode.c - string conversion routines (mostly for compatibility with
				4	* udev/volume_id)
				5	*
				6	* Copyright (C) 2008 Kay Sievers <kay.sievers@vrfy.org>
				7	* Copyright (C) 2009 Karel Zak <kzak@redhat.com>
				8	*
				9	* This file may be redistributed under the terms of the
				10	* GNU Lesser General Public License.
				11	*/
				12	#include <stdio.h>
				13	#include <stdlib.h>
				14	#include <stddef.h>
				15	#include <unistd.h>
				16	#include <errno.h>
				17	#include <string.h>
				18	#include <ctype.h>
				19
				20	#include "blkidP.h"
				21
				22	#define UDEV_ALLOWED_CHARS_INPUT "/ $%?,"
				23
				24	/**
				25	* SECTION: encode
				26	* @title: Encoding utils
				27	* @short_description: encode strings to safe udev-compatible formats
				28	*
				29	*/
				30
				/*
				 * Number of bytes a UTF-8 sequence should occupy, judged only from its
				 * first byte.
				 *
				 * Returns 1 for a plain ASCII byte, 2..6 for a multi-byte lead byte, and
				 * 0 when the byte cannot start a sequence (a continuation byte, 0xfe or
				 * 0xff).
				 */
				static int utf8_encoded_expected_len(const char *str)
				{
					const unsigned char lead = (unsigned char)str[0];
					unsigned int bit;
					int ones = 0;

					/* count the run of 1 bits at the top of the lead byte */
					for (bit = 0x80; bit != 0 && (lead & bit) != 0; bit >>= 1)
						ones++;

					switch (ones) {
					case 0:
						/* 0xxxxxxx */
						return 1;
					case 2:
					case 3:
					case 4:
					case 5:
					case 6:
						/* 110xxxxx .. 1111110x: the run length is the sequence length */
						return ones;
					default:
						/* 10xxxxxx, 0xfe and 0xff never start a sequence */
						return 0;
					}
				}
				50
				/*
				 * Decode the UTF-8 sequence starting at @str into a code point.
				 *
				 * The sequence length is taken from the lead byte; the payload bits of the
				 * lead byte and of each continuation byte are then concatenated.
				 *
				 * Returns the decoded value, or -1 if the lead byte cannot start a
				 * sequence or one of the following bytes is not a 10xxxxxx continuation
				 * byte. Overlong encodings and the numeric range are not checked here.
				 */
				static int utf8_encoded_to_unichar(const char *str)
				{
					int unichar;
					int len;
					int i;

					len = utf8_encoded_expected_len(str);
					switch (len) {
					case 1:
						return (int)str[0];
					case 2:
						unichar = str[0] & 0x1f;
						break;
					case 3:
						unichar = (int)str[0] & 0x0f;
						break;
					case 4:
						unichar = (int)str[0] & 0x07;
						break;
					case 5:
						unichar = (int)str[0] & 0x03;
						break;
					case 6:
						unichar = (int)str[0] & 0x01;
						break;
					default:
						return -1;
					}

					for (i = 1; i < len; i++) {
						/* a NUL terminator fails this test too, so we never read past it */
						if (((int)str[i] & 0xc0) != 0x80)
							return -1;
						unichar <<= 6;
						unichar |= (int)str[i] & 0x3f;
					}

					return unichar;
				}
				90
				/*
				 * Number of bytes needed to encode @unichar, using the original
				 * (up to six byte) UTF-8 scheme. Values below 0x80, including negative
				 * ones, yield 1.
				 */
				static int utf8_unichar_to_encoded_len(int unichar)
				{
					/* first value that no longer fits in 1, 2, 3, 4 and 5 bytes */
					static const int too_big_for[] = {
						0x80, 0x800, 0x10000, 0x200000, 0x4000000
					};
					int nbytes;

					for (nbytes = 1; nbytes <= 5; nbytes++) {
						if (unichar < too_big_for[nbytes - 1])
							return nbytes;
					}
					return 6;
				}
				106
				/*
				 * Tell whether @unichar lies in the numeric range accepted by this file.
				 *
				 * Rejected are: values above 0x10ffff, the surrogate block
				 * 0xd800..0xdfff, the block 0xfdd0..0xfdef, and every value whose low
				 * 16 bits are 0xffff. Returns 1 when accepted, 0 otherwise.
				 */
				static int utf8_unichar_valid_range(int unichar)
				{
					const int above_max = unichar > 0x10ffff;
					const int is_surrogate = (unichar & 0xfffff800) == 0xd800;
					const int in_fdd0_block = (unichar > 0xfdcf) && (unichar < 0xfdf0);
					const int ends_in_ffff = (unichar & 0xffff) == 0xffff;

					return !(above_max || is_surrogate || in_fdd0_block || ends_in_ffff);
				}
				120
				/*
				 * Validate the single UTF-8 encoded character at @str.
				 *
				 * Returns the number of bytes it occupies (1 for ASCII), or -1 when the
				 * lead byte is invalid, a byte of the sequence is missing, the value was
				 * encoded with more bytes than necessary, or the value is outside the
				 * accepted range.
				 */
				static int utf8_encoded_valid_unichar(const char *str)
				{
					const int expected = utf8_encoded_expected_len(str);
					int codepoint;
					int pos;

					if (expected == 0)
						return -1;

					/* a single ASCII byte needs no further checks */
					if (expected == 1)
						return 1;

					/* every byte of a multi-byte sequence has its top bit set */
					for (pos = 0; pos < expected; pos++) {
						if (!(str[pos] & 0x80))
							return -1;
					}

					codepoint = utf8_encoded_to_unichar(str);

					/* reject overlong forms first, then values outside the valid range */
					if (utf8_unichar_to_encoded_len(codepoint) != expected ||
					    !utf8_unichar_valid_range(codepoint))
						return -1;

					return expected;
				}
				153
				/*
				 * Copy @str to @to with whitespace normalised.
				 *
				 * At most @len bytes of @str are examined (fewer if a NUL comes first).
				 * Leading and trailing whitespace is dropped and every inner run of
				 * whitespace becomes a single '_'.
				 *
				 * @to is always NUL-terminated. In the worst case (no NUL within @len
				 * bytes and no whitespace) @len characters plus the terminator are
				 * written, so @to must provide room for @len + 1 bytes.
				 *
				 * Always returns 0.
				 */
				static int replace_whitespace(const char *str, char *to, size_t len)
				{
					const char *nul = memchr(str, '\0', len);
					size_t i, j;

					/* bounded string length, same result as strnlen(str, len) */
					if (nul != NULL)
						len = (size_t)(nul - str);

					/* strip trailing whitespace */
					while (len && isspace((unsigned char)str[len - 1]))
						len--;

					/* strip leading whitespace; test the bound before reading str[i] */
					i = 0;
					while (i < len && isspace((unsigned char)str[i]))
						i++;

					j = 0;
					while (i < len) {
						/* substitute multiple whitespace with a single '_' */
						if (isspace((unsigned char)str[i])) {
							/*
							 * No bound check needed: str[len - 1] is known to be
							 * non-whitespace, so the run ends before len.
							 */
							while (isspace((unsigned char)str[i]))
								i++;
							to[j++] = '_';
						}
						to[j++] = str[i++];
					}
					to[j] = '\0';
					return 0;
				}
				181
				/*
				 * Tell whether @c may appear unescaped.
				 *
				 * Accepted are ASCII letters and digits, the characters "#+-.:=@_", and
				 * any character contained in @white (which may be NULL).
				 *
				 * Returns 1 if accepted, 0 otherwise. Note that strchr() also matches
				 * the terminating NUL of its string, so c == '\0' is reported as
				 * accepted; callers in this file stop at the terminator before calling.
				 */
				static int is_whitelisted(char c, const char *white)
				{
					if ((c >= '0' && c <= '9') ||
					    (c >= 'A' && c <= 'Z') ||
					    (c >= 'a' && c <= 'z') ||
					    strchr("#+-.:=@_", c) != NULL ||
					    (white != NULL && strchr(white, c) != NULL))
						return 1;
					return 0;
				}
				192
				/*
				 * Sanitise @str in place.
				 *
				 * Kept untouched: characters accepted by is_whitelisted() (with @white
				 * as the additional set, may be NULL), the two-character prefix "\x" of
				 * a hex escape, and valid multi-byte UTF-8 sequences. If @white
				 * contains a space, any other whitespace character is turned into ' '.
				 * Every remaining byte is replaced with '_'.
				 *
				 * Returns the number of bytes that were replaced.
				 */
				static int replace_chars(char *str, const char *white)
				{
					size_t i = 0;
					int replaced = 0;

					while (str[i] != '\0') {
						int len;

						if (is_whitelisted(str[i], white)) {
							i++;
							continue;
						}

						/* accept hex encoding; only "\x" is skipped, the digits that
						 * follow go through the normal checks */
						if (str[i] == '\\' && str[i+1] == 'x') {
							i += 2;
							continue;
						}

						/* accept valid utf8 */
						len = utf8_encoded_valid_unichar(&str[i]);
						if (len > 1) {
							i += len;
							continue;
						}

						/* if space is allowed, replace whitespace with ordinary space;
						 * the cast keeps isspace() defined for bytes >= 0x80 */
						if (isspace((unsigned char)str[i]) &&
						    white != NULL && strchr(white, ' ') != NULL) {
							str[i] = ' ';
							i++;
							replaced++;
							continue;
						}

						/* everything else is replaced with '_' */
						str[i] = '_';
						i++;
						replaced++;
					}
					return replaced;
				}
				235
				236	size_t blkid_encode_to_utf8(int enc, unsigned char *dest, size_t len,
				237	const unsigned char *src, size_t count)
				238	{
				239	size_t i, j;
				240	uint16_t c;
				241
				242	for (j = i = 0; i + 2 <= count; i += 2) {
				243	if (enc == BLKID_ENC_UTF16LE)
				244	c = (src[i+1] << 8) \| src[i];
				245	else /* BLKID_ENC_UTF16BE */
				246	c = (src[i] << 8) \| src[i+1];
				247	if (c == 0) {
				248	dest[j] = '\0';
				249	break;
				250	} else if (c < 0x80) {
				251	if (j+1 >= len)
				252	break;
				253	dest[j++] = (uint8_t) c;
				254	} else if (c < 0x800) {
				255	if (j+2 >= len)
				256	break;
				257	dest[j++] = (uint8_t) (0xc0 \| (c >> 6));
				258	dest[j++] = (uint8_t) (0x80 \| (c & 0x3f));
				259	} else {
				260	if (j+3 >= len)
				261	break;
				262	dest[j++] = (uint8_t) (0xe0 \| (c >> 12));
				263	dest[j++] = (uint8_t) (0x80 \| ((c >> 6) & 0x3f));
				264	dest[j++] = (uint8_t) (0x80 \| (c & 0x3f));
				265	}
				266	}
				267	dest[j] = '\0';
				268	return j;
				269	}
				270
				/**
				 * blkid_encode_string:
				 * @str: input string to be encoded
				 * @str_enc: output string to store the encoded input string
				 * @len: maximum size of the output string, which may be
				 *       four times as long as the input string
				 *
				 * Encode all potentially unsafe characters of a string to the
				 * corresponding hex value prefixed by '\x'. Valid multi-byte UTF-8
				 * sequences are copied unchanged.
				 *
				 * After every input character at least four bytes must still be free in
				 * @str_enc, otherwise the call fails; on failure the content of @str_enc
				 * is not NUL-terminated.
				 *
				 * Returns: 0 if the entire string was copied, non-zero otherwise.
				 **/
				int blkid_encode_string(const char *str, char *str_enc, size_t len)
				{
					static const char hexdigits[] = "0123456789abcdef";
					size_t i, j;

					if (!str || !str_enc || !len)
						return -1;

					for (i = 0, j = 0; str[i] != '\0'; i++) {
						int seqlen;

						seqlen = utf8_encoded_valid_unichar(&str[i]);
						if (seqlen > 1) {
							if (len-j < (size_t)seqlen)
								goto err;
							memcpy(&str_enc[j], &str[i], seqlen);
							j += seqlen;
							i += (seqlen-1);
						} else if (str[i] == '\\' || !is_whitelisted(str[i], NULL)) {
							const unsigned char byte = (unsigned char) str[i];

							if (len-j < 4)
								goto err;
							/*
							 * Store exactly the four bytes "\xNN". sprintf() would
							 * append a NUL as a fifth byte, which lands past the end
							 * of the buffer when only four bytes are left.
							 */
							str_enc[j++] = '\\';
							str_enc[j++] = 'x';
							str_enc[j++] = hexdigits[byte >> 4];
							str_enc[j++] = hexdigits[byte & 0x0f];
						} else {
							if (len-j < 1)
								goto err;
							str_enc[j] = str[i];
							j++;
						}
						/* keep room for another escape sequence */
						if (j+3 >= len)
							goto err;
					}
					if (len-j < 1)
						goto err;
					str_enc[j] = '\0';
					return 0;
				err:
					return -1;
				}
				321
				322	/**
				323	* blkid_safe_string:
				324	* @str: input string
				325	* @str_safe: output string
				326	* @len: size of output string
				327	*
				328	* Allows plain ascii, hex-escaping and valid utf8. Replaces all whitespaces
				329	* with '_'.
				330	*
				331	* Returns: 0 on success or -1 in case of error.
				332	*/
				333	int blkid_safe_string(const char str, char str_safe, size_t len)
				334	{
				335	if (!str \|\| !str_safe \|\| !len)
				336	return -1;
				337	replace_whitespace(str, str_safe, len);
				338	replace_chars(str_safe, UDEV_ALLOWED_CHARS_INPUT);
				339	return 0;
				340	}