cudl

A data language as simple as JSON but as readable as YAML or TOML.
git clone https://shtanton.xyz/git/cudl.git
Log | Files | Refs | README

cudl.c (13581B)


      1 #include <stdio.h>
      2 #include <ctype.h>
      3 #include <stdlib.h>
      4 #include <string.h>
      5 #include "cudl.h"
      6 
      7 #define STRIP_WHITESPACE(text) while (isspace(*(text))) (text)++
      8 
      9 #define IS_KEY_CHAR(c) (\
     10 	'a' <= (c) && (c) <= 'z' ||\
     11 	'A' <= (c) && (c) <= 'Z' ||\
     12 	'0' <= (c) && (c) <= '9' ||\
     13 	(c) == '_' || (c) == '-'\
     14 )
     15 
     16 #define IS_DIGIT(c) (\
     17 	'0' <= (c) && (c) <= '9'\
     18 )
     19 
/* Error state shared by the parsing functions in this file.
 * It starts as CUDL_OK; a parsing function that fails stores one of the
 * CUDL_ERR_* codes here, and callers test it after each parsing call. */
int cudl_err = CUDL_OK;
     21 
     22 static char *fread_all(FILE *file) {
     23 	size_t size;
     24 	char *buffer;
     25 	fseek(file, 0, SEEK_END);
     26 	size = ftell(file);
     27 	rewind(file);
     28 	clearerr(file);
     29 	if ((buffer = malloc(size + 1)) == NULL)
     30 		return NULL;
     31 	if (fread(buffer, 1, size, file) != size) {
     32 		free(buffer);
     33 		return NULL;
     34 	}
     35 	buffer[size] = '\0';
     36 	return buffer;
     37 }
     38 
     39 void cudl_debug(struct cudl_value value) {
     40 	int i;
     41 	switch (value.tag) {
     42 		case CUDL_TAG_NULL:
     43 			printf("%%null");
     44 			break;
     45 		case CUDL_TAG_BOOL:
     46 			if (value.data.boolean)
     47 				printf("%%true");
     48 			else
     49 				printf("%%false");
     50 			break;
     51 		case CUDL_TAG_NUMBER:
     52 			printf("%lf", value.data.number);
     53 			break;
     54 		case CUDL_TAG_STRING:
     55 			printf("\"%s\"", value.data.string);
     56 			break;;
     57 		case CUDL_TAG_ARRAY:
     58 			printf("[");
     59 			for (i = 0; i < value.data.array.length; i++) {
     60 				if (i != 0)
     61 					printf(" ");
     62 				cudl_debug(value.data.array.values[i]);
     63 			}
     64 			printf("]");
     65 			break;
     66 		case CUDL_TAG_MAP:
     67 			printf("{");
     68 			for (i = 0; i < value.data.map.length; i++) {
     69 				if (i != 0)
     70 					printf(" ");
     71 				printf("\"%s\": ", value.data.map.fields[i].key);
     72 				cudl_debug(value.data.map.fields[i].value);
     73 			}
     74 			printf("}");
     75 			break;
     76 		default:
     77 			printf("UNKNOWN");
     78 			break;
     79 	}
     80 }
     81 
     82 /* Free all children of the value, not the value itself */
     83 void cudl_deinit_value(struct cudl_value value) {
     84 	int i;
     85 	switch (value.tag) {
     86 		case CUDL_TAG_ARRAY:
     87 			for (i = 0; i < value.data.array.length; i++) {
     88 				cudl_deinit_value(value.data.array.values[i]);
     89 			}
     90 			free(value.data.array.values);
     91 			break;
     92 		case CUDL_TAG_NULL:
     93 		default:
     94 			break;
     95 	}
     96 }
     97 
/* Parse a value from input and store it in value.
 * Return the number of bytes consumed, which includes any whitespace
 * that follows the value.
 * Failures are reported through cudl_err.
 * Input must end with a null byte.
 * Declared ahead of its definition because the array and map parsers
 * call it for their elements. */
static size_t parse_value(char *input, struct cudl_value *value);
    102 
    103 static size_t parse_bool_or_null(char *input, struct cudl_value *value) {
    104 	if (strncmp(input, "null", 4) == 0) {
    105 		value->tag = CUDL_TAG_NULL;
    106 		return 4;
    107 	}
    108 	if (strncmp(input, "true", 4) == 0) {
    109 		value->tag = CUDL_TAG_BOOL;
    110 		value->data.boolean = 1;
    111 		return 4;
    112 	}
    113 	if (strncmp(input, "false", 5) == 0) {
    114 		value->tag = CUDL_TAG_BOOL;
    115 		value->data.boolean = 0;
    116 		return 5;
    117 	}
    118 	cudl_err = CUDL_ERR_EXPECTED_BOOL_OR_NULL;
    119 	return 0;
    120 }
    121 
    122 static size_t parse_number(char *input, struct cudl_value *value) {
    123 	double number;
    124 	size_t i, exponentStart;
    125 	int exponent, otherExponent;
    126 	int exponentUsed;
    127 	exponentUsed = 0;
    128 	number = 0;
    129 	i = input[0] == '-';
    130 	for (;; i++) {
    131 		if (IS_DIGIT(input[i])) {
    132 			number = number * 10 + (input[i] - '0');
    133 			exponent++;
    134 			continue;
    135 		} else if (input[i] == '.') {
    136 			exponent = 0;
    137 			exponentUsed = 1;
    138 			continue;
    139 		}
    140 		break;
    141 	}
    142 	if (input[0] == '-')
    143 		number = 0 - number;
    144 	if (!exponentUsed)
    145 		exponent = 0;
    146 	otherExponent = 0;
    147 	if (input[i] == 'e' && (IS_DIGIT(input[i+1]) || (input[i+1] == '-' && IS_DIGIT(input[i+2])))) {
    148 		i++;
    149 		exponentStart = i;
    150 		i += input[i] == '-';
    151 		for (;; i++) {
    152 			if (IS_DIGIT(input[i]))
    153 				otherExponent = otherExponent * 10 + (input[i] - '0');
    154 			else
    155 				break;
    156 		}
    157 		if (input[exponentStart] == '-')
    158 			otherExponent = 0 - otherExponent;
    159 	}
    160 	exponent = exponent - otherExponent;
    161 	for (; exponent > 0; exponent--) {
    162 		number /= 10;
    163 	}
    164 	for (; exponent < 0; exponent++) {
    165 		number *= 10;
    166 	}
    167 	value->tag = CUDL_TAG_NUMBER;
    168 	value->data.number = number;
    169 	return i;
    170 }
    171 
    172 /* Convert UCS character to utf-8 bytes.
    173  * Return number of bytes generated.
    174  * Sets cudl_error on error.
    175  * Shamelessly lifted from https://github.com/cktan/tomc99 */
    176 static size_t cudl_ucs_to_utf8(int64_t ucs, char utf8[6]) {
    177 	if (
    178 		0xd800 <= ucs && ucs <= 0xdfff ||
    179 		0xfffe <= ucs && ucs <= 0xffff ||
    180 		ucs < 0
    181 	) {
    182 		cudl_err = CUDL_ERR_UNRECOGNISED_UNICODE;
    183 		return 0;
    184 	}
    185 
    186 	/* 0x00000000 - 0x0000007F:
    187 	   0xxxxxxx
    188 	*/
    189 	if (ucs <= 0x7F) {
    190 		utf8[0] = (unsigned char) ucs;
    191 		return 1;
    192 	}
    193 
    194 	/* 0x00000080 - 0x000007FF:
    195 	   110xxxxx 10xxxxxx
    196 	*/
    197 	if (ucs <= 0x000007FF) {
    198 		utf8[0] = 0xc0 | (ucs >> 6);
    199 		utf8[1] = 0x80 | (ucs & 0x3f);
    200 		return 2;
    201 	}
    202 
    203 	/* 0x00000800 - 0x0000FFFF:
    204 	   1110xxxx 10xxxxxx 10xxxxxx
    205 	*/
    206 	if (ucs <= 0x0000FFFF) {
    207 		utf8[0] = 0xe0 | (ucs >> 12);
    208 		utf8[1] = 0x80 | ((ucs >> 6) & 0x3f);
    209 		utf8[2] = 0x80 | (ucs & 0x3f);
    210 		return 3;
    211 	}
    212 
    213 	/* 0x00010000 - 0x001FFFFF:
    214 	   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    215 	*/
    216 	if (ucs <= 0x001FFFFF) {
    217 		utf8[0] = 0xf0 | (ucs >> 18);
    218 		utf8[1] = 0x80 | ((ucs >> 12) & 0x3f);
    219 		utf8[2] = 0x80 | ((ucs >> 6) & 0x3f);
    220 		utf8[3] = 0x80 | (ucs & 0x3f);
    221 		return 4;
    222 	}
    223 
    224 	/* 0x00200000 - 0x03FFFFFF:
    225 	   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    226 	*/
    227 	if (ucs <= 0x03FFFFFF) {
    228 		utf8[0] = 0xf8 | (ucs >> 24);
    229 		utf8[1] = 0x80 | ((ucs >> 18) & 0x3f);
    230 		utf8[2] = 0x80 | ((ucs >> 12) & 0x3f);
    231 		utf8[3] = 0x80 | ((ucs >> 6) & 0x3f);
    232 		utf8[4] = 0x80 | (ucs & 0x3f);
    233 		return 5;
    234 	}
    235 
    236 	/* 0x04000000 - 0x7FFFFFFF:
    237 	   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    238 	*/
    239 	if (ucs <= 0x7FFFFFFF) {
    240 		utf8[0] = 0xfc | (ucs >> 30);
    241 		utf8[1] = 0x80 | ((ucs >> 24) & 0x3f);
    242 		utf8[2] = 0x80 | ((ucs >> 18) & 0x3f);
    243 		utf8[3] = 0x80 | ((ucs >> 12) & 0x3f);
    244 		utf8[4] = 0x80 | ((ucs >> 6) & 0x3f);
    245 		utf8[5] = 0x80 | (ucs & 0x3f);
    246 		return 6;
    247 	}
    248 
    249 	cudl_err = CUDL_ERR_UNRECOGNISED_UNICODE;
    250 	return 0;
    251 }
    252 
    253 /* Parse a string starting after the opening quote.
    254  * Set string to be the contents of the string.
    255  * No memory is allocated if an error occurs. */
    256 static size_t parse_quoted_string(char *input, char **string) {
    257 	size_t length, capacity;
    258 	char *original_input, *newstring;
    259 	int64_t ucs;
    260 	int ucs_length, i;
    261 
    262 	length = 0;
    263 	capacity = 32;
    264 	original_input = input;
    265 	if ((*string = malloc(capacity)) == NULL) {
    266 		cudl_err = CUDL_ERR_OUT_OF_MEMORY;
    267 		return 0;
    268 	}
    269 	for (;;) {
    270 		if (*input == '\0') {
    271 			cudl_err = CUDL_ERR_UNMATCHED_QUOTE;
    272 			free(*string);
    273 			return 0;
    274 		}
    275 		if (*input == '"') {
    276 			if ((newstring = realloc(*string, length + 1)) == NULL) {
    277 				cudl_err = CUDL_ERR_OUT_OF_MEMORY;
    278 				free(*string);
    279 				return 0;
    280 			}
    281 			*string = newstring;
    282 			(*string)[length] = '\0';
    283 			input++;
    284 			return input - original_input;
    285 		}
    286 		if (length >= capacity) {
    287 			if ((newstring = realloc(*string, capacity * 2)) == NULL) {
    288 				cudl_err = CUDL_ERR_OUT_OF_MEMORY;
    289 				free(*string);
    290 				return 0;
    291 			}
    292 			*string = newstring;
    293 			capacity *= 2;
    294 		}
    295 		if (*input == '\\') {
    296 			input++;
    297 			switch (*input) {
    298 				case '\0':
    299 					cudl_err = CUDL_ERR_EXPECTED_ESCAPE_SEQUENCE;
    300 					free(*string);
    301 					return 0;
    302 				case 'b':
    303 					(*string)[length++] = '\b';
    304 					input++;
    305 					break;
    306 				case 't':
    307 					(*string)[length++] = '\t';
    308 					input++;
    309 					break;
    310 				case 'n':
    311 					(*string)[length++] = '\n';
    312 					input++;
    313 					break;
    314 				case 'r':
    315 					(*string)[length++] = '\r';
    316 					input++;
    317 					break;
    318 				case '"':
    319 					(*string)[length++] = '"';
    320 					input++;
    321 					break;
    322 				case '\\':
    323 					(*string)[length++] = '\\';
    324 					input++;
    325 					break;
    326 				case 'u':
    327 				case 'U':
    328 					ucs = 0;
    329 					ucs_length = (*input == 'u') ? 4 : 8;
    330 					input++;
    331 					for (i = 0; i < ucs_length; i++) {
    332 						if (input[i] == '\0') {
    333 							cudl_err = CUDL_ERR_EXPECTED_ESCAPE_SEQUENCE;
    334 							free(*string);
    335 							return 0;
    336 						}
    337 						if ('0' <= input[i] && input[i] <= '9') {
    338 							ucs = (ucs << 4) + (input[i] - '0');
    339 						} else if ('a' <= input[i] && input[i] <= 'z') {
    340 							ucs = (ucs << 4) + (input[i] - 'a' + 10);
    341 						} else if ('A' <= input[i] && input[i] <= 'Z') {
    342 							ucs = (ucs << 4) + (input[i] - 'A' + 10);
    343 						} else {
    344 							cudl_err = CUDL_ERR_EXPECTED_ESCAPE_SEQUENCE;
    345 							free(*string);
    346 							return 0;
    347 						}
    348 					}
    349 					if (length + 6 > capacity) {
    350 						if ((newstring = realloc(*string, capacity * 2)) == NULL) {
    351 							cudl_err = CUDL_ERR_OUT_OF_MEMORY;
    352 							free(*string);
    353 							return 0;
    354 						}
    355 						*string = newstring;
    356 						capacity *= 2;
    357 					}
    358 					length += cudl_ucs_to_utf8(ucs, (*string) + length);
    359 					if (cudl_err) {
    360 						free(*string);
    361 						return 0;
    362 					}
    363 					input += ucs_length;
    364 					break;
    365 				default:
    366 					(*string)[length++] = *input;
    367 					input++;
    368 					break;
    369 			}
    370 		} else {
    371 			(*string)[length++] = *(input++);
    372 		}
    373 	}
    374 }
    375 
    376 static size_t parse_array(char *input, struct cudl_value *value) {
    377 	size_t length, capacity;
    378 	struct cudl_value *values, *newvalues;
    379 	int i;
    380 	char *original_input;
    381 
    382 	original_input = input;
    383 	value->tag = CUDL_TAG_ARRAY;
    384 	length = 0;
    385 	capacity = 8;
    386 	if ((values = malloc(capacity * sizeof(struct cudl_value))) == NULL) {
    387 		cudl_err = CUDL_ERR_OUT_OF_MEMORY;
    388 		return 0;
    389 	}
    390 
    391 	STRIP_WHITESPACE(input);
    392 	for (;;) {
    393 		if (*input == '\0') {
    394 			cudl_err = CUDL_ERR_UNMATCHED_BRACK;
    395 			for (i = 0; i < length; i++)
    396 				cudl_deinit_value(values[i]);
    397 			free(values);
    398 			return 0;
    399 		} else if (*input == ']') {
    400 			input++;
    401 			values = realloc(values, length * sizeof(struct cudl_value));
    402 			value->data.array.length = length;
    403 			value->data.array.values = values;
    404 			return input - original_input;
    405 		}
    406 		if (length >= capacity) {
    407 			if ((newvalues = realloc(values, 2 * capacity * sizeof(struct cudl_value))) == NULL) {
    408 				cudl_err = CUDL_ERR_OUT_OF_MEMORY;
    409 				for (i = 0; i < length; i++)
    410 					cudl_deinit_value(values[i]);
    411 				free(values);
    412 				return 0;
    413 			}
    414 			values = newvalues;
    415 			capacity *= 2;
    416 		}
    417 		input += parse_value(input, values + length);
    418 		if (cudl_err) {
    419 			for (i = 0; i < length; i++)
    420 				cudl_deinit_value(values[i]);
    421 			free(values);
    422 			return 0;
    423 		}
    424 		length++;
    425 	}
    426 }
    427 
    428 static size_t parse_map_key(char *input, char **key) {
    429 	char *original_input;
    430 	switch (*input) {
    431 		case '\0':
    432 			cudl_err = CUDL_ERR_EXPECTED_MAP_KEY;
    433 			return 0;
    434 		case '"':
    435 			input++;
    436 			return parse_quoted_string(input, key) + 1;
    437 		default:
    438 			original_input = input;
    439 			while (IS_KEY_CHAR(*input))
    440 				input++;
    441 			if (input == original_input) {
    442 				cudl_err = CUDL_ERR_EXPECTED_MAP_KEY;
    443 				return 0;
    444 			}
    445 			if ((*key = malloc(input - original_input + 1)) == NULL) {
    446 				cudl_err = CUDL_ERR_OUT_OF_MEMORY;
    447 				return 0;
    448 			}
    449 			memcpy(*key, original_input, input - original_input);
    450 			(*key)[input - original_input] = '\0';
    451 			return input - original_input;
    452 	}
    453 }
    454 
    455 static size_t parse_map(char *input, struct cudl_value *value, char end_char) {
    456 	char *original_input;
    457 	int i;
    458 	struct cudl_map_field *fields, *newfields;
    459 	size_t length, capacity;
    460 
    461 	original_input = input;
    462 	value->tag = CUDL_TAG_MAP;
    463 	length = 0;
    464 	capacity = 8;
    465 	if ((fields = malloc(capacity * sizeof(struct cudl_map_field))) == NULL) {
    466 		cudl_err = CUDL_ERR_OUT_OF_MEMORY;
    467 		return 0;
    468 	}
    469 
    470 	STRIP_WHITESPACE(input);
    471 	for (;;) {
    472 		if (*input == end_char) {
    473 			input++;
    474 			fields = realloc(fields, length * sizeof(struct cudl_map_field));
    475 			value->data.map.length = length;
    476 			value->data.map.fields = fields;
    477 			return input - original_input;
    478 		}
    479 		if (*input == '\0') {
    480 			cudl_err = CUDL_ERR_UNMATCHED_BRACE;
    481 			for (i = 0; i < length; i++) {
    482 				cudl_deinit_value(fields[i].value);
    483 				free(fields[i].key);
    484 			}
    485 			free(fields);
    486 			return 0;
    487 		}
    488 		if (length >= capacity) {
    489 			if ((newfields = realloc(fields,  2 * capacity * sizeof(struct cudl_map_field))) == NULL) {
    490 				cudl_err = CUDL_ERR_OUT_OF_MEMORY;
    491 				for (i = 0; i < length; i++) {
    492 					cudl_deinit_value(fields[i].value);
    493 					free(fields[i].key);
    494 				}
    495 				free(fields);
    496 				return 0;
    497 			}
    498 			fields = newfields;
    499 			capacity *= 2;
    500 		}
    501 		input += parse_map_key(input, &fields[length].key);
    502 		if (cudl_err) {
    503 			for (i = 0; i < length; i++) {
    504 				cudl_deinit_value(fields[i].value);
    505 				free(fields[i].key);
    506 			}
    507 			free(fields);
    508 			return 0;
    509 		}
    510 		STRIP_WHITESPACE(input);
    511 		if (*input != ':') {
    512 			cudl_err = CUDL_ERR_EXPECTED_COLON;
    513 			for (i = 0; i < length; i++) {
    514 				cudl_deinit_value(fields[i].value);
    515 				free(fields[i].key);
    516 			}
    517 			free(fields[length].key);
    518 			free(fields);
    519 			return 0;
    520 		}
    521 		input++;
    522 		STRIP_WHITESPACE(input);
    523 		input += parse_value(input, &fields[length].value);
    524 		if (cudl_err) {
    525 			for (i = 0; i < length; i++) {
    526 				cudl_deinit_value(fields[i].value);
    527 				free(fields[i].key);
    528 			}
    529 			free(fields[length].key);
    530 			free(fields);
    531 			return 0;
    532 		}
    533 		length++;
    534 	}
    535 }
    536 
    537 static size_t _parse_value(char *input, struct cudl_value *value) {
    538 	if (*input == '%')
    539 		return parse_bool_or_null(++input, value) + 1;
    540 	if (*input == '[')
    541 		return parse_array(++input, value) + 1;
    542 	if (*input == '{')
    543 		return parse_map(++input, value, '}') + 1;
    544 	if (*input == '"') {
    545 		value->tag = CUDL_TAG_STRING;
    546 		return parse_quoted_string(++input, &value->data.string) + 1;
    547 	}
    548 	if (IS_DIGIT(*input) || *input == '-')
    549 		return parse_number(input, value);
    550 	cudl_err = CUDL_ERR_UNRECOGNISED_VALUE;
    551 	return 0;
    552 }
    553 
    554 static size_t parse_value(char *input, struct cudl_value *value) {
    555 	char *original_input;
    556 	original_input = input;
    557 	input += _parse_value(input, value);
    558 	STRIP_WHITESPACE(input);
    559 	return input - original_input;
    560 }
    561 
    562 void cudl_parse_from_file(FILE *file, struct cudl_value *value) {
    563 	char *input, *original_input;
    564 	if ((original_input = input = fread_all(file)) == NULL) {
    565 		if (ferror(file))
    566 			cudl_err = CUDL_ERR_READING;
    567 		else
    568 			cudl_err = CUDL_ERR_OUT_OF_MEMORY;
    569 		return;
    570 	}
    571 	input += cudl_parse(input, value);
    572 	if (*input != '\0')
    573 		cudl_deinit_value(*value);
    574 	free(original_input);
    575 }
    576 
    577 size_t cudl_parse(char *input, struct cudl_value *value) {
    578 	STRIP_WHITESPACE(input);
    579 	return parse_value(input, value);
    580 }