#include #include #include #include #include "cudl.h" #define STRIP_WHITESPACE(text) while (isspace(*(text))) (text)++ #define IS_KEY_CHAR(c) (\ 'a' <= (c) && (c) <= 'z' ||\ 'A' <= (c) && (c) <= 'Z' ||\ '0' <= (c) && (c) <= '9' ||\ (c) == '_' || (c) == '-'\ ) #define IS_DIGIT(c) (\ '0' <= (c) && (c) <= '9'\ ) int cudl_err = CUDL_OK; static char *fread_all(FILE *file) { size_t size; char *buffer; fseek(file, 0, SEEK_END); size = ftell(file); rewind(file); clearerr(file); if ((buffer = malloc(size + 1)) == NULL) return NULL; if (fread(buffer, 1, size, file) != size) { free(buffer); return NULL; } buffer[size] = '\0'; return buffer; } void cudl_debug(struct cudl_value value) { int i; switch (value.tag) { case CUDL_TAG_NULL: printf("%%null"); break; case CUDL_TAG_BOOL: if (value.data.boolean) printf("%%true"); else printf("%%false"); break; case CUDL_TAG_NUMBER: printf("%lf", value.data.number); break; case CUDL_TAG_STRING: printf("\"%s\"", value.data.string); break;; case CUDL_TAG_ARRAY: printf("["); for (i = 0; i < value.data.array.length; i++) { if (i != 0) printf(" "); cudl_debug(value.data.array.values[i]); } printf("]"); break; case CUDL_TAG_MAP: printf("{"); for (i = 0; i < value.data.map.length; i++) { if (i != 0) printf(" "); printf("\"%s\": ", value.data.map.fields[i].key); cudl_debug(value.data.map.fields[i].value); } printf("}"); break; default: printf("UNKNOWN"); break; } } /* Free all children of the value, not the value itself */ void cudl_deinit_value(struct cudl_value value) { int i; switch (value.tag) { case CUDL_TAG_ARRAY: for (i = 0; i < value.data.array.length; i++) { cudl_deinit_value(value.data.array.values[i]); } free(value.data.array.values); break; case CUDL_TAG_NULL: default: break; } } /* Parse a value from input and store it in value. * Return the number of bytes consumed. * Input must end with a null byte */ static size_t parse_value(char *input, struct cudl_value *value); static size_t parse_bool_or_null(char *input, struct cudl_value *value) { if (strncmp(input, "null", 4) == 0) { value->tag = CUDL_TAG_NULL; return 4; } if (strncmp(input, "true", 4) == 0) { value->tag = CUDL_TAG_BOOL; value->data.boolean = 1; return 4; } if (strncmp(input, "false", 5) == 0) { value->tag = CUDL_TAG_BOOL; value->data.boolean = 0; return 5; } cudl_err = CUDL_ERR_EXPECTED_BOOL_OR_NULL; return 0; } static size_t parse_number(char *input, struct cudl_value *value) { double number; size_t i, exponentStart; int exponent, otherExponent; int exponentUsed; exponentUsed = 0; number = 0; i = input[0] == '-'; for (;; i++) { if (IS_DIGIT(input[i])) { number = number * 10 + (input[i] - '0'); exponent++; continue; } else if (input[i] == '.') { exponent = 0; exponentUsed = 1; continue; } break; } if (input[0] == '-') number = 0 - number; if (!exponentUsed) exponent = 0; otherExponent = 0; if (input[i] == 'e' && (IS_DIGIT(input[i+1]) || (input[i+1] == '-' && IS_DIGIT(input[i+2])))) { i++; exponentStart = i; i += input[i] == '-'; for (;; i++) { if (IS_DIGIT(input[i])) otherExponent = otherExponent * 10 + (input[i] - '0'); else break; } if (input[exponentStart] == '-') otherExponent = 0 - otherExponent; } exponent = exponent - otherExponent; for (; exponent > 0; exponent--) { number /= 10; } for (; exponent < 0; exponent++) { number *= 10; } value->tag = CUDL_TAG_NUMBER; value->data.number = number; return i; } /* Convert UCS character to utf-8 bytes. * Return number of bytes generated. * Sets cudl_error on error. * Shamelessly lifted from https://github.com/cktan/tomc99 */ static size_t cudl_ucs_to_utf8(int64_t ucs, char utf8[6]) { if ( 0xd800 <= ucs && ucs <= 0xdfff || 0xfffe <= ucs && ucs <= 0xffff || ucs < 0 ) { cudl_err = CUDL_ERR_UNRECOGNISED_UNICODE; return 0; } /* 0x00000000 - 0x0000007F: 0xxxxxxx */ if (ucs <= 0x7F) { utf8[0] = (unsigned char) ucs; return 1; } /* 0x00000080 - 0x000007FF: 110xxxxx 10xxxxxx */ if (ucs <= 0x000007FF) { utf8[0] = 0xc0 | (ucs >> 6); utf8[1] = 0x80 | (ucs & 0x3f); return 2; } /* 0x00000800 - 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx */ if (ucs <= 0x0000FFFF) { utf8[0] = 0xe0 | (ucs >> 12); utf8[1] = 0x80 | ((ucs >> 6) & 0x3f); utf8[2] = 0x80 | (ucs & 0x3f); return 3; } /* 0x00010000 - 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ if (ucs <= 0x001FFFFF) { utf8[0] = 0xf0 | (ucs >> 18); utf8[1] = 0x80 | ((ucs >> 12) & 0x3f); utf8[2] = 0x80 | ((ucs >> 6) & 0x3f); utf8[3] = 0x80 | (ucs & 0x3f); return 4; } /* 0x00200000 - 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ if (ucs <= 0x03FFFFFF) { utf8[0] = 0xf8 | (ucs >> 24); utf8[1] = 0x80 | ((ucs >> 18) & 0x3f); utf8[2] = 0x80 | ((ucs >> 12) & 0x3f); utf8[3] = 0x80 | ((ucs >> 6) & 0x3f); utf8[4] = 0x80 | (ucs & 0x3f); return 5; } /* 0x04000000 - 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ if (ucs <= 0x7FFFFFFF) { utf8[0] = 0xfc | (ucs >> 30); utf8[1] = 0x80 | ((ucs >> 24) & 0x3f); utf8[2] = 0x80 | ((ucs >> 18) & 0x3f); utf8[3] = 0x80 | ((ucs >> 12) & 0x3f); utf8[4] = 0x80 | ((ucs >> 6) & 0x3f); utf8[5] = 0x80 | (ucs & 0x3f); return 6; } cudl_err = CUDL_ERR_UNRECOGNISED_UNICODE; return 0; } /* Parse a string starting after the opening quote. * Set string to be the contents of the string. * No memory is allocated if an error occurs. */ static size_t parse_quoted_string(char *input, char **string) { size_t length, capacity; char *original_input, *newstring; int64_t ucs; int ucs_length, i; length = 0; capacity = 32; original_input = input; if ((*string = malloc(capacity)) == NULL) { cudl_err = CUDL_ERR_OUT_OF_MEMORY; return 0; } for (;;) { if (*input == '\0') { cudl_err = CUDL_ERR_UNMATCHED_QUOTE; free(*string); return 0; } if (*input == '"') { if ((newstring = realloc(*string, length + 1)) == NULL) { cudl_err = CUDL_ERR_OUT_OF_MEMORY; free(*string); return 0; } *string = newstring; (*string)[length] = '\0'; input++; return input - original_input; } if (length >= capacity) { if ((newstring = realloc(*string, capacity * 2)) == NULL) { cudl_err = CUDL_ERR_OUT_OF_MEMORY; free(*string); return 0; } *string = newstring; capacity *= 2; } if (*input == '\\') { input++; switch (*input) { case '\0': cudl_err = CUDL_ERR_EXPECTED_ESCAPE_SEQUENCE; free(*string); return 0; case 'b': (*string)[length++] = '\b'; input++; break; case 't': (*string)[length++] = '\t'; input++; break; case 'n': (*string)[length++] = '\n'; input++; break; case 'r': (*string)[length++] = '\r'; input++; break; case '"': (*string)[length++] = '"'; input++; break; case '\\': (*string)[length++] = '\\'; input++; break; case 'u': case 'U': ucs = 0; ucs_length = (*input == 'u') ? 4 : 8; input++; for (i = 0; i < ucs_length; i++) { if (input[i] == '\0') { cudl_err = CUDL_ERR_EXPECTED_ESCAPE_SEQUENCE; free(*string); return 0; } if ('0' <= input[i] && input[i] <= '9') { ucs = (ucs << 4) + (input[i] - '0'); } else if ('a' <= input[i] && input[i] <= 'z') { ucs = (ucs << 4) + (input[i] - 'a' + 10); } else if ('A' <= input[i] && input[i] <= 'Z') { ucs = (ucs << 4) + (input[i] - 'A' + 10); } else { cudl_err = CUDL_ERR_EXPECTED_ESCAPE_SEQUENCE; free(*string); return 0; } } if (length + 6 > capacity) { if ((newstring = realloc(*string, capacity * 2)) == NULL) { cudl_err = CUDL_ERR_OUT_OF_MEMORY; free(*string); return 0; } *string = newstring; capacity *= 2; } length += cudl_ucs_to_utf8(ucs, (*string) + length); if (cudl_err) { free(*string); return 0; } input += ucs_length; break; default: (*string)[length++] = *input; input++; break; } } else { (*string)[length++] = *(input++); } } } static size_t parse_array(char *input, struct cudl_value *value) { size_t length, capacity; struct cudl_value *values, *newvalues; int i; char *original_input; original_input = input; value->tag = CUDL_TAG_ARRAY; length = 0; capacity = 8; if ((values = malloc(capacity * sizeof(struct cudl_value))) == NULL) { cudl_err = CUDL_ERR_OUT_OF_MEMORY; return 0; } STRIP_WHITESPACE(input); for (;;) { if (*input == '\0') { cudl_err = CUDL_ERR_UNMATCHED_BRACK; for (i = 0; i < length; i++) cudl_deinit_value(values[i]); free(values); return 0; } else if (*input == ']') { input++; values = realloc(values, length * sizeof(struct cudl_value)); value->data.array.length = length; value->data.array.values = values; return input - original_input; } if (length >= capacity) { if ((newvalues = realloc(values, 2 * capacity * sizeof(struct cudl_value))) == NULL) { cudl_err = CUDL_ERR_OUT_OF_MEMORY; for (i = 0; i < length; i++) cudl_deinit_value(values[i]); free(values); return 0; } values = newvalues; capacity *= 2; } input += parse_value(input, values + length); if (cudl_err) { for (i = 0; i < length; i++) cudl_deinit_value(values[i]); free(values); return 0; } length++; } } static size_t parse_map_key(char *input, char **key) { char *original_input; switch (*input) { case '\0': cudl_err = CUDL_ERR_EXPECTED_MAP_KEY; return 0; case '"': input++; return parse_quoted_string(input, key) + 1; default: original_input = input; while (IS_KEY_CHAR(*input)) input++; if (input == original_input) { cudl_err = CUDL_ERR_EXPECTED_MAP_KEY; return 0; } if ((*key = malloc(input - original_input + 1)) == NULL) { cudl_err = CUDL_ERR_OUT_OF_MEMORY; return 0; } memcpy(*key, original_input, input - original_input); (*key)[input - original_input] = '\0'; return input - original_input; } } static size_t parse_map(char *input, struct cudl_value *value, char end_char) { char *original_input; int i; struct cudl_map_field *fields, *newfields; size_t length, capacity; original_input = input; value->tag = CUDL_TAG_MAP; length = 0; capacity = 8; if ((fields = malloc(capacity * sizeof(struct cudl_map_field))) == NULL) { cudl_err = CUDL_ERR_OUT_OF_MEMORY; return 0; } STRIP_WHITESPACE(input); for (;;) { if (*input == end_char) { input++; fields = realloc(fields, length * sizeof(struct cudl_map_field)); value->data.map.length = length; value->data.map.fields = fields; return input - original_input; } if (*input == '\0') { cudl_err = CUDL_ERR_UNMATCHED_BRACE; for (i = 0; i < length; i++) { cudl_deinit_value(fields[i].value); free(fields[i].key); } free(fields); return 0; } if (length >= capacity) { if ((newfields = realloc(fields, 2 * capacity * sizeof(struct cudl_map_field))) == NULL) { cudl_err = CUDL_ERR_OUT_OF_MEMORY; for (i = 0; i < length; i++) { cudl_deinit_value(fields[i].value); free(fields[i].key); } free(fields); return 0; } fields = newfields; capacity *= 2; } input += parse_map_key(input, &fields[length].key); if (cudl_err) { for (i = 0; i < length; i++) { cudl_deinit_value(fields[i].value); free(fields[i].key); } free(fields); return 0; } STRIP_WHITESPACE(input); if (*input != ':') { cudl_err = CUDL_ERR_EXPECTED_COLON; for (i = 0; i < length; i++) { cudl_deinit_value(fields[i].value); free(fields[i].key); } free(fields[length].key); free(fields); return 0; } input++; STRIP_WHITESPACE(input); input += parse_value(input, &fields[length].value); if (cudl_err) { for (i = 0; i < length; i++) { cudl_deinit_value(fields[i].value); free(fields[i].key); } free(fields[length].key); free(fields); return 0; } length++; } } static size_t _parse_value(char *input, struct cudl_value *value) { if (*input == '%') return parse_bool_or_null(++input, value) + 1; if (*input == '[') return parse_array(++input, value) + 1; if (*input == '{') return parse_map(++input, value, '}') + 1; if (*input == '"') { value->tag = CUDL_TAG_STRING; return parse_quoted_string(++input, &value->data.string) + 1; } if (IS_DIGIT(*input) || *input == '-') return parse_number(input, value); cudl_err = CUDL_ERR_UNRECOGNISED_VALUE; return 0; } static size_t parse_value(char *input, struct cudl_value *value) { char *original_input; original_input = input; input += _parse_value(input, value); STRIP_WHITESPACE(input); return input - original_input; } void cudl_parse_from_file(FILE *file, struct cudl_value *value) { char *input, *original_input; if ((original_input = input = fread_all(file)) == NULL) { if (ferror(file)) cudl_err = CUDL_ERR_READING; else cudl_err = CUDL_ERR_OUT_OF_MEMORY; return; } input += cudl_parse(input, value); if (*input != '\0') cudl_deinit_value(*value); free(original_input); } size_t cudl_parse(char *input, struct cudl_value *value) { STRIP_WHITESPACE(input); return parse_value(input, value); }