diff options
Diffstat (limited to 'base/gsargs.c')
-rw-r--r-- | base/gsargs.c | 417 |
1 files changed, 417 insertions, 0 deletions
diff --git a/base/gsargs.c b/base/gsargs.c new file mode 100644 index 00000000..24e9ccec --- /dev/null +++ b/base/gsargs.c @@ -0,0 +1,417 @@ +/* Copyright (C) 2001-2019 Artifex Software, Inc. + All Rights Reserved. + + This software is provided AS-IS with no warranty, either express or + implied. + + This software is distributed under license and may not be copied, + modified or distributed except as expressly authorized under the terms + of the license contained in the file LICENSE in this distribution. + + Refer to licensing information at http://www.artifex.com or contact + Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, + CA 94945, U.S.A., +1(415)492-9861, for further information. +*/ + + +/* Command line argument list management */ +#include "ctype_.h" +#include "stdio_.h" +#include "string_.h" +#include "gsexit.h" +#include "gsmemory.h" +#include "gsargs.h" +#include "gserrors.h" +#include "gp.h" + +int codepoint_to_utf8(char *cstr, int rune) +{ + int idx = 0; + + if (rune < 0x80) { + cstr[idx++] = rune; + } else { + if (rune < 0x800) { + cstr[idx++] = 0xc0 | (rune>>6); + } else { + if (rune < 0x10000) { + cstr[idx++] = 0xe0 | (rune>>12); + } else { + if (rune < 0x200000) { + cstr[idx++] = 0xf0 | (rune>>18); + } else { + /* Shouldn't ever be required, but included for completeness */ + if (rune < 0x4000000) { + cstr[idx++] = 0xf8 | (rune>>24); + } else { + cstr[idx++] = 0xfc | (rune>>30); + cstr[idx++] = 0x80 | ((rune>>24) & 0x3f); + } + cstr[idx++] = 0x80 | ((rune>>18) & 0x3f); + } + cstr[idx++] = 0x80 | ((rune>>12) & 0x3f); + } + cstr[idx++] = 0x80 | ((rune>>6) & 0x3f); + } + cstr[idx++] = 0x80 | (rune & 0x3f); + } + + return idx; +} + +static int get_codepoint_utf8(gp_file *file, const char **astr) +{ + int c; + int rune; + int len; + + /* This code spots the BOM for utf8 and ignores it. Strictly speaking + * this may be wrong, as we are only supposed to ignore it at the beginning + * of the string, but if anyone is stupid enough to use ZWNBSP (zero width + * non breaking space) in the middle of their strings, then they deserve + * what they get. */ + + do { + c = (file ? gp_fgetc(file) : (**astr ? (int)(unsigned char)*(*astr)++ : EOF)); + if (c == EOF) + return EOF; + if (c < 0x80) + return c; +lead: /* We've just read a byte >= 0x80, presumably a leading byte */ + if (c < 0xc0) + continue; /* Illegal - skip it */ + else if (c < 0xe0) + len = 1, rune = c & 0x1f; + else if (c < 0xf0) + len = 2, rune = c & 0xf; + else if (c < 0xf8) + len = 3, rune = c & 7; + else if (c < 0xfc) + len = 4, rune = c & 3; + else if (c < 0xfe) + len = 5, rune = c & 1; + else + continue; /* Illegal - skip it */ + do { + c = (file ? gp_fgetc(file) : (**astr ? (int)(unsigned char)*(*astr)++ : EOF)); + if (c == EOF) + return EOF; + rune = (rune<<6) | (c & 0x3f); + } while (((c & 0xC0) == 0x80) && --len); + if (len) { + /* The rune we are collecting is improperly formed. */ + if (c < 0x80) { + /* Just return the simple char we've ended on. */ + return c; + } + /* Start collecting again */ + goto lead; + } + if (rune == 0xFEFF) + continue; /* BOM. Skip it */ + break; + } while (1); + + return rune; +} + +/* Initialize an arg list. */ +int +arg_init(arg_list * pal, + const char **argv, + int argc, + gp_file *(*arg_fopen)(const char *fname, void *fopen_data), + void *fopen_data, + int (*get_codepoint)(gp_file *file, const char **astr), + gs_memory_t *memory) +{ + int code; + const char *arg; + + pal->expand_ats = true; + pal->arg_fopen = arg_fopen; + pal->fopen_data = fopen_data; + pal->get_codepoint = (get_codepoint ? get_codepoint : get_codepoint_utf8); + pal->memory = memory; + pal->argp = argv; + pal->argn = argc; + pal->depth = 0; + pal->sources[0].is_file = 0; + pal->sources[0].u.s.memory = NULL; + pal->sources[0].u.s.decoded = 0; + pal->sources[0].u.s.parsed = 0; + + /* Stash the 0th one */ + code = arg_next(pal, &arg, memory); + if (code < 0) + return code; + return gs_lib_ctx_stash_exe(memory->gs_lib_ctx, arg); +} + +/* Push a string onto an arg list. */ +int +arg_push_memory_string(arg_list * pal, char *str, bool parsed, gs_memory_t * mem) +{ + return arg_push_decoded_memory_string(pal, str, parsed, parsed, mem); +} + +int +arg_push_decoded_memory_string(arg_list * pal, char *str, bool parsed, bool decoded, gs_memory_t * mem) +{ + arg_source *pas; + + if (pal->depth+1 == arg_depth_max) { + lprintf("Too much nesting of @-files.\n"); + return 1; + } + pas = &pal->sources[++pal->depth]; + pas->is_file = false; + pas->u.s.parsed = parsed; + pas->u.s.decoded = decoded; + pas->u.s.chars = str; + pas->u.s.memory = mem; + pas->u.s.str = str; + return 0; +} + +/* Clean up an arg list. */ +void +arg_finit(arg_list * pal) +{ + /* No cleanup is required for level 0 */ + while (pal->depth) { + arg_source *pas = &pal->sources[pal->depth--]; + + if (pas->is_file) + gp_fclose(pas->u.file); + else if (pas->u.s.memory) + gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_finit"); + } +} + +static int get_codepoint(arg_list *pal, arg_source *pas) +{ + int (*fn)(gp_file *file, const char **str); + + fn = (!pas->is_file && pas->u.s.decoded ? get_codepoint_utf8 : pal->get_codepoint); + return fn(pas->is_file ? pas->u.file : NULL, &pas->u.s.str); +} + +/* Get the next arg from a list. */ +/* Note that these are not copied to the heap. */ +/* returns: + * >0 - valid argument + * 0 - arguments exhausted + * <0 - error condition + * *argstr is *always* set: to the arg string if it is valid, + * or to NULL otherwise + */ +int +arg_next(arg_list * pal, const char **argstr, const gs_memory_t *errmem) +{ + arg_source *pas; + char *cstr; + int c; + int i; + bool in_quote, eol; + + *argstr = NULL; + + /* Loop over arguments, finding one to return. */ + do { + pas = &pal->sources[pal->depth]; + if (!pas->is_file && pas->u.s.parsed) { + /* This string is a "pushed-back" argument (retrieved + * by a preceding arg_next(), but not processed). No + * decoding is required. */ + /* assert(pas->u.s.decoded); */ + if (strlen(pas->u.s.str) >= arg_str_max) { + errprintf(errmem, "Command too long: %s\n", pas->u.s.str); + return_error(gs_error_Fatal); + } + strcpy(pal->cstr, pas->u.s.str); + *argstr = pal->cstr; + if (pas->u.s.memory) + gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_next"); + pal->depth--; + } else { + /* We need to decode the next argument */ + if (pal->depth == 0) { + if (pal->argn <= 0) + return 0; /* all done */ + /* Move onto the next argument from the string. */ + pal->argn--; + pas->u.s.str = *(pal->argp++); + } + /* Skip a prefix of whitespace. */ + do { + c = get_codepoint(pal, pas); + } while (c > 0 && c < 256 && isspace(c)); + if (c == EOF) { + /* EOF before any argument characters. */ + if (pas->is_file) + gp_fclose(pas->u.file); + else if (pas->u.s.memory) + gs_free_object(pas->u.s.memory, pas->u.s.chars, + "arg_next"); + /* If depth is 0, then we are reading from the simple + * argument list and we just hit an "empty" argument + * (such as -o ""). Return this. */ + if (pal->depth == 0) + { + *argstr = pal->cstr; + pal->cstr[0] = 0; + break; + } + /* If depth > 0, then we're reading from a response + * file, and we've hit the end of the response file. + * Pop up one level and continue. */ + pal->depth--; + continue; /* Next argument */ + } + #define is_eol(c) (c == '\r' || c == '\n') + /* Convert from astr into pal->cstr, and return it as *argstr. */ + *argstr = cstr = pal->cstr; + in_quote = false; + /* We keep track of whether we have just read an "eol" or not, + * in order to skip # characters at the start of a line + * (possibly preceeded by whitespace). We do NOT want this to + * apply to the start of arguments in the arg list, so only + * set eol to be true, if we are in a file. */ + eol = pal->depth > 0; + for (i = 0;;) { + if (c == EOF) { + if (in_quote) { + cstr[i] = 0; + errprintf(errmem, + "Unterminated quote in @-file: %s\n", cstr); + return_error(gs_error_Fatal); + } + break; /* End of arg */ + } + /* c != 0 */ + /* If we aren't parsing from the arglist (i.e. depth > 0) + * then we break on whitespace (unless we're in quotes). */ + if (pal->depth > 0 && !in_quote && c > 0 && c < 256 && isspace(c)) + break; /* End of arg */ + /* c isn't leading or terminating whitespace. */ + if (c == '#' && eol) { + /* Skip a comment. */ + do { + c = get_codepoint(pal, pas); + } while (c != 0 && !is_eol(c) && c != EOF); + if (c == '\r') + c = get_codepoint(pal, pas); + if (c == '\n') + c = get_codepoint(pal, pas); + continue; /* Next char */ + } + if (c == '\\') { + /* Check for \ followed by newline. */ + c = get_codepoint(pal, pas); + if (is_eol(c)) { + if (c == '\r') + c = get_codepoint(pal, pas); + if (c == '\n') + c = get_codepoint(pal, pas); + eol = true; + continue; /* Next char */ + } + /* \ anywhere else is treated as a printing character. */ + /* This is different from the Unix shells. */ + if (i >= arg_str_max - 1) { + cstr[i] = 0; + errprintf(errmem, "Command too long: %s\n", cstr); + return_error(gs_error_Fatal); + } + cstr[i++] = '\\'; + eol = false; + continue; /* Next char */ + } + /* c will become part of the argument */ + if (i >= arg_str_max - 1) { + cstr[i] = 0; + errprintf(errmem, "Command too long: %s\n", cstr); + return_error(gs_error_Fatal); + } + /* Allow quotes to protect whitespace. */ + /* (special cases have already been handled and don't reach this point) */ + if (c == '"') + in_quote = !in_quote; + else + i += codepoint_to_utf8(&cstr[i], c); + eol = is_eol(c); + c = get_codepoint(pal, pas); + } + cstr[i] = 0; + } + + /* At this point *argstr is full of utf8 encoded argument. */ + /* If it's an @filename argument, then deal with it, and never return + * it to the caller. */ + if (pal->expand_ats && **argstr == '@') { + char *fname; + gp_file *f; + if (pal->depth+1 == arg_depth_max) { + errprintf(errmem, "Too much nesting of @-files.\n"); + return_error(gs_error_Fatal); + } + fname = (char *)*argstr + 1; /* skip @ */ + + if (gs_add_control_path(pal->memory, gs_permit_file_reading, fname) < 0) + return_error(gs_error_Fatal); + + f = (*pal->arg_fopen) (fname, pal->fopen_data); + DISCARD(gs_remove_control_path(pal->memory, gs_permit_file_reading, fname)); + if (f == NULL) { + errprintf(errmem, "Unable to open command line file %s\n", *argstr); + return_error(gs_error_Fatal); + } + pas = &pal->sources[++pal->depth]; + pas->is_file = true; + pas->u.file = f; + *argstr = NULL; /* Empty the argument string so we don't return it. */ + continue; /* Loop back to parse the first arg from the file. */ + } + } while (*argstr == NULL || **argstr == 0); /* Until we get a non-empty arg */ + + return 1; +} + +/* Copy an argument string to the heap. */ +char * +arg_copy(const char *str, gs_memory_t * mem) +{ + char *sstr = (char *)gs_alloc_bytes(mem, strlen(str) + 1, "arg_copy"); + + if (sstr == 0) { + lprintf("Out of memory!\n"); + return NULL; + } + strcpy(sstr, str); + return sstr; +} + +/* Free a previously arg_copy'd string */ +void +arg_free(char *str, gs_memory_t * mem) +{ + gs_free_object(mem, str, "arg_copy"); +} + +int arg_strcmp(arg_list *pal, const char *arg, const char *match) +{ + int rune, c; + + if (!arg || !match) + return 1; + do { + rune = pal->get_codepoint(NULL, &arg); + if (rune == -1) + rune = 0; + c = *match++; + if (rune != c) + return rune - c; + } while (rune && c); + return 0; +} |