/* parseconf.c - state machine-driven dynamic configuration file parser

   Copyright (C) 2002  Russell Kroll

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

/* parseconf, version 4.
 *
 * This one abandons the "callback" system introduced last time.  It
 * didn't turn out as well as I had hoped - you got stuck "behind"
 * parseconf too often.
 *
 * There is now a context buffer, and you call pconf_init to set it up.
 * All subsequent calls must have it as the first argument.  There are
 * three entry points for parsing lines.  You can have it read a file
 * (pconf_file_begin and pconf_file_next), take lines directly from
 * the caller (pconf_line), or go along a character at a time (pconf_char).
 * The parsing is identical no matter how you feed it.
 *
 * Since there are no more callbacks, you take the successful return
 * from the function and access ctx->arglist and ctx->numargs yourself.
 * You must check for errors with pconf_parse_error before using them,
 * since it might not be complete.  This lets the caller handle all
 * error reporting that's nonfatal.
 *
 * Fatal errors are those that involve memory allocation.  If the user
 * defines an error handler when calling pconf_init, that function will
 * be called with the error message before parseconf exits.  By default
 * it will just write the message to stderr before exiting.
 *
 * Input vs. Output:
 *
 * What it reads             --> What ends up in each argument
 *
 * this is a line            --> "this" "is" "a" "line"
 * this "is also" a line     --> "this" "is also" "a" "line"
 * embedded\ space           --> "embedded space"
 * embedded\\backslash       --> "embedded\backslash"
 *
 * Arguments are split by whitespace (isspace()) unless that whitespace
 * occurs inside a "quoted pair like this".
 *
 * You can also escape the double quote (") character.  The backslash
 * also allows you to join lines, allowing you to have logical lines
 * that span physical lines, just like you can do in some shells.
 *
 * Lines normally end with a newline, but reaching EOF will also force
 * parsing on what's been scanned so far.
 *
 * Design:
 *
 * Characters are read one at a time to drive the state machine.
 * As words are completed (by hitting whitespace or ending a "" item),
 * they are committed to the next buffer in the arglist.  realloc is
 * used, so the buffer can grow to handle bigger words.
 *
 * The arglist also grows as necessary with a similar approach.  As a
 * result, you can parse extremely long words and lines with an insane
 * number of elements.
 *
 * Finally, there is argsize, which remembers how long each of the
 * arglist elements are.  This is how we know when to expand them.
 *
 */

#include "common.h"

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

#include "parseconf.h"
#include "attribute.h"
#include "nut_stdint.h"

/* possible states */

#define STATE_FINDWORDSTART	1
#define STATE_FINDEOL		2
#define STATE_QUOTECOLLECT	3
#define STATE_QC_LITERAL	4
#define STATE_COLLECT		5
#define STATE_COLLECTLITERAL	6
#define STATE_ENDOFLINE		7
#define STATE_PARSEERR		8

static void pconf_fatal(PCONF_CTX_t *ctx, const char *errtxt)
	__attribute__((noreturn));

/* Report an unrecoverable error and terminate the process.
 *
 * The message goes to ctx->errhandler when one is set, otherwise to
 * stderr.  Either way exit(EXIT_FAILURE) is called afterwards, so this
 * function never returns to its caller.
 */
static void pconf_fatal(PCONF_CTX_t *ctx, const char *errtxt)
{
	if (ctx->errhandler)
		ctx->errhandler(errtxt);
	else
		fprintf(stderr, "parseconf: fatal error: %s\n", errtxt);

	exit(EXIT_FAILURE);
}

/* Commit the contents of ctx->wordbuf as the next entry of ctx->arglist.
 *
 * Both the list itself and the per-entry string storage are grown with
 * realloc when needed; ctx->argsize[] records the capacity of each entry
 * so storage left over from earlier lines can be reused.  Allocation
 * failures are fatal (see pconf_fatal).
 */
static void add_arg_word(PCONF_CTX_t *ctx)
{
	size_t	argpos;
	size_t	wbuflen;

	/* this is where the new value goes */
	argpos = ctx->numargs;

	ctx->numargs++;

	/* when facing more args than ever before, expand the list */
	if (ctx->numargs > ctx->maxargs) {
		ctx->maxargs = ctx->numargs;

		/* resize the lists */
		ctx->arglist = realloc(ctx->arglist,
			sizeof(char *) * ctx->numargs);

		if (!ctx->arglist)
			pconf_fatal(ctx, "realloc arglist failed");

		ctx->argsize = realloc(ctx->argsize,
			sizeof(size_t) * ctx->numargs);

		if (!ctx->argsize)
			pconf_fatal(ctx, "realloc argsize failed");

		/* ensure sane starting values */
		ctx->arglist[argpos] = NULL;
		ctx->argsize[argpos] = 0;
	}

	wbuflen = strlen(ctx->wordbuf);

	/* now see if the string itself grew compared to last time */
	if (wbuflen >= ctx->argsize[argpos]) {
		size_t	newlen;

		/* allow for the trailing NUL */
		newlen = wbuflen + 1;

		/* expand the string storage */
		ctx->arglist[argpos] = realloc(ctx->arglist[argpos], newlen);

		if (!ctx->arglist[argpos])
			pconf_fatal(ctx, "realloc arglist member failed");

		/* remember the new size */
		ctx->argsize[argpos] = newlen;
	}

	/* the slot now holds at least wbuflen + 1 bytes, so the word and
	 * its terminating NUL can be copied in one go */
	memcpy(ctx->arglist[argpos], ctx->wordbuf, wbuflen + 1);
}

/* Append the current character (ctx->ch) to the word being collected.
 *
 * Characters outside the accepted range are reported on stderr and
 * dropped.  Once ctx->wordlen_limit (when nonzero) is reached, further
 * characters are silently ignored.  The word buffer grows in steps of
 * 8 bytes; an allocation failure is fatal.
 */
static void addchar(PCONF_CTX_t *ctx)
{
	size_t	wbuflen;

	wbuflen = strlen(ctx->wordbuf);

	/* CVE-2012-2944: only allow the subset of ASCII charset from Space to ~ */
	/* NOTE(review): 0x7f (DEL) also passes this test although the range
	 * named above ends at '~' (0x7e) - confirm which one is intended */
	if ((ctx->ch < 0x20) || (ctx->ch > 0x7f)) {
		fprintf(stderr, "addchar: discarding invalid character (0x%02x)!\n",
			(unsigned int)ctx->ch);
		return;
	}

	if (ctx->wordlen_limit != 0) {
		if (wbuflen >= ctx->wordlen_limit) {

			/* limit reached: don't append any more */
			return;
		}
	}

	/* allow for the null */
	if (wbuflen >= (ctx->wordbufsize - 1)) {
		ctx->wordbufsize += 8;

		ctx->wordbuf = realloc(ctx->wordbuf, ctx->wordbufsize);

		if (!ctx->wordbuf)
			pconf_fatal(ctx, "realloc wordbuf failed");

		/* repoint as wordbuf may have moved */
		ctx->wordptr = &ctx->wordbuf[wbuflen];
	}

	*ctx->wordptr++ = (char)ctx->ch;
	*ctx->wordptr = '\0';
}

/* Finish the word being collected and reset the word buffer.
 *
 * The word is stored through add_arg_word unless ctx->arg_limit (when
 * nonzero) has already been reached, in which case it is discarded.
 */
static void endofword(PCONF_CTX_t *ctx)
{
	if ((ctx->arg_limit == 0) || (ctx->numargs < ctx->arg_limit))
		add_arg_word(ctx);

	/* accepted or dropped, the buffer starts over empty */
	ctx->wordptr = ctx->wordbuf;
	*ctx->wordptr = '\0';
}

/* look for the beginning of a word */
static int findwordstart(PCONF_CTX_t *ctx)
{
	/* newline = the physical line is over, so the logical one is too */
	if (ctx->ch == 10)
		return STATE_ENDOFLINE;

	/* the rest of the line is a comment */
	if (ctx->ch == '#')
		return STATE_FINDEOL;

	/* space = not in a word yet, so loop back */
	/* (isspace is only defined for unsigned char values and EOF) */
	if (isspace((unsigned char)ctx->ch))
		return STATE_FINDWORDSTART;

	/* \ = literal = accept the next char blindly */
	if (ctx->ch == '\\')
		return STATE_COLLECTLITERAL;

	/* " = begin word bounded by quotes */
	if (ctx->ch == '"')
		return STATE_QUOTECOLLECT;

	/* at this point the word just started */
	addchar(ctx);

	/* if the first character is a '=' this is considered a whole word */
	if (ctx->ch == '=') {
		endofword(ctx);
		return STATE_FINDWORDSTART;
	}

	return STATE_COLLECT;
}

/* eat characters until the end of the line is found */
static int findeol(PCONF_CTX_t *ctx)
{
	/* newline = found it, so start a new line */
	if (ctx->ch == 10)
		return STATE_ENDOFLINE;

	/* come back here */
	return STATE_FINDEOL;
}

/* set up the error reporting details */
static void pconf_seterr(PCONF_CTX_t *ctx, const char *errmsg)
{
	snprintf(ctx->errmsg, PCONF_ERR_LEN, "%s", errmsg);

	ctx->error = 1;
}

/* quote characters inside a word bounded by "quotes" */
static int quotecollect(PCONF_CTX_t *ctx)
{
	/* user is trying to break us */
	if (ctx->ch == '#') {
		pconf_seterr(ctx, "Unbalanced word due to unescaped # in quotes");
		endofword(ctx);

		/* this makes us drop all the way out of the caller */
		return STATE_PARSEERR;
	}

	/* another " means we're done with this word */
	if (ctx->ch == '"') {
		endofword(ctx);

		return STATE_FINDWORDSTART;
	}

	/* literal - special case since it needs to return here */
	if (ctx->ch == '\\')
		return STATE_QC_LITERAL;

	/* otherwise save it and loop back */
	addchar(ctx);

	return STATE_QUOTECOLLECT;
}

/* take almost anything literally, but return to quotecollect */
static int qc_literal(PCONF_CTX_t *ctx)
{
	/* an escaped newline is swallowed: continue onto the next line of
	 * the file; anything else is stored as-is */
	if (ctx->ch != 10)
		addchar(ctx);

	return STATE_QUOTECOLLECT;
}

/* collect characters inside a word */
static int collect(PCONF_CTX_t *ctx)
{
	/* comment means the word is done, and skip to the end of the line */
	if (ctx->ch == '#') {
		endofword(ctx);

		return STATE_FINDEOL;
	}

	/* newline means the word is done, and the line is done */
	if (ctx->ch == 10) {
		endofword(ctx);

		return STATE_ENDOFLINE;
	}

	/* space means the word is done */
	/* (isspace is only defined for unsigned char values and EOF) */
	if (isspace((unsigned char)ctx->ch)) {
		endofword(ctx);

		return STATE_FINDWORDSTART;
	}

	/* '=' means the word is done and the = is a single char word */
	if (ctx->ch == '=') {
		endofword(ctx);

		/* findwordstart stores the '=' as a word of its own; its
		 * return value is the state returned below anyway */
		findwordstart(ctx);

		return STATE_FINDWORDSTART;
	}

	/* \ = literal = accept the next char blindly */
	if (ctx->ch == '\\')
		return STATE_COLLECTLITERAL;

	/* otherwise store it and come back for more */
	addchar(ctx);

	return STATE_COLLECT;
}

/* take almost anything literally */
static int collectliteral(PCONF_CTX_t *ctx)
{
	/* an escaped newline is swallowed: continue to the next line;
	 * anything else is stored as-is */
	if (ctx->ch != 10)
		addchar(ctx);

	return STATE_COLLECT;
}

/* clean up memory before going back to the user */
static void free_storage(PCONF_CTX_t *ctx)
{
	size_t	i;

	free(ctx->wordbuf);

	/* don't leave dangling pointers to the released word buffer */
	ctx->wordbuf = NULL;
	ctx->wordptr = NULL;
	ctx->wordbufsize = 0;

	/* clear out the individual words first */
	for (i = 0; i < ctx->maxargs; i++)
		free(ctx->arglist[i]);

	free(ctx->arglist);
	free(ctx->argsize);

	/* put things back to the initial state */
	ctx->arglist = NULL;
	ctx->argsize = NULL;
	ctx->numargs = 0;
	ctx->maxargs = 0;
}

/* Prepare a context for use by the other pconf_* functions.
 *
 * errhandler may be NULL; when set, it receives the message of a fatal
 * (allocation) error before the process exits.  Always returns 1: an
 * allocation failure here is fatal and does not return.
 */
int pconf_init(PCONF_CTX_t *ctx, void errhandler(const char *))
{
	/* store the handler first: pconf_fatal reads ctx->errhandler, so it
	 * has to be valid before anything below can fail */
	ctx->errhandler = errhandler;

	/* set up the ctx elements */
	ctx->f = NULL;
	ctx->state = STATE_FINDWORDSTART;
	ctx->numargs = 0;
	ctx->maxargs = 0;
	ctx->arg_limit = PCONF_DEFAULT_ARG_LIMIT;
	ctx->wordlen_limit = PCONF_DEFAULT_WORDLEN_LIMIT;
	ctx->linenum = 0;
	ctx->error = 0;
	ctx->arglist = NULL;
	ctx->argsize = NULL;

	ctx->wordbufsize = 16;
	ctx->wordbuf = calloc(1, ctx->wordbufsize);

	if (!ctx->wordbuf)
		pconf_fatal(ctx, "malloc wordbuf failed");

	ctx->wordptr = ctx->wordbuf;

	ctx->magic = PCONF_CTX_t_MAGIC;

	return 1;
}

/* Return 1 when ctx is non-NULL and carries the magic value set by
 * pconf_init, 0 otherwise.  A magic mismatch is also noted in
 * ctx->errmsg.
 */
static int check_magic(PCONF_CTX_t *ctx)
{
	if (!ctx)
		return 0;

	if (ctx->magic != PCONF_CTX_t_MAGIC) {
		snprintf(ctx->errmsg, PCONF_ERR_LEN, "Invalid ctx buffer");
		return 0;
	}

	return 1;
}

/* Open the file named fn for reading by pconf_file_next.
 *
 * Returns 1 on success.  Returns 0 if the context is invalid or the
 * file can't be opened; in the latter case ctx->errmsg describes why.
 */
int pconf_file_begin(PCONF_CTX_t *ctx, const char *fn)
{
	if (!check_magic(ctx))
		return 0;

	ctx->f = fopen(fn, "r");

	if (!ctx->f) {
		snprintf(ctx->errmsg, PCONF_ERR_LEN, "Can't open %s: %s",
			fn, strerror(errno));
		return 0;
	}

	/* prevent fd leaking to child processes */
	/* (the result is not checked, so a failure here is not reported) */
	fcntl(fileno(ctx->f), F_SETFD, FD_CLOEXEC);

	return 1;	/* OK */
}

/* Feed ctx->ch to the handler of the current state and store the state
 * that handler returns.
 */
static void parse_char(PCONF_CTX_t *ctx)
{
	switch (ctx->state) {
		case STATE_FINDWORDSTART:
			ctx->state = findwordstart(ctx);
			break;

		case STATE_FINDEOL:
			ctx->state = findeol(ctx);
			break;

		case STATE_QUOTECOLLECT:
			ctx->state = quotecollect(ctx);
			break;

		case STATE_QC_LITERAL:
			ctx->state = qc_literal(ctx);
			break;

		case STATE_COLLECT:
			ctx->state = collect(ctx);
			break;

		case STATE_COLLECTLITERAL:
			ctx->state = collectliteral(ctx);
			break;

		default:
			/* STATE_ENDOFLINE, STATE_PARSEERR and anything
			 * unknown: no handler, the character is ignored */
			break;
	}	/* switch */
}

/* return 1 if an error occurred, but only do it once */
int pconf_parse_error(PCONF_CTX_t *ctx)
{
	if (!check_magic(ctx))
		return 0;

	if (ctx->error == 1) {
		ctx->error = 0;
		return 1;
	}

	return 0;
}

/* clean up the ctx space */
void pconf_finish(PCONF_CTX_t *ctx)
{
	if (!check_magic(ctx))
		return;

	if (ctx->f) {
		fclose(ctx->f);
		ctx->f = NULL;
	}

	free_storage(ctx);

	/* the context must go through pconf_init again before reuse */
	ctx->magic = 0;
}

/* read from a file until a whole line is ready for use
 *
 * Returns 1 when a line (or a parse error, see pconf_parse_error) is
 * ready, and 0 when the context is invalid, no file is open, or the end
 * of the file was reached with nothing left over.
 */
int pconf_file_next(PCONF_CTX_t *ctx)
{
	if (!check_magic(ctx))
		return 0;

	/* nothing to read unless pconf_file_begin succeeded */
	if (!ctx->f)
		return 0;

	ctx->linenum++;

	/* start over for the new line */
	ctx->numargs = 0;
	ctx->state = STATE_FINDWORDSTART;

	while ((ctx->ch = fgetc(ctx->f)) != EOF) {
		parse_char(ctx);

		if (ctx->state == STATE_PARSEERR)
			return 1;

		if (ctx->state == STATE_ENDOFLINE)
			return 1;
	}

	/* deal with files that don't end in a newline */

	/* still building a word?  commit it before looking at numargs, so
	 * that a final line made of a single word is not lost */
	if (ctx->wordptr != ctx->wordbuf)
		endofword(ctx);

	if (ctx->numargs != 0)
		return 1;

	/* finished with nothing left over */
	return 0;
}

/* parse a provided line
 *
 * Returns 0 if the context is invalid, 1 otherwise; the caller still
 * has to consult pconf_parse_error before using the arguments.
 */
int pconf_line(PCONF_CTX_t *ctx, const char *line)
{
	size_t	i, linelen;

	if (!check_magic(ctx))
		return 0;

	ctx->linenum++;

	/* start over for the new line */
	ctx->numargs = 0;
	ctx->state = STATE_FINDWORDSTART;

	linelen = strlen(line);

	for (i = 0; i < linelen; i++) {
		/* go through unsigned char so bytes above 0x7f reach the
		 * state machine with the same values fgetc would deliver */
		ctx->ch = (unsigned char)line[i];

		parse_char(ctx);

		if (ctx->state == STATE_PARSEERR)
			return 1;

		if (ctx->state == STATE_ENDOFLINE)
			return 1;
	}

	/* deal with any lingering characters */

	/* still building a word? */
	if (ctx->wordptr != ctx->wordbuf)
		endofword(ctx);		/* tie it off */

	return 1;
}

#define PCONF_ESCAPE "#\\\""

/* Copy src into dest, putting a backslash in front of every character
 * listed in PCONF_ESCAPE.
 *
 * dest is cleared first and the result is always NUL terminated as long
 * as destsize is at least 1.  Output that does not fit is cut off, but
 * never between a backslash and the character it escapes.  Returns dest.
 */
char *pconf_encode(const char *src, char *dest, size_t destsize)
{
	size_t	i, srclen, destlen, maxlen;

	if (destsize < 1)
		return dest;

	memset(dest, '\0', destsize);

	/* always leave room for a final NUL */
	maxlen = destsize - 1;
	srclen = strlen(src);
	destlen = 0;

	for (i = 0; i < srclen; i++) {
		if (strchr(PCONF_ESCAPE, src[i])) {

			/* if they both won't fit, we're done */
			/* (destlen never exceeds maxlen, so this cannot wrap
			 * around even when maxlen is 0) */
			if (maxlen - destlen < 2)
				return dest;

			dest[destlen++] = '\\';
		}

		/* bail out when dest is full */
		if (destlen >= maxlen)
			return dest;

		dest[destlen++] = src[i];
	}

	return dest;
}

/* parse input a character at a time
 *
 * Returns 1 when ch completed a line, -1 on a parse error or an invalid
 * context, and 0 when more input is needed.
 */
int pconf_char(PCONF_CTX_t *ctx, char ch)
{
	if (!check_magic(ctx))
		return -1;

	/* if the last call finished a line, clean stuff up for another */
	if ((ctx->state == STATE_ENDOFLINE) ||
		(ctx->state == STATE_PARSEERR)) {
		ctx->numargs = 0;
		ctx->state = STATE_FINDWORDSTART;
	}

	/* go through unsigned char so bytes above 0x7f reach the state
	 * machine with the same values fgetc would deliver */
	ctx->ch = (unsigned char)ch;

	parse_char(ctx);

	if (ctx->state == STATE_ENDOFLINE)
		return 1;

	if (ctx->state == STATE_PARSEERR)
		return -1;

	return 0;
}