locale: Turn ADDC and ADDS into functions in linereader.c

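Also introduce struct lr_buffer.  Unlike the ADDC and ADDS macros, which
rely on local variables named buf, bufact and bufmax, the functions addc
and adds take the buffer as an argument and can therefore be called from
any function, enabling subsequent refactoring.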

Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Tested-by: Carlos O'Donell <carlos@redhat.com>
This commit is contained in:
Florian Weimer 2022-07-05 09:05:22 +02:00
parent 62595e8944
commit 5dcbff5879
1 changed file with 104 additions and 99 deletions

View File

@ -416,36 +416,60 @@ get_toplvl_escape (struct linereader *lr)
return &lr->token; return &lr->token;
} }
/* Multibyte string buffer.  A growable byte buffer that addc and adds
   append to; lr_buffer_to_token hands its storage over to a token.  */
struct lr_buffer
{
/* Number of bytes currently stored in BUF; the next byte is written at
   BUF[ACT].  */
size_t act;
/* Number of bytes allocated for BUF.  */
size_t max;
/* Heap-allocated storage, obtained with xmalloc and enlarged with
   xrealloc as needed.  */
char *buf;
};
#define ADDC(ch) \ /* Initialize *LRB with a default-sized buffer. */
do \ static void
{ \ lr_buffer_init (struct lr_buffer *lrb)
if (bufact == bufmax) \ {
{ \ lrb->act = 0;
bufmax *= 2; \ lrb->max = 56;
buf = xrealloc (buf, bufmax); \ lrb->buf = xmalloc (lrb->max);
} \ }
buf[bufact++] = (ch); \
} \
while (0)
/* Hand the bytes collected in *LRB over to LR->token as its multibyte
   string.  The storage is resized to hold exactly LRB->act bytes plus a
   terminating NUL byte; the NUL is not counted in the recorded length.
   After the call the storage belongs to the token.  */
static void
lr_buffer_to_token (struct lr_buffer *lrb, struct linereader *lr)
{
  size_t len = lrb->act;
  char *str = xrealloc (lrb->buf, len + 1);

  str[len] = '\0';
  lr->token.val.str.startmb = str;
  lr->token.val.str.lenmb = len;
}
#define ADDS(s, l) \ /* Adds CH to *LRB. */
do \ static void
{ \ addc (struct lr_buffer *lrb, char ch)
size_t _l = (l); \ {
if (bufact + _l > bufmax) \ if (lrb->act == lrb->max)
{ \ {
if (bufact < _l) \ lrb->max *= 2;
bufact = _l; \ lrb->buf = xrealloc (lrb->buf, lrb->max);
bufmax *= 2; \ }
buf = xrealloc (buf, bufmax); \ lrb->buf[lrb->act++] = ch;
} \ }
memcpy (&buf[bufact], s, _l); \
bufact += _l; \
} \
while (0)
/* Append the L bytes starting at S to *LRB, enlarging the buffer first
   if the free space is too small.  */
static void
adds (struct lr_buffer *lrb, const unsigned char *s, size_t l)
{
  size_t avail = lrb->max - lrb->act;

  if (l > avail)
    {
      /* Double the capacity; if even that is too small, grow to exactly
	 the size needed.  */
      size_t needed = lrb->act + l;
      size_t doubled = 2 * lrb->max;
      size_t capacity = doubled < needed ? needed : doubled;

      lrb->buf = xrealloc (lrb->buf, capacity);
      lrb->max = capacity;
    }

  memcpy (&lrb->buf[lrb->act], s, l);
  lrb->act += l;
}
#define ADDWC(ch) \ #define ADDWC(ch) \
do \ do \
@ -467,13 +491,11 @@ get_symname (struct linereader *lr)
1. reserved words 1. reserved words
2. ISO 10646 position values 2. ISO 10646 position values
3. all other. */ 3. all other. */
char *buf;
size_t bufact = 0;
size_t bufmax = 56;
const struct keyword_t *kw; const struct keyword_t *kw;
int ch; int ch;
struct lr_buffer lrb;
buf = (char *) xmalloc (bufmax); lr_buffer_init (&lrb);
do do
{ {
@ -481,13 +503,13 @@ get_symname (struct linereader *lr)
if (ch == lr->escape_char) if (ch == lr->escape_char)
{ {
int c2 = lr_getc (lr); int c2 = lr_getc (lr);
ADDC (c2); addc (&lrb, c2);
if (c2 == '\n') if (c2 == '\n')
ch = '\n'; ch = '\n';
} }
else else
ADDC (ch); addc (&lrb, ch);
} }
while (ch != '>' && ch != '\n'); while (ch != '>' && ch != '\n');
@ -495,39 +517,35 @@ get_symname (struct linereader *lr)
lr_error (lr, _("unterminated symbolic name")); lr_error (lr, _("unterminated symbolic name"));
/* Test for ISO 10646 position value. */ /* Test for ISO 10646 position value. */
if (buf[0] == 'U' && (bufact == 6 || bufact == 10)) if (lrb.buf[0] == 'U' && (lrb.act == 6 || lrb.act == 10))
{ {
char *cp = buf + 1; char *cp = lrb.buf + 1;
while (cp < &buf[bufact - 1] && isxdigit (*cp)) while (cp < &lrb.buf[lrb.act - 1] && isxdigit (*cp))
++cp; ++cp;
if (cp == &buf[bufact - 1]) if (cp == &lrb.buf[lrb.act - 1])
{ {
/* Yes, it is. */ /* Yes, it is. */
lr->token.tok = tok_ucs4; lr->token.tok = tok_ucs4;
lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16); lr->token.val.ucs4 = strtoul (lrb.buf + 1, NULL, 16);
return &lr->token; return &lr->token;
} }
} }
/* It is a symbolic name. Test for reserved words. */ /* It is a symbolic name. Test for reserved words. */
kw = lr->hash_fct (buf, bufact - 1); kw = lr->hash_fct (lrb.buf, lrb.act - 1);
if (kw != NULL && kw->symname_or_ident == 1) if (kw != NULL && kw->symname_or_ident == 1)
{ {
lr->token.tok = kw->token; lr->token.tok = kw->token;
free (buf); free (lrb.buf);
} }
else else
{ {
lr->token.tok = tok_bsymbol; lr->token.tok = tok_bsymbol;
lr_buffer_to_token (&lrb, lr);
buf = xrealloc (buf, bufact + 1); --lr->token.val.str.lenmb; /* Hide the trailing '>'. */
buf[bufact] = '\0';
lr->token.val.str.startmb = buf;
lr->token.val.str.lenmb = bufact - 1;
} }
return &lr->token; return &lr->token;
@ -537,16 +555,13 @@ get_symname (struct linereader *lr)
static struct token * static struct token *
get_ident (struct linereader *lr) get_ident (struct linereader *lr)
{ {
char *buf;
size_t bufact;
size_t bufmax = 56;
const struct keyword_t *kw; const struct keyword_t *kw;
int ch; int ch;
struct lr_buffer lrb;
buf = xmalloc (bufmax); lr_buffer_init (&lrb);
bufact = 0;
ADDC (lr->buf[lr->idx - 1]); addc (&lrb, lr->buf[lr->idx - 1]);
while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';' while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
&& ch != '<' && ch != ',' && ch != EOF) && ch != '<' && ch != ',' && ch != EOF)
@ -560,27 +575,22 @@ get_ident (struct linereader *lr)
break; break;
} }
} }
ADDC (ch); addc (&lrb, ch);
} }
lr_ungetc (lr, ch); lr_ungetc (lr, ch);
kw = lr->hash_fct (buf, bufact); kw = lr->hash_fct (lrb.buf, lrb.act);
if (kw != NULL && kw->symname_or_ident == 0) if (kw != NULL && kw->symname_or_ident == 0)
{ {
lr->token.tok = kw->token; lr->token.tok = kw->token;
free (buf); free (lrb.buf);
} }
else else
{ {
lr->token.tok = tok_ident; lr->token.tok = tok_ident;
lr_buffer_to_token (&lrb, lr);
buf = xrealloc (buf, bufact + 1);
buf[bufact] = '\0';
lr->token.val.str.startmb = buf;
lr->token.val.str.lenmb = bufact;
} }
return &lr->token; return &lr->token;
@ -593,14 +603,10 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
int verbose) int verbose)
{ {
int return_widestr = lr->return_widestr; int return_widestr = lr->return_widestr;
char *buf; struct lr_buffer lrb;
wchar_t *buf2 = NULL; wchar_t *buf2 = NULL;
size_t bufact;
size_t bufmax = 56;
/* We must return two different strings. */ lr_buffer_init (&lrb);
buf = xmalloc (bufmax);
bufact = 0;
/* We know it'll be a string. */ /* We know it'll be a string. */
lr->token.tok = tok_string; lr->token.tok = tok_string;
@ -613,19 +619,19 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
buf2 = NULL; buf2 = NULL;
while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
ADDC (ch); addc (&lrb, ch);
/* Catch errors with trailing escape character. */ /* Catch errors with trailing escape character. */
if (bufact > 0 && buf[bufact - 1] == lr->escape_char if (lrb.act > 0 && lrb.buf[lrb.act - 1] == lr->escape_char
&& (bufact == 1 || buf[bufact - 2] != lr->escape_char)) && (lrb.act == 1 || lrb.buf[lrb.act - 2] != lr->escape_char))
{ {
lr_error (lr, _("illegal escape sequence at end of string")); lr_error (lr, _("illegal escape sequence at end of string"));
--bufact; --lrb.act;
} }
else if (ch == '\n' || ch == EOF) else if (ch == '\n' || ch == EOF)
lr_error (lr, _("unterminated string")); lr_error (lr, _("unterminated string"));
ADDC ('\0'); addc (&lrb, '\0');
} }
else else
{ {
@ -662,7 +668,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
break; break;
} }
ADDC (ch); addc (&lrb, ch);
if (return_widestr) if (return_widestr)
ADDWC ((uint32_t) ch); ADDWC ((uint32_t) ch);
@ -671,7 +677,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
/* Now we have to search for the end of the symbolic name, i.e., /* Now we have to search for the end of the symbolic name, i.e.,
the closing '>'. */ the closing '>'. */
startidx = bufact; startidx = lrb.act;
while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF) while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
{ {
if (ch == lr->escape_char) if (ch == lr->escape_char)
@ -680,12 +686,12 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
if (ch == '\n' || ch == EOF) if (ch == '\n' || ch == EOF)
break; break;
} }
ADDC (ch); addc (&lrb, ch);
} }
if (ch == '\n' || ch == EOF) if (ch == '\n' || ch == EOF)
/* Not a correct string. */ /* Not a correct string. */
break; break;
if (bufact == startidx) if (lrb.act == startidx)
{ {
/* <> is no correct name. Ignore it and also signal an /* <> is no correct name. Ignore it and also signal an
error. */ error. */
@ -694,23 +700,23 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
} }
/* It might be a Uxxxx symbol. */ /* It might be a Uxxxx symbol. */
if (buf[startidx] == 'U' if (lrb.buf[startidx] == 'U'
&& (bufact - startidx == 5 || bufact - startidx == 9)) && (lrb.act - startidx == 5 || lrb.act - startidx == 9))
{ {
char *cp = buf + startidx + 1; char *cp = lrb.buf + startidx + 1;
while (cp < &buf[bufact] && isxdigit (*cp)) while (cp < &lrb.buf[lrb.act] && isxdigit (*cp))
++cp; ++cp;
if (cp == &buf[bufact]) if (cp == &lrb.buf[lrb.act])
{ {
char utmp[10]; char utmp[10];
/* Yes, it is. */ /* Yes, it is. */
ADDC ('\0'); addc (&lrb, '\0');
wch = strtoul (buf + startidx + 1, NULL, 16); wch = strtoul (lrb.buf + startidx + 1, NULL, 16);
/* Now forget about the name we just added. */ /* Now forget about the name we just added. */
bufact = startidx; lrb.act = startidx;
if (return_widestr) if (return_widestr)
ADDWC (wch); ADDWC (wch);
@ -774,7 +780,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
seq = charmap_find_value (charmap, utmp, seq = charmap_find_value (charmap, utmp,
9); 9);
assert (seq != NULL); assert (seq != NULL);
ADDS (seq->bytes, seq->nbytes); adds (&lrb, seq->bytes, seq->nbytes);
} }
continue; continue;
@ -788,24 +794,24 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
} }
if (seq != NULL) if (seq != NULL)
ADDS (seq->bytes, seq->nbytes); adds (&lrb, seq->bytes, seq->nbytes);
continue; continue;
} }
} }
/* We now have the symbolic name in buf[startidx] to /* We now have the symbolic name in lrb.buf[startidx] to
buf[bufact-1]. Now find out the value for this character lrb.buf[lrb.act-1]. Now find out the value for this character
in the charmap as well as in the repertoire map (in this in the charmap as well as in the repertoire map (in this
order). */ order). */
seq = charmap_find_value (charmap, &buf[startidx], seq = charmap_find_value (charmap, &lrb.buf[startidx],
bufact - startidx); lrb.act - startidx);
if (seq == NULL) if (seq == NULL)
{ {
/* This name is not in the charmap. */ /* This name is not in the charmap. */
lr_error (lr, _("symbol `%.*s' not in charmap"), lr_error (lr, _("symbol `%.*s' not in charmap"),
(int) (bufact - startidx), &buf[startidx]); (int) (lrb.act - startidx), &lrb.buf[startidx]);
illegal_string = 1; illegal_string = 1;
} }
@ -816,8 +822,8 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
wch = seq->ucs4; wch = seq->ucs4;
else else
{ {
wch = repertoire_find_value (repertoire, &buf[startidx], wch = repertoire_find_value (repertoire, &lrb.buf[startidx],
bufact - startidx); lrb.act - startidx);
if (seq != NULL) if (seq != NULL)
seq->ucs4 = wch; seq->ucs4 = wch;
} }
@ -826,7 +832,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
{ {
/* This name is not in the repertoire map. */ /* This name is not in the repertoire map. */
lr_error (lr, _("symbol `%.*s' not in repertoire map"), lr_error (lr, _("symbol `%.*s' not in repertoire map"),
(int) (bufact - startidx), &buf[startidx]); (int) (lrb.act - startidx), &lrb.buf[startidx]);
illegal_string = 1; illegal_string = 1;
} }
else else
@ -834,11 +840,11 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
} }
/* Now forget about the name we just added. */ /* Now forget about the name we just added. */
bufact = startidx; lrb.act = startidx;
/* And copy the bytes. */ /* And copy the bytes. */
if (seq != NULL) if (seq != NULL)
ADDS (seq->bytes, seq->nbytes); adds (&lrb, seq->bytes, seq->nbytes);
} }
if (ch == '\n' || ch == EOF) if (ch == '\n' || ch == EOF)
@ -849,7 +855,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
if (illegal_string) if (illegal_string)
{ {
free (buf); free (lrb.buf);
free (buf2); free (buf2);
lr->token.val.str.startmb = NULL; lr->token.val.str.startmb = NULL;
lr->token.val.str.lenmb = 0; lr->token.val.str.lenmb = 0;
@ -859,7 +865,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
return &lr->token; return &lr->token;
} }
ADDC ('\0'); addc (&lrb, '\0');
if (return_widestr) if (return_widestr)
{ {
@ -870,8 +876,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
} }
} }
lr->token.val.str.startmb = xrealloc (buf, bufact); lr_buffer_to_token (&lrb, lr);
lr->token.val.str.lenmb = bufact;
return &lr->token; return &lr->token;
} }