libdap  Updated for version 3.20.11
libdap4 is an implementation of OPeNDAP's DAP protocol.
parser-util.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 // (c) COPYRIGHT URI/MIT 1995-1999
27 // Please read the full copyright statement in the file COPYRIGHT_URI.
28 //
29 // Authors:
30 // jhrg,jimg James Gallagher <jgallagher@gso.uri.edu>
31 
32 // These functions are utility functions used by the various DAP parsers (the
33 // DAS, DDS and constraint expression parsers).
34 // jhrg 9/7/95
35 
36 #include "config.h"
37 
38 #include <cerrno>
39 #include <cassert>
40 #include <cstring>
41 #include <cmath>
42 #include <cstdlib>
43 
44 #include <iostream>
45 #include <sstream>
46 
47 // We wrap VC++ 6.x strtod() to account for a shortcoming
48 // in that function with regard to "NaN".
49 #ifdef WIN32
50 #include <limits>
51 double w32strtod(const char *, char **);
52 #endif
53 
54 #include "Error.h"
55 #include "debug.h"
56 #include "parser.h" // defines constants such as ID_MAX
57 #include "dods-limits.h"
58 #include "util.h" // Jose Garcia: for append_long_to_string.
59 
60 using std::cerr;
61 using std::endl;
62 
63 #ifdef WIN32
64 // VC++ 6.x strtod() doesn't recognize "NaN". Account for it
65 // by wrapping it around a check for the Nan string. Use of
66 // the product is obsolete as of 1/2007, but it is unknown if
67 // the issue is still there in later releases of that product.
68 // ROM - 01/2007
69 double w32strtod(const char *val, char **ptr)
70 {
71  // Convert the two char arrays to compare to strings.
72  string *sval = new string(val);
73  string *snan = new string("NaN");
74 
75  // If val doesn't contain "NaN|Nan|nan|etc", use strtod as
76  // provided.
77  if (stricmp(sval->c_str(), snan->c_str()) != 0)
78  return (strtod(val, ptr));
79 
80  // But if it does, return the bit pattern for Nan and point
81  // the parsing ptr arg at the trailing '\0'.
82  *ptr = (char *) val + strlen(val);
83  return (std::numeric_limits < double >::quiet_NaN());
84 }
85 #endif
86 
87 namespace libdap {
88 
89 // Deprecated, but still used by the HDF4 EOS server code.
90 void
91 parse_error(parser_arg * arg, const char *msg, const int line_num,
92  const char *context)
93 {
94  // Jose Garcia
95  // This assert(s) is (are) only for developing purposes
96  // For production servers remove it by compiling with NDEBUG
97  assert(arg);
98  assert(msg);
99 
100  arg->set_status(FALSE);
101 
102  string oss = "";
103 
104  if (line_num != 0) {
105  oss += "Error parsing the text on line ";
106  append_long_to_string(line_num, 10, oss);
107  }
108  else {
109  oss += "Parse error.";
110  }
111 
112  if (context)
113  oss += (string) " at or near: " + context + (string) "\n" + msg
114  + (string) "\n";
115  else
116  oss += (string) "\n" + msg + (string) "\n";
117 
118  arg->set_error(new Error(unknown_error, oss));
119 }
120 
121 void
122 parse_error(const char *msg, const int line_num, const char *context)
123 {
124  // Jose Garcia
125  // This assert(s) is (are) only for developing purposes
126  // For production servers remove it by compiling with NDEBUG
127  assert(msg);
128 
129  string oss = "";
130 
131  if (line_num != 0) {
132  oss += "Error parsing the text on line ";
133  append_long_to_string(line_num, 10, oss);
134  }
135  else {
136  oss += "Parse error.";
137  }
138 
139  if (context)
140  oss += (string) " at or near: " + context + (string) "\n" + msg
141  + (string) "\n";
142  else
143  oss += (string) "\n" + msg + (string) "\n";
144 
145  throw Error(malformed_expr, oss);
146 }
147 
/**
 * std::string convenience overload: forwards to the const char * version.
 *
 * The context still arrives as a char * because it comes straight from the
 * parser, and will keep doing so unless the parsers change dramatically.
 *
 * @param msg      Description of the problem.
 * @param line_num Line being parsed, or 0 when unknown.
 * @param context  Text at or near the error, or null when unavailable.
 */
void
parse_error(const string & msg, const int line_num, const char *context)
{
    const char *text = msg.c_str();
    parse_error(text, line_num, context);
}
155 
156 #if 0
157 // Remove this since it is not used and contains a potential (low level) vulnerability.
158 // jhrg 3/7/22
159 void save_str(char *dst, const char *src, const int line_num)
160 {
161  if (strlen(src) >= ID_MAX)
162  parse_error(string("The word `") + string(src)
163  + string("' is too long (it should be no longer than ")
164  + long_to_string(ID_MAX) + string(")."), line_num);
165 
166  strncpy(dst, src, ID_MAX);
167  dst[ID_MAX - 1] = '\0'; /* in case ... */
168 }
169 #endif
170 
/**
 * Save a string to a temporary variable during the parse.
 *
 * @param dst Receives a copy of @p src, replacing any previous contents.
 * @param src NUL-terminated text to copy.
 * The third (line number) argument is accepted but not used.
 */
void save_str(string & dst, const char *src, const int /* line_num */)
{
    dst.assign(src);
}
175 
176 bool is_keyword(string id, const string & keyword)
177 {
178  downcase(id);
179  id = prune_spaces(id);
180  DBG(cerr << "is_keyword: " << keyword << " = " << id << endl);
181  return id == keyword;
182 }
183 
194 int check_byte(const char *val)
195 {
196  char *ptr;
197  long v = strtol(val, &ptr, 0);
198 
199  if ((v == 0 && val == ptr) || *ptr != '\0') {
200  return FALSE;
201  }
202 
203  DBG(cerr << "v: " << v << endl);
204 
205  // We're very liberal here with values. Anything that can fit into 8 bits
206  // is allowed through. Clients will have to deal with the fact that the
207  // ASCII representation for the value might need to be tweaked. This is
208  // especially the case for Java clients where Byte datatypes are
209  // signed. 3/20/2000 jhrg
210  if ((v < 0 && v < DODS_SCHAR_MIN)
211  || (v > 0 && static_cast < unsigned long >(v) > DODS_UCHAR_MAX))
212  return FALSE;
213 
214  return TRUE;
215 }
216 
217 // This version of check_int will pass base 8, 10 and 16 numbers when they
218 // use the ANSI standard for string representation of those number bases.
219 
220 int check_int16(const char *val)
221 {
222  char *ptr;
223  long v = strtol(val, &ptr, 0); // `0' --> use val to determine base
224 
225  if ((v == 0 && val == ptr) || *ptr != '\0') {
226  return FALSE;
227  }
228  // Don't use the constant from limits.h, use the ones in dods-limits.h
229  if (v > DODS_SHRT_MAX || v < DODS_SHRT_MIN) {
230  return FALSE;
231  }
232 
233  return TRUE;
234 }
235 
236 int check_uint16(const char *val)
237 {
238  char *ptr;
239  unsigned long v = strtol(val, &ptr, 0);
240 
241  if ((v == 0 && val == ptr) || *ptr != '\0') {
242  return FALSE;
243  }
244 
245  if (v > DODS_USHRT_MAX) {
246  return FALSE;
247  }
248 
249  return TRUE;
250 }
251 
252 int check_int32(const char *val)
253 {
254  char *ptr;
255  errno = 0;
256  long v = strtol(val, &ptr, 0); // `0' --> use val to determine base
257 
258  if ((v == 0 && val == ptr) || *ptr != '\0') {
259  return FALSE;
260  }
261 
262  // We need to check errno since strtol return clamps on overflow so the
263  // check against the DODS values below will always pass, even for out of
264  // bounds values in the string. mjohnson 7/20/09
265  if (errno == ERANGE) {
266  return FALSE;
267  }
268  // This could be combined with the above, or course, but I'm making it
269  // separate to highlight the test. On 64-bit linux boxes 'long' may be
270  // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
271  else if (v > DODS_INT_MAX || v < DODS_INT_MIN) {
272  return FALSE;
273  }
274  else {
275  return TRUE;
276  }
277 }
278 
279 int check_uint32(const char *val)
280 {
281  // Eat whitespace and check for an initial '-' sign...
282  // strtoul allows an initial minus. mjohnson
283  const char* c = val;
284  while (c && isspace(*c)) {
285  c++;
286  }
287  if (c && (*c == '-')) {
288  return FALSE;
289  }
290 
291  char *ptr;
292  errno = 0;
293  unsigned long v = strtoul(val, &ptr, 0);
294 
295  if ((v == 0 && val == ptr) || *ptr != '\0') {
296  return FALSE;
297  }
298 
299  // check overflow first, or the below check is invalid due to
300  // clamping to the maximum value by strtoul
301  // maybe consider using long long for these checks? mjohnson
302  if (errno == ERANGE) {
303  return FALSE;
304  }
305  // See above.
306  else if (v > DODS_UINT_MAX) {
307  return FALSE;
308  }
309  else {
310  return TRUE;
311  }
312 }
313 
314 int check_int32(const char *val, int &v)
315 {
316  char *ptr;
317  errno = 0;
318  long tmp = strtol(val, &ptr, 0); // `0' --> use val to determine base
319 
320  if ((tmp == 0 && val == ptr) || *ptr != '\0') {
321  return FALSE;
322  }
323 
324  // We need to check errno since strtol return clamps on overflow so the
325  // check against the DODS values below will always pass, even for out of
326  // bounds values in the string. mjohnson 7/20/09
327  if (errno == ERANGE) {
328  return FALSE;
329  }
330  // This could be combined with the above, or course, but I'm making it
331  // separate to highlight the test. On 64-bit linux boxes 'long' may be
332  // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
333  else if (tmp > DODS_INT_MAX || tmp < DODS_INT_MIN) {
334  return FALSE;
335  }
336  else {
337  v = (int)tmp;
338  return TRUE;
339  }
340 }
341 
342 int check_uint32(const char *val, unsigned int &v)
343 {
344  // Eat whitespace and check for an initial '-' sign...
345  // strtoul allows an initial minus. mjohnson
346  const char* c = val;
347  while (c && isspace(*c)) {
348  c++;
349  }
350  if (c && (*c == '-')) {
351  return FALSE;
352  }
353 
354  char *ptr;
355  errno = 0;
356  unsigned long tmp = strtoul(val, &ptr, 0);
357 
358  if ((tmp == 0 && val == ptr) || *ptr != '\0') {
359  return FALSE;
360  }
361 
362  // check overflow first, or the below check is invalid due to
363  // clamping to the maximum value by strtoul
364  // maybe consider using long long for these checks? mjohnson
365  if (errno == ERANGE) {
366  return FALSE;
367  }
368  // See above.
369  else if (tmp > DODS_UINT_MAX) {
370  return FALSE;
371  }
372  else {
373  v = (unsigned int)tmp;
374  return TRUE;
375  }
376 }
377 
378 int check_int64(const char *val)
379 {
380  char *ptr;
381  errno = 0;
382  long long v = strtoll(val, &ptr, 0); // `0' --> use val to determine base
383 
384  if ((v == 0 && val == ptr) || *ptr != '\0') {
385  return FALSE;
386  }
387 
388  // We need to check errno since strtol return clamps on overflow so the
389  // check against the DODS values below will always pass, even for out of
390  // bounds values in the string. mjohnson 7/20/09
391  if (errno == ERANGE) {
392  return FALSE;
393  }
394 #if 0
395  // This could be combined with the above, or course, but I'm making it
396  // separate to highlight the test. On 64-bit linux boxes 'long' may be
397  // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
398  //
399  // Removed - Coverity says it can never be false. Makes sense. jhrg 5/10/16
400  else if (v <= DODS_LLONG_MAX && v >= DODS_LLONG_MIN) {
401  return FALSE;
402  }
403 #endif
404  else {
405  return TRUE;
406  }
407 }
408 
409 int check_uint64(const char *val)
410 {
411  // Eat whitespace and check for an initial '-' sign...
412  // strtoul allows an initial minus. mjohnson
413  const char* c = val;
414  while (c && isspace(*c)) {
415  c++;
416  }
417  if (c && (*c == '-')) {
418  return FALSE;
419  }
420 
421  char *ptr;
422  errno = 0;
423  unsigned long long v = strtoull(val, &ptr, 0);
424 
425  if ((v == 0 && val == ptr) || *ptr != '\0') {
426  return FALSE;
427  }
428 
429  if (errno == ERANGE) {
430  return FALSE;
431  }
432  else if (v > DODS_ULLONG_MAX) { // 2^61
433  return FALSE;
434  }
435  else {
436  return v;
437  }
438 }
439 
440 // Check first for system errors (like numbers so small they convert
441 // (erroneously) to zero. Then make sure that the value is within
442 // limits.
443 
444 int check_float32(const char *val)
445 {
446  char *ptr;
447  errno = 0; // Clear previous value. Fix for the 64bit
448  // IRIX from Rob Morris. 5/21/2001 jhrg
449 
450 #ifdef WIN32
451  double v = w32strtod(val, &ptr);
452 #else
453  double v = strtod(val, &ptr);
454 #endif
455 
456  DBG(cerr << "v: " << v << ", ptr: " << ptr
457  << ", errno: " << errno << ", val==ptr: " << (val == ptr) << endl);
458 
459  if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
460  return FALSE;
461 
462 #if 0
463  if ((v == 0.0 && (val == ptr || errno == HUGE_VAL || errno == ERANGE))
464  || *ptr != '\0') {
465  return FALSE;
466  }
467 #endif
468 
469  DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
470  double abs_val = fabs(v);
471  if (abs_val > DODS_FLT_MAX
472  || (abs_val != 0.0 && abs_val < DODS_FLT_MIN))
473  return FALSE;
474 
475  return TRUE;
476 }
477 
478 int check_float64(const char *val)
479 {
480  DBG(cerr << "val: " << val << endl);
481  char *ptr;
482  errno = 0; // Clear previous value. 5/21/2001 jhrg
483 
484 #ifdef WIN32
485  double v = w32strtod(val, &ptr);
486 #else
487  double v = strtod(val, &ptr);
488 #endif
489 
490  DBG(cerr << "v: " << v << ", ptr: " << ptr
491  << ", errno: " << errno << ", val==ptr: " << (val == ptr) << endl);
492 
493 
494  if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
495  return FALSE;
496 #if 0
497  if ((v == 0.0 && (val == ptr || errno == HUGE_VAL || errno == ERANGE))
498  || *ptr != '\0') {
499  return FALSE;
500  }
501 #endif
502  DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
503  double abs_val = fabs(v);
504  if (abs_val > DODS_DBL_MAX
505  || (abs_val != 0.0 && abs_val < DODS_DBL_MIN))
506  return FALSE;
507 
508  return TRUE;
509 }
510 
511 int check_float64(const char *val, double &v)
512 {
513  DBG(cerr << "val: " << val << endl);
514  char *ptr;
515  errno = 0; // Clear previous value. 5/21/2001 jhrg
516 
517 #ifdef WIN32
518  v = w32strtod(val, &ptr);
519 #else
520  v = strtod(val, &ptr);
521 #endif
522 
523  DBG(cerr << "v: " << v << ", ptr: " << ptr
524  << ", errno: " << errno << ", val==ptr: " << (val == ptr) << endl);
525 
526 
527  if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
528  return FALSE;
529 #if 0
530  if ((v == 0.0 && (val == ptr || errno == HUGE_VAL || errno == ERANGE))
531  || *ptr != '\0') {
532  return FALSE;
533  }
534 #endif
535  DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
536  double abs_val = fabs(v);
537  if (abs_val > DODS_DBL_MAX
538  || (abs_val != 0.0 && abs_val < DODS_DBL_MIN))
539  return FALSE;
540 
541  return TRUE;
542 }
543 
544 long long get_int64(const char *val)
545 {
546  char *ptr;
547  errno = 0;
548  long long v = strtoll(val, &ptr, 0); // `0' --> use val to determine base
549 
550  if ((v == 0 && val == ptr) || *ptr != '\0') {
551  throw Error("Expected a 64-bit integer, but found other characters.");
552  // The value '" + string(val) + "' contains extra characters.");
553  }
554 
555  // We need to check errno since strtol return clamps on overflow so the
556  // check against the DODS values below will always pass, even for out of
557  // bounds values in the string. mjohnson 7/20/09
558  if (errno == ERANGE) {
559  throw Error("The 64-bit integer value is out of range.");
560  }
561 
562 #if 0
563  // This could be combined with the above, or course, but I'm making it
564  // separate to highlight the test. On 64-bit linux boxes 'long' may be
565  // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
566  //
567  // Removed because coverity flags it as useless, which it is until we
568  // have 128-bit ints... jhrg 5/9/16
569  else if (v > DODS_LLONG_MAX || v < DODS_LLONG_MIN) {
570  throw Error("The value '" + string(val) + "' is out of range.");
571  }
572 #endif
573 
574  else {
575  return v;
576  }
577 }
578 
579 unsigned long long get_uint64(const char *val)
580 {
581  // Eat whitespace and check for an initial '-' sign...
582  // strtoul allows an initial minus. mjohnson
583  const char* c = val;
584  while (c && isspace(*c)) {
585  c++;
586  }
587  if (c && (*c == '-')) {
588  throw Error("Expected a valid array index.");
589  }
590 
591  char *ptr;
592  errno = 0;
593  unsigned long long v = strtoull(val, &ptr, 0);
594 
595  if ((v == 0 && val == ptr) || *ptr != '\0') {
596  throw Error("Expected an unsigned 64-bit integer, but found other characters.");
597  }
598 
599  if (errno == ERANGE) {
600  throw Error("The 64-bit integer value is out of range.");
601  }
602 #if 0
603  // Coverity; see above. jhrg 5/9/16
604  else if (v > DODS_MAX_ARRAY_INDEX) { // 2^61
605  throw Error("The value '" + string(val) + "' is out of range.");
606  }
607 #endif
608  else {
609  return v;
610  }
611 }
612 
613 int get_int32(const char *val)
614 {
615  char *ptr;
616  errno = 0;
617  int v = strtol(val, &ptr, 0); // `0' --> use val to determine base
618 
619  if ((v == 0 && val == ptr) || *ptr != '\0') {
620  throw Error("Expected a 32-bit integer, but found other characters.");
621  }
622 
623  // We need to check errno since strtol return clamps on overflow so the
624  // check against the DODS values below will always pass, even for out of
625  // bounds values in the string. mjohnson 7/20/09
626  if (errno == ERANGE) {
627  throw Error("The 32-bit integer value is out of range.");
628  }
629  // This could be combined with the above, or course, but I'm making it
630  // separate to highlight the test. On 64-bit linux boxes 'long' may be
631  // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
632  else if (v > DODS_INT_MAX || v < DODS_INT_MIN) {
633  return FALSE;
634  }
635 
636  else {
637  return v;
638  }
639 }
640 
641 unsigned int get_uint32(const char *val)
642 {
643  // Eat whitespace and check for an initial '-' sign...
644  // strtoul allows an initial minus. mjohnson
645  const char* c = val;
646  while (c && isspace(*c)) {
647  c++;
648  }
649  if (c && (*c == '-')) {
650  throw Error("Expected an unsigned 32-bit integer, but found other characters.");
651  }
652 
653  char *ptr;
654  errno = 0;
655  unsigned int v = strtoul(val, &ptr, 0);
656 
657  if ((v == 0 && val == ptr) || *ptr != '\0') {
658  throw Error("Expected an unsigned 32-bit integer, but found other characters.");
659  }
660 
661  if (errno == ERANGE) {
662  throw Error("The 32-bit integer value is out of range.");
663  }
664  // See above.
665  else if (v > DODS_UINT_MAX) {
666  return FALSE;
667  }
668  else {
669  return v;
670  }
671 }
672 
673 double get_float64(const char *val)
674 {
675  DBG(cerr << "val: " << val << endl);
676  char *ptr;
677  errno = 0; // Clear previous value. 5/21/2001 jhrg
678 
679 #ifdef WIN32
680  double v = w32strtod(val, &ptr);
681 #else
682  double v = strtod(val, &ptr);
683 #endif
684 
685  if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
686  throw Error("The 64-bit floating point value is out of range.");;
687 
688  DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
689  double abs_val = fabs(v);
690  if (abs_val > DODS_DBL_MAX || (abs_val != 0.0 && abs_val < DODS_DBL_MIN))
691  throw Error("The 64-bit floating point value is out of range.");;
692 
693  return v;
694 }
695 
696 /*
697  Maybe someday we will really check the Urls to see if they are valid...
698 */
699 
700 int check_url(const char *)
701 {
702  return TRUE;
703 }
704 
705 } // namespace libdap
int check_url(const char *)
Is the value a valid URL?
Definition: parser-util.cc:700
int check_byte(const char *val)
Is the value a valid byte?
Definition: parser-util.cc:194
top level DAP object to house generic methods
Definition: AlarmHandler.h:36
string prune_spaces(const string &name)
Definition: util.cc:459
void save_str(string &dst, const char *src, const int)
Save a string to a temporary variable during the parse.
Definition: parser-util.cc:171
void downcase(string &s)
Definition: util.cc:566