Scippy

SCIP

Solving Constraint Integer Programs

xmlparse.c
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright (C) 2002-2017 Konrad-Zuse-Zentrum */
7 /* fuer Informationstechnik Berlin */
8 /* */
9 /* SCIP is distributed under the terms of the ZIB Academic License. */
10 /* */
11 /* You should have received a copy of the ZIB Academic License */
12 /* along with SCIP; see the file COPYING. If not email to scip@zib.de. */
13 /* */
14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15 
/**@file   xmlparse.c
 * @brief  functions to parse XML files
 * @author Thorsten Koch
 * @author Marc Pfetsch
 *
 * If SPEC_LIKE_SPACE_HANDLING is not defined, every LF and CR is converted into a space, and any
 * sequence of consecutive spaces is collapsed into a single space.
 *
 * @todo Implement the possibility to skip building parse information for certain tags
 *       (and their children). For solution files this would avoid parsing the constraints section.
 */
27 
28 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
29 
30 #include <blockmemshell/memory.h>
31 
32 #include "xml.h"
33 #include "xmldef.h"
34 
35 
36 #include <sys/types.h>
37 #ifdef WITH_ZLIB
38 #include <unistd.h>
39 #endif
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <assert.h>
43 #include <ctype.h>
44 #include <string.h>
45 
46 
/* growth increments (in characters) of the dynamically sized name, attribute value and data buffers */
#define NAME_EXT_SIZE          128
#define ATTR_EXT_SIZE         4096
#define DATA_EXT_SIZE         4096

/* size of the raw input buffer embedded in the parse position structure */
#define LINE_BUF_SIZE         8192

/* report an error with position context, tagged with the source location of the call */
#define xmlError(pos, text)    xmlErrmsg(pos, text, FALSE, __FILE__, __LINE__)
53 
54 
/* forward declarations */
typedef struct parse_stack_struct PSTACK;
typedef struct parse_pos_struct PPOS;

/** state of the parser
 *
 *  NOTE(review): the enum header and its enumerators were missing from this listing; the set of
 *  enumerators is taken from their uses in this file, their order is an assumption -- confirm
 *  against the upstream source.
 */
enum parse_state_enum
{
   XML_STATE_ERROR,                          /**< a parse error occurred; xmlParse() stops and fails */
   XML_STATE_BEFORE,                         /**< a '<' is expected next (handled by procBefore()) */
   XML_STATE_IN_TAG,                         /**< inside a start tag, reading attributes (procInTag()) */
   XML_STATE_PCDATA,                         /**< reading character data between tags (procPcdata()) */
   XML_STATE_EOF                             /**< end of input reached; xmlParse() stops and succeeds */
};
typedef enum parse_state_enum PSTATE;
69 
70 /** Stack as a (singly) linked list. The top element is the current node. */
72 {
75 };
76 
77 /** Store the current position in the file and the state of the parser. */
79 {
80  const char* filename;
82  char buf[LINE_BUF_SIZE];
83  int pos;
84  int lineno;
85  int nextsym;
86  int lastsym;
89 };
90 
91 
92 /** output error message with corresponding line and position */
93 static void xmlErrmsg(
94  PPOS* ppos,
95  const char* msg,
96  XML_Bool msg_only,
97  const char* file,
98  int line
99  )
100 {
101 #ifndef NDEBUG
102  int ret;
103  assert( ppos != NULL );
104 
105  if ( ! msg_only )
106  {
107  ret = fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
108  assert(ret >= 0);
109 
110  ret = fprintf(stderr, "%s", ppos->buf);
111  assert(ret >= 0);
112 
113  if ( strchr(ppos->buf, '\n') == NULL )
114  {
115  int retc;
116 
117  retc = fputc('\n', stderr);
118  assert(retc != EOF);
119  }
120 
121  ret = fprintf(stderr, "%*s\n", ppos->pos, "^");
122  assert(ret >= 0);
123  }
124  ret = fprintf(stderr, "%s\n\n", msg);
125  assert(ret >= 0);
126 
127 #else
128 
129  if ( ! msg_only )
130  {
131  (void) fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
132 
133  (void) fprintf(stderr, "%s", ppos->buf);
134 
135  if ( strchr(ppos->buf, '\n') == NULL )
136  {
137  (void) fputc('\n', stderr);
138  }
139 
140  (void) fprintf(stderr, "%*s\n", ppos->pos, "^");
141  }
142  (void) fprintf(stderr, "%s\n\n", msg);
143 #endif
144 }
145 
146 
147 /** Push new element on the parse stack.
148  *
149  * TRUE if it worked, FAILURE otherwise.
150  */
151 static
153  PPOS* ppos,
154  XML_NODE* node
155  )
156 {
157  PSTACK* p;
158 
159  assert(ppos != NULL);
160  assert(node != NULL);
161 
162  debugMessage("Pushing %s\n", node->name);
163 
165  assert(p != NULL);
166 
167  p->node = node;
168  p->next = ppos->top;
169  ppos->top = p;
170 
171  return TRUE;
172 }
173 
174 /** returns top element on stack (which has to be present) */
176  const PPOS* ppos
177  )
178 {
179  assert(ppos != NULL);
180  assert(ppos->top != NULL);
181 
182  return ppos->top->node;
183 }
184 
185 /** remove top element from stack and deletes it
186  *
187  * TRUE if ok, FALSE otherwise
188  */
189 static
191  PPOS* ppos /**< input stream position */
192  )
193 {
194  PSTACK* p;
195  XML_Bool result;
196 
197  assert(ppos != NULL);
198 
199  if ( ppos->top == NULL )
200  {
201  xmlError(ppos, "Stack underflow");
202  result = FALSE;
203  }
204  else
205  {
206  result = TRUE;
207  p = ppos->top;
208  ppos->top = p->next;
209 
210  debugMessage("Poping %s\n", p->node->name);
211  BMSfreeMemory(&p);
212  }
213  return result;
214 }
215 
216 /** remove complete stack */
217 static
219  PPOS* ppos
220  )
221 {
222  assert(ppos != NULL);
223 
224  while ( ppos->top != NULL )
225  (void) popPstack(ppos);
226 }
227 
228 /** Returns the next character from the input buffer and fills the buffer if it is empty (similar to fgetc()). */
229 static
230 int mygetc(
231  PPOS* ppos
232  )
233 {
234  assert(ppos != NULL);
235  assert(ppos->fp != NULL);
236  assert(ppos->pos < LINE_BUF_SIZE);
237 
238  if ( ppos->buf[ppos->pos] == '\0' )
239  {
240 #ifdef SCIP_DISABLED_CODE
241  /* the low level function gzread/fread used below seem to be faster */
242  if ( NULL == FGETS(ppos->buf, sizeof(ppos->buf), ppos->fp) )
243  return EOF;
244 #else
245  size_t len = (size_t) FREAD(ppos->buf, sizeof(ppos->buf) - 1, ppos->fp); /*lint !e571 !e747*/
246 
247  if( len == 0 || len > sizeof(ppos->buf) - 1 )
248  return EOF;
249 
250  ppos->buf[len] = '\0';
251 #endif
252  ppos->pos = 0;
253  }
254  return (unsigned char)ppos->buf[ppos->pos++];
255 }
256 
257 
258 #ifdef SPEC_LIKE_SPACE_HANDLING
259 /** Read input from fp_in.
260  *
261  * If there is a LF, CR, CR/LF, or LF/CR it returns exactly on LF. Also counts the number of
262  * characters.
263  */
264 static
265 int getsymbol(
266  PPOS* ppos
267  )
268 {
269  int c;
270 
271  assert(ppos != NULL);
272 
273  if ( ppos->nextsym == 0 )
274  c = mygetc(ppos);
275  else
276  {
277  c = ppos->nextsym;
278  ppos->nextsym = 0;
279  }
280  assert(ppos->nextsym == 0);
281 
282  if (((c == '\n') && (ppos->lastsym == '\r')) || ((c == '\r') && (ppos->lastsym == '\n')))
283  c = mygetc(ppos);
284 
285  ppos->lastsym = c;
286 
287  if ( c == '\r' )
288  c = '\n';
289 
290  if ( c == '\n' )
291  ++ppos->lineno;
292 
293  return c;
294 }
295 #else
296 /** Read input from fp_in (variant).
297  *
298  * Here we convert all LF or CR into SPACE and return maximally one SPACE after the other.
299  *
300  * @note This function counts lines differently. On systems that have only one '\\r' as line feed
301  * (MAC) it does not count correctly.
302  */
303 static
305  PPOS* ppos
306  )
307 {
308  int c;
309 
310  assert(ppos != NULL);
311 
312  do
313  {
314  if ( ppos->nextsym == 0 )
315  c = mygetc(ppos);
316  else
317  {
318  c = ppos->nextsym;
319  ppos->nextsym = 0;
320  }
321  assert(ppos->nextsym == 0);
322 
323  if ( c == '\n' )
324  ++ppos->lineno;
325 
326  if ((c == '\n') || (c == '\r'))
327  c = ' ';
328  } while((c == ' ') && (ppos->lastsym == c));
329 
330  ppos->lastsym = c;
331 
332  debugMessage("[%c]\n", c);
333 
334  return c;
335 }
336 #endif
337 
338 /** Reinserts a character into the input stream */
339 static
341  PPOS* ppos,
342  int c
343  )
344 {
345  assert(ppos != NULL);
346  assert(ppos->nextsym == 0);
347 
348  ppos->nextsym = c;
349 }
350 
351 /** Skip all spaces and return the next non-space character or EOF */
352 static
354  PPOS* ppos
355  )
356 {
357  int c;
358 
359  assert(ppos != NULL);
360 
361  do
362  {
363  c = getsymbol(ppos);
364  }
365  while(isspace(c));
366 
367  return c;
368 }
369 
370 /** Get name of a TAG or attribute from the input stream.
371  *
372  * Either it returns a pointer to allocated memory which contains the name or it returns NULL if
373  * there is some error.
374  */
375 static
376 char* getName(
377  PPOS* ppos
378  )
379 {
380  char* name = NULL;
381  size_t size = 0;
382  size_t len = 0;
383  int c;
384 
385  assert(ppos != NULL);
386 
387  c = getsymbol(ppos);
388 
389  if ( ! isalpha(c) && (c != '_') && (c != ':') )
390  {
391  xmlError(ppos, "Name starting with illegal charater");
392  return NULL;
393  }
394 
395  /* The following is wrong: Here almost all characters that we casted to unicode are feasible */
396  while ( isalnum(c) || (c == '_') || (c == ':') || (c == '.') || (c == '-') )
397  {
398  if ( len + 1 >= size )
399  {
400  size += NAME_EXT_SIZE;
401 
402  if ( name == NULL )
403  {
404  ALLOC_ABORT( BMSallocMemoryArray(&name, size) );
405  }
406  else
407  {
408  ALLOC_ABORT( BMSreallocMemoryArray(&name, size) );
409  }
410  }
411  assert(name != NULL);
412  assert(size > len);
413 
414  name[len++] = (char)c;
415 
416  c = getsymbol(ppos);
417  }
418  if ( c != EOF )
419  ungetsymbol(ppos, c);
420 
421  assert(name != NULL);
422 
423  if ( len == 0 )
424  {
425  BMSfreeMemoryArray(&name);
426  name = NULL;
427  }
428  else
429  name[len] = '\0';
430 
431  return name;
432 }
433 
434 /** Read the value of an attribute from the input stream.
435  *
436  * The value has to be between two " or ' (the other character is then valid as well). The function
437  * returns a pointer to allocated memory containing the value or it returns NULL in case of an
438  * error.
439  */
440 static
442  PPOS* ppos
443  )
444 {
445  char* attr = NULL;
446  int c;
447  int stop;
448  size_t len = 0;
449  size_t size = 0;
450 
451  assert(ppos != NULL);
452 
453  /* The following is not allowed according to the specification (the value has to be directly
454  * after the equation sign). */
455  c = skipSpace(ppos);
456 
457  if ( (c != '"') && (c != '\'') )
458  {
459  xmlError(ppos, "Atribute value does not start with \" or \'");
460  return NULL;
461  }
462  stop = c;
463 
464  for(;;)
465  {
466  if ( len == size )
467  {
468  size += ATTR_EXT_SIZE;
469 
470  if ( attr == NULL )
471  {
472  ALLOC_ABORT( BMSallocMemoryArray(&attr, size) );
473  }
474  else
475  {
476  ALLOC_ABORT( BMSreallocMemoryArray(&attr, size) );
477  }
478  }
479  assert(attr != NULL);
480  assert(size > len);
481 
482  c = getsymbol(ppos);
483 
484  if ( (c == stop) || (c == EOF) )
485  break;
486 
487  attr[len++] = (char)c;
488  }
489 
490  if ( c != EOF )
491  attr[len] = '\0';
492  else
493  {
494  BMSfreeMemoryArray(&attr);
495  attr = NULL;
496  }
497  return attr;
498 }
499 
500 /** Skip comment
501  *
502  * Return FALSE if an error occurs.
503  */
504 static
506  PPOS* ppos
507  )
508 {
509  XML_Bool result = TRUE;
510  int c;
511  int state = 0;
512 
513  assert(ppos != NULL);
514 
515  for(;;)
516  {
517  c = getsymbol(ppos);
518 
519  if ( c == EOF )
520  break;
521 
522  if ( (c == '>') && (state >= 2) )
523  break;
524 
525  state = (c == '-') ? state + 1 : 0;
526  }
527  if ( c == EOF )
528  {
529  xmlError(ppos, "Unexpected EOF in comment");
530  result = FALSE;
531  }
532  return result;
533 }
534 
535 /** Handles a CDATA section.
536  *
537  * Returns a pointer to allocated memory containing the data of this section or NULL in case of an
538  * error.
539  */
540 static
541 char* doCdata(
542  PPOS* ppos
543  )
544 {
545  char* data = NULL;
546  size_t size = 0;
547  size_t len = 0;
548  int state = 0;
549  int c;
550 
551  assert(ppos != NULL);
552 
553  for(;;)
554  {
555  c = getsymbol(ppos);
556 
557  if ( c == EOF )
558  break;
559 
560  if ( c == ']' )
561  state++;
562  else
563  if ( (c == '>') && (state >= 2) )
564  break;
565  else
566  state = 0;
567 
568  if ( len == size )
569  {
570  size += DATA_EXT_SIZE;
571 
572  if ( data == NULL )
573  {
574  ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
575  }
576  else
577  {
578  ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
579  }
580  }
581  assert(data != NULL);
582  assert(size > len);
583 
584  data[len++] = (char)c;
585  }
586  assert(data != NULL);
587 
588  /*lint --e{527}*/
589  if ( c != EOF )
590  {
591  assert(len >= 2);
592  assert(data != NULL);
593 
594  data[len - 2] = '\0'; /*lint !e413*/
595  }
596  else
597  {
598  BMSfreeMemoryArray(&data);
599  data = NULL;
600  xmlError(ppos, "Unexpected EOF in CDATA");
601  }
602  return data;
603 }
604 
605 /** Handle processing instructions (skipping) */
606 static
607 void handlePi(
608  PPOS* ppos
609  )
610 {
611  int c;
612 
613  assert(ppos != NULL);
614  assert(ppos->state == XML_STATE_BEFORE);
615 
616  do
617  {
618  c = getsymbol(ppos);
619  }
620  while ( (c != EOF) && (c != '>') );
621 
622  if ( c != EOF )
623  ppos->state = XML_STATE_PCDATA;
624  else
625  {
626  xmlError(ppos, "Unexpected EOF in PI");
627  ppos->state = XML_STATE_ERROR;
628  }
629 }
630 
631 /** Handles declarations that start with a <!.
632  *
633  * This includes comments. Does currenlty not work very well, because of DTDs.
634  */
635 static
637  PPOS* ppos
638  )
639 {
640  enum XmlSection
641  {
642  IS_COMMENT,
643  IS_ATTLIST,
644  IS_DOCTYPE,
645  IS_ELEMENT,
646  IS_ENTITY,
647  IS_NOTATION,
648  IS_CDATA
649  };
650  typedef enum XmlSection XMLSECTION;
651 
652  static struct
653  {
654  const char* name;
655  XMLSECTION what;
656  } key[] =
657  {
658  { "--", IS_COMMENT },
659  { "ATTLIST", IS_ATTLIST },
660  { "DOCTYPE", IS_DOCTYPE },
661  { "ELEMENT", IS_ELEMENT },
662  { "ENTITY", IS_ENTITY },
663  { "NOTATION", IS_NOTATION },
664  { "[CDATA[", IS_CDATA }
665  };
666  XML_NODE* node;
667  char* data;
668  int c;
669  int k = 0;
670  int beg = 0;
671  int end;
672 
673  assert(ppos != NULL);
674  assert(ppos->state == XML_STATE_BEFORE);
675 
676  end = (int) (sizeof(key) / sizeof(key[0])) - 1;
677  do
678  {
679  c = getsymbol(ppos);
680 
681  for(; (beg <= end) && (c != key[beg].name[k]); beg++)
682  ;
683  for(; (end >= beg) && (c != key[end].name[k]); end--)
684  ;
685  k++;
686  } while(beg < end);
687 
688  if ( beg != end )
689  {
690  xmlError(ppos, "Unknown declaration");
691 
692  while ( (c != EOF) && (c != '>') )
693  c = getsymbol(ppos);
694  }
695  else
696  {
697  assert(beg == end);
698  assert(beg < (int)(sizeof(key) / sizeof(*key)));
699 
700  switch(key[beg].what)
701  {
702  case IS_COMMENT :
703  if ( ! doComment(ppos) )
704  ppos->state = XML_STATE_ERROR;
705  break;
706  case IS_CDATA :
707  if ( (data = doCdata(ppos)) == NULL )
708  ppos->state = XML_STATE_ERROR;
709  else
710  {
711  if ( NULL == (node = xmlNewNode("#CDATA", ppos->lineno)) )
712  {
713  xmlError(ppos, "Can't create new node");
714  ppos->state = XML_STATE_ERROR;
715  }
716  else
717  {
718  BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
719  BMSfreeMemoryArray(&data);
720  xmlAppendChild(topPstack(ppos), node);
721  }
722  }
723  break;
724  case IS_ATTLIST :
725  case IS_ELEMENT :
726  case IS_NOTATION :
727  case IS_ENTITY :
728  case IS_DOCTYPE :
729  break;
730  default :
731  abort();
732  }
733  }
734 }
735 
736 /** Handle end tag */
737 static
739  PPOS* ppos
740  )
741 {
742  char* name;
743  int c;
744 
745  assert(ppos != NULL);
746 
747  if ( (name = getName(ppos)) == NULL )
748  xmlError(ppos, "Missing name in endtag");
749  else
750  {
751  c = skipSpace(ppos);
752 
753  if ( c != '>' )
754  {
755  xmlError(ppos, "Missing '>' in endtag");
756  ppos->state = XML_STATE_ERROR;
757  }
758  else
759  {
760  if ( strcmp(name, topPstack(ppos)->name) )
761  {
762  xmlError(ppos, "Name of endtag does not match starttag");
763  ppos->state = XML_STATE_ERROR;
764  }
765  else
766  {
767  if ( popPstack(ppos) )
768  ppos->state = XML_STATE_PCDATA;
769  else
770  ppos->state = XML_STATE_ERROR;
771  }
772  }
773 
774  BMSfreeMemoryArray(&name);
775  }
776 }
777 
778 /** Handle start tag */
779 static
781  PPOS* ppos
782  )
783 {
784  XML_NODE* node;
785  char* name;
786 
787  assert(ppos != NULL);
788 
789  name = getName(ppos);
790  if ( name == NULL )
791  {
792  xmlError(ppos, "Missing name in tagstart");
793  ppos->state = XML_STATE_ERROR;
794  }
795  else
796  {
797  node = xmlNewNode(name, ppos->lineno);
798  if ( node == NULL )
799  {
800  xmlError(ppos, "Can't create new node");
801  ppos->state = XML_STATE_ERROR;
802  }
803  else
804  {
805  xmlAppendChild(topPstack(ppos), node);
806 
807  if ( pushPstack(ppos, node) )
808  ppos->state = XML_STATE_IN_TAG;
809  else
810  ppos->state = XML_STATE_ERROR;
811  }
812  BMSfreeMemoryArray(&name);
813  }
814 }
815 
816 /** Checks for next tag */
817 static
819  PPOS* ppos /**< input stream position */
820  )
821 {
822  int c;
823 
824  assert(ppos != NULL);
825  assert(ppos->state == XML_STATE_BEFORE);
826 
827  c = skipSpace(ppos);
828 
829  if ( c != '<' )
830  {
831  xmlError(ppos, "Expecting '<'");
832  ppos->state = XML_STATE_ERROR;
833  }
834  else
835  {
836  c = getsymbol(ppos);
837 
838  switch(c)
839  {
840  case EOF :
841  xmlError(ppos, "Unexpected EOF");
842  ppos->state = XML_STATE_ERROR;
843  break;
844  case '!' :
845  handleDecl(ppos);
846  break;
847  case '?' :
848  handlePi(ppos);
849  break;
850  case '/' :
851  handleEndtag(ppos);
852  break;
853  default :
854  ungetsymbol(ppos, c);
855  handleStarttag(ppos);
856  break;
857  }
858  }
859 }
860 
861 /** Process tag */
862 static
864  PPOS* ppos /**< input stream position */
865  )
866 {
867  XML_ATTR* attr;
868  int c;
869  XML_Bool empty = FALSE;
870  char* name;
871  char* value;
872 
873  assert(ppos != NULL);
874  assert(ppos->state == XML_STATE_IN_TAG);
875 
876  c = skipSpace(ppos);
877 
878  if ( (c == '/') || (c == '>') || (c == EOF) )
879  {
880  if ( c == '/' )
881  {
882  empty = TRUE;
883  c = getsymbol(ppos);
884  }
885 
886  if ( c == EOF )
887  {
888  xmlError(ppos, "Unexpected EOF while in a tag");
889  ppos->state = XML_STATE_ERROR;
890  }
891 
892  if ( c == '>' )
893  {
894  ppos->state = XML_STATE_PCDATA;
895 
896  if (empty && ! popPstack(ppos))
897  ppos->state = XML_STATE_ERROR;
898  }
899  else
900  {
901  xmlError(ppos, "Expected tag end marker '>'");
902  ppos->state = XML_STATE_ERROR;
903  }
904  }
905  else
906  {
907  ungetsymbol(ppos, c);
908 
909  name = getName(ppos);
910  if ( name == NULL )
911  {
912  xmlError(ppos, "No name for attribute");
913  ppos->state = XML_STATE_ERROR;
914  }
915  else
916  {
917  c = skipSpace(ppos);
918 
919  if ( (c != '=') || ((value = getAttrval(ppos)) == NULL) )
920  {
921  xmlError(ppos, "Missing attribute value");
922  ppos->state = XML_STATE_ERROR;
923  BMSfreeMemoryArray(&name);
924  }
925  else
926  {
927  attr = xmlNewAttr(name, value);
928  if ( attr == NULL )
929  {
930  xmlError(ppos, "Can't create new attribute");
931  ppos->state = XML_STATE_ERROR;
932  }
933  else
934  {
935  xmlAddAttr(topPstack(ppos), attr);
936  }
937  BMSfreeMemoryArray(&name);
938  BMSfreeMemoryArray(&value);
939  }
940  }
941  }
942 }
943 
944 /* Handles PCDATA */
945 static
947  PPOS* ppos /**< input stream position */
948  )
949 {
950  XML_NODE* node;
951  char* data = NULL;
952  size_t size = 0;
953  size_t len = 0;
954  int c;
955 
956  assert(ppos != NULL);
957  assert(ppos->state == XML_STATE_PCDATA);
958 
959 #ifndef SPEC_LIKE_SPACE_HANDLING
960  c = skipSpace(ppos);
961  if ( c != EOF )
962  ungetsymbol(ppos, c);
963 #endif
964  c = getsymbol(ppos);
965 
966  while ( (c != EOF) && (c != '<') )
967  {
968  if ( len + 1 >= size ) /* leave space for terminating '\0' */
969  {
970  size += DATA_EXT_SIZE;
971 
972  if ( data == NULL )
973  {
974  ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
975  }
976  else
977  {
978  ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
979  }
980  }
981  assert(data != NULL);
982  assert(size > len + 1);
983 
984  data[len++] = (char)c;
985 
986  c = getsymbol(ppos);
987  }
988  if ( data == NULL )
989  {
990  if ( c == EOF )
991  ppos->state = XML_STATE_EOF;
992  else if ( c == '<' )
993  {
994  ppos->state = XML_STATE_BEFORE;
995  ungetsymbol(ppos, c);
996  }
997  else
998  {
999  ppos->state = XML_STATE_ERROR;
1000  }
1001  }
1002  else
1003  {
1004  assert(len < size);
1005  data[len] = '\0';
1006 
1007  if ( c == EOF )
1008  ppos->state = XML_STATE_ERROR;
1009  else
1010  {
1011  ungetsymbol(ppos, c);
1012 
1013  node = xmlNewNode("#PCDATA", ppos->lineno);
1014  if ( node == NULL )
1015  {
1016  xmlError(ppos, "Can't create new node");
1017  ppos->state = XML_STATE_ERROR;
1018  }
1019  else
1020  {
1021  BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
1022  xmlAppendChild(topPstack(ppos), node);
1023  ppos->state = XML_STATE_BEFORE;
1024  }
1025  }
1026 
1027  BMSfreeMemoryArray(&data);
1028  }
1029 }
1030 
1031 /** Parse input stream */
1032 static
1034  PPOS* ppos /**< input stream position */
1035  )
1036 {
1037  XML_Bool ok = TRUE;
1038 
1039  while (ok)
1040  {
1041  debugMessage("state=%d\n", ppos->state);
1042 
1043  switch (ppos->state)
1044  {
1045  case XML_STATE_BEFORE :
1046  procBefore(ppos);
1047  break;
1048  case XML_STATE_IN_TAG :
1049  procInTag(ppos);
1050  break;
1051  case XML_STATE_PCDATA :
1052  procPcdata(ppos);
1053  break;
1054  case XML_STATE_EOF :
1055  ok = FALSE;
1056  break;
1057  case XML_STATE_ERROR :
1058  ok = FALSE;
1059  break;
1060  default :
1061  xmlError(ppos, "Internal Error, illegal state");
1062  ok = FALSE;
1063  }
1064  }
1065  return (ppos->state == XML_STATE_EOF);
1066 }
1067 
1068 /** Parse file */
1070  const char* filename /**< XML file name */
1071  )
1072 {
1073  PPOS ppos;
1074  XML_NODE* node = NULL;
1075  XML_ATTR* attr;
1076  XML_Bool result = FALSE;
1077  char* myfilename;
1078  size_t filenamelen;
1079 
1080  /* allocate space and copy filename (possibly modified below) in two steps in order to satisfy valgrind */
1081  assert( filename != NULL );
1082  filenamelen = strlen(filename);
1083  if ( BMSallocMemoryArray(&myfilename, filenamelen + 5) == NULL )
1084  return NULL;
1085  BMScopyMemoryArray(myfilename, filename, filenamelen + 1);
1086 
1087 #ifdef WITH_ZLIB
1088  if ( access(filename, R_OK) != 0 )
1089  {
1090  strcat(myfilename, ".gz");
1091 
1092  /* If .gz also does not work, revert to the old name
1093  * to get a better error message.
1094  */
1095  if ( access(myfilename, R_OK) != 0 )
1096  strcpy(myfilename, filename);
1097  }
1098 #endif
1099  ppos.fp = FOPEN(myfilename, "r");
1100  if ( ppos.fp == NULL )
1101  perror(myfilename);
1102  else
1103  {
1104  ppos.filename = myfilename;
1105  ppos.buf[0] = '\0';
1106  ppos.pos = 0;
1107  ppos.lineno = 1;
1108  ppos.nextsym = 0;
1109  ppos.lastsym = 0;
1110  ppos.state = XML_STATE_BEFORE;
1111  ppos.top = NULL;
1112 
1113  node = xmlNewNode("#ROOT", ppos.lineno);
1114  if ( node == NULL )
1115  {
1116  xmlError(&ppos, "Can't create new node");
1117  }
1118  else
1119  {
1120  attr = xmlNewAttr("filename", myfilename);
1121  if ( attr == NULL )
1122  xmlError(&ppos, "Can't create new attribute");
1123  else
1124  {
1125  xmlAddAttr(node, attr);
1126 
1127  /* push root node on stack and start to process */
1128  if ( pushPstack(&ppos, node) )
1129  {
1130  result = xmlParse(&ppos);
1131 
1132  clearPstack(&ppos);
1133  }
1134  }
1135  }
1136 
1137  if ( ! result && (node != NULL) )
1138  {
1139  xmlErrmsg(&ppos, "Parsing error, processing stopped", TRUE, __FILE__, __LINE__);
1140  xmlFreeNode(node);
1141  node = NULL;
1142  }
1143  if ( FCLOSE(ppos.fp) )
1144  perror(myfilename);
1145  }
1146  BMSfreeMemoryArray(&myfilename);
1147 
1148  return node;
1149 }
1150 
1151 
1152 
1153 
1154 
1155 
1156 /*----------------------------------------------------------------------------------------------*/
1157 
1158 
1159 /** create new node */
1161  const char* name,
1162  int lineno
1163  )
1164 {
1165  XML_NODE* n = NULL;
1166 
1167  assert(name != NULL);
1168 
1169  if ( BMSallocMemory(&n) != NULL )
1170  {
1171  BMSclearMemory(n);
1172  BMSduplicateMemoryArray(&n->name, name, strlen(name)+1);
1173  n->lineno = lineno;
1174  }
1175  return n;
1176 }
1177 
1178 /** create new attribute */
1180  const char* name,
1181  const char* value
1182  )
1183 {
1184  XML_ATTR* a = NULL;
1185 
1186  assert(name != NULL);
1187  assert(value != NULL);
1188 
1189  if ( BMSallocMemory(&a) != NULL )
1190  {
1191  BMSclearMemory(a);
1192  BMSduplicateMemoryArray(&a->name, name, strlen(name)+1);
1193  BMSduplicateMemoryArray(&a->value, value, strlen(value)+1);
1194  }
1195  return a;
1196 }
1197 
1198 /** add attribute */
1200  XML_NODE* n,
1201  XML_ATTR* a
1202  )
1203 {
1204  assert(n != NULL);
1205  assert(a != NULL);
1206 
1207  a->next = n->attrlist;
1208  n->attrlist = a;
1209 }
1210 
1211 /** append child node */
1213  XML_NODE* parent,
1214  XML_NODE* child
1215  )
1216 {
1217  assert(parent != NULL);
1218  assert(child != NULL);
1219 
1220  child->parent = parent;
1221  child->prevsibl = parent->lastchild;
1222  child->nextsibl = NULL;
1223  parent->lastchild = child;
1224 
1225  if ( child->prevsibl != NULL )
1226  child->prevsibl->nextsibl = child;
1227 
1228  if ( parent->firstchild == NULL )
1229  parent->firstchild = child;
1230 }
1231 
1232 /** free attribute */
1233 static
1235  XML_ATTR* attr
1236  )
1237 {
1238  XML_ATTR* a;
1239 
1240  /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1241  * and might overflow the heap. */
1242  a = attr;
1243  while (a != NULL)
1244  {
1245  XML_ATTR* b;
1246  b = a->next;
1247 
1248  assert(a->name != NULL);
1249  assert(a->value != NULL);
1250 
1251  BMSfreeMemoryArray(&a->name);
1252  BMSfreeMemoryArray(&a->value);
1253  BMSfreeMemory(&a);
1254  a = b;
1255  }
1256 }
1257 
1258 /** free node */
1260  XML_NODE* node
1261  )
1262 {
1263  XML_NODE* n;
1264 
1265  if ( node == NULL )
1266  return;
1267 
1268  /* Free data from back to front (because free is faster this way). */
1269  /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1270  * and might overflow the heap. */
1271  n = node->lastchild;
1272  while ( n != NULL )
1273  {
1274  XML_NODE* m;
1275  m = n->prevsibl;
1276  xmlFreeNode(n);
1277  n = m;
1278  }
1279 
1280  xmlFreeAttr(node->attrlist);
1281 
1282  if ( node->data != NULL )
1283  {
1284  BMSfreeMemoryArray(&node->data);
1285  }
1286  assert(node->name != NULL);
1287 
1288  BMSfreeMemoryArray(&node->name);
1289  BMSfreeMemory(&node);
1290 }
1291 
1292 /** output node */
1294  const XML_NODE* root
1295  )
1296 {
1297  const XML_NODE* n;
1298  const XML_ATTR* a;
1299 
1300  assert(root != NULL);
1301 
1302  for (n = root; n != NULL; n = n->nextsibl)
1303  {
1304  infoMessage("Name: %s\n", n->name);
1305  infoMessage("Line: %d\n", n->lineno);
1306  infoMessage("Data: %s\n", (n->data != NULL) ? n->data : "***");
1307 
1308  for (a = n->attrlist; a != NULL; a = a->next)
1309  infoMessage("Attr: %s = [%s]\n", a->name, a->value);
1310 
1311  if ( n->firstchild != NULL )
1312  {
1313  infoMessage("->\n");
1314  xmlShowNode(n->firstchild);
1315  infoMessage("<-\n");
1316  }
1317  }
1318 }
1319 
1320 /** get attribute value */
1321 const char* xmlGetAttrval(
1322  const XML_NODE* node,
1323  const char* name
1324  )
1325 {
1326  XML_ATTR* a;
1327 
1328  assert(node != NULL);
1329  assert(name != NULL);
1330 
1331  for (a = node->attrlist; a != NULL; a = a->next)
1332  {
1333  if ( ! strcmp(name, a->name) )
1334  break;
1335  }
1336 
1337 #ifdef SCIP_DEBUG
1338  if (a == NULL)
1339  infoMessage("Error: Attribute %s in TAG <%s> not found\n", name, node->name);
1340 #endif
1341 
1342  return (a == NULL) ? NULL : a->value;
1343 }
1344 
1345 /** return first node */
1347  const XML_NODE* node,
1348  const char* name
1349  )
1350 {
1351  const XML_NODE* n;
1352 
1353  assert(node != NULL);
1354  assert(name != NULL);
1355 
1356  for (n = node; n != NULL; n = n->nextsibl)
1357  {
1358  if ( ! strcmp(name, n->name) )
1359  break;
1360  }
1361 
1362  return n;
1363 }
1364 
1365 /** return next node */
1367  const XML_NODE* node,
1368  const char* name
1369  )
1370 {
1371  assert(node != NULL);
1372  assert(name != NULL);
1373 
1374  return (node->nextsibl == NULL) ? NULL : xmlFirstNode(node->nextsibl, name);
1375 }
1376 
1377 /** find node */
1379  const XML_NODE* node,
1380  const char* name
1381  )
1382 {
1383  const XML_NODE* n;
1384  const XML_NODE* r;
1385 
1386  assert(node != NULL);
1387  assert(name != NULL);
1388 
1389  if ( ! strcmp(name, node->name) )
1390  return node;
1391 
1392  for (n = node->firstchild; n != NULL; n = n->nextsibl)
1393  {
1394  r = xmlFindNode(n, name);
1395  if ( r != NULL )
1396  return r;
1397  }
1398 
1399  return NULL;
1400 }
1401 
1402 /** find node with bound on the depth */
1404  const XML_NODE* node, /**< current node - use start node to begin */
1405  const char* name, /**< name of tag to search for */
1406  int depth, /**< current depth - start with 0 for root */
1407  int maxdepth /**< maximal depth */
1408  )
1409 {
1410  const XML_NODE* n;
1411  const XML_NODE* r;
1412 
1413  assert(node != NULL);
1414  assert(name != NULL);
1415 
1416  if ( ! strcmp(name, node->name) )
1417  return node;
1418 
1419  if ( depth < maxdepth )
1420  {
1421  for (n = node->firstchild; n != NULL; n = n->nextsibl)
1422  {
1423  r = xmlFindNodeMaxdepth(n, name, depth+1, maxdepth);
1424  if ( r != NULL )
1425  return r;
1426  }
1427  }
1428 
1429  return NULL;
1430 }
1431 
1432 /** return next sibling */
1434  const XML_NODE* node
1435  )
1436 {
1437  assert(node != NULL);
1438 
1439  return node->nextsibl;
1440 }
1441 
1442 /** return previous sibling */
1444  const XML_NODE* node
1445  )
1446 {
1447  assert(node != NULL);
1448 
1449  return node->prevsibl;
1450 }
1451 
1452 /** return first child */
1454  const XML_NODE* node
1455  )
1456 {
1457  assert(node != NULL);
1458 
1459  return node->firstchild;
1460 }
1461 
1462 /** return last child */
1464  const XML_NODE* node
1465  )
1466 {
1467  assert(node != NULL);
1468 
1469  return node->lastchild;
1470 }
1471 
1472 /** return name of node */
1473 const char* xmlGetName(
1474  const XML_NODE* node
1475  )
1476 {
1477  assert(node != NULL);
1478 
1479  return node->name;
1480 }
1481 
1482 /** get line number */
1484  const XML_NODE* node
1485  )
1486 {
1487  assert(node != NULL);
1488 
1489  return node->lineno;
1490 }
1491 
1492 /** get data */
1493 const char* xmlGetData(
1494  const XML_NODE* node
1495  )
1496 {
1497  assert(node != NULL);
1498 
1499  return node->data;
1500 }
1501 
1502 /** find PCDATA */
1503 const char* xmlFindPcdata(
1504  const XML_NODE* node,
1505  const char* name
1506  )
1507 {
1508  const XML_NODE* n;
1509 
1510  assert(node != NULL);
1511  assert(name != NULL);
1512 
1513  n = xmlFindNode(node, name);
1514  if ( n == NULL )
1515  return NULL;
1516 
1517  if ( ! strcmp(n->firstchild->name, "#PCDATA") )
1518  return n->firstchild->data;
1519 
1520  return NULL;
1521 }
#define XML_Bool
Definition: xmldef.h:33
#define LINE_BUF_SIZE
Definition: xmlparse.c:50
const XML_NODE * xmlFirstNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1346
static void handleDecl(PPOS *ppos)
Definition: xmlparse.c:636
PSTACK * next
Definition: xmlparse.c:74
const char * xmlFindPcdata(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1503
void xmlFreeNode(XML_NODE *node)
Definition: xmlparse.c:1259
#define FREAD(buf, len, fp)
Definition: xmldef.h:54
static int getsymbol(PPOS *ppos)
Definition: xmlparse.c:304
#define ALLOC_ABORT(x)
Definition: tclique_def.h:35
#define FCLOSE(fp)
Definition: xmldef.h:52
struct XML_ATTR_struct XML_ATTR
Definition: xml.h:32
static char * doCdata(PPOS *ppos)
Definition: xmlparse.c:541
static void xmlFreeAttr(XML_ATTR *attr)
Definition: xmlparse.c:1234
#define FALSE
Definition: def.h:64
XML_NODE * xmlNewNode(const char *name, int lineno)
Definition: xmlparse.c:1160
#define TRUE
Definition: def.h:63
const char * xmlGetName(const XML_NODE *node)
Definition: xmlparse.c:1473
#define BMSallocMemoryArray(ptr, num)
Definition: memory.h:78
#define DATA_EXT_SIZE
Definition: xmlparse.c:49
enum parse_state_enum PSTATE
Definition: xmlparse.c:68
#define ATTR_EXT_SIZE
Definition: xmlparse.c:48
XML_ATTR * xmlNewAttr(const char *name, const char *value)
Definition: xmlparse.c:1179
#define BMSfreeMemory(ptr)
Definition: memory.h:100
void xmlShowNode(const XML_NODE *root)
Definition: xmlparse.c:1293
static void procInTag(PPOS *ppos)
Definition: xmlparse.c:863
const char * xmlGetData(const XML_NODE *node)
Definition: xmlparse.c:1493
#define debugMessage
Definition: tclique_def.h:65
static void xmlErrmsg(PPOS *ppos, const char *msg, XML_Bool msg_only, const char *file, int line)
Definition: xmlparse.c:93
static void procPcdata(PPOS *ppos)
Definition: xmlparse.c:946
static int mygetc(PPOS *ppos)
Definition: xmlparse.c:230
PSTACK * top
Definition: xmlparse.c:88
static void ungetsymbol(PPOS *ppos, int c)
Definition: xmlparse.c:340
#define BMSfreeMemoryArray(ptr)
Definition: memory.h:102
parse_state_enum
Definition: xmlparse.c:60
static XML_Bool popPstack(PPOS *ppos)
Definition: xmlparse.c:190
struct XML_NODE_struct XML_NODE
Definition: xml.h:41
static XML_Bool doComment(PPOS *ppos)
Definition: xmlparse.c:505
#define ALLOC_FALSE(x)
Definition: tclique_def.h:47
#define NULL
Definition: lpi_spx1.cpp:137
const XML_NODE * xmlFindNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1378
const char * xmlGetAttrval(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1321
const XML_NODE * xmlNextNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1366
static XML_Bool xmlParse(PPOS *ppos)
Definition: xmlparse.c:1033
#define BMSduplicateMemoryArray(ptr, source, num)
Definition: memory.h:98
const XML_NODE * xmlFirstChild(const XML_NODE *node)
Definition: xmlparse.c:1453
XML_NODE * xmlProcess(const char *filename)
Definition: xmlparse.c:1069
static int skipSpace(PPOS *ppos)
Definition: xmlparse.c:353
static XML_NODE * topPstack(const PPOS *ppos)
Definition: xmlparse.c:175
static char * getAttrval(PPOS *ppos)
Definition: xmlparse.c:441
void xmlAddAttr(XML_NODE *n, XML_ATTR *a)
Definition: xmlparse.c:1199
#define FGETS(buf, len, fp)
Definition: xmldef.h:53
#define BMScopyMemoryArray(ptr, source, num)
Definition: memory.h:89
#define FOPEN(file, mode)
Definition: xmldef.h:51
#define infoMessage
Definition: tclique_def.h:71
#define FPTYPE
Definition: xmldef.h:55
#define xmlError(a, b)
Definition: xmlparse.c:52
#define BMSclearMemory(ptr)
Definition: memory.h:84
static char * getName(PPOS *ppos)
Definition: xmlparse.c:376
XML_NODE * node
Definition: xmlparse.c:73
const XML_NODE * xmlNextSibl(const XML_NODE *node)
Definition: xmlparse.c:1433
char buf[LINE_BUF_SIZE]
Definition: xmlparse.c:82
const char * filename
Definition: xmlparse.c:80
static XML_Bool pushPstack(PPOS *ppos, XML_NODE *node)
Definition: xmlparse.c:152
#define BMSallocMemory(ptr)
Definition: memory.h:74
#define BMSreallocMemoryArray(ptr, num)
Definition: memory.h:82
declarations for XML parsing
#define NAME_EXT_SIZE
Definition: xmlparse.c:47
const XML_NODE * xmlPrevSibl(const XML_NODE *node)
Definition: xmlparse.c:1443
const XML_NODE * xmlLastChild(const XML_NODE *node)
Definition: xmlparse.c:1463
const XML_NODE * xmlFindNodeMaxdepth(const XML_NODE *node, const char *name, int depth, int maxdepth)
Definition: xmlparse.c:1403
static void handlePi(PPOS *ppos)
Definition: xmlparse.c:607
int xmlGetLine(const XML_NODE *node)
Definition: xmlparse.c:1483
void xmlAppendChild(XML_NODE *parent, XML_NODE *child)
Definition: xmlparse.c:1212
static void handleStarttag(PPOS *ppos)
Definition: xmlparse.c:780
PSTATE state
Definition: xmlparse.c:87
static void procBefore(PPOS *ppos)
Definition: xmlparse.c:818
static void handleEndtag(PPOS *ppos)
Definition: xmlparse.c:738
static void clearPstack(PPOS *ppos)
Definition: xmlparse.c:218
memory allocation routines
definitions for XML parsing