Scippy

SCIP

Solving Constraint Integer Programs

xmlparse.c
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright (C) 2002-2017 Konrad-Zuse-Zentrum */
7 /* fuer Informationstechnik Berlin */
8 /* */
9 /* SCIP is distributed under the terms of the ZIB Academic License. */
10 /* */
11 /* You should have received a copy of the ZIB Academic License */
12 /* along with SCIP; see the file COPYING. If not email to scip@zib.de. */
13 /* */
14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15 
16 /**@file xmldef.h
17  * @brief declarations for XML parsing
18  * @author Thorsten Koch
19  * @author Marc Pfetsch
20  *
21  * If SPEC_LIKE_SPACE_HANDLING is not defined, all LF,CR will be changed into spaces and from a
22  * sequence of spaces only one will be used.
23  *
24  * @todo Implement possibility to avoid the construction of parsing information for certain tags
25  * (and their children). For solution files this would avoid parsing the constraints section.
26  */
27 
28 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
29 
30 #include <blockmemshell/memory.h>
31 
32 #include "xml.h"
33 #include "xmldef.h"
34 
35 
36 #include <sys/types.h>
37 #ifdef WITH_ZLIB
38 #if defined(_WIN32) || defined(_WIN64)
39 #define R_OK _A_RDONLY
40 #define access _access
41 #include <io.h>
42 #else
43 #include <unistd.h>
44 #endif
45 #endif
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <assert.h>
49 #include <ctype.h>
50 #include <string.h>
51 
52 
53 #define NAME_EXT_SIZE 128
54 #define ATTR_EXT_SIZE 4096
55 #define DATA_EXT_SIZE 4096
56 #define LINE_BUF_SIZE 8192
57 
58 #define xmlError(a, b) xmlErrmsg(a, b, FALSE, __FILE__, __LINE__)
59 
60 
61 /* forward declarations */
62 typedef struct parse_stack_struct PSTACK;
63 typedef struct parse_pos_struct PPOS;
64 
65 /** state of the parser */
67 {
73 };
74 typedef enum parse_state_enum PSTATE;
75 
76 /** Stack as a (singly) linked list. The top element is the current node. */
78 {
81 };
82 
83 /** Store the current position in the file and the state of the parser. */
85 {
86  const char* filename;
88  char buf[LINE_BUF_SIZE];
89  int pos;
90  int lineno;
91  int nextsym;
92  int lastsym;
95 };
96 
97 
98 /** output error message with corresponding line and position */
99 static void xmlErrmsg(
100  PPOS* ppos,
101  const char* msg,
102  XML_Bool msg_only,
103  const char* file,
104  int line
105  )
106 {
107 #ifndef NDEBUG
108  int ret;
109  assert( ppos != NULL );
110 
111  if ( ! msg_only )
112  {
113  ret = fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
114  assert(ret >= 0);
115 
116  ret = fprintf(stderr, "%s", ppos->buf);
117  assert(ret >= 0);
118 
119  if ( strchr(ppos->buf, '\n') == NULL )
120  {
121  int retc;
122 
123  retc = fputc('\n', stderr);
124  assert(retc != EOF);
125  }
126 
127  ret = fprintf(stderr, "%*s\n", ppos->pos, "^");
128  assert(ret >= 0);
129  }
130  ret = fprintf(stderr, "%s\n\n", msg);
131  assert(ret >= 0);
132 
133 #else
134 
135  if ( ! msg_only )
136  {
137  (void) fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
138 
139  (void) fprintf(stderr, "%s", ppos->buf);
140 
141  if ( strchr(ppos->buf, '\n') == NULL )
142  {
143  (void) fputc('\n', stderr);
144  }
145 
146  (void) fprintf(stderr, "%*s\n", ppos->pos, "^");
147  }
148  (void) fprintf(stderr, "%s\n\n", msg);
149 #endif
150 }
151 
152 
153 /** Push new element on the parse stack.
154  *
155  * TRUE if it worked, FAILURE otherwise.
156  */
157 static
159  PPOS* ppos,
160  XML_NODE* node
161  )
162 {
163  PSTACK* p;
164 
165  assert(ppos != NULL);
166  assert(node != NULL);
167 
168  debugMessage("Pushing %s\n", node->name);
169 
171  assert(p != NULL);
172 
173  p->node = node;
174  p->next = ppos->top;
175  ppos->top = p;
176 
177  return TRUE;
178 }
179 
180 /** returns top element on stack (which has to be present) */
182  const PPOS* ppos
183  )
184 {
185  assert(ppos != NULL);
186  assert(ppos->top != NULL);
187 
188  return ppos->top->node;
189 }
190 
191 /** remove top element from stack and deletes it
192  *
193  * TRUE if ok, FALSE otherwise
194  */
195 static
197  PPOS* ppos /**< input stream position */
198  )
199 {
200  PSTACK* p;
201  XML_Bool result;
202 
203  assert(ppos != NULL);
204 
205  if ( ppos->top == NULL )
206  {
207  xmlError(ppos, "Stack underflow");
208  result = FALSE;
209  }
210  else
211  {
212  result = TRUE;
213  p = ppos->top;
214  ppos->top = p->next;
215 
216  debugMessage("Poping %s\n", p->node->name);
217  BMSfreeMemory(&p);
218  }
219  return result;
220 }
221 
222 /** remove complete stack */
223 static
225  PPOS* ppos
226  )
227 {
228  assert(ppos != NULL);
229 
230  while ( ppos->top != NULL )
231  (void) popPstack(ppos);
232 }
233 
234 /** Returns the next character from the input buffer and fills the buffer if it is empty (similar to fgetc()). */
235 static
236 int mygetc(
237  PPOS* ppos
238  )
239 {
240  assert(ppos != NULL);
241  assert(ppos->fp != NULL);
242  assert(ppos->pos < LINE_BUF_SIZE);
243 
244  if ( ppos->buf[ppos->pos] == '\0' )
245  {
246 #ifdef SCIP_DISABLED_CODE
247  /* the low level function gzread/fread used below seem to be faster */
248  if ( NULL == FGETS(ppos->buf, sizeof(ppos->buf), ppos->fp) )
249  return EOF;
250 #else
251  size_t len = (size_t) FREAD(ppos->buf, sizeof(ppos->buf) - 1, ppos->fp); /*lint !e571 !e747*/
252 
253  if( len == 0 || len > sizeof(ppos->buf) - 1 )
254  return EOF;
255 
256  ppos->buf[len] = '\0';
257 #endif
258  ppos->pos = 0;
259  }
260  return (unsigned char)ppos->buf[ppos->pos++];
261 }
262 
263 
264 #ifdef SPEC_LIKE_SPACE_HANDLING
265 /** Read input from fp_in.
266  *
267  * If there is a LF, CR, CR/LF, or LF/CR it returns exactly on LF. Also counts the number of
268  * characters.
269  */
270 static
271 int getsymbol(
272  PPOS* ppos
273  )
274 {
275  int c;
276 
277  assert(ppos != NULL);
278 
279  if ( ppos->nextsym == 0 )
280  c = mygetc(ppos);
281  else
282  {
283  c = ppos->nextsym;
284  ppos->nextsym = 0;
285  }
286  assert(ppos->nextsym == 0);
287 
288  if (((c == '\n') && (ppos->lastsym == '\r')) || ((c == '\r') && (ppos->lastsym == '\n')))
289  c = mygetc(ppos);
290 
291  ppos->lastsym = c;
292 
293  if ( c == '\r' )
294  c = '\n';
295 
296  if ( c == '\n' )
297  ++ppos->lineno;
298 
299  return c;
300 }
301 #else
302 /** Read input from fp_in (variant).
303  *
304  * Here we convert all LF or CR into SPACE and return maximally one SPACE after the other.
305  *
306  * @note This function counts lines differently. On systems that have only one '\\r' as line feed
307  * (MAC) it does not count correctly.
308  */
309 static
311  PPOS* ppos
312  )
313 {
314  int c;
315 
316  assert(ppos != NULL);
317 
318  do
319  {
320  if ( ppos->nextsym == 0 )
321  c = mygetc(ppos);
322  else
323  {
324  c = ppos->nextsym;
325  ppos->nextsym = 0;
326  }
327  assert(ppos->nextsym == 0);
328 
329  if ( c == '\n' )
330  ++ppos->lineno;
331 
332  if ((c == '\n') || (c == '\r'))
333  c = ' ';
334  } while((c == ' ') && (ppos->lastsym == c));
335 
336  ppos->lastsym = c;
337 
338  debugMessage("[%c]\n", c);
339 
340  return c;
341 }
342 #endif
343 
344 /** Reinserts a character into the input stream */
345 static
347  PPOS* ppos,
348  int c
349  )
350 {
351  assert(ppos != NULL);
352  assert(ppos->nextsym == 0);
353 
354  ppos->nextsym = c;
355 }
356 
357 /** Skip all spaces and return the next non-space character or EOF */
358 static
360  PPOS* ppos
361  )
362 {
363  int c;
364 
365  assert(ppos != NULL);
366 
367  do
368  {
369  c = getsymbol(ppos);
370  }
371  while(isspace(c));
372 
373  return c;
374 }
375 
376 /** Get name of a TAG or attribute from the input stream.
377  *
378  * Either it returns a pointer to allocated memory which contains the name or it returns NULL if
379  * there is some error.
380  */
381 static
382 char* getName(
383  PPOS* ppos
384  )
385 {
386  char* name = NULL;
387  size_t size = 0;
388  size_t len = 0;
389  int c;
390 
391  assert(ppos != NULL);
392 
393  c = getsymbol(ppos);
394 
395  if ( ! isalpha(c) && (c != '_') && (c != ':') )
396  {
397  xmlError(ppos, "Name starting with illegal charater");
398  return NULL;
399  }
400 
401  /* The following is wrong: Here almost all characters that we casted to unicode are feasible */
402  while ( isalnum(c) || (c == '_') || (c == ':') || (c == '.') || (c == '-') )
403  {
404  if ( len + 1 >= size )
405  {
406  size += NAME_EXT_SIZE;
407 
408  if ( name == NULL )
409  {
410  ALLOC_ABORT( BMSallocMemoryArray(&name, size) );
411  }
412  else
413  {
414  ALLOC_ABORT( BMSreallocMemoryArray(&name, size) );
415  }
416  }
417  assert(name != NULL);
418  assert(size > len);
419 
420  name[len++] = (char)c;
421 
422  c = getsymbol(ppos);
423  }
424  if ( c != EOF )
425  ungetsymbol(ppos, c);
426 
427  assert(name != NULL);
428 
429  if ( len == 0 )
430  {
431  BMSfreeMemoryArray(&name);
432  name = NULL;
433  }
434  else
435  name[len] = '\0';
436 
437  return name;
438 }
439 
440 /** Read the value of an attribute from the input stream.
441  *
442  * The value has to be between two " or ' (the other character is then valid as well). The function
443  * returns a pointer to allocated memory containing the value or it returns NULL in case of an
444  * error.
445  */
446 static
448  PPOS* ppos
449  )
450 {
451  char* attr = NULL;
452  int c;
453  int stop;
454  size_t len = 0;
455  size_t size = 0;
456 
457  assert(ppos != NULL);
458 
459  /* The following is not allowed according to the specification (the value has to be directly
460  * after the equation sign). */
461  c = skipSpace(ppos);
462 
463  if ( (c != '"') && (c != '\'') )
464  {
465  xmlError(ppos, "Atribute value does not start with \" or \'");
466  return NULL;
467  }
468  stop = c;
469 
470  for(;;)
471  {
472  if ( len == size )
473  {
474  size += ATTR_EXT_SIZE;
475 
476  if ( attr == NULL )
477  {
478  ALLOC_ABORT( BMSallocMemoryArray(&attr, size) );
479  }
480  else
481  {
482  ALLOC_ABORT( BMSreallocMemoryArray(&attr, size) );
483  }
484  }
485  assert(attr != NULL);
486  assert(size > len);
487 
488  c = getsymbol(ppos);
489 
490  if ( (c == stop) || (c == EOF) )
491  break;
492 
493  attr[len++] = (char)c;
494  }
495 
496  if ( c != EOF )
497  attr[len] = '\0';
498  else
499  {
500  BMSfreeMemoryArray(&attr);
501  attr = NULL;
502  }
503  return attr;
504 }
505 
506 /** Skip comment
507  *
508  * Return FALSE if an error occurs.
509  */
510 static
512  PPOS* ppos
513  )
514 {
515  XML_Bool result = TRUE;
516  int c;
517  int state = 0;
518 
519  assert(ppos != NULL);
520 
521  for(;;)
522  {
523  c = getsymbol(ppos);
524 
525  if ( c == EOF )
526  break;
527 
528  if ( (c == '>') && (state >= 2) )
529  break;
530 
531  state = (c == '-') ? state + 1 : 0;
532  }
533  if ( c == EOF )
534  {
535  xmlError(ppos, "Unexpected EOF in comment");
536  result = FALSE;
537  }
538  return result;
539 }
540 
541 /** Handles a CDATA section.
542  *
543  * Returns a pointer to allocated memory containing the data of this section or NULL in case of an
544  * error.
545  */
546 static
547 char* doCdata(
548  PPOS* ppos
549  )
550 {
551  char* data = NULL;
552  size_t size = 0;
553  size_t len = 0;
554  int state = 0;
555  int c;
556 
557  assert(ppos != NULL);
558 
559  for(;;)
560  {
561  c = getsymbol(ppos);
562 
563  if ( c == EOF )
564  break;
565 
566  if ( c == ']' )
567  state++;
568  else
569  if ( (c == '>') && (state >= 2) )
570  break;
571  else
572  state = 0;
573 
574  if ( len == size )
575  {
576  size += DATA_EXT_SIZE;
577 
578  if ( data == NULL )
579  {
580  ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
581  }
582  else
583  {
584  ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
585  }
586  }
587  assert(data != NULL);
588  assert(size > len);
589 
590  data[len++] = (char)c;
591  }
592  assert(data != NULL);
593 
594  /*lint --e{527}*/
595  if ( c != EOF )
596  {
597  assert(len >= 2);
598  assert(data != NULL);
599 
600  data[len - 2] = '\0'; /*lint !e413*/
601  }
602  else
603  {
604  BMSfreeMemoryArray(&data);
605  data = NULL;
606  xmlError(ppos, "Unexpected EOF in CDATA");
607  }
608  return data;
609 }
610 
611 /** Handle processing instructions (skipping) */
612 static
613 void handlePi(
614  PPOS* ppos
615  )
616 {
617  int c;
618 
619  assert(ppos != NULL);
620  assert(ppos->state == XML_STATE_BEFORE);
621 
622  do
623  {
624  c = getsymbol(ppos);
625  }
626  while ( (c != EOF) && (c != '>') );
627 
628  if ( c != EOF )
629  ppos->state = XML_STATE_PCDATA;
630  else
631  {
632  xmlError(ppos, "Unexpected EOF in PI");
633  ppos->state = XML_STATE_ERROR;
634  }
635 }
636 
637 /** Handles declarations that start with a <!.
638  *
639  * This includes comments. Does currenlty not work very well, because of DTDs.
640  */
641 static
643  PPOS* ppos
644  )
645 {
646  enum XmlSection
647  {
648  IS_COMMENT,
649  IS_ATTLIST,
650  IS_DOCTYPE,
651  IS_ELEMENT,
652  IS_ENTITY,
653  IS_NOTATION,
654  IS_CDATA
655  };
656  typedef enum XmlSection XMLSECTION;
657 
658  static struct
659  {
660  const char* name;
661  XMLSECTION what;
662  } key[] =
663  {
664  { "--", IS_COMMENT },
665  { "ATTLIST", IS_ATTLIST },
666  { "DOCTYPE", IS_DOCTYPE },
667  { "ELEMENT", IS_ELEMENT },
668  { "ENTITY", IS_ENTITY },
669  { "NOTATION", IS_NOTATION },
670  { "[CDATA[", IS_CDATA }
671  };
672  XML_NODE* node;
673  char* data;
674  int c;
675  int k = 0;
676  int beg = 0;
677  int end;
678 
679  assert(ppos != NULL);
680  assert(ppos->state == XML_STATE_BEFORE);
681 
682  end = (int) (sizeof(key) / sizeof(key[0])) - 1;
683  do
684  {
685  c = getsymbol(ppos);
686 
687  for(; (beg <= end) && (c != key[beg].name[k]); beg++)
688  ;
689  for(; (end >= beg) && (c != key[end].name[k]); end--)
690  ;
691  k++;
692  } while(beg < end);
693 
694  if ( beg != end )
695  {
696  xmlError(ppos, "Unknown declaration");
697 
698  while ( (c != EOF) && (c != '>') )
699  c = getsymbol(ppos);
700  }
701  else
702  {
703  assert(beg == end);
704  assert(beg < (int)(sizeof(key) / sizeof(*key)));
705 
706  switch(key[beg].what)
707  {
708  case IS_COMMENT :
709  if ( ! doComment(ppos) )
710  ppos->state = XML_STATE_ERROR;
711  break;
712  case IS_CDATA :
713  if ( (data = doCdata(ppos)) == NULL )
714  ppos->state = XML_STATE_ERROR;
715  else
716  {
717  if ( NULL == (node = xmlNewNode("#CDATA", ppos->lineno)) )
718  {
719  xmlError(ppos, "Can't create new node");
720  ppos->state = XML_STATE_ERROR;
721  }
722  else
723  {
724  BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
725  BMSfreeMemoryArray(&data);
726  xmlAppendChild(topPstack(ppos), node);
727  }
728  }
729  break;
730  case IS_ATTLIST :
731  case IS_ELEMENT :
732  case IS_NOTATION :
733  case IS_ENTITY :
734  case IS_DOCTYPE :
735  break;
736  default :
737  abort();
738  }
739  }
740 }
741 
742 /** Handle end tag */
743 static
745  PPOS* ppos
746  )
747 {
748  char* name;
749  int c;
750 
751  assert(ppos != NULL);
752 
753  if ( (name = getName(ppos)) == NULL )
754  xmlError(ppos, "Missing name in endtag");
755  else
756  {
757  c = skipSpace(ppos);
758 
759  if ( c != '>' )
760  {
761  xmlError(ppos, "Missing '>' in endtag");
762  ppos->state = XML_STATE_ERROR;
763  }
764  else
765  {
766  if ( strcmp(name, topPstack(ppos)->name) )
767  {
768  xmlError(ppos, "Name of endtag does not match starttag");
769  ppos->state = XML_STATE_ERROR;
770  }
771  else
772  {
773  if ( popPstack(ppos) )
774  ppos->state = XML_STATE_PCDATA;
775  else
776  ppos->state = XML_STATE_ERROR;
777  }
778  }
779 
780  BMSfreeMemoryArray(&name);
781  }
782 }
783 
784 /** Handle start tag */
785 static
787  PPOS* ppos
788  )
789 {
790  XML_NODE* node;
791  char* name;
792 
793  assert(ppos != NULL);
794 
795  name = getName(ppos);
796  if ( name == NULL )
797  {
798  xmlError(ppos, "Missing name in tagstart");
799  ppos->state = XML_STATE_ERROR;
800  }
801  else
802  {
803  node = xmlNewNode(name, ppos->lineno);
804  if ( node == NULL )
805  {
806  xmlError(ppos, "Can't create new node");
807  ppos->state = XML_STATE_ERROR;
808  }
809  else
810  {
811  xmlAppendChild(topPstack(ppos), node);
812 
813  if ( pushPstack(ppos, node) )
814  ppos->state = XML_STATE_IN_TAG;
815  else
816  ppos->state = XML_STATE_ERROR;
817  }
818  BMSfreeMemoryArray(&name);
819  }
820 }
821 
822 /** Checks for next tag */
823 static
825  PPOS* ppos /**< input stream position */
826  )
827 {
828  int c;
829 
830  assert(ppos != NULL);
831  assert(ppos->state == XML_STATE_BEFORE);
832 
833  c = skipSpace(ppos);
834 
835  if ( c != '<' )
836  {
837  xmlError(ppos, "Expecting '<'");
838  ppos->state = XML_STATE_ERROR;
839  }
840  else
841  {
842  c = getsymbol(ppos);
843 
844  switch(c)
845  {
846  case EOF :
847  xmlError(ppos, "Unexpected EOF");
848  ppos->state = XML_STATE_ERROR;
849  break;
850  case '!' :
851  handleDecl(ppos);
852  break;
853  case '?' :
854  handlePi(ppos);
855  break;
856  case '/' :
857  handleEndtag(ppos);
858  break;
859  default :
860  ungetsymbol(ppos, c);
861  handleStarttag(ppos);
862  break;
863  }
864  }
865 }
866 
867 /** Process tag */
868 static
870  PPOS* ppos /**< input stream position */
871  )
872 {
873  XML_ATTR* attr;
874  int c;
875  XML_Bool empty = FALSE;
876  char* name;
877  char* value;
878 
879  assert(ppos != NULL);
880  assert(ppos->state == XML_STATE_IN_TAG);
881 
882  c = skipSpace(ppos);
883 
884  if ( (c == '/') || (c == '>') || (c == EOF) )
885  {
886  if ( c == '/' )
887  {
888  empty = TRUE;
889  c = getsymbol(ppos);
890  }
891 
892  if ( c == EOF )
893  {
894  xmlError(ppos, "Unexpected EOF while in a tag");
895  ppos->state = XML_STATE_ERROR;
896  }
897 
898  if ( c == '>' )
899  {
900  ppos->state = XML_STATE_PCDATA;
901 
902  if (empty && ! popPstack(ppos))
903  ppos->state = XML_STATE_ERROR;
904  }
905  else
906  {
907  xmlError(ppos, "Expected tag end marker '>'");
908  ppos->state = XML_STATE_ERROR;
909  }
910  }
911  else
912  {
913  ungetsymbol(ppos, c);
914 
915  name = getName(ppos);
916  if ( name == NULL )
917  {
918  xmlError(ppos, "No name for attribute");
919  ppos->state = XML_STATE_ERROR;
920  }
921  else
922  {
923  c = skipSpace(ppos);
924 
925  if ( (c != '=') || ((value = getAttrval(ppos)) == NULL) )
926  {
927  xmlError(ppos, "Missing attribute value");
928  ppos->state = XML_STATE_ERROR;
929  BMSfreeMemoryArray(&name);
930  }
931  else
932  {
933  attr = xmlNewAttr(name, value);
934  if ( attr == NULL )
935  {
936  xmlError(ppos, "Can't create new attribute");
937  ppos->state = XML_STATE_ERROR;
938  }
939  else
940  {
941  xmlAddAttr(topPstack(ppos), attr);
942  }
943  BMSfreeMemoryArray(&name);
944  BMSfreeMemoryArray(&value);
945  }
946  }
947  }
948 }
949 
950 /* Handles PCDATA */
951 static
953  PPOS* ppos /**< input stream position */
954  )
955 {
956  XML_NODE* node;
957  char* data = NULL;
958  size_t size = 0;
959  size_t len = 0;
960  int c;
961 
962  assert(ppos != NULL);
963  assert(ppos->state == XML_STATE_PCDATA);
964 
965 #ifndef SPEC_LIKE_SPACE_HANDLING
966  c = skipSpace(ppos);
967  if ( c != EOF )
968  ungetsymbol(ppos, c);
969 #endif
970  c = getsymbol(ppos);
971 
972  while ( (c != EOF) && (c != '<') )
973  {
974  if ( len + 1 >= size ) /* leave space for terminating '\0' */
975  {
976  size += DATA_EXT_SIZE;
977 
978  if ( data == NULL )
979  {
980  ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
981  }
982  else
983  {
984  ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
985  }
986  }
987  assert(data != NULL);
988  assert(size > len + 1);
989 
990  data[len++] = (char)c;
991 
992  c = getsymbol(ppos);
993  }
994  if ( data == NULL )
995  {
996  if ( c == EOF )
997  ppos->state = XML_STATE_EOF;
998  else
999  {
1000  assert(c == '<');
1001  ppos->state = XML_STATE_BEFORE;
1002  ungetsymbol(ppos, c);
1003  }
1004  }
1005  else
1006  {
1007  assert(len < size);
1008  data[len] = '\0';
1009 
1010  if ( c == EOF )
1011  ppos->state = XML_STATE_ERROR;
1012  else
1013  {
1014  ungetsymbol(ppos, c);
1015 
1016  node = xmlNewNode("#PCDATA", ppos->lineno);
1017  if ( node == NULL )
1018  {
1019  xmlError(ppos, "Can't create new node");
1020  ppos->state = XML_STATE_ERROR;
1021  }
1022  else
1023  {
1024  BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
1025  xmlAppendChild(topPstack(ppos), node);
1026  ppos->state = XML_STATE_BEFORE;
1027  }
1028  }
1029 
1030  BMSfreeMemoryArray(&data);
1031  }
1032 }
1033 
1034 /** Parse input stream */
1035 static
1037  PPOS* ppos /**< input stream position */
1038  )
1039 {
1040  XML_Bool ok = TRUE;
1041 
1042  while (ok)
1043  {
1044  debugMessage("state=%d\n", ppos->state);
1045 
1046  switch (ppos->state)
1047  {
1048  case XML_STATE_BEFORE :
1049  procBefore(ppos);
1050  break;
1051  case XML_STATE_IN_TAG :
1052  procInTag(ppos);
1053  break;
1054  case XML_STATE_PCDATA :
1055  procPcdata(ppos);
1056  break;
1057  case XML_STATE_EOF :
1058  ok = FALSE;
1059  break;
1060  case XML_STATE_ERROR :
1061  ok = FALSE;
1062  break;
1063  default :
1064  xmlError(ppos, "Internal Error, illegal state");
1065  ok = FALSE;
1066  }
1067  }
1068  return (ppos->state == XML_STATE_EOF);
1069 }
1070 
1071 /** Parse file */
1073  const char* filename /**< XML file name */
1074  )
1075 {
1076  PPOS ppos;
1077  XML_NODE* node = NULL;
1078  XML_ATTR* attr;
1079  XML_Bool result = FALSE;
1080  char* myfilename;
1081  size_t filenamelen;
1082 
1083  /* allocate space and copy filename (possibly modified below) in two steps in order to satisfy valgrind */
1084  assert( filename != NULL );
1085  filenamelen = strlen(filename);
1086  if ( BMSallocMemoryArray(&myfilename, filenamelen + 5) == NULL )
1087  return NULL;
1088  BMScopyMemoryArray(myfilename, filename, filenamelen + 1);
1089 
1090 #ifdef WITH_ZLIB
1091  if ( access(filename, R_OK) != 0 )
1092  {
1093  strcat(myfilename, ".gz");
1094 
1095  /* If .gz also does not work, revert to the old name
1096  * to get a better error message.
1097  */
1098  if ( access(myfilename, R_OK) != 0 )
1099  strcpy(myfilename, filename);
1100  }
1101 #endif
1102  ppos.fp = FOPEN(myfilename, "r");
1103  if ( ppos.fp == NULL )
1104  perror(myfilename);
1105  else
1106  {
1107  ppos.filename = myfilename;
1108  ppos.buf[0] = '\0';
1109  ppos.pos = 0;
1110  ppos.lineno = 1;
1111  ppos.nextsym = 0;
1112  ppos.lastsym = 0;
1113  ppos.state = XML_STATE_BEFORE;
1114  ppos.top = NULL;
1115 
1116  node = xmlNewNode("#ROOT", ppos.lineno);
1117  if ( node == NULL )
1118  {
1119  xmlError(&ppos, "Can't create new node");
1120  }
1121  else
1122  {
1123  attr = xmlNewAttr("filename", myfilename);
1124  if ( attr == NULL )
1125  xmlError(&ppos, "Can't create new attribute");
1126  else
1127  {
1128  xmlAddAttr(node, attr);
1129 
1130  /* push root node on stack and start to process */
1131  if ( pushPstack(&ppos, node) )
1132  {
1133  result = xmlParse(&ppos);
1134 
1135  clearPstack(&ppos);
1136  }
1137  }
1138  }
1139 
1140  if ( ! result && (node != NULL) )
1141  {
1142  xmlErrmsg(&ppos, "Parsing error, processing stopped", TRUE, __FILE__, __LINE__);
1143  xmlFreeNode(node);
1144  node = NULL;
1145  }
1146  if ( FCLOSE(ppos.fp) )
1147  perror(myfilename);
1148  }
1149  BMSfreeMemoryArray(&myfilename);
1150 
1151  return node;
1152 }
1153 
1154 
1155 
1156 
1157 
1158 
1159 /*----------------------------------------------------------------------------------------------*/
1160 
1161 
1162 /** create new node */
1164  const char* name,
1165  int lineno
1166  )
1167 {
1168  XML_NODE* n = NULL;
1169 
1170  assert(name != NULL);
1171 
1172  if ( BMSallocMemory(&n) != NULL )
1173  {
1174  BMSclearMemory(n);
1175  BMSduplicateMemoryArray(&n->name, name, strlen(name)+1);
1176  n->lineno = lineno;
1177  }
1178  return n;
1179 }
1180 
1181 /** create new attribute */
1183  const char* name,
1184  const char* value
1185  )
1186 {
1187  XML_ATTR* a = NULL;
1188 
1189  assert(name != NULL);
1190  assert(value != NULL);
1191 
1192  if ( BMSallocMemory(&a) != NULL )
1193  {
1194  BMSclearMemory(a);
1195  BMSduplicateMemoryArray(&a->name, name, strlen(name)+1);
1196  BMSduplicateMemoryArray(&a->value, value, strlen(value)+1);
1197  }
1198  return a;
1199 }
1200 
1201 /** add attribute */
1203  XML_NODE* n,
1204  XML_ATTR* a
1205  )
1206 {
1207  assert(n != NULL);
1208  assert(a != NULL);
1209 
1210  a->next = n->attrlist;
1211  n->attrlist = a;
1212 }
1213 
1214 /** append child node */
1216  XML_NODE* parent,
1217  XML_NODE* child
1218  )
1219 {
1220  assert(parent != NULL);
1221  assert(child != NULL);
1222 
1223  child->parent = parent;
1224  child->prevsibl = parent->lastchild;
1225  child->nextsibl = NULL;
1226  parent->lastchild = child;
1227 
1228  if ( child->prevsibl != NULL )
1229  child->prevsibl->nextsibl = child;
1230 
1231  if ( parent->firstchild == NULL )
1232  parent->firstchild = child;
1233 }
1234 
1235 /** free attribute */
1236 static
1238  XML_ATTR* attr
1239  )
1240 {
1241  XML_ATTR* a;
1242 
1243  /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1244  * and might overflow the heap. */
1245  a = attr;
1246  while (a != NULL)
1247  {
1248  XML_ATTR* b;
1249  b = a->next;
1250 
1251  assert(a->name != NULL);
1252  assert(a->value != NULL);
1253 
1254  BMSfreeMemoryArray(&a->name);
1255  BMSfreeMemoryArray(&a->value);
1256  BMSfreeMemory(&a);
1257  a = b;
1258  }
1259 }
1260 
1261 /** free node */
1263  XML_NODE* node
1264  )
1265 {
1266  XML_NODE* n;
1267 
1268  if ( node == NULL )
1269  return;
1270 
1271  /* Free data from back to front (because free is faster this way). */
1272  /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1273  * and might overflow the heap. */
1274  n = node->lastchild;
1275  while ( n != NULL )
1276  {
1277  XML_NODE* m;
1278  m = n->prevsibl;
1279  xmlFreeNode(n);
1280  n = m;
1281  }
1282 
1283  xmlFreeAttr(node->attrlist);
1284 
1285  if ( node->data != NULL )
1286  {
1287  BMSfreeMemoryArray(&node->data);
1288  }
1289  assert(node->name != NULL);
1290 
1291  BMSfreeMemoryArray(&node->name);
1292  BMSfreeMemory(&node);
1293 }
1294 
1295 /** output node */
1297  const XML_NODE* root
1298  )
1299 {
1300  const XML_NODE* n;
1301  const XML_ATTR* a;
1302 
1303  assert(root != NULL);
1304 
1305  for (n = root; n != NULL; n = n->nextsibl)
1306  {
1307  infoMessage("Name: %s\n", n->name);
1308  infoMessage("Line: %d\n", n->lineno);
1309  infoMessage("Data: %s\n", (n->data != NULL) ? n->data : "***");
1310 
1311  for (a = n->attrlist; a != NULL; a = a->next)
1312  infoMessage("Attr: %s = [%s]\n", a->name, a->value);
1313 
1314  if ( n->firstchild != NULL )
1315  {
1316  infoMessage("->\n");
1317  xmlShowNode(n->firstchild);
1318  infoMessage("<-\n");
1319  }
1320  }
1321 }
1322 
1323 /** get attribute value */
1324 const char* xmlGetAttrval(
1325  const XML_NODE* node,
1326  const char* name
1327  )
1328 {
1329  XML_ATTR* a;
1330 
1331  assert(node != NULL);
1332  assert(name != NULL);
1333 
1334  for (a = node->attrlist; a != NULL; a = a->next)
1335  {
1336  if ( ! strcmp(name, a->name) )
1337  break;
1338  }
1339 
1340 #ifdef SCIP_DEBUG
1341  if (a == NULL)
1342  infoMessage("Error: Attribute %s in TAG <%s> not found\n", name, node->name);
1343 #endif
1344 
1345  return (a == NULL) ? NULL : a->value;
1346 }
1347 
1348 /** return first node */
1350  const XML_NODE* node,
1351  const char* name
1352  )
1353 {
1354  const XML_NODE* n;
1355 
1356  assert(node != NULL);
1357  assert(name != NULL);
1358 
1359  for (n = node; n != NULL; n = n->nextsibl)
1360  {
1361  if ( ! strcmp(name, n->name) )
1362  break;
1363  }
1364 
1365  return n;
1366 }
1367 
1368 /** return next node */
1370  const XML_NODE* node,
1371  const char* name
1372  )
1373 {
1374  assert(node != NULL);
1375  assert(name != NULL);
1376 
1377  return (node->nextsibl == NULL) ? NULL : xmlFirstNode(node->nextsibl, name);
1378 }
1379 
1380 /** find node */
1382  const XML_NODE* node,
1383  const char* name
1384  )
1385 {
1386  const XML_NODE* n;
1387  const XML_NODE* r;
1388 
1389  assert(node != NULL);
1390  assert(name != NULL);
1391 
1392  if ( ! strcmp(name, node->name) )
1393  return node;
1394 
1395  for (n = node->firstchild; n != NULL; n = n->nextsibl)
1396  {
1397  r = xmlFindNode(n, name);
1398  if ( r != NULL )
1399  return r;
1400  }
1401 
1402  return NULL;
1403 }
1404 
1405 /** find node with bound on the depth */
1407  const XML_NODE* node, /**< current node - use start node to begin */
1408  const char* name, /**< name of tag to search for */
1409  int depth, /**< current depth - start with 0 for root */
1410  int maxdepth /**< maximal depth */
1411  )
1412 {
1413  const XML_NODE* n;
1414  const XML_NODE* r;
1415 
1416  assert(node != NULL);
1417  assert(name != NULL);
1418 
1419  if ( ! strcmp(name, node->name) )
1420  return node;
1421 
1422  if ( depth < maxdepth )
1423  {
1424  for (n = node->firstchild; n != NULL; n = n->nextsibl)
1425  {
1426  r = xmlFindNodeMaxdepth(n, name, depth+1, maxdepth);
1427  if ( r != NULL )
1428  return r;
1429  }
1430  }
1431 
1432  return NULL;
1433 }
1434 
1435 /** return next sibling */
1437  const XML_NODE* node
1438  )
1439 {
1440  assert(node != NULL);
1441 
1442  return node->nextsibl;
1443 }
1444 
1445 /** return previous sibling */
1447  const XML_NODE* node
1448  )
1449 {
1450  assert(node != NULL);
1451 
1452  return node->prevsibl;
1453 }
1454 
1455 /** return first child */
1457  const XML_NODE* node
1458  )
1459 {
1460  assert(node != NULL);
1461 
1462  return node->firstchild;
1463 }
1464 
1465 /** return last child */
1467  const XML_NODE* node
1468  )
1469 {
1470  assert(node != NULL);
1471 
1472  return node->lastchild;
1473 }
1474 
1475 /** return name of node */
1476 const char* xmlGetName(
1477  const XML_NODE* node
1478  )
1479 {
1480  assert(node != NULL);
1481 
1482  return node->name;
1483 }
1484 
1485 /** get line number */
1487  const XML_NODE* node
1488  )
1489 {
1490  assert(node != NULL);
1491 
1492  return node->lineno;
1493 }
1494 
1495 /** get data */
1496 const char* xmlGetData(
1497  const XML_NODE* node
1498  )
1499 {
1500  assert(node != NULL);
1501 
1502  return node->data;
1503 }
1504 
1505 /** find PCDATA */
1506 const char* xmlFindPcdata(
1507  const XML_NODE* node,
1508  const char* name
1509  )
1510 {
1511  const XML_NODE* n;
1512 
1513  assert(node != NULL);
1514  assert(name != NULL);
1515 
1516  n = xmlFindNode(node, name);
1517  if ( n == NULL )
1518  return NULL;
1519 
1520  if ( ! strcmp(n->firstchild->name, "#PCDATA") )
1521  return n->firstchild->data;
1522 
1523  return NULL;
1524 }
#define XML_Bool
Definition: xmldef.h:33
#define LINE_BUF_SIZE
Definition: xmlparse.c:56
const XML_NODE * xmlFirstNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1349
static void handleDecl(PPOS *ppos)
Definition: xmlparse.c:642
PSTACK * next
Definition: xmlparse.c:80
const char * xmlFindPcdata(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1506
void xmlFreeNode(XML_NODE *node)
Definition: xmlparse.c:1262
#define FREAD(buf, len, fp)
Definition: xmldef.h:54
static int getsymbol(PPOS *ppos)
Definition: xmlparse.c:310
#define ALLOC_ABORT(x)
Definition: tclique_def.h:35
#define FCLOSE(fp)
Definition: xmldef.h:52
struct XML_ATTR_struct XML_ATTR
Definition: xml.h:32
static char * doCdata(PPOS *ppos)
Definition: xmlparse.c:547
static void xmlFreeAttr(XML_ATTR *attr)
Definition: xmlparse.c:1237
#define FALSE
Definition: def.h:64
XML_NODE * xmlNewNode(const char *name, int lineno)
Definition: xmlparse.c:1163
#define TRUE
Definition: def.h:63
const char * xmlGetName(const XML_NODE *node)
Definition: xmlparse.c:1476
#define BMSallocMemoryArray(ptr, num)
Definition: memory.h:105
#define DATA_EXT_SIZE
Definition: xmlparse.c:55
enum parse_state_enum PSTATE
Definition: xmlparse.c:74
#define ATTR_EXT_SIZE
Definition: xmlparse.c:54
XML_ATTR * xmlNewAttr(const char *name, const char *value)
Definition: xmlparse.c:1182
#define BMSfreeMemory(ptr)
Definition: memory.h:127
void xmlShowNode(const XML_NODE *root)
Definition: xmlparse.c:1296
static void procInTag(PPOS *ppos)
Definition: xmlparse.c:869
const char * xmlGetData(const XML_NODE *node)
Definition: xmlparse.c:1496
#define debugMessage
Definition: tclique_def.h:65
static void xmlErrmsg(PPOS *ppos, const char *msg, XML_Bool msg_only, const char *file, int line)
Definition: xmlparse.c:99
static void procPcdata(PPOS *ppos)
Definition: xmlparse.c:952
static int mygetc(PPOS *ppos)
Definition: xmlparse.c:236
PSTACK * top
Definition: xmlparse.c:94
static void ungetsymbol(PPOS *ppos, int c)
Definition: xmlparse.c:346
#define BMSfreeMemoryArray(ptr)
Definition: memory.h:129
parse_state_enum
Definition: xmlparse.c:66
static XML_Bool popPstack(PPOS *ppos)
Definition: xmlparse.c:196
struct XML_NODE_struct XML_NODE
Definition: xml.h:41
static XML_Bool doComment(PPOS *ppos)
Definition: xmlparse.c:511
#define ALLOC_FALSE(x)
Definition: tclique_def.h:47
const XML_NODE * xmlFindNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1381
const char * xmlGetAttrval(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1324
const XML_NODE * xmlNextNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1369
static XML_Bool xmlParse(PPOS *ppos)
Definition: xmlparse.c:1036
#define BMSduplicateMemoryArray(ptr, source, num)
Definition: memory.h:125
const XML_NODE * xmlFirstChild(const XML_NODE *node)
Definition: xmlparse.c:1456
XML_NODE * xmlProcess(const char *filename)
Definition: xmlparse.c:1072
static int skipSpace(PPOS *ppos)
Definition: xmlparse.c:359
static XML_NODE * topPstack(const PPOS *ppos)
Definition: xmlparse.c:181
static char * getAttrval(PPOS *ppos)
Definition: xmlparse.c:447
void xmlAddAttr(XML_NODE *n, XML_ATTR *a)
Definition: xmlparse.c:1202
#define FGETS(buf, len, fp)
Definition: xmldef.h:53
#define BMScopyMemoryArray(ptr, source, num)
Definition: memory.h:116
#define FOPEN(file, mode)
Definition: xmldef.h:51
#define infoMessage
Definition: tclique_def.h:71
#define FPTYPE
Definition: xmldef.h:55
#define xmlError(a, b)
Definition: xmlparse.c:58
#define BMSclearMemory(ptr)
Definition: memory.h:111
static char * getName(PPOS *ppos)
Definition: xmlparse.c:382
XML_NODE * node
Definition: xmlparse.c:79
const XML_NODE * xmlNextSibl(const XML_NODE *node)
Definition: xmlparse.c:1436
char buf[LINE_BUF_SIZE]
Definition: xmlparse.c:88
const char * filename
Definition: xmlparse.c:86
static XML_Bool pushPstack(PPOS *ppos, XML_NODE *node)
Definition: xmlparse.c:158
#define BMSallocMemory(ptr)
Definition: memory.h:101
#define BMSreallocMemoryArray(ptr, num)
Definition: memory.h:109
declarations for XML parsing
#define NAME_EXT_SIZE
Definition: xmlparse.c:53
const XML_NODE * xmlPrevSibl(const XML_NODE *node)
Definition: xmlparse.c:1446
const XML_NODE * xmlLastChild(const XML_NODE *node)
Definition: xmlparse.c:1466
const XML_NODE * xmlFindNodeMaxdepth(const XML_NODE *node, const char *name, int depth, int maxdepth)
Definition: xmlparse.c:1406
static void handlePi(PPOS *ppos)
Definition: xmlparse.c:613
int xmlGetLine(const XML_NODE *node)
Definition: xmlparse.c:1486
void xmlAppendChild(XML_NODE *parent, XML_NODE *child)
Definition: xmlparse.c:1215
static void handleStarttag(PPOS *ppos)
Definition: xmlparse.c:786
PSTATE state
Definition: xmlparse.c:93
static void procBefore(PPOS *ppos)
Definition: xmlparse.c:824
static void handleEndtag(PPOS *ppos)
Definition: xmlparse.c:744
static void clearPstack(PPOS *ppos)
Definition: xmlparse.c:224
memory allocation routines
definitions for XML parsing