001         package com.croftsoft.core.text.sml;
002    
003         import java.io.*;
004         import java.util.StringTokenizer;
005         import java.util.Vector;
006    
007         import com.croftsoft.core.lang.NullArgumentException;
008         import com.croftsoft.core.lang.StringLib;
009    
010         /*********************************************************************
011         * A library of static methods for manipulating SmlNode objects.
012         *
013         * <p>
014         * Java 1.1 compatible.
015         * </p>
016         *
017         * @version
018         *   2001-05-14
019         * @since
020         *   2001-05-10
021         * @author
022         *   <a href="https://www.croftsoft.com/">David W. Croft</a>
023         *********************************************************************/
024    
025         public final class  SmlNodeLib
026         //////////////////////////////////////////////////////////////////////
027         //////////////////////////////////////////////////////////////////////
028         {
029    
030         public static void  main ( String [ ]  args )
031         //////////////////////////////////////////////////////////////////////
032         {
033           System.out.println ( parse ( "<abc/>" ) );
034           System.out.println ( parse ( "<abc></abc>" ) );
035           System.out.println ( parse ( "<abc>def</abc>" ) );
036           System.out.println ( parse ( "<abc><def/></abc>" ) );
037           System.out.println ( parse ( "<abc><def></def></abc>" ) );
038           System.out.println ( parse ( "<abc><def>ghi</def></abc>" ) );
039         }
040              
041         //////////////////////////////////////////////////////////////////////
042         //////////////////////////////////////////////////////////////////////
043    
044         public static boolean  isCDataCharacter ( char  c )
045         //////////////////////////////////////////////////////////////////////
046         {
047    // check to see what XML really defines
048    
049           return ( c != '<' )
050             &&   ( c != '>' );
051         }
052    
053         public static boolean  isElementNameCharacter ( char  c )
054         //////////////////////////////////////////////////////////////////////
055         {
056    // check to see what XML really defines
057    
058           return
059             !isWhiteSpaceCharacter ( c )
060             && ( c != '<' )
061             && ( c != '>' )
062             && ( c != '/' );
063         }
064    
065         public static boolean  isWhiteSpaceCharacter ( char  c )
066         //////////////////////////////////////////////////////////////////////
067         {
068    // check to see what XML really defines
069    
070           return Character.isWhitespace ( c );
071         }
072    
073         public static SmlNode  load (
074           String   filename,
075           boolean  allowMixedChildren )
076           throws IOException
077         //////////////////////////////////////////////////////////////////////
078         {
079           SmlNodeParseHandler  smlNodeParseHandler
080             = new SmlNodeParseHandler ( allowMixedChildren );
081    
082           BufferedInputStream  in = null;
083    
084           try
085           {
086             in = new BufferedInputStream (
087               new FileInputStream ( filename ) );
088    
089             SmlNodeLib.parse ( in, smlNodeParseHandler );
090           }
091           finally
092           {
093             if ( in != null )
094             {
095               in.close ( );
096             }
097           }
098    
099           return smlNodeParseHandler.getSmlNode ( );
100         }
101    
102         public static SmlNode  parse ( String  smlString )
103         //////////////////////////////////////////////////////////////////////
104         {
105           NullArgumentException.check ( smlString );
106    
107           StringTokenizer  stringTokenizer
108             = new StringTokenizer ( smlString, "<>", true );
109    
110           Vector  stackVector = new Vector ( );
111    
112           SmlNode  currentSmlNode = null;
113    
114           while ( stringTokenizer.hasMoreTokens ( ) )
115           {
116             String  token = stringTokenizer.nextToken ( );
117             
118             if ( token.equals ( "<" ) )
119             {
120               token = stringTokenizer.nextToken ( );
121               
122               if ( token.startsWith ( "/" ) )
123               {
124                 if ( !currentSmlNode.hasChild ( ) )
125                 {
126                   currentSmlNode.add ( "" );
127                 }
128                 
129                 if ( !stackVector.isEmpty ( ) )
130                 {
131                   // stack pop
132    
133                   int  index = stackVector.size ( ) - 1;
134    
135                   currentSmlNode = ( SmlNode ) stackVector.elementAt ( index );
136    
137                   stackVector.removeElementAt ( index );
138                 }
139                 else
140                 {
141                   return currentSmlNode;
142                 }
143               }
144               else
145               {
146                 String  name = token;
147                 
148                 boolean  isParent = true;
149                 
150                 if ( token.endsWith ( "/" ) )
151                 {
152                   name = token.substring ( 0, token.length ( ) - 1 );
153                   
154                   isParent = false;
155                 }
156    
157                 SmlNode  childSmlNode = new SmlNode ( name );
158                 
159                 if ( currentSmlNode == null )
160                 {
161                   if ( !isParent )
162                   {
163                     return childSmlNode;
164                   }
165    
166                   currentSmlNode = childSmlNode;
167                 }
168                 else
169                 {
170                   currentSmlNode.add ( childSmlNode );
171                   
172                   if ( isParent )
173                   {
174                     stackVector.addElement ( currentSmlNode );
175                     
176                     currentSmlNode = childSmlNode;
177                   }
178                 }
179               }               
180    
181               stringTokenizer.nextToken ( ); // >
182             }
183             else
184             {
185               try
186               {
187                 if ( currentSmlNode != null )
188                 {
189                   currentSmlNode.add ( SmlCoder.decode ( token ) );
190                 }
191               }
192               catch ( NullPointerException  ex )
193               {
194                 throw new IllegalArgumentException (
195                   "Missing or mismatched angle brackets:  \""
196                   + smlString + "\"" );
197               }
198             } 
199           }
200    
201           throw new IllegalArgumentException (
202             "Missing or mismatched angle brackets:  \""
203             + smlString + "\"" );
204         }
205    
206         /*********************************************************************
207         * Parses an SML stream.
208         *
209         * <p>
210         * State Transitions:
211         * <pre>
212         * c == character data (cdata) character (excludes angle brackets)
213         * w == white space character
214         * e == element (tag) name character
215         *
216         * (+)     == saves character to buffer
217         * (cdata) == calls handleCData
218         * (open)  == calls handleElementOpen
219         * (close) == calls handleElementClose
220         *
221         * 0 | c --> 0 (+), < --> 1 (cdata)
222         * 1 | w --> 2, e --> 3 (+), / --> 6
223         * 2 | w --> 2, e --> 3 (+)
224         * 3 | e --> 3 (+), w --> 4, / --> 5, > --> 0 (open)
225         * 4 | w --> 4, / --> 5, > --> 0 (open)
226         * 5 | > --> 0 (open, close)
227         * 6 | w --> 6, e --> 7 (+)
228         * 7 | e --> 7 (+), w --> 8, > --> 0 (close)
229         * 8 | w --> 8, > --> 0 (close)
230         *
231         *  0 | reading cdata, seeking <
232         *  1 | just after <; seeking element (tag) name, white space, or /
233         *  2 | inside white space before tag name; seeking tag name
234         *  3 | reading opening tag name; seeking white space, /, or >
235         *  4 | inside white space after opening tag name; seeking / or >
236         *  5 | found / after tag name, element has no children; seeking >
237         *  6 | found / after <, closing tag; skipping white, seeking tag name
238         *  7 | reading closing tag name; seeking white space or >
239         *  8 | inside white space after closing tag name; seeking >
240         * </pre>
241         * </p>
242         *********************************************************************/
243         public static void  parse (
244           InputStream      inputStream,
245           SmlParseHandler  smlParseHandler )
246           throws IOException
247         //////////////////////////////////////////////////////////////////////
248         {
249           NullArgumentException.check ( inputStream );
250    
251           NullArgumentException.check ( smlParseHandler );
252    
253           StringBuffer  stringBuffer = new StringBuffer ( );
254    
255           int  state = 0;
256    
257           int  i;
258    
259           while ( ( i = inputStream.read ( ) ) > -1 )
260           {
261             char  c = ( char ) i;
262    
263             switch ( state )
264             {
265               case 0:
266    
267                 // c --> 0 (+), < --> 1 (cdata)
268                 // reading cdata, seeking <
269    
270                 if ( isCDataCharacter ( c ) )
271                 {
272                   stringBuffer.append ( c );
273                 }
274                 else if ( c == '<' )
275                 {
276                   state = 1;
277    
278                   smlParseHandler.handleCData ( stringBuffer.toString ( ) );
279    
280                   stringBuffer = new StringBuffer ( );
281                 }
282                 else
283                 {
284                   smlParseHandler.handleParseError ( );
285                 }
286     
287                 break;
288    
289               case 1:
290     
291                 // w --> 2, e --> 3 (+), / --> 6
292                 // just after <; seeking tag name, white space, or /
293    
294                 if ( isWhiteSpaceCharacter ( c ) )
295                 {
296                   state = 2;
297                 }
298                 else if ( isElementNameCharacter ( c ) )
299                 {
300                   state = 3;
301    
302                   stringBuffer.append ( c );
303                 }
304                 else if ( c == '/' )
305                 {
306                   state = 6;
307                 }
308                 else
309                 {
310                   smlParseHandler.handleParseError ( );
311                 }
312     
313                 break;
314    
315               case 2:
316    
317                 // w --> 2, e --> 3 (+)
318                 // inside white space before tag name; seeking tag name
319    
320                 if ( isWhiteSpaceCharacter ( c ) )
321                 {
322                 }
323                 else if ( isElementNameCharacter ( c ) )
324                 {
325                   state = 3;
326    
327                   stringBuffer.append ( c );
328                 }
329                 else
330                 {
331                   smlParseHandler.handleParseError ( );
332                 }
333     
334                 break;
335    
336               case 3:
337    
338                 // e --> 3 (+), w --> 4, / --> 5, > --> 0 (open)
339                 // reading opening tag name; seeking white space, /, or >
340    
341                 if ( isElementNameCharacter ( c ) )
342                 {
343                   stringBuffer.append ( c );
344                 }
345                 else if ( isWhiteSpaceCharacter ( c ) )
346                 {
347                   state = 4;
348                 }
349                 else if ( c == '/' )
350                 {
351                   state = 5;
352                 }
353                 else if ( c == '>' )
354                 {
355                   state = 0;
356    
357                   smlParseHandler.handleElementOpen ( stringBuffer.toString ( ) );
358    
359                   stringBuffer = new StringBuffer ( );
360                 }
361                 else
362                 {
363                   smlParseHandler.handleParseError ( );
364                 }
365     
366                 break;
367    
368               case 4:
369    
370                 // w --> 4, / --> 5, > --> 0 (open)
371                 // inside white space after opening tag name; seeking / or >
372    
373                 if ( isWhiteSpaceCharacter ( c ) )
374                 {
375                 }
376                 else if ( c == '/' )
377                 {
378                   state = 5;
379                 }
380                 else if ( c == '>' )
381                 {
382                   state = 0;
383    
384                   smlParseHandler.handleElementOpen ( stringBuffer.toString ( ) );
385    
386                   stringBuffer = new StringBuffer ( );
387                 }
388                 else
389                 {
390                   smlParseHandler.handleParseError ( );
391                 }
392     
393                 break;
394    
395               case 5:
396    
397                 // > --> 0 (open, close)
398                 // found / after tag name, element has no children; seeking >
399    
400                 if ( c == '>' )
401                 {
402                   state = 0;
403    
404                   String  elementName = stringBuffer.toString ( );
405    
406                   smlParseHandler.handleElementOpen ( elementName );
407    
408                   smlParseHandler.handleElementClose ( elementName );
409    
410                   stringBuffer = new StringBuffer ( );
411                 }
412                 else
413                 {
414                   smlParseHandler.handleParseError ( );
415                 }
416     
417                 break;
418    
419               case 6:
420    
421                 // w --> 6, e --> 7 (+)
422                 // found / after <, closing tag; skipping white, seeking tag name
423    
424                 if ( isWhiteSpaceCharacter ( c ) )
425                 {
426                 }
427                 else if ( isElementNameCharacter ( c ) )
428                 {
429                   state = 7;
430    
431                   stringBuffer.append ( c );
432                 }
433                 else
434                 {
435                   smlParseHandler.handleParseError ( );
436                 }
437     
438                 break;
439    
440               case 7:
441    
442                 // e --> 7 (+), w --> 8, > --> 0 (close)
443                 // reading closing tag name; seeking white space or >
444    
445                 if ( isElementNameCharacter ( c ) )
446                 {
447                   stringBuffer.append ( c );
448                 }
449                 else if ( isWhiteSpaceCharacter ( c ) )
450                 {
451                   state = 8;
452                 }
453                 else if ( c == '>' )
454                 {
455                   state = 0;
456    
457                   smlParseHandler.handleElementClose ( stringBuffer.toString ( ) );
458    
459                   stringBuffer = new StringBuffer ( );
460                 }
461                 else
462                 {
463                   smlParseHandler.handleParseError ( );
464                 }
465     
466                 break;
467    
468               case 8:
469    
470                 // w --> 8, > --> 0 (close)
471                 // inside white space after closing tag name; seeking >
472    
473                 if ( isWhiteSpaceCharacter ( c ) )
474                 {
475                 }
476                 else if ( c == '>' )
477                 {
478                   state = 0;
479    
480                   smlParseHandler.handleElementClose ( stringBuffer.toString ( ) );
481    
482                   stringBuffer = new StringBuffer ( );
483                 }
484                 else
485                 {
486                   smlParseHandler.handleParseError ( );
487                 }
488     
489                 break;
490    
491               default:
492    
493                 throw new RuntimeException ( ); 
494             }
495           }
496         }
497    
498         public static void  save (
499           String   filename,
500           SmlNode  smlNode )
501           throws IOException
502         //////////////////////////////////////////////////////////////////////
503         {
504           PrintWriter  printWriter = null;
505    
506           try
507           {
508             printWriter = new PrintWriter (
509               new BufferedWriter ( new FileWriter ( filename ) ) );
510    
511             printWriter.println ( smlNode.toString ( 0, 2 ) );
512           }
513           finally
514           {
515             if ( printWriter != null )
516             {
517               printWriter.close ( );
518             }
519           }
520         }
521    
522         //////////////////////////////////////////////////////////////////////
523         //////////////////////////////////////////////////////////////////////
524    
525         private  SmlNodeLib ( ) { }
526    
527         //////////////////////////////////////////////////////////////////////
528         //////////////////////////////////////////////////////////////////////
529         }