001         package com.croftsoft.core.text.sml;
002    
003         import java.io.*;
004         import java.util.zip.ZipInputStream;
005    
006         import com.croftsoft.core.lang.NullArgumentException;
007    import com.croftsoft.core.text.sml.SmlNode;
008    import com.croftsoft.core.util.consumer.Consumer;
009    
010         /*********************************************************************
011         * Used to parse large SML files one data record at a time.
012         *
013         * <p>
014         * The input is assumed to be a very large Simplified Markup Language
015         * (SML) file consisting of a root node containing zero or more child
016         * nodes, each child representing an individual data record.  As each
017         * direct child of the top-level root element is parsed, it is passed
018         * as an SmlNode to a Consumer.
019         * </p>
020         *
021         * <p>
022         * This parser is useful when you have a database dump in SML
023         * format and you want to read it back in one data record at a time.
024         * Since the data file is large, your Consumer implementation will
025         * typically commit each record to secondary storage (disk or database)
026         * as it is received.  This prevents an out-of-memory condition that
027         * might result from loading the entire file into primary storage
028         * (memory) as a Document Object Model (DOM), an object graph composed
029         * of a root node and multiple child nodes, as it is being parsed.
030         * </p>
031         *
032         * <p>
033         * Example:
034         * <pre>
035         * SmlNodeLoader.load ( smlInputStream,
036         *   new Consumer ( )
037         *   {
038         *     public void  consume ( Object  o )
039         *     {
040         *       SmlNode  smlNode = ( SmlNode ) o;
041         *
042         *       User  user = User.fromSmlNode ( smlNode );
043         *
044         *       userDatabase.add ( user );
045         *     }
046         *   } );
047         * </pre>
048         * </p>
049         *
050         * <p>
051         * It is assumed that an SML node will have never have both character
052         * data and SML nodes mixed together as immediate children.  Given that
053         * assumption, this parser will overwrite a parsed String child with a
054         * subsequently parsed SmlNode child.  Additionally, character data will
055         * not be recorded as a child once an SmlNode child is already in place.
056         * This is useful for preventing unnecessary white space between element
057         * tags in the SML file from being stored as character data.
058         * </p>
059         *
060         * @version
061         *   2001-05-18
062         * @since
063         *   2001-05-10
064         * @author
065         *   <a href="https://www.croftsoft.com/">David W. Croft</a>
066         *********************************************************************/
067    
068         public final class  SmlNodeLoader
069           implements SmlParseHandler
070         //////////////////////////////////////////////////////////////////////
071         //////////////////////////////////////////////////////////////////////
072         {
073    
074         private Consumer             smlNodeConsumer;
075    
076         private SmlNodeParseHandler  smlNodeParseHandler;
077    
078         private int                  depth;
079    
080         //////////////////////////////////////////////////////////////////////
081         //////////////////////////////////////////////////////////////////////
082    
083         public static void  main ( String [ ]  args )
084           throws Exception
085         //////////////////////////////////////////////////////////////////////
086         {
087           parse (
088             args [ 0 ],
089             new Consumer ( )
090             {
091               public void  consume ( Object  o )
092               {
093                 System.out.println ( o );
094    
095                 System.out.println ( "" );
096               }
097             },
098             args [ 0 ].toLowerCase ( ).endsWith ( ".zip" ) );
099         }
100    
101         //////////////////////////////////////////////////////////////////////
102         //////////////////////////////////////////////////////////////////////
103    
104         public static void  parse (
105           InputStream  inputStream,
106           Consumer     smlNodeConsumer )
107           throws IOException
108         //////////////////////////////////////////////////////////////////////
109         {
110           SmlParseHandler  smlParseHandler
111             = new SmlNodeLoader ( smlNodeConsumer );
112    
113           SmlNodeLib.parse ( inputStream, smlParseHandler );
114         }
115    
116         public static void  parse (
117           String    smlDataFilename,
118           Consumer  smlNodeConsumer,
119           boolean   isZipFile )
120           throws IOException
121         //////////////////////////////////////////////////////////////////////
122         {
123           InputStream  inputStream = null;
124    
125           try
126           {
127             inputStream = new BufferedInputStream (
128               new FileInputStream ( smlDataFilename ) );
129    
130             if ( isZipFile )
131             {
132               ZipInputStream  zipInputStream
133                 = new ZipInputStream ( inputStream );
134    
135               zipInputStream.getNextEntry ( );
136    
137               inputStream = zipInputStream;
138             }
139    
140             parse ( inputStream, smlNodeConsumer );
141           }
142           finally
143           {
144             if ( inputStream != null )
145             {
146               inputStream.close ( );
147             }
148           }
149         }
150    
151         //////////////////////////////////////////////////////////////////////
152         //////////////////////////////////////////////////////////////////////
153    
154         private  SmlNodeLoader ( Consumer  smlNodeConsumer )
155         //////////////////////////////////////////////////////////////////////
156         {
157           NullArgumentException.check (
158             this.smlNodeConsumer = smlNodeConsumer );
159    
160           smlNodeParseHandler = new SmlNodeParseHandler ( );
161         }
162    
163         //////////////////////////////////////////////////////////////////////
164         //////////////////////////////////////////////////////////////////////
165    
166         public void  handleCData ( String  cData )
167         //////////////////////////////////////////////////////////////////////
168         {
169           smlNodeParseHandler.handleCData ( cData );
170         }
171    
172         public void  handleElementOpen ( String  elementName )
173         //////////////////////////////////////////////////////////////////////
174         {
175           ++depth;
176    
177           smlNodeParseHandler.handleElementOpen ( elementName );
178         }
179    
180         public void  handleElementClose ( String  elementName )
181         //////////////////////////////////////////////////////////////////////
182         {
183           --depth;
184    
185           if ( depth == 1 )
186           {
187             smlNodeConsumer.consume ( smlNodeParseHandler.getSmlNode ( ) );
188    
189             smlNodeParseHandler.handleElementClose ( elementName );
190    
191             smlNodeParseHandler.getSmlNode ( ).removeChildren ( );
192           }
193           else
194           {
195             smlNodeParseHandler.handleElementClose ( elementName );
196           }
197         }
198    
199         public void  handleParseError ( )
200         //////////////////////////////////////////////////////////////////////
201         {
202           smlNodeParseHandler.handleParseError ( );
203         }
204    
205         //////////////////////////////////////////////////////////////////////
206         //////////////////////////////////////////////////////////////////////
207         }