package com.croftsoft.core.text.sml;

import java.io.*;
import java.util.StringTokenizer;
import java.util.Vector;

import com.croftsoft.core.lang.NullArgumentException;
import com.croftsoft.core.lang.StringLib;

/*********************************************************************
* A library of static methods for manipulating SmlNode objects.
*
* <p>
* Java 1.1 compatible.
* </p>
*
* @version
*   2001-05-14
* @since
*   2001-05-10
* @author
*   <a href="https://www.croftsoft.com/">David W. Croft</a>
*********************************************************************/

public final class SmlNodeLib
{

  /*********************************************************************
  * Demonstration entry point: parses a handful of small SML strings
  * with {@link #parse(String)} and prints each result to standard out.
  *
  * @param  args
  *   Ignored.
  *********************************************************************/
  public static void  main ( String [ ]  args )
  {
    System.out.println ( parse ( "<abc/>"                    ) );
    System.out.println ( parse ( "<abc></abc>"               ) );
    System.out.println ( parse ( "<abc>def</abc>"            ) );
    System.out.println ( parse ( "<abc><def/></abc>"         ) );
    System.out.println ( parse ( "<abc><def></def></abc>"    ) );
    System.out.println ( parse ( "<abc><def>ghi</def></abc>" ) );
  }

  /*********************************************************************
  * Tests whether a character may appear in character data.
  *
  * @return
  *   True for every character except the angle brackets.
  *********************************************************************/
  public static boolean  isCDataCharacter ( char  c )
  {
    // check to see what XML really defines

    return ( c != '<' )
      &&   ( c != '>' );
  }

  /*********************************************************************
  * Tests whether a character may appear in an element (tag) name.
  *
  * @return
  *   True unless the character is white space, an angle bracket, or
  *   a forward slash.
  *********************************************************************/
  public static boolean  isElementNameCharacter ( char  c )
  {
    // check to see what XML really defines

    return !isWhiteSpaceCharacter ( c )
      && ( c != '<' )
      && ( c != '>' )
      && ( c != '/' );
  }

  /*********************************************************************
  * Tests whether a character is white space.
  * Delegates to Character.isWhitespace().
  *********************************************************************/
  public static boolean  isWhiteSpaceCharacter ( char  c )
  {
    // check to see what XML really defines

    return Character.isWhitespace ( c );
  }

  /*********************************************************************
  * Loads an SmlNode from a file.
  *
  * <p>
  * The file is read through a BufferedInputStream and fed to
  * {@link #parse(InputStream, SmlParseHandler)} using a new
  * SmlNodeParseHandler.  The stream is closed before returning,
  * whether or not parsing succeeded.
  * </p>
  *
  * @param  filename
  *   The path of the file to read.
  * @param  allowMixedChildren
  *   Passed through to the SmlNodeParseHandler constructor.
  * @return
  *   Whatever the handler's getSmlNode() returns after the parse.
  * @throws IOException
  *   If the file cannot be opened, read, or closed.
  *********************************************************************/
  public static SmlNode  load (
    String   filename,
    boolean  allowMixedChildren )
    throws IOException
  {
    SmlNodeParseHandler  smlNodeParseHandler
      = new SmlNodeParseHandler ( allowMixedChildren );

    BufferedInputStream  in = null;

    try
    {
      in = new BufferedInputStream (
        new FileInputStream ( filename ) );

      SmlNodeLib.parse ( in, smlNodeParseHandler );
    }
    finally
    {
      if ( in != null )
      {
        in.close ( );
      }
    }

    return smlNodeParseHandler.getSmlNode ( );
  }

  /*********************************************************************
  * Parses an SML string into a tree of SmlNode objects.
  *
  * <p>
  * The string is split on the angle brackets.  Text between tags is
  * decoded with SmlCoder.decode() and added to the current element;
  * text that appears before the root element is opened is ignored.
  * An element closed without any children is given a single empty
  * string child.  Parsing stops and the root is returned as soon as
  * the root element is closed; anything after that point, including
  * the final closing angle bracket, is not examined.  The name inside
  * a closing tag is not compared against the opening tag.
  * </p>
  *
  * @param  smlString
  *   The SML text to parse.  Must not be null.
  * @return
  *   The root SmlNode.
  * @throws IllegalArgumentException
  *   If the angle brackets are missing or mismatched: the input ends
  *   before the root element is closed, a closing tag appears before
  *   any opening tag, a tag is empty or unterminated, or a new tag
  *   starts before the previous one is terminated.
  *********************************************************************/
  public static SmlNode  parse ( String  smlString )
  {
    NullArgumentException.check ( smlString );

    StringTokenizer  stringTokenizer
      = new StringTokenizer ( smlString, "<>", true );

    // used as a stack of the ancestors of currentSmlNode

    Vector  stackVector = new Vector ( );

    SmlNode  currentSmlNode = null;

    while ( stringTokenizer.hasMoreTokens ( ) )
    {
      String  token = stringTokenizer.nextToken ( );

      if ( token.equals ( "<" ) )
      {
        token = nextTokenOrFail ( stringTokenizer, smlString );

        if ( token.equals ( "<" ) || token.equals ( ">" ) )
        {
          // "<<" or "<>":  there is no tag content between the brackets

          throw newBracketException ( smlString );
        }

        if ( token.startsWith ( "/" ) )
        {
          if ( currentSmlNode == null )
          {
            // closing tag without any element being open

            throw newBracketException ( smlString );
          }

          if ( !currentSmlNode.hasChild ( ) )
          {
            currentSmlNode.add ( "" );
          }

          if ( stackVector.isEmpty ( ) )
          {
            // the root element was just closed

            return currentSmlNode;
          }

          // stack pop

          int  index = stackVector.size ( ) - 1;

          currentSmlNode = ( SmlNode ) stackVector.elementAt ( index );

          stackVector.removeElementAt ( index );
        }
        else
        {
          String  name = token;

          boolean  isParent = true;

          if ( token.endsWith ( "/" ) )
          {
            // self-closing element such as <abc/>

            name = token.substring ( 0, token.length ( ) - 1 );

            isParent = false;
          }

          SmlNode  childSmlNode = new SmlNode ( name );

          if ( currentSmlNode == null )
          {
            if ( !isParent )
            {
              // a self-closing root is the entire document

              return childSmlNode;
            }

            currentSmlNode = childSmlNode;
          }
          else
          {
            currentSmlNode.add ( childSmlNode );

            if ( isParent )
            {
              // stack push

              stackVector.addElement ( currentSmlNode );

              currentSmlNode = childSmlNode;
            }
          }
        }

        // The tag must be terminated by ">" before anything else.

        if ( !nextTokenOrFail ( stringTokenizer, smlString ).equals ( ">" ) )
        {
          throw newBracketException ( smlString );
        }
      }
      else
      {
        // NOTE(review): the null guard below means this method's own
        // code cannot raise a NullPointerException here.  The catch is
        // retained only so that one escaping from SmlCoder.decode() or
        // SmlNode.add() is still reported as an
        // IllegalArgumentException, as before -- confirm whether it
        // can be dropped.

        try
        {
          if ( currentSmlNode != null )
          {
            currentSmlNode.add ( SmlCoder.decode ( token ) );
          }
        }
        catch ( NullPointerException  ex )
        {
          throw newBracketException ( smlString );
        }
      }
    }

    // ran out of input before the root element was closed

    throw newBracketException ( smlString );
  }

  /*********************************************************************
  * Parses an SML stream.
  *
  * <p>
  * Each byte read from the stream is cast directly to a char and run
  * through the state machine below.  When a character is not valid
  * for the current state, handleParseError() is called on the handler
  * and parsing continues with the next byte in the same state.
  * Whatever is still held in the buffer when the stream ends is not
  * passed to the handler.  This method does not close the stream.
  * </p>
  *
  * <p>
  * State Transitions:
  * <pre>
  * c == character data (cdata) character (excludes angle brackets)
  * w == white space character
  * e == element (tag) name character
  *
  * (+)     == saves character to buffer
  * (cdata) == calls handleCData
  * (open)  == calls handleElementOpen
  * (close) == calls handleElementClose
  *
  * 0 | c --> 0 (+), &lt; --> 1 (cdata)
  * 1 | w --> 2, e --> 3 (+), / --> 6
  * 2 | w --> 2, e --> 3 (+)
  * 3 | e --> 3 (+), w --> 4, / --> 5, > --> 0 (open)
  * 4 | w --> 4, / --> 5, > --> 0 (open)
  * 5 | > --> 0 (open, close)
  * 6 | w --> 6, e --> 7 (+)
  * 7 | e --> 7 (+), w --> 8, > --> 0 (close)
  * 8 | w --> 8, > --> 0 (close)
  *
  * 0 | reading cdata, seeking &lt;
  * 1 | just after &lt;; seeking element (tag) name, white space, or /
  * 2 | inside white space before tag name; seeking tag name
  * 3 | reading opening tag name; seeking white space, /, or >
  * 4 | inside white space after opening tag name; seeking / or >
  * 5 | found / after tag name, element has no children; seeking >
  * 6 | found / after &lt;, closing tag; skipping white, seeking tag name
  * 7 | reading closing tag name; seeking white space or >
  * 8 | inside white space after closing tag name; seeking >
  * </pre>
  * </p>
  *
  * @param  inputStream
  *   The stream to read.  Must not be null.
  * @param  smlParseHandler
  *   Receives the parse events.  Must not be null.
  * @throws IOException
  *   If reading from the stream fails.
  *********************************************************************/
  public static void  parse (
    InputStream      inputStream,
    SmlParseHandler  smlParseHandler )
    throws IOException
  {
    NullArgumentException.check ( inputStream );

    NullArgumentException.check ( smlParseHandler );

    StringBuffer  stringBuffer = new StringBuffer ( );

    int  state = 0;

    int  i;

    while ( ( i = inputStream.read ( ) ) > -1 )
    {
      char  c = ( char ) i;

      switch ( state )
      {
        case 0:

          // c --> 0 (+), < --> 1 (cdata)
          // reading cdata, seeking <

          if ( isCDataCharacter ( c ) )
          {
            stringBuffer.append ( c );
          }
          else if ( c == '<' )
          {
            state = 1;

            smlParseHandler.handleCData ( stringBuffer.toString ( ) );

            stringBuffer = new StringBuffer ( );
          }
          else
          {
            smlParseHandler.handleParseError ( );
          }

          break;

        case 1:

          // w --> 2, e --> 3 (+), / --> 6
          // just after <; seeking tag name, white space, or /

          if ( isWhiteSpaceCharacter ( c ) )
          {
            state = 2;
          }
          else if ( isElementNameCharacter ( c ) )
          {
            state = 3;

            stringBuffer.append ( c );
          }
          else if ( c == '/' )
          {
            state = 6;
          }
          else
          {
            smlParseHandler.handleParseError ( );
          }

          break;

        case 2:

          // w --> 2, e --> 3 (+)
          // inside white space before tag name; seeking tag name

          if ( isWhiteSpaceCharacter ( c ) )
          {
            // skip
          }
          else if ( isElementNameCharacter ( c ) )
          {
            state = 3;

            stringBuffer.append ( c );
          }
          else
          {
            smlParseHandler.handleParseError ( );
          }

          break;

        case 3:

          // e --> 3 (+), w --> 4, / --> 5, > --> 0 (open)
          // reading opening tag name; seeking white space, /, or >

          if ( isElementNameCharacter ( c ) )
          {
            stringBuffer.append ( c );
          }
          else if ( isWhiteSpaceCharacter ( c ) )
          {
            state = 4;
          }
          else if ( c == '/' )
          {
            state = 5;
          }
          else if ( c == '>' )
          {
            state = 0;

            smlParseHandler.handleElementOpen ( stringBuffer.toString ( ) );

            stringBuffer = new StringBuffer ( );
          }
          else
          {
            smlParseHandler.handleParseError ( );
          }

          break;

        case 4:

          // w --> 4, / --> 5, > --> 0 (open)
          // inside white space after opening tag name; seeking / or >

          if ( isWhiteSpaceCharacter ( c ) )
          {
            // skip
          }
          else if ( c == '/' )
          {
            state = 5;
          }
          else if ( c == '>' )
          {
            state = 0;

            smlParseHandler.handleElementOpen ( stringBuffer.toString ( ) );

            stringBuffer = new StringBuffer ( );
          }
          else
          {
            smlParseHandler.handleParseError ( );
          }

          break;

        case 5:

          // > --> 0 (open, close)
          // found / after tag name, element has no children; seeking >

          if ( c == '>' )
          {
            state = 0;

            String  elementName = stringBuffer.toString ( );

            smlParseHandler.handleElementOpen ( elementName );

            smlParseHandler.handleElementClose ( elementName );

            stringBuffer = new StringBuffer ( );
          }
          else
          {
            smlParseHandler.handleParseError ( );
          }

          break;

        case 6:

          // w --> 6, e --> 7 (+)
          // found / after <, closing tag; skipping white, seeking tag name

          if ( isWhiteSpaceCharacter ( c ) )
          {
            // skip
          }
          else if ( isElementNameCharacter ( c ) )
          {
            state = 7;

            stringBuffer.append ( c );
          }
          else
          {
            smlParseHandler.handleParseError ( );
          }

          break;

        case 7:

          // e --> 7 (+), w --> 8, > --> 0 (close)
          // reading closing tag name; seeking white space or >

          if ( isElementNameCharacter ( c ) )
          {
            stringBuffer.append ( c );
          }
          else if ( isWhiteSpaceCharacter ( c ) )
          {
            state = 8;
          }
          else if ( c == '>' )
          {
            state = 0;

            smlParseHandler.handleElementClose ( stringBuffer.toString ( ) );

            stringBuffer = new StringBuffer ( );
          }
          else
          {
            smlParseHandler.handleParseError ( );
          }

          break;

        case 8:

          // w --> 8, > --> 0 (close)
          // inside white space after closing tag name; seeking >

          if ( isWhiteSpaceCharacter ( c ) )
          {
            // skip
          }
          else if ( c == '>' )
          {
            state = 0;

            smlParseHandler.handleElementClose ( stringBuffer.toString ( ) );

            stringBuffer = new StringBuffer ( );
          }
          else
          {
            smlParseHandler.handleParseError ( );
          }

          break;

        default:

          // unreachable: state is only ever assigned 0 through 8

          throw new IllegalStateException (
            "unknown parser state: " + state );
      }
    }
  }

  /*********************************************************************
  * Saves an SmlNode to a file.
  *
  * <p>
  * Writes the result of smlNode.toString ( 0, 2 ) followed by a line
  * separator.  The writer is closed before returning, whether or not
  * the write succeeded.
  * </p>
  *
  * @param  filename
  *   The path of the file to write.
  * @param  smlNode
  *   The node to write.
  * @throws IOException
  *   If the file cannot be opened or the data cannot be written.
  *********************************************************************/
  public static void  save (
    String   filename,
    SmlNode  smlNode )
    throws IOException
  {
    PrintWriter  printWriter = null;

    try
    {
      printWriter = new PrintWriter (
        new BufferedWriter ( new FileWriter ( filename ) ) );

      printWriter.println ( smlNode.toString ( 0, 2 ) );

      // PrintWriter never throws on a failed write; it only records
      // the failure.  checkError() flushes and reports it so that the
      // caller is not told that a failed save succeeded.

      if ( printWriter.checkError ( ) )
      {
        throw new IOException (
          "Error writing SML to file: " + filename );
      }
    }
    finally
    {
      if ( printWriter != null )
      {
        printWriter.close ( );
      }
    }
  }

  /*********************************************************************
  * Returns the next token, or fails with the standard bracket
  * exception if the input has been exhausted in the middle of a tag.
  *********************************************************************/
  private static String  nextTokenOrFail (
    StringTokenizer  stringTokenizer,
    String           smlString )
  {
    if ( !stringTokenizer.hasMoreTokens ( ) )
    {
      throw newBracketException ( smlString );
    }

    return stringTokenizer.nextToken ( );
  }

  /*********************************************************************
  * Creates the exception thrown by parse(String) for malformed input.
  *********************************************************************/
  private static IllegalArgumentException  newBracketException (
    String  smlString )
  {
    return new IllegalArgumentException (
      "Missing or mismatched angle brackets:  \""
      + smlString + "\"" );
  }

  /*********************************************************************
  * Private constructor; this class holds only static methods.
  *********************************************************************/
  private  SmlNodeLib ( ) { }

}