001 /* DTD.java --
002 Copyright (C) 2005 Free Software Foundation, Inc.
003
004 This file is part of GNU Classpath.
005
006 GNU Classpath is free software; you can redistribute it and/or modify
007 it under the terms of the GNU General Public License as published by
008 the Free Software Foundation; either version 2, or (at your option)
009 any later version.
010
011 GNU Classpath is distributed in the hope that it will be useful, but
012 WITHOUT ANY WARRANTY; without even the implied warranty of
013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 General Public License for more details.
015
016 You should have received a copy of the GNU General Public License
017 along with GNU Classpath; see the file COPYING. If not, write to the
018 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019 02110-1301 USA.
020
021 Linking this library statically or dynamically with other modules is
022 making a combined work based on this library. Thus, the terms and
023 conditions of the GNU General Public License cover the whole
024 combination.
025
026 As a special exception, the copyright holders of this library give you
027 permission to link this library with independent modules to produce an
028 executable, regardless of the license terms of these independent
029 modules, and to copy and distribute the resulting executable under
030 terms of your choice, provided that you also meet, for each linked
031 independent module, the terms and conditions of the license of that
032 module. An independent module is a module which is not derived from
033 or based on this library. If you modify this library, you may extend
034 this exception to your version of the library, but you are not
035 obligated to do so. If you do not wish to do so, delete this
036 exception statement from your version. */
037
038
039 package javax.swing.text.html.parser;
040
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.BitSet;
import java.util.Hashtable;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.Vector;
051
052 /**
053 * <p>Representation or the SGML DTD document.
054 * Provides basis for describing a syntax of the
055 * HTML documents. The fields of this class are NOT initialized in
056 * constructor. You need to do this separately before passing this data
057 * structure to the HTML parser. The subclasses with the fields, pre-
058 * initialized, for example, for HTML 4.01, can be available only between
059 * the implementation specific classes
060 * ( for example, {@link gnu.javax.swing.text.html.parser.HTML_401F }
061 * in this implementation).</p>
062 * <p>
063 * If you need more information about SGML DTD documents,
064 * the author suggests to read SGML tutorial on
065 * <a href="http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html"
066 * >http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html</a>.
067 * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>,
068 * Oxford University Press, 688 p, ISBN: 0198537379.
069 * </p>
070 * <p>
071 * Warning: the html, head and other tag fields will only be automatically
072 * assigned if the VM has the correctly implemented reflection mechanism.
073 * As these fields are not used anywhere in the implementation, not
074 * exception will be thrown in the opposite case.
075 * </p>
076 *
077 * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
078 */
079 public class DTD
080 implements DTDConstants
081 {
082 /**
083 * The version of the persistent data format.
084 * @specnote This was made <code>final</code> in 1.5.
085 */
086 public static final int FILE_VERSION = 1;
087
088 /**
089 * The table of existing available DTDs.
090 */
091 static Hashtable<String,DTD> dtdHash = new Hashtable<String,DTD>();
092
093 /**
094 * The applet element for this DTD.
095 */
096 public Element applet;
097
098 /**
099 * The base element for this DTD.
100 */
101 public Element base;
102
103 /**
104 * The body element for this DTD.
105 */
106 public Element body;
107
108 /**
109 * The head element for this DTD.
110 */
111 public Element head;
112
113 /**
114 * The html element for this DTD.
115 */
116 public Element html;
117
118 /**
119 * The isindex element of for this DTD.
120 */
121 public Element isindex;
122
123 /**
124 * The meta element for this DTD.
125 */
126 public Element meta;
127
128 /**
129 * The p element for this DTD.
130 */
131 public Element p;
132
133 /**
134 * The param element for this DTD.
135 */
136 public Element param;
137
138 /**
139 * The pcdata for this DTD.
140 */
141 public Element pcdata;
142
143 /**
144 * The title element for this DTD.
145 */
146 public Element title;
147
148 /**
149 * The element for accessing all DTD elements by name.
150 */
151 public Hashtable<String,Element> elementHash =
152 new Hashtable<String,Element>();
153
154 /**
155 * The entity table for accessing all DTD entities by name.
156 */
157 public Hashtable<Object, Entity> entityHash = new Hashtable<Object, Entity>();
158
159 /**
160 * The name of this DTD.
161 */
162 public String name;
163
164 /**
165 * Contains all elements in this DTD. The
166 * javax.swing.text.html.parser.Element#index field of all elements
167 * in this vector is set to the element position in this vector.
168 */
169 public Vector<Element> elements = new Vector<Element>();
170
171 /** Create a new DTD with the specified name. */
172 protected DTD(String a_name)
173 {
174 name = a_name;
175 }
176
177 /** Get this DTD by name. The current implementation
178 * only looks in the internal table of DTD documents. If no corresponding
179 * entry is found, the new entry is created, placed into
180 * the table and returned. */
181 public static DTD getDTD(String name)
182 throws IOException
183 {
184 DTD d = dtdHash.get(name);
185
186 if (d == null)
187 {
188 d = new DTD(name);
189 dtdHash.put(d.name, d);
190 }
191
192 return d;
193 }
194
195 /**
196 * Get the element by the element name. If the element is not yet
197 * defined, it is newly created and placed into the element table.
198 * If the element name matches (ingoring case) a public non static
199 * element field in this class, this field is assigned to the value
200 * of the newly created element.
201 */
202 public Element getElement(String element_name)
203 {
204 return newElement(element_name);
205 }
206
207 /**
208 * Get the element by the value of its
209 * {@link javax.swing.text.html.parser.Element#index} field.
210 */
211 public Element getElement(int index)
212 {
213 return elements.get(index);
214 }
215
216 /**
217 * Get the entity with the given identifier.
218 * @param id that can be returned by
219 * {@link javax.swing.text.html.parser.Entity#name2type(String an_entity)}
220 * @return The entity from this DTD or null if there is no entity with
221 * such id or such entity is not present in the table of this instance.
222 */
223 public Entity getEntity(int id)
224 {
225 String name = Entity.mapper.get(id);
226
227 if (name != null)
228 return entityHash.get(name);
229 else
230 return null;
231 }
232
233 /**
234 * Get the named entity by its name.
235 */
236 public Entity getEntity(String entity_name)
237 {
238 return entityHash.get(entity_name);
239 }
240
241 /**
242 * Get the name of this instance of DTD
243 */
244 public String getName()
245 {
246 return name;
247 }
248
249 /**
250 * Creates, adds into the entity table and returns the
251 * character entity like <code>&lt;</code>
252 * (means '<code><</code>' );
253 * @param name The entity name (without heading & and closing ;)
254 * @param type The entity type
255 * @param character The entity value (single character)
256 * @return The created entity
257 */
258 public Entity defEntity(String name, int type, int character)
259 {
260 Entity e = newEntity(name, type);
261 e.data = new char[] { (char) character };
262 return e;
263 }
264
265 /**
266 * Define the attributes for the element with the given name.
267 * If the element is not exist, it is created.
268 * @param forElement
269 * @param attributes
270 */
271 public void defineAttributes(String forElement, AttributeList attributes)
272 {
273 Element e = elementHash.get(forElement.toLowerCase());
274
275 if (e == null)
276 e = newElement(forElement);
277
278 e.atts = attributes;
279 }
280
281 /**
282 * Defines the element and adds it to the element table. Sets the
283 * <code>Element.index</code> field to the value, unique for this
284 * instance of DTD. If the element with the given name already exists,
285 * replaces all other its settings by the method argument values.
286 * @param name the name of the element
287 * @param type the type of the element
288 * @param headless true if the element needs no starting tag
289 * (should not occur in HTML).
290 * @param tailless true if the element needs no ending tag (like
291 * <code><hr></code>
292 * @param content the element content
293 * @param exclusions the set of elements that must not occur inside
294 * this element. The <code>Element.index</code> value defines which
295 * bit in this bitset corresponds to that element.
296 * @param inclusions the set of elements that can occur inside this
297 * element. the <code>Element.index</code> value defines which
298 * bit in this bitset corresponds to that element.
299 * @param attributes the element attributes.
300 * @return the newly defined element.
301 */
302 public Element defineElement(String name, int type, boolean headless,
303 boolean tailless, ContentModel content,
304 BitSet exclusions, BitSet inclusions,
305 AttributeList attributes
306 )
307 {
308 Element e = newElement(name);
309 e.type = type;
310 e.oStart = headless;
311 e.oEnd = tailless;
312 e.content = content;
313 e.exclusions = exclusions;
314 e.inclusions = inclusions;
315 e.atts = attributes;
316
317 return e;
318 }
319
320 /**
321 * Creates, intializes and adds to the entity table the new
322 * entity.
323 * @param name the name of the entity
324 * @param type the type of the entity
325 * @param data the data section of the entity
326 * @return the created entity
327 */
328 public Entity defineEntity(String name, int type, char[] data)
329 {
330 Entity e = newEntity(name, type);
331 e.data = data;
332
333 return e;
334 }
335
336 /** Place this DTD into the DTD table. */
337 public static void putDTDHash(String name, DTD dtd)
338 {
339 dtdHash.put(name, dtd);
340 }
341
342 /**
343 * <p>Reads DTD from an archived format. This format is not standardized
344 * and differs between implementations.</p><p> This implementation
345 * reads and defines all entities and elements using
346 * ObjectInputStream. The elements and entities can be written into the
347 * stream in any order. The objects other than elements and entities
348 * are ignored.</p>
349 * @param stream A data stream to read from.
350 * @throws java.io.IOException If one is thrown by the input stream
351 */
352 public void read(DataInputStream stream)
353 throws java.io.IOException
354 {
355 ObjectInputStream oi = new ObjectInputStream(stream);
356 Object def;
357 try
358 {
359 while (true)
360 {
361 def = oi.readObject();
362 if (def instanceof Element)
363 {
364 Element e = (Element) def;
365 elementHash.put(e.name.toLowerCase(), e);
366 assignField(e);
367 }
368 else if (def instanceof Entity)
369 {
370 Entity e = (Entity) def;
371 entityHash.put(e.name, e);
372 }
373 }
374 }
375 catch (ClassNotFoundException ex)
376 {
377 throw new IOException(ex.getMessage());
378 }
379 catch (EOFException ex)
380 {
381 // ok EOF
382 }
383 }
384
385 /**
386 * Returns the name of this instance of DTD.
387 */
388 public String toString()
389 {
390 return name;
391 }
392
393 /**
394 * Creates and returns new attribute (not an attribute list).
395 * @param name the name of this attribute
396 * @param type the type of this attribute (FIXED, IMPLIED or
397 * REQUIRED from <code>DTDConstants</code>).
398 * @param modifier the modifier of this attribute
399 * @param default_value the default value of this attribute
400 * @param allowed_values the allowed values of this attribute. The multiple
401 * possible values in this parameter are supposed to be separated by
402 * '|', same as in SGML DTD <code><!ATTLIST </code>tag. This parameter
403 * can be null if no list of allowed values is specified.
404 * @param atts the previous attribute of this element. This is
405 * placed to the field
406 * {@link javax.swing.text.html.parser.AttributeList#next },
407 * creating a linked list.
408 * @return The attributes.
409 */
410 protected AttributeList defAttributeList(String name, int type, int modifier,
411 String default_value,
412 String allowed_values,
413 AttributeList atts
414 )
415 {
416 AttributeList al = new AttributeList(name);
417 al.modifier = modifier;
418 al.value = default_value;
419 al.next = atts;
420
421 if (allowed_values != null)
422 {
423 StringTokenizer st = new StringTokenizer(allowed_values, " \t|");
424 Vector<String> v = new Vector<String>(st.countTokens());
425
426 while (st.hasMoreTokens())
427 v.add(st.nextToken());
428
429 al.values = v;
430 }
431
432 return al;
433 }
434
435 /**
436 * Creates a new content model.
437 * @param type specifies the BNF operation for this content model.
438 * The valid operations are documented in the
439 * {@link javax.swing.text.html.parser.ContentModel#type }.
440 * @param content the content of this content model
441 * @param next if the content model is specified by BNF-like
442 * expression, contains the rest of this expression.
443 * @return The newly created content model.
444 */
445 protected ContentModel defContentModel(int type, Object content,
446 ContentModel next
447 )
448 {
449 ContentModel model = new ContentModel();
450 model.type = type;
451 model.next = next;
452 model.content = content;
453
454 return model;
455 }
456
457 /**
458 * Defines a new element and adds it to the element table.
459 * If the element alredy exists,
460 * overrides it settings with the specified values.
461 * @param name the name of the new element
462 * @param type the type of the element
463 * @param headless true if the element needs no starting tag
464 * @param tailless true if the element needs no closing tag
465 * @param content the element content.
466 * @param exclusions the elements that must be excluded from the
467 * content of this element, in all levels of the hierarchy.
468 * @param inclusions the elements that can be included as the
469 * content of this element.
470 * @param attributes the element attributes.
471 * @return the created or updated element.
472 */
473 protected Element defElement(String name, int type, boolean headless,
474 boolean tailless, ContentModel content,
475 String[] exclusions, String[] inclusions,
476 AttributeList attributes
477 )
478 {
479 // compute the bit sets
480 BitSet exclude = bitSet(exclusions);
481 BitSet include = bitSet(inclusions);
482
483 Element e =
484 defineElement(name, type, headless, tailless, content, exclude, include,
485 attributes
486 );
487
488 return e;
489 }
490
491 /**
492 * Creates, intializes and adds to the entity table the new
493 * entity.
494 * @param name the name of the entity
495 * @param type the type of the entity
496 * @param data the data section of the entity
497 * @return the created entity
498 */
499 protected Entity defEntity(String name, int type, String data)
500 {
501 Entity e = newEntity(name, type);
502 e.data = data.toCharArray();
503
504 return e;
505 }
506
507 private void assignField(Element e)
508 {
509 String element_name = e.name;
510 try
511 {
512 // Assign the field via reflection.
513 Field f = getClass().getField(element_name.toLowerCase());
514 if ((f.getModifiers() & Modifier.PUBLIC) != 0)
515 if ((f.getModifiers() & Modifier.STATIC) == 0)
516 if (f.getType().isAssignableFrom(e.getClass()))
517 f.set(this, e);
518 }
519 catch (IllegalAccessException ex)
520 {
521 unexpected(ex);
522 }
523 catch (NoSuchFieldException ex)
524 {
525 // This is ok.
526 }
527
528 // Some virtual machines may still lack the proper
529 // implementation of reflection. As the tag fields
530 // are not used anywhere in this implementation,
531 // (and this class is also rarely used by the end user),
532 // it may be better not to crash everything by throwing an error
533 // for each case when the HTML parsing is required.
534 catch (Throwable t)
535 {
536 // This VM has no reflection mechanism implemented!
537 if (t instanceof OutOfMemoryError)
538 throw (Error) t;
539 }
540 }
541
542 /**
543 * Create the bit set for this array of elements.
544 * The unknown elements are automatically defined and added
545 * to the element table.
546 * @param elements
547 * @return The bit set.
548 */
549 private BitSet bitSet(String[] elements)
550 {
551 BitSet b = new BitSet();
552
553 for (int i = 0; i < elements.length; i++)
554 {
555 Element e = getElement(elements [ i ]);
556
557 if (e == null)
558 e = newElement(elements [ i ]);
559
560 b.set(e.index);
561 }
562
563 return b;
564 }
565
566 /**
567 * Find the element with the given name in the element table.
568 * If not find, create a new element with this name and add to the
569 * table.
570 * @param name the name of the element
571 * @return the found or created element.
572 */
573 private Element newElement(String name)
574 {
575 Element e = elementHash.get(name.toLowerCase());
576
577 if (e == null)
578 {
579 e = new Element();
580 e.name = name;
581 e.index = elements.size();
582 elements.add(e);
583 elementHash.put(e.name.toLowerCase(), e);
584 assignField(e);
585 }
586 return e;
587 }
588
589 /**
590 * Creates and adds to the element table the entity with an
591 * unitialized data section. Used internally.
592 * @param name the name of the entity
593 * @param type the type of the entity, a bitwise combination
594 * of GENERAL, PARAMETER, SYSTEM and PUBLIC.
595 *
596 * @return the created entity
597 */
598 private Entity newEntity(String name, int type)
599 {
600 Entity e = new Entity(name, type, null);
601 entityHash.put(e.name, e);
602 return e;
603 }
604
605 private void unexpected(Exception ex)
606 {
607 throw new Error("This should never happen, report a bug", ex);
608 }
609 }