Hubbub
|
#include <stdbool.h>
#include <inttypes.h>
#include <hubbub/errors.h>
#include <hubbub/functypes.h>
#include <hubbub/types.h>
#include <parserutils/input/inputstream.h>
Go to the source code of this file.
Data Structures | |
union | hubbub_tokeniser_optparams |
Hubbub tokeniser option parameters. More... | |
Typedefs | |
typedef struct hubbub_tokeniser | hubbub_tokeniser |
typedef enum hubbub_tokeniser_opttype | hubbub_tokeniser_opttype |
Hubbub tokeniser option types. More... | |
typedef union hubbub_tokeniser_optparams | hubbub_tokeniser_optparams |
Hubbub tokeniser option parameters. More... | |
Enumerations | |
enum | hubbub_tokeniser_opttype { HUBBUB_TOKENISER_TOKEN_HANDLER, HUBBUB_TOKENISER_ERROR_HANDLER, HUBBUB_TOKENISER_CONTENT_MODEL, HUBBUB_TOKENISER_PROCESS_CDATA, HUBBUB_TOKENISER_PAUSE } |
Hubbub tokeniser option types. More... | |
Functions | |
hubbub_error | hubbub_tokeniser_create (parserutils_inputstream *input, hubbub_tokeniser **tokeniser) |
Create a hubbub tokeniser. More... | |
hubbub_error | hubbub_tokeniser_destroy (hubbub_tokeniser *tokeniser) |
Destroy a hubbub tokeniser. More... | |
hubbub_error | hubbub_tokeniser_setopt (hubbub_tokeniser *tokeniser, hubbub_tokeniser_opttype type, hubbub_tokeniser_optparams *params) |
Configure a hubbub tokeniser. More... | |
hubbub_error | hubbub_tokeniser_insert_chunk (hubbub_tokeniser *tokeniser, const uint8_t *data, size_t len) |
Insert a chunk of data into the input stream. More... | |
hubbub_error | hubbub_tokeniser_run (hubbub_tokeniser *tokeniser) |
Process remaining data in the input stream. More... | |
typedef struct hubbub_tokeniser hubbub_tokeniser |
Definition at line 20 of file tokeniser.h.
typedef union hubbub_tokeniser_optparams hubbub_tokeniser_optparams |
Hubbub tokeniser option parameters.
typedef enum hubbub_tokeniser_opttype hubbub_tokeniser_opttype |
Hubbub tokeniser option types.
enum hubbub_tokeniser_opttype: Hubbub tokeniser option types.
Enumerator | |
---|---|
HUBBUB_TOKENISER_TOKEN_HANDLER | |
HUBBUB_TOKENISER_ERROR_HANDLER | |
HUBBUB_TOKENISER_CONTENT_MODEL | |
HUBBUB_TOKENISER_PROCESS_CDATA | |
HUBBUB_TOKENISER_PAUSE | |
Definition at line 25 of file tokeniser.h.
hubbub_error hubbub_tokeniser_create (parserutils_inputstream *input, hubbub_tokeniser **tokeniser)
Create a hubbub tokeniser.
input | Input stream instance |
tokeniser | Pointer to location to receive tokeniser instance |
Definition at line 285 of file tokeniser.c.
References hubbub_tokeniser::buffer, hubbub_tokeniser::content_model, hubbub_tokeniser::context, hubbub_tokeniser::error_handler, hubbub_tokeniser::error_pw, hubbub_tokeniser::escape_flag, HUBBUB_BADPARM, HUBBUB_CONTENT_MODEL_PCDATA, hubbub_error_from_parserutils_error(), HUBBUB_NOMEM, HUBBUB_OK, hubbub_tokeniser::input, hubbub_tokeniser::insert_buf, hubbub_tokeniser::paused, hubbub_tokeniser::process_cdata_section, hubbub_tokeniser::state, STATE_DATA, hubbub_tokeniser::token_handler, and hubbub_tokeniser::token_pw.
Referenced by hubbub_parser_create().
hubbub_error hubbub_tokeniser_destroy (hubbub_tokeniser *tokeniser)
Destroy a hubbub tokeniser.
tokeniser | The tokeniser instance to destroy |
Definition at line 340 of file tokeniser.c.
References hubbub_tag::attributes, hubbub_tokeniser::buffer, hubbub_tokeniser::context, hubbub_tokeniser_context::current_tag, HUBBUB_BADPARM, HUBBUB_OK, and hubbub_tokeniser::insert_buf.
Referenced by hubbub_parser_create(), and hubbub_parser_destroy().
hubbub_error hubbub_tokeniser_insert_chunk (hubbub_tokeniser *tokeniser, const uint8_t *data, size_t len)
Insert a chunk of data into the input stream.
Inserts the given data into the input stream ready for parsing but does not cause any additional processing of the input.
tokeniser | Tokeniser instance |
data | Data to insert (UTF-8 encoded) |
len | Length, in bytes, of data |
Definition at line 415 of file tokeniser.c.
References HUBBUB_BADPARM, hubbub_error_from_parserutils_error(), HUBBUB_OK, and hubbub_tokeniser::insert_buf.
Referenced by hubbub_parser_insert_chunk().
hubbub_error hubbub_tokeniser_run (hubbub_tokeniser *tokeniser)
Process remaining data in the input stream.
tokeniser | The tokeniser instance to invoke |
Definition at line 436 of file tokeniser.c.
References HUBBUB_BADPARM, HUBBUB_NEEDDATA, HUBBUB_OK, HUBBUB_PAUSED, hubbub_tokeniser_handle_after_attribute_name(), hubbub_tokeniser_handle_after_attribute_value_q(), hubbub_tokeniser_handle_after_doctype_name(), hubbub_tokeniser_handle_after_doctype_public(), hubbub_tokeniser_handle_after_doctype_system(), hubbub_tokeniser_handle_attribute_name(), hubbub_tokeniser_handle_attribute_value_dq(), hubbub_tokeniser_handle_attribute_value_sq(), hubbub_tokeniser_handle_attribute_value_uq(), hubbub_tokeniser_handle_before_attribute_name(), hubbub_tokeniser_handle_before_attribute_value(), hubbub_tokeniser_handle_before_doctype_name(), hubbub_tokeniser_handle_before_doctype_public(), hubbub_tokeniser_handle_before_doctype_system(), hubbub_tokeniser_handle_bogus_comment(), hubbub_tokeniser_handle_bogus_doctype(), hubbub_tokeniser_handle_cdata_block(), hubbub_tokeniser_handle_character_reference_data(), hubbub_tokeniser_handle_character_reference_in_attribute_value(), hubbub_tokeniser_handle_close_tag_open(), hubbub_tokeniser_handle_comment(), hubbub_tokeniser_handle_data(), hubbub_tokeniser_handle_doctype(), hubbub_tokeniser_handle_doctype_name(), hubbub_tokeniser_handle_doctype_public_dq(), hubbub_tokeniser_handle_doctype_public_sq(), hubbub_tokeniser_handle_doctype_system_dq(), hubbub_tokeniser_handle_doctype_system_sq(), hubbub_tokeniser_handle_markup_declaration_open(), hubbub_tokeniser_handle_match_cdata(), hubbub_tokeniser_handle_match_comment(), hubbub_tokeniser_handle_match_doctype(), hubbub_tokeniser_handle_match_public(), hubbub_tokeniser_handle_match_system(), hubbub_tokeniser_handle_named_entity(), hubbub_tokeniser_handle_numbered_entity(), hubbub_tokeniser_handle_self_closing_start_tag(), hubbub_tokeniser_handle_tag_name(), hubbub_tokeniser_handle_tag_open(), hubbub_tokeniser::paused, hubbub_tokeniser::state, state, STATE_AFTER_ATTRIBUTE_NAME, STATE_AFTER_ATTRIBUTE_VALUE_Q, STATE_AFTER_DOCTYPE_NAME, STATE_AFTER_DOCTYPE_PUBLIC, STATE_AFTER_DOCTYPE_SYSTEM, 
STATE_ATTRIBUTE_NAME, STATE_ATTRIBUTE_VALUE_DQ, STATE_ATTRIBUTE_VALUE_SQ, STATE_ATTRIBUTE_VALUE_UQ, STATE_BEFORE_ATTRIBUTE_NAME, STATE_BEFORE_ATTRIBUTE_VALUE, STATE_BEFORE_DOCTYPE_NAME, STATE_BEFORE_DOCTYPE_PUBLIC, STATE_BEFORE_DOCTYPE_SYSTEM, STATE_BOGUS_COMMENT, STATE_BOGUS_DOCTYPE, STATE_CDATA_BLOCK, STATE_CHARACTER_REFERENCE_DATA, STATE_CHARACTER_REFERENCE_IN_ATTRIBUTE_VALUE, STATE_CLOSE_TAG_OPEN, STATE_COMMENT, STATE_COMMENT_END, STATE_COMMENT_END_DASH, STATE_COMMENT_START, STATE_COMMENT_START_DASH, STATE_DATA, STATE_DOCTYPE, STATE_DOCTYPE_NAME, STATE_DOCTYPE_PUBLIC_DQ, STATE_DOCTYPE_PUBLIC_SQ, STATE_DOCTYPE_SYSTEM_DQ, STATE_DOCTYPE_SYSTEM_SQ, STATE_MARKUP_DECLARATION_OPEN, STATE_MATCH_CDATA, STATE_MATCH_COMMENT, STATE_MATCH_DOCTYPE, STATE_MATCH_PUBLIC, STATE_MATCH_SYSTEM, STATE_NAMED_ENTITY, STATE_NUMBERED_ENTITY, STATE_SELF_CLOSING_START_TAG, STATE_TAG_NAME, and STATE_TAG_OPEN.
Referenced by hubbub_parser_completed(), hubbub_parser_parse_chunk(), and hubbub_tokeniser_setopt().
hubbub_error hubbub_tokeniser_setopt (hubbub_tokeniser *tokeniser, hubbub_tokeniser_opttype type, hubbub_tokeniser_optparams *params)
Configure a hubbub tokeniser.
tokeniser | The tokeniser instance to configure |
type | The option type to set |
params | Option-specific parameters |
Definition at line 366 of file tokeniser.c.
References hubbub_tokeniser_optparams::content_model, hubbub_tokeniser::content_model, hubbub_tokeniser_optparams::error_handler, hubbub_tokeniser::error_handler, hubbub_tokeniser::error_pw, hubbub_tokeniser_optparams::handler, HUBBUB_BADPARM, HUBBUB_OK, HUBBUB_TOKENISER_CONTENT_MODEL, HUBBUB_TOKENISER_ERROR_HANDLER, HUBBUB_TOKENISER_PAUSE, HUBBUB_TOKENISER_PROCESS_CDATA, hubbub_tokeniser_run(), HUBBUB_TOKENISER_TOKEN_HANDLER, hubbub_tokeniser_optparams::model, hubbub_tokeniser_optparams::pause_parse, hubbub_tokeniser::paused, hubbub_tokeniser_optparams::process_cdata, hubbub_tokeniser::process_cdata_section, hubbub_tokeniser_optparams::pw, hubbub_tokeniser_optparams::token_handler, hubbub_tokeniser::token_handler, and hubbub_tokeniser::token_pw.
Referenced by hubbub_parser_setopt(), hubbub_treebuilder_create(), hubbub_treebuilder_destroy(), parse_generic_rcdata(), and process_plaintext_in_body().