>current_node_is( $token_name ) ) {
// This is a parse error.
}
$this->state->stack_of_open_elements->pop_until( $token_name );
$this->state->active_formatting_elements->clear_up_to_last_marker();
return true;
/*
* > A start tag whose tag name is "table"
*/
case '+TABLE':
/*
* > If the Document is not set to quirks mode, and the stack of open elements
* > has a p element in button scope, then close a p element.
*/
if (
WP_HTML_Tag_Processor::QUIRKS_MODE !== $this->compat_mode &&
$this->state->stack_of_open_elements->has_p_in_button_scope()
) {
$this->close_a_p_element();
}
$this->insert_html_element( $this->state->current_token );
$this->state->frameset_ok = false;
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;
return true;
/*
* > An end tag whose tag name is "br"
*
* This is prevented from happening because the Tag Processor
* reports all closing BR tags as if they were opening tags.
*/
/*
* > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr"
*/
case '+AREA':
case '+BR':
case '+EMBED':
case '+IMG':
case '+KEYGEN':
case '+WBR':
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
$this->state->frameset_ok = false;
return true;
/*
* > A start tag whose tag name is "input"
*/
case '+INPUT':
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
/*
* > If the token does not have an attribute with the name "type", or if it does,
* > but that attribute's value is not an ASCII case-insensitive match for the
* > string "hidden", then: set the frameset-ok flag to "not ok".
*/
$type_attribute = $this->get_attribute( 'type' );
if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) {
$this->state->frameset_ok = false;
}
return true;
/*
* > A start tag whose tag name is one of: "param", "source", "track"
*/
case '+PARAM':
case '+SOURCE':
case '+TRACK':
$this->insert_html_element( $this->state->current_token );
return true;
/*
* > A start tag whose tag name is "hr"
*/
case '+HR':
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
$this->close_a_p_element();
}
$this->insert_html_element( $this->state->current_token );
$this->state->frameset_ok = false;
return true;
/*
* > A start tag whose tag name is "image"
*/
case '+IMAGE':
/*
* > Parse error. Change the token's tag name to "img" and reprocess it. (Don't ask.)
*
* Note that this is handled elsewhere, so it should not be possible to reach this code.
*/
$this->bail( "Cannot process an IMAGE tag. (Don't ask.)" );
break;
/*
* > A start tag whose tag name is "textarea"
*/
case '+TEXTAREA':
$this->insert_html_element( $this->state->current_token );
/*
* > If the next token is a U+000A LINE FEED (LF) character token, then ignore
* > that token and move on to the next one. (Newlines at the start of
* > textarea elements are ignored as an authoring convenience.)
*
* This is handled in `get_modifiable_text()`.
*/
$this->state->frameset_ok = false;
/*
* > Switch the insertion mode to "text".
*
* As a self-contained node, this behavior is handled in the Tag Processor.
*/
return true;
/*
* > A start tag whose tag name is "xmp"
*/
case '+XMP':
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
$this->close_a_p_element();
}
$this->reconstruct_active_formatting_elements();
$this->state->frameset_ok = false;
/*
* > Follow the generic raw text element parsing algorithm.
*
* As a self-contained node, this behavior is handled in the Tag Processor.
*/
$this->insert_html_element( $this->state->current_token );
return true;
/*
* A start tag whose tag name is "iframe"
*/
case '+IFRAME':
$this->state->frameset_ok = false;
/*
* > Follow the generic raw text element parsing algorithm.
*
* As a self-contained node, this behavior is handled in the Tag Processor.
*/
$this->insert_html_element( $this->state->current_token );
return true;
/*
* > A start tag whose tag name is "noembed"
* > A start tag whose tag name is "noscript", if the scripting flag is enabled
*
* The scripting flag is never enabled in this parser.
*/
case '+NOEMBED':
$this->insert_html_element( $this->state->current_token );
return true;
/*
* > A start tag whose tag name is "select"
*/
case '+SELECT':
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
$this->state->frameset_ok = false;
switch ( $this->state->insertion_mode ) {
/*
* > If the insertion mode is one of "in table", "in caption", "in table body", "in row",
* > or "in cell", then switch the insertion mode to "in select in table".
*/
case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE:
case WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION:
case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY:
case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW:
case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL:
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE;
break;
/*
* > Otherwise, switch the insertion mode to "in select".
*/
default:
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT;
break;
}
return true;
/*
* > A start tag whose tag name is one of: "optgroup", "option"
*/
case '+OPTGROUP':
case '+OPTION':
if ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) {
$this->state->stack_of_open_elements->pop();
}
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
return true;
/*
* > A start tag whose tag name is one of: "rb", "rtc"
*/
case '+RB':
case '+RTC':
if ( $this->state->stack_of_open_elements->has_element_in_scope( 'RUBY' ) ) {
$this->generate_implied_end_tags();
if ( $this->state->stack_of_open_elements->current_node_is( 'RUBY' ) ) {
// @todo Indicate a parse error once it's possible.
}
}
$this->insert_html_element( $this->state->current_token );
return true;
/*
* > A start tag whose tag name is one of: "rp", "rt"
*/
case '+RP':
case '+RT':
if ( $this->state->stack_of_open_elements->has_element_in_scope( 'RUBY' ) ) {
$this->generate_implied_end_tags( 'RTC' );
$current_node_name = $this->state->stack_of_open_elements->current_node()->node_name;
if ( 'RTC' === $current_node_name || 'RUBY' === $current_node_name ) {
// @todo Indicate a parse error once it's possible.
}
}
$this->insert_html_element( $this->state->current_token );
return true;
/*
* > A start tag whose tag name is "math"
*/
case '+MATH':
$this->reconstruct_active_formatting_elements();
/*
* @todo Adjust MathML attributes for the token. (This fixes the case of MathML attributes that are not all lowercase.)
* @todo Adjust foreign attributes for the token. (This fixes the use of namespaced attributes, in particular XLink.)
*
* These ought to be handled in the attribute methods.
*/
$this->state->current_token->namespace = 'math';
$this->insert_html_element( $this->state->current_token );
if ( $this->state->current_token->has_self_closing_flag ) {
$this->state->stack_of_open_elements->pop();
}
return true;
/*
* > A start tag whose tag name is "svg"
*/
case '+SVG':
$this->reconstruct_active_formatting_elements();
/*
* @todo Adjust SVG attributes for the token. (This fixes the case of SVG attributes that are not all lowercase.)
* @todo Adjust foreign attributes for the token. (This fixes the use of namespaced attributes, in particular XLink in SVG.)
*
* These ought to be handled in the attribute methods.
*/
$this->state->current_token->namespace = 'svg';
$this->insert_html_element( $this->state->current_token );
if ( $this->state->current_token->has_self_closing_flag ) {
$this->state->stack_of_open_elements->pop();
}
return true;
/*
* > A start tag whose tag name is one of: "caption", "col", "colgroup",
* > "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr"
*/
case '+CAPTION':
case '+COL':
case '+COLGROUP':
case '+FRAME':
case '+HEAD':
case '+TBODY':
case '+TD':
case '+TFOOT':
case '+TH':
case '+THEAD':
case '+TR':
// Parse error. Ignore the token.
return $this->step();
}
if ( ! parent::is_tag_closer() ) {
/*
* > Any other start tag
*/
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
return true;
} else {
/*
* > Any other end tag
*/
/*
* Find the corresponding tag opener in the stack of open elements, if
* it exists before reaching a special element, which provides a kind
* of boundary in the stack. For example, a `</custom-tag>` should not
* close anything beyond its containing `P` or `DIV` element.
*/
foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
if ( 'html' === $node->namespace && $token_name === $node->node_name ) {
break;
}
if ( self::is_special( $node ) ) {
// This is a parse error, ignore the token.
return $this->step();
}
}
$this->generate_implied_end_tags( $token_name );
if ( $node !== $this->state->stack_of_open_elements->current_node() ) {
// @todo Record parse error: this error doesn't impact parsing.
}
foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
$this->state->stack_of_open_elements->pop();
if ( $node === $item ) {
return true;
}
}
}
$this->bail( 'Should not have been able to reach end of IN BODY processing. Check HTML API code.' );
// This unnecessary return prevents tools from inaccurately reporting type errors.
return false;
}
/**
* Parses next element in the 'in table' insertion mode.
*
* This internal function performs the 'in table' insertion mode
* logic for the generalized WP_HTML_Processor::step() function.
*
* @since 6.7.0
*
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
*
* @see https://html.spec.whatwg.org/#parsing-main-intable
* @see WP_HTML_Processor::step
*
* @return bool Whether an element was found.
*/
private function step_in_table(): bool {
	/*
	 * Build a dispatch key from the current token: tags are keyed as "+NAME"
	 * for openers and "-NAME" for closers, while every other token type is
	 * keyed by its bare token name (for example "#text" or "#comment").
	 */
	$token_name = $this->get_token_name();
	$token_type = $this->get_token_type();
	$op_sigil   = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : '';
	$op         = "{$op_sigil}{$token_name}";

	switch ( $op ) {
		/*
		 * > A character token, if the current node is table,
		 * > tbody, template, tfoot, thead, or tr element
		 */
		case '#text':
			$current_node      = $this->state->stack_of_open_elements->current_node();
			$current_node_name = $current_node ? $current_node->node_name : null;
			if (
				$current_node_name && (
					'TABLE' === $current_node_name ||
					'TBODY' === $current_node_name ||
					'TEMPLATE' === $current_node_name ||
					'TFOOT' === $current_node_name ||
					'THEAD' === $current_node_name ||
					'TR' === $current_node_name
				)
			) {
				/*
				 * If the text is empty after processing HTML entities and stripping
				 * U+0000 NULL bytes then ignore the token.
				 */
				if ( parent::TEXT_IS_NULL_SEQUENCE === $this->text_node_classification ) {
					return $this->step();
				}

				/*
				 * This follows the rules for "in table text" insertion mode.
				 *
				 * Whitespace-only text nodes are inserted in-place. Otherwise
				 * foster parenting is enabled and the nodes would be
				 * inserted out-of-place.
				 *
				 * > If any of the tokens in the pending table character tokens
				 * > list are character tokens that are not ASCII whitespace,
				 * > then this is a parse error: reprocess the character tokens
				 * > in the pending table character tokens list using the rules
				 * > given in the "anything else" entry in the "in table"
				 * > insertion mode.
				 * >
				 * > Otherwise, insert the characters given by the pending table
				 * > character tokens list.
				 *
				 * @see https://html.spec.whatwg.org/#parsing-main-intabletext
				 */
				if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) {
					$this->insert_html_element( $this->state->current_token );
					return true;
				}

				// Non-whitespace would trigger fostering, unsupported at this time.
				$this->bail( 'Foster parenting is not supported.' );
				break;
			}

			// Any other current node: leave the switch for the "anything else" handling.
			break;

		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A DOCTYPE token
		 */
		case 'html':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is "caption"
		 */
		case '+CAPTION':
			$this->state->stack_of_open_elements->clear_to_table_context();
			$this->state->active_formatting_elements->insert_marker();
			$this->insert_html_element( $this->state->current_token );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION;
			return true;

		/*
		 * > A start tag whose tag name is "colgroup"
		 */
		case '+COLGROUP':
			$this->state->stack_of_open_elements->clear_to_table_context();
			$this->insert_html_element( $this->state->current_token );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP;
			return true;

		/*
		 * > A start tag whose tag name is "col"
		 */
		case '+COL':
			$this->state->stack_of_open_elements->clear_to_table_context();

			/*
			 * > Insert an HTML element for a "colgroup" start tag token with no attributes,
			 * > then switch the insertion mode to "in column group".
			 */
			$this->insert_virtual_node( 'COLGROUP' );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP;
			return $this->step( self::REPROCESS_CURRENT_NODE );

		/*
		 * > A start tag whose tag name is one of: "tbody", "tfoot", "thead"
		 */
		case '+TBODY':
		case '+TFOOT':
		case '+THEAD':
			$this->state->stack_of_open_elements->clear_to_table_context();
			$this->insert_html_element( $this->state->current_token );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY;
			return true;

		/*
		 * > A start tag whose tag name is one of: "td", "th", "tr"
		 */
		case '+TD':
		case '+TH':
		case '+TR':
			$this->state->stack_of_open_elements->clear_to_table_context();

			/*
			 * > Insert an HTML element for a "tbody" start tag token with no attributes,
			 * > then switch the insertion mode to "in table body".
			 */
			$this->insert_virtual_node( 'TBODY' );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY;
			return $this->step( self::REPROCESS_CURRENT_NODE );

		/*
		 * > A start tag whose tag name is "table"
		 *
		 * This tag in the IN TABLE insertion mode is a parse error.
		 */
		case '+TABLE':
			if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) {
				return $this->step();
			}

			// Close the open TABLE, then let the new insertion mode handle this same token.
			$this->state->stack_of_open_elements->pop_until( 'TABLE' );
			$this->reset_insertion_mode_appropriately();
			return $this->step( self::REPROCESS_CURRENT_NODE );

		/*
		 * > An end tag whose tag name is "table"
		 */
		case '-TABLE':
			if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) {
				// @todo Indicate a parse error once it's possible.
				return $this->step();
			}

			$this->state->stack_of_open_elements->pop_until( 'TABLE' );
			$this->reset_insertion_mode_appropriately();
			return true;

		/*
		 * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"
		 */
		case '-BODY':
		case '-CAPTION':
		case '-COL':
		case '-COLGROUP':
		case '-HTML':
		case '-TBODY':
		case '-TD':
		case '-TFOOT':
		case '-TH':
		case '-THEAD':
		case '-TR':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is one of: "style", "script", "template"
		 * > An end tag whose tag name is "template"
		 */
		case '+STYLE':
		case '+SCRIPT':
		case '+TEMPLATE':
		case '-TEMPLATE':
			/*
			 * > Process the token using the rules for the "in head" insertion mode.
			 */
			return $this->step_in_head();

		/*
		 * > A start tag whose tag name is "input"
		 *
		 * > If the token does not have an attribute with the name "type", or if it does, but
		 * > that attribute's value is not an ASCII case-insensitive match for the string
		 * > "hidden", then: act as described in the "anything else" entry below.
		 */
		case '+INPUT':
			$type_attribute = $this->get_attribute( 'type' );
			if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) {
				goto anything_else;
			}
			// @todo Indicate a parse error once it's possible.
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A start tag whose tag name is "form"
		 *
		 * This tag in the IN TABLE insertion mode is a parse error.
		 *
		 * NOTE(review): the specification ignores the token when a template element is
		 * anywhere on the stack of open elements; this checks "in scope" instead.
		 * Confirm whether the narrower check is intentional.
		 */
		case '+FORM':
			if (
				$this->state->stack_of_open_elements->has_element_in_scope( 'TEMPLATE' ) ||
				isset( $this->state->form_element )
			) {
				return $this->step();
			}

			// This FORM is special because it immediately closes and cannot have other children.
			$this->insert_html_element( $this->state->current_token );
			$this->state->form_element = $this->state->current_token;
			$this->state->stack_of_open_elements->pop();
			return true;
	}

	/*
	 * > Anything else
	 * > Parse error. Enable foster parenting, process the token using the rules for the
	 * > "in body" insertion mode, and then disable foster parenting.
	 *
	 * @todo Indicate a parse error once it's possible.
	 */
	anything_else:
	$this->bail( 'Foster parenting is not supported.' );

	/*
	 * bail() is expected to abort by throwing, so this line should be unreachable.
	 * This unnecessary return prevents tools from inaccurately reporting type errors
	 * against the declared bool return type, matching the end of the IN BODY handler.
	 */
	return false;
}
/**
* Parses next element in the 'in table text' insertion mode.
*
* This internal function performs the 'in table text' insertion mode
* logic for the generalized WP_HTML_Processor::step() function.
*
* @since 6.7.0 Stub implementation.
*
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
*
* @see https://html.spec.whatwg.org/#parsing-main-intabletext
* @see WP_HTML_Processor::step
*
* @return bool Whether an element was found.
*/
private function step_in_table_text(): bool {
	// This insertion mode has no implementation here: report it as unsupported.
	$this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_TEXT . ' state.' );

	/*
	 * bail() is expected to abort by throwing, so this line should be unreachable.
	 * This unnecessary return prevents tools from inaccurately reporting type errors
	 * against the declared bool return type.
	 */
	return false;
}
/**
* Parses next element in the 'in caption' insertion mode.
*
* This internal function performs the 'in caption' insertion mode
* logic for the generalized WP_HTML_Processor::step() function.
*
* @since 6.7.0
*
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
*
* @see https://html.spec.whatwg.org/#parsing-main-incaption
* @see WP_HTML_Processor::step
*
* @return bool Whether an element was found.
*/
private function step_in_caption(): bool {
	// Dispatch key: "+NAME" for a tag opener, "-NAME" for a tag closer.
	$tag_name = $this->get_tag();
	$op       = ( $this->is_tag_closer() ? '-' : '+' ) . $tag_name;

	/*
	 * > An end tag whose tag name is one of: "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"
	 */
	$ignored_closers = array(
		'-BODY',
		'-COL',
		'-COLGROUP',
		'-HTML',
		'-TBODY',
		'-TD',
		'-TFOOT',
		'-TH',
		'-THEAD',
		'-TR',
	);
	if ( in_array( $op, $ignored_closers, true ) ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	/*
	 * > An end tag whose tag name is "caption"
	 * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"
	 * > An end tag whose tag name is "table"
	 *
	 * All of these close the open CAPTION in the same way; they only differ in
	 * whether the token is consumed or handed on for reprocessing afterwards.
	 */
	$caption_closing_ops = array(
		'-CAPTION',
		'+CAPTION',
		'+COL',
		'+COLGROUP',
		'+TBODY',
		'+TD',
		'+TFOOT',
		'+TH',
		'+THEAD',
		'+TR',
		'-TABLE',
	);
	if ( ! in_array( $op, $caption_closing_ops, true ) ) {
		/*
		 * > Anything else
		 * > Process the token using the rules for the "in body" insertion mode.
		 */
		return $this->step_in_body();
	}

	$open_elements = $this->state->stack_of_open_elements;

	if ( ! $open_elements->has_element_in_table_scope( 'CAPTION' ) ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	$this->generate_implied_end_tags();

	$caption_is_current_node = $open_elements->current_node_is( 'CAPTION' );
	if ( ! $caption_is_current_node ) {
		// @todo Indicate a parse error once it's possible.
	}

	$open_elements->pop_until( 'CAPTION' );
	$this->state->active_formatting_elements->clear_up_to_last_marker();
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;

	// Only the CAPTION end tag is consumed here; every other token is reprocessed.
	return '-CAPTION' === $op
		? true
		: $this->step( self::REPROCESS_CURRENT_NODE );
}
/**
* Parses next element in the 'in column group' insertion mode.
*
* This internal function performs the 'in column group' insertion mode
* logic for the generalized WP_HTML_Processor::step() function.
*
* @since 6.7.0
*
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
*
* @see https://html.spec.whatwg.org/#parsing-main-incolgroup
* @see WP_HTML_Processor::step
*
* @return bool Whether an element was found.
*/
private function step_in_column_group(): bool {
	/*
	 * Dispatch key: tags become "+NAME" or "-NAME" depending on whether they
	 * open or close; all other tokens are keyed by their bare token name.
	 */
	$name   = $this->get_token_name();
	$type   = $this->get_token_type();
	$prefix = '';
	if ( '#tag' === $type ) {
		$prefix = parent::is_tag_closer() ? '-' : '+';
	}
	$op = $prefix . $name;

	$open_elements = $this->state->stack_of_open_elements;

	switch ( $op ) {
		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 */
		case '#text':
			if ( parent::TEXT_IS_WHITESPACE !== $this->text_node_classification ) {
				// Not whitespace-only text: use the "anything else" rules after the switch.
				break;
			}

			// Insert the character.
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A DOCTYPE token
		 * > An end tag whose tag name is "col"
		 *
		 * Both are parse errors whose token is ignored.
		 *
		 * @todo Indicate a parse error once it's possible.
		 */
		case 'html':
		case '-COL':
			return $this->step();

		/*
		 * > A start tag whose tag name is "html"
		 */
		case '+HTML':
			return $this->step_in_body();

		/*
		 * > A start tag whose tag name is "col"
		 */
		case '+COL':
			// The COL is pushed and immediately popped again.
			$this->insert_html_element( $this->state->current_token );
			$open_elements->pop();
			return true;

		/*
		 * > An end tag whose tag name is "colgroup"
		 */
		case '-COLGROUP':
			if ( $open_elements->current_node_is( 'COLGROUP' ) ) {
				$open_elements->pop();
				$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;
				return true;
			}

			// @todo Indicate a parse error once it's possible.
			return $this->step();

		/*
		 * > A start tag whose tag name is "template"
		 * > An end tag whose tag name is "template"
		 */
		case '+TEMPLATE':
		case '-TEMPLATE':
			return $this->step_in_head();
	}

	/*
	 * > Anything else
	 *
	 * Close the COLGROUP when it is the current node and reprocess this token
	 * in the IN TABLE insertion mode; otherwise ignore the token.
	 */
	if ( $open_elements->current_node_is( 'COLGROUP' ) ) {
		$open_elements->pop();
		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;
		return $this->step( self::REPROCESS_CURRENT_NODE );
	}

	// @todo Indicate a parse error once it's possible.
	return $this->step();
}
/**
* Parses next element in the 'in table body' insertion mode.
*
* This internal function performs the 'in table body' insertion mode
* logic for the generalized WP_HTML_Processor::step() function.
*
* @since 6.7.0
*
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
*
* @see https://html.spec.whatwg.org/#parsing-main-intbody
* @see WP_HTML_Processor::step
*
* @return bool Whether an element was found.
*/
private function step_in_table_body(): bool {
	// Dispatch key: "+NAME" for a tag opener, "-NAME" for a tag closer.
	$tag_name      = $this->get_tag();
	$op            = ( $this->is_tag_closer() ? '-' : '+' ) . $tag_name;
	$open_elements = $this->state->stack_of_open_elements;

	switch ( $op ) {
		/*
		 * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "td", "th", "tr"
		 */
		case '-BODY':
		case '-CAPTION':
		case '-COL':
		case '-COLGROUP':
		case '-HTML':
		case '-TD':
		case '-TH':
		case '-TR':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is "tr"
		 */
		case '+TR':
			$open_elements->clear_to_table_body_context();
			$this->insert_html_element( $this->state->current_token );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
			return true;

		/*
		 * > A start tag whose tag name is one of: "th", "td"
		 *
		 * A TR is inserted as a virtual node and the cell is reprocessed in the IN ROW mode.
		 */
		case '+TD':
		case '+TH':
			// @todo Indicate a parse error once it's possible.
			$open_elements->clear_to_table_body_context();
			$this->insert_virtual_node( 'TR' );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
			return $this->step( self::REPROCESS_CURRENT_NODE );

		/*
		 * > An end tag whose tag name is one of: "tbody", "tfoot", "thead"
		 */
		case '-TBODY':
		case '-TFOOT':
		case '-THEAD':
			if ( ! $open_elements->has_element_in_table_scope( $tag_name ) ) {
				// Parse error: ignore the token.
				return $this->step();
			}

			// The end tag is consumed once the section has been closed below.
			$reprocess_token = false;
			break;

		/*
		 * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "tfoot", "thead"
		 * > An end tag whose tag name is "table"
		 */
		case '+CAPTION':
		case '+COL':
		case '+COLGROUP':
		case '+TBODY':
		case '+TFOOT':
		case '+THEAD':
		case '-TABLE':
			$has_open_section = (
				$open_elements->has_element_in_table_scope( 'TBODY' ) ||
				$open_elements->has_element_in_table_scope( 'THEAD' ) ||
				$open_elements->has_element_in_table_scope( 'TFOOT' )
			);
			if ( ! $has_open_section ) {
				// Parse error: ignore the token.
				return $this->step();
			}

			// After closing the section, the IN TABLE mode handles this same token.
			$reprocess_token = true;
			break;

		/*
		 * > Anything else
		 * > Process the token using the rules for the "in table" insertion mode.
		 */
		default:
			return $this->step_in_table();
	}

	// Shared tail: close the current table section and return to the IN TABLE mode.
	$open_elements->clear_to_table_body_context();
	$open_elements->pop();
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;

	return $reprocess_token
		? $this->step( self::REPROCESS_CURRENT_NODE )
		: true;
}
/**
* Parses next element in the 'in row' insertion mode.
*
* This internal function performs the 'in row' insertion mode
* logic for the generalized WP_HTML_Processor::step() function.
*
* @since 6.7.0
*
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
*
* @see https://html.spec.whatwg.org/#parsing-main-intr
* @see WP_HTML_Processor::step
*
* @return bool Whether an element was found.
*/
private function step_in_row(): bool {
	// Dispatch key: "+NAME" for a tag opener, "-NAME" for a tag closer.
	$tag_name      = $this->get_tag();
	$op            = ( $this->is_tag_closer() ? '-' : '+' ) . $tag_name;
	$open_elements = $this->state->stack_of_open_elements;

	switch ( $op ) {
		/*
		 * > A start tag whose tag name is one of: "th", "td"
		 */
		case '+TD':
		case '+TH':
			$open_elements->clear_to_table_row_context();
			$this->insert_html_element( $this->state->current_token );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL;
			$this->state->active_formatting_elements->insert_marker();
			return true;

		/*
		 * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "td", "th"
		 */
		case '-BODY':
		case '-CAPTION':
		case '-COL':
		case '-COLGROUP':
		case '-HTML':
		case '-TD':
		case '-TH':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > An end tag whose tag name is "tr"
		 */
		case '-TR':
			// The end tag is consumed once the row has been closed below.
			$reprocess_token = false;
			break;

		/*
		 * > An end tag whose tag name is one of: "tbody", "tfoot", "thead"
		 */
		case '-TBODY':
		case '-TFOOT':
		case '-THEAD':
			if ( ! $open_elements->has_element_in_table_scope( $tag_name ) ) {
				// Parse error: ignore the token.
				return $this->step();
			}

			$reprocess_token = true;
			break;

		/*
		 * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr"
		 * > An end tag whose tag name is "table"
		 */
		case '+CAPTION':
		case '+COL':
		case '+COLGROUP':
		case '+TBODY':
		case '+TFOOT':
		case '+THEAD':
		case '+TR':
		case '-TABLE':
			$reprocess_token = true;
			break;

		/*
		 * > Anything else
		 * > Process the token using the rules for the "in table" insertion mode.
		 */
		default:
			return $this->step_in_table();
	}

	/*
	 * Shared tail: every remaining token closes the current row, provided a
	 * TR is in table scope; without one the token is ignored.
	 */
	if ( ! $open_elements->has_element_in_table_scope( 'TR' ) ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	$open_elements->clear_to_table_row_context();
	$open_elements->pop();
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY;

	return $reprocess_token
		? $this->step( self::REPROCESS_CURRENT_NODE )
		: true;
}
/**
* Parses next element in the 'in cell' insertion mode.
*
* This internal function performs the 'in cell' insertion mode
* logic for the generalized WP_HTML_Processor::step() function.
*
* @since 6.7.0
*
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
*
* @see https://html.spec.whatwg.org/#parsing-main-intd
* @see WP_HTML_Processor::step
*
* @return bool Whether an element was found.
*/
private function step_in_cell(): bool {
	// Dispatch key: "+NAME" for a tag opener, "-NAME" for a tag closer.
	$tag_name      = $this->get_tag();
	$op            = ( $this->is_tag_closer() ? '-' : '+' ) . $tag_name;
	$open_elements = $this->state->stack_of_open_elements;

	/*
	 * > An end tag whose tag name is one of: "td", "th"
	 */
	if ( '-TD' === $op || '-TH' === $op ) {
		if ( ! $open_elements->has_element_in_table_scope( $tag_name ) ) {
			// Parse error: ignore the token.
			return $this->step();
		}

		$this->generate_implied_end_tags();

		/*
		 * @todo This needs to check if the current node is an HTML element, meaning that
		 *       when SVG and MathML support is added, this needs to differentiate between an
		 *       HTML element of the given name, such as `<td>`, and a foreign element of
		 *       the same given name.
		 */
		$cell_is_current_node = $open_elements->current_node_is( $tag_name );
		if ( ! $cell_is_current_node ) {
			// @todo Indicate a parse error once it's possible.
		}

		$open_elements->pop_until( $tag_name );
		$this->state->active_formatting_elements->clear_up_to_last_marker();
		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
		return true;
	}

	/*
	 * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "td",
	 * > "tfoot", "th", "thead", "tr"
	 */
	$cell_closing_openers = array(
		'+CAPTION',
		'+COL',
		'+COLGROUP',
		'+TBODY',
		'+TD',
		'+TFOOT',
		'+TH',
		'+THEAD',
		'+TR',
	);
	if ( in_array( $op, $cell_closing_openers, true ) ) {
		/*
		 * > Assert: The stack of open elements has a td or th element in table scope.
		 *
		 * Nothing to do here, except to verify in tests that this never appears.
		 */
		$this->close_cell();
		return $this->step( self::REPROCESS_CURRENT_NODE );
	}

	/*
	 * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html"
	 */
	$ignored_closers = array( '-BODY', '-CAPTION', '-COL', '-COLGROUP', '-HTML' );
	if ( in_array( $op, $ignored_closers, true ) ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	/*
	 * > An end tag whose tag name is one of: "table", "tbody", "tfoot", "thead", "tr"
	 */
	$table_structure_closers = array( '-TABLE', '-TBODY', '-TFOOT', '-THEAD', '-TR' );
	if ( in_array( $op, $table_structure_closers, true ) ) {
		if ( ! $open_elements->has_element_in_table_scope( $tag_name ) ) {
			// Parse error: ignore the token.
			return $this->step();
		}

		// Close the cell first, then let the IN ROW mode handle this same token.
		$this->close_cell();
		return $this->step( self::REPROCESS_CURRENT_NODE );
	}

	/*
	 * > Anything else
	 * > Process the token using the rules for the "in body" insertion mode.
	 */
	return $this->step_in_body();
}
/**
 * Parses next element in the 'in select' insertion mode.
 *
 * This internal function performs the 'in select' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Every branch does one of three things: it inserts and/or pops elements
 * and returns `true`, it hands the token to another insertion mode's
 * handler, or it ignores the token by continuing with `step()`.
 *
 * @since 6.7.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselect
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_select(): bool {
	$token_name = $this->get_token_name();
	$token_type = $this->get_token_type();
	$op_sigil   = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : '';
	$op         = "{$op_sigil}{$token_name}";

	switch ( $op ) {
		/*
		 * > Any other character token
		 */
		case '#text':
			/*
			 * > A character token that is U+0000 NULL
			 *
			 * If a text node only comprises null bytes then it should be
			 * entirely ignored and should not return to calling code.
			 */
			if ( parent::TEXT_IS_NULL_SEQUENCE === $this->text_node_classification ) {
				// Parse error: ignore the token.
				return $this->step();
			}

			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A DOCTYPE token
		 */
		case 'html':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is "html"
		 */
		case '+HTML':
			return $this->step_in_body();

		/*
		 * > A start tag whose tag name is "option"
		 */
		case '+OPTION':
			// An open OPTION is implicitly closed by the next OPTION.
			if ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) {
				$this->state->stack_of_open_elements->pop();
			}
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A start tag whose tag name is "optgroup"
		 * > A start tag whose tag name is "hr"
		 *
		 * These rules are identical except for the treatment of the self-closing flag and
		 * the subsequent pop of the HR void element, all of which is handled elsewhere in the processor.
		 */
		case '+OPTGROUP':
		case '+HR':
			if ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) {
				$this->state->stack_of_open_elements->pop();
			}

			if ( $this->state->stack_of_open_elements->current_node_is( 'OPTGROUP' ) ) {
				$this->state->stack_of_open_elements->pop();
			}

			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > An end tag whose tag name is "optgroup"
		 */
		case '-OPTGROUP':
			$current_node = $this->state->stack_of_open_elements->current_node();
			if ( $current_node && 'OPTION' === $current_node->node_name ) {
				/*
				 * Grab only the first node yielded when walking up from the
				 * current node. The variable is initialized beforehand so that
				 * it is never read while undefined should the walk yield nothing.
				 */
				$parent = null;
				foreach ( $this->state->stack_of_open_elements->walk_up( $current_node ) as $parent ) {
					break;
				}

				// Close the OPTION only when it sits directly inside an OPTGROUP.
				if ( $parent && 'OPTGROUP' === $parent->node_name ) {
					$this->state->stack_of_open_elements->pop();
				}
			}

			if ( $this->state->stack_of_open_elements->current_node_is( 'OPTGROUP' ) ) {
				$this->state->stack_of_open_elements->pop();
				return true;
			}

			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > An end tag whose tag name is "option"
		 */
		case '-OPTION':
			if ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) {
				$this->state->stack_of_open_elements->pop();
				return true;
			}

			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > An end tag whose tag name is "select"
		 * > A start tag whose tag name is "select"
		 *
		 * > It just gets treated like an end tag.
		 */
		case '-SELECT':
		case '+SELECT':
			if ( ! $this->state->stack_of_open_elements->has_element_in_select_scope( 'SELECT' ) ) {
				// Parse error: ignore the token.
				return $this->step();
			}
			$this->state->stack_of_open_elements->pop_until( 'SELECT' );
			$this->reset_insertion_mode_appropriately();
			return true;

		/*
		 * > A start tag whose tag name is one of: "input", "keygen", "textarea"
		 *
		 * All three of these tags are considered a parse error when found in this insertion mode.
		 */
		case '+INPUT':
		case '+KEYGEN':
		case '+TEXTAREA':
			if ( ! $this->state->stack_of_open_elements->has_element_in_select_scope( 'SELECT' ) ) {
				// Ignore the token.
				return $this->step();
			}

			// Close the SELECT, then let the new insertion mode handle this same token.
			$this->state->stack_of_open_elements->pop_until( 'SELECT' );
			$this->reset_insertion_mode_appropriately();
			return $this->step( self::REPROCESS_CURRENT_NODE );

		/*
		 * > A start tag whose tag name is one of: "script", "template"
		 * > An end tag whose tag name is "template"
		 */
		case '+SCRIPT':
		case '+TEMPLATE':
		case '-TEMPLATE':
			return $this->step_in_head();
	}

	/*
	 * > Anything else
	 * >   Parse error: ignore the token.
	 */
	return $this->step();
}
/**
 * Parses next element in the 'in select in table' insertion mode.
 *
 * This internal function performs the 'in select in table' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Only table-structure tags receive special treatment here; every other
 * token is processed with the rules for the 'in select' insertion mode.
 *
 * @since 6.7.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-inselectintable
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_select_in_table(): bool {
	$token_name = $this->get_token_name();

	/*
	 * > A start tag whose tag name is one of: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"
	 * > An end tag whose tag name is one of: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"
	 */
	$table_structure_tags = array( 'CAPTION', 'TABLE', 'TBODY', 'TFOOT', 'THEAD', 'TR', 'TD', 'TH' );

	$is_table_structure_tag = (
		'#tag' === $this->get_token_type() &&
		in_array( $token_name, $table_structure_tags, true )
	);

	/*
	 * > Anything else
	 */
	if ( ! $is_table_structure_tag ) {
		return $this->step_in_select();
	}

	// @todo Indicate a parse error once it's possible.

	/*
	 * An end tag is ignored unless a matching element is in table scope.
	 * Start tags carry no such precondition.
	 */
	if (
		parent::is_tag_closer() &&
		! $this->state->stack_of_open_elements->has_element_in_table_scope( $token_name )
	) {
		return $this->step();
	}

	// Close the SELECT and let the resulting insertion mode handle this same token.
	$this->state->stack_of_open_elements->pop_until( 'SELECT' );
	$this->reset_insertion_mode_appropriately();

	return $this->step( self::REPROCESS_CURRENT_NODE );
}
/**
 * Parses next element in the 'in template' insertion mode.
 *
 * This internal function performs the 'in template' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Most start tags replace the top of the stack of template insertion
 * modes with a mode chosen from the tag name, switch to that mode,
 * and then reprocess the same token under it.
 *
 * @since 6.7.0 Stub implementation.
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-intemplate
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_template(): bool {
	$token_name = $this->get_token_name();
	$token_type = $this->get_token_type();
	$is_closer  = $this->is_tag_closer();
	$op_sigil   = '';
	if ( '#tag' === $token_type ) {
		$op_sigil = $is_closer ? '-' : '+';
	}
	$op = "{$op_sigil}{$token_name}";

	// Insertion mode which will replace the current template insertion mode, if any.
	$next_mode = null;

	switch ( $op ) {
		/*
		 * > A character token
		 * > A comment token
		 * > A DOCTYPE token
		 */
		case '#text':
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
		case 'html':
			return $this->step_in_body();

		/*
		 * > A start tag whose tag name is one of: "base", "basefont", "bgsound", "link",
		 * > "meta", "noframes", "script", "style", "template", "title"
		 * > An end tag whose tag name is "template"
		 */
		case '+BASE':
		case '+BASEFONT':
		case '+BGSOUND':
		case '+LINK':
		case '+META':
		case '+NOFRAMES':
		case '+SCRIPT':
		case '+STYLE':
		case '+TEMPLATE':
		case '+TITLE':
		case '-TEMPLATE':
			return $this->step_in_head();

		/*
		 * > A start tag whose tag name is one of: "caption", "colgroup", "tbody", "tfoot", "thead"
		 */
		case '+CAPTION':
		case '+COLGROUP':
		case '+TBODY':
		case '+TFOOT':
		case '+THEAD':
			$next_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;
			break;

		/*
		 * > A start tag whose tag name is "col"
		 */
		case '+COL':
			$next_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP;
			break;

		/*
		 * > A start tag whose tag name is "tr"
		 */
		case '+TR':
			$next_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY;
			break;

		/*
		 * > A start tag whose tag name is one of: "td", "th"
		 */
		case '+TD':
		case '+TH':
			$next_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
			break;
	}

	/*
	 * > Any other start tag
	 */
	if ( null === $next_mode && ! $is_closer ) {
		$next_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
	}

	/*
	 * Swap the current template insertion mode for the chosen one,
	 * switch to it, and reprocess this same token under the new mode.
	 */
	if ( null !== $next_mode ) {
		array_pop( $this->state->stack_of_template_insertion_modes );
		$this->state->stack_of_template_insertion_modes[] = $next_mode;
		$this->state->insertion_mode                      = $next_mode;
		return $this->step( self::REPROCESS_CURRENT_NODE );
	}

	/*
	 * > Any other end tag
	 */
	if ( $is_closer ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	/*
	 * > An end-of-file token
	 *
	 * NOTE(review): every token which is not a closer has already returned
	 * above, and every closer returns in the preceding check, so the
	 * statements below cannot execute as this function is written.
	 * Confirm where end-of-file inside a TEMPLATE is meant to be handled.
	 */
	if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
		// Stop parsing.
		return false;
	}

	// @todo Indicate a parse error once it's possible.
	$this->state->stack_of_open_elements->pop_until( 'TEMPLATE' );
	$this->state->active_formatting_elements->clear_up_to_last_marker();
	array_pop( $this->state->stack_of_template_insertion_modes );
	$this->reset_insertion_mode_appropriately();
	return $this->step( self::REPROCESS_CURRENT_NODE );
}
/**
 * Parses next element in the 'after body' insertion mode.
 *
 * This internal function performs the 'after body' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Apart from whitespace, DOCTYPE, and HTML tags, any token found here
 * switches the insertion mode back to 'in body' and is reprocessed there.
 *
 * @since 6.7.0 Stub implementation.
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-afterbody
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_after_body(): bool {
	$tag_name   = $this->get_token_name();
	$token_type = $this->get_token_type();
	$op_sigil   = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
	$op         = "{$op_sigil}{$tag_name}";

	switch ( $op ) {
		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * >   U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 *
		 * > Process the token using the rules for the "in body" insertion mode.
		 */
		case '#text':
			if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) {
				return $this->step_in_body();
			}

			// Non-whitespace text follows the "anything else" rule after the switch.
			break;

		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->bail( 'Content outside of BODY is unsupported.' );
			break;

		/*
		 * > A DOCTYPE token
		 */
		case 'html':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is "html"
		 */
		case '+HTML':
			return $this->step_in_body();

		/*
		 * > An end tag whose tag name is "html"
		 *
		 * > If the parser was created as part of the HTML fragment parsing algorithm,
		 * > this is a parse error; ignore the token. (fragment case)
		 * >
		 * > Otherwise, switch the insertion mode to "after after body".
		 */
		case '-HTML':
			if ( isset( $this->context_node ) ) {
				return $this->step();
			}

			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY;

			/*
			 * Only the insertion mode has changed: nothing has been pushed onto
			 * or popped from the stack of open elements, so no element has been
			 * found for the caller. Continue with the next token instead of
			 * reporting this one as a match.
			 */
			return $this->step();
	}

	/*
	 * > Parse error. Switch the insertion mode to "in body" and reprocess the token.
	 */
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
	return $this->step( self::REPROCESS_CURRENT_NODE );
}
/**
 * Parses next element in the 'in frameset' insertion mode.
 *
 * This internal function performs the 'in frameset' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Tokens without a dedicated rule below are parse errors and are ignored.
 *
 * @since 6.7.0 Stub implementation.
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-inframeset
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_frameset(): bool {
	$tag_name = $this->get_token_name();
	$sigil    = '';
	if ( '#tag' === $this->get_token_type() ) {
		$sigil = $this->is_tag_closer() ? '-' : '+';
	}
	$op = "{$sigil}{$tag_name}";

	switch ( $op ) {
		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * >   U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 * >
		 * > Insert the character.
		 *
		 * This algorithm effectively strips non-whitespace characters from text and inserts
		 * them under HTML. This is not supported at this time.
		 */
		case '#text':
			if ( parent::TEXT_IS_WHITESPACE !== $this->text_node_classification ) {
				$this->bail( 'Non-whitespace characters cannot be handled in frameset.' );
				break;
			}
			return $this->step_in_body();

		/*
		 * > A comment token
		 * > A start tag whose tag name is "frameset"
		 *
		 * Both rules insert an element for the token and stop there.
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
		case '+FRAMESET':
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A DOCTYPE token
		 *
		 * Parse error: ignore the token, which happens after the switch.
		 */
		case 'html':
			break;

		/*
		 * > A start tag whose tag name is "html"
		 */
		case '+HTML':
			return $this->step_in_body();

		/*
		 * > An end tag whose tag name is "frameset"
		 */
		case '-FRAMESET':
			$open_elements = $this->state->stack_of_open_elements;

			/*
			 * > If the current node is the root html element, then this is a parse error;
			 * > ignore the token. (fragment case)
			 */
			if ( $open_elements->current_node_is( 'HTML' ) ) {
				break;
			}

			/*
			 * > Otherwise, pop the current node from the stack of open elements.
			 */
			$open_elements->pop();

			/*
			 * > If the parser was not created as part of the HTML fragment parsing algorithm
			 * > (fragment case), and the current node is no longer a frameset element, then
			 * > switch the insertion mode to "after frameset".
			 */
			if ( isset( $this->context_node ) ) {
				return true;
			}

			if ( ! $open_elements->current_node_is( 'FRAMESET' ) ) {
				$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET;
			}
			return true;

		/*
		 * > A start tag whose tag name is "frame"
		 *
		 * > Insert an HTML element for the token. Immediately pop the
		 * > current node off the stack of open elements.
		 * >
		 * > Acknowledge the token's self-closing flag, if it is set.
		 */
		case '+FRAME':
			$this->insert_html_element( $this->state->current_token );
			$this->state->stack_of_open_elements->pop();
			return true;

		/*
		 * > A start tag whose tag name is "noframes"
		 */
		case '+NOFRAMES':
			return $this->step_in_head();
	}

	// Parse error: ignore the token.
	return $this->step();
}
/**
 * Parses next element in the 'after frameset' insertion mode.
 *
 * This internal function performs the 'after frameset' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Tokens without a dedicated rule below are parse errors and are ignored.
 *
 * @since 6.7.0 Stub implementation.
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-afterframeset
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_after_frameset(): bool {
	$tag_name   = $this->get_token_name();
	$token_type = $this->get_token_type();
	$op_sigil   = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
	$op         = "{$op_sigil}{$tag_name}";

	switch ( $op ) {
		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * >   U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 * >
		 * > Insert the character.
		 *
		 * This algorithm effectively strips non-whitespace characters from text and inserts
		 * them under HTML. This is not supported at this time.
		 */
		case '#text':
			if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) {
				return $this->step_in_body();
			}

			$this->bail( 'Non-whitespace characters cannot be handled in after frameset' );
			break;

		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A DOCTYPE token
		 */
		case 'html':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is "html"
		 */
		case '+HTML':
			return $this->step_in_body();

		/*
		 * > An end tag whose tag name is "html"
		 */
		case '-HTML':
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET;

			/*
			 * Only the insertion mode has changed: nothing has been pushed onto
			 * or popped from the stack of open elements, so no element has been
			 * found for the caller. Continue with the next token instead of
			 * reporting this one as a match.
			 */
			return $this->step();

		/*
		 * > A start tag whose tag name is "noframes"
		 */
		case '+NOFRAMES':
			return $this->step_in_head();
	}

	// Parse error: ignore the token.
	return $this->step();
}
/**
 * Parses next element in the 'after after body' insertion mode.
 *
 * This internal function performs the 'after after body' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * DOCTYPE tokens, HTML start tags, and whitespace-only text are handed to
 * the 'in body' rules; comments are unsupported; everything else switches
 * the insertion mode back to 'in body' and is reprocessed there.
 *
 * @since 6.7.0 Stub implementation.
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#the-after-after-body-insertion-mode
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_after_after_body(): bool {
	$tag_name = $this->get_token_name();
	$sigil    = '';
	if ( '#tag' === $this->get_token_type() ) {
		$sigil = $this->is_tag_closer() ? '-' : '+';
	}
	$op = "{$sigil}{$tag_name}";

	/*
	 * > A DOCTYPE token
	 * > A start tag whose tag name is "html"
	 *
	 * > Process the token using the rules for the "in body" insertion mode.
	 */
	if ( 'html' === $op || '+HTML' === $op ) {
		return $this->step_in_body();
	}

	/*
	 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
	 * >   U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
	 * >
	 * > Process the token using the rules for the "in body" insertion mode.
	 */
	if ( '#text' === $op && parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) {
		return $this->step_in_body();
	}

	/*
	 * > A comment token
	 */
	if ( in_array( $op, array( '#comment', '#funky-comment', '#presumptuous-tag' ), true ) ) {
		$this->bail( 'Content outside of HTML is unsupported.' );
	}

	/*
	 * > Parse error. Switch the insertion mode to "in body" and reprocess the token.
	 */
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
	return $this->step( self::REPROCESS_CURRENT_NODE );
}
/**
 * Parses next element in the 'after after frameset' insertion mode.
 *
 * This internal function performs the 'after after frameset' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Tokens without a dedicated rule below are parse errors and are ignored.
 *
 * @since 6.7.0 Stub implementation.
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#the-after-after-frameset-insertion-mode
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_after_after_frameset(): bool {
	$tag_name = $this->get_token_name();
	$sigil    = '';
	if ( '#tag' === $this->get_token_type() ) {
		$sigil = $this->is_tag_closer() ? '-' : '+';
	}
	$op = "{$sigil}{$tag_name}";

	/*
	 * > A DOCTYPE token
	 * > A start tag whose tag name is "html"
	 *
	 * > Process the token using the rules for the "in body" insertion mode.
	 */
	if ( 'html' === $op || '+HTML' === $op ) {
		return $this->step_in_body();
	}

	/*
	 * > A start tag whose tag name is "noframes"
	 */
	if ( '+NOFRAMES' === $op ) {
		return $this->step_in_head();
	}

	/*
	 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
	 * >   U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
	 * >
	 * > Process the token using the rules for the "in body" insertion mode.
	 *
	 * This algorithm effectively strips non-whitespace characters from text and inserts
	 * them under HTML. This is not supported at this time.
	 */
	if ( '#text' === $op ) {
		if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) {
			return $this->step_in_body();
		}

		$this->bail( 'Non-whitespace characters cannot be handled in after after frameset.' );
	}

	/*
	 * > A comment token
	 */
	if ( in_array( $op, array( '#comment', '#funky-comment', '#presumptuous-tag' ), true ) ) {
		$this->bail( 'Content outside of HTML is unsupported.' );
	}

	// Parse error: ignore the token.
	return $this->step();
}
/**
* Parses next element in the 'in foreign content' insertion mode.
*
* This internal function performs the 'in foreign content' insertion mode
* logic for the generalized WP_HTML_Processor::step() function.
*
* Overview of the flow below:
*
* - Text, CDATA, and comment-like tokens are inserted as foreign elements.
* - DOCTYPE tokens are ignored.
* - A fixed list of HTML start tags, FONT start tags carrying a "color",
* "face", or "size" attribute, and the BR and P end tags pop nodes off
* of the stack of open elements and then dispatch the token to the
* handler for the current insertion mode.
* - Any other start tag is inserted as a foreign element.
* - Any other end tag walks up the stack of open elements looking for a
* node whose name matches case-insensitively; when the walk reaches a
* node in the HTML namespace the token is dispatched to the handler
* for the current insertion mode instead.
*
* @since 6.7.0 Stub implementation.
*
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
*
* @see https://html.spec.whatwg.org/#parsing-main-inforeign
* @see WP_HTML_Processor::step
*
* @return bool Whether an element was found.
*/
private function step_in_foreign_content(): bool {
$tag_name = $this->get_token_name();
$token_type = $this->get_token_type();
$op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
$op = "{$op_sigil}{$tag_name}";
/*
* > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size"
*
* This section drawn out above the switch to more easily incorporate
* the additional rules based on the presence of the attributes.
*
* The operation is renamed to a sentinel string which matches a dedicated
* `case` below; a FONT tag without these attributes keeps the plain
* "+FONT" operation, which has no `case` of its own.
*/
if (
'+FONT' === $op &&
(
null !== $this->get_attribute( 'color' ) ||
null !== $this->get_attribute( 'face' ) ||
null !== $this->get_attribute( 'size' )
)
) {
$op = '+FONT with attributes';
}
switch ( $op ) {
case '#text':
/*
* > A character token that is U+0000 NULL
*
* This is handled by `get_modifiable_text()`.
*/
/*
* Whitespace-only text does not affect the frameset-ok flag.
* It is probably inter-element whitespace, but it may also
* contain character references which decode only to whitespace.
*/
if ( parent::TEXT_IS_GENERIC === $this->text_node_classification ) {
$this->state->frameset_ok = false;
}
$this->insert_foreign_element( $this->state->current_token, false );
return true;
/*
* CDATA sections are alternate wrappers for text content and therefore
* ought to follow the same rules as text nodes.
*/
case '#cdata-section':
/*
* NULL bytes and whitespace do not change the frameset-ok flag.
*
* The offsets skip nine bytes at the start of the token and exclude
* twelve bytes in total, which matches the lengths of `<![CDATA[` (9)
* and `]]>` (3). NOTE(review): this assumes the bookmark spans the
* entire CDATA section including both delimiters; confirm against
* where the bookmark is created.
*/
$current_token = $this->bookmarks[ $this->state->current_token->bookmark_name ];
$cdata_content_start = $current_token->start + 9;
$cdata_content_length = $current_token->length - 12;
// If the run of NULL/whitespace bytes is shorter than the content, something else is present.
if ( strspn( $this->html, "\0 \t\n\f\r", $cdata_content_start, $cdata_content_length ) !== $cdata_content_length ) {
$this->state->frameset_ok = false;
}
$this->insert_foreign_element( $this->state->current_token, false );
return true;
/*
* > A comment token
*/
case '#comment':
case '#funky-comment':
case '#presumptuous-tag':
$this->insert_foreign_element( $this->state->current_token, false );
return true;
/*
* > A DOCTYPE token
*/
case 'html':
// Parse error: ignore the token.
return $this->step();
/*
* > A start tag whose tag name is "b", "big", "blockquote", "body", "br", "center",
* > "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2", "h3", "h4", "h5",
* > "h6", "head", "hr", "i", "img", "li", "listing", "menu", "meta", "nobr", "ol",
* > "p", "pre", "ruby", "s", "small", "span", "strong", "strike", "sub", "sup",
* > "table", "tt", "u", "ul", "var"
*
* > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size"
*
* > An end tag whose tag name is "br", "p"
*
* Closing BR tags are always reported by the Tag Processor as opening tags.
*/
case '+B':
case '+BIG':
case '+BLOCKQUOTE':
case '+BODY':
case '+BR':
case '+CENTER':
case '+CODE':
case '+DD':
case '+DIV':
case '+DL':
case '+DT':
case '+EM':
case '+EMBED':
case '+H1':
case '+H2':
case '+H3':
case '+H4':
case '+H5':
case '+H6':
case '+HEAD':
case '+HR':
case '+I':
case '+IMG':
case '+LI':
case '+LISTING':
case '+MENU':
case '+META':
case '+NOBR':
case '+OL':
case '+P':
case '+PRE':
case '+RUBY':
case '+S':
case '+SMALL':
case '+SPAN':
case '+STRONG':
case '+STRIKE':
case '+SUB':
case '+SUP':
case '+TABLE':
case '+TT':
case '+U':
case '+UL':
case '+VAR':
case '+FONT with attributes':
case '-BR':
case '-P':
// @todo Indicate a parse error once it's possible.
/*
* Pop nodes until reaching one marked as a "math" or "html"
* integration node, or one in the HTML namespace. That node
* itself stays on the stack.
*/
foreach ( $this->state->stack_of_open_elements->walk_up() as $current_node ) {
if (
'math' === $current_node->integration_node_type ||
'html' === $current_node->integration_node_type ||
'html' === $current_node->namespace
) {
break;
}
$this->state->stack_of_open_elements->pop();
}
/*
* The target label lives inside the "any other end tag" block further
* down; this jump enters that block directly at the dispatch switch.
*/
goto in_foreign_content_process_in_current_insertion_mode;
}
/*
* > Any other start tag
*/
if ( ! $this->is_tag_closer() ) {
$this->insert_foreign_element( $this->state->current_token, false );
/*
* > If the token has its self-closing flag set, then run
* > the appropriate steps from the following list:
* >
* > ↪ the token's tag name is "script", and the new current node is in the SVG namespace
* > Acknowledge the token's self-closing flag, and then act as
* > described in the steps for a "script" end tag below.
* >
* > ↪ Otherwise
* > Pop the current node off the stack of open elements and
* > acknowledge the token's self-closing flag.
*
* Since the rules for SCRIPT below indicate to pop the element off of the stack of
* open elements, which is the same for the Otherwise condition, there's no need to
* separate these checks. The difference comes when a parser operates with the scripting
* flag enabled, and executes the script, which this parser does not support.
*/
if ( $this->state->current_token->has_self_closing_flag ) {
$this->state->stack_of_open_elements->pop();
}
return true;
}
/*
* > An end tag whose name is "script", if the current node is an SVG script element.
*
* NOTE(review): the name and namespace are read from the current token
* rather than from the current node on the stack of open elements;
* confirm that the token's namespace reflects the current node here.
*/
if ( $this->is_tag_closer() && 'SCRIPT' === $this->state->current_token->node_name && 'svg' === $this->state->current_token->namespace ) {
$this->state->stack_of_open_elements->pop();
return true;
}
/*
* > Any other end tag
*/
if ( $this->is_tag_closer() ) {
// Start the search at the current node and move toward the root.
$node = $this->state->stack_of_open_elements->current_node();
if ( $tag_name !== $node->node_name ) {
// @todo Indicate a parse error once it's possible.
}
// Top of the loop: re-entered via `goto` for each successive ancestor.
in_foreign_content_end_tag_loop:
// Stop once the walk has reached the node which `at( 1 )` returns.
if ( $node === $this->state->stack_of_open_elements->at( 1 ) ) {
return true;
}
/*
* > If node's tag name, converted to ASCII lowercase, is the same as the tag name
* > of the token, pop elements from the stack of open elements until node has
* > been popped from the stack, and then return.
*/
if ( 0 === strcasecmp( $node->node_name, $tag_name ) ) {
foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
$this->state->stack_of_open_elements->pop();
if ( $node === $item ) {
return true;
}
}
}
// Advance to the first node yielded when walking up from `$node`.
foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) {
$node = $item;
break;
}
// Keep searching while still among foreign (non-HTML-namespace) nodes.
if ( 'html' !== $node->namespace ) {
goto in_foreign_content_end_tag_loop;
}
/*
* Dispatch the token to the handler for whichever insertion mode is
* currently set. This label is also the target of the `goto` in the
* switch above, which jumps into this block from outside of it.
*/
in_foreign_content_process_in_current_insertion_mode:
switch ( $this->state->insertion_mode ) {
case WP_HTML_Processor_State::INSERTION_MODE_INITIAL:
return $this->step_initial();
case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML:
return $this->step_before_html();
case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD:
return $this->step_before_head();
case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD:
return $this->step_in_head();
case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT:
return $this->step_in_head_noscript();
case WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD:
return $this->step_after_head();
case WP_HTML_Processor_State::INSERTION_MODE_IN_BODY:
return $this->step_in_body();
case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE:
return $this->step_in_table();
case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_TEXT:
return $this->step_in_table_text();
case WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION:
return $this->step_in_caption();
case WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP:
return $this->step_in_column_group();
case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY:
return $this->step_in_table_body();
case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW:
return $this->step_in_row();
case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL:
return $this->step_in_cell();
case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT:
return $this->step_in_select();
case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE:
return $this->step_in_select_in_table();
case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE:
return $this->step_in_template();
case WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY:
return $this->step_after_body();
case WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET:
return $this->step_in_frameset();
case WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET:
return $this->step_after_frameset();
case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY:
return $this->step_after_after_body();
case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET:
return $this->step_after_after_frameset();
// This should be unreachable but PHP doesn't have total type checking on switch.
default:
$this->bail( "Unaware of the requested parsing mode: '{$this->state->insertion_mode}'." );
}
}
$this->bail( 'Should not have been able to reach end of IN FOREIGN CONTENT processing. Check HTML API code.' );
// This unnecessary return prevents tools from inaccurately reporting type errors.
return false;
}
/*
* Internal helpers
*/
/**
 * Creates a new bookmark for the currently-matched token and returns the generated name.
 *
 * Names come from an incrementing counter, which is advanced on
 * every call, including a call which fails to set the bookmark.
 *
 * @since 6.4.0
 * @since 6.5.0 Renamed from bookmark_tag() to bookmark_token().
 *
 * @throws Exception When unable to allocate requested bookmark.
 *
 * @return string|false Name of created bookmark, or false if unable to create.
 */
private function bookmark_token() {
	++$this->bookmark_counter;

	$was_bookmarked = parent::set_bookmark( $this->bookmark_counter );
	if ( $was_bookmarked ) {
		return "{$this->bookmark_counter}";
	}

	// Record why the processor stopped before aborting.
	$this->last_error = self::ERROR_EXCEEDED_MAX_BOOKMARKS;
	throw new Exception( 'could not allocate bookmark' );
}
/*
* HTML semantic overrides for Tag Processor
*/
/**
 * Indicates the namespace of the current token, or "html" if there is none.
 *
 * When the HTML Processor has a current element, the namespace recorded
 * on that element's token is reported; otherwise the answer is delegated
 * to the Tag Processor.
 *
 * @return string One of "html", "math", or "svg".
 */
public function get_namespace(): string {
	return isset( $this->current_element )
		? $this->current_element->token->namespace
		: parent::get_namespace();
}
/**
 * Returns the uppercase name of the matched tag.
 *
 * The semantic rules for HTML specify that certain tags be reprocessed
 * with a different tag name. Because of this, the tag name presented
 * by the HTML Processor may differ from the one reported by the HTML
 * Tag Processor, which doesn't apply these semantic rules.
 *
 * Example:
 *
 *     $processor = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' );
 *     $processor->next_tag() === true;
 *     $processor->get_tag() === 'DIV';
 *
 *     $processor->next_tag() === false;
 *     $processor->get_tag() === null;
 *
 * @since 6.4.0
 *
 * @return string|null Name of currently matched tag in input HTML, or `null` if none found.
 */
public function get_tag(): ?string {
	// No tag is reported once the processor has recorded an error.
	if ( null !== $this->last_error ) {
		return null;
	}

	// Virtual nodes take their name from the token of the current element.
	if ( $this->is_virtual() ) {
		return $this->current_element->token->node_name;
	}

	$tag_name = parent::get_tag();
	if ( 'IMAGE' !== $tag_name ) {
		return $tag_name;
	}

	/*
	 * > A start tag whose tag name is "image"
	 * > Change the token's tag name to "img" and reprocess it. (Don't ask.)
	 *
	 * The renaming only applies to elements in the HTML namespace.
	 */
	return 'html' === $this->get_namespace() ? 'IMG' : $tag_name;
}
/**
* Indicates if the currently matched tag contains the self-closing flag.
*
* No HTML elements ought to have the self-closing flag and for those, the self-closing
* flag will be ignored. For void elements this is benign because they "self close"
* automatically. For non-void HTML elements though problems will appear if someone
* intends to use a self-closing element in place of that element with an empty body.
* For HTML foreign elements and custom elements the self-closing flag determines if
* they self-close or not.
*
* This function does not determine if a tag is self-closing,
* but only if the self-closing flag is present in the syntax.
*
* @since 6.6.0 Subclassed for the HTML Processor.
*
* @return bool Whether the currently matched tag contains the self-closing flag.
*/
public function has_self_closing_flag(): bool {
	// A virtual node is always reported as lacking the self-closing flag.
	if ( $this->is_virtual() ) {
		return false;
	}

	// For every other node, defer to the parent class's answer.
	return parent::has_self_closing_flag();
}
/**
* Returns the node name represented by the token.
*
* This matches the DOM API value `nodeName`. Some values
* are static, such as `#text` for a text node, while others
* are dynamically generated from the token itself.
*
* Dynamic names:
* - Uppercase tag name for tag matches.
* - `html` for DOCTYPE declarations.
*
* Note that if the Tag Processor is not matched on a token
* then this function will return `null`, either because it
* hasn't yet found a token or because it reached the end
* of the document without matching a token.
*
* @since 6.6.0 Subclassed for the HTML Processor.
*
* @return string|null Name of the matched token.
*/
public function get_token_name(): ?string {
	// Virtual nodes report the node name stored on the current element's token.
	if ( $this->is_virtual() ) {
		return $this->current_element->token->node_name;
	}

	// All other nodes use the name computed by the parent class.
	return parent::get_token_name();
}
/**
* Indicates the kind of matched token, if any.
*
* This differs from `get_token_name()` in that it always
* returns a static string indicating the type, whereas
* `get_token_name()` may return values derived from the
* token itself, such as a tag name or processing
* instruction tag.
*
* Possible values:
* - `#tag` when matched on a tag.
* - `#text` when matched on a text node.
* - `#cdata-section` when matched on a CDATA node.
* - `#comment` when matched on a comment.
* - `#doctype` when matched on a DOCTYPE declaration.
* - `#presumptuous-tag` when matched on an empty tag closer.
* - `#funky-comment` when matched on a funky comment.
*
* @since 6.6.0 Subclassed for the HTML Processor.
*
* @return string|null What kind of token is matched, or null.
*/
public function get_token_type(): ?string {
if ( $this->is_virtual() ) {
/*
* This logic comes from the Tag Processor.
*
* @todo It would be ideal not to repeat this here, but it's not clearly
* better to allow passing a token name to `get_token_type()`.
*/
$node_name = $this->current_element->token->node_name;
$starting_char = $node_name[0];
if ( 'A' <= $starting_char && 'Z' >= $starting_char ) {
return '#tag';
}
if ( 'html' === $node_name ) {
return '#doctype';
}
return $node_name;
}
retur500-internal server errorError occurred: 500 - internal server error