* Since UTF-8 is the only currently-accepted charset, if working with a
	 * document that isn't UTF-8, it's important to convert the document before
	 * creating the processor: pass in the converted HTML.
	 *
	 * @param string      $html                    Input HTML document to process.
	 * @param string|null $known_definite_encoding Optional. If provided, specifies the charset used
	 *                                             in the input byte stream. Currently must be UTF-8.
	 * @return static|null The created processor if successful, otherwise null.
	 */
	public static function create_full_parser( $html, $known_definite_encoding = 'UTF-8' ) {
		// Any declared encoding other than UTF-8 is refused outright.
		$is_supported_encoding = 'UTF-8' === $known_definite_encoding;
		if ( ! $is_supported_encoding ) {
			return null;
		}

		$full_parser = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE );

		// Record the encoding given by the caller and mark it as certain.
		$full_parser->state->encoding_confidence = 'certain';
		$full_parser->state->encoding            = $known_definite_encoding;

		return $full_parser;
	}

	/**
	 * Constructor.
	 *
	 * Do not use this method. Use the static creator methods instead.
	 *
	 * After delegating to the parent constructor, this creates the processor
	 * state and registers the handlers which queue a stack event and update
	 * the parsing namespace whenever a token is pushed onto, or popped from,
	 * the stack of open elements.
	 *
	 * @access private
	 *
	 * @since 6.4.0
	 *
	 * @see WP_HTML_Processor::create_fragment()
	 *
	 * @param string      $html                                  HTML to process.
	 * @param string|null $use_the_static_create_methods_instead This constructor should not be called manually.
	 */
	public function __construct( $html, $use_the_static_create_methods_instead = null ) {
		parent::__construct( $html );

		// Without the unlock code, report the misuse but carry on constructing.
		$is_missing_unlock_code = self::CONSTRUCTOR_UNLOCK_CODE !== $use_the_static_create_methods_instead;
		if ( $is_missing_unlock_code ) {
			$notice = sprintf(
				/* translators: %s: WP_HTML_Processor::create_fragment(). */
				__( 'Call %s to create an HTML Processor instead of calling the constructor directly.' ),
				'WP_HTML_Processor::create_fragment()'
			);

			_doing_it_wrong( __METHOD__, $notice, '6.4.0' );
		}

		$this->state = new WP_HTML_Processor_State();

		$this->state->stack_of_open_elements->set_push_handler(
			function ( WP_HTML_Token $token ): void {
				/*
				 * A push is "real" only when there is a current token, that
				 * token is not a tag closer, and its node name matches the
				 * pushed token. Every other push is "virtual".
				 */
				$is_real = (
					isset( $this->state->current_token ) &&
					! $this->is_tag_closer() &&
					$token->node_name === $this->state->current_token->node_name
				);

				$this->element_queue[] = new WP_HTML_Stack_Event(
					$token,
					WP_HTML_Stack_Event::PUSH,
					$is_real ? 'real' : 'virtual'
				);

				// Integration nodes are parsed as HTML; others keep their own namespace.
				if ( $token->integration_node_type ) {
					$this->change_parsing_namespace( 'html' );
				} else {
					$this->change_parsing_namespace( $token->namespace );
				}
			}
		);

		$this->state->stack_of_open_elements->set_pop_handler(
			function ( WP_HTML_Token $token ): void {
				/*
				 * A pop is "real" only when there is a current token, that
				 * token is a tag closer, and its node name matches the
				 * popped token. Every other pop is "virtual".
				 */
				$is_real = (
					isset( $this->state->current_token ) &&
					$this->is_tag_closer() &&
					$token->node_name === $this->state->current_token->node_name
				);

				$this->element_queue[] = new WP_HTML_Stack_Event(
					$token,
					WP_HTML_Stack_Event::POP,
					$is_real ? 'real' : 'virtual'
				);

				/*
				 * The namespace follows the adjusted current node. It falls
				 * back to HTML when there is no such node or when that node
				 * is an integration node.
				 */
				$adjusted_current_node = $this->get_adjusted_current_node();
				$parsing_namespace     = 'html';

				if ( $adjusted_current_node && ! $adjusted_current_node->integration_node_type ) {
					$parsing_namespace = $adjusted_current_node->namespace;
				}

				$this->change_parsing_namespace( $parsing_namespace );
			}
		);

		/*
		 * Create this wrapper so that it's possible to pass
		 * a private method into WP_HTML_Token classes without
		 * exposing it to any public API.
		 */
		$this->release_internal_bookmark_on_destruct = function ( string $name ): void {
			parent::release_bookmark( $name );
		};
	}

	/**
	 * Stops the parser and terminates its execution when encountering unsupported markup.
	 *
	 * @throws WP_HTML_Unsupported_Exception Halts execution of the parser.
	 *
	 * @since 6.7.0
	 *
	 * @param string $message Explains support is missing in order to parse the current node.
*/
	private function bail( string $message ) {
		$current_token = $this->state->current_token;

		// Extract the raw text of the current token from the input document.
		$span       = $this->bookmarks[ $current_token->bookmark_name ];
		$token_text = substr( $this->html, $span->start, $span->length );

		// Collect the node names on the stack of open elements, in stack order.
		$open_element_names = array();
		foreach ( $this->state->stack_of_open_elements->stack as $open_element ) {
			$open_element_names[] = $open_element->node_name;
		}

		// Collect the node names of the active formatting elements, in walk_down() order.
		$active_format_names = array();
		foreach ( $this->state->active_formatting_elements->walk_down() as $formatting_element ) {
			$active_format_names[] = $formatting_element->node_name;
		}

		$this->last_error = self::ERROR_UNSUPPORTED;

		// Store the exception on the instance before throwing it.
		$exception = new WP_HTML_Unsupported_Exception(
			$message,
			$current_token->node_name,
			$span->start,
			$token_text,
			$open_element_names,
			$active_format_names
		);

		$this->unsupported_exception = $exception;

		throw $exception;
	}

	/**
	 * Returns the last error, if any.
	 *
	 * Various situations lead to parsing failure but this class will
	 * return `false` in all those cases. To determine why something
	 * failed it's possible to request the last error. This can be
	 * helpful to know to distinguish whether a given tag couldn't
	 * be found or if content in the document caused the processor
	 * to give up and abort processing.
	 *
	 * Example
	 *
	 *     $processor = WP_HTML_Processor::create_fragment( '