'U', 'ů' => 'u', 'Ű' => 'U', 'ű' => 'u', 'Ų' => 'U', 'ų' => 'u', 'Ŵ' => 'W', 'ŵ' => 'w', 'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y', 'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z', 'Ž' => 'Z', 'ž' => 'z', 'ſ' => 's', // Decompositions for Latin Extended-B. 'Ə' => 'E', 'ǝ' => 'e', 'Ș' => 'S', 'ș' => 's', 'Ț' => 'T', 'ț' => 't', // Euro sign. '€' => 'E', // GBP (Pound) sign. '£' => '', // Vowels with diacritic (Vietnamese). Unmarked. 'Ơ' => 'O', 'ơ' => 'o', 'Ư' => 'U', 'ư' => 'u', // Grave accent. 'Ầ' => 'A', 'ầ' => 'a', 'Ằ' => 'A', 'ằ' => 'a', 'Ề' => 'E', 'ề' => 'e', 'Ồ' => 'O', 'ồ' => 'o', 'Ờ' => 'O', 'ờ' => 'o', 'Ừ' => 'U', 'ừ' => 'u', 'Ỳ' => 'Y', 'ỳ' => 'y', // Hook. 'Ả' => 'A', 'ả' => 'a', 'Ẩ' => 'A', 'ẩ' => 'a', 'Ẳ' => 'A', 'ẳ' => 'a', 'Ẻ' => 'E', 'ẻ' => 'e', 'Ể' => 'E', 'ể' => 'e', 'Ỉ' => 'I', 'ỉ' => 'i', 'Ỏ' => 'O', 'ỏ' => 'o', 'Ổ' => 'O', 'ổ' => 'o', 'Ở' => 'O', 'ở' => 'o', 'Ủ' => 'U', 'ủ' => 'u', 'Ử' => 'U', 'ử' => 'u', 'Ỷ' => 'Y', 'ỷ' => 'y', // Tilde. 'Ẫ' => 'A', 'ẫ' => 'a', 'Ẵ' => 'A', 'ẵ' => 'a', 'Ẽ' => 'E', 'ẽ' => 'e', 'Ễ' => 'E', 'ễ' => 'e', 'Ỗ' => 'O', 'ỗ' => 'o', 'Ỡ' => 'O', 'ỡ' => 'o', 'Ữ' => 'U', 'ữ' => 'u', 'Ỹ' => 'Y', 'ỹ' => 'y', // Acute accent. 'Ấ' => 'A', 'ấ' => 'a', 'Ắ' => 'A', 'ắ' => 'a', 'Ế' => 'E', 'ế' => 'e', 'Ố' => 'O', 'ố' => 'o', 'Ớ' => 'O', 'ớ' => 'o', 'Ứ' => 'U', 'ứ' => 'u', // Dot below. 'Ạ' => 'A', 'ạ' => 'a', 'Ậ' => 'A', 'ậ' => 'a', 'Ặ' => 'A', 'ặ' => 'a', 'Ẹ' => 'E', 'ẹ' => 'e', 'Ệ' => 'E', 'ệ' => 'e', 'Ị' => 'I', 'ị' => 'i', 'Ọ' => 'O', 'ọ' => 'o', 'Ộ' => 'O', 'ộ' => 'o', 'Ợ' => 'O', 'ợ' => 'o', 'Ụ' => 'U', 'ụ' => 'u', 'Ự' => 'U', 'ự' => 'u', 'Ỵ' => 'Y', 'ỵ' => 'y', // Vowels with diacritic (Chinese, Hanyu Pinyin). 'ɑ' => 'a', // Macron. 'Ǖ' => 'U', 'ǖ' => 'u', // Acute accent. 'Ǘ' => 'U', 'ǘ' => 'u', // Caron. 'Ǎ' => 'A', 'ǎ' => 'a', 'Ǐ' => 'I', 'ǐ' => 'i', 'Ǒ' => 'O', 'ǒ' => 'o', 'Ǔ' => 'U', 'ǔ' => 'u', 'Ǚ' => 'U', 'ǚ' => 'u', // Grave accent. 'Ǜ' => 'U', 'ǜ' => 'u', ); // Used for locale-specific rules. 
if ( empty( $locale ) ) { $locale = get_locale(); } /* * German has various locales (de_DE, de_CH, de_AT, ...) with formal and informal variants. * There is no 3-letter locale like 'def', so checking for 'de' instead of 'de_' is safe, * since 'de' itself would be a valid locale too. */ if ( str_starts_with( $locale, 'de' ) ) { $chars['Ä'] = 'Ae'; $chars['ä'] = 'ae'; $chars['Ö'] = 'Oe'; $chars['ö'] = 'oe'; $chars['Ü'] = 'Ue'; $chars['ü'] = 'ue'; $chars['ß'] = 'ss'; } elseif ( 'da_DK' === $locale ) { $chars['Æ'] = 'Ae'; $chars['æ'] = 'ae'; $chars['Ø'] = 'Oe'; $chars['ø'] = 'oe'; $chars['Å'] = 'Aa'; $chars['å'] = 'aa'; } elseif ( 'ca' === $locale ) { $chars['l·l'] = 'll'; } elseif ( 'sr_RS' === $locale || 'bs_BA' === $locale ) { $chars['Đ'] = 'DJ'; $chars['đ'] = 'dj'; } $text = strtr( $text, $chars ); } else { $chars = array(); // Assume ISO-8859-1 if not UTF-8. $chars['in'] = "\x80\x83\x8a\x8e\x9a\x9e" . "\x9f\xa2\xa5\xb5\xc0\xc1\xc2" . "\xc3\xc4\xc5\xc7\xc8\xc9\xca" . "\xcb\xcc\xcd\xce\xcf\xd1\xd2" . "\xd3\xd4\xd5\xd6\xd8\xd9\xda" . "\xdb\xdc\xdd\xe0\xe1\xe2\xe3" . "\xe4\xe5\xe7\xe8\xe9\xea\xeb" . "\xec\xed\xee\xef\xf1\xf2\xf3" . "\xf4\xf5\xf6\xf8\xf9\xfa\xfb" . "\xfc\xfd\xff"; $chars['out'] = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy'; $text = strtr( $text, $chars['in'], $chars['out'] ); $double_chars = array(); $double_chars['in'] = array( "\x8c", "\x9c", "\xc6", "\xd0", "\xde", "\xdf", "\xe6", "\xf0", "\xfe" ); $double_chars['out'] = array( 'OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th' ); $text = str_replace( $double_chars['in'], $double_chars['out'], $text ); } return $text; } /** * Sanitizes a filename, replacing whitespace with dashes. * * Removes special characters that are illegal in filenames on certain * operating systems and special characters requiring special escaping * to manipulate at the command line. Replaces spaces and consecutive * dashes with a single dash. 
* Trims period, dash and underscore from beginning
 * and end of filename. It is not guaranteed that this function will return a
 * filename that is allowed to be uploaded.
 *
 * @since 2.1.0
 *
 * @param string $filename The filename to be sanitized.
 * @return string The sanitized filename.
 */
function sanitize_file_name( $filename ) {
	// Keep the untouched input around: both filters below receive it.
	$filename_raw = $filename;
	$filename     = remove_accents( $filename );

	// Characters that are stripped outright (filterable further down).
	$special_chars = array( '?', '[', ']', '/', '\\', '=', '<', '>', ':', ';', ',', "'", '"', '&', '$', '#', '*', '(', ')', '|', '~', '`', '!', '{', '}', '%', '+', '’', '«', '»', '”', '“', chr( 0 ) );

	// Probe the PCRE library for the /u modifier a single time; the static caches the answer.
	static $pcre_has_utf8 = null;
	if ( null === $pcre_has_utf8 ) {
		// phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
		$pcre_has_utf8 = @preg_match( '/^./u', 'a' );
	}

	// Non-UTF-8 names: slugify the base name and re-attach the extension as-is.
	if ( ! seems_utf8( $filename ) ) {
		$filename = sanitize_title_with_dashes( pathinfo( $filename, PATHINFO_FILENAME ) )
			. '.'
			. pathinfo( $filename, PATHINFO_EXTENSION );
	}

	// Turn U+00A0 (no-break space) into a plain space when PCRE can match it.
	if ( $pcre_has_utf8 ) {
		$filename = preg_replace( "#\x{00a0}#siu", ' ', $filename );
	}

	/**
	 * Filters the list of characters to remove from a filename.
	 *
	 * @since 2.8.0
	 *
	 * @param string[] $special_chars Array of characters to remove.
	 * @param string   $filename_raw  The original filename to be sanitized.
	 */
	$special_chars = apply_filters( 'sanitize_file_name_chars', $special_chars, $filename_raw );

	// Strip the special characters, then normalize separators, dot runs and the edges.
	$filename = str_replace( $special_chars, '', $filename );
	$filename = str_replace( array( '%20', '+' ), '-', $filename );
	$filename = preg_replace( '/\.{2,}/', '.', $filename );
	$filename = preg_replace( '/[\r\n\t -]+/', '-', $filename );
	$filename = trim( $filename, '.-_' );

	// A dot-less result that is itself a known extension gets a placeholder base name.
	if ( ! str_contains( $filename, '.' ) ) {
		$known_mimes = wp_get_mime_types();
		$checked     = wp_check_filetype( 'test.' . $filename, $known_mimes );
		if ( $checked['ext'] === $filename ) {
			$filename = 'unnamed-file.' . $checked['ext'];
		}
	}

	// Break the name into its base and extension segment(s).
	$segments = explode( '.', $filename );

	// Nothing more to do when there is at most one extension.
	if ( count( $segments ) < 3 ) {
		/** This filter is documented in wp-includes/formatting.php */
		return apply_filters( 'sanitize_file_name', $filename, $filename_raw );
	}

	// Several extensions: rebuild the name from base, intermediates, final extension.
	$filename  = array_shift( $segments );
	$final_ext = array_pop( $segments );
	$mimes     = get_allowed_mime_types();

	/*
	 * Walk the intermediate extensions. Any that looks like a 2 - 5 letter alpha string
	 * (optionally followed by one digit) and is not in the allowed extension list
	 * gets a trailing underscore appended.
	 */
	foreach ( (array) $segments as $segment ) {
		$filename .= '.' . $segment;

		if ( ! preg_match( '/^[a-zA-Z]{2,5}\d?$/', $segment ) ) {
			continue;
		}

		$is_allowed = false;
		foreach ( $mimes as $ext_preg => $mime_match ) {
			if ( preg_match( '!^(' . $ext_preg . ')$!i', $segment ) ) {
				$is_allowed = true;
				break;
			}
		}

		if ( ! $is_allowed ) {
			$filename .= '_';
		}
	}

	$filename .= '.' . $final_ext;

	/**
	 * Filters a sanitized filename string.
	 *
	 * @since 2.8.0
	 *
	 * @param string $filename     Sanitized filename.
	 * @param string $filename_raw The filename prior to sanitization.
	 */
	return apply_filters( 'sanitize_file_name', $filename, $filename_raw );
}

/**
 * Sanitizes a username, stripping out unsafe characters.
 *
 * Removes tags, percent-encoded characters, HTML entities, and if strict is enabled,
 * will only keep alphanumeric, _, space, ., -, @. After sanitizing, it passes the username,
 * raw username (the username in the parameter), and the value of $strict as parameters
 * for the {@see 'sanitize_user'} filter.
 *
 * @since 2.0.0
 *
 * @param string $username The username to be sanitized.
 * @param bool   $strict   Optional. If set to true, limits $username to specific characters.
 *                         Default false.
 * @return string The sanitized username, after passing through filters.
*/
function sanitize_user( $username, $strict = false ) {
	// The unmodified input is handed to the filter at the end.
	$raw_username = $username;

	// Drop markup first, then fold accented characters.
	$username = remove_accents( wp_strip_all_tags( $username ) );

	// Strip percent-encoded octets such as "%20".
	$username = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '', $username );

	// Strip anything shaped like an HTML entity ("&...;").
	$username = preg_replace( '/&.+?;/', '', $username );

	// Strict mode keeps only ASCII letters, digits, space, "_", ".", "-" and "@".
	if ( $strict ) {
		$username = preg_replace( '|[^a-z0-9 _.\-@]|i', '', $username );
	}

	// Trim the ends, then squeeze every whitespace run down to one space.
	$username = preg_replace( '|\s+|', ' ', trim( $username ) );

	/**
	 * Filters a sanitized username string.
	 *
	 * @since 2.0.1
	 *
	 * @param string $username     Sanitized username.
	 * @param string $raw_username The username prior to sanitization.
	 * @param bool   $strict       Whether to limit the sanitization to specific characters.
	 */
	return apply_filters( 'sanitize_user', $username, $raw_username, $strict );
}

/**
 * Sanitizes a string key.
 *
 * Keys are used as internal identifiers. Lowercase alphanumeric characters,
 * dashes, and underscores are allowed.
 *
 * @since 3.0.0
 *
 * @param string $key String key.
 * @return string Sanitized key.
 */
function sanitize_key( $key ) {
	// Non-scalar input yields an empty key; scalars are lowercased and reduced to [a-z0-9_-].
	$sanitized_key = is_scalar( $key )
		? preg_replace( '/[^a-z0-9_\-]/', '', strtolower( $key ) )
		: '';

	/**
	 * Filters a sanitized key string.
	 *
	 * @since 3.0.0
	 *
	 * @param string $sanitized_key Sanitized key.
	 * @param string $key           The key prior to sanitization.
	 */
	return apply_filters( 'sanitize_key', $sanitized_key, $key );
}

/**
 * Sanitizes a string into a slug, which can be used in URLs or HTML attributes.
 *
 * By default, converts accent characters to ASCII characters and further
 * limits the output to alphanumeric characters, underscore (_) and dash (-)
 * through the {@see 'sanitize_title'} filter.
 *
 * If `$title` is empty and `$fallback_title` is set, the latter will be used.
*
 * @since 1.0.0
 *
 * @param string $title          The string to be sanitized.
 * @param string $fallback_title Optional. A title to use if $title is empty. Default empty.
 * @param string $context        Optional. The operation for which the string is sanitized.
 *                               When set to 'save', the string runs through remove_accents().
 *                               Default 'save'.
 * @return string The sanitized string.
 */
function sanitize_title( $title, $fallback_title = '', $context = 'save' ) {
	$raw_title = $title;

	if ( 'save' === $context ) {
		$title = remove_accents( $title );
	}

	/**
	 * Filters a sanitized title string.
	 *
	 * @since 1.2.0
	 *
	 * @param string $title     Sanitized title.
	 * @param string $raw_title The title prior to sanitization.
	 * @param string $context   The context for which the title is being sanitized.
	 */
	$title = apply_filters( 'sanitize_title', $title, $raw_title, $context );

	// Fall back only when the filters left nothing usable.
	if ( '' === $title || false === $title ) {
		$title = $fallback_title;
	}

	return $title;
}

/**
 * Sanitizes a title with the 'query' context.
 *
 * Used for querying the database for a value from URL.
 *
 * @since 3.1.0
 *
 * @param string $title The string to be sanitized.
 * @return string The sanitized string.
 */
function sanitize_title_for_query( $title ) {
	return sanitize_title( $title, '', 'query' );
}

/**
 * Sanitizes a title, replacing whitespace and a few other characters with dashes.
 *
 * Limits the output to alphanumeric characters, underscore (_) and dash (-).
 * Whitespace becomes a dash.
 *
 * @since 1.2.0
 *
 * @param string $title     The title to be sanitized.
 * @param string $raw_title Optional. Not used. Default empty.
 * @param string $context   Optional. The operation for which the string is sanitized.
 *                          When set to 'save', additional entities are converted to hyphens
 *                          or stripped entirely. Default 'display'.
 * @return string The sanitized title.
 */
function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'display' ) {
	$title = strip_tags( $title );
	// Preserve escaped octets.
	$title = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title );
	// Remove percent signs that are not part of an octet.
	$title = str_replace( '%', '', $title );
	// Restore octets.
	$title = preg_replace( '|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title );

	if ( seems_utf8( $title ) ) {
		if ( function_exists( 'mb_strtolower' ) ) {
			$title = mb_strtolower( $title, 'UTF-8' );
		}
		$title = utf8_uri_encode( $title, 200 );
	}

	$title = strtolower( $title );

	if ( 'save' === $context ) {
		// Convert &nbsp, &ndash, and &mdash to hyphens.
		$title = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $title );

		/*
		 * Convert &nbsp, &ndash, and &mdash HTML entities to hyphens.
		 *
		 * These must be the entity strings themselves: the title has already been
		 * lowercased above, and any entity still present when the generic
		 * "Remove HTML entities" step runs below is deleted rather than replaced
		 * with a hyphen.
		 */
		$title = str_replace( array( '&nbsp;', '&#160;', '&ndash;', '&#8211;', '&mdash;', '&#8212;' ), '-', $title );

		// Convert forward slash to hyphen.
		$title = str_replace( '/', '-', $title );

		// Strip these characters entirely.
		$title = str_replace(
			array(
				// Soft hyphens.
				'%c2%ad',
				// &iexcl and &iquest.
				'%c2%a1',
				'%c2%bf',
				// Angle quotes.
				'%c2%ab',
				'%c2%bb',
				'%e2%80%b9',
				'%e2%80%ba',
				// Curly quotes.
				'%e2%80%98',
				'%e2%80%99',
				'%e2%80%9c',
				'%e2%80%9d',
				'%e2%80%9a',
				'%e2%80%9b',
				'%e2%80%9e',
				'%e2%80%9f',
				// Bullet.
				'%e2%80%a2',
				// &copy, &reg, &deg, &hellip, and &trade.
				'%c2%a9',
				'%c2%ae',
				'%c2%b0',
				'%e2%80%a6',
				'%e2%84%a2',
				// Acute accents.
				'%c2%b4',
				'%cb%8a',
				'%cc%81',
				'%cd%81',
				// Grave accent, macron, caron.
				'%cc%80',
				'%cc%84',
				'%cc%8c',
				// Non-visible characters that display without a width.
				'%e2%80%8b', // Zero width space.
				'%e2%80%8c', // Zero width non-joiner.
				'%e2%80%8d', // Zero width joiner.
				'%e2%80%8e', // Left-to-right mark.
				'%e2%80%8f', // Right-to-left mark.
				'%e2%80%aa', // Left-to-right embedding.
				'%e2%80%ab', // Right-to-left embedding.
				'%e2%80%ac', // Pop directional formatting.
				'%e2%80%ad', // Left-to-right override.
				'%e2%80%ae', // Right-to-left override.
				'%ef%bb%bf', // Byte order mark.
				'%ef%bf%bc', // Object replacement character.
			),
			'',
			$title
		);

		// Convert non-visible characters that display with a width to hyphen.
		$title = str_replace(
			array(
				'%e2%80%80', // En quad.
				'%e2%80%81', // Em quad.
				'%e2%80%82', // En space.
				'%e2%80%83', // Em space.
				'%e2%80%84', // Three-per-em space.
				'%e2%80%85', // Four-per-em space.
				'%e2%80%86', // Six-per-em space.
				'%e2%80%87', // Figure space.
				'%e2%80%88', // Punctuation space.
				'%e2%80%89', // Thin space.
				'%e2%80%8a', // Hair space.
				'%e2%80%a8', // Line separator.
				'%e2%80%a9', // Paragraph separator.
				'%e2%80%af', // Narrow no-break space.
			),
			'-',
			$title
		);

		// Convert &times to 'x'.
		$title = str_replace( '%c3%97', 'x', $title );
	}

	// Remove HTML entities.
	$title = preg_replace( '/&.+?;/', '', $title );
	$title = str_replace( '.', '-', $title );

	// Keep only percent signs, lowercase alphanumerics, space, underscore and dash.
	$title = preg_replace( '/[^%a-z0-9 _-]/', '', $title );
	// Whitespace runs become a dash, dash runs collapse, and edge dashes are trimmed.
	$title = preg_replace( '/\s+/', '-', $title );
	$title = preg_replace( '|-+|', '-', $title );
	$title = trim( $title, '-' );

	return $title;
}

/**
 * Ensures a string is a valid SQL 'order by' clause.
 *
 * Accepts one or more columns, with or without a sort order (ASC / DESC).
 * e.g. 'column_1', 'column_1, column_2', 'column_1 ASC, column_2 DESC' etc.
 *
 * Also accepts 'RAND()'.
 *
 * @since 2.5.1
 *
 * @param string $orderby Order by clause to be validated.
 * @return string|false Returns $orderby if valid, false otherwise.
 */
function sanitize_sql_orderby( $orderby ) {
	// Either a comma-separated column list (optionally backticked, optionally ASC/DESC) or RAND().
	if ( preg_match( '/^\s*(([a-z0-9_]+|`[a-z0-9_]+`)(\s+(ASC|DESC))?\s*(,\s*(?=[a-z0-9_`])|$))+$/i', $orderby )
		|| preg_match( '/^\s*RAND\(\s*\)\s*$/i', $orderby )
	) {
		return $orderby;
	}

	return false;
}

/**
 * Sanitizes an HTML classname to ensure it only contains valid characters.
 *
 * Strips the string down to A-Z,a-z,0-9,_,-. If this results in an empty
 * string then it will return the alternative value supplied.
 *
 * @todo Expand to support the full range of CDATA that a class attribute can contain.
 *
 * @since 2.8.0
 *
 * @param string $classname The classname to be sanitized.
 * @param string $fallback  Optional. The value to return if the sanitization ends up as an empty string.
 *                          Default empty string.
* @return string The sanitized value.
 */
function sanitize_html_class( $classname, $fallback = '' ) {
	// Strip out any percent-encoded characters.
	$sanitized = preg_replace( '|%[a-fA-F0-9][a-fA-F0-9]|', '', $classname );

	// Limit to A-Z, a-z, 0-9, '_', '-'.
	$sanitized = preg_replace( '/[^A-Za-z0-9_-]/', '', $sanitized );

	// Nothing survived: sanitize the fallback instead (called without a further fallback).
	if ( '' === $sanitized && $fallback ) {
		return sanitize_html_class( $fallback );
	}

	/**
	 * Filters a sanitized HTML class string.
	 *
	 * @since 2.8.0
	 *
	 * @param string $sanitized The sanitized HTML class.
	 * @param string $classname HTML class before sanitization.
	 * @param string $fallback  The fallback string.
	 */
	return apply_filters( 'sanitize_html_class', $sanitized, $classname, $fallback );
}

/**
 * Strips out all characters not allowed in a locale name.
 *
 * @since 6.2.1
 *
 * @param string $locale_name The locale name to be sanitized.
 * @return string The sanitized value.
 */
function sanitize_locale_name( $locale_name ) {
	// Limit to A-Z, a-z, 0-9, '_', '-'.
	$sanitized = preg_replace( '/[^A-Za-z0-9_-]/', '', $locale_name );

	/**
	 * Filters a sanitized locale name string.
	 *
	 * @since 6.2.1
	 *
	 * @param string $sanitized   The sanitized locale name.
	 * @param string $locale_name The locale name before sanitization.
	 */
	return apply_filters( 'sanitize_locale_name', $sanitized, $locale_name );
}

/**
 * Converts lone & characters into `&#038;` (a.k.a. `&amp;`)
 *
 * An ampersand is left alone when it starts a numeric reference (`&#...`) or
 * when it is followed by something shaped like a named entity, i.e. its next
 * character plus 1 to 8 characters from [a-z1-4] and a semicolon.
 *
 * @since 0.71
 *
 * @param string $content    String of characters to be converted.
 * @param string $deprecated Not used.
 * @return string Converted string.
 */
function convert_chars( $content, $deprecated = '' ) {
	if ( ! empty( $deprecated ) ) {
		_deprecated_argument( __FUNCTION__, '0.71' );
	}

	// Skip the regex entirely when there is no ampersand to look at.
	if ( str_contains( $content, '&' ) ) {
		/*
		 * The replacement has to be the numeric reference: emitting a bare '&'
		 * here would put back exactly what was matched and convert nothing.
		 */
		$content = preg_replace( '/&([^#])(?![a-z1-4]{1,8};)/i', '&#038;$1', $content );
	}

	return $content;
}

/**
 * Converts invalid Unicode references range to valid range.
 *
 * @since 4.3.0
 *
 * @param string $content String with entities that need converting.
 * @return string Converted string.
*/
function convert_invalid_entities( $content ) {
	/*
	 * Numeric references in the 128-159 range are how Windows-1252 bytes show up
	 * when they are written out as references. Each one is mapped to the reference
	 * for the Unicode character that CP1252 assigns to that byte; the five code
	 * points CP1252 leaves undefined (129, 141, 143, 144, 157) are removed.
	 *
	 * Both keys and values must be written as references: a table of literal
	 * characters mapped onto themselves would convert nothing.
	 */
	$wp_htmltranswinuni = array(
		'&#128;' => '&#8364;', // The Euro sign.
		'&#129;' => '',
		'&#130;' => '&#8218;', // These are Windows CP1252 specific characters.
		'&#131;' => '&#402;',  // They would look weird on non-Windows browsers.
		'&#132;' => '&#8222;',
		'&#133;' => '&#8230;',
		'&#134;' => '&#8224;',
		'&#135;' => '&#8225;',
		'&#136;' => '&#710;',
		'&#137;' => '&#8240;',
		'&#138;' => '&#352;',
		'&#139;' => '&#8249;',
		'&#140;' => '&#338;',
		'&#141;' => '',
		'&#142;' => '&#381;',
		'&#143;' => '',
		'&#144;' => '',
		'&#145;' => '&#8216;',
		'&#146;' => '&#8217;',
		'&#147;' => '&#8220;',
		'&#148;' => '&#8221;',
		'&#149;' => '&#8226;',
		'&#150;' => '&#8211;',
		'&#151;' => '&#8212;',
		'&#152;' => '&#732;',
		'&#153;' => '&#8482;',
		'&#154;' => '&#353;',
		'&#155;' => '&#8250;',
		'&#156;' => '&#339;',
		'&#157;' => '',
		'&#158;' => '&#382;',
		'&#159;' => '&#376;',
	);

	// Every key starts with "&#1", so this cheap check skips strtr() for most content.
	if ( str_contains( $content, '&#1' ) ) {
		$content = strtr( $content, $wp_htmltranswinuni );
	}

	return $content;
}

/**
 * Balances tags if forced to, or if the 'use_balanceTags' option is set to true.
 *
 * @since 0.71
 *
 * @param string $text  Text to be balanced
 * @param bool   $force If true, forces balancing, ignoring the value of the option. Default false.
 * @return string Balanced text
 */
function balanceTags( $text, $force = false ) {  // phpcs:ignore WordPress.NamingConventions.ValidFunctionName.FunctionNameInvalid
	if ( $force || (int) get_option( 'use_balanceTags' ) === 1 ) {
		return force_balance_tags( $text );
	} else {
		return $text;
	}
}

/**
 * Balances tags of string using a modified stack.
 *
 * @since 2.0.4
 * @since 5.3.0 Improve accuracy and add support for custom element tags.
 *
 * @author Leonard Lin
 * @license GPL
 * @copyright November 4, 2001
 * @version 1.1
 * @todo Make better - change loop condition to $text in 1.2
 * @internal Modified by Scott Reilly (coffee2code) 02 Aug 2004
 *      1.1  Fixed handling of append/stack pop order of end text
 *           Added Cleaning Hooks
 *      1.0  First Version
 *
 * @param string $text Text to be balanced.
 * @return string Balanced text.
 */
function force_balance_tags( $text ) {
	$tagstack  = array();
	$stacksize = 0;
	$tagqueue  = '';
	$newtext   = '';
	// Known single-entity/self-closing tags.
$single_tags = array( 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param', 'source', 'track', 'wbr' ); // Tags that can be immediately nested within themselves. $nestable_tags = array( 'article', 'aside', 'blockquote', 'details', 'div', 'figure', 'object', 'q', 'section', 'span' ); // WP bug fix for comments - in case you REALLY meant to type '< !--'. $text = str_replace( '< !--', '< !--', $text ); // WP bug fix for LOVE <3 (and other situations with '<' before a number). $text = preg_replace( '#<([0-9]{1})#', '<$1', $text ); /** * Matches supported tags. * * To get the pattern as a string without the comments paste into a PHP * REPL like `php -a`. * * @see https://html.spec.whatwg.org/#elements-2 * @see https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name * * @example * ~# php -a * php > $s = [paste copied contents of expression below including parentheses]; * php > echo $s; */ $tag_pattern = ( '#<' . // Start with an opening bracket. '(/?)' . // Group 1 - If it's a closing tag it'll have a leading slash. '(' . // Group 2 - Tag name. // Custom element tags have more lenient rules than HTML tag names. '(?:[a-z](?:[a-z0-9._]*)-(?:[a-z0-9._-]+)+)' . '|' . // Traditional tag rules approximate HTML tag names. '(?:[\w:]+)' . ')' . '(?:' . // We either immediately close the tag with its '>' and have nothing here. '\s*' . '(/?)' . // Group 3 - "attributes" for empty tag. '|' . // Or we must start with space characters to separate the tag name from the attributes (or whitespace). '(\s+)' . // Group 4 - Pre-attribute whitespace. '([^>]*)' . // Group 5 - Attributes. ')' . '>#' // End with a closing bracket. ); while ( preg_match( $tag_pattern, $text, $regex ) ) { $full_match = $regex[0]; $has_leading_slash = ! 
empty( $regex[1] ); $tag_name = $regex[2]; $tag = strtolower( $tag_name ); $is_single_tag = in_array( $tag, $single_tags, true ); $pre_attribute_ws = isset( $regex[4] ) ? $regex[4] : ''; $attributes = trim( isset( $regex[5] ) ? $regex[5] : $regex[3] ); $has_self_closer = str_ends_with( $attributes, '/' ); $newtext .= $tagqueue; $i = strpos( $text, $full_match ); $l = strlen( $full_match ); // Clear the shifter. $tagqueue = ''; if ( $has_leading_slash ) { // End tag. // If too many closing tags. if ( $stacksize <= 0 ) { $tag = ''; // Or close to be safe $tag = '/' . $tag. // If stacktop value = tag close value, then pop. } elseif ( $tagstack[ $stacksize - 1 ] === $tag ) { // Found closing tag. $tag = ''; // Close tag. array_pop( $tagstack ); --$stacksize; } else { // Closing tag not at top, search for it. for ( $j = $stacksize - 1; $j >= 0; $j-- ) { if ( $tagstack[ $j ] === $tag ) { // Add tag to tagqueue. for ( $k = $stacksize - 1; $k >= $j; $k-- ) { $tagqueue .= ''; --$stacksize; } break; } } $tag = ''; } } else { // Begin tag. if ( $has_self_closer ) { /* * If it presents itself as a self-closing tag, but it isn't a known single-entity self-closing tag, * then don't let it be treated as such and immediately close it with a closing tag. * The tag will encapsulate no text as a result. */ if ( ! $is_single_tag ) { $attributes = trim( substr( $attributes, 0, -1 ) ) . "> 0 && ! in_array( $tag, $nestable_tags, true ) && $tagstack[ $stacksize - 1 ] === $tag ) { $tagqueue = ''; --$stacksize; } $stacksize = array_push( $tagstack, $tag ); } // Attributes. if ( $has_self_closer && $is_single_tag ) { // We need some space - avoid
and prefer
. $pre_attribute_ws = ' '; } $tag = '<' . $tag . $pre_attribute_ws . $attributes . '>'; // If already queuing a close tag, then put this tag on too. if ( ! empty( $tagqueue ) ) { $tagqueue .= $tag; $tag = ''; } } $newtext .= substr( $text, 0, $i ) . $tag; $text = substr( $text, $i + $l ); } // Clear tag queue. $newtext .= $tagqueue; // Add remaining text. $newtext .= $text; while ( $x = array_pop( $tagstack ) ) { $newtext .= ''; // Add remaining tags to close. } // WP fix for the bug with HTML comments. $newtext = str_replace( '< !--', '