eme'] . '://' . $parsed_url['host'] . '/'; $image = WP_Http::make_absolute_url( $image, $root_url ); } return $image; } /** * Prepares the metadata by: * - stripping all HTML tags and tag entities. * - converting non-tag entities into characters. * * @since 5.9.0 * * @param string $metadata The metadata content to prepare. * @return string The prepared metadata. */ private function prepare_metadata_for_output( $metadata ) { $metadata = html_entity_decode( $metadata, ENT_QUOTES, get_bloginfo( 'charset' ) ); $metadata = wp_strip_all_tags( $metadata ); return $metadata; } /** * Utility function to build cache key for a given URL. * * @since 5.9.0 * * @param string $url The URL for which to build a cache key. * @return string The cache key. */ private function build_cache_key_for_url( $url ) { return 'g_url_details_response_' . md5( $url ); } /** * Utility function to retrieve a value from the cache at a given key. * * @since 5.9.0 * * @param string $key The cache key. * @return mixed The value from the cache. */ private function get_cache( $key ) { return get_site_transient( $key ); } /** * Utility function to cache a given data set at a given cache key. * * @since 5.9.0 * * @param string $key The cache key under which to store the value. * @param string $data The data to be stored at the given cache key. * @return bool True when transient set. False if not set. */ private function set_cache( $key, $data = '' ) { $ttl = HOUR_IN_SECONDS; /** * Filters the cache expiration. * * Can be used to adjust the time until expiration in seconds for the cache * of the data retrieved for the given URL. * * @since 5.9.0 * * @param int $ttl The time until cache expiration in seconds. */ $cache_expiration = apply_filters( 'rest_url_details_cache_expiration', $ttl ); return set_site_transient( $key, $data, $cache_expiration ); } /** * Retrieves the head element section. * * @since 5.9.0 * * @param string $html The string of HTML to parse. * @return string The `
..` section on success. Given `$html` if not found. */ private function get_document_head( $html ) { $head_html = $html; // Find the opening `` tag. $head_start = strpos( $html, '` tag. $head_end = strpos( $head_html, '' ); if ( false === $head_end ) { // Didn't find it. Find the opening `` tag. $head_end = strpos( $head_html, ' symbol. * * The content attribute's value (i.e. the description to get) can have HTML in it and be well-formed as * it's a string to the browser. Imagine what happens when attempting to match for the name=description * first. Hmm, if a > or /> symbol is in the content attribute's value, then it terminates the match * as the element's closing symbol. But wait, it's in the content attribute and is not the end of the * element. This is a limitation of using regex. It can't determine "wait a minute this is inside of quotation". * If this happens, what gets matched is not the entire element or all of the content. * * Why not search for the name=description and then content="(.*)"? * The attribute order could be opposite. Plus, additional attributes may exist including being between * the name and content attributes. * * Why not lookahead? * Lookahead is not constrained to stay within the element. The first symbol. */ '[^>]*' . /* * Find the content attribute. When found, capture its value (.*). * * Allows for (a) single or double quotes and (b) whitespace in the value. * * Why capture the opening quotation mark, i.e. (["\']), and then backreference, * i.e \1, for the closing quotation mark? * To ensure the closing quotation mark matches the opening one. Why? Attribute values * can contain quotation marks, such as an apostrophe in the content. */ 'content=(["\']??)(.*)\1' . /* * Allows for additional attributes after the content attribute. * Searches for anything other than > symbol. */ '[^>]*' . /* * \/?> searches for the closing > symbol, which can be in either /> or > format. * # ends the pattern. */ '\/?>#' . /* * These are the options: * - i : case-insensitive * - s : allows newline characters for the . match (needed for multiline elements) * - U means non-greedy matching */ 'isU'; preg_match_all( $pattern, $html, $elements ); return $elements; } /** * Gets the metadata from a target meta element. * * @since 5.9.0 * * @param array $meta_elements { * A multi-dimensional indexed array on success, else empty array. * * @type string[] $0 Meta elements with a content attribute. * @type string[] $1 Content attribute's opening quotation mark. * @type string[] $2 Content attribute's value for each meta element. * } * @param string $attr Attribute that identifies the element with the target metadata. * @param string $attr_value The attribute's value that identifies the element with the target metadata. * @return string The metadata on success. Empty string if not found. */ private function get_metadata_from_meta_element( $meta_elements, $attr, $attr_value ) { // Bail out if there are no meta elements. if ( empty( $meta_elements[0] ) ) { return ''; } $metadata = ''; $pattern = '#' . /* * Target this attribute and value to find the metadata element. * * Allows for (a) no, single, double quotes and (b) whitespace in the value. * * Why capture the opening quotation mark, i.e. (["\']), and then backreference, * i.e \1, for the closing quotation mark? * To ensure the closing quotation mark matches the opening one. Why? Attribute values * can contain quotation marks, such as an apostrophe in the content. */ $attr . '=([\"\']??)\s*' . $attr_value . '\s*\1' . /* * These are the options: * - i : case-insensitive * - s : allows newline characters for the . match (needed for multiline elements) * - U means non-greedy matching */ '#isU'; // Find the metadata element. foreach ( $meta_elements[0] as $index => $element ) { preg_match( $pattern, $element, $match ); // This is not the metadata element. Skip it. if ( empty( $match ) ) { continue; } /* * Found the metadata element. * Get the metadata from its matching content array. */ if ( isset( $meta_elements[2][ $index ] ) && is_string( $meta_elements[2][ $index ] ) ) { $metadata = trim( $meta_elements[2][ $index ] ); } break; } return $metadata; } }