arest >. */ $at = strpos( $html, '>', $at + 1 ); continue; } /* * is a missing end tag name, which is ignored. * * See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name */ if ( '>' === $html[ $at + 1 ] ) { ++$at; continue; } /* * * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state */ if ( '?' === $html[ $at + 1 ] ) { $closer_at = strpos( $html, '>', $at + 2 ); if ( false === $closer_at ) { return false; } $at = $closer_at + 1; continue; } /* * If a non-alpha starts the tag name in a tag closer it's a comment. * Find the first `>`, which closes the comment. * * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name */ if ( $this->is_closing_tag ) { $closer_at = strpos( $html, '>', $at + 3 ); if ( false === $closer_at ) { return false; } $at = $closer_at + 1; continue; } ++$at; } return false; } /** * Parses the next attribute. * * @since 6.2.0 * * @return bool Whether an attribute was found before the end of the document. */ private function parse_next_attribute() { // Skip whitespace and slashes. $this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n/", $this->bytes_already_parsed ); if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { return false; } /* * Treat the equal sign as a part of the attribute * name if it is the first encountered byte. * * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state */ $name_length = '=' === $this->html[ $this->bytes_already_parsed ] ? 1 + strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed + 1 ) : strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed ); // No attribute, just tag closer. if ( 0 === $name_length || $this->bytes_already_parsed + $name_length >= strlen( $this->html ) ) { return false; } $attribute_start = $this->bytes_already_parsed; $attribute_name = substr( $this->html, $attribute_start, $name_length ); $this->bytes_already_parsed += $name_length; if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { return false; } $this->skip_whitespace(); if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { return false; } $has_value = '=' === $this->html[ $this->bytes_already_parsed ]; if ( $has_value ) { ++$this->bytes_already_parsed; $this->skip_whitespace(); if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { return false; } switch ( $this->html[ $this->bytes_already_parsed ] ) { case "'": case '"': $quote = $this->html[ $this->bytes_already_parsed ]; $value_start = $this->bytes_already_parsed + 1; $value_length = strcspn( $this->html, $quote, $value_start ); $attribute_end = $value_start + $value_length + 1; $this->bytes_already_parsed = $attribute_end; break; default: $value_start = $this->bytes_already_parsed; $value_length = strcspn( $this->html, "> \t\f\r\n", $value_start ); $attribute_end = $value_start + $value_length; $this->bytes_already_parsed = $attribute_end; } } else { $value_start = $this->bytes_already_parsed; $value_length = 0; $attribute_end = $attribute_start + $name_length; } if ( $attribute_end >= strlen( $this->html ) ) { return false; } if ( $this->is_closing_tag ) { return true; } /* * > There must never be two or more attributes on * > the same start tag whose names are an ASCII * > case-insensitive match for each other. * - HTML 5 spec * * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive */ $comparable_name = strtolower( $attribute_name ); // If an attribute is listed many times, only use the first declaration and ignore the rest. if ( ! array_key_exists( $comparable_name, $this->attributes ) ) { $this->attributes[ $comparable_name ] = new WP_HTML_Attribute_Token( $attribute_name, $value_start, $value_length, $attribute_start, $attribute_end, ! $has_value ); return true; } /* * Track the duplicate attributes so if we remove it, all disappear together. * * While `$this->duplicated_attributes` could always be stored as an `array()`, * which would simplify the logic here, storing a `null` and only allocating * an array when encountering duplicates avoids needless allocations in the * normative case of parsing tags with no duplicate attributes. */ $duplicate_span = new WP_HTML_Span( $attribute_start, $attribute_end ); if ( null === $this->duplicate_attributes ) { $this->duplicate_attributes = array( $comparable_name => array( $duplicate_span ) ); } elseif ( ! array_key_exists( $comparable_name, $this->duplicate_attributes ) ) { $this->duplicate_attributes[ $comparable_name ] = array( $duplicate_span ); } else { $this->duplicate_attributes[ $comparable_name ][] = $duplicate_span; } return true; } /** * Move the internal cursor past any immediate successive whitespace. * * @since 6.2.0 */ private function skip_whitespace() { $this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n", $this->bytes_already_parsed ); } /** * Applies attribute updates and cleans up once a tag is fully parsed. * * @since 6.2.0 */ private function after_tag() { $this->get_updated_html(); $this->tag_name_starts_at = null; $this->tag_name_length = null; $this->tag_ends_at = null; $this->is_closing_tag = null; $this->attributes = array(); $this->duplicate_attributes = null; } /** * Converts class name updates into tag attributes updates * (they are accumulated in different data formats for performance). * * @since 6.2.0 * * @see WP_HTML_Tag_Processor::$lexical_updates * @see WP_HTML_Tag_Processor::$classname_updates */ private function class_name_updates_to_attributes_updates() { if ( count( $this->classname_updates ) === 0 ) { return; } $existing_class = $this->get_enqueued_attribute_value( 'class' ); if ( null === $existing_class || true === $existing_class ) { $existing_class = ''; } if ( false === $existing_class && isset( $this->attributes['class'] ) ) { $existing_class = substr( $this->html, $this->attributes['class']->value_starts_at, $this->attributes['class']->value_length ); } if ( false === $existing_class ) { $existing_class = ''; } /** * Updated "class" attribute value. * * This is incrementally built while scanning through the existing class * attribute, skipping removed classes on the way, and then appending * added classes at the end. Only when finished processing will the * value contain the final new value. * @var string $class */ $class = ''; /** * Tracks the cursor position in the existing * class attribute value while parsing. * * @var int $at */ $at = 0; /** * Indicates if there's any need to modify the existing class attribute. * * If a call to `add_class()` and `remove_class()` wouldn't impact * the `class` attribute value then there's no need to rebuild it. * For example, when adding a class that's already present or * removing one that isn't. * * This flag enables a performance optimization when none of the enqueued * class updates would impact the `class` attribute; namely, that the * processor can continue without modifying the input document, as if * none of the `add_class()` or `remove_class()` calls had been made. * * This flag is set upon the first change that requires a string update. * * @var bool $modified */ $modified = false; // Remove unwanted classes by only copying the new ones. $existing_class_length = strlen( $existing_class ); while ( $at < $existing_class_length ) { // Skip to the first non-whitespace character. $ws_at = $at; $ws_length = strspn( $existing_class, " \t\f\r\n", $ws_at ); $at += $ws_length; // Capture the class name – it's everything until the next whitespace. $name_length = strcspn( $existing_class, " \t\f\r\n", $at ); if ( 0 === $name_length ) { // If no more class names are found then that's the end. break; } $name = substr( $existing_class, $at, $name_length ); $at += $name_length; // If this class is marked for removal, start processing the next one. $remove_class = ( isset( $this->classname_updates[ $name ] ) && self::REMOVE_CLASS === $this->classname_updates[ $name ] ); // If a class has already been seen then skip it; it should not be added twice. if ( ! $remove_class ) { $this->classname_updates[ $name ] = self::SKIP_CLASS; } if ( $remove_class ) { $modified = true; continue; } /* * Otherwise, append it to the new "class" attribute value. * * There are options for handling whitespace between tags. * Preserving the existing whitespace produces fewer changes * to the HTML content and should clarify the before/after * content when debugging the modified output. * * This approach contrasts normalizing the inter-class * whitespace to a single space, which might appear cleaner * in the output HTML but produce a noisier change. */ $class .= substr( $existing_class, $ws_at, $ws_length ); $class .= $name; } // Add new classes by appending those which haven't already been seen. foreach ( $this->classname_updates as $name => $operation ) { if ( self::ADD_CLASS === $operation ) { $modified = true; $class .= strlen( $class ) > 0 ? ' ' : ''; $class .= $name; } } $this->classname_updates = array(); if ( ! $modified ) { return; } if ( strlen( $class ) > 0 ) { $this->set_attribute( 'class', $class ); } else { $this->remove_attribute( 'class' ); } } /** * Applies attribute updates to HTML document. * * @since 6.2.0 * @since 6.2.1 Accumulates shift for internal cursor and passed pointer. * @since 6.3.0 Invalidate any bookmarks whose targets are overwritten. * * @param int $shift_this_point Accumulate and return shift for this position. * @return int How many bytes the given pointer moved in response to the updates. */ private function apply_attributes_updates( $shift_this_point = 0 ) { if ( ! count( $this->lexical_updates ) ) { return 0; } $accumulated_shift_for_given_point = 0; /* * Attribute updates can be enqueued in any order but updates * to the document must occur in lexical order; that is, each * replacement must be made before all others which follow it * at later string indices in the input document. * * Sorting avoid making out-of-order replacements which * can lead to mangled output, partially-duplicated * attributes, and overwritten attributes. */ usort( $this->lexical_updates, array( self::class, 'sort_start_ascending' ) ); $bytes_already_copied = 0; $output_buffer = ''; foreach ( $this->lexical_updates as $diff ) { $shift = strlen( $diff->text ) - ( $diff->end - $diff->start ); // Adjust the cursor position by however much an update affects it. if ( $diff->start <= $this->bytes_already_parsed ) { $this->bytes_already_parsed += $shift; } // Accumulate shift of the given pointer within this function call. if ( $diff->start <= $shift_this_point ) { $accumulated_shift_for_given_point += $shift; } $output_buffer .= substr( $this->html, $bytes_already_copied, $diff->start - $bytes_already_copied ); $output_buffer .= $diff->text; $bytes_already_copied = $diff->end; } $this->html = $output_buffer . substr( $this->html, $bytes_already_copied ); /* * Adjust bookmark locations to account for how the text * replacements adjust offsets in the input document. */ foreach ( $this->bookmarks as $bookmark_name => $bookmark ) { /* * Each lexical update which appears before the bookmark's endpoints * might shift the offsets for those endpoints. Loop through each change * and accumulate the total shift for each bookmark, then apply that * shift after tallying the full delta. */ $head_delta = 0; $tail_delta = 0; foreach ( $this->lexical_updates as $diff ) { if ( $bookmark->start < $diff->start && $bookmark->end < $diff->start ) { break; } if ( $bookmark->start >= $diff->start && $bookmark->end < $diff->end ) { $this->release_bookmark( $bookmark_name ); continue 2; } $delta = strlen( $diff->text ) - ( $diff->end - $diff->start ); if ( $bookmark->start >= $diff->start ) { $head_delta += $delta; } if ( $bookmark->end >= $diff->end ) { $tail_delta += $delta; } } $bookmark->start += $head_delta; $bookmark->end += $tail_delta; } $this->lexical_updates = array(); return $accumulated_shift_for_given_point; } /** * Checks whether a bookmark with the given name exists. * * @since 6.3.0 * * @param string $bookmark_name Name to identify a bookmark that potentially exists. * @return bool Whether that bookmark exists. */ public function has_bookmark( $bookmark_name ) { return array_key_exists( $bookmark_name, $this->bookmarks ); } /** * Move the internal cursor in the Tag Processor to a given bookmark's location. * * In order to prevent accidental infinite loops, there's a * maximum limit on the number of times seek() can be called. * * @since 6.2.0 * * @param string $bookmark_name Jump to the place in the document identified by this bookmark name. * @return bool Whether the internal cursor was successfully moved to the bookmark's location. */ public function seek( $bookmark_name ) { if ( ! array_key_exists( $bookmark_name, $this->bookmarks ) ) { _doing_it_wrong( __METHOD__, __( 'Unknown bookmark name.' ), '6.2.0' ); return false; } if ( ++$this->seek_count > static::MAX_SEEK_OPS ) { _doing_it_wrong( __METHOD__, __( 'Too many calls to seek() - this can lead to performance issues.' ), '6.2.0' ); return false; } // Flush out any pending updates to the document. $this->get_updated_html(); // Point this tag processor before the sought tag opener and consume it. $this->bytes_already_parsed = $this->bookmarks[ $bookmark_name ]->start; return $this->next_tag( array( 'tag_closers' => 'visit' ) ); } /** * Compare two WP_HTML_Text_Replacement objects. * * @since 6.2.0 * * @param WP_HTML_Text_Replacement $a First attribute update. * @param WP_HTML_Text_Replacement $b Second attribute update. * @return int Comparison value for string order. */ private static function sort_start_ascending( $a, $b ) { $by_start = $a->start - $b->start; if ( 0 !== $by_start ) { return $by_start; } $by_text = isset( $a->text, $b->text ) ? strcmp( $a->text, $b->text ) : 0; if ( 0 !== $by_text ) { return $by_text; } /* * This code should be unreachable, because it implies the two replacements * start at the same location and contain the same text. */ return $a->end - $b->end; } /** * Return the enqueued value for a given attribute, if one exists. * * Enqueued updates can take different data types: * - If an update is enqueued and is boolean, the return will be `true` * - If an update is otherwise enqueued, the return will be the string value of that update. * - If an attribute is enqueued to be removed, the return will be `null` to indicate that. * - If no updates are enqueued, the return will be `false` to differentiate from "removed." * * @since 6.2.0 * * @param string $comparable_name The attribute name in its comparable form. * @return string|boolean|null Value of enqueued update if present, otherwise false. */ private function get_enqueued_attribute_value( $comparable_name ) { if ( ! isset( $this->lexical_updates[ $comparable_name ] ) ) { return false; } $enqueued_text = $this->lexical_updates[ $comparable_name ]->text; // Removed attributes erase the entire span. if ( '' === $enqueued_text ) { return null; } /* * Boolean attribute updates are just the attribute name without a corresponding value. * * This value might differ from the given comparable name in that there could be leading * or trailing whitespace, and that the casing follows the name given in `set_attribute`. * * Example: * * $p->set_attribute( 'data-TEST-id', 'update' ); * 'update' === $p->get_enqueued_attribute_value( 'data-test-id' ); * * Detect this difference based on the absence of the `=`, which _must_ exist in any * attribute containing a value, e.g. ``. * ¹ ² * 1. Attribute with a string value. * 2. Boolean attribute whose value is `true`. */ $equals_at = strpos( $enqueued_text, '=' ); if ( false === $equals_at ) { return true; } /* * Finally, a normal update's value will appear after the `=` and * be double-quoted, as performed incidentally by `set_attribute`. * * e.g. `type="text"` * ¹² ³ * 1. Equals is here. * 2. Double-quoting starts one after the equals sign. * 3. Double-quoting ends at the last character in the update. */ $enqueued_value = substr( $enqueued_text, $equals_at + 2, -1 ); return html_entity_decode( $enqueued_value ); } /** * Returns the value of a requested attribute from a matched tag opener if that attribute exists. * * Example: * * $p = new WP_HTML_Tag_Processor( '
Test
' ); * $p->next_tag( array( 'class_name' => 'test' ) ) === true; * $p->get_attribute( 'data-test-id' ) === '14'; * $p->get_attribute( 'enabled' ) === true; * $p->get_attribute( 'aria-label' ) === null; * * $p->next_tag() === false; * $p->get_attribute( 'class' ) === null; * * @since 6.2.0 * * @param string $name Name of attribute whose value is requested. * @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`. */ public function get_attribute( $name ) { if ( null === $this->tag_name_starts_at ) { return null; } $comparable = strtolower( $name ); /* * For every attribute other than `class` it's possible to perform a quick check if * there's an enqueued lexical update whose value takes priority over what's found in * the input document. * * The `class` attribute is special though because of the exposed helpers `add_class` * and `remove_class`. These form a builder for the `class` attribute, so an additional * check for enqueued class changes is required in addition to the check for any enqueued * attribute values. If any exist, those enqueued class changes must first be flushed out * into an attribute value update. */ if ( 'class' === $name ) { $this->class_name_updates_to_attributes_updates(); } // Return any enqueued attribute value updates if they exist. $enqueued_value = $this->get_enqueued_attribute_value( $comparable ); if ( false !== $enqueued_value ) { return $enqueued_value; } if ( ! isset( $this->attributes[ $comparable ] ) ) { return null; } $attribute = $this->attributes[ $comparable ]; /* * This flag distinguishes an attribute with no value * from an attribute with an empty string value. For * unquoted attributes this could look very similar. * It refers to whether an `=` follows the name. * * e.g.
* ¹ ² * 1. Attribute `boolean-attribute` is `true`. * 2. Attribute `empty-attribute` is `""`. */ if ( true === $attribute->is_true ) { return true; } $raw_value = substr( $this->html, $attribute->value_starts_at, $attribute->value_length ); return html_entity_decode( $raw_value ); } /** * Gets lowercase names of all attributes matching a given prefix in the current tag. * * Note that matching is case-insensitive. This is in accordance with the spec: * * > There must never be two or more attributes on * > the same start tag whose names are an ASCII * > case-insensitive match for each other. * - HTML 5 spec * * Example: * * $p = new WP_HTML_Tag_Processor( '
Test
' ); * $p->next_tag( array( 'class_name' => 'test' ) ) === true; * $p->get_attribute_names_with_prefix( 'data-' ) === array( 'data-enabled', 'data-test-id' ); * * $p->next_tag() === false; * $p->get_attribute_names_with_prefix( 'data-' ) === null; * * @since 6.2.0 * * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive * * @param string $prefix Prefix of requested attribute names. * @return array|null List of attribute names, or `null` when no tag opener is matched. */ public function get_attribute_names_with_prefix( $prefix ) { if ( $this->is_closing_tag || null === $this->tag_name_starts_at ) { return null; } $comparable = strtolower( $prefix ); $matches = array(); foreach ( array_keys( $this->attributes ) as $attr_name ) { if ( str_starts_with( $attr_name, $comparable ) ) { $matches[] = $attr_name; } } return $matches; } /** * Returns the uppercase name of the matched tag. * * Example: * * $p = new WP_HTML_Tag_Processor( '
Test
' ); * $p->next_tag() === true; * $p->get_tag() === 'DIV'; * * $p->next_tag() === false; * $p->get_tag() === null; * * @since 6.2.0 * * @return string|null Name of currently matched tag in input HTML, or `null` if none found. */ public function get_tag() { if ( null === $this->tag_name_starts_at ) { return null; } $tag_name = substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length ); return strtoupper( $tag_name ); } /** * Indicates if the currently matched tag contains the self-closing flag. * * No HTML elements ought to have the self-closing flag and for those, the self-closing * flag will be ignored. For void elements this is benign because they "self close" * automatically. For non-void HTML elements though problems will appear if someone * intends to use a self-closing element in place of that element with an empty body. * For HTML foreign elements and custom elements the self-closing flag determines if * they self-close or not. * * This function does not determine if a tag is self-closing, * but only if the self-closing flag is present in the syntax. * * @since 6.3.0 * * @return bool Whether the currently matched tag contains the self-closing flag. */ public function has_self_closing_flag() { if ( ! $this->tag_name_starts_at ) { return false; } return '/' === $this->html[ $this->tag_ends_at - 1 ]; } /** * Indicates if the current tag token is a tag closer. * * Example: * * $p = new WP_HTML_Tag_Processor( '
' ); * $p->next_tag( array( 'tag_name' => 'div', 'tag_closers' => 'visit' ) ); * $p->is_tag_closer() === false; * * $p->next_tag( array( 'tag_name' => 'div', 'tag_closers' => 'visit' ) ); * $p->is_tag_closer() === true; * * @since 6.2.0 * * @return bool Whether the current tag is a tag closer. */ public function is_tag_closer() { return $this->is_closing_tag; } /** * Updates or creates a new attribute on the currently matched tag with the passed value. * * For boolean attributes special handling is provided: * - When `true` is passed as the value, then only the attribute name is added to the tag. * - When `false` is passed, the attribute gets removed if it existed before. * * For string attributes, the value is escaped using the `esc_attr` function. * * @since 6.2.0 * @since 6.2.1 Fix: Only create a single update for multiple calls with case-variant attribute names. * * @param string $name The attribute name to target. * @param string|bool $value The new attribute value. * @return bool Whether an attribute value was set. */ public function set_attribute( $name, $value ) { if ( $this->is_closing_tag || null === $this->tag_name_starts_at ) { return false; } /* * WordPress rejects more characters than are strictly forbidden * in HTML5. This is to prevent additional security risks deeper * in the WordPress and plugin stack. Specifically the * less-than (<) greater-than (>) and ampersand (&) aren't allowed. * * The use of a PCRE match enables looking for specific Unicode * code points without writing a UTF-8 decoder. Whereas scanning * for one-byte characters is trivial (with `strcspn`), scanning * for the longer byte sequences would be more complicated. Given * that this shouldn't be in the hot path for execution, it's a * reasonable compromise in efficiency without introducing a * noticeable impact on the overall system. * * @see https://html.spec.whatwg.org/#attributes-2 * * @TODO as the only regex pattern maybe we should take it out? are * Unicode patterns available broadly in Core? */ if ( preg_match( '~[' . // Syntax-like characters. '"\'>& The values "true" and "false" are not allowed on boolean attributes. * > To represent a false value, the attribute has to be omitted altogether. * - HTML5 spec, https://html.spec.whatwg.org/#boolean-attributes */ if ( false === $value ) { return $this->remove_attribute( $name ); } if ( true === $value ) { $updated_attribute = $name; } else { $escaped_new_value = esc_attr( $value ); $updated_attribute = "{$name}=\"{$escaped_new_value}\""; } /* * > There must never be two or more attributes on * > the same start tag whose names are an ASCII * > case-insensitive match for each other. * - HTML 5 spec * * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive */ $comparable_name = strtolower( $name ); if ( isset( $this->attributes[ $comparable_name ] ) ) { /* * Update an existing attribute. * * Example – set attribute id to "new" in
: * *
* ^-------------^ * start end * replacement: `id="new"` * * Result:
*/ $existing_attribute = $this->attributes[ $comparable_name ]; $this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement( $existing_attribute->start, $existing_attribute->end, $updated_attribute ); } else { /* * Create a new attribute at the tag's name end. * * Example – add attribute id="new" to
: * *
* ^ * start and end * replacement: ` id="new"` * * Result:
*/ $this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement( $this->tag_name_starts_at + $this->tag_name_length, $this->tag_name_starts_at + $this->tag_name_length, ' ' . $updated_attribute ); } /* * Any calls to update the `class` attribute directly should wipe out any * enqueued class changes from `add_class` and `remove_class`. */ if ( 'class' === $comparable_name && ! empty( $this->classname_updates ) ) { $this->classname_updates = array(); } return true; } /** * Remove an attribute from the currently-matched tag. * * @since 6.2.0 * * @param string $name The attribute name to remove. * @return bool Whether an attribute was removed. */ public function remove_attribute( $name ) { if ( $this->is_closing_tag ) { return false; } /* * > There must never be two or more attributes on * > the same start tag whose names are an ASCII * > case-insensitive match for each other. * - HTML 5 spec * * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive */ $name = strtolower( $name ); /* * Any calls to update the `class` attribute directly should wipe out any * enqueued class changes from `add_class` and `remove_class`. */ if ( 'class' === $name && count( $this->classname_updates ) !== 0 ) { $this->classname_updates = array(); } /* * If updating an attribute that didn't exist in the input * document, then remove the enqueued update and move on. * * For example, this might occur when calling `remove_attribute()` * after calling `set_attribute()` for the same attribute * and when that attribute wasn't originally present. */ if ( ! isset( $this->attributes[ $name ] ) ) { if ( isset( $this->lexical_updates[ $name ] ) ) { unset( $this->lexical_updates[ $name ] ); } return false; } /* * Removes an existing tag attribute. * * Example – remove the attribute id from
: *
* ^-------------^ * start end * replacement: `` * * Result:
*/ $this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement( $this->attributes[ $name ]->start, $this->attributes[ $name ]->end, '' ); // Removes any duplicated attributes if they were also present. if ( null !== $this->duplicate_attributes && array_key_exists( $name, $this->duplicate_attributes ) ) { foreach ( $this->duplicate_attributes[ $name ] as $attribute_token ) { $this->lexical_updates[] = new WP_HTML_Text_Replacement( $attribute_token->start, $attribute_token->end, '' ); } } return true; } /** * Adds a new class name to the currently matched tag. * * @since 6.2.0 * * @param string $class_name The class name to add. * @return bool Whether the class was set to be added. */ public function add_class( $class_name ) { if ( $this->is_closing_tag ) { return false; } if ( null !== $this->tag_name_starts_at ) { $this->classname_updates[ $class_name ] = self::ADD_CLASS; } return true; } /** * Removes a class name from the currently matched tag. * * @since 6.2.0 * * @param string $class_name The class name to remove. * @return bool Whether the class was set to be removed. */ public function remove_class( $class_name ) { if ( $this->is_closing_tag ) { return false; } if ( null !== $this->tag_name_starts_at ) { $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; } return true; } /** * Returns the string representation of the HTML Tag Processor. * * @since 6.2.0 * * @see WP_HTML_Tag_Processor::get_updated_html() * * @return string The processed HTML. */ public function __toString() { return $this->get_updated_html(); } /** * Returns the string representation of the HTML Tag Processor. * * @since 6.2.0 * @since 6.2.1 Shifts the internal cursor corresponding to the applied updates. * @since 6.4.0 No longer calls subclass method `next_tag()` after updating HTML. * * @return string The processed HTML. */ public function get_updated_html() { $requires_no_updating = 0 === count( $this->classname_updates ) && 0 === count( $this->lexical_updates ); /* * When there is nothing more to update and nothing has already been * updated, return the original document and avoid a string copy. */ if ( $requires_no_updating ) { return $this->html; } /* * Keep track of the position right before the current tag. This will * be necessary for reparsing the current tag after updating the HTML. */ $before_current_tag = $this->tag_name_starts_at - 1; /* * 1. Apply the enqueued edits and update all the pointers to reflect those changes. */ $this->class_name_updates_to_attributes_updates(); $before_current_tag += $this->apply_attributes_updates( $before_current_tag ); /* * 2. Rewind to before the current tag and reparse to get updated attributes. * * At this point the internal cursor points to the end of the tag name. * Rewind before the tag name starts so that it's as if the cursor didn't * move; a call to `next_tag()` will reparse the recently-updated attributes * and additional calls to modify the attributes will apply at this same * location, but in order to avoid issues with subclasses that might add * behaviors to `next_tag()`, the internal methods should be called here * instead. * * It's important to note that in this specific place there will be no change * because the processor was already at a tag when this was called and it's * rewinding only to the beginning of this very tag before reprocessing it * and its attributes. * *

Previous HTMLMore HTML

* ↑ │ back up by the length of the tag name plus the opening < * └←─┘ back up by strlen("em") + 1 ==> 3 */ $this->bytes_already_parsed = $before_current_tag; $this->parse_next_tag(); // Reparse the attributes. while ( $this->parse_next_attribute() ) { continue; } $tag_ends_at = strpos( $this->html, '>', $this->bytes_already_parsed ); $this->tag_ends_at = $tag_ends_at; $this->bytes_already_parsed = $tag_ends_at; return $this->html; } /** * Parses tag query input into internal search criteria. * * @since 6.2.0 * * @param array|string|null $query { * Optional. Which tag name to find, having which class, etc. Default is to find any tag. * * @type string|null $tag_name Which tag to find, or `null` for "any tag." * @type int|null $match_offset Find the Nth tag matching all search criteria. * 1 for "first" tag, 3 for "third," etc. * Defaults to first tag. * @type string|null $class_name Tag must contain this class name to match. * @type string $tag_closers "visit" or "skip": whether to stop on tag closers, e.g.
. * } */ private function parse_query( $query ) { if ( null !== $query && $query === $this->last_query ) { return; } $this->last_query = $query; $this->sought_tag_name = null; $this->sought_class_name = null; $this->sought_match_offset = 1; $this->stop_on_tag_closers = false; // A single string value means "find the tag of this name". if ( is_string( $query ) ) { $this->sought_tag_name = $query; return; } // An empty query parameter applies no restrictions on the search. if ( null === $query ) { return; } // If not using the string interface, an associative array is required. if ( ! is_array( $query ) ) { _doing_it_wrong( __METHOD__, __( 'The query argument must be an array or a tag name.' ), '6.2.0' ); return; } if ( isset( $query['tag_name'] ) && is_string( $query['tag_name'] ) ) { $this->sought_tag_name = $query['tag_name']; } if ( isset( $query['class_name'] ) && is_string( $query['class_name'] ) ) { $this->sought_class_name = $query['class_name']; } if ( isset( $query['match_offset'] ) && is_int( $query['match_offset'] ) && 0 < $query['match_offset'] ) { $this->sought_match_offset = $query['match_offset']; } if ( isset( $query['tag_closers'] ) ) { $this->stop_on_tag_closers = 'visit' === $query['tag_closers']; } } /** * Checks whether a given tag and its attributes match the search criteria. * * @since 6.2.0 * * @return bool Whether the given tag and its attribute match the search criteria. */ private function matches() { if ( $this->is_closing_tag && ! $this->stop_on_tag_closers ) { return false; } // Does the tag name match the requested tag name in a case-insensitive manner? if ( null !== $this->sought_tag_name ) { /* * String (byte) length lookup is fast. If they aren't the * same length then they can't be the same string values. */ if ( strlen( $this->sought_tag_name ) !== $this->tag_name_length ) { return false; } /* * Check each character to determine if they are the same. * Defer calls to `strtoupper()` to avoid them when possible. * Calling `strcasecmp()` here tested slowed than comparing each * character, so unless benchmarks show otherwise, it should * not be used. * * It's expected that most of the time that this runs, a * lower-case tag name will be supplied and the input will * contain lower-case tag names, thus normally bypassing * the case comparison code. */ for ( $i = 0; $i < $this->tag_name_length; $i++ ) { $html_char = $this->html[ $this->tag_name_starts_at + $i ]; $tag_char = $this->sought_tag_name[ $i ]; if ( $html_char !== $tag_char && strtoupper( $html_char ) !== $tag_char ) { return false; } } } if ( null !== $this->sought_class_name && ! $this->has_class( $this->sought_class_name ) ) { return false; } return true; } }