); } } return true; } /** * Adds a new class name to the currently matched tag. * * @since 6.2.0 * * @param string $class_name The class name to add. * @return bool Whether the class was set to be added. */ public function add_class( $class_name ) { if ( $this->is_closing_tag ) { return false; } if ( null !== $this->tag_name_starts_at ) { $this->classname_updates[ $class_name ] = self::ADD_CLASS; } return true; } /** * Removes a class name from the currently matched tag. * * @since 6.2.0 * * @param string $class_name The class name to remove. * @return bool Whether the class was set to be removed. */ public function remove_class( $class_name ) { if ( $this->is_closing_tag ) { return false; } if ( null !== $this->tag_name_starts_at ) { $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; } return true; } /** * Returns the string representation of the HTML Tag Processor. * * @since 6.2.0 * * @see WP_HTML_Tag_Processor::get_updated_html() * * @return string The processed HTML. */ public function __toString() { return $this->get_updated_html(); } /** * Returns the string representation of the HTML Tag Processor. * * @since 6.2.0 * @since 6.2.1 Shifts the internal cursor corresponding to the applied updates. * * @return string The processed HTML. */ public function get_updated_html() { $requires_no_updating = 0 === count( $this->classname_updates ) && 0 === count( $this->lexical_updates ); /* * When there is nothing more to update and nothing has already been * updated, return the original document and avoid a string copy. */ if ( $requires_no_updating ) { return $this->html; } /* * Keep track of the position right before the current tag. This will * be necessary for reparsing the current tag after updating the HTML. */ $before_current_tag = $this->tag_name_starts_at - 1; /* * 1. Apply the enqueued edits and update all the pointers to reflect those changes. */ $this->class_name_updates_to_attributes_updates(); $before_current_tag += $this->apply_attributes_updates( $before_current_tag ); /* * 2. Rewind to before the current tag and reparse to get updated attributes. * * At this point the internal cursor points to the end of the tag name. * Rewind before the tag name starts so that it's as if the cursor didn't * move; a call to `next_tag()` will reparse the recently-updated attributes * and additional calls to modify the attributes will apply at this same * location. * *

Previous HTMLMore HTML

* ^ | back up by the length of the tag name plus the opening < * \<-/ back up by strlen("em") + 1 ==> 3 */ // Store existing state so it can be restored after reparsing. $previous_parsed_byte_count = $this->bytes_already_parsed; $previous_query = $this->last_query; // Reparse attributes. $this->bytes_already_parsed = $before_current_tag; $this->next_tag(); // Restore previous state. $this->bytes_already_parsed = $previous_parsed_byte_count; $this->parse_query( $previous_query ); return $this->html; } /** * Parses tag query input into internal search criteria. * * @since 6.2.0 * * @param array|string|null $query { * Optional. Which tag name to find, having which class, etc. Default is to find any tag. * * @type string|null $tag_name Which tag to find, or `null` for "any tag." * @type int|null $match_offset Find the Nth tag matching all search criteria. * 1 for "first" tag, 3 for "third," etc. * Defaults to first tag. * @type string|null $class_name Tag must contain this class name to match. * @type string $tag_closers "visit" or "skip": whether to stop on tag closers, e.g. . * } */ private function parse_query( $query ) { if ( null !== $query && $query === $this->last_query ) { return; } $this->last_query = $query; $this->sought_tag_name = null; $this->sought_class_name = null; $this->sought_match_offset = 1; $this->stop_on_tag_closers = false; // A single string value means "find the tag of this name". if ( is_string( $query ) ) { $this->sought_tag_name = $query; return; } // An empty query parameter applies no restrictions on the search. if ( null === $query ) { return; } // If not using the string interface, an associative array is required. if ( ! is_array( $query ) ) { _doing_it_wrong( __METHOD__, __( 'The query argument must be an array or a tag name.' ), '6.2.0' ); return; } if ( isset( $query['tag_name'] ) && is_string( $query['tag_name'] ) ) { $this->sought_tag_name = $query['tag_name']; } if ( isset( $query['class_name'] ) && is_string( $query['class_name'] ) ) { $this->sought_class_name = $query['class_name']; } if ( isset( $query['match_offset'] ) && is_int( $query['match_offset'] ) && 0 < $query['match_offset'] ) { $this->sought_match_offset = $query['match_offset']; } if ( isset( $query['tag_closers'] ) ) { $this->stop_on_tag_closers = 'visit' === $query['tag_closers']; } } /** * Checks whether a given tag and its attributes match the search criteria. * * @since 6.2.0 * * @return boolean Whether the given tag and its attribute match the search criteria. */ private function matches() { if ( $this->is_closing_tag && ! $this->stop_on_tag_closers ) { return false; } // Does the tag name match the requested tag name in a case-insensitive manner? if ( null !== $this->sought_tag_name ) { /* * String (byte) length lookup is fast. If they aren't the * same length then they can't be the same string values. */ if ( strlen( $this->sought_tag_name ) !== $this->tag_name_length ) { return false; } /* * Check each character to determine if they are the same. * Defer calls to `strtoupper()` to avoid them when possible. * Calling `strcasecmp()` here tested slowed than comparing each * character, so unless benchmarks show otherwise, it should * not be used. * * It's expected that most of the time that this runs, a * lower-case tag name will be supplied and the input will * contain lower-case tag names, thus normally bypassing * the case comparison code. */ for ( $i = 0; $i < $this->tag_name_length; $i++ ) { $html_char = $this->html[ $this->tag_name_starts_at + $i ]; $tag_char = $this->sought_tag_name[ $i ]; if ( $html_char !== $tag_char && strtoupper( $html_char ) !== $tag_char ) { return false; } } } $needs_class_name = null !== $this->sought_class_name; if ( $needs_class_name && ! isset( $this->attributes['class'] ) ) { return false; } /* * Match byte-for-byte (case-sensitive and encoding-form-sensitive) on the class name. * * This will overlook certain classes that exist in other lexical variations * than was supplied to the search query, but requires more complicated searching. */ if ( $needs_class_name ) { $class_start = $this->attributes['class']->value_starts_at; $class_end = $class_start + $this->attributes['class']->value_length; $class_at = $class_start; /* * Ensure that boundaries surround the class name to avoid matching on * substrings of a longer name. For example, the sequence "not-odd" * should not match for the class "odd" even though "odd" is found * within the class attribute text. * * See https://html.spec.whatwg.org/#attributes-3 * See https://html.spec.whatwg.org/#space-separated-tokens */ while ( // phpcs:ignore WordPress.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition false !== ( $class_at = strpos( $this->html, $this->sought_class_name, $class_at ) ) && $class_at < $class_end ) { /* * Verify this class starts at a boundary. */ if ( $class_at > $class_start ) { $character = $this->html[ $class_at - 1 ]; if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) { $class_at += strlen( $this->sought_class_name ); continue; } } /* * Verify this class ends at a boundary as well. */ if ( $class_at + strlen( $this->sought_class_name ) < $class_end ) { $character = $this->html[ $class_at + strlen( $this->sought_class_name ) ]; if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) { $class_at += strlen( $this->sought_class_name ); continue; } } return true; } return false; } return true; } }