'😯', * ':(' => 'πŸ™', * ':)' => 'πŸ™‚', * ':?' => 'πŸ˜•', * ) ); * * true === $smilies->contains( ':)' ); * false === $smilies->contains( 'simile' ); * * 'πŸ˜•' === $smilies->read_token( 'Not sure :?.', 9, $length_of_smily_syntax ); * 2 === $length_of_smily_syntax; * * ## Precomputing the Token Map. * * Creating the class involves some work sorting and organizing the tokens and their * replacement values. In order to skip this, it's possible for the class to export * its state and be used as actual PHP source code. * * Example: * * // Export with four spaces as the indent, only for the sake of this docblock. * // The default indent is a tab character. * $indent = ' '; * echo $smilies->precomputed_php_source_table( $indent ); * * // Output, to be pasted into a PHP source file: * WP_Token_Map::from_precomputed_table( * array( * "storage_version" => "6.6.0", * "key_length" => 2, * "groups" => "", * "long_words" => array(), * "small_words" => "8O\x00:)\x00:(\x00:?\x00", * "small_mappings" => array( "😯", "πŸ™‚", "πŸ™", "πŸ˜•" ) * ) * ); * * ## Large vs. small words. * * This class uses a short prefix called the "key" to optimize lookup of its tokens. * This means that some tokens may be shorter than or equal in length to that key. * Those words that are longer than the key are called "large" while those shorter * than or equal to the key length are called "small." * * This separation of large and small words is incidental to the way this class * optimizes lookup, and should be considered an internal implementation detail * of the class. It may still be important to be aware of it, however. * * ## Determining Key Length. * * The choice of the size of the key length should be based on the data being stored in * the token map. It should divide the data as evenly as possible, but should not create * so many groups that a large fraction of the groups only contain a single token. * * For the HTML5 named character references, a key length of 2 was found to provide a * sufficient spread and should be a good default for relatively large sets of tokens. * * However, for some data sets this might be too long. For example, a list of smilies * may be too small for a key length of 2. Perhaps 1 would be more appropriate. It's * best to experiment and determine empirically which values are appropriate. * * ## Generate Pre-Computed Source Code. * * Since the `WP_Token_Map` is designed for relatively static lookups, it can be * advantageous to precompute the values and instantiate a table that has already * sorted and grouped the tokens and built the lookup strings. * * This can be done with `WP_Token_Map::precomputed_php_source_table()`. * * Note that if there is a leading character that all tokens need, such as `&` for * HTML named character references, it can be beneficial to exclude this from the * token map. Instead, find occurrences of the leading character and then use the * token map to see if the following characters complete the token. * * Example: * * $map = WP_Token_Map::from_array( array( 'simple_smile:' => 'πŸ™‚', 'sob:' => '😭', 'soba:' => '🍜' ) ); * echo $map->precomputed_php_source_table(); * // Output * WP_Token_Map::from_precomputed_table( * array( * "storage_version" => "6.6.0", * "key_length" => 2, * "groups" => "si\x00so\x00", * "long_words" => array( * // simple_smile:[πŸ™‚]. * "\x0bmple_smile:\x04πŸ™‚", * // soba:[🍜] sob:[😭]. * "\x03ba:\x04🍜\x02b:\x04😭", * ), * "short_words" => "", * "short_mappings" => array() * } * ); * * This precomputed value can be stored directly in source code and will skip the * startup cost of generating the lookup strings. See `$html5_named_character_entities`. * * Note that any updates to the precomputed format should update the storage version * constant. It would also be best to provide an update function to take older known * versions and upgrade them in place when loading into `from_precomputed_table()`. * * ## Future Direction. * * It may be viable to dynamically increase the length limits such that there's no need to impose them. * The limit appears because of the packing structure, which indicates how many bytes each segment of * text in the lookup tables spans. If, however, care were taken to track the longest word length, then * the packing structure could change its representation to allow for that. Each additional byte storing * length, however, increases the memory overhead and lookup runtime. * * An alternative approach could be to borrow the UTF-8 variable-length encoding and store lengths of less * than 127 as a single byte with the high bit unset, storing longer lengths as the combination of * continuation bytes. * * Since it has not been shown during the development of this class that longer strings are required, this * update is deferred until such a need is clear. * * @since 6.6.0 */ class WP_Token_Map { /** * Denotes the version of the code which produces pre-computed source tables. * * This version will be used not only to verify pre-computed data, but also * to upgrade pre-computed data from older versions. Choosing a name that * corresponds to the WordPress release will help people identify where an * old copy of data came from. */ const STORAGE_VERSION = '6.6.0-trunk'; /** * Maximum length for each key and each transformed value in the table (in bytes). * * @since 6.6.0 */ const MAX_LENGTH = 256; /** * How many bytes of each key are used to form a group key for lookup. * This also determines whether a word is considered short or long. * * @since 6.6.0 * * @var int */ private $key_length = 2; /** * Stores an optimized form of the word set, where words are grouped * by a prefix of the `$key_length` and then collapsed into a string. * * In each group, the keys and lookups form a packed data structure. * The keys in the string are stripped of their "group key," which is * the prefix of length `$this->key_length` shared by all of the items * in the group. Each word in the string is prefixed by a single byte * whose raw unsigned integer value represents how many bytes follow. * * β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β” * β”‚ Length of rest β”‚ Rest of key β”‚ Length of value β”‚ Value β”‚ * β”‚ of key (bytes) β”‚ β”‚ (bytes) β”‚ β”‚ * β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€ * β”‚ 0x08 β”‚ nterDot; β”‚ 0x02 β”‚ Β· β”‚ * β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”˜ * * In this example, the key `CenterDot;` has a group key `Ce`, leaving * eight bytes for the rest of the key, `nterDot;`, and two bytes for * the transformed value `Β·` (or U+B7 or "\xC2\xB7"). * * Example: * * // Stores array( 'CenterDot;' => 'Β·', 'Cedilla;' => 'ΒΈ' ). * $groups = "Ce\x00"; * $large_words = array( "\x08nterDot;\x02Β·\x06dilla;\x02ΒΈ" ) * * The prefixes appear in the `$groups` string, each followed by a null * byte. This makes for quick lookup of where in the group string the key * is found, and then a simple division converts that offset into the index * in the `$large_words` array where the group string is to be found. * * This lookup data structure is designed to optimize cache locality and * minimize indirect memory reads when matching strings in the set. * * @since 6.6.0 * * @var array */ private $large_words = array(); /** * Stores the group keys for sequential string lookup. * * The offset into this string where the group key appears corresponds with the index * into the group array where the rest of the group string appears. This is an optimization * to improve cache locality while searching and minimize indirect memory accesses. * * @since 6.6.0 * * @var string */ private $groups = ''; /** * Stores an optimized row of small words, where every entry is * `$this->key_size + 1` bytes long and zero-extended. * * This packing allows for direct lookup of a short word followed * by the null byte, if extended to `$this->key_size + 1`. * * Example: * * // Stores array( 'GT', 'LT', 'gt', 'lt' ). * "GT\x00LT\x00gt\x00lt\x00" * * @since 6.6.0 * * @var string */ private $small_words = ''; /** * Replacements for the small words, in the same order they appear. * * With the position of a small word it's possible to index the translation * directly, as its position in the `$small_words` string corresponds to * the index of the replacement in the `$small_mapping` array. * * Example: * * array( '>', '<', '>', '<' ) * * @since 6.6.0 * * @var string[] */ private $small_mappings = array(); /** * Create a token map using an associative array of key/value pairs as the input. * * Example: * * $smilies = WP_Token_Map::from_array( array( * '8O' => '😯', * ':(' => 'πŸ™', * ':)' => 'πŸ™‚', * ':?' => 'πŸ˜•', * ) ); * * @since 6.6.0 * * @param array $mappings The keys transform into the values, both are strings. * @param int $key_length Determines the group key length. Leave at the default value * of 2 unless there's an empirical reason to change it. * * @return WP_Token_Map|null Token map, unless unable to create it. */ public static function from_array( array $mappings, int $key_length = 2 ): ?WP_Token_Map { $map = new WP_Token_Map(); $map->key_length = $key_length; // Start by grouping words. $groups = array(); $shorts = array(); foreach ( $mappings as $word => $mapping ) { if ( self::MAX_LENGTH <= strlen( $word ) || self::MAX_LENGTH <= strlen( $mapping ) ) { _doing_it_wrong( __METHOD__, sprintf( /* translators: 1: maximum byte length (a count) */ __( 'Token Map tokens and substitutions must all be shorter than %1$d bytes.' ), self::MAX_LENGTH ), '6.6.0' ); return null; } $length = strlen( $word ); if ( $key_length >= $length ) { $shorts[] = $word; } else { $group = substr( $word, 0, $key_length ); if ( ! isset( $groups[ $group ] ) ) { $groups[ $group ] = array(); } $groups[ $group ][] = array( substr( $word, $key_length ), $mapping ); } } /* * Sort the words to ensure that no smaller substring of a match masks the full match. * For example, `Cap` should not match before `CapitalDifferentialD`. */ usort( $shorts, 'WP_Token_Map::longest_first_then_alphabetical' ); foreach ( $groups as $group_key => $group ) { usort( $groups[ $group_key ], static function ( array $a, array $b ): int { return self::longest_first_then_alphabetical( $a[0], $b[0] ); } ); } // Finally construct the optimized lookups. foreach ( $shorts as $word ) { $map->small_words .= str_pad( $word, $key_length + 1, "\x00", STR_PAD_RIGHT ); $map->small_mappings[] = $mappings[ $word ]; } $group_keys = array_keys( $groups ); sort( $group_keys ); foreach ( $group_keys as $group ) { $map->groups .= "{$group}\x00"; $group_string = ''; foreach ( $groups[ $group ] as $group_word ) { list( $word, $mapping ) = $group_word; $word_length = pack( 'C', strlen( $word ) ); $mapping_length = pack( 'C', strlen( $mapping ) ); $group_string .= "{$word_length}{$word}{$mapping_length}{$mapping}"; } $map->large_words[] = $group_string; } return $map; } /** * Creates a token map from a pre-computed table. * This skips the initialization cost of generating the table. * * This function should only be used to load data created with * WP_Token_Map::precomputed_php_source_tag(). * * @since 6.6.0 * * @param array $state { * Stores pre-computed state for directly loading into a Token Map. * * @type string $storage_version Which version of the code produced this state. * @type int $key_length Group key length. * @type string $groups Group lookup index. * @type array $large_words Large word groups and packed strings. * @type string $small_words Small words packed string. * @type array $small_mappings Small word mappings. * } * * @return WP_Token_Map Map with precomputed data loaded. */ public static function from_precomputed_table( $state ): ?WP_Token_Map { $has_necessary_state = isset( $state['storage_version'], $state['key_length'], $state['groups'], $state['large_words'], $state['small_words'], $state['small_mappings'] ); if ( ! $has_necessary_state ) { _doing_it_wrong( __METHOD__, __( 'Missing required inputs to pre-computed WP_Token_Map.' ), '6.6.0' ); return null; } if ( self::STORAGE_VERSION !== $state['storage_version'] ) { _doing_it_wrong( __METHOD__, /* translators: 1: version string, 2: version string. */ sprintf( __( 'Loaded version \'%1$s\' incompatible with expected version \'%2$s\'.' ), $state['storage_version'], self::STORAGE_VERSION ), '6.6.0' ); return null; } $map = new WP_Token_Map(); $map->key_length = $state['key_length']; $map->groups = $state['groups']; $map->large_words = $state['large_words']; $map->small_words = $state['small_words']; $map->small_mappings = $state['small_mappings']; return $map; } /** * Indicates if a given word is a lookup key in the map. * * Example: * * true === $smilies->contains( ':)' ); * false === $smilies->contains( 'simile' ); * * @since 6.6.0 * * @param string $word Determine if this word is a lookup key in the map. * @param string $case_sensitivity Optional. Pass 'ascii-case-insensitive' to ignore ASCII case when matching. Default 'case-sensitive'. * @return bool Whether there's an entry for the given word in the map. */ public function contains( string $word, string $case_sensitivity = 'case-sensitive' ): bool { $ignore_case = 'ascii-case-insensitive' === $case_sensitivity; if ( $this->key_length >= strlen( $word ) ) { if ( 0 === strlen( $this->small_words ) ) { return false; } $term = str_pad( $word, $this->key_length + 1, "\x00", STR_PAD_RIGHT ); $word_at = $ignore_case ? stripos( $this->small_words, $term ) : strpos( $this->small_words, $term ); if ( false === $word_at ) { return false; } return true; } $group_key = substr( $word, 0, $this->key_length ); $group_at = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key ); if ( false === $group_at ) { return false; } $group = $this->large_words[ $group_at / ( $this->key_length + 1 ) ]; $group_length = strlen( $group ); $slug = substr( $word, $this->key_length ); $length = strlen( $slug ); $at = 0; while ( $at < $group_length ) { $token_length = unpack( 'C', $group[ $at++ ] )[1]; $token_at = $at; $at += $token_length; $mapping_length = unpack( 'C', $group[ $at++ ] )[1]; $mapping_at = $at; if ( $token_length === $length && 0 === substr_compare( $group, $slug, $token_at, $token_length, $ignore_case ) ) { return true; } $at = $mapping_at + $mapping_length; } return false; } /** * If the text starting at a given offset is a lookup key in the map, * return the corresponding transformation from the map, else `false`. * * This function returns the translated string, but accepts an optional * parameter `$matched_token_byte_length`, which communicates how many * bytes long the lookup key was, if it found one. This can be used to * advance a cursor in calling code if a lookup key was found. * * Example: * * false === $smilies->read_token( 'Not sure :?.', 0, $token_byte_length ); * 'πŸ˜•' === $smilies->read_token( 'Not sure :?.', 9, $token_byte_length ); * 2 === $token_byte_length; * * Example: * * while ( $at < strlen( $input ) ) { * $next_at = strpos( $input, ':', $at ); * if ( false === $next_at ) { * break; * } * * $smily = $smilies->read_token( $input, $next_at, $token_byte_length ); * if ( false === $next_at ) { * ++$at; * continue; * } * * $prefix = substr( $input, $at, $next_at - $at ); * $at += $token_byte_length; * $output .= "{$prefix}{$smily}"; * } * * @since 6.6.0 * * @param string $text String in which to search for a lookup key. * @param int $offset Optional. How many bytes into the string where the lookup key ought to start. Default 0. * @param int|null &$matched_token_byte_length Optional. Holds byte-length of found token matched, otherwise not set. Default null. * @param string $case_sensitivity Optional. Pass 'ascii-case-insensitive' to ignore ASCII case when matching. Default 'case-sensitive'. * * @return string|null Mapped value of lookup key if found, otherwise `null`. */ public function read_token( string $text, int $offset = 0, &$matched_token_byte_length = null, $case_sensitivity = 'case-sensitive' ): ?string { $ignore_case = 'ascii-case-insensitive' === $case_sensitivity; $text_length = strlen( $text ); // Search for a long word first, if the text is long enough, and if that fails, a short one. if ( $text_length > $this->key_length ) { $group_key = substr( $text, $offset, $this->key_length ); $group_at = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key ); if ( false === $group_at ) { // Perhaps a short word then. return strlen( $this->small_words ) > 0 ? $this->read_small_token( $text, $offset, $matched_token_byte_length, $case_sensitivity ) : null; } $group = $this->large_words[ $group_at / ( $this->key_length + 1 ) ]; $group_length = strlen( $group ); $at = 0; while ( $at < $group_length ) { $token_length = unpack( 'C', $group[ $at++ ] )[1]; $token = substr( $group, $at, $token_length ); $at += $token_length; $mapping_length = unpack( 'C', $group[ $at++ ] )[1]; $mapping_at = $at; if ( 0 === substr_compare( $text, $token, $offset + $this->key_length, $token_length, $ignore_case ) ) { $matched_token_byte_length = $this->key_length + $token_length; return substr( $group, $mapping_at, $mapping_length ); } $at = $mapping_at + $mapping_length; } } // Perhaps a short word then. return strlen( $this->small_words ) > 0 ? $this->read_small_token( $text, $offset, $matched_token_byte_length, $case_sensitivity ) : null; } /** * Finds a match for a short word at the index. * * @since 6.6.0 * * @param string $text String in which to search for a lookup key. * @param int $offset Optional. How many bytes into the string where the lookup key ought to start. Default 0. * @param int|null &$matched_token_byte_length Optional. Holds byte-length of found lookup key if matched, otherwise not set. Default null. * @param string $case_sensitivity Optional. Pass 'ascii-case-insensitive' to ignore ASCII case when matching. Default 'case-sensitive'. * * @return string|null Mapped value of lookup key if found, otherwise `null`. */ private function read_small_token( string $text, int $offset = 0, &$matched_token_byte_length = null, $case_sensitivity = 'case-sensitive' ): ?string { $ignore_case = 'ascii-case-insensitive' === $case_sensitivity; $small_length = strlen( $this->small_words ); $search_text = substr( $text, $offset, $this->key_length ); if ( $ignore_case ) { $search_text = strtoupper( $search_text ); } $starting_char = $search_text[0]; $at = 0; while ( $at < $small_length ) { if ( $starting_char !== $this->small_words[ $at ] && ( ! $ignore_case || strtoupper( $this->small_words[ $at ] ) !== $starting_char ) ) { $at += $this->key_length + 1; continue; } for ( $adjust = 1; $adjust < $this->key_length; $adjust++ ) { if ( "\x00" === $this->small_words[ $at + $adjust ] ) { $matched_token_byte_length = $adjust; return $this->small_mappings[ $at / ( $this->key_length + 1 ) ]; } if ( $search_text[ $adjust ] !== $this->small_words[ $at + $adjust ] && ( ! $ignore_case || strtoupper( $this->small_words[ $at + $adjust ] !== $search_text[ $adjust ] ) ) ) { $at += $this->key_length + 1; continue 2; } } $matched_token_byte_length = $adjust; return $this->small_mappings[ $at / ( $this->key_length + 1 ) ]; } return null; } /** * Exports the token map into an associate array of key/value pairs. * * Example: * * $smilies->to_array() === array( * '8O' => '😯', * ':(' => 'πŸ™', * ':)' => 'πŸ™‚', * ':?' => 'πŸ˜•', * ); * * @return array The lookup key/substitution values as an associate array. */ public function to_array(): array { $tokens = array(); $at = 0; $small_mapping = 0; $small_length = strlen( $this->small_words ); while ( $at < $small_length ) { $key = rtrim( substr( $this->small_words, $at, $this->key_length + 1 ), "\x00" ); $value = $this->small_mappings[ $small_mapping++ ]; $tokens[ $key ] = $value; $at += $this->key_length + 1; } foreach ( $this->large_words as $index => $group ) { $prefix = substr( $this->groups, $index * ( $this->key_length + 1 ), 2 ); $group_length = strlen( $group ); $at = 0; while ( $at < $group_length ) { $length = unpack( 'C', $group[ $at++ ] )[1]; $key = $prefix . substr( $group, $at, $length ); $at += $length; $length = unpack( 'C', $group[ $at++ ] )[1]; $value = substr( $group, $at, $length ); $tokens[ $key ] = $value; $at += $length; } } return $tokens; } /** * Export the token map for quick loading in PHP source code. * * This function has a specific purpose, to make loading of static token maps fast. * It's used to ensure that the HTML character reference lookups add a minimal cost * to initializing the PHP process. * * Example: * * echo $smilies->precomputed_php_source_table(); * * // Output. * WP_Token_Map::from_precomputed_table( * array( * "storage_version" => "6.6.0", * "key_length" => 2, * "groups" => "", * "long_words" => array(), * "small_words" => "8O\x00:)\x00:(\x00:?\x00", * "small_mappings" => array( "😯", "πŸ™‚", "πŸ™", "πŸ˜•" ) * ) * ); * * @since 6.6.0 * * @param string $indent Optional. Use this string for indentation, or rely on the default horizontal tab character. Default "\t". * @return string Value which can be pasted into a PHP source file for quick loading of table. */ public function precomputed_php_source_table( string $indent = "\t" ): string { $i1 = $indent; $i2 = $i1 . $indent; $i3 = $i2 . $indent; $class_version = self::STORAGE_VERSION; $output = self::class . "::from_precomputed_table(\n"; $output .= "{$i1}array(\n"; $output .= "{$i2}\"storage_version\" => \"{$class_version}\",\n"; $output .= "{$i2}\"key_length\" => {$this->key_length},\n"; $group_line = str_replace( "\x00", "\\x00", $this->groups ); $output .= "{$i2}\"groups\" => \"{$group_line}\",\n"; $output .= "{$i2}\"large_words\" => array(\n"; $prefixes = explode( "\x00", $this->groups ); foreach ( $prefixes as $index => $prefix ) { if ( '' === $prefix ) { break; } $group = $this->large_words[ $index ]; $group_length = strlen( $group ); $comment_line = "{$i3}//"; $data_line = "{$i3}\""; $at = 0; while ( $at < $group_length ) { $token_length = unpack( 'C', $group[ $at++ ] )[1]; $token = substr( $group, $at, $token_length ); $at += $token_length; $mapping_length = unpack( 'C', $group[ $at++ ] )[1]; $mapping = substr( $group, $at, $mapping_length ); $at += $mapping_length; $token_digits = str_pad( dechex( $token_length ), 2, '0', STR_PAD_LEFT ); $mapping_digits = str_pad( dechex( $mapping_length ), 2, '0', STR_PAD_LEFT ); $mapping = preg_replace_callback( "~[\\x00-\\x1f\\x22\\x5c]~", static function ( $match_result ) { switch ( $match_result[0] ) { case '"': return '\\"'; case '\\': return '\\\\'; default: $hex = dechex( ord( $match_result[0] ) ); return "\\x{$hex}"; } }, $mapping ); $comment_line .= " {$prefix}{$token}[{$mapping}]"; $data_line .= "\\x{$token_digits}{$token}\\x{$mapping_digits}{$mapping}"; } $comment_line .= ".\n"; $data_line .= "\",\n"; $output .= $comment_line; $output .= $data_line; } $output .= "{$i2}),\n"; $small_words = array(); $small_length = strlen( $this->small_words ); $at = 0; while ( $at < $small_length ) { $small_words[] = substr( $this->small_words, $at, $this->key_length + 1 ); $at += $this->key_length + 1; } $small_text = str_replace( "\x00", '\x00', implode( '', $small_words ) ); $output .= "{$i2}\"small_words\" => \"{$small_text}\",\n"; $output .= "{$i2}\"small_mappings\" => array(\n"; foreach ( $this->small_mappings as $mapping ) { $output .= "{$i3}\"{$mapping}\",\n"; } $output .= "{$i2})\n"; $output .= "{$i1})\n"; $output .= ')'; return $output; } /** * Compares two strings, returning the longest, or whichever * is first alphabetically if they are the same length. * * This is an important sort when building the token map because * it should not form a match on a substring of a longer potential * match. For example, it should not detect `Cap` when matching * against the sprepare_query( $query ); $this->query(); } } /** * Fills in missing query variables with default values. * * @since 4.4.0 * * @param string|array $args Query vars, as passed to `WP_User_Query`. * @return array Complete query variables with undefined ones filled in with defaults. */ public static function fill_query_vars( $args ) { $defaults = array( 'blog_id' => get_current_blog_id(), 'role' => '', 'role__in' => array(), 'role__not_in' => array(), 'capability' => '', 'capability__in' => array(), 'capability__not_in' => array(), 'meta_key' => '', 'meta_value' => '', 'meta_compare' => '', 'include' => array(), 'exclude' => array(), 'search' => '', 'search_columns' => array(), 'orderby' => 'login', 'order' => 'ASC', 'offset' => '', 'number' => '', 'paged' => 1, 'count_total' => true, 'fields' => 'all', 'who' => '', 'has_published_posts' => null, 'nicename' => '', 'nicename__in' => array(), 'nicename__not_in' => array(), 'login' => '', 'login__in' => array(), 'login__not_in' => array(), 'cache_results' => true, ); return wp_parse_args( $args, $defaults ); } /** * Prepares the query variables. * * @since 3.1.0 * @since 4.1.0 Added the ability to order by the `include` value. * @since 4.2.0 Added 'meta_value_num' support for `$orderby` parameter. Added multi-dimensional array syntax * for `$orderby` parameter. * @since 4.3.0 Added 'has_published_posts' parameter. * @since 4.4.0 Added 'paged', 'role__in', and 'role__not_in' parameters. The 'role' parameter was updated to * permit an array or comma-separated list of values. The 'number' parameter was updated to support * querying for all users with using -1. * @since 4.7.0 Added 'nicename', 'nicename__in', 'nicename__not_in', 'login', 'login__in', * and 'login__not_in' parameters. * @since 5.1.0 Introduced the 'meta_compare_key' parameter. * @since 5.3.0 Introduced the 'meta_type_key' parameter. * @since 5.9.0 Added 'capability', 'capability__in', and 'capability__not_in' parameters. * Deprecated the 'who' parameter. * @since 6.3.0 Added 'cache_results' parameter. * * @global wpdb $wpdb WordPress database abstraction object. * @global WP_Roles $wp_roles WordPress role management object. * * @param string|array $query { * Optional. Array or string of query parameters. * * @type int $blog_id The site ID. Default is the current site. * @type string|string[] $role An array or a comma-separated list of role names that users * must match to be included in results. Note that this is * an inclusive list: users must match *each* role. Default empty. * @type string[] $role__in An array of role names. Matched users must have at least one * of these roles. Default empty array. * @type string[] $role__not_in An array of role names to exclude. Users matching one or more * of these roles will not be included in results. Default empty array. * @type string|string[] $meta_key Meta key or keys to filter by. * @type string|string[] $meta_value Meta value or values to filter by. * @type string $meta_compare MySQL operator used for comparing the meta value. * See WP_Meta_Query::__construct() for accepted values and default value. * @type string $meta_compare_key MySQL operator used for comparing the meta key. * See WP_Meta_Query::__construct() for accepted values and default value. * @type string $meta_type MySQL data type that the meta_value column will be CAST to for comparisons. * See WP_Meta_Query::__construct() for accepted values and default value. * @type string $meta_type_key MySQL data type that the meta_key column will be CAST to for comparisons. * See WP_Meta_Query::__construct() for accepted values and default value. * @type array $meta_query An associative array of WP_Meta_Query arguments. * See WP_Meta_Query::__construct() for accepted values. * @type string|string[] $capability An array or a comma-separated list of capability names that users * must match to be included in results. Note that this is * an inclusive list: users must match *each* capability. * Does NOT work for capabilities not in the database or filtered * via {@see 'map_meta_cap'}. Default empty. * @type string[] $capability__in An array of capability names. Matched users must have at least one * of these capabilities. * Does NOT work for capabilities not in the database or filtered * via {@see 'map_meta_cap'}. Default empty array. * @type string[] $capability__not_in An array of capability names to exclude. Users matching one or more * of these capabilities will not be included in results. * Does NOT work for capabilities not in the database or filtered * via {@see 'map_meta_cap'}. Default empty array. * @type int[] $include An array of user IDs to include. Default empty array. * @type int[] $exclude An array of user IDs to exclude. Default empty array. * @type string $search Search keyword. Searches for possible string matches on columns. * When `$search_columns` is left empty, it tries to determine which * column to search in based on search string. Default empty. * @type string[] $search_columns Array of column names to be searched. Accepts 'ID', 'user_login', * 'user_email', 'user_url', 'user_nicename', 'display_name'. * Default empty array. * @type string|array $orderby Field(s) to sort the retrieved users by. May be a single value, * an array of values, or a multi-dimensional array with fields as * keys and orders ('ASC' or 'DESC') as values. Accepted values are: * - 'ID' * - 'display_name' (or 'name') * - 'include' * - 'user_login' (or 'login') * - 'login__in' * - 'user_nicename' (or 'nicename') * - 'nicename__in' * - 'user_email' (or 'email') * - 'user_url' (or 'url') * - 'user_registered' (or 'registered') * - 'post_count' * - 'meta_value' * - 'meta_value_num' * - The value of `$meta_key` * - An array key of `$meta_query` * To use 'meta_value' or 'meta_value_num', `$meta_key` * must be also be defined. Default 'user_login'. * @type string $order Designates ascending or descending order of users. Order values * passed as part of an `$orderby` array take precedence over this * parameter. Accepts 'ASC', 'DESC'. Default 'ASC'. * @type int $offset Number of users to offset in retrieved results. Can be used in * conjunction with pagination. Default 0. * @type int $number Number of users to limit the query for. Can be used in * conjunction with pagination. Value -1 (all) is supported, but * should be used with caution on larger sites. * Default -1 (all users). * @type int $paged When used with number, defines the page of results to return. * Default 1. * @type bool $count_total Whether to count the total number of users found. If pagination * is not needed, setting this to false can improve performance. * Default true. * @type string|string[] $fields Which fields to return. Single or all fields (string), or array * of fields. Accepts: * - 'ID' * - 'display_name' * - 'user_login' * - 'user_nicename' * - 'user_email' * - 'user_url' * - 'user_registered' * - 'user_pass' * - 'user_activation_key' * - 'user_status' * - 'spam' (only available on multisite installs) * - 'deleted' (only available on multisite installs) * - 'all' for all fields and loads user meta. * - 'all_with_meta' Deprecated. Use 'all'. * Default 'all'. * @type string $who Deprecated, use `$capability` instead. * Type of users to query. Accepts 'authors'. * Default empty (all users). * @type bool|string[] $has_published_posts Pass an array of post types to filter results to users who have * published posts in those post types. `true` is an alias for all * public post types. * @type string $nicename The user nicename. Default empty. * @type string[] $nicename__in An array of nicenames to include. Users matching one of these * nicenames will be included in results. Default empty array. * @type string[] $nicename__not_in An array of nicenames to exclude. Users matching one of these * nicenames will not be included in results. Default empty array. * @type string $login The user login. Default empty. * @type string[] $login__in An array of logins to include. Users matching one of these * logins will be included in results. Default empty array. * @type string[] $login__not_in An array of logins to exclude. Users matching one of these * logins will not be included in results. Default empty array. * @type bool $cache_results Whether to cache user information. Default true. * } */ public function prepare_query( $query = array() ) { global $wpdb, $wp_roles; if ( empty( $this->query_vars ) || ! empty( $query ) ) { $this->query_limit = null; $this->query_vars = $this->fill_query_vars( $query ); } /** * Fires before the WP_User_Query has been parsed. * * The passed WP_User_Query object contains the query variables, * not yet passed into SQL. * * @since 4.0.0 * * @param WP_User_Query $query Current instance of WP_User_Query (passed by reference). */ do_action_ref_array( 'pre_get_users', array( &$this ) ); // Ensure that query vars are filled after 'pre_get_users'. $qv =& $this->query_vars; $qv = $this->fill_query_vars( $qv ); $allowed_fields = array( 'id', 'user_login', 'user_pass', 'user_nicename', 'user_email', 'user_url', 'user_registered', 'user_activation_key', 'user_status', 'display_name', ); if ( is_multisite() ) { $allowed_fields[] = 'spam'; $allowed_fields[] = 'deleted'; } if ( is_array( $qv['fields'] ) ) { $qv['fields'] = array_map( 'strtolower', $qv['fields'] ); $qv['fields'] = array_intersect( array_unique( $qv['fields'] ), $allowed_fields ); if ( empty( $qv['fields'] ) ) { $qv['fields'] = array( 'id' ); } $this->query_fields = array(); foreach ( $qv['fields'] as $field ) { $field = 'id' === $field ? 'ID' : sanitize_key( $field ); $this->query_fields[] = "$wpdb->users.$field"; } $this->query_fields = implode( ',', $this->query_fields ); } elseif ( 'all_with_meta' === $qv['fields'] || 'all' === $qv['fields'] || ! in_array( $qv['fields'], $allowed_fields, true ) ) { $this->query_fields = "$wpdb->users.ID"; } else { $field = 'id' === strtolower( $qv['fields'] ) ? 'ID' : sanitize_key( $qv['fields'] ); $this->query_fields = "$wpdb->users.$field"; } if ( isset( $qv['count_total'] ) && $qv['count_total'] ) { $this->query_fields = 'SQL_CALC_FOUND_ROWS ' . $this->query_fields; } $this->query_from = "FROM $wpdb->users"; $this->query_where = 'WHERE 1=1'; // Parse and sanitize 'include', for use by 'orderby' as well as 'include' below. if ( ! empty( $qv['include'] ) ) { $include = wp_parse_id_list( $qv['include'] ); } else { $include = false; } $blog_id = 0; if ( isset( $qv['blog_id'] ) ) { $blog_id = absint( $qv['blog_id'] ); } if ( $qv['has_published_posts'] && $blog_id ) { if ( true === $qv['has_published_posts'] ) { $post_types = get_post_types( array( 'public' => true ) ); } else { $post_types = (array) $qv['has_published_posts']; } foreach ( $post_types as &$post_type ) { $post_type = $wpdb->prepare( '%s', $post_type ); } $posts_table = $wpdb->get_blog_prefix( $blog_id ) . 'posts'; $this->query_where .= " AND $wpdb->users.ID IN ( SELECT DISTINCT $posts_table.post_author FROM $posts_table WHERE $posts_table.post_status = 'publish' AND $posts_table.post_type IN ( " . implode( ', ', $post_types ) . ' ) )'; } // nicename if ( '' !== $qv['nicename'] ) { $this->query_where .= $wpdb->prepare( ' AND user_nicename = %s', $qv['nicename'] ); } if ( ! empty( $qv['nicename__in'] ) ) { $sanitized_nicename__in = array_map( 'esc_sql', $qv['nicename__in'] ); $nicename__in = implode( "','", $sanitized_nicename__in ); $this->query_where .= " AND user_nicename IN ( '$nicename__in' )"; } if ( ! empty( $qv['nicename__not_in'] ) ) { $sanitized_nicename__not_in = array_map( 'esc_sql', $qv['nicename__not_in'] ); $nicename__not_in = implode( "','", $sanitized_nicename__not_in ); $this->query_where .= " AND user_nicename NOT IN ( '$nicename__not_in' )"; } // login if ( '' !== $qv['login'] ) { $this->query_where .= $wpdb->prepare( ' AND user_login = %s', $qv['login'] ); } if ( ! empty( $qv['login__in'] ) ) { $sanitized_login__in = array_map( 'esc_sql', $qv['login__in'] ); $login__in = implode( "','", $sanitized_login__in ); $this->query_where .= " AND user_login IN ( '$login__in' )"; } if ( ! empty( $qv['login__not_in'] ) ) { $sanitized_login__not_in = array_map( 'esc_sql', $qv['login__not_in'] ); $login__not_in = implode( "','", $sanitized_login__not_in ); $this->query_where .= " AND user_login NOT IN ( '$login__not_in' )"; } // Meta query. $this->meta_query = new WP_Meta_Query(); $this->meta_query->parse_query_vars( $qv ); if ( isset( $qv['who'] ) && 'authors' === $qv['who'] && $blog_id ) { _deprecated_argument( 'WP_User_Query', '5.9.0', sprintf( /* translators: 1: who, 2: capability */ __( '%1$s is deprecated. Use %2$s instead.' ), 'who', 'capability' ) ); $who_query = array( 'key' => $wpdb->get_blog_prefix( $blog_id ) . 'user_level', 'value' => 0, 'compare' => '!=', ); // Prevent extra meta query. $qv['blog_id'] = 0; $blog_id = 0; if ( empty( $this->meta_query->queries ) ) { $this->meta_query->queries = array( $who_query ); } else { // Append the cap query to the original queries and reparse the query. $this->meta_query->queries = array( 'relation' => 'AND', array( $this->meta_query->queries, $who_query ), ); } $this->meta_query->parse_query_vars( $this->meta_query->queries ); } // Roles. $roles = array(); if ( isset( $qv['role'] ) ) { if ( is_array( $qv['role'] ) ) { $roles = $qv['role']; } elseif ( is_string( $qv['role'] ) && ! empty( $qv['role'] ) ) { $roles = array_map( 'trim', explode( ',', $qv['role'] ) ); } } $role__in = array(); if ( isset( $qv['role__in'] ) ) { $role__in = (array) $qv['role__in']; } $role__not_in = array(); if ( isset( $qv['role__not_in'] ) ) { $role__not_in = (array) $qv['role__not_in']; } // Capabilities. $available_roles = array(); if ( ! empty( $qv['capability'] ) || ! empty( $qv['capability__in'] ) || ! empty( $qv['capability__not_in'] ) ) { $wp_roles->for_site( $blog_id ); $available_roles = $wp_roles->roles; } $capabilities = array(); if ( ! empty( $qv['capability'] ) ) { if ( is_array( $qv['capability'] ) ) { $capabilities = $qv['capability']; } elseif ( is_string( $qv['capability'] ) ) { $capabilities = array_map( 'trim', explode( ',', $qv['capability'] ) ); } } $capability__in = array(); if ( ! empty( $qv['capability__in'] ) ) { $capability__in = (array) $qv['capability__in']; } $capability__not_in = array(); if ( ! empty( $qv['capability__not_in'] ) ) { $capability__not_in = (array) $qv['capability__not_in']; } // Keep track of all capabilities and the roles they're added on. $caps_with_roles = array(); foreach ( $available_roles as $role => $role_data ) { $role_caps = array_keys( array_filter( $role_data['capabilities'] ) ); foreach ( $capabilities as $cap ) { if ( in_array( $cap, $role_caps, true ) ) { $caps_with_roles[ $cap ][] = $role; break; } } foreach ( $capability__in as $cap ) { if ( in_array( $cap, $role_caps, true ) ) { $role__in[] = $role; break; } } foreach ( $capability__not_in as $cap ) { if ( in_array( $cap, $role_caps, true ) ) { $role__not_in[] = $role; break; } } } $role__in = array_merge( $role__in, $capability__in ); $role__not_in = array_merge( $role__not_in, $capability__not_in ); $roles = array_unique( $roles ); $role__in = array_unique( $role__in ); $role__not_in = array_unique( $role__not_in ); // Support querying by capabilities added directly to users. if ( $blog_id && ! empty( $capabilities ) ) { $capabilities_clauses = array( 'relation' => 'AND' ); foreach ( $capabilities as $cap ) { $clause = array( 'relation' => 'OR' ); $clause[] = array( 'key' => $wpdb->get_blog_prefix( $blog_id ) . 'capabilities', 'value' => '"' . $cap . '"', 'compare' => 'LIKE', ); if ( ! empty( $caps_with_roles[ $cap ] ) ) { foreach ( $caps_with_roles[ $cap ] as $role ) { $clause[] = array( 'key' => $wpdb->get_blog_prefix( $blog_id ) . 'capabilities', 'value' => '"' . $role . '"', 'compare' => 'LIKE', ); } } $capabilities_clauses[] = $clause; } $role_queries[] = $capabilities_clauses; if ( empty( $this->meta_query->queries ) ) { $this->meta_query->queries[] = $capabilities_clauses; } else { // Append the cap query to the original queries and reparse the query. $this->meta_query->queries = array( 'relation' => 'AND', array( $this->meta_query->queries, array( $capabilities_clauses ) ), ); } $this->meta_query->parse_query_vars( $this->meta_query->queries ); } if ( $blog_id && ( ! empty( $roles ) || ! empty( $role__in ) || ! empty( $role__not_in ) || is_multisite() ) ) { $role_queries = array(); $roles_clauses = array( 'relation' => 'AND' ); if ( ! empty( $roles ) ) { foreach ( $roles as $role ) { $roles_clauses[] = array( 'key' => $wpdb->get_blog_prefix( $blog_id ) . 'capabilities', 'value' => '"' . $role . '"', 'compare' => 'LIKE', ); } $role_queries[] = $roles_clauses; } $role__in_clauses = array( 'relation' => 'OR' ); if ( ! empty( $role__in ) ) { foreach ( $role__in as $role ) { $role__in_clauses[] = array( 'key' => $wpdb->get_blog_prefix( $blog_id ) . 'capabilities', 'value' => '"' . $role . '"', 'compare' => 'LIKE', ); } $role_queries[] = $role__in_clauses; } $role__not_in_clauses = array( 'relation' => 'AND'