| Conditions | 54 |
| Paths | 14112 |
| Total Lines | 244 |
| Code Lines | 125 |
| Lines | 18 |
| Ratio | 7.38 % |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
| 1 | <?php |
||
/**
 * Extracts metadata about images, mentions, hashtags, shortcodes, links and embeds from content.
 *
 * Each section only runs when its flag is set in $what_to_extract. Shortcodes are always
 * located and stripped from the working copy of $content (even when they are not being
 * extracted) so that URLs inside them are not picked up as links or embeds.
 *
 * @param string $content           The content to inspect.
 * @param int    $what_to_extract   Bitmask built from self::IMAGES, self::MENTIONS, self::HASHTAGS,
 *                                  self::SHORTCODES, self::LINKS and self::EMBEDS. Defaults to self::ALL.
 * @param array  $already_extracted Previously extracted data used as the starting point of the result.
 *
 * @return array The extracted data. Depending on what was found it may contain the keys
 *               'mention', 'hashtag', 'shortcode', 'shortcode_types', 'link' and 'embed'
 *               (plus whatever the image extractor merges in), and a 'has' map of per-type counts.
 */
static public function extract_from_content( $content, $what_to_extract = self::ALL, $already_extracted = array() ) {
	$stripped_content = self::get_stripped_content( $content );

	// Maybe start with some previously extracted things (e.g. images from extract()).
	$extracted = $already_extracted;

	// Embedded media objects will have already been converted to shortcodes by pre_kses hooks on save.

	if ( self::IMAGES & $what_to_extract ) {
		$images    = Jetpack_Media_Meta_Extractor::extract_images_from_content( $stripped_content, array() );
		$extracted = array_merge( $extracted, $images );
	}

	// ----------------------------------- MENTIONS ------------------------------

	if ( self::MENTIONS & $what_to_extract ) {
		if ( preg_match_all( '/(^|\s)@(\w+)/u', $stripped_content, $matches ) ) {
			// Lowercase first, then de-duplicate, so "@Bob" and "@bob" are counted once.
			// array_values() is needed because array_unique() retains the keys.
			$mentions = array_values( array_unique( array_map( 'strtolower', $matches[2] ) ) );

			$extracted['mention'] = array( 'name' => $mentions );
			if ( ! isset( $extracted['has'] ) ) {
				$extracted['has'] = array();
			}
			$extracted['has']['mention'] = count( $mentions );
		}
	}

	// ----------------------------------- HASHTAGS ------------------------------
	/** Some hosts may not compile with --enable-unicode-properties and kick a warning:
	 *   Warning: preg_match_all() [function.preg-match-all]: Compilation failed: support for \P, \p, and \X has not been compiled
	 * Therefore, we only run this code block on wpcom, not in Jetpack.
	 */
	if ( ( defined( 'IS_WPCOM' ) && IS_WPCOM ) && ( self::HASHTAGS & $what_to_extract ) ) {
		// This regex does not exactly match Twitter's;
		// if there are problems/complaints we should implement this:
		// https://github.com/twitter/twitter-text/blob/master/java/src/com/twitter/Regex.java
		if ( preg_match_all( '/(?:^|\s)#(\w*\p{L}+\w*)/u', $stripped_content, $matches ) ) {
			// Same ordering as for mentions: lowercase, then de-duplicate, then re-index.
			$hashtags = array_values( array_unique( array_map( 'strtolower', $matches[1] ) ) );

			$extracted['hashtag'] = array( 'name' => $hashtags );
			if ( ! isset( $extracted['has'] ) ) {
				$extracted['has'] = array();
			}
			$extracted['has']['hashtag'] = count( $hashtags );
		}
	}

	// ----------------------------------- SHORTCODES ------------------------------

	// Always look for shortcodes.
	// If we don't want them, we'll just remove them, so we don't grab them as links below.
	$shortcode_pattern = '/' . get_shortcode_regex() . '/s';
	if ( preg_match_all( $shortcode_pattern, $content, $matches ) ) {

		$shortcode_total_count = 0;
		$shortcode_type_counts = array();
		$shortcode_types       = array();
		$shortcode_details     = array();

		if ( self::SHORTCODES & $what_to_extract ) {

			foreach ( $matches[2] as $key => $shortcode ) {
				// Elasticsearch (and probably other things) doesn't deal well with some chars as key names.
				$shortcode_name = preg_replace( '/[.,*"\'\/\\\\#+ ]/', '_', $shortcode );

				$attr = shortcode_parse_atts( $matches[3][ $key ] );

				$shortcode_total_count++;

				// Store (uniquely) presence of all shortcodes regardless of whether it's a keeper
				// (for those, get ID below). A name is new exactly when it has no counter yet.
				// @todo Store number of occurrences?
				if ( ! isset( $shortcode_type_counts[ $shortcode_name ] ) ) {
					$shortcode_type_counts[ $shortcode_name ] = 0;
					$shortcode_types[]                        = $shortcode_name;
				}
				$shortcode_type_counts[ $shortcode_name ]++;

				// For keeper shortcodes, also store the id/url of the object (e.g. youtube video, TED talk, etc.)
				if ( in_array( $shortcode, self::$KEEPER_SHORTCODES ) ) {
					// Start clean so an ID from the previous shortcode is never reused.
					$id = null;

					// We'll try to get the salient ID from the function jetpack_shortcode_get_xyz_id().
					// If the shortcode is a class, we'll call XyzShortcode::get_xyz_id().
					$shortcode_get_id_func   = "jetpack_shortcode_get_{$shortcode}_id";
					$shortcode_class_name    = ucfirst( $shortcode ) . 'Shortcode';
					$shortcode_get_id_method = "get_{$shortcode}_id";

					if ( function_exists( $shortcode_get_id_func ) ) {
						$id = call_user_func( $shortcode_get_id_func, $attr );
					} else if ( method_exists( $shortcode_class_name, $shortcode_get_id_method ) ) {
						$id = call_user_func( array( $shortcode_class_name, $shortcode_get_id_method ), $attr );
					}

					// NOTE(review): loose in_array() kept on purpose; the ID helpers are not visible
					// here, so their return types (int vs. numeric string) cannot be confirmed.
					if ( ! empty( $id )
						&& ( ! isset( $shortcode_details[ $shortcode_name ] ) || ! in_array( $id, $shortcode_details[ $shortcode_name ] ) ) ) {
						$shortcode_details[ $shortcode_name ][] = $id;
					}
				}
			}

			if ( $shortcode_total_count > 0 ) {
				// Add the shortcode info to the $extracted array.
				if ( ! isset( $extracted['has'] ) ) {
					$extracted['has'] = array();
				}
				$extracted['has']['shortcode'] = $shortcode_total_count;
				$extracted['shortcode']        = array();

				foreach ( $shortcode_type_counts as $type => $count ) {
					$extracted['shortcode'][ $type ] = array( 'count' => $count );
				}

				if ( ! empty( $shortcode_types ) ) {
					$extracted['shortcode_types'] = $shortcode_types;
				}

				foreach ( $shortcode_details as $type => $ids ) {
					$extracted['shortcode'][ $type ]['id'] = $ids;
				}
			}
		}

		// Remove the shortcodes from our copy of $content, so we don't count links in them as links below.
		$content = preg_replace( $shortcode_pattern, ' ', $content );
	}

	// ----------------------------------- LINKS ------------------------------

	if ( self::LINKS & $what_to_extract ) {

		// To hold the extracted stuff we find.
		$links = array();

		// @todo Get the text inside the links?

		// Grab any links, whether in <a href="..." or not, but subtract those from shortcodes and images
		// (we treat embed links as just another link).
		if ( preg_match_all( '#(?:^|\s|"|\')(https?://([^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/))))#', $content, $matches ) ) {

			foreach ( $matches[1] as $link_raw ) {
				// Remove large (and likely invalid) links.
				if ( 4096 < strlen( $link_raw ) ) {
					continue;
				}

				// parse_url() returns false for seriously malformed URLs and omits components
				// it cannot find; without a scheme and host there is nothing useful to record.
				$url = parse_url( $link_raw );
				if ( ! is_array( $url ) || ! isset( $url['scheme'], $url['host'] ) ) {
					continue;
				}

				// Data URI links.
				if ( 'data' === $url['scheme'] ) {
					continue;
				}

				// Build a simple form of the URL so we can compare it to ones we found in IMAGES or SHORTCODES and exclude those.
				$simple_url = $url['scheme'] . '://' . $url['host'] . ( ! empty( $url['path'] ) ? $url['path'] : '' );
				if ( isset( $extracted['image']['url'] ) ) {
					if ( in_array( $simple_url, (array) $extracted['image']['url'] ) ) {
						continue;
					}
				}

				// Limit the split to 2 pieces: a later "://" (e.g. inside a query string)
				// must stay part of the URL instead of being cut off.
				$link_parts         = explode( '://', $link_raw, 2 );
				$link_all_but_proto = $link_parts[1];

				// Build a reversed hostname (e.g. "www.example.com" => "com.example.www").
				$host_reversed = implode( '.', array_reverse( explode( '.', $url['host'] ) ) );

				// @todo Check unique before adding
				$links[] = array(
					'url'           => $link_all_but_proto,
					'host_reversed' => $host_reversed,
					'host'          => $url['host'],
				);
			}
		}

		$link_count = count( $links );
		if ( $link_count ) {
			$extracted['link'] = $links;
			if ( ! isset( $extracted['has'] ) ) {
				$extracted['has'] = array();
			}
			$extracted['has']['link'] = $link_count;
		}
	}

	// ----------------------------------- EMBEDS ------------------------------

	// Embeds are just individual links on their own line.
	if ( self::EMBEDS & $what_to_extract ) {

		if ( ! function_exists( '_wp_oembed_get_object' ) ) {
			include( ABSPATH . WPINC . '/class-oembed.php' );
		}

		// Get an oembed object.
		$oembed = _wp_oembed_get_object();

		// Grab any links on their own lines that may be embeds.
		if ( preg_match_all( '|^\s*(https?://[^\s"]+)\s*$|im', $content, $matches ) ) {

			// To hold the extracted stuff we find.
			$embeds = array();

			foreach ( $matches[1] as $link_raw ) {
				// Limit of 2 keeps any later "://" inside the stored URL.
				$link_parts         = explode( '://', $link_raw, 2 );
				$link_all_but_proto = $link_parts[1];

				// Check whether this "link" is really an embed.
				foreach ( $oembed->providers as $matchmask => $data ) {
					// Second element of the provider entry flags whether $matchmask is already a regex.
					$is_regex = $data[1];

					// Turn the asterisk-type provider URLs into regex.
					if ( ! $is_regex ) {
						$matchmask = '#' . str_replace( '___wildcard___', '(.+)', preg_quote( str_replace( '*', '___wildcard___', $matchmask ), '#' ) ) . '#i';
						$matchmask = preg_replace( '|^#http\\\://|', '#https?\://', $matchmask );
					}

					if ( preg_match( $matchmask, $link_raw ) ) {
						$embeds[] = $link_all_but_proto; // @todo Check unique before adding

						// @todo Try to get ID's for the ones we care about (shortcode_keepers)
						break;
					}
				}
			}

			if ( ! empty( $embeds ) ) {
				if ( ! isset( $extracted['has'] ) ) {
					$extracted['has'] = array();
				}
				$extracted['has']['embed'] = count( $embeds );
				$extracted['embed']        = array( 'url' => $embeds );
			}
		}
	}

	return $extracted;
}
||
| 326 | |||
| 437 |
This check looks for `TODO` comments that have been left in the code. `TODO`s show that something is left unfinished and should be attended to.