There is a known troubleshooting case where replacing whitespace with preg_replace() corrupts UTF-8 characters (e.g. à fails to encode and is rendered as �). This is due to a mismatch between the current locale and the defined locale. The /u flag makes the pattern Unicode-aware, so UTF-8 characters are filtered properly.
In some instances, removing new lines and multiple spaces from inline <script> tags will cause the script to break. Isolating the <script> tag's content prevents this: the buffer is split with preg_split() into an array delimited by the opening and closing script tags.
<?php
/**
 * Turn on output buffering, prettify and minify the HTML output.
 *
 * The buffer callback collapses whitespace, strips HTML comments and normalises
 * attribute quotes everywhere except inside <script> elements, whose content is
 * passed through untouched.
 *
 * @see https://www.php.net/manual/en/function.ob-start.php
 *
 * @since 1.0.0
 */
add_action( 'wp_loaded', function () {
	ob_start( function ( $buffer ) {
		/**
		 * preg_replace() UTF-8 troubleshooting.
		 *
		 * Replacing empty space with preg_replace causes invalid characters with UTF-8.
		 * As preg_replace() depends on the current defined locale, characters not supported will be returned as � invalid.
		 * The /u flag is used to make regex unicode aware.
		 *
		 * The rules are applied in the order they are listed.
		 *
		 * @see https://mcmap.net/q/95241/-replacing-empty-space-with-preg_replace-causes-invalid-characters-with-utf-8
		 */
		$replace = array(
			// Turn new lines into a space (not nothing) so words separated only by a line break are not glued together.
			'/\n/smu'                       => ' ',
			// Replace multiple spaces with a single one (the last whitespace character of the run is kept).
			'/(\s)+/smu'                    => '\\1',
			// Remove spaces before and after opening tags.
			'/\s*<\s*/smu'                  => '<',
			// Remove spaces before and after closing tags.
			'/\s*>\s*/smu'                  => '>',
			// Remove spaces before and after closing tags for self closing tags.
			'/\s*\/>\s*/smu'                => '>',
			// Remove HTML comments. `.` with the /s flag matches any character, avoiding the costly `(.|\s)` alternation.
			'/<!--.*?-->/s'                 => '',
			// Replace single quotes with double quotes within attributes. Values that
			// contain a double quote (e.g. JSON in data-* attributes) are left alone,
			// because re-quoting them would produce broken markup.
			'/\s*=\s*\'([^\'"]*)\'/smu'     => '="$1"',
		);
		$patterns     = array_keys( $replace );
		$replacements = array_values( $replace );

		/**
		 * preg_replace() inline <script> troubleshooting.
		 *
		 * In some instances, removing new lines and multiple spaces from inline <script> tags will cause the script to break.
		 * Isolating the <script> tag's content will prevent this from happening which is done by splitting the buffer into an array delimited by opening and closing script tags through preg_split().
		 *
		 * With PREG_SPLIT_DELIM_CAPTURE and a single capture group, even indexes hold the
		 * text between script tags and odd indexes hold the script tags themselves.
		 * The match is case-insensitive and uses \b so tags such as <scripting> are not
		 * mistaken for script tags. The limit is -1 (no limit); passing null is deprecated
		 * since PHP 8.1.
		 *
		 * @see https://mcmap.net/q/94935/-removing-new-lines-except-in-lt-pre-gt
		 */
		$parts = preg_split( '/(<\/?script\b[^>]*>)/i', $buffer, -1, PREG_SPLIT_DELIM_CAPTURE );

		// If the split failed, send the page unmodified rather than losing it.
		if ( false === $parts ) {
			return $buffer;
		}

		// True while the chunks being visited sit between <script ...> and </script>.
		$in_script = false;

		foreach ( $parts as $index => $part ) {
			if ( 1 === $index % 2 ) {
				// Odd index: a script tag captured as delimiter.
				if ( 0 === strpos( $part, '</' ) ) {
					$in_script = false;
				} elseif ( $in_script ) {
					// A "<script ...>" literal inside an already open script is script content: keep it verbatim.
					continue;
				} else {
					$in_script = true;
				}
			} elseif ( $in_script ) {
				// Even index inside a script element: the script's content, which we want to ignore.
				continue;
			}

			$minified = preg_replace( $patterns, $replacements, $part );

			// preg_replace() returns null on error (e.g. invalid UTF-8 with the /u flag);
			// keep the original chunk in that case instead of dropping its content.
			if ( null !== $minified ) {
				$parts[ $index ] = $minified;
			}
		}

		return implode( '', $parts );
	} );
} );
/**
 * Flush and close the current output buffer at the end of the request.
 *
 * ob_get_clean() returns the buffered contents and then discards them without
 * sending anything, so a buffer that is still open at this point would be lost.
 * ob_end_flush() sends the contents (through the buffer's callback) and closes
 * the buffer instead. The call is skipped when no buffer is active.
 *
 * @see https://www.php.net/manual/en/function.ob-end-flush.php
 *
 * @since 1.0.0
 */
add_action( 'shutdown', function () {
	if ( ob_get_level() > 0 ) {
		ob_end_flush();
	}
} );