<?php
/**
 *---------------------------------------------------------------------------------------
 * @package       VirtuePlanet Framework for Joomla!
 *---------------------------------------------------------------------------------------
 * @copyright     Copyright (C) 2012-2024 VirtuePlanet Services LLP. All rights reserved.
 * @license       GNU General Public License version 2 or later; see LICENSE.txt
 * @authors       Abhishek Das
 * @email         info@virtueplanet.com
 * @link          https://www.virtueplanet.com
 *---------------------------------------------------------------------------------------
 */
defined('_JEXEC') or die;

/**
 * HTML compressor.
 *
 * Minifies an HTML document through a fixed sequence of regular-expression
 * passes. How many passes run depends on the configured strength (see
 * setStrength()). The content of inline <script> and <style> elements is
 * handed to optional JS / CSS compressor objects.
 *
 * NOTE(review): replace() and the pattern properties double_quote,
 * single_quote, comment_block and comment_line are not declared in this class;
 * they are presumably provided by VPFrameworkCompressor - confirm there.
 */
class VPFrameworkCompressorHtml extends VPFrameworkCompressor
{
	/**
	 * Compressor used for inline scripts. This class calls proccess() and
	 * getRegexLiteral() on it.
	 *
	 * @var object|null
	 */
	protected $js_compressor  = null;

	/**
	 * Compressor used for inline styles. This class calls proccess() on it.
	 *
	 * @var object|null
	 */
	protected $css_compressor = null;

	/**
	 * Untouched copy of the input, returned when compression throws.
	 *
	 * @var string
	 */
	protected $o_html         = '';

	/**
	 * Working buffer that every pass reads from and writes back to.
	 *
	 * @var string
	 */
	protected $html           = '';

	/**
	 * Compression strength: below 1 only the basic passes run, 1 adds the
	 * comment / whitespace passes, 2 or more adds the attribute passes.
	 *
	 * @var int
	 */
	protected $strength       = 0;
	
	/**
	 * Compress an HTML document.
	 *
	 * The (misspelled) method name is part of the public API and is kept as is.
	 * A compressor argument that is not an object leaves the compressor stored
	 * by an earlier call in place.
	 *
	 * @param   string       $html            HTML to compress.
	 * @param   object|null  $js_compressor   Optional compressor for inline scripts.
	 * @param   object|null  $css_compressor  Optional compressor for inline styles.
	 *
	 * @return  string  Compressed HTML, or the original HTML if an Exception is thrown.
	 */
	public function proccess($html, $js_compressor = null, $css_compressor = null)
	{
		$this->html   = $html;
		$this->o_html = $this->html;
		
		if (is_object($js_compressor))
		{
			$this->js_compressor  = $js_compressor;
		}
		
		if (is_object($css_compressor))
		{
			$this->css_compressor = $css_compressor;
		}
		
		try
		{
			return $this->compress();
		}
		catch (Exception $e) 
		{
			// Best effort: never break the page because minification failed.
			return $this->o_html;
		}
	}
	
	/**
	 * Set the compression strength.
	 *
	 * @param   mixed  $value  Strength; cast to int.
	 *
	 * @return  void
	 */
	public function setStrength($value)
	{
		$this->strength = (int) $value;
	}
	
	/**
	 * Run the compression passes over the working buffer.
	 *
	 * Passes 2 and 3 always run. Pass 1 and passes 4-7 need strength >= 1,
	 * passes 8-10 need strength >= 2. The order of the passes matters: each
	 * one works on the output of the previous one.
	 *
	 * @return  string  The trimmed, compressed HTML.
	 */
	protected function compress()
	{
		$comments     = '<!--(?>-?[^-]*+)*?--!?>';
		$double_quote = $this->double_quote;
		$single_quote = $this->single_quote;

		// HTML elements whose content must be skipped over as a whole
		$pre      = "<pre\b[^>]*+>(?><?[^<]*+)*?</pre>";
		$script   = "<script\b[^>]*+>(?><?[^<]*+)*?</script>";
		$style    = "<style\b[^>]*+>(?><?[^<]*+)*?</style>";
		$textarea = "<textarea\b[^>]*+>(?><?[^<]*+)*?</textarea>";

		if ($this->strength > 0)
		{
			// Pass 1: remove comments (not containing IE conditional comments)
			$regex = "#(?><?[^<]*+(?>$pre|$script|$style|$textarea|<!--\[(?><?[^<]*+)*?" .
			         "<!\s*\[(?>-?[^-]*+)*?--!?>|<!DOCTYPE[^>]++>)?)*?\K(?:$comments|$)#i";

			$this->html = $this->replace($regex, '', $this->html, '1');
		}

		// Pass 2: reduce runs of whitespace outside all elements to one
		$regex = "#(?>[^<]*+(?:$pre|$script|$style|$textarea|$comments|<(?>[^>=]*+(?:=\s*+(?:$double_quote|$single_quote|['\"])?|(?=>)))*?>)?)*?\K" .
		         '(?:[\t\f ]++(?=[\r\n]\s*+<)|(?>\r?\n|\r)\K\s++(?=<)|[\t\f]++(?=[ ]\s*+<)|[\t\f]\K\s*+(?=<)|[ ]\K\s*+(?=<)|$)#i';

		$this->html = $this->replace($regex, '', $this->html, '2');

		// Pass 3: minify inline scripts and styles through handleStyleScript()
		$regex = "#(?><?[^<]*+(?:$comments)?)*?\K" .
		         "(?:(<script\b[^>]*+>)((?><?[^<]*+)*?)(</script>)|" .
		         "(<style\b[^>]*+>)((?><?[^<]*+)*?)(</style>)|$)#i";

		$this->html = $this->replace($regex, '', $this->html, '3', array($this, 'handleStyleScript'));

		if ($this->strength < 1)
		{
			return trim($this->html);
		}

		// Pass 4: replace line feed with space (legacy)
		$regex = "#(?>[^<]*+(?:$pre|$script|$style|$textarea|$comments|<(?>[^>=]*+(?:=\s*+(?:$double_quote|$single_quote|['\"])?|(?=>)))*?>)?)*?\K" .
		         '(?:[\r\n\t\f]++(?=<)|$)#i';

		$this->html = $this->replace($regex, ' ', $this->html, '4');

		// Pass 5: remove ws around block elements preserving space around inline elements
		// block/undisplayed elements
		$b = 'address|article|aside|audio|body|blockquote|canvas|dd|div|dl|fieldset|figcaption|figure|footer' .
		     '|form|h[1-6]|head|header|hgroup|html|noscript|ol|output|p|pre|section|style|table|title|tfoot|ul|video';

		// self closing block/undisplayed elements
		$b2 = 'base|meta|link|hr';

		// Inline elements
		$i = 'b|big|i|small|tt|abbr|acronym|cite|code|dfn|em|kbd|strong|samp|var|a|bdo|br|map|object|q|script|span|sub|sup' .
		     '|button|label|select|textarea';

		// self closing inline elements
		$i2 = 'img|input';

		$regex = "#(?>\s*+(?:$pre|$script|$style|$textarea|$comments|<(?:(?>$i)\b[^>]*+>|(?:/(?>$i)\b>|(?>$i2)\b[^>]*+>)\s*+)|<[^>]*+>)|[^<]++)*?\K".
		         "(?:\s++(?=<(?>$b|$b2)\b)|(?:</(?>$b)\b>|<(?>$b2)\b[^>]*+>)\K\s++(?!<(?>$i|$i2)\b)|$)#i";

		$this->html = $this->replace($regex, '', $this->html, '5');

		// Pass 6: replace runs of whitespace inside elements with single space escaping pre, textarea, scripts and style elements
		// elements to escape
		$e = 'pre|script|style|textarea';

		$regex = "#(?>[^<]*+(?:$pre|$script|$style|$textarea|$comments|<[^>]++>[^<]*+))*?(?:(?:<(?!$e|!)[^>]*+>)?(?>\s?[^\s<]*+)*?\K\s{2,}|\K$)#i";
		
		$this->html = $this->replace($regex, ' ', $this->html, '6');

		// Pass 7: remove additional ws around attributes
		$regex = "#(?>\s?(?>[^<>]*+(?:<!(?!DOCTYPE)(?>>?[^>]*+)*?>[^<>]*+)?<|(?=[^<>]++>)[^\s>'\"]++(?>$double_quote|$single_quote)?|[^<]*+))*?\K" .
		         "(?>\s\s++|$)#i";

		$this->html = $this->replace($regex, ' ', $this->html, '7');

		if ($this->strength < 2)
		{
			return trim($this->html);
		}

		// Pass 8: remove redundant type / language attributes from script, style and link tags
		$regex = "#(?:(?=[^<>]++>)|(?><?[^<]*+(?>$comments|<(?!(?:script|style|link)|/html>))?)*?" .
		         "<(?:(?:script|style|link)|/html>))(?>[ ]?[^ >]*+)*?\K" .
		         '(?: (?:type|language)=["\']?(?:(?:text|application)/(?:javascript|css)|javascript)["\']?|[^<]*+\K$)#i';

		$this->html = $this->replace($regex, '', $this->html, '8');

		// Hidden 32-hex-digit token input; passes 9 and 10 step over it untouched
		$token = '<input type="hidden" name="[0-9a-f]{32}" value="1" />';

		// Pass 9: remove quotes from selected attributes
		// $ns1 / $ns2: quoted values that must keep their quotes
		$ns1 = '"[^"\'`=<>\s]*+(?:[\'`=<>\s]|(?<=\\\\)")(?>(?:(?<=\\\\)")?[^"]*+)*?(?<!\\\\)"';
		$ns2 = "'[^'\"`=<>\s]*+(?:[\"`=<>\s]|(?<=\\\\)')(?>(?:(?<=\\\\)')?[^']*+)*?(?<!\\\\)'";

		$regex = "#(?:(?=[^>]*+>)|<[a-z0-9]++ )" .
		         "(?>[=]?[^=><]*+(?:=(?:$ns1|$ns2)|>(?>[^<]*+(?:$token|$comments|<(?![a-z0-9]++ ))?)*?(?:<[a-z0-9]++ |$))?)*?" .
		         "(?:=\K([\"'])([^\"'`=<>\s]++)\g{1}[ ]?|\K$)#i";

		$this->html = $this->replace($regex, '$2 ', $this->html, '9');

		// Pass 10: remove last whitespace in open tag
		$regex  = "#(?>[^<]*+(?:$token|$comments|<(?![a-z0-9]++))?)*?(?:<[a-z0-9]++(?>\s*+[^\s>]++)*?\K" .
		          "(?:\s*+(?=>)|(?<=[\"'])\s++(?=/>))|$\K)#i";

		$this->html = $this->replace($regex, '', $this->html, '10');

		return trim($this->html);
	}
	
	/**
	 * Replacement callback for pass 3: minify one inline <script> or <style>.
	 *
	 * Groups 1-3 hold open tag / content / close tag of a script element,
	 * groups 4-6 the same for a style element. The element is returned
	 * unchanged when it is empty, when no matching compressor is configured,
	 * or when it is a <script> whose type attribute marks it as a data block
	 * (JSON-LD, import map, template, ...) rather than JavaScript.
	 *
	 * @param   array  $matches  Match groups of the pass 3 pattern.
	 *
	 * @return  string  Replacement text for the whole match.
	 */
	public function handleStyleScript($matches)
	{
		if (empty($matches[0]))
		{
			return $matches[0];
		}
		
		$openTag  = !empty($matches[1]) ? $matches[1] : (!empty($matches[4]) ? $matches[4] : '');
		$content  = !empty($matches[2]) ? $matches[2] : (!empty($matches[5]) ? $matches[5] : '');
		$closeTag = !empty($matches[3]) ? $matches[3] : (!empty($matches[6]) ? $matches[6] : '');
		
		$trimmedContent = trim($content);
		
		if (!empty($trimmedContent))
		{
			// "<script" has the tag name at offset 1; stripos() yields false when absent.
			$isScript = stripos($openTag, 'script') === 1;
			$type     = $isScript ? 'js' : 'css';
			$property = $type . '_compressor';

			// Data blocks are not JavaScript; running them through the JS compressor can corrupt them.
			if ($isScript && !$this->isJavaScriptTag($openTag))
			{
				return $matches[0];
			}

			if (!empty($this->$property))
			{
				$content = $this->cleanScript($type, $content);
				$content = $this->$property->proccess($content);
				
				return "{$openTag}{$content}{$closeTag}";
			}
		}
		
		return $matches[0];
	}
	
	/**
	 * Tell whether a <script> open tag declares JavaScript content.
	 *
	 * A missing or empty type attribute, "module", and the JavaScript MIME
	 * types (MIME parameters ignored) count as JavaScript; any other type is
	 * treated as a data block.
	 *
	 * @param   string  $openTag  The complete open tag, e.g. <script type="text/javascript">.
	 *
	 * @return  boolean  True when the content may be handled as JavaScript.
	 */
	protected function isJavaScriptTag($openTag)
	{
		// Branch reset (?|...) puts the value in group 1 whatever the quoting style.
		if (!preg_match('#\stype\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]*))#i', $openTag, $attribute))
		{
			return true;
		}

		$value     = strtolower(trim($attribute[1]));
		$semicolon = strpos($value, ';');

		if ($semicolon !== false)
		{
			// Ignore MIME parameters such as "; charset=utf-8"
			$value = rtrim(substr($value, 0, $semicolon));
		}

		if ($value === '' || $value === 'module')
		{
			return true;
		}

		return preg_match('#^(?:text|application)/(?:x-)?(?:java|ecma|j|live)script(?:1\.[0-5])?$#', $value) === 1;
	}
	
	/**
	 * Strip HTML comment markers and CDATA markers from inline script / style content.
	 *
	 * For CSS only the markers are removed. For any other type the content is
	 * first passed to the JS compressor's proccess() with true as second
	 * argument, then cleaned using that compressor's regex-literal pattern so
	 * markers inside regex literals are skipped. Without a JS compressor
	 * non-CSS content is returned unchanged.
	 *
	 * @param   string  $type     'css' for styles; anything else is treated as script.
	 * @param   string  $content  Raw element content.
	 *
	 * @return  string  Cleaned content; if the regex engine fails, the content as it was before the marker removal.
	 */
	public function cleanScript($type, $content)
	{
		$double_quote   = $this->double_quote;
		$single_quote   = $this->single_quote;
		$comment_block  = $this->comment_block;
		$comment_line   = $this->comment_line;
		$comment_inline = '(?:(?:<!--|-->)[^\r\n]*+)|(?:<!\[CDATA\[|\]\]>)';

		if ($type == 'css')
		{
			$cleaned = preg_replace("#(?>[<\]\-]?[^'\"<\]\-/]*+(?>$double_quote|$single_quote|$comment_block|$comment_line|/)?)*?\K(?:$comment_inline|$)#i", '', $content);
		}
		elseif (!empty($this->js_compressor))
		{
			$content = $this->js_compressor->proccess($content, true);
			$regex   = $this->js_compressor->getRegexLiteral();

			$cleaned = preg_replace("#(?>[<\]\-]?[^'\"<\]\-/]*+(?>$double_quote|$single_quote|$comment_block|$comment_line|$regex|/)?)*?\K(?:$comment_inline|$)#i", '', $content);
		}
		else
		{
			return $content;
		}

		// preg_replace() returns null on a PCRE error (e.g. backtrack limit);
		// keep the content rather than wiping the whole script / style.
		return $cleaned === null ? $content : $cleaned;
	}
}