<?php
/**
 *---------------------------------------------------------------------------------------
 * @package       VirtuePlanet Framework for Joomla!
 *---------------------------------------------------------------------------------------
 * @copyright     Copyright (C) 2012-2024 VirtuePlanet Services LLP. All rights reserved.
 * @license       GNU General Public License version 2 or later; see LICENSE.txt
 * @authors       Abhishek Das
 * @email         info@virtueplanet.com
 * @link          https://www.virtueplanet.com
 *---------------------------------------------------------------------------------------
 */
defined('_JEXEC') or die;

jimport('joomla.filesystem.folder');
jimport('joomla.filesystem.file');

/**
 * Parser used by the optimizer to locate script and stylesheet elements in a
 * rendered HTML document.
 *
 * The parser extracts the <head> and <body> sections of the document, collects
 * the <script>, <link> and <style> elements found in them into ordered sets and
 * can rebuild the document buffer from the processed sections.
 */
class VPFrameworkOptimizerParser extends JObject
{
	/** @var  mixed  Parameter object; either passed to the constructor or taken from the template. */
	protected $params;
	/** @var  mixed  Instance returned by VPFrameworkHttp::getInstance(). */
	protected $http;
	/** @var  string  The HTML handed to the constructor. */
	protected $html;
	/** @var  mixed  Instance returned by VPFrameworkBrowser::getInstance(). */
	protected $browser;
	
	/** @var  string|false|null  Cached <head> section; null until getHead() has run. */
	protected $head          = null;
	/** @var  string|null  Cached <body> section; null until getBody() has run. */
	protected $body          = null;
	/** @var  string|null  Head section after parseSection() / applyCdn(). */
	protected $processedHead = null;
	/** @var  string|null  Body section after parseSection() / applyCdn(). */
	protected $processedBody = null;
	/** @var  mixed  Not read or written by any method visible in this class. */
	protected $isValid       = null;
	
	/** @var  array  Sets of external scripts collected by addSet(). */
	protected $scripts       = array();
	/** @var  array  Sets of inline scripts collected by addInlineSet(). */
	protected $inlineScripts = array();
	/** @var  array  Sets of external style sheets collected by addSet(). */
	protected $styleSheets   = array();
	/** @var  array  Sets of inline style blocks collected by addInlineSet(). */
	protected $inlineStyle   = array();
	
	/** @var  array  Number of handled elements per key ('css', 'js', 'css_Inline', 'js_Inline'). */
	protected $index         = array();
	/** @var  array  URLs already reported to isDuplicated(). */
	protected $loadedAssets  = array();
	
	/** @var  string  Tab string reported by the Joomla document. */
	protected static $tab;
	/** @var  string  Line ending reported by the Joomla document. */
	protected static $lineBreak;
	
	/**
	 * Stores the HTML to parse and resolves the helper objects used while parsing.
	 *
	 * @param   string  $html    HTML document to parse.
	 * @param   mixed   $params  Parameter object. When empty, the params of the
	 *                           template returned by the framework helper are used.
	 */
	public function __construct($html, $params = null)
	{
		$this->html = $html;
		
		// Fall back to the template parameters when the caller supplied none.
		if (!$params)
		{
			$params = plgSystemVPFrameworkHelper::getTemplate()->params;
		}
		
		$this->params  = $params;
		$this->http    = VPFrameworkHttp::getInstance();
		$this->browser = VPFrameworkBrowser::getInstance();
		
		// Indentation and line ending of the document, used by removeExtraLineBreaks().
		$document        = JFactory::getDocument();
		self::$tab       = $document->_getTab();
		self::$lineBreak = $document->_getLineEnd();
	}
	
	/**
	 * Returns the regular expression (fragment) used to locate a kind of markup.
	 *
	 * The 'css', 'js', 'noscript' and 'if' results are undelimited fragments meant
	 * to be embedded in a larger pattern (parse() wraps them in '#...#six'; the
	 * 'css' and 'js' fragments contain layout whitespace and therefore need the
	 * 'x' modifier). The 'head' and 'body' results are complete, delimited patterns.
	 *
	 * @param   string  $type  One of 'css', 'js', 'noscript', 'if', 'head', 'body' (case-insensitive).
	 *
	 * @return  string
	 *
	 * @throws  Exception  When $type is not one of the supported values.
	 */
	public function getRegex($type)
	{
		$type = strtolower($type);
		
		switch ($type)
		{
			case 'css':
				// Reject tags that carry itemprop/disabled, a type other than text/css or a rel other than stylesheet.
				$attrs = '(?! (?:  itemprop | disabled | type=  (?!  ["\']?text/css  )  | rel=  (?!  ["\']?stylesheet  )  ) ) ';
				
				// First alternative: <link> with captured href. Second: <style> block with captured content.
				return '(?:<link'
					. self::attrsRegex($attrs)
					. self::urlRegex(array('href'), array('css', 'php'))
					. '[^>]*+>)'
					. "|(?:<style(?:(?!(?:type=(?![\"']?text/css))|(?:scoped))[^>])*>((?><?[^<]+)*?)</[a-z0-9]++>)";
			
			case 'js':
				// When a type attribute is present it must be a text|application javascript|json type.
				$attrs = '(?(?=  type=  )  type=["\']?(?:text|application)/(?:javascript|json)  )';
				
				// Optional captured src followed by the captured inline content.
				return '(?:<script'
					. self::attrsRegex($attrs)
					. '(?:' . self::urlRegex(array('src'), array('js', 'php')) . ')?'
					. "[^>]*+>(  (?> <? [^<]*+)*?  )</script>)";
			
			case 'noscript':
				return '<noscript\b[^>]*+>(?><?[^<]*+)*?</noscript>';
			
			case 'if':
				// Any HTML comment, which includes conditional comments.
				return '<!--(?>-?[^-]*+)*?-->';
			
			case 'head':
				return '#<head\b[^>]*+>(?><?[^<]*+)*?</head>#i';
			
			case 'body':
				return '#<body\b[^>]*+>.*</body>#si';
			
			default:
				throw new Exception(__METHOD__ . '() failed. Invalid type ' . $type);
		}
	}
	
	/**
	 * Returns the <head>...</head> section of the document.
	 *
	 * The result is cached, including a negative result stored as false when
	 * $noError was set on the call that performed the lookup.
	 *
	 * @param   boolean  $noError  When true a missing head yields false instead of an exception.
	 *
	 * @return  string|false
	 *
	 * @throws  Exception  When no head section is found and $noError is false.
	 */
	public function getHead($noError = false)
	{
		// Serve the cached lookup result when there is one.
		if ($this->head !== null)
		{
			return $this->head;
		}
		
		$found  = array();
		$status = preg_match($this->getRegex('head'), $this->html, $found);
		
		if ($status !== false && !empty($found))
		{
			$this->head = $found[0];
		}
		elseif ($noError)
		{
			$this->head = false;
		}
		else
		{
			throw new Exception('An error occured while trying to find the <head> tags in the HTML document. Make sure your template has <head> and </head>');
		}
		
		return $this->head;
	}
	
	/**
	 * Returns the <body>...</body> section of the document (cached after the first lookup).
	 *
	 * @return  string
	 *
	 * @throws  Exception  When no body section is found.
	 */
	public function getBody()
	{
		if ($this->body !== null)
		{
			return $this->body;
		}
		
		$found  = array();
		$status = preg_match($this->getRegex('body'), $this->html, $found);
		
		if ($status === false || empty($found))
		{
			throw new Exception('An error occurred while trying to find the <body> tags in the HTML document. Check your template for open and closing body tags.');
		}
		
		$this->body = $found[0];
		
		return $this->body;
	}
	
	/**
	 * Returns the HTML exactly as it was handed to the constructor.
	 *
	 * @return  string
	 */
	public function getOriginalBuffer()
	{
		return $this->html;
	}
	
	/**
	 * Rebuilds the document by substituting the processed head and body sections
	 * into the original HTML. Sections that were not processed are left as they are.
	 *
	 * Note that the processed head is stored back after its extra line breaks
	 * have been removed.
	 *
	 * @return  string
	 *
	 * @throws  Exception  When a replacement yields an empty/failed result.
	 */
	public function getBuffer()
	{
		$output = $this->getOriginalBuffer();
		
		if ($this->processedHead)
		{
			$this->processedHead = self::removeExtraLineBreaks($this->processedHead);
			$headReplacement     = self::cleanReplacement($this->processedHead);
			
			// Only the first head section of the document is substituted.
			$output = preg_replace($this->getRegex('head'), $headReplacement, $output, 1);
			
			if (!$output)
			{
				throw new Exception(__METHOD__ . '() failed. An error occured while trying to replace head buffer.');
			}
		}
		
		if ($this->processedBody)
		{
			$bodyReplacement = self::cleanReplacement($this->processedBody);
			
			$output = preg_replace($this->getRegex('body'), $bodyReplacement, $output, 1);
			
			if (!$output)
			{
				throw new Exception(__METHOD__ . '() failed. An error occured while trying to replace body buffer.');
			}
		}
		
		return $output;
	}
	
	/**
	 * Tells whether a URL was already registered and registers it when it was not.
	 *
	 * @param   string  $url  Asset URL.
	 *
	 * @return  boolean  True when the URL had been seen before (non-strict in_array() comparison).
	 */
	public function isDuplicated($url)
	{
		$seenBefore = in_array($url, $this->loadedAssets);
		
		if (!$seenBefore)
		{
			$this->loadedAssets[] = $url;
		}
		
		return $seenBefore;
	}
	
	/**
	 * Parses the head and body sections for script and style elements and then
	 * applies the CDN rewriting.
	 *
	 * @return  void
	 *
	 * @throws  Exception  Propagated from parseSection() / getRegex() / getExcludes().
	 */
	public function parse()
	{
		$script      = $this->getRegex('js');
		$style       = $this->getRegex('css');
		$conditional = $this->getRegex('if');
		$noscript    = $this->getRegex('noscript');
		
		// Skips over comments and <noscript> blocks, then matches a script/style element or the end of the subject.
		$regex = '#(?>(?:<(?!!))?[^<]*+(?:' . $conditional . '|' . $noscript . '|<!)?)*?\K(?:' . $script . '|' . $style . '|\K$)#six';
		
		// Script URLs that are always excluded.
		$builtInJs = array(
			'.com/maps/api/js', '.com/jsapi',
			'.com/uds', 'typekit.net',
			'plugin_googlemap3', '/jw_allvideos/',
			'recaptcha', 'addthis.com'
		);
		
		// Inline script fragments that are always excluded.
		$builtInInlineJs = array(
			'document.write', 'var addy', 
			'var mapconfig90', 
		);
		
		$options = array();
		
		$options['cssExcludes']       = $this->getExcludes('css');
		$options['jsExcludes']        = array_merge($builtInJs, $this->getExcludes('js'));
		$options['jsInlineExcludes']  = array_merge($builtInInlineJs, $this->getExcludes('js_inline'));
		$options['cssInlineExcludes'] = array();
		
		foreach (array('head', 'body') as $section)
		{
			$this->parseSection($regex, $section, $options);
		}
		
		// Apply CDN if enabled
		$this->applyCdn();
	}
	
	/**
	 * Rewrites static file URLs in the processed head and body through
	 * VPFrameworkOptimizer::setCdn() when the CDN option is enabled.
	 *
	 * The processed sections are only updated when both replacements succeed and
	 * the results still contain a head and a body section; otherwise the method
	 * returns silently and leaves them untouched.
	 *
	 * @return  void
	 */
	public function applyCdn()
	{
		if (!VPFrameworkOptimizer::cdnEnabled($this->params))
		{
			return;
		}
		
		$defaultTypes = array('css', 'js', 'jpe?g', 'gif', 'png', 'ico', 'bmp', 'pdf', 'tiff?', 'docx?');
		$extensions   = implode('|', $this->params->get('optimizer_staticfiles', $defaultTypes));
		
		$uri  = clone VPFrameworkUrl::getInstance();
		$port = $uri->toString(array('port'));
		$port = empty($port) ? ':80' : $port;
		
		// The host may appear in URLs with or without its port.
		$host = preg_quote($uri->getHost()) . '(?:' . $port . ')?';
		
		// Directory of the current path (last segment dropped), without the administrator folder.
		$segments = preg_split('#/#', $uri->getPath());
		
		array_pop($segments);
		
		$dir = str_replace('/administrator', '', trim(implode('/', $segments), '/'));
		
		// Groups: 1 = optional scheme/host prefix, 2 = first character of the path,
		// 3-6 = remainder of the URL for each of the four delimiter variants.
		$match = '(?!data:image|[\'"])' .
		         '(?=((?:(?:https?:)?//' . $host . ')?)((?!http|//).))' .
		         '(?:(?<![=\'(])(?:\g{1}|\g{2})((?>\.?[^.">?]*+)*?\.(?>' . $extensions . ')[^">]*+)' .
		         '|(?<![\'="])(?:\g{1}|\g{2})((?>\.?[^.)>?]*+)*?\.(?>' . $extensions . ')[^)>]*+)' .
		         '|(?<![="(])(?:\g{1}|\g{2})((?>\.?[^.\'>?]*+)*?\.(?>' . $extensions . ')[^\'>]*+)' .
		         '|(?<![\'"(])(?:\g{1}|\g{2})((?>\.?[^.\s*>?]*+)*?\.(?>' . $extensions . ')[^\s>]*+))';

		// Contexts in which a URL is rewritten: URL attributes, url() inside <style>, url() inside style="".
		$inAttribute = '(?:<(?:link|script|ima?ge?|a))?(?>=?[^=<>]*+)*?(?<= href| src| data-src| data-original| xlink:href)=["\']?';
		$inStyleTag  = '(?:<style[^>]*+>|(?=(?>(?:<(?!style))?[^<]*+)?</style))(?>\(?[^(<>]*+)*?(?<= url)\(["\']?';
		$inStyleAttr = '(?>=?[^=>]++)*?(?<= style)=[^(>]++(?<=url)\(["\']?';

		$regex = '#(?><?[^<]*+)*?(?:(?:' . $inAttribute . '|' . $inStyleTag . '|' . $inStyleAttr . ')\K' . $match . '|\K$)#iS';
		
		$that = $this;
		
		// One callback serves both sections.
		$rewrite = function($matches) use ($dir, $that) {
			return $that->_applyCdn($matches, $dir);
		};
		
		$newHead = preg_replace_callback($regex, $rewrite, $this->processedHead);
		$newBody = preg_replace_callback($regex, $rewrite, $this->processedBody);
		
		// Failed
		if ($newHead === null || $newBody === null)
		{
			//TODO: Log Cookie-less domain function failed
			return;
		}
		
		$headStatus = preg_match($this->getRegex('head'), $newHead, $headMatches);

		if ($headStatus === false || empty($headMatches))
		{
			//TODO: Log Failed retrieving head in cookie-less domain function
			return;
		}

		$bodyStatus = preg_match($this->getRegex('body'), $newBody, $bodyMatches);

		if ($bodyStatus === false || empty($bodyMatches))
		{
			//TODO: Log Failed retrieving body in cookie-less domain function
			return;
		}
		
		$this->processedHead = $headMatches[0];
		$this->processedBody = $bodyMatches[0];
	}
	
	/**
	 * Callback of applyCdn(): builds the path of a matched URL and passes it to
	 * VPFrameworkOptimizer::setCdn().
	 *
	 * @param   array   $matches  Match groups; 2 holds the first path character, 3-6 the URL remainder.
	 * @param   string  $dir      Directory of the current page.
	 *
	 * @return  mixed  Whatever VPFrameworkOptimizer::setCdn() returns.
	 */
	public function _applyCdn($matches, $dir)
	{
		$path = '';
		
		// Paths that do not begin with a slash are prefixed with the current directory.
		if (!empty($matches[2]) && $matches[2] != '/')
		{
			$path = '/' . $dir . '/';
		}
		
		// Only one of the four alternatives captures, the others contribute nothing.
		foreach (array(3, 4, 5, 6) as $group)
		{
			if (isset($matches[$group]))
			{
				$path .= $matches[$group];
			}
		}
		
		return VPFrameworkOptimizer::setCdn($path, $this->params);
	}
	
	/**
	 * Runs the element regex over one section of the document and stores the
	 * result in the matching processed* property.
	 *
	 * When neither CSS nor JS combining is enabled the section is stored unchanged.
	 *
	 * @param   string  $regex    Pattern built by parse().
	 * @param   string  $section  'head' or 'body' (case-insensitive).
	 * @param   array   $options  Exclude lists handed on to replace().
	 *
	 * @return  void
	 *
	 * @throws  Exception  On an invalid section, an empty section or a failed replacement.
	 */
	protected function parseSection($regex, $section, $options = array())
	{
		$section = strtolower($section);
		
		if (!in_array($section, array('body', 'head')))
		{
			throw new Exception(__METHOD__ . '() failed. Invalid section ' . $section);
		}
		
		$html = ($section == 'head') ? $this->getHead() : $this->getBody();
		
		if (empty($html))
		{
			throw new Exception(__METHOD__ . '() failed. No html found in section ' . $section);
		}
		
		$property = 'processed' . ucfirst($section);
		
		// Nothing to combine: keep the section as it is.
		if (!$this->params->get('optimizer_combine_css', 0) && !$this->params->get('optimizer_combine_js', 0))
		{
			$this->{$property} = $html;
			
			return;
		}
		
		$that = $this;
		
		$processedHtml = preg_replace_callback($regex, function($matches) use($that, $options) {
			return $that->replace($matches, $options);
		}, $html);
		
		// preg_replace_callback() yields null on failure; an empty result is treated as a failure as well.
		if (!$processedHtml)
		{
			throw new Exception('An error occured while parsing section ' . $section);
		}
		
		$this->{$property} = $processedHtml;
	}
	
	/**
	 * Callback of parseSection(): decides what happens to one matched element.
	 *
	 * Match groups produced by the pattern built in parse():
	 * 1 = script src, 2 = inline script content, 3 = link href, 4 = inline style content.
	 *
	 * Elements that are to be handled by the optimizer are registered through
	 * addSet() / addInlineSet() and removed from the markup; the first element of
	 * each kind may leave a placeholder tag behind. Elements that are not handled
	 * are returned unchanged.
	 *
	 * Attributes (media, id, async, defer, type) are read from the opening tag
	 * only, so text inside an inline block or attributes such as data-id are not
	 * mistaken for them.
	 *
	 * @param   array  $matches  Match data from preg_replace_callback().
	 * @param   array  $options  Exclude lists keyed 'cssExcludes', 'jsExcludes',
	 *                           'jsInlineExcludes' and 'cssInlineExcludes'.
	 *
	 * @return  string  The original element, a placeholder tag or an empty string.
	 */
	protected function replace(&$matches, $options = array())
	{
		$element = $matches[0];
		$url     = '';
		$content = '';
		$attribs = array();
		
		if (!empty($matches[1]))
		{
			$url = $matches[1];
		}
		elseif (!empty($matches[3]))
		{
			$url = $matches[3];
		}
		
		if (!empty($matches[2]))
		{
			$content = $matches[2];
		}
		elseif (!empty($matches[4]))
		{
			$content = $matches[4];
		}

		// Leave comments and elements with neither a usable URL nor content alone.
		if (preg_match('#^<!--#', $element) || (VPFrameworkUrl::isInvalid($url) && trim($content) == ''))
		{
			return $element;
		}
		
		$isInLine = empty($url) && trim($content) != '';
		
		// Find the type of the element
		$type = preg_match('#^<script#i', $element) ? 'js' : 'css';
		
		// Combining of this type is switched off.
		if (!$this->params->get('optimizer_combine_' . $type, 0))
		{
			return $element;
		}
		
		// Exclude external urls
		if ($this->params->get('optimizer_exclude_external', 0) && !empty($url) && !VPFrameworkUrl::isInternal($url))
		{
			return $element;
		}
		
		$indexKey = $type . ($isInLine ? '_Inline' : '');
		
		if (!isset($this->index[$indexKey]))
		{
			$this->index[$indexKey] = 0;
		}
		
		// Attributes live in the opening tag; never look for them in the element's content.
		$openTag = preg_match('#^<[^>]*+>#', $element, $tagMatch) ? $tagMatch[0] : $element;
		
		if ($type == 'css' && (preg_match('#\smedia=(?(?=["\'])(?:["\']([^"\']+))|(\w+))#i', $openTag, $mediaMatches) > 0))
		{
			$attribs['media'] = !empty($mediaMatches[1]) ? $mediaMatches[1] : $mediaMatches[2];
		}
		
		if ($type == 'js')
		{
			if (!$isInLine && strpos($openTag, ' async'))
			{
				$attribs['async'] = true;
			}
			
			if (!$isInLine && strpos($openTag, ' defer'))
			{
				$attribs['defer'] = true;
			}
			
			// The leading \s keeps attributes such as data-id from being read as the id.
			if (preg_match('#\sid=(?(?=["\'])(?:["\']([^"\']+))|(\w+))#i', $openTag, $idMatches) > 0)
			{
				$attribs['id'] = !empty($idMatches[1]) ? $idMatches[1] : (!empty($idMatches[2]) ? $idMatches[2] : $idMatches[0]);
			}
		}
		
		$excludesKey = $type . ($isInLine ? 'Inline' : '') . 'Excludes';
		$excludes    = isset($options[$excludesKey]) ? $options[$excludesKey] : array();
		$isExclude   = false;

		if (!empty($excludes))
		{
			// Inline elements are matched on their content, external ones on their URL.
			$context = $isInLine ? $content : $url;
			
			if (VPFrameworkUtility::strposa($context, $excludes) !== false || (!empty($attribs['id']) && VPFrameworkUtility::strposa($attribs['id'], $excludes) !== false))
			{
				$isExclude = true;
			}
		}
		
		if (!$isExclude)
		{
			if (!$isInLine && (!$this->canRetrieveFile($url) || (VPFrameworkUrl::isSSL($url) && !extension_loaded('openssl')) || !VPFrameworkUrl::isHttpScheme($url)))
			{
				$isExclude = true;
			}
			elseif ($isInLine && $type == 'js' && $this->params->get('optimizer_exclude_inline_js'))
			{
				$isExclude = true;
			}
		}
		
		if ($isInLine && !$isExclude && !empty($content))
		{
			// JSON data blocks are not executable script and must never be merged.
			// The type may be quoted with double quotes, single quotes or not at all.
			$isJson = strpos($element, 'type="application/json"')
				|| strpos($element, 'type="text/json"')
				|| preg_match('#\stype=["\']?(?:application|text)/json#i', $openTag);
			
			if ($isJson)
			{
				$isExclude = true;
			}
		}

		$attribs['element'] = $element;
		
		if ($isExclude)
		{
			if ($isInLine && !$this->params->get('optimizer_bottom_js', 1))
			{
				return $element;
			}
			
			$attribs['exclude'] = true;
		}

		if ($type == 'css' && !$isExclude && !empty($url) && strpos($url, 'fonts.googleapis.com') !== false)
		{
			$attribs['hash'] = $this->browser->getFontSupport();
		}
		
		$this->index[$indexKey]++;
		
		if ($isInLine)
		{
			$element = VPFrameworkOptimizerBuilder::minifyContent('html', $element, $indexKey);
			
			if ($element)
			{
				$content = $this->getInlineContent($element);
			}
			
			$this->addInlineSet($type, $content, $attribs, $isExclude);
		}
		else
		{
			$this->addSet($type, $url, $attribs, $isExclude);
		}
		
		$result = '';
		
		// The first element of each kind marks the spot where the combined output may be placed.
		if ($this->index[$indexKey] == 1)
		{
			if ($type == 'css' || ($type == 'js' && !$this->params->get('optimizer_bottom_js', 1)))
			{
				$result .= '<VPOPTIMIZER_PLACEHOLDER_FOR_' . strtoupper($indexKey) . '/>';
			}
		}

		return $result;
	}
	
	/**
	 * Registers an external style sheet or script.
	 *
	 * Items are appended to the last set of their type, keyed by URL. An excluded
	 * item gets a set of its own and is followed by a new empty set, so items
	 * added afterwards start a fresh group.
	 *
	 * @param   string   $type        'css' or 'js' (case-insensitive); anything else is ignored.
	 * @param   string   $url         URL of the asset.
	 * @param   array    $attributes  Optional keys: exclude, element, hash, media (css), id, defer, async (js).
	 * @param   boolean  $isExclude   Whether the asset is excluded from combining.
	 *
	 * @return  void
	 */
	protected function addSet($type, $url, $attributes = array(), $isExclude = false)
	{
		$type = strtolower($type);
		
		// Property that stores the sets of each supported type.
		$stores = array('css' => 'styleSheets', 'js' => 'scripts');
		
		if (!isset($stores[$type]))
		{
			return;
		}
		
		$item = array(
			'url'     => $url,
			'exclude' => isset($attributes['exclude']) ? $attributes['exclude'] : null,
			'element' => isset($attributes['element']) ? $attributes['element'] : null,
			'hash'    => isset($attributes['hash']) ? $attributes['hash'] : null,
		);
		
		// Type specific fields.
		$extraFields = ($type == 'css') ? array('media') : array('id', 'defer', 'async');
		
		foreach ($extraFields as $field)
		{
			$item[$field] = isset($attributes[$field]) ? $attributes[$field] : null;
		}
		
		$sets = &$this->{$stores[$type]};
		
		if (empty($sets) || $isExclude)
		{
			$sets[] = array($url => $item);
		}
		else
		{
			// Append to the most recently opened set.
			$keys = array_keys($sets);
			$last = end($keys);
			
			$sets[$last][$url] = $item;
		}
		
		if ($isExclude)
		{
			$sets[] = array();
		}
	}
	
	/**
	 * Registers an inline style or script block.
	 *
	 * Items are appended to the last set of their type. An excluded item gets a
	 * set of its own and is followed by a new empty set, so items added
	 * afterwards start a fresh group.
	 *
	 * @param   string   $type        'css' or 'js' (case-insensitive); anything else is ignored.
	 * @param   string   $content     Content of the inline block.
	 * @param   array    $attributes  Optional keys: exclude, element, id (js).
	 * @param   boolean  $isExclude   Whether the block is excluded from combining.
	 *
	 * @return  void
	 */
	protected function addInlineSet($type, $content, $attributes = array(), $isExclude = false)
	{
		$type = strtolower($type);
		
		// Property that stores the sets of each supported type.
		$stores = array('css' => 'inlineStyle', 'js' => 'inlineScripts');
		
		if (!isset($stores[$type]))
		{
			return;
		}
		
		$item = array(
			'content' => $content,
			'exclude' => isset($attributes['exclude']) ? $attributes['exclude'] : null,
			'element' => isset($attributes['element']) ? $attributes['element'] : null,
		);
		
		if ($type == 'js')
		{
			$item['id'] = isset($attributes['id']) ? $attributes['id'] : null;
		}
		
		$sets = &$this->{$stores[$type]};
		
		if (empty($sets) || $isExclude)
		{
			$sets[] = array($item);
		}
		else
		{
			// Append to the most recently opened set.
			$keys = array_keys($sets);
			$last = end($keys);
			
			$sets[$last][] = $item;
		}
		
		if ($isExclude)
		{
			$sets[] = array();
		}
	}
	
	/**
	 * Returns the configured exclude list for an asset type.
	 *
	 * The list is read from the 'optimizer_exclude_<type>' parameter as a
	 * comma separated string; entries are trimmed and empty entries are dropped.
	 * For 'css' the two VP One Page Checkout style sheets are always added.
	 *
	 * @param   string  $type  'js', 'js_inline' or 'css' (case-insensitive).
	 *
	 * @return  array
	 *
	 * @throws  Exception  When $type is not a supported asset type.
	 */
	protected function getExcludes($type)
	{
		$type = strtolower($type);
		
		if (!in_array($type, array('js', 'js_inline', 'css')))
		{
			throw new Exception(__METHOD__ . '() failed. Invalid asset type ' . $type);
		}
		
		$configured = $this->params->get('optimizer_exclude_' . $type, '');
		
		// explode() copes with a value that holds no comma, so no separate branch is needed.
		$excludes = empty($configured) ? array() : array_filter(array_map('trim', explode(',', $configured)));
		
		// Always exclude VP One Page Checkout CSS
		if ($type == 'css')
		{
			$excludes[] = 'light-checkout.css';
			$excludes[] = 'dark-checkout.css';
		}
		
		return $excludes;
	}
	
	/**
	 * Tells whether the file behind a URL can be retrieved for combining.
	 *
	 * With 'optimizer_include_external' switched off only internal files (see
	 * isInternalFile()) qualify. With it switched on internal files always
	 * qualify, while remote or PHP-generated files additionally require the HTTP
	 * object to report an available adapter.
	 *
	 * @param   string  $url  Asset URL.
	 *
	 * @return  boolean
	 */
	public function canRetrieveFile($url)
	{
		$isInternalFile = self::isInternalFile($url);
		
		if (!$this->params->get('optimizer_include_external', 1))
		{
			return $isInternalFile;
		}
		
		// Only files that are not internal depend on the HTTP adapter.
		// NOTE(review): assumes internal files are read without the HTTP object — confirm against the builder.
		return $isInternalFile ? true : $this->http->hasAdapter();
	}
	
	/**
	 * Tells whether a URL refers to an internal, static file.
	 *
	 * A URL is not internal when it starts with 'http' or '//' and
	 * VPFrameworkUrl::isInternal() rejects it, or when isPHPFile() matches it.
	 *
	 * @param   string  $url  Asset URL.
	 *
	 * @return  boolean
	 */
	public static function isInternalFile($url)
	{
		$isRemote = preg_match('#^(?:http|//)#i', $url) && !VPFrameworkUrl::isInternal($url);
		
		if ($isRemote)
		{
			return false;
		}
		
		return !self::isPHPFile($url);
	}
	
	/**
	 * Tells whether a URL is treated as a dynamically generated (PHP) file.
	 *
	 * That is the case when the URL contains '.php', or when its part before the
	 * query string / fragment does not end in one of the extensions
	 * css, js, png, jpg, jpeg, gif or bmp.
	 *
	 * @param   string  $url  Asset URL.
	 *
	 * @return  integer|false  Result of preg_match(): 1 on a match, 0 otherwise, false on error.
	 */
	public static function isPHPFile($url)
	{
		$containsPhp       = '\.php';
		$lacksStaticSuffix = '^(?![^?\#]*\.(?:css|js|png|jpe?g|gif|bmp)(?:[?\#]|$)).++';
		
		return preg_match('#' . $containsPhp . '|' . $lacksStaticSuffix . '#i', $url);
	}
	
	/**
	 * Wraps an attribute condition in a lookahead that applies it to every
	 * whitespace separated token up to the closing '>' of a tag.
	 *
	 * The fragment contains layout whitespace and is meant for patterns using the 'x' modifier.
	 *
	 * @param   string  $attrs  Regex fragment checked after each whitespace inside the tag.
	 *
	 * @return  string
	 */
	protected static function attrsRegex($attrs)
	{
		$opening = '(?= (?> [^\s>]*+[\s] ';
		$closing = ' )*+  [^\s>]*+> )';
		
		return $opening . $attrs . $closing;
	}
	
	/**
	 * Builds the regex fragment that finds one of the given attributes inside a
	 * tag and captures its value (quoted or unquoted) in a single group.
	 *
	 * The fragment contains layout whitespace and is meant for patterns using the 'x' modifier.
	 *
	 * @param   array  $attributes  Attribute names, e.g. array('href').
	 * @param   array  $extensions  File extensions. Joined below but not used in the
	 *                              returned fragment, so the URL is not restricted by extension.
	 *
	 * @return  string
	 */
	protected static function urlRegex($attributes, $extensions)
	{
		$attributes = implode('|', $attributes);
		// NOTE(review): $extensions is never interpolated into the heredoc below.
		$extensions = implode('|', $extensions);

		// The capture group takes an unquoted, a double quoted or a single quoted value.
		$sUrlRegex = <<<URLREGEX
		(?>  [^\s>]*+\s  )+?  (?>$attributes)=["']?
		( (?<!["']) [^\s>]*+  | (?<!') [^"]*+ | [^']*+ )
		                                                        
URLREGEX;

		return $sUrlRegex;
	}
	
	/**
	 * Escapes backslashes and dollar signs so a string can be used literally as
	 * the replacement argument of preg_replace().
	 *
	 * @param   string  $string  Replacement text.
	 *
	 * @return  string
	 */
	protected static function cleanReplacement($string)
	{
		// Backslashes go first so the ones added in front of '$' are not doubled.
		$search  = array('\\', '$');
		$replace = array('\\\\', '\$');
		
		return str_replace($search, $replace, $string);
	}
	
	/**
	 * Removes every occurrence of two consecutive "tab + line break" sequences.
	 *
	 * @param   string  $string  Markup to clean.
	 *
	 * @return  string
	 */
	protected static function removeExtraLineBreaks($string)
	{
		$emptyLine = self::$tab . self::$lineBreak;
		
		return str_replace($emptyLine . $emptyLine, '', $string);
	}
	
	/**
	 * Replaces each <script> or <style> element in the given markup with its
	 * content, using _getInlineContent() as the callback. HTML comments are
	 * skipped while searching.
	 *
	 * @param   string  $element  Markup holding the inline element.
	 *
	 * @return  string|null  Result of preg_replace_callback().
	 */
	protected function getInlineContent($element)
	{
		$comment = '<!--(?>-?[^-]*+)*?--!?>';
		
		// Groups 1-3: script open tag, content, close tag. Groups 4-6: the same for style.
		$script = '(<script\b[^>]*+>)((?><?[^<]*+)*?)(</script>)';
		$style  = '(<style\b[^>]*+>)((?><?[^<]*+)*?)(</style>)';
		
		$pattern = '#(?><?[^<]*+(?:' . $comment . ')?)*?\K(?:' . $script . '|' . $style . '|$)#i';
		
		return preg_replace_callback($pattern, array($this, '_getInlineContent'), $element);
	}
	
	/**
	 * Callback of getInlineContent(): returns the content of the matched
	 * script (group 2) or style (group 5) element.
	 *
	 * @param   array  $matches  Match data from preg_replace_callback().
	 *
	 * @return  string  The content, or an empty string when nothing was captured.
	 */
	public function _getInlineContent($matches)
	{
		// The end-of-subject alternative produces an empty match.
		if (empty($matches[0]))
		{
			return '';
		}

		foreach (array(2, 5) as $group)
		{
			if (!empty($matches[$group]))
			{
				return $matches[$group];
			}
		}

		return '';
	}
}