70 lines
		
	
	
	
		
			2.2 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			70 lines
		
	
	
	
		
			2.2 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| 
 | |
| /**
 | |
|  * Parses a URI into the components and fragment identifier as specified
 | |
|  * by RFC 3986.
 | |
|  */
 | |
| class HTMLPurifier_URIParser
 | |
| {
 | |
| 
 | |
|     /**
 | |
|      * Instance of HTMLPurifier_PercentEncoder to do normalization with.
 | |
|      */
 | |
|     protected $percentEncoder;
 | |
| 
 | |
|     public function __construct() {
 | |
|         $this->percentEncoder = new HTMLPurifier_PercentEncoder();
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Parses a URI.
 | |
|      * @param $uri string URI to parse
 | |
|      * @return HTMLPurifier_URI representation of URI. This representation has
 | |
|      *         not been validated yet and may not conform to RFC.
 | |
|      */
 | |
|     public function parse($uri) {
 | |
| 
 | |
|         $uri = $this->percentEncoder->normalize($uri);
 | |
| 
 | |
|         // Regexp is as per Appendix B.
 | |
|         // Note that ["<>] are an addition to the RFC's recommended
 | |
|         // characters, because they represent external delimeters.
 | |
|         $r_URI = '!'.
 | |
|             '(([^:/?#"<>]+):)?'. // 2. Scheme
 | |
|             '(//([^/?#"<>]*))?'. // 4. Authority
 | |
|             '([^?#"<>]*)'.       // 5. Path
 | |
|             '(\?([^#"<>]*))?'.   // 7. Query
 | |
|             '(#([^"<>]*))?'.     // 8. Fragment
 | |
|             '!';
 | |
| 
 | |
|         $matches = array();
 | |
|         $result = preg_match($r_URI, $uri, $matches);
 | |
| 
 | |
|         if (!$result) return false; // *really* invalid URI
 | |
| 
 | |
|         // seperate out parts
 | |
|         $scheme     = !empty($matches[1]) ? $matches[2] : null;
 | |
|         $authority  = !empty($matches[3]) ? $matches[4] : null;
 | |
|         $path       = $matches[5]; // always present, can be empty
 | |
|         $query      = !empty($matches[6]) ? $matches[7] : null;
 | |
|         $fragment   = !empty($matches[8]) ? $matches[9] : null;
 | |
| 
 | |
|         // further parse authority
 | |
|         if ($authority !== null) {
 | |
|             $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
 | |
|             $matches = array();
 | |
|             preg_match($r_authority, $authority, $matches);
 | |
|             $userinfo   = !empty($matches[1]) ? $matches[2] : null;
 | |
|             $host       = !empty($matches[3]) ? $matches[3] : '';
 | |
|             $port       = !empty($matches[4]) ? (int) $matches[5] : null;
 | |
|         } else {
 | |
|             $port = $host = $userinfo = null;
 | |
|         }
 | |
| 
 | |
|         return new HTMLPurifier_URI(
 | |
|             $scheme, $userinfo, $host, $port, $path, $query, $fragment);
 | |
|     }
 | |
| 
 | |
| }
 | |
| 
 | |
| // vim: et sw=4 sts=4
 |