forked from friendica/php-json-ld
Implement URL parsing/unparsing per RFC 3986.
- Section 5.3 (Component Recomposition) of RFC 3986 distinguishes between undefined components and empty components, a distinction that PHP's built-in parse_url does not make. This patch deals with that issue and ensures, for instance, that empty queries and fragments are detected.
This commit is contained in:
parent
fed40914c8
commit
1a9c6bffdd
165
jsonld.php
165
jsonld.php
|
@ -506,59 +506,45 @@ function jsonld_parse_url($url) {
|
||||||
$url = '';
|
$url = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
$rval = parse_url($url);
|
$keys = array(
|
||||||
|
'href', 'protocol', 'scheme', '?authority', 'authority',
|
||||||
|
'?auth', 'auth', 'user', 'pass', 'host', '?port', 'port', 'path',
|
||||||
|
'?query', 'query', '?fragment', 'fragment');
|
||||||
|
$regex = "/^(([^:\/?#]+):)?(\/\/(((([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(:(\d*))?))?([^?#]*)(\?([^#]*))?(#(.*))?/";
|
||||||
|
preg_match($regex, $url, $match);
|
||||||
|
|
||||||
// malformed url
|
$rval = array();
|
||||||
if($rval === false) {
|
$flags = array();
|
||||||
$rval = array();
|
$len = count($keys);
|
||||||
}
|
for($i = 0; $i < $len; ++$i) {
|
||||||
|
$key = $keys[$i];
|
||||||
$rval['href'] = $url;
|
if(strpos($key, '?') === 0) {
|
||||||
if(!isset($rval['scheme'])) {
|
$flags[substr($key, 1)] = !empty($match[$i]);
|
||||||
$rval['scheme'] = '';
|
} else if(!isset($match[$i])) {
|
||||||
$rval['protocol'] = '';
|
$rval[$key] = null;
|
||||||
} else {
|
|
||||||
$rval['protocol'] = $rval['scheme'] . ':';
|
|
||||||
}
|
|
||||||
if(!isset($rval['host'])) {
|
|
||||||
$rval['host'] = '';
|
|
||||||
}
|
|
||||||
if(!isset($rval['path'])) {
|
|
||||||
$rval['path'] = '';
|
|
||||||
}
|
|
||||||
if(isset($rval['user']) || isset($rval['pass'])) {
|
|
||||||
$rval['auth'] = '';
|
|
||||||
if(isset($rval['user'])) {
|
|
||||||
$rval['auth'] = $rval['user'];
|
|
||||||
}
|
|
||||||
if(isset($rval['pass'])) {
|
|
||||||
$rval['auth'] .= ":{$rval['pass']}";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// parse authority for unparsed relative network-path reference
|
|
||||||
if(strpos($rval['href'], ':') === false &&
|
|
||||||
strpos($rval['href'], '//') === 0 && $rval['host'] === '') {
|
|
||||||
// must parse authority from pathname
|
|
||||||
$rval['path'] = substr($rval['path'], 2);
|
|
||||||
$idx = strpos($rval['path'], '/');
|
|
||||||
if($idx === false) {
|
|
||||||
$rval['authority'] = $rval['path'];
|
|
||||||
$rval['path'] = '';
|
|
||||||
} else {
|
} else {
|
||||||
$rval['authority'] = substr($rval['path'], 0, $idx);
|
$rval[$key] = $match[$i];
|
||||||
$rval['path'] = substr($rval['path'], $idx);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
$rval['authority'] = $rval['host'];
|
|
||||||
if(isset($rval['port'])) {
|
|
||||||
$rval['authority'] .= ":{$rval['port']}";
|
|
||||||
}
|
|
||||||
if(isset($rval['auth'])) {
|
|
||||||
$rval['authority'] = "{$rval['auth']}@{$rval['authority']}";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!$flags['authority']) {
|
||||||
|
$rval['authority'] = null;
|
||||||
|
}
|
||||||
|
if(!$flags['auth']) {
|
||||||
|
$rval['auth'] = $rval['user'] = $rval['pass'] = null;
|
||||||
|
}
|
||||||
|
if(!$flags['port']) {
|
||||||
|
$rval['port'] = null;
|
||||||
|
}
|
||||||
|
if(!$flags['query']) {
|
||||||
|
$rval['query'] = null;
|
||||||
|
}
|
||||||
|
if(!$flags['fragment']) {
|
||||||
|
$rval['fragment'] = null;
|
||||||
|
}
|
||||||
|
|
||||||
$rval['normalizedPath'] = jsonld_remove_dot_segments(
|
$rval['normalizedPath'] = jsonld_remove_dot_segments(
|
||||||
$rval['path'], $rval['authority'] !== '');
|
$rval['path'], !!$rval['authority']);
|
||||||
|
|
||||||
return $rval;
|
return $rval;
|
||||||
}
|
}
|
||||||
|
@ -628,47 +614,66 @@ function jsonld_prepend_base($base, $iri) {
|
||||||
// parse given IRI
|
// parse given IRI
|
||||||
$rel = jsonld_parse_url($iri);
|
$rel = jsonld_parse_url($iri);
|
||||||
|
|
||||||
// start hierarchical part
|
// per RFC3986 5.2.2
|
||||||
$hierPart = $base['protocol'];
|
$transform = array('protocol' => $base['protocol']);
|
||||||
if($rel['authority']) {
|
|
||||||
$hierPart .= "//{$rel['authority']}";
|
|
||||||
} else if($base['href'] !== '') {
|
|
||||||
$hierPart .= "//{$base['authority']}";
|
|
||||||
}
|
|
||||||
|
|
||||||
// per RFC3986 normalize
|
if($rel['authority'] !== null) {
|
||||||
|
$transform['authority'] = $rel['authority'];
|
||||||
// IRI represents an absolute path
|
$transform['path'] = $rel['path'];
|
||||||
if(strpos($rel['path'], '/') === 0) {
|
$transform['query'] = $rel['query'];
|
||||||
$path = $rel['path'];
|
|
||||||
} else {
|
} else {
|
||||||
$path = $base['path'];
|
$transform['authority'] = $base['authority'];
|
||||||
|
|
||||||
// append relative path to the end of the last directory from base
|
if($rel['path'] === '') {
|
||||||
if($rel['path'] !== '') {
|
$transform['path'] = $base['path'];
|
||||||
$idx = strrpos($path, '/');
|
if($rel['query'] !== null) {
|
||||||
$idx = ($idx === false) ? 0 : $idx + 1;
|
$transform['query'] = $rel['query'];
|
||||||
$path = substr($path, 0, $idx);
|
} else {
|
||||||
if(strlen($path) > 0 && substr($path, -1) !== '/') {
|
$transform['query'] = $base['query'];
|
||||||
$path .= '/';
|
|
||||||
}
|
}
|
||||||
$path .= $rel['path'];
|
} else {
|
||||||
|
if(strpos($rel['path'], '/') === 0) {
|
||||||
|
// IRI represents an absolute path
|
||||||
|
$transform['path'] = $rel['path'];
|
||||||
|
} else {
|
||||||
|
// merge paths
|
||||||
|
$path = $base['path'];
|
||||||
|
|
||||||
|
// append relative path to the end of the last directory from base
|
||||||
|
if($rel['path'] !== '') {
|
||||||
|
$idx = strrpos($path, '/');
|
||||||
|
$idx = ($idx === false) ? 0 : $idx + 1;
|
||||||
|
$path = substr($path, 0, $idx);
|
||||||
|
if(strlen($path) > 0 && substr($path, -1) !== '/') {
|
||||||
|
$path .= '/';
|
||||||
|
}
|
||||||
|
$path .= $rel['path'];
|
||||||
|
}
|
||||||
|
|
||||||
|
$transform['path'] = $path;
|
||||||
|
}
|
||||||
|
$transform['query'] = $rel['query'];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove slashes and dots in path
|
// remove slashes and dots in path
|
||||||
$path = jsonld_remove_dot_segments($path, $hierPart !== '');
|
$transform['path'] = jsonld_remove_dot_segments(
|
||||||
|
$transform['path'], !!$transform['authority']);
|
||||||
|
|
||||||
// add query and hash
|
// construct URL
|
||||||
if(isset($rel['query'])) {
|
$rval = $transform['protocol'];
|
||||||
$path .= "?{$rel['query']}";
|
if($transform['authority'] !== null) {
|
||||||
|
$rval .= '//' . $transform['authority'];
|
||||||
}
|
}
|
||||||
if(isset($rel['fragment'])) {
|
$rval .= $transform['path'];
|
||||||
$path .= "#{$rel['fragment']}";
|
if($transform['query'] !== null) {
|
||||||
|
$rval .= '?' . $transform['query'];
|
||||||
|
}
|
||||||
|
if($rel['fragment'] !== null) {
|
||||||
|
$rval .= '#' . $rel['fragment'];
|
||||||
}
|
}
|
||||||
|
|
||||||
$rval = $hierPart . $path;
|
// handle empty base
|
||||||
|
|
||||||
if($rval === '') {
|
if($rval === '') {
|
||||||
$rval = './';
|
$rval = './';
|
||||||
}
|
}
|
||||||
|
@ -716,7 +721,7 @@ function jsonld_remove_base($base, $iri) {
|
||||||
// is a hash or query)
|
// is a hash or query)
|
||||||
$base_segments = explode('/', $base['normalizedPath']);
|
$base_segments = explode('/', $base['normalizedPath']);
|
||||||
$iri_segments = explode('/', $rel['normalizedPath']);
|
$iri_segments = explode('/', $rel['normalizedPath']);
|
||||||
$last = (isset($rel['query']) || isset($rel['fragment'])) ? 0 : 1;
|
$last = ($rel['query'] || $rel['fragment']) ? 0 : 1;
|
||||||
while(count($base_segments) > 0 && count($iri_segments) > $last) {
|
while(count($base_segments) > 0 && count($iri_segments) > $last) {
|
||||||
if($base_segments[0] !== $iri_segments[0]) {
|
if($base_segments[0] !== $iri_segments[0]) {
|
||||||
break;
|
break;
|
||||||
|
@ -740,10 +745,10 @@ function jsonld_remove_base($base, $iri) {
|
||||||
$rval .= implode('/', $iri_segments);
|
$rval .= implode('/', $iri_segments);
|
||||||
|
|
||||||
// add query and hash
|
// add query and hash
|
||||||
if(isset($rel['query'])) {
|
if($rel['query'] !== null) {
|
||||||
$rval .= "?{$rel['query']}";
|
$rval .= "?{$rel['query']}";
|
||||||
}
|
}
|
||||||
if(isset($rel['fragment'])) {
|
if($rel['fragment'] !== null) {
|
||||||
$rval .= "#{$rel['fragment']}";
|
$rval .= "#{$rel['fragment']}";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue