forked from friendica/php-json-ld
Add optimization for finding the best CURIE.
- Build a map for searching for a matching IRI when computing the inverse context. Each letter of an IRI can be used to key into the map to find the best set of partial matches (which can be used to create CURIEs). - This approach is a faster alternative to trying each possible term in the active context as a possible CURIE, linearly, one at a time.
This commit is contained in:
parent
956fb8b790
commit
3433a01a65
97
jsonld.php
97
jsonld.php
|
@ -4701,34 +4701,43 @@ class JsonLdProcessor {
|
||||||
}
|
}
|
||||||
|
|
||||||
// no term or @vocab match, check for possible CURIEs
|
// no term or @vocab match, check for possible CURIEs
|
||||||
$iri_len = strlen($iri);
|
|
||||||
$choice = null;
|
$choice = null;
|
||||||
foreach($active_ctx->mappings as $term => $definition) {
|
$idx = 0;
|
||||||
// skip null definitions and terms with colons, they can't be prefixes
|
$partial_matches = array();
|
||||||
if($definition === null || $definition->_term_has_colon) {
|
$iri_map = $active_ctx->fast_curie_map;
|
||||||
continue;
|
// check for partial matches against `iri`, which means look until
|
||||||
|
// iri.length - 1, not full length
|
||||||
|
$max_partial_length = strlen($iri) - 1;
|
||||||
|
for(; $idx < $max_partial_length && isset($iri_map[$iri[$idx]]); ++$idx) {
|
||||||
|
$iri_map = $iri_map[$iri[$idx]];
|
||||||
|
if(isset($iri_map[''])) {
|
||||||
|
$entry = $iri_map[''][0];
|
||||||
|
$entry->iri_length = $idx + 1;
|
||||||
|
$partial_matches[] = $entry;
|
||||||
}
|
}
|
||||||
// skip entries with @ids that are not partial matches
|
|
||||||
if(!($iri_len > $definition->_id_length &&
|
|
||||||
strpos($iri, $definition->{'@id'}) === 0)) {
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
// check partial matches in reverse order to prefer longest ones first
|
||||||
|
$partial_matches = array_reverse($partial_matches);
|
||||||
|
foreach($partial_matches as $entry) {
|
||||||
|
$terms = $entry->terms;
|
||||||
|
foreach($terms as $term) {
|
||||||
// a CURIE is usable if:
|
// a CURIE is usable if:
|
||||||
// 1. it has no mapping, OR
|
// 1. it has no mapping, OR
|
||||||
// 2. value is null, which means we're not compacting an @value, AND
|
// 2. value is null, which means we're not compacting an @value, AND
|
||||||
// the mapping matches the IRI)
|
// the mapping matches the IRI
|
||||||
$curie = $term . ':' . substr($iri, $definition->_id_length);
|
$curie = $term . ':' . substr($iri, $entry->iri_length);
|
||||||
$is_usable_curie = (!property_exists($active_ctx->mappings, $curie) ||
|
$is_usable_curie = (!property_exists($active_ctx->mappings, $curie) ||
|
||||||
($value === null && $active_ctx->mappings->{$curie}->{'@id'} === $iri));
|
($value === null &&
|
||||||
|
$active_ctx->mappings->{$curie}->{'@id'} === $iri));
|
||||||
|
|
||||||
// select curie if it is shorter or the same length but lexicographically
|
// select curie if it is shorter or the same length but
|
||||||
// less than the current choice
|
// lexicographically less than the current choice
|
||||||
if($is_usable_curie && ($choice === null ||
|
if($is_usable_curie && ($choice === null ||
|
||||||
self::_compareShortestLeast($curie, $choice) < 0)) {
|
self::_compareShortestLeast($curie, $choice) < 0)) {
|
||||||
$choice = $curie;
|
$choice = $curie;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// return chosen curie
|
// return chosen curie
|
||||||
if($choice !== null) {
|
if($choice !== null) {
|
||||||
|
@ -5363,6 +5372,10 @@ class JsonLdProcessor {
|
||||||
|
|
||||||
$inverse = $active_ctx->inverse = new stdClass();
|
$inverse = $active_ctx->inverse = new stdClass();
|
||||||
|
|
||||||
|
// variables for building fast CURIE map
|
||||||
|
$fast_curie_map = $active_ctx->fast_curie_map = new ArrayObject();
|
||||||
|
$iris_to_terms = array();
|
||||||
|
|
||||||
// handle default language
|
// handle default language
|
||||||
$default_language = '@none';
|
$default_language = '@none';
|
||||||
if(property_exists($active_ctx, '@language')) {
|
if(property_exists($active_ctx, '@language')) {
|
||||||
|
@ -5391,9 +5404,25 @@ class JsonLdProcessor {
|
||||||
$iris = $mapping->{'@id'};
|
$iris = $mapping->{'@id'};
|
||||||
$iris = self::arrayify($iris);
|
$iris = self::arrayify($iris);
|
||||||
foreach($iris as $iri) {
|
foreach($iris as $iri) {
|
||||||
|
$is_keyword = self::_isKeyword($iri);
|
||||||
|
|
||||||
// initialize container map
|
// initialize container map
|
||||||
if(!property_exists($inverse, $iri)) {
|
if(!property_exists($inverse, $iri)) {
|
||||||
$inverse->{$iri} = new stdClass();
|
$inverse->{$iri} = new stdClass();
|
||||||
|
if(!$is_keyword && !$mapping->_term_has_colon) {
|
||||||
|
// init IRI to term map and fast CURIE map
|
||||||
|
$iris_to_terms[$iri] = new ArrayObject();
|
||||||
|
$iris_to_terms[$iri][] = $term;
|
||||||
|
$fast_curie_entry = (object)array(
|
||||||
|
'iri' => $iri, 'terms' => $iris_to_terms[$iri]);
|
||||||
|
if(!array_key_exists($iri[0], (array)$fast_curie_map)) {
|
||||||
|
$fast_curie_map[$iri[0]] = new ArrayObject();
|
||||||
|
}
|
||||||
|
$fast_curie_map[$iri[0]][] = $fast_curie_entry;
|
||||||
|
}
|
||||||
|
} else if(!$is_keyword && !$mapping->_term_has_colon) {
|
||||||
|
// add IRI to term match
|
||||||
|
$iris_to_terms[$iri][] = $term;
|
||||||
}
|
}
|
||||||
$container_map = $inverse->{$iri};
|
$container_map = $inverse->{$iri};
|
||||||
|
|
||||||
|
@ -5437,9 +5466,47 @@ class JsonLdProcessor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// build fast CURIE map
|
||||||
|
foreach($fast_curie_map as $key => $value) {
|
||||||
|
$this->_buildIriMap($fast_curie_map, $key, 1);
|
||||||
|
}
|
||||||
|
|
||||||
return $inverse;
|
return $inverse;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Recursively expands one bucket of the fast CURIE lookup map into a
 * per-character tree.
 *
 * The entries currently stored under `$key` are regrouped by the character
 * found at offset `$idx` of each entry's IRI. Entries whose IRI has no
 * character at that offset are collected under the empty-string key. Every
 * other group is then expanded the same way using the next offset.
 *
 * @param ArrayObject $iri_map the map to build (updated in place).
 * @param string $key the current key in the map to work on.
 * @param int $idx the index into the IRI to compare.
 */
function _buildIriMap($iri_map, $key, $idx) {
  // detach the flat entry list and put an empty child map in its place
  $bucket = $iri_map[$key];
  $children = new ArrayObject();
  $iri_map[$key] = $children;

  // distribute each entry by the character of its IRI at the given offset;
  // '' marks entries whose IRI ends before that offset
  foreach($bucket as $item) {
    $ch = ($idx < strlen($item->iri)) ? $item->iri[$idx] : '';
    if(!isset($children[$ch])) {
      $children[$ch] = new ArrayObject();
    }
    $children[$ch][] = $item;
  }

  // descend into every group except the terminal '' group, which keeps
  // its flat list of entries
  foreach($children as $child_key => $unused) {
    if($child_key !== '') {
      $this->_buildIriMap($children, $child_key, $idx + 1);
    }
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds the term for the given entry if not already added.
|
* Adds the term for the given entry if not already added.
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in a new issue