Add optimization for finding the best CURIE.

- Build a map for searching for a matching IRI when
  computing the inverse context. Each letter of an
  IRI can be used to key into the map to find the
  best set of partial matches (which can be used
  to create CURIEs).
- This approach is a faster alternative to trying each
  possible term in the active context as a possible
  CURIE, linearly, one at a time.
This commit is contained in:
Dave Longley 2016-04-25 00:16:38 -04:00
parent 956fb8b790
commit 3433a01a65
1 changed files with 90 additions and 23 deletions

View File

@ -4701,32 +4701,41 @@ class JsonLdProcessor {
}
// no term or @vocab match, check for possible CURIEs
$iri_len = strlen($iri);
$choice = null;
foreach($active_ctx->mappings as $term => $definition) {
// skip null definitions and terms with colons, they can't be prefixes
if($definition === null || $definition->_term_has_colon) {
continue;
}
// skip entries with @ids that are not partial matches
if(!($iri_len > $definition->_id_length &&
strpos($iri, $definition->{'@id'}) === 0)) {
continue;
$idx = 0;
$partial_matches = array();
$iri_map = $active_ctx->fast_curie_map;
// check for partial matches of against `iri`, which means look until
// iri.length - 1, not full length
$max_partial_length = strlen($iri) - 1;
for(; $idx < $max_partial_length && isset($iri_map[$iri[$idx]]); ++$idx) {
$iri_map = $iri_map[$iri[$idx]];
if(isset($iri_map[''])) {
$entry = $iri_map[''][0];
$entry->iri_length = $idx + 1;
$partial_matches[] = $entry;
}
}
// check partial matches in reverse order to prefer longest ones first
$partial_matches = array_reverse($partial_matches);
foreach($partial_matches as $entry) {
$terms = $entry->terms;
foreach($terms as $term) {
// a CURIE is usable if:
// 1. it has no mapping, OR
// 2. value is null, which means we're not compacting an @value, AND
// the mapping matches the IRI
$curie = $term . ':' . substr($iri, $entry->iri_length);
$is_usable_curie = (!property_exists($active_ctx->mappings, $curie) ||
($value === null &&
$active_ctx->mappings->{$curie}->{'@id'} === $iri));
// a CURIE is usable if:
// 1. it has no mapping, OR
// 2. value is null, which means we're not compacting an @value, AND
// the mapping matches the IRI)
$curie = $term . ':' . substr($iri, $definition->_id_length);
$is_usable_curie = (!property_exists($active_ctx->mappings, $curie) ||
($value === null && $active_ctx->mappings->{$curie}->{'@id'} === $iri));
// select curie if it is shorter or the same length but lexicographically
// less than the current choice
if($is_usable_curie && ($choice === null ||
self::_compareShortestLeast($curie, $choice) < 0)) {
$choice = $curie;
// select curie if it is shorter or the same length but
// lexicographically less than the current choice
if($is_usable_curie && ($choice === null ||
self::_compareShortestLeast($curie, $choice) < 0)) {
$choice = $curie;
}
}
}
@ -5363,6 +5372,10 @@ class JsonLdProcessor {
$inverse = $active_ctx->inverse = new stdClass();
// variables for building fast CURIE map
$fast_curie_map = $active_ctx->fast_curie_map = new ArrayObject();
$iris_to_terms = array();
// handle default language
$default_language = '@none';
if(property_exists($active_ctx, '@language')) {
@ -5391,9 +5404,25 @@ class JsonLdProcessor {
$iris = $mapping->{'@id'};
$iris = self::arrayify($iris);
foreach($iris as $iri) {
$is_keyword = self::_isKeyword($iri);
// initialize container map
if(!property_exists($inverse, $iri)) {
$inverse->{$iri} = new stdClass();
if(!$is_keyword && !$mapping->_term_has_colon) {
// init IRI to term map and fast CURIE map
$iris_to_terms[$iri] = new ArrayObject();
$iris_to_terms[$iri][] = $term;
$fast_curie_entry = (object)array(
'iri' => $iri, 'terms' => $iris_to_terms[$iri]);
if(!array_key_exists($iri[0], (array)$fast_curie_map)) {
$fast_curie_map[$iri[0]] = new ArrayObject();
}
$fast_curie_map[$iri[0]][] = $fast_curie_entry;
}
} else if(!$is_keyword && !$mapping->_term_has_colon) {
// add IRI to term match
$iris_to_terms[$iri][] = $term;
}
$container_map = $inverse->{$iri};
@ -5437,9 +5466,47 @@ class JsonLdProcessor {
}
}
// build fast CURIE map
foreach($fast_curie_map as $key => $value) {
$this->_buildIriMap($fast_curie_map, $key, 1);
}
return $inverse;
}
/**
* Runs a recursive algorithm to build a lookup map for quickly finding
* potential CURIEs.
*
* @param ArrayObject $iri_map the map to build.
* @param string $key the current key in the map to work on.
* @param int $idx the index into the IRI to compare.
*/
function _buildIriMap($iri_map, $key, $idx) {
$entries = $iri_map[$key];
$next = $iri_map[$key] = new ArrayObject();
foreach($entries as $entry) {
$iri = $entry->iri;
if($idx >= strlen($iri)) {
$letter = '';
} else {
$letter = $iri[$idx];
}
if(!isset($next[$letter])) {
$next[$letter] = new ArrayObject();
}
$next[$letter][] = $entry;
}
foreach($next as $key => $value) {
if($key === '') {
continue;
}
$this->_buildIriMap($next, $key, $idx + 1);
}
}
/**
* Adds the term for the given entry if not already added.
*