globalise_urls works better when applied retrospectively

This commit is contained in:
Matthew Exon 2024-06-20 20:32:52 +01:00 committed by Matthew Exon
parent ed49acf61c
commit c7cb79c495

View file

@@ -660,7 +660,11 @@ function retriever_extract(DOMDocument $doc, array $retriever) {
* @return DOMDocument New DOM document with global URLs
*/
function retriever_globalise_urls(DOMDocument $doc, array $resource) {
$components = parse_url($resource['redirect-url']);
$url = $resource['redirect-url'];
if ($url == "") {
$url = $resource['url'];
}
$components = parse_url($url);
if (!array_key_exists('scheme', $components) || !array_key_exists('host', $components) || !array_key_exists('path', $components)) {
return $doc;
}