Do a slightly better job of finding relevant content when scraping submitted links
This commit is contained in:
		
					parent
					
						
							
								24a9a41f96
							
						
					
				
			
			
				commit
				
					
						2d9718fee9
					
				
			
		
					 1 changed file with 34 additions and 9 deletions
				
			
		| 
						 | 
				
			
			@ -6,6 +6,8 @@ function parse_url_content(&$a) {
 | 
			
		|||
 | 
			
		||||
	$url = trim($_GET['url']);
 | 
			
		||||
 | 
			
		||||
	$text = null;
 | 
			
		||||
 | 
			
		||||
	$template = "<a href=\"%s\" >%s</a>%s";
 | 
			
		||||
 | 
			
		||||
	if($url) 
 | 
			
		||||
| 
						 | 
				
			
			@ -34,15 +36,38 @@ function parse_url_content(&$a) {
 | 
			
		|||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	$items = $dom->getElementsByTagName('p');
 | 
			
		||||
	if($items) {
 | 
			
		||||
		foreach($items as $item) {
 | 
			
		||||
			$text = $item->textContent;
 | 
			
		||||
			$text = strip_tags($text);
 | 
			
		||||
			if(strlen($text) < 100)
 | 
			
		||||
				continue;
 | 
			
		||||
			$text = substr($text,0,250) . '...' ;
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
	$divs = $dom->getElementsByTagName('div');
 | 
			
		||||
	if($divs) {
 | 
			
		||||
		foreach($divs as $div) {
 | 
			
		||||
			$class = $div->getAttribute('class');
 | 
			
		||||
			if($class && stristr($class,'article')) {
 | 
			
		||||
				$items = $div->getElementsByTagName('p');
 | 
			
		||||
				if($items) {
 | 
			
		||||
					foreach($items as $item) {
 | 
			
		||||
						$text = $item->textContent;
 | 
			
		||||
						$text = strip_tags($text);
 | 
			
		||||
						if(strlen($text) < 100)
 | 
			
		||||
							continue;
 | 
			
		||||
						$text = substr($text,0,250) . '...' ;
 | 
			
		||||
						break;
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if(! $text) {
 | 
			
		||||
		$items = $dom->getElementsByTagName('p');
 | 
			
		||||
		if($items) {
 | 
			
		||||
			foreach($items as $item) {
 | 
			
		||||
				$text = $item->textContent;
 | 
			
		||||
				$text = strip_tags($text);
 | 
			
		||||
				if(strlen($text) < 100)
 | 
			
		||||
					continue;
 | 
			
		||||
				$text = substr($text,0,250) . '...' ;
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue