initial commit

2015-01-04 21:46:33 +01:00 · 2015-01-04 21:46:33 +01:00 · 8eeedc3a3c
commit 8eeedc3a3c
54 changed files with 179682 additions and 0 deletions
--- a/php-typography/php-parser/change_log_parser.txt
+++ b/php-typography/php-parser/change_log_parser.txt
@ -0,0 +1,37 @@
+1.20 - December 20, 2009
+
+Added HTML5 elements to parsing algorithm for greater contextual awareness
+
+1.19 - December 1, 2009
+
+Corrected some uninitialized variables
+
+1.12 - August 17, 2009
+
+Corrected multibyte handling of nextChr and prevChr
+
+1.10 - August 14, 2009
+
+Increased set of recognized multibyte word characters
+Corrected multibyte handling of nextChr and prevChr
+
+1.4 - July 23, 2009
+
+Added letter connectors (like soft-hyphens) as prohibited characters for get_words if it is set to strictly return letter only words.
+
+1.3 - July 23, 2009
+
+Uninitialized variables corrected throughout.
+
+1.0 - July 15, 2009
+
+Removed beta tag
+
+1.0 beta 7 - July 10, 2009
+
+Added "/" as a valid word character so we could capture "this/that" as a word for processing (similar to "mother-in-law")
+Corrected error where characters from the Latin 1 Supplement Block were not recognized as word characters
+
+1.0 beta 1
+
+initial release
--- a/php-typography/php-parser/parseHTML.php
+++ b/php-typography/php-parser/parseHTML.php
@ -0,0 +1,1166 @@
+<?php 
+
+/*   
+Project Name: PHP Parser
+Project URI: http://kingdesk.com/projects/php-parser/
+Author: Jeffrey D. King
+Author URI: http://kingdesk.com/about/jeff/
+
+	Copyright 2009, KINGdesk, LLC. Licensed under the GNU General Public License 2.0. If you use, modify and/or redistribute this software, you must leave the KINGdesk, LLC copyright information, the request for a link to http://kingdesk.com, and the web design services contact information unchanged. If you redistribute this software, or any derivative, it must be released under the GNU General Public License 2.0. This program is distributed without warranty (implied or otherwise) of suitability for any particular purpose. See the GNU General Public License for full license terms <http://creativecommons.org/licenses/GPL/2.0/>.
+
+	WE DON'T WANT YOUR MONEY: NO TIPS NECESSARY!  If you enjoy this plugin, a link to http://kingdesk.com from your website would be appreciated.
+	
+	For web design services, please contact info@kingdesk.com.
+*/
+
+
+// Selector constants accepted by the parser's $tagType and $tokenType parameters.
+// Constants that name the same kinds in a different order are aliases: they share one value,
+// so callers may spell the combination whichever way reads best.
+//
+// Tag selectors ($tagType): which "tag" tokens (opening, closing, self-closing) a method acts on.
+define("ALL_TAGS", 1);
+define("OPENING_TAGS", 2);
+define("CLOSING_TAGS", 3);
+define("SELFCLOSING_TAGS", 4);
+define("OPENING_AND_SELFCLOSING_TAGS", 5);
+define("SELFCLOSING_AND_OPENING_TAGS", 5);
+define("OPENING_AND_CLOSING_TAGS", 7);
+define("CLOSING_AND_OPENING_TAGS", 7);
+define("CLOSING_AND_SELFCLOSING_TAGS", 6);
+define("SELFCLOSING_AND_CLOSING_TAGS", 6);
+
+// Token selectors ($tokenType): single kinds first, then every pair, then every triple.
+define("ALL_TOKENS", 1);
+define("TEXT_TOKENS", 2);
+define("TAG_TOKENS", 3);
+define("COMMENT_TOKENS", 4);
+define("CDATA_TOKENS", 5);
+define("TEXT_AND_TAG_TOKENS", 6);
+define("TAG_AND_TEXT_TOKENS", 6);
+define("TEXT_AND_COMMENT_TOKENS", 7);
+define("COMMENT_AND_TEXT_TOKENS", 7);
+define("TEXT_AND_CDATA_TOKENS", 8);
+define("CDATA_AND_TEXT_TOKENS", 8);
+define("TAG_AND_COMMENT_TOKENS", 9);
+define("COMMENT_AND_TAG_TOKENS", 9);
+define("TAG_AND_CDATA_TOKENS", 10);
+define("CDATA_AND_TAG_TOKENS", 10);
+define("COMMENT_AND_CDATA_TOKENS", 11);
+define("CDATA_AND_COMMENT_TOKENS", 11);
+define("TEXT_TAG_AND_COMMENT_TOKENS", 12);
+define("TEXT_COMMENT_AND_TAG_TOKENS", 12);
+define("TAG_TEXT_AND_COMMENT_TOKENS", 12);
+define("TAG_COMMENT_AND_TEXT_TOKENS", 12);
+define("COMMENT_TAG_AND_TEXT_TOKENS", 12);
+define("COMMENT_TEXT_AND_TAG_TOKENS", 12);
+define("TEXT_TAG_AND_CDATA_TOKENS", 13);
+define("TEXT_CDATA_AND_TAG_TOKENS", 13);
+define("TAG_TEXT_AND_CDATA_TOKENS", 13);
+define("TAG_CDATA_AND_TEXT_TOKENS", 13);
+define("CDATA_TAG_AND_TEXT_TOKENS", 13);
+define("CDATA_TEXT_AND_TAG_TOKENS", 13);
+define("TEXT_COMMENT_AND_CDATA_TOKENS", 14);
+define("TEXT_CDATA_AND_COMMENT_TOKENS", 14);
+define("COMMENT_TEXT_AND_CDATA_TOKENS", 14);
+define("COMMENT_CDATA_AND_TEXT_TOKENS", 14);
+define("CDATA_COMMENT_AND_TEXT_TOKENS", 14);
+define("CDATA_TEXT_AND_COMMENT_TOKENS", 14);
+define("TAG_COMMENT_AND_CDATA_TOKENS", 15);
+define("TAG_CDATA_AND_COMMENT_TOKENS", 15);
+define("COMMENT_TAG_AND_CDATA_TOKENS", 15);
+define("COMMENT_CDATA_AND_TAG_TOKENS", 15);
+define("CDATA_COMMENT_AND_TAG_TOKENS", 15);
+define("CDATA_TAG_AND_COMMENT_TOKENS", 15);
+
+
+#########################################################################################################
+#########################################################################################################
+##
+##	parseHTML assumes valid XHTML:
+##		-every tag must be closed
+##		-every attribute must have a value
+##		-tag names and attributes are all lowercase
+##
+#########################################################################################################
+#########################################################################################################
+class parseHTML {
+
+	// Names of block-level elements.  load() resets the "prevChr"/"nextChr" text context whenever it meets one of these tags.
+	var $blockTags = array("address", "article", "aside", "blockquote", "center", "dd", "dialog", "dir", "div", "dl", "dt", "fieldset", "figure", "footer", "form", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "isindex", "li", "menu", "nav", "noframes", "noscript", "ol", "p", "pre", "section", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "ul");
+	// Token list produced by load(), keyed by token index; the structure of each token is described below.
+	var $html = array();
+			/*
+		$html is an ARRAY with the following structure:
+		index	=> ARRAY: tokenized XHTML
+				"type" 		=> STRING: REQUIRED; "comment" | "dtd" | "cdata" | "xml" | "tag" | "text"
+				"value"		=> STRING: REQUIRED; token content
+				"name"		=> STRING: REQUIRED for type "tag"; element name
+				"openPos"	=> INTEGER: REQUIRED for closing tags (including self-closing); integer corresponding to the index of the opening tag
+								// if a closing tag is missing an opening match, it will be treated as self-closing
+				"closePos"	=> INTEGER: REQUIRED for opening and self-closing tags; integer corresponding to the index of the closing tag
+								// if an opening tag is missing a closing match, it will be treated as closed by its parent's closing tag (or end of string)
+				"attribute"	=> ARRAY: REQUIRED if "tag" has assigned attributes; attribute_names => values
+				"parents"	=> ARRAY: REQUIRED if "tag" has parent tag(s); parent tags: "index" => array("tagName" => tagName, "attributes" => array(name => value, ... ))
+				"locked"	=> BOOLEAN: OPTIONAL; TRUE by default for all types.  It is never set to FALSE, it is just unset.
+				"ERROR"		=> STRING: error message (i.e. improperly nested tag...)
+				"prevChr"	=> CHARACTER: REQUIRED for type "text" if previous character exists; last character of previous "text" if separated by inline tags or HTML comments
+				"nextChr"	=> CHARACTER: REQUIRED for type "text" if next character exists; first character of next "text" if only separated by inline tags or HTML comments
+			*/
+	
+	
+	#=======================================================================
+	#=======================================================================
+	#==	METHODS
+	#=======================================================================
+	#=======================================================================
+	
+	
+	########################################################################
+	#	( UN | RE )LOAD, UPDATE AND CLEAR METHODS
+	#
+	# 
+
+	#	Params:		STRING containing HTML markup.
+	#	Action:		Clears any previous state, splits $rawHTML into "comment", "dtd", "cdata", "xml",
+	#				"tag" and "text" tokens (all locked) and saves them to $this->html.
+	#				Tags record their nesting ("parents", "openPos", "closePos"); text tokens record
+	#				the adjoining characters ("prevChr", "nextChr") of neighbouring text that is not
+	#				separated from them by a block-level tag.
+	#	Returns:	TRUE on completion
+	function load($rawHTML) {
+		
+		$this->clear();
+		
+		$tokens = array();
+		$index = 0;
+		$nestedTags = array(); // stores $index => array("tagName" => name, "attributes" => ARRAY|NULL) for every tag still open
+
+		// lowercase the block tag list once; it decides when the text context must be reset
+		$blockTagNames = array_map('strtolower', $this->blockTags);
+
+		# find HTML comments
+		$commentTag = '(?:<!(?:--.*?--\s*)+>)'; // required modifier: s (DotAll)
+	
+		# find Document Type Definition		
+		$dtdTag = '(?:<![-a-zA-Z0-9:]+\b(?:.*?(?:--.*?--\s*)?)*>)'; // required modifier: s (DotAll)
+	
+		# find (Unparsed) Character Data
+		# a CDATA section opens with "<![CDATA[" -- without the "!" the pattern could never match one
+		$cdataTag = '(?:<!\[CDATA\[.*?\]\]>)'; // required modifier: s (DotAll)
+	
+		# find XML Declaration
+		$xmlTag = '(?:<\?xml\s.*?\?>)'; // required modifier: s (DotAll)
+	
+		# find XHTML Tags
+		$htmlTag = '(?:</?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)'; // required modifier: s (DotAll)
+
+		# find XHTML Tags with ability to grab tag name and test for closing tags
+		$htmlTagDetail = '
+				<							# open of HTML element
+				(/)?						# Subpattern 1: test for closing tag
+				([-a-zA-Z0-9:]+)			# Subpattern 2: tag name			
+				(?:
+					[^\'">]+				# matches any attribute names
+					|
+					"[^"]*"					# double quoted attribute value
+					|
+					\'[^\']*\'				# single quoted attribute value
+				)*
+				((?<=/)>)?					# Subpattern 3: test for self-closing tag
+			'; //required modifiers: x
+	
+		# find attribute/value pairs in HTML tags
+		$attributePattern= '
+				\s+							# one or more spaces
+				([-a-zA-Z0-9:]+)			# Subpattern 1: attribute name
+				\s*=\s*
+				(?: 
+					"([^"]+)"				# Subpattern 2: possibly attribute value
+					|
+					\'([^\']+)\'			# Subpattern 3: possibly attribute value
+				)
+			'; //required modifiers: x
+
+		# find any tag
+		$anyTag = "$commentTag|$dtdTag|$cdataTag|$xmlTag|$htmlTag"; // required modifier: s (DotAll)
+
+		$parts = preg_split("@($anyTag)@s", $rawHTML, -1, PREG_SPLIT_DELIM_CAPTURE);
+
+		// we will use "prevChr" and "nextChr" to give context to type "text"
+		// "prevChr" is not relevant to the first child of type "text" in a block level HTML element
+		// "nextChr" is not relevant to the last child of type "text" in a block level HTML element
+		// we will use $prevTextIndex to help us properly assign "prevChr" and "nextChr"
+		$prevTextIndex = NULL;
+		foreach ($parts as $part) {
+			if ($part != "") {
+		
+				// comments, DTDs, CDATA sections and XML declarations are all stored the same way
+				$simpleType = NULL;
+				if(preg_match("@\A$commentTag\Z@s", $part)) {
+					$simpleType = 'comment';
+				} elseif(preg_match("@\A$dtdTag\Z@s", $part)) {
+					$simpleType = 'dtd';
+				} elseif(preg_match("@\A$cdataTag\Z@s", $part)) {
+					$simpleType = 'cdata';
+				} elseif(preg_match("@\A$xmlTag\Z@s", $part)) {
+					$simpleType = 'xml';
+				}
+
+				if($simpleType !== NULL) {
+					$tokens[$index] = array(
+									"type"		=> $simpleType,
+									"value"		=> $part,
+									"locked"	=> TRUE,
+									);
+
+					// remember parents
+					if(!empty($nestedTags))
+						$tokens[$index]["parents"] = $nestedTags;
+				} elseif(preg_match("@\A$htmlTagDetail@x", $part, $tagMatch)) {
+					$tagName = $tagMatch[2];
+					$selfClose = !empty($tagMatch[3]);
+					$closing = ($tagMatch[1] || $selfClose) ? TRUE : FALSE;
+				
+					$tokens[$index] = array(
+									"type"		=> 'tag',
+									"value"		=> $part,
+									"name"		=> $tagName,
+									"locked"	=> TRUE,
+										);
+
+					// if tag was block, reset character context for type "text"
+					if(in_array(strtolower($tagName), $blockTagNames, TRUE))
+						$prevTextIndex = NULL;
+
+					if(!$closing) {
+						// remember parents
+						if(!empty($nestedTags))
+							$tokens[$index]["parents"] = $nestedTags;
+
+						$attribute = array();
+						if(preg_match_all("@$attributePattern@x", $part, $attributeMatch)) {
+							foreach($attributeMatch[1] as $key => $attributeName) {
+								$attributeValue = $attributeMatch[2][$key].$attributeMatch[3][$key]; // one will be empty, the other will contain the desired value
+								$attribute[$attributeName] = $attributeValue;
+							}
+						}
+						if(!empty($attribute))
+							$tokens[$index]["attribute"] =  $attribute;
+						
+						//add to $nestedTags
+						$nestedTags[$index]["tagName"] = $tagName;
+						if (isset($tokens[$index]["attribute"])) {
+							$nestedTags[$index]["attributes"] = $tokens[$index]["attribute"];
+						} else {
+							$nestedTags[$index]["attributes"] = NULL;
+						}
+					} elseif($selfClose) {
+						// remember parents
+						if(!empty($nestedTags))
+							$tokens[$index]["parents"] = $nestedTags;
+						
+						// set openPos and closePos to this index
+						$tokens[$index]["openPos"] = $index;
+						$tokens[$index]["closePos"] = $index;
+					} else {
+						// closing tag: look for the innermost open tag with the same name BEFORE touching
+						// anything, so an unmatched closing tag cannot leave closePos/ERROR marks on
+						// ancestors that are still legitimately open
+						$matchIndex = NULL;
+						foreach(array_reverse($nestedTags, TRUE) as $openIndex => $openTag) {
+							if($openTag["tagName"] == $tagName) {
+								$matchIndex = $openIndex;
+								break;
+							}
+						}
+
+						if($matchIndex === NULL) {
+							// treat unmatched closing tag as self closing; the error belongs to this token
+							$tokens[$index]["openPos"] = $index;
+							$tokens[$index]["closePos"] = $index;
+							$tokens[$index]["ERROR"] = "MISSING OR IMPROPERLY NESTED OPENING TAG";
+						} else {
+							// pop open tags until the matching start tag is reached
+							while(count($nestedTags) > 0) {
+								end($nestedTags);
+								$lastTagIndex = key($nestedTags);
+								unset($nestedTags[$lastTagIndex]);
+
+								if($lastTagIndex === $matchIndex) {
+									// we have a matching start tag
+									$tokens[$index]["openPos"] = $lastTagIndex;
+									$tokens[$lastTagIndex]["closePos"] = $index;
+									break;
+								}
+
+								// we have an improperly nested opening tag, close it at its parent's closing tag
+								$tokens[$lastTagIndex]["closePos"] = $index;
+								$tokens[$lastTagIndex]["ERROR"] = "MISSING OR IMPROPERLY NESTED CLOSING TAG";
+
+								// if improperly nested tag was block, reset character context for type "text"
+								if(in_array(strtolower($tokens[$lastTagIndex]["name"]), $blockTagNames, TRUE))
+									$prevTextIndex = NULL;
+							}
+						}
+					}
+				} else {
+					$tokens[$index] = array(
+									"type"=>'text',
+									"value"=>$part,
+									"locked"	=> TRUE,
+									);
+					// remember parents
+					if(!empty($nestedTags))
+						$tokens[$index]["parents"] = $nestedTags;
+
+					// remember character context
+					// strict test: index 0 is a valid previous text token ("!= NULL" treated it as none)
+					if($prevTextIndex !== NULL) {
+						// assign "prevChr"
+						$tokens[$index]["prevChr"] = mb_substr($tokens[$prevTextIndex]["value"], -1, 1,"UTF-8");
+						//set "nextChr" of previous text token
+						$tokens[$prevTextIndex]["nextChr"] = mb_substr($tokens[$index]["value"], 0, 1,"UTF-8");						
+					}
+					//set $prevTextIndex for next text item
+					$prevTextIndex = $index;
+				}
+				$index++;
+			}
+		}
+		
+		
+		//look for opening tags that never got closed, close at end of file
+		foreach($nestedTags as $openIndex => $openTag) {
+			$tokens[$openIndex]["closePos"] = $index;
+			$tokens[$openIndex]["ERROR"] = "MISSING CLOSING TAG";
+		}
+		
+		$this->html = $tokens;
+		return TRUE;
+	}
+
+	#	Action:		re-tokenizes the current markup: serializes the tokens with unload() and feeds the
+	#				resulting string back through load() (picks up tags inserted into text values and
+	#				drops tokens whose values were emptied)
+	#	Returns:	result of load() (TRUE on completion)
+	#	WARNING: Tokens acquired through "get" methods may not match new tokenization
+	function reload() {
+		$markup = $this->unload();
+		return $this->load($markup);
+	}
+	
+	#	Action:		joins the "value" of every token, in order, into one string and then
+	#				empties $this->html via clear()
+	#	Returns:	STRING of HTML markup
+	function unload() {
+		$pieces = array();
+		foreach($this->html as $token) {
+			$pieces[] = $token["value"];
+		}
+		$this->clear();
+		return implode("", $pieces);
+	}
+	
+	#   Params:		ARRAY of tokens, keyed by token index.
+	#	Action:		overwrite "value" for all unlocked matching tokens.
+	#				Indexes that do not exist in $this->html and supplied tokens without a "value"
+	#				are skipped, so no partial (type-less) tokens are ever created.
+	#	Returns:	TRUE on completion
+	function update($tokens) {
+		foreach($tokens as $index => $token) {
+			// same existence guard lock() uses; assigning to an unknown index would invent a token
+			if(!isset($this->html[$index]) || !isset($token["value"]))
+				continue;
+			// a token counts as unlocked when "locked" is absent or falsy
+			if(empty($this->html[$index]["locked"]))
+				$this->html[$index]["value"] = $token["value"];
+		}
+		return TRUE;		
+	}
+
+	#	Action:		discards all tokens by resetting $this->html to an empty array
+	#	Returns:	TRUE on completion
+	function clear() {
+		$noTokens = array();
+		$this->html = $noTokens;
+		return TRUE;
+	}
+
+
+	########################################################################
+	#	LOCK / UNLOCK METHODS
+	#	Action:		lock matching tokens
+	#	Returns:	TRUE on completion
+	
+	# Params:	ARRAY of tokens; only the array keys (token indexes) are used.
+	# Action:	sets "locked" to TRUE on every listed token that exists in $this->html
+	# Returns:	TRUE on completion
+	function lock($tokens) {
+		foreach(array_keys($tokens) as $position) {
+			if(!isset($this->html[$position]))
+				continue;
+			$this->html[$position]["locked"] = TRUE;
+		}
+		return TRUE;
+	}
+	# Params:	ARRAY of tokens; only the array keys (token indexes) are used.
+	# Action:	removes the "locked" flag from every listed token that currently has one
+	# Returns:	TRUE on completion
+	function unlock($tokens) {
+		foreach(array_keys($tokens) as $position) {
+			if(!isset($this->html[$position]["locked"]))
+				continue;
+			unset($this->html[$position]["locked"]);
+		}
+		return TRUE;
+	}
+
+	# Action:	locks all comment tokens.  load() stores them with type "comment" (singular),
+	#			so that exact name must be passed to lock_type().
+	# Returns:	result of lock_type()
+	function lock_comments() {
+		return $this->lock_type("comment");
+	}
+	# Action:	unlocks all comment tokens.  load() stores them with type "comment" (singular),
+	#			so that exact name must be passed to unlock_type().
+	# Returns:	result of unlock_type()
+	function unlock_comments() {
+		return $this->unlock_type("comment");
+	}
+	
+	# Action:	delegates to lock_type() for tokens of type "dtd"
+	# Returns:	result of lock_type()
+	function lock_dtd() {
+		$result = $this->lock_type("dtd");
+		return $result;
+	}
+	# Action:	delegates to unlock_type() for tokens of type "dtd"
+	# Returns:	result of unlock_type()
+	function unlock_dtd() {
+		$result = $this->unlock_type("dtd");
+		return $result;
+	}
+	
+	# Action:	delegates to lock_type() for tokens of type "cdata"
+	# Returns:	result of lock_type()
+	function lock_cdata() {
+		$result = $this->lock_type("cdata");
+		return $result;
+	}
+	# Action:	delegates to unlock_type() for tokens of type "cdata"
+	# Returns:	result of unlock_type()
+	function unlock_cdata() {
+		$result = $this->unlock_type("cdata");
+		return $result;
+	}
+	
+	# Action:	locks all XML declaration tokens.  load() stores them with type "xml";
+	#			passing "tag" here locked every HTML tag instead.
+	# Returns:	result of lock_type()
+	function lock_xml() {
+		return $this->lock_type("xml");
+	}
+	# Action:	unlocks all XML declaration tokens.  load() stores them with type "xml";
+	#			passing "tag" here unlocked every HTML tag instead.
+	# Returns:	result of unlock_type()
+	function unlock_xml() {
+		return $this->unlock_type("xml");
+	}
+	
+	#	Params:		$tagType INT equal to OPENING_TAGS, CLOSING_TAGS, SELFCLOSING_TAGS, OPENING_AND_SELFCLOSING_TAGS, SELFCLOSING_AND_CLOSING_TAGS, OPENING_AND_CLOSING_TAGS, ALL_TAGS
+	#	Action:		locks the "tag" tokens of the requested kind; any other $tagType value selects every tag
+	#	Returns:	result of lock() (TRUE on completion)
+	function lock_tags($tagType = ALL_TAGS) {
+		$selected = array();
+		foreach($this->get_type("tag") as $position => $candidate) {
+			// opening tags carry only "closePos", closing tags only "openPos", self-closing tags both
+			$hasOpenPos = isset($candidate["openPos"]);
+			$hasClosePos = isset($candidate["closePos"]);
+
+			switch($tagType) {
+				case OPENING_TAGS:
+					$keep = (!$hasOpenPos && $hasClosePos);
+					break;
+				case CLOSING_TAGS:
+					$keep = ($hasOpenPos && !$hasClosePos);
+					break;
+				case SELFCLOSING_TAGS:
+					$keep = ($hasOpenPos && $hasClosePos);
+					break;
+				case OPENING_AND_SELFCLOSING_TAGS:
+					$keep = $hasClosePos;
+					break;
+				case SELFCLOSING_AND_CLOSING_TAGS:
+					$keep = $hasOpenPos;
+					break;
+				case OPENING_AND_CLOSING_TAGS:
+					$keep = ($hasOpenPos != $hasClosePos);
+					break;
+				default:
+					$keep = TRUE;
+			}
+
+			if($keep)
+				$selected[$position] = $candidate;
+		}
+		return $this->lock($selected);
+	}
+	#	Params:		$tagType INT equal to OPENING_TAGS, CLOSING_TAGS, SELFCLOSING_TAGS, OPENING_AND_SELFCLOSING_TAGS, SELFCLOSING_AND_CLOSING_TAGS, OPENING_AND_CLOSING_TAGS, ALL_TAGS
+	#	Action:		unlocks the "tag" tokens of the requested kind; any other $tagType value selects every tag
+	#	Returns:	result of unlock() (TRUE on completion)
+	function unlock_tags($tagType = ALL_TAGS) {
+		$selected = array();
+		foreach($this->get_type("tag") as $position => $candidate) {
+			// opening tags carry only "closePos", closing tags only "openPos", self-closing tags both
+			$hasOpenPos = isset($candidate["openPos"]);
+			$hasClosePos = isset($candidate["closePos"]);
+
+			switch($tagType) {
+				case OPENING_TAGS:
+					$keep = (!$hasOpenPos && $hasClosePos);
+					break;
+				case CLOSING_TAGS:
+					$keep = ($hasOpenPos && !$hasClosePos);
+					break;
+				case SELFCLOSING_TAGS:
+					$keep = ($hasOpenPos && $hasClosePos);
+					break;
+				case OPENING_AND_SELFCLOSING_TAGS:
+					$keep = $hasClosePos;
+					break;
+				case SELFCLOSING_AND_CLOSING_TAGS:
+					$keep = $hasOpenPos;
+					break;
+				case OPENING_AND_CLOSING_TAGS:
+					$keep = ($hasOpenPos != $hasClosePos);
+					break;
+				default:
+					$keep = TRUE;
+			}
+
+			if($keep)
+				$selected[$position] = $candidate;
+		}
+		return $this->unlock($selected);
+	}
+
+	# Action:	delegates to lock_type() for tokens of type "text"
+	# Returns:	result of lock_type()
+	function lock_text() {
+		$result = $this->lock_type("text");
+		return $result;
+	}
+	# Action:	delegates to unlock_type() for tokens of type "text"
+	# Returns:	result of unlock_type()
+	function unlock_text() {
+		$result = $this->unlock_type("text");
+		return $result;
+	}
+
+	# Params:	$tokens ARRAY of tokens keyed by token index
+	#			$tokenType INT token selector handed on to get_sequential_tokens()
+	# Action:	for each opening tag in $tokens, locks the tokens lying between it and its closing tag
+	# Returns:	TRUE on completion
+	function lock_children($tokens, $tokenType = ALL_TOKENS) {
+		foreach($tokens as $position => $parent) {
+			// only opening tags qualify: a usable "closePos" and no usable "openPos"
+			if(!empty($parent["openPos"]) || empty($parent["closePos"]))
+				continue;
+
+			$firstChild = $position + 1;
+			$lastChild = $parent["closePos"] - 1;
+			if($firstChild <= $lastChild) {
+				$children = $this->get_sequential_tokens($firstChild, $lastChild, $tokenType);
+				$this->lock($children);
+			}
+		}
+		return TRUE;
+	}
+	# Params:	$tokens ARRAY of tokens keyed by token index
+	#			$tokenType INT token selector handed on to get_sequential_tokens()
+	# Action:	for each opening tag in $tokens, unlocks the tokens lying between it and its closing tag
+	# Returns:	TRUE on completion
+	function unlock_children($tokens, $tokenType = ALL_TOKENS) {
+		foreach($tokens as $position => $parent) {
+			// only opening tags qualify: a usable "closePos" and no usable "openPos"
+			if(!empty($parent["openPos"]) || empty($parent["closePos"]))
+				continue;
+
+			$firstChild = $position + 1;
+			$lastChild = $parent["closePos"] - 1;
+			if($firstChild <= $lastChild) {
+				$children = $this->get_sequential_tokens($firstChild, $lastChild, $tokenType);
+				$this->unlock($children);
+			}
+		}
+		return TRUE;
+	}
+
+
+	########################################################################
+	#	GET METHODS
+	#   Returns:    ARRAY of matching tokens
+	
+	# Returns:	ARRAY of every token currently held in $this->html
+	function get_all() {
+		$everything = $this->html;
+		return $everything;
+	}
+	# Returns:	ARRAY of tokens (keyed by token index) whose "locked" flag is set.
+	#			unlock() removes the flag rather than setting it to FALSE, so its absence
+	#			must be tolerated here instead of being read as an undefined index.
+	function get_locked() {
+		$tokens = array();
+		foreach($this->html as $index => $token) {
+			if(!empty($token["locked"]))
+				$tokens[$index]=$token;
+		}
+		return $tokens;		
+	}
+	# Returns:	ARRAY of tokens (keyed by token index) that are not locked.
+	#			unlock() removes the "locked" key, so a missing key means unlocked; empty()
+	#			covers that case without reading an undefined index.
+	function get_unlocked() {
+		$tokens = array();
+		foreach($this->html as $index => $token) {
+			if(empty($token["locked"]))
+				$tokens[$index]=$token;
+		}
+		return $tokens;		
+	}
+
+	# Returns:	result of get_type() for comment tokens.  load() stores them with type
+	#			"comment" (singular), so that exact name must be requested.
+	function get_comments() {
+		return $this->get_type("comment");
+	}
+	# Returns:	result of get_locked_type() for comment tokens.  load() stores them with type
+	#			"comment" (singular), so that exact name must be requested.
+	function get_locked_comments() {
+		return $this->get_locked_type("comment");
+	}
+	# Returns:	result of get_unlocked_type() for comment tokens.  load() stores them with type
+	#			"comment" (singular), so that exact name must be requested.
+	function get_unlocked_comments() {
+		return $this->get_unlocked_type("comment");
+	}
+	
+	# Returns:	result of get_type() for tokens of type "dtd"
+	function get_dtd() {
+		$found = $this->get_type("dtd");
+		return $found;
+	}
+	# Returns:	result of get_locked_type() for tokens of type "dtd"
+	function get_locked_dtd() {
+		$found = $this->get_locked_type("dtd");
+		return $found;
+	}
+	# Returns:	result of get_unlocked_type() for tokens of type "dtd"
+	function get_unlocked_dtd() {
+		$found = $this->get_unlocked_type("dtd");
+		return $found;
+	}
+	
+	# Returns:	result of get_type() for tokens of type "cdata"
+	function get_cdata() {
+		$found = $this->get_type("cdata");
+		return $found;
+	}
+	# Returns:	result of get_locked_type() for tokens of type "cdata"
+	function get_locked_cdata() {
+		$found = $this->get_locked_type("cdata");
+		return $found;
+	}
+	# Returns:	result of get_unlocked_type() for tokens of type "cdata"
+	function get_unlocked_cdata() {
+		$found = $this->get_unlocked_type("cdata");
+		return $found;
+	}
+	
+	# Returns:	result of get_type() for XML declaration tokens.  load() stores them with
+	#			type "xml"; requesting "tag" returned the HTML tags instead.
+	function get_xml() {
+		return $this->get_type("xml");
+	}
+	# Returns:	result of get_locked_type() for XML declaration tokens.  load() stores them
+	#			with type "xml"; requesting "tag" returned the HTML tags instead.
+	function get_locked_xml() {
+		return $this->get_locked_type("xml");
+	}
+	# Returns:	result of get_unlocked_type() for XML declaration tokens.  load() stores them
+	#			with type "xml"; requesting "tag" returned the HTML tags instead.
+	function get_unlocked_xml() {
+		return $this->get_unlocked_type("xml");
+	}
+	
+	#	Params:		$tagType INT equal to OPENING_TAGS, CLOSING_TAGS, SELFCLOSING_TAGS, OPENING_AND_SELFCLOSING_TAGS, SELFCLOSING_AND_CLOSING_TAGS, OPENING_AND_CLOSING_TAGS, ALL_TAGS
+	#	Returns:	ARRAY of "tag" tokens of the requested kind, keyed by token index;
+	#				any other $tagType value returns every tag
+	function get_tags($tagType = ALL_TAGS) {
+		$selected = array();
+		foreach($this->get_type("tag") as $position => $candidate) {
+			// opening tags carry only "closePos", closing tags only "openPos", self-closing tags both
+			$hasOpenPos = isset($candidate["openPos"]);
+			$hasClosePos = isset($candidate["closePos"]);
+
+			switch($tagType) {
+				case OPENING_TAGS:
+					$keep = (!$hasOpenPos && $hasClosePos);
+					break;
+				case CLOSING_TAGS:
+					$keep = ($hasOpenPos && !$hasClosePos);
+					break;
+				case SELFCLOSING_TAGS:
+					$keep = ($hasOpenPos && $hasClosePos);
+					break;
+				case OPENING_AND_SELFCLOSING_TAGS:
+					$keep = $hasClosePos;
+					break;
+				case SELFCLOSING_AND_CLOSING_TAGS:
+					$keep = $hasOpenPos;
+					break;
+				case OPENING_AND_CLOSING_TAGS:
+					$keep = ($hasOpenPos != $hasClosePos);
+					break;
+				default:
+					$keep = TRUE;
+			}
+
+			if($keep)
+				$selected[$position] = $candidate;
+		}
+		return $selected;
+	}
+	# 	Params:	$tagType INT equal to OPENING_TAGS, CLOSING_TAGS, SELFCLOSING_TAGS, OPENING_AND_SELFCLOSING_TAGS, SELFCLOSING_AND_CLOSING_TAGS, OPENING_AND_CLOSING_TAGS, ALL_TAGS
+	#	Returns:	ARRAY of the "tag" tokens supplied by get_locked_type() that are of the requested
+	#				kind, keyed by token index; any other $tagType value returns all of them
+	function get_locked_tags($tagType = ALL_TAGS) {
+		$selected = array();
+		foreach($this->get_locked_type("tag") as $position => $candidate) {
+			// opening tags carry only "closePos", closing tags only "openPos", self-closing tags both
+			$hasOpenPos = isset($candidate["openPos"]);
+			$hasClosePos = isset($candidate["closePos"]);
+
+			switch($tagType) {
+				case OPENING_TAGS:
+					$keep = (!$hasOpenPos && $hasClosePos);
+					break;
+				case CLOSING_TAGS:
+					$keep = ($hasOpenPos && !$hasClosePos);
+					break;
+				case SELFCLOSING_TAGS:
+					$keep = ($hasOpenPos && $hasClosePos);
+					break;
+				case OPENING_AND_SELFCLOSING_TAGS:
+					$keep = $hasClosePos;
+					break;
+				case SELFCLOSING_AND_CLOSING_TAGS:
+					$keep = $hasOpenPos;
+					break;
+				case OPENING_AND_CLOSING_TAGS:
+					$keep = ($hasOpenPos != $hasClosePos);
+					break;
+				default:
+					$keep = TRUE;
+			}
+
+			if($keep)
+				$selected[$position] = $candidate;
+		}
+		return $selected;
+	}
+	# 	Params:	$tagType INT equal to OPENING_TAGS, CLOSING_TAGS, SELFCLOSING_TAGS, OPENING_AND_SELFCLOSING_TAGS, SELFCLOSING_AND_CLOSING_TAGS, OPENING_AND_CLOSING_TAGS, ALL_TAGS
+	#	Returns:	ARRAY of the "tag" tokens supplied by get_unlocked_type() that are of the requested
+	#				kind, keyed by token index; any other $tagType value returns all of them
+	function get_unlocked_tags($tagType = ALL_TAGS) {
+		$selected = array();
+		foreach($this->get_unlocked_type("tag") as $position => $candidate) {
+			// opening tags carry only "closePos", closing tags only "openPos", self-closing tags both
+			$hasOpenPos = isset($candidate["openPos"]);
+			$hasClosePos = isset($candidate["closePos"]);
+
+			switch($tagType) {
+				case OPENING_TAGS:
+					$keep = (!$hasOpenPos && $hasClosePos);
+					break;
+				case CLOSING_TAGS:
+					$keep = ($hasOpenPos && !$hasClosePos);
+					break;
+				case SELFCLOSING_TAGS:
+					$keep = ($hasOpenPos && $hasClosePos);
+					break;
+				case OPENING_AND_SELFCLOSING_TAGS:
+					$keep = $hasClosePos;
+					break;
+				case SELFCLOSING_AND_CLOSING_TAGS:
+					$keep = $hasOpenPos;
+					break;
+				case OPENING_AND_CLOSING_TAGS:
+					$keep = ($hasOpenPos != $hasClosePos);
+					break;
+				default:
+					$keep = TRUE;
+			}
+
+			if($keep)
+				$selected[$position] = $candidate;
+		}
+		return $selected;
+	}
+	
+	# Returns:	result of get_type() for tokens of type "text"
+	function get_text() {
+		$found = $this->get_type("text");
+		return $found;
+	}
+	# Returns:	result of get_locked_type() for tokens of type "text"
+	function get_locked_text() {
+		$found = $this->get_locked_type("text");
+		return $found;
+	}
+	# Returns:	result of get_unlocked_type() for tokens of type "text"
+	function get_unlocked_text() {
+		$found = $this->get_unlocked_type("text");
+		return $found;
+	}
+	
+	# 	Params:	$tagNames STRING tag name or ARRAY of tag names (compared after lowercasing)
+	#			$tagType INT equal to OPENING_TAGS, CLOSING_TAGS, SELFCLOSING_TAGS, OPENING_AND_SELFCLOSING_TAGS, SELFCLOSING_AND_CLOSING_TAGS, OPENING_AND_CLOSING_TAGS, ALL_TAGS
+	#	Returns:	ARRAY of the tags from get_tags($tagType) whose "name" equals one of the
+	#			lowercased $tagNames, keyed by token index
+	function get_tags_by_name($tagNames, $tagType = ALL_TAGS) {
+		$wanted = is_string($tagNames) ? array($tagNames) : $tagNames;
+		$matches = array();
+
+		foreach($this->get_tags($tagType) as $position => $candidate) {
+			foreach($wanted as $wantedName) {
+				if($candidate["name"] != strtolower($wantedName))
+					continue;
+				$matches[$position] = $candidate;
+			}
+		}
+		return $matches;
+	}
+	#	Params:	$idNames STRING id name or ARRAY of id names
+	#	Returns:	result of get_tags_by_attribute() for the "id" attribute, restricted to
+	#			opening and self-closing tags
+	function get_tag_by_id($idNames) {
+		$found = $this->get_tags_by_attribute('id', $idNames, OPENING_AND_SELFCLOSING_TAGS);
+		return $found;
+	}
+	#	Params:	$classNames STRING class name or ARRAY of class names
+	#			$tagType INT equal to OPENING_TAGS, SELFCLOSING_TAGS, OPENING_AND_SELFCLOSING_TAGS
+	#	Returns:	result of get_tags_by_attribute() for the "class" attribute
+	function get_tags_by_class($classNames, $tagType = OPENING_AND_SELFCLOSING_TAGS) {
+		$found = $this->get_tags_by_attribute('class', $classNames, $tagType);
+		return $found;
+	}
+	#	Params:	$attribute STRING attribute name
+	#			$attributeValues STRING attribute value or ARRAY of attribute values
+	#			$tagType INT equal to OPENING_TAGS, SELFCLOSING_TAGS, OPENING_AND_SELFCLOSING_TAGS
+	#	Returns:	ARRAY of matching tags keyed by token index:
+	#			  "id"    - for each requested value, the first tag whose id equals it
+	#			  "class" - every tag with at least one whitespace-separated class equal to a requested value
+	#			  other   - every tag whose attribute equals one of the requested values
+	function get_tags_by_attribute($attribute, $attributeValues, $tagType = OPENING_TAGS) {
+		if(is_string($attributeValues)) $attributeValues = array($attributeValues);
+		$tags = $this->get_tags($tagType);
+		$tagsByAttribute = array();
+
+		if(strtolower($attribute) == "id") {
+			foreach($attributeValues as $attributeValue) {
+				foreach ($tags as $index => $tag) {
+					// tags without an id are skipped instead of reading an undefined index
+					if(isset($tag["attribute"]["id"]) && $tag["attribute"]["id"] == $attributeValue) {
+						$tagsByAttribute[$index] = $tag;
+						break; // only the first tag per id value is returned
+					}
+				}
+			}
+		} elseif(strtolower($attribute) == "class") {
+			foreach ($tags as $index => $tag) {
+				if(isset($tag["attribute"]["class"])) {
+					//because there may be multiple classes
+					$classList = preg_split('#\s+#', $tag["attribute"]["class"] , -1, PREG_SPLIT_NO_EMPTY);
+					foreach($classList as $className) {
+						foreach($attributeValues as $attributeValue) {
+							if($className == $attributeValue) {
+								$tagsByAttribute[$index] = $tag;
+							}
+						}
+					}
+				}
+			}
+		} else {
+			foreach ($tags as $index => $tag) {
+				// test the current $tag; testing the $tags list itself never matched anything
+				if(isset($tag["attribute"][$attribute])) {
+					foreach($attributeValues as $attributeValue) {
+						if($tag["attribute"][$attribute] == $attributeValue)
+							$tagsByAttribute[$index] = $tag;
+					}
+				}
+			}
+		}
+		return $tagsByAttribute;
+	}
+
+	#	Params:	ARRAY of tokens
+	#	Action:		for each given token that records a "closePos" but no "openPos", gathers the tokens
+	#				lying strictly between that token and its "closePos"
+	#	Returns:	ARRAY of tokens (filtered by $tokenType through get_sequential_tokens), keyed by token index
+	function get_children($tokens, $tokenType = ALL_TOKENS) {
+		$children = array();
+		foreach($tokens as $position => $parent) {
+			// skip tokens without a usable closing position, and tokens that point back to an opening position
+			if(empty($parent["closePos"]) || !empty($parent["openPos"])) continue;
+
+			$first = $position+1;
+			$last = $parent["closePos"]-1;
+			if($first > $last) continue; // nothing lies between the token and its closing position
+
+			// array union keeps the entry already present for an index, so overlapping ranges add no duplicates
+			$children = $children + $this->get_sequential_tokens($first, $last, $tokenType);
+		}
+		return $children;
+	}
+	
+	
+	########################################################################
+	#	CONDITIONAL METHODS
+	#
+	#   Returns:    TRUE or FALSE depending if condition is met
+	
+	#   Parameter:  $tagNames MIXED value(s) of tag name, such as STRING of tag name or ARRAY of tag names
+	#				$token ARRAY token to be evaluated
+	#	Action:		checks whether any entry of $token["parents"] has a "tagName" equal to one of $tagNames
+	#	Returns:	TRUE on the first match, FALSE otherwise (also when the token has no "parents")
+	function in_tag($tagNames, $token) {
+		if(!isset($token["parents"])) return FALSE;
+		if(is_string($tagNames)) $tagNames = array($tagNames);
+
+		foreach($token["parents"] as $ancestor) {
+			if(!isset($ancestor["tagName"])) continue;
+			foreach($tagNames as $candidate) {
+				if($ancestor["tagName"] == $candidate) return TRUE;
+			}
+		}
+		return FALSE;
+	}
+
+
+	#   Parameters: $attributeName STRING name of attribute, such as "id" or "class"
+	# 				$attributeValue MIXED value(s) of attribute, such as STRING of id Name or ARRAY of Class Names
+	#					note: if an ARRAY is passed, method will return TRUE if _any_ of the values match
+	#				$token ARRAY token to be evaluated
+	#	Action:		checks whether any entry of $token["parents"] carries $attributeName with one of the given values
+	#				for "class" / "CLASS" the attribute is split on whitespace and each class is compared on its own
+	#	Returns:	TRUE on the first match, FALSE otherwise (also when the token has no "parents")
+	function in_attribute($attributeName, $attributeValues, $token) {
+		if(is_string($attributeValues)) $attributeValues = array($attributeValues);
+		if(!isset($token["parents"])) return FALSE;
+
+		$isClass = ($attributeName == "class" || $attributeName == "CLASS");
+
+		foreach($token["parents"] as $ancestor) {
+			if(!isset($ancestor["attributes"][$attributeName])) continue;
+
+			if($isClass) {
+				// a class attribute may hold several names: compare each one individually
+				$candidates = preg_split('#\s+#', $ancestor["attributes"][$attributeName] , -1, PREG_SPLIT_NO_EMPTY);
+			} else {
+				// any other attribute is compared as a whole
+				$candidates = array($ancestor["attributes"][$attributeName]);
+			}
+
+			foreach($candidates as $candidate) {
+				foreach($attributeValues as $wanted) {
+					if($candidate == $wanted) return TRUE;
+				}
+			}
+		}
+		return FALSE;
+	}
+
+	#   Parameter:  $idName MIXED - ARRAY or STRING of id Name(s)
+	#					note: if an ARRAY is passed, method will return TRUE if _any_ of the values match
+	#				$token ARRAY token to be evaluated
+	#	Returns:	the result of in_attribute() for the "id" attribute
+	function in_id($idName, $token) {
+		$insideId = $this->in_attribute("id", $idName, $token);
+		return $insideId;
+	}
+
+
+	#   Parameter:  $className MIXED - ARRAY or STRING of class Name(s)
+	#					note: if an ARRAY is passed, method will return TRUE if _any_ of the values match
+	#				$token ARRAY token to be evaluated
+	#	Returns:	the result of in_attribute() for the "class" attribute
+	function in_class($className, $token) {
+		$insideClass = $this->in_attribute("class", $className, $token);
+		return $insideClass;
+	}
+
+
+	#=======================================================================
+	#=======================================================================
+	#==	MISC. METHODS
+	#=======================================================================
+	#=======================================================================
+	
+	
+	########################################################################
+	#   LOCK / UNLOCK BY TYPE
+	#	Action:		locks / unlocks matching tokens
+	#   Returns:    TRUE on completion
+
+	#	Params:	STRING type to lock
+	#	Action:		sets "locked" to TRUE on every token of $this->html whose "type" equals $type
+	function lock_type($type) {
+		foreach($this->html as &$entry) {
+			if($entry["type"] != $type) continue;
+			$entry["locked"] = TRUE;
+		}
+		unset($entry); // drop the loop reference so it cannot alias the last token
+		return TRUE;
+	}
+
+	#	Params:	STRING type to unlock
+	#	Action:		removes the "locked" entry from every token of $this->html whose "type" equals $type
+	function unlock_type($type) {
+		foreach($this->html as &$entry) {
+			if($entry["type"] != $type) continue;
+			unset($entry["locked"]);
+		}
+		unset($entry); // drop the loop reference so it cannot alias the last token
+		return TRUE;
+	}
+
+
+	########################################################################
+	#   GET METHODS
+	#   Returns:	returns matching tokens
+	#
+	
+	#	Params:	STRING type to get
+	#	Returns:	ARRAY of the tokens in $this->html whose "type" equals $type, keyed by their original index
+	function get_type($type) {
+		$selected = array();
+		foreach($this->html as $position => $entry) {
+			if($entry["type"] != $type) continue;
+			$selected[$position] = $entry;
+		}
+		return $selected;
+	}
+
+	#	Params:	STRING type to get
+	#	Returns:	ARRAY of tokens of the given type whose "locked" entry is missing or falsy, keyed by original index
+	function get_unlocked_type($type) {
+		$unlocked = array();
+		foreach($this->get_type($type) as $position => $entry) {
+			// empty() covers both "no locked entry at all" and "locked entry set to a falsy value"
+			if(empty($entry["locked"])) {
+				$unlocked[$position] = $entry;
+			}
+		}
+		return $unlocked;
+	}
+
+	#	Params:	STRING type to get
+	#	Returns:	ARRAY of tokens of the given type whose "locked" entry is set and truthy, keyed by original index
+	function get_locked_type($type) {
+		$tokens = array();
+		foreach($this->get_type($type) as $index => $token) {
+			// tokens that were never locked have no "locked" entry at all (unlock_type unsets it),
+			// so test with !empty() instead of reading the key directly and raising an undefined index notice
+			if(!empty($token["locked"]))
+				$tokens[$index] = $token;
+		}
+		return $tokens;
+	}
+
+	#   Params:		INT beginning index
+	#				INT ending index
+	#				$tokenType one of the *_TOKENS constants (optional, defaults to ALL_TOKENS)
+	#	Action:		walks $this->html from the lower to the higher of the two indexes (both inclusive)
+	#				and keeps the tokens whose "type" is allowed by $tokenType
+	#	Returns:	ARRAY of matching tokens, keyed by their index in $this->html
+	function get_sequential_tokens($begIndex, $endIndex, $tokenType = ALL_TOKENS) {
+		// translate the token-type constant into the list of "type" strings to keep
+		switch($tokenType) {
+			case TEXT_TOKENS:
+				$wanted = array('text');
+				break;
+			case TAG_TOKENS:
+				$wanted = array('tag');
+				break;
+			case COMMENT_TOKENS:
+				$wanted = array('comment');
+				break;
+			case CDATA_TOKENS:
+				$wanted = array('cdata');
+				break;
+			case TEXT_AND_TAG_TOKENS:
+				$wanted = array('text','tag');
+				break;
+			case TEXT_AND_COMMENT_TOKENS:
+				$wanted = array('text','comment');
+				break;
+			case TEXT_AND_CDATA_TOKENS:
+				$wanted = array('text','cdata');
+				break;
+			case TAG_AND_COMMENT_TOKENS:
+				$wanted = array('tag','comment');
+				break;
+			case TAG_AND_CDATA_TOKENS:
+				$wanted = array('tag','cdata');
+				break;
+			case COMMENT_AND_CDATA_TOKENS:
+				$wanted = array('comment','cdata');
+				break;
+			case TEXT_TAG_AND_COMMENT_TOKENS:
+				$wanted = array('text','tag','comment');
+				break;
+			case TEXT_TAG_AND_CDATA_TOKENS:
+				$wanted = array('text','tag','cdata');
+				break;
+			case TEXT_COMMENT_AND_CDATA_TOKENS:
+				$wanted = array('text','comment','cdata');
+				break;
+			case TAG_COMMENT_AND_CDATA_TOKENS:
+				$wanted = array('tag','comment','cdata');
+				break;
+			default:
+				// ALL_TOKENS and any unrecognised value select every token type
+				$wanted = array('text','tag','comment','cdata');
+		}
+
+		// accept the bounds in either order
+		if($begIndex > $endIndex) {
+			list($begIndex, $endIndex) = array($endIndex, $begIndex);
+		}
+
+		$found = array();
+		for($position = $begIndex; $position <= $endIndex; $position++) {
+			if(!isset($this->html[$position])) continue; // indexes without a token are skipped
+			if(in_array($this->html[$position]["type"], $wanted)) {
+				$found[$position] = $this->html[$position];
+			}
+		}
+		return $found;
+	}
+
+} // end class parseHTML
--- a/php-typography/php-parser/parseText.php
+++ b/php-typography/php-parser/parseText.php
@ -0,0 +1,511 @@
+<?php 
+
+/*   
+Project Name: PHP Parser
+URI: http://kingdesk.com/projects/php-parser/
+Author: Jeffrey D. King
+Author URI: http://kingdesk.com/about/jeff/
+
+	Copyright 2009, KINGdesk, LLC. Licensed under the GNU General Public License 2.0. If you use, modify and/or redistribute this software, you must leave the KINGdesk, LLC copyright information, the request for a link to http://kingdesk.com, and the web design services contact information unchanged. If you redistribute this software, or any derivative, it must be released under the GNU General Public License 2.0. This program is distributed without warranty (implied or otherwise) of suitability for any particular purpose. See the GNU General Public License for full license terms <http://creativecommons.org/licenses/GPL/2.0/>.
+
+	WE DON'T WANT YOUR MONEY: NO TIPS NECESSARY!  If you enjoy this plugin, a link to http://kingdesk.com from your website would be appreciated.
+	
+	For web design services, please contact info@kingdesk.com.
+*/
+
+
+
+
+
+#########################################################################################################
+#########################################################################################################
+##
+##	parseText assumes no HTML markup in text (except for special html characters like &gt;)
+##
+##	if multibyte characters are passed, encoding must be UTF-8 
+##
+#########################################################################################################
+#########################################################################################################
+class parseText {
+
+	var $mb = FALSE; // set to TRUE by load() when UTF-8 input is detected; manual changes to this must occur prior to load
+	var $parsedHTML; // the parseHTML token handed to load(), or "" when a plain string was loaded
+	var $text = array();
+			/*
+				$text structure:
+					ARRAY:
+						index	=> ARRAY: tokenized Text
+				 
+				 			// REQUIRED
+							"type" 		=> STRING: "space" | "punctuation" | "word" | "other"
+							"value"		=> STRING: token content
+							"parents"	=> ARRAY: parent tags: "index" => array("tagName" => tagName, "attributes" => array(name => value, ... ))
+			 								// elements must be assigned this value if it has a parent HTML element
+			*/
+
+
+
+	#=======================================================================
+	#=======================================================================
+	#==	METHODS
+	#=======================================================================
+	#=======================================================================
+	
+	
+	########################################################################
+	#	( UN | RE )LOAD, UPDATE AND CLEAR METHODS
+	#
+
+	#   Params:		$rawText STRING of text OR ARRAY containing a single parseHTML token
+	# 	Action:		Tokenizes $rawText (or $rawText["value"] - as the case may be) and saves it to $this->text
+	#   Returns:    TRUE on completion
+	#				FALSE if $rawText is neither a string nor an array, if the text is detected as
+	#				neither ASCII nor UTF-8, or if non-ASCII text is passed while mbstring is unavailable
+	function load($rawText) {
+		$this->clear();
+		if(is_string($rawText)) {
+			// not passed a token of class parseHTML so we will fake it
+			$this->parsedHTML = "";
+		} elseif(is_array($rawText)) {
+			// passed an instance of a parseHTML token
+			$this->parsedHTML = $rawText;
+			$rawText = $rawText["value"];
+		} else {
+			// we have an error
+			return FALSE;
+		}
+		
+		if(function_exists('mb_detect_encoding')) {
+			$encodings = array("ASCII","UTF-8", "ISO-8859-1");
+			$encoding = mb_detect_encoding($rawText."a", $encodings);
+		} else {
+			// mbstring is not available: calling mb_detect_encoding would be a fatal error,
+			// so accept pure 7-bit ASCII and reject everything else
+			if(preg_match('/[^\x00-\x7F]/', $rawText)) return FALSE;
+			$encoding = "ASCII";
+		}
+		if("UTF-8" == $encoding) {
+			$this->mb = TRUE;
+			if(!function_exists('mb_strlen')) return FALSE;
+		} elseif("ASCII" != $encoding) {
+			return FALSE;
+		}
+		
+		$tokens = array();
+	
+		# find spacing FIRST (as it is the primary delimiter)
+		
+		# find the HTML character representation for the following characters:
+		#		tab | line feed | carriage return | space | non-breaking space | ethiopic wordspace
+		#		ogham space mark | en quad space | em quad space | en-space | three-per-em space
+		#		four-per-em space | six-per-em space | figure space | punctuation space | em-space
+		#		thin space | hair space | narrow no-break space
+		#		medium mathematical space | ideographic space
+		# Some characters are used inside words, we will not count these as a space for the purpose
+		# of finding word boundaries:
+		#		zero-width-space ("&#8203;", "&#x200b;")
+		#		zero-width-non-joiner ("&#8204;", "&#x200c;", "&zwnj;")
+		#		zero-width-joiner ("&#8205;", "&#x200d;", "&zwj;")
+
+		$htmlSpaces = '
+				(?:
+					(?:										# alpha matches
+						&
+						(?: nbsp|ensp|emsp|thinsp )
+						;
+					)
+					|
+					(?:										# decimal matches
+						&\#
+						(?: 09|1[03]|32|160|4961|5760|819[2-9]|820[0-2]|8239|8287|12288 )
+						;
+					)
+					|
+					(?:										# hexidecimal matches
+						&\#x
+						(?: 000[9ad]|0020|00a0|1361|1680|200[0-9a]|202f|205f|3000 )
+						;
+					)
+					|
+					(?:										# actual characters
+						\x{0009}|\x{000a}|\x{000d}|\x{0020}|\x{00a0}|\x{1361}|\x{2000}|\x{2001}|\x{2002}|\x{2003}|
+						\x{2004}|\x{2005}|\x{2006}|\x{2007}|\x{2008}|\x{2009}|\x{200a}|\x{202f}|\x{205f}|\x{3000}
+					)
+				)
+			'; // required modifiers: x (multiline pattern) i (case insensitive) u (utf8)
+			
+		$space = "(?:\s|$htmlSpaces)+"; // required modifiers: x (multiline pattern) i (case insensitive) u (utf8)
+	
+
+	
+		# find punctuation and symbols before words (to capture preceding delimiting characters like hyphens or underscores)
+		
+		# see http://www.unicode.org/charts/PDF/U2000.pdf
+		# see http://www.unicode.org/charts/PDF/U2E00.pdf
+		# find punctuation and symbols
+		#	dec matches =   33-44|46-47|58-60|62-64|91-94|96|123-126|161-172|174-191|215|247|710|732|977-978|982|8211-8231|8240-8286|8289-8292|8352-8399|8448-8527|8592-9215|9632-9983|11776-11903
+		# 	hex matches = 	0021-002c|002e-002f|003a-003c|003e-0040|005b-e|0060|007b-007e|00a1-00ac|00ae-00bf|00d7|00f7|02c6|02dc|03d1-03d2|
+		# 					03d6|2013-2027|2030-205e|2061-2064|20a0-20cf|2100-214f|2190-23ff|25a0-26ff|2e00-2e7f
+		#
+		# Some characters are used inside words, we will not count these as punctuation for the purpose
+		# of finding word boundaries:
+		# 		hyphens ("&#45;", "&#173;", "&#8208;", "&#8209;", "&#8210;", "&#x002d;", "&#x00ad;", "&#x2010;", "&#x2011;", "&#x2012;", "&shy;")
+		#		underscore ("&#95;", "&#x005f;")
+		$htmlPunctuation = '
+				(?:
+					(?:										# alpha matches
+						&
+						(?:quot|amp|frasl|lt|gt|iexcl|cent|pound|curren|yen|brvbar|sect|uml|pound|ordf|laquo|not|reg|macr|deg|plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|times|divide|circ|tilde|thetasym|upsih|piv|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|bull|hellip|permil|prime|Prime|lsaquo|rsaquo|oline|frasl|euro|trade|alefsym|larr|uarr|rarr|darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|emptyn|abla|isin|notin|ni|prod|sum|minus|lowast|radic|prop|infin|ang|and|orc|ap|cup|int|there4|simc|ong|asymp|ne|equiv|le|ge|sub|supn|sub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|loz|spades|clubs|hearts|diams)
+						;
+					)
+					|
+					(?:										# decimal matches
+						&\#
+						(?: 3[3-9]|4[0-467]|5[89]|6[02-4]|9[1-46]|12[3-6]|16[1-9]|17[0-24-9]|18[0-9]|19[01]|215|247|710|732|97[78]|982|821[1-9]|822[0-9]|823[01]|82[4-7][0-9]|828[0-6]|8289|829[0-2]|835[2-9]|86[6-9][0-9]|844[89]|84[5-9][0-9]|851[0-9]|852[0-7]|859[2-9]|85[6-9][0-9]|8[6-9][0-9][0-9]|9[01][0-9][0-9]|920[0-9]|921[0-5]|963[2-9]|96[4-9][0-9]|9[78][0-9][0-9]|99[0-7][0-9]|998[0-3]|1177[6-9]|117[89][0-9]|118[0-9][0-9]|1190[0-3] )
+						;
+					)
+					|
+					(?:										# hexidecimal matches
+						&\#x
+						(?: 002[1-9a-cef]|003[a-cef]|0040|005[b-e]|0060|007[b-e]|00a[1-9a-cef]|00b[0-9a-f]|00d7|00f7|02c6|02dc|03d[126]|201[3-9a-f]|202[0-7]|20[34][0-9a-f]|205[0-9a-e]|206[1-4]|20[a-c][0-9a-f]|21[0-4][0-9a-f]|219[0-9a-f]|2[23][0-9a-f][0-9a-f]|25[a-f][0-9a-f]|23[0-9a-f][0-9a-f]|2e[0-7][0-9a-f] )
+						;
+					)
+				)
+			'; // required modifiers: x (multiline pattern) i (case insensitive) u (utf8)
+
+
+		$punctuation = "
+				(?:
+					(?:
+						[^\w\s\&\/\@]			# assume characters that are not word spaces or whitespace are punctuation
+												# exclude & as that is an illegal stand-alone character (and would interfere with HTML character representations
+												# exclude slash \/as to not include the last slash in a URL
+												# exclude @ as to keep twitter names together
+												|								
+						$htmlPunctuation			# catch any HTML reps of punctuation
+					)+
+				)
+			";// required modifiers: x (multiline pattern) i (case insensitive) u (utf8)
+
+
+		// letter connectors allowed in words (pattern shared with get_words)
+		$htmlLetterConnectors = $this->_html_letter_connectors();
+
+		 
+		// word character html entities
+		// character	0-9__ A-Z__ a-z___ other_special_chrs_____
+		// decimal		48-57 65-90 97-122 192-214,216-246,248-255, 256-383
+		// hex			30-39 41-5a 61-7a  c0-d6   d8-f6   f8-ff    0100-017f
+		//
+		// the decimal and hexadecimal alternatives below cover exactly the ranges listed in this table
+		// (including A-Z as decimal 65-90, the digit zero as hex 30, and hex da-df)
+		$htmlLetters = '
+			(?:
+				(?:												# alpha matches
+					&
+					(?:Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|Oslash|Ugrave|Uacute|Ucirc|Uuml|Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|oslash|ugrave|uacute|ucirc|uuml|yacute|thorn|yuml)
+					;
+				)
+				|
+				(?:												# decimal matches
+					&\#
+					(?: 4[89]|5[0-7]|6[5-9]|[78][0-9]|90|9[7-9]|1[01][0-9]|12[0-2]|19[2-9]|20[0-9]|21[0-46-9]|2[23][0-9]|24[0-68-9]|2[5-9][0-9]|3[0-7][0-9]|38[0-3] )
+					;
+				)
+				|
+				(?:												# hexidecimal matches
+					(?:
+						&\#x00
+						(?: 3[0-9]|4[1-9a-f]|5[0-9a]|6[1-9a-f]|7[0-9a]|c[0-9a-f]|d[0-689a-f]|e[0-9a-f]|f[0-689a-f] )
+						;
+					)
+					|
+					(?:
+						&\#x01[0-7][0-9a-f];
+					)
+				)
+				|
+				(?:												# actual characters
+					[0-9A-Za-z]|\x{00c0}|\x{00c1}|\x{00c2}|\x{00c3}|\x{00c4}|\x{00c5}|\x{00c6}|\x{00c7}|\x{00c8}|\x{00c9}|
+					\x{00ca}|\x{00cb}|\x{00cc}|\x{00cd}|\x{00ce}|\x{00cf}|\x{00d0}|\x{00d1}|\x{00d2}|\x{00d3}|\x{00d4}|
+					\x{00d5}|\x{00d6}|\x{00d8}|\x{00d9}|\x{00da}|\x{00db}|\x{00dc}|\x{00dd}|\x{00de}|\x{00df}|\x{00e0}|
+					\x{00e1}|\x{00e2}|\x{00e3}|\x{00e4}|\x{00e5}|\x{00e6}|\x{00e7}|\x{00e8}|\x{00e9}|\x{00ea}|\x{00eb}|
+					\x{00ec}|\x{00ed}|\x{00ee}|\x{00ef}|\x{00f0}|\x{00f1}|\x{00f2}|\x{00f3}|\x{00f4}|\x{00f5}|\x{00f6}|
+					\x{00f8}|\x{00f9}|\x{00fa}|\x{00fb}|\x{00fc}|\x{00fd}|\x{00fe}|\x{00ff}|\x{0100}|\x{0101}|\x{0102}|
+					\x{0103}|\x{0104}|\x{0105}|\x{0106}|\x{0107}|\x{0108}|\x{0109}|\x{010a}|\x{010b}|\x{010c}|\x{010d}|
+					\x{010e}|\x{010f}|\x{0110}|\x{0111}|\x{0112}|\x{0113}|\x{0114}|\x{0115}|\x{0116}|\x{0117}|\x{0118}|
+					\x{0119}|\x{011a}|\x{011b}|\x{011c}|\x{011d}|\x{011e}|\x{011f}|\x{0120}|\x{0121}|\x{0122}|\x{0123}|
+					\x{0124}|\x{0125}|\x{0126}|\x{0127}|\x{0128}|\x{0129}|\x{012a}|\x{012b}|\x{012c}|\x{012d}|\x{012e}|
+					\x{012f}|\x{0130}|\x{0131}|\x{0132}|\x{0133}|\x{0134}|\x{0135}|\x{0136}|\x{0137}|\x{0138}|\x{0139}|
+					\x{013a}|\x{013b}|\x{013c}|\x{013d}|\x{013e}|\x{013f}|\x{0140}|\x{0141}|\x{0142}|\x{0143}|\x{0144}|
+					\x{0145}|\x{0146}|\x{0147}|\x{0148}|\x{0149}|\x{014a}|\x{014b}|\x{014c}|\x{014d}|\x{014e}|\x{014f}|
+					\x{0150}|\x{0151}|\x{0152}|\x{0153}|\x{0154}|\x{0155}|\x{0156}|\x{0157}|\x{0158}|\x{0159}|\x{015a}|
+					\x{015b}|\x{015c}|\x{015d}|\x{015e}|\x{015f}|\x{0160}|\x{0161}|\x{0162}|\x{0163}|\x{0164}|\x{0165}|
+					\x{0166}|\x{0167}|\x{0168}|\x{0169}|\x{016a}|\x{016b}|\x{016c}|\x{016d}|\x{016e}|\x{016f}|\x{0170}|
+					\x{0171}|\x{0172}|\x{0173}|\x{0174}|\x{0175}|\x{0176}|\x{0177}|\x{0178}|\x{0179}|\x{017a}|\x{017b}|
+					\x{017c}|\x{017d}|\x{017e}|\x{017f}
+				)
+			)
+		'; // required modifiers: x (multiline pattern) i (case insensitive) u (utf8)
+
+		$word = "
+				(?:
+					(?<![\w\&])							# negative lookbehind to ensure
+														#	1) we are proceeded by a non-word-character, and
+														#	2) we are not inside an HTML character def
+					(?:
+						[\w\-\_\/]
+						|
+						$htmlLetters
+						|
+						$htmlLetterConnectors
+					)+
+				)
+			"; // required modifiers: x (multiline pattern) u (utf8)
+
+		# find any text
+		$anyText = "$space|$punctuation|$word"; // required modifiers: x (multiline pattern) i (case insensitive) u (utf8)
+		$parts = preg_split("/($anyText)/ixu", $rawText, -1, PREG_SPLIT_DELIM_CAPTURE);
+
+		$index = 0;
+		foreach ($parts as $part) {
+			if ($part != "") {
+		
+				if(preg_match("/\A$space\Z/xiu", $part)) {
+					$tokens[$index] = array(
+									"type"		=> 'space',
+									"value"		=> $part,
+									);
+				} elseif(preg_match("/\A$punctuation\Z/sxiu", $part)) {
+					$tokens[$index] = array(
+									"type"		=> 'punctuation',
+									"value"		=> $part,
+									);
+				} elseif(preg_match("/\A$word\Z/xu", $part)) {
+					//make sure that things like email addresses and URLs are not broken up into words and punctuation
+					
+					// preceded by an "other": append this word to it
+					if($index-1 >= 0 && $tokens[$index-1]['type'] == 'other') {
+						$oldPart = $tokens[$index-1]['value'];
+						$tokens[$index-1] = array(
+									"type"		=> 'other',
+									"value"		=> $oldPart.$part,
+									);
+						$index = $index-1;
+						
+					// preceded by a non-space + punctuation: merge all three parts into a single "other"
+					} elseif($index-2 >= 0 && $tokens[$index-1]['type'] == 'punctuation' && $tokens[$index-2]['type'] != 'space') {
+						$oldPart = $tokens[$index-1]['value'];
+						$olderPart = $tokens[$index-2]['value'];
+						$tokens[$index-2] = array(
+									"type"		=> 'other',
+									"value"		=> $olderPart.$oldPart.$part,
+									);
+						unset($tokens[$index-1]);
+						$index = $index-2;
+					} else {	
+						$tokens[$index] = array(
+									"type"		=> 'word',
+									"value"		=> $part,
+									);
+					}
+				} else {
+					//make sure that things like email addresses and URLs are not broken up into words and punctuation
+					// preceded by an "other" or "word": append this part and mark the result as "other"
+					if($index-1 >= 0 && ($tokens[$index-1]['type'] == 'word' || $tokens[$index-1]['type'] == 'other')) {
+						$index = $index-1;
+						$oldPart = $tokens[$index]['value'];
+						$tokens[$index] = array(
+									"type"		=> 'other',
+									"value"		=> $oldPart.$part,
+									);
+					// preceded by a non-space + punctuation: merge all three parts into a single "other"
+					} elseif($index-2 >= 0 && $tokens[$index-1]['type'] == 'punctuation' && $tokens[$index-2]['type'] != 'space') {
+						$oldPart = $tokens[$index-1]['value'];
+						$olderPart = $tokens[$index-2]['value'];
+						$tokens[$index-2] = array(
+									"type"		=> 'other',
+									"value"		=> $olderPart.$oldPart.$part,
+									);
+						unset($tokens[$index-1]);
+						$index = $index-2;
+					} else {	
+						$tokens[$index] = array(
+									"type"		=> 'other',
+									"value"		=> $part,
+									);
+					}
+				}
+				
+				// $index now points at the token just written (or merged into); copy the parents of the source HTML token
+				if(isset($this->parsedHTML["parents"]))
+					$tokens[$index]["parents"] = $this->parsedHTML["parents"];
+				
+				$index++;
+			}
+		}
+		
+		$this->text = $tokens;
+		return TRUE;
+	}
+
+	#	Returns:	STRING regex fragment matching a single letter connector that is allowed inside a word:
+	# 					hyphens ("&#45;", "&#173;", "&#8208;", "&#8209;", "&#8210;", "&#x002d;", "&#x00ad;", "&#x2010;", "&#x2011;", "&#x2012;", "&shy;")
+	#					underscore ("&#95;", "&#x005f;")
+	#					zero-width-space ("&#8203;", "&#x200b;")
+	#					zero-width-non-joiner ("&#8204;", "&#x200c;", "&zwnj;")
+	#					zero-width-joiner ("&#8205;", "&#x200d;", "&zwj;")
+	#	Note:		internal helper shared by load and get_words so that both use the same pattern
+	function _html_letter_connectors() {
+		return '
+			(?:
+				(?:												# alpha matches
+					&
+					(?: shy|zwj|zwnj )
+					;
+				)
+				|
+				(?:												# decimal matches
+					&\#
+					(?: 45|95|173|820[3-589]|8210 )
+					;
+				)
+				|
+				(?:												# hexidecimal matches
+					&\#x
+					(?: 002d|005f|00ad|200[b-d]|201[0-2] )
+					;
+				)
+				|
+				(?:												# actual characters
+					\x{002d}|\x{005f}|\x{00ad}|\x{200b}|\x{200c}|\x{200d}|\x{2010}|\x{2011}|\x{2012}
+				)
+			)
+		'; // required modifiers: x (multiline pattern) i (case insensitive) u (utf8)
+	}
+	
+	#	Action:		reloads $this->text (i.e. capture new inserted text, or remove those whose values are deleted)
+	#	Returns:	TRUE on completion
+	#	WARNING: 	Tokens previously acquired through "get" methods may not match new tokenization
+	function reload() {
+		return $this->load($this->unload());
+	}
+	
+	#	Action:		reassembles the token values into one string and clears the parser
+	#	Returns:	STRING of Text (if a string was initially loaded), or the ARRAY parseHTML token
+	#				that was initially loaded, with its "value" replaced by the reassembled text
+	function unload() {
+		$reassembledText = "";
+		foreach($this->text as $token) {
+			$reassembledText .= $token["value"];
+		}
+		if($this->parsedHTML != "") {
+			// the initial value loaded was a single token of class parseHTML, so we will return in the same format
+			$this->parsedHTML["value"] = $reassembledText;
+			$output = $this->parsedHTML;
+		} else {
+			// the initial value loaded was a string, so we will return in the same format
+			$output = $reassembledText;
+		}
+		$this->clear();
+		return $output;
+	}
+	
+	#	Action:		resets $this->text to an empty array and $this->parsedHTML to ""
+	#	Returns:	TRUE on completion
+	function clear() {
+		$this->text = array();
+		$this->parsedHTML = "";
+		return TRUE;		
+	}
+	
+	#   Parameter:  ARRAY of tokens
+	#	Action:		overwrite "value" for all matching tokens
+	#	Returns:	TRUE on completion
+	function update($tokens) {
+		foreach($tokens as $index => $token) {
+			$this->text[$index]["value"] = $token["value"];
+		}
+		return TRUE;		
+	}
+
+
+	########################################################################
+	#	GET METHODS
+	#
+	#   Returns:    ARRAY of sought tokens
+
+	function get_all() {
+		return $this->text;
+	}
+
+	function get_spaces() {
+		return $this->get_type("space");
+	}
+
+	function get_punctuation() {
+		return $this->get_type("punctuation");
+	}
+
+	#   Parameter:  $abc letter-only match OPTIONAL INT -1=>prohibit, 0=>allow, 1=>require
+	# 				$caps capital-only match (allows non letter chrs) OPTIONAL INT  -1=>prohibit, 0=>allow, 1=>require
+	#	Returns:	ARRAY of "word" tokens that satisfy both filters, keyed by token index
+	#				(any value other than -1, 0 or 1 for a filter selects nothing)
+	function get_words($abc = 0, $caps = 0) {
+		$words = $this->get_type("word");
+		$tokens = array();
+		$htmlLetterConnectors = $this->_html_letter_connectors();
+
+		foreach($words as $index => $token) {
+			$value = $token["value"];
+			$capped = ($this->mb) ? mb_strtoupper($value, "UTF-8") : strtoupper($value);
+			// strip letter connectors, digits and entity punctuation: a letter-only word survives unchanged
+			$lettered = preg_replace("/".$htmlLetterConnectors."|[0-9\-_&#;\/]/ux", "", $value);
+			
+			// each filter passes when it is "allow" (0), or when the word does / does not have the property as demanded
+			$abcOk = ($abc == 0) || ($abc == -1 && $lettered != $value) || ($abc == 1 && $lettered == $value);
+			$capsOk = ($caps == 0) || ($caps == -1 && $capped != $value) || ($caps == 1 && $capped == $value);
+
+			if($abcOk && $capsOk) $tokens[$index] = $token;
+		}
+		return $tokens;
+	}
+
+	function get_other() {
+		return $this->get_type("other");
+	}
+
+
+
+	#=======================================================================
+	#=======================================================================
+	#==	MISC. METHODS
+	#=======================================================================
+	#=======================================================================
+
+	#	Params:	STRING type to get
+	#	Returns:	ARRAY of the tokens in $this->text whose "type" equals $type, keyed by token index
+	function get_type($type) {
+		$tokens = array();
+		foreach($this->text as $index => $token) {
+			if($token["type"] == $type)
+				$tokens[$index] = $token; 
+		}
+		return $tokens;		
+	}
+	
+} // end class parseText
--- a/php-typography/php-parser/php-parser.php
+++ b/php-typography/php-parser/php-parser.php
@ -0,0 +1,25 @@
+<?php 
+
+/*   
+Project Name: PHP Parser
+URI: http://kingdesk.com/projects/php-parser/
+Author: Jeffrey D. King
+Author URI: http://kingdesk.com/about/jeff/
+Version: 1.19
+
+	Copyright 2009, KINGdesk, LLC. Licensed under the GNU General Public License 2.0. If you use, modify and/or redistribute this software, you must leave the KINGdesk, LLC copyright information, the request for a link to http://kingdesk.com, and the web design services contact information unchanged. If you redistribute this software, or any derivative, it must be released under the GNU General Public License 2.0. This program is distributed without warranty (implied or otherwise) of suitability for any particular purpose. See the GNU General Public License for full license terms <http://creativecommons.org/licenses/GPL/2.0/>.
+
+	WE DON'T WANT YOUR MONEY: NO TIPS NECESSARY!  If you enjoy this plugin, a link to http://kingdesk.com from your website would be appreciated.
+	
+	For web design services, please contact info@kingdesk.com.
+*/
+
+# two classes defined:
+#	- parseHTML
+#	- parseText
+#
+# PHP Parser has been tested in PHP5.  It may work in PHP4, but it has not been tested in that environment
+# if you have problems or success in PHP4, please let us know at info@kingdesk.com
+
+require_once('parseHTML.php');
+require_once('parseText.php');