add uid variable to b8 classes
This commit is contained in:
parent
c8c062d960
commit
64d0616762
5 changed files with 1276 additions and 26 deletions
|
@ -205,7 +205,7 @@ class b8
|
||||||
* @return float The rating between 0 (ham) and 1 (spam)
|
* @return float The rating between 0 (ham) and 1 (spam)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public function classify($text)
|
public function classify($uid,$text)
|
||||||
{
|
{
|
||||||
|
|
||||||
# Validate the startup
|
# Validate the startup
|
||||||
|
@ -217,7 +217,7 @@ class b8
|
||||||
|
|
||||||
# Get the internal database variables, containing the number of ham and
|
# Get the internal database variables, containing the number of ham and
|
||||||
# spam texts so the spam probability can be calculated in relation to them
|
# spam texts so the spam probability can be calculated in relation to them
|
||||||
$internals = $this->_database->get_internals();
|
$internals = $this->_database->get_internals($uid);
|
||||||
|
|
||||||
# Calculate the spamminess of all tokens
|
# Calculate the spamminess of all tokens
|
||||||
|
|
||||||
|
@ -231,7 +231,7 @@ class b8
|
||||||
return $tokens;
|
return $tokens;
|
||||||
|
|
||||||
# Fetch all availible data for the token set from the database
|
# Fetch all availible data for the token set from the database
|
||||||
$this->_token_data = $this->_database->get(array_keys($tokens));
|
$this->_token_data = $this->_database->get(array_keys($tokens),$uid);
|
||||||
|
|
||||||
# Calculate the spamminess and importance for each token (or a degenerated form of it)
|
# Calculate the spamminess and importance for each token (or a degenerated form of it)
|
||||||
|
|
||||||
|
@ -441,9 +441,9 @@ class b8
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public function learn($text, $category)
|
public function learn($text, $category, $uid)
|
||||||
{
|
{
|
||||||
return $this->_process_text($text, $category, self::LEARN);
|
return $this->_process_text($text, $category, self::LEARN, $uid);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -455,9 +455,9 @@ class b8
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public function unlearn($text, $category)
|
public function unlearn($text, $category, $uid)
|
||||||
{
|
{
|
||||||
return $this->_process_text($text, $category, self::UNLEARN);
|
return $this->_process_text($text, $category, self::UNLEARN, $uid);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -470,7 +470,7 @@ class b8
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function _process_text($text, $category, $action)
|
private function _process_text($text, $category, $action, $uid = 0)
|
||||||
{
|
{
|
||||||
|
|
||||||
# Validate the startup
|
# Validate the startup
|
||||||
|
@ -494,7 +494,7 @@ class b8
|
||||||
return $tokens;
|
return $tokens;
|
||||||
|
|
||||||
# Pass the tokens and what to do with it to the storage backend
|
# Pass the tokens and what to do with it to the storage backend
|
||||||
return $this->_database->process_text($tokens, $category, $action);
|
return $this->_database->process_text($tokens, $category, $action, $uid);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
503
library/spam/b8/b8.php.ORIG
Normal file
503
library/spam/b8/b8.php.ORIG
Normal file
|
@ -0,0 +1,503 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
# Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
|
||||||
|
#
|
||||||
|
# b8 - A Bayesian spam filter written in PHP 5
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU Lesser General Public License as published by
|
||||||
|
# the Free Software Foundation in version 2.1 of the License.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||||
|
# License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
|
# along with this program; if not, write to the Free Software Foundation,
|
||||||
|
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright (C) 2006-2010 Tobias Leupold <tobias.leupold@web.de>
|
||||||
|
*
|
||||||
|
* @license LGPL
|
||||||
|
* @access public
|
||||||
|
* @package b8
|
||||||
|
* @author Tobias Leupold
|
||||||
|
* @author Oliver Lillie (aka buggedcom) (original PHP 5 port)
|
||||||
|
*/
|
||||||
|
|
||||||
|
class b8
|
||||||
|
{
|
||||||
|
|
||||||
|
public $config = array(
|
||||||
|
'min_size' => 3,
|
||||||
|
'max_size' => 30,
|
||||||
|
'allow_numbers' => FALSE,
|
||||||
|
'lexer' => 'default',
|
||||||
|
'degenerator' => 'default',
|
||||||
|
'storage' => 'dba',
|
||||||
|
'use_relevant' => 15,
|
||||||
|
'min_dev' => 0.2,
|
||||||
|
'rob_s' => 0.3,
|
||||||
|
'rob_x' => 0.5
|
||||||
|
);
|
||||||
|
|
||||||
|
private $_lexer = NULL;
|
||||||
|
private $_database = NULL;
|
||||||
|
private $_token_data = NULL;
|
||||||
|
|
||||||
|
const SPAM = 'spam';
|
||||||
|
const HAM = 'ham';
|
||||||
|
const LEARN = 'learn';
|
||||||
|
const UNLEARN = 'unlearn';
|
||||||
|
|
||||||
|
const STARTUP_FAIL_DATABASE = 'STARTUP_FAIL_DATABASE';
|
||||||
|
const STARTUP_FAIL_LEXER = 'STARTUP_FAIL_LEXER';
|
||||||
|
const TRAINER_CATEGORY_FAIL = 'TRAINER_CATEGORY_FAIL';
|
||||||
|
|
||||||
|
/**
 * Constructs b8
 *
 * Applies the recognised keys of $config on top of the defaults in
 * $this->config, then loads and instantiates the storage backend and the
 * lexer. If one of the required class files cannot be loaded, the
 * constructor returns early; $_database and/or $_lexer then stay NULL.
 *
 * Both parameters now have a default: an optional parameter followed by a
 * required one is deprecated as of PHP 8.0.
 *
 * @access public
 * @param array $config Overrides for the default configuration; unknown keys are ignored
 * @param mixed $database_config Handed unchanged to the storage backend's constructor
 * @return void
 */

public function __construct($config = array(), $database_config = array())
{

    # Validate config data

    if(count($config) > 0) {

        foreach($config as $name => $value) {

            # NOTE(review): there is no case for 'degenerator', so that
            # setting cannot be overridden through $config here.
            switch($name) {

                case 'min_dev':
                case 'rob_s':
                case 'rob_x':
                    $this->config[$name] = (float) $value;
                    break;

                case 'min_size':
                case 'max_size':
                case 'use_relevant':
                    $this->config[$name] = (int) $value;
                    break;

                case 'allow_numbers':
                    $this->config[$name] = (bool) $value;
                    break;

                case 'lexer':
                    # Fall back to the default lexer if the requested one has no class file
                    $value = (string) strtolower($value);
                    $this->config[$name] = is_file(dirname(__FILE__) . DIRECTORY_SEPARATOR . 'lexer' . DIRECTORY_SEPARATOR . "lexer_" . $value . '.php') === TRUE ? $value : 'default';
                    break;

                case 'storage':
                    $this->config[$name] = (string) $value;
                    break;

            }

        }

    }

    # All class files live in sub directories next to this file
    $base_dir = dirname(__FILE__) . DIRECTORY_SEPARATOR;

    # Setup the database backend

    # Get the basic storage class used by all backends
    if($this->load_class('b8_storage_base', $base_dir . 'storage' . DIRECTORY_SEPARATOR . 'storage_base.php') === FALSE)
        return;

    # Get the degenerator we need
    if($this->load_class('b8_degenerator_' . $this->config['degenerator'], $base_dir . 'degenerator' . DIRECTORY_SEPARATOR . 'degenerator_' . $this->config['degenerator'] . '.php') === FALSE)
        return;

    # Get the actual storage backend we need
    if($this->load_class('b8_storage_' . $this->config['storage'], $base_dir . 'storage' . DIRECTORY_SEPARATOR . 'storage_' . $this->config['storage'] . '.php') === FALSE)
        return;

    # Setup the backend
    $class = 'b8_storage_' . $this->config['storage'];
    $this->_database = new $class(
        $database_config,
        $this->config['degenerator'], date('ymd')
    );

    # Setup the lexer class

    if($this->load_class('b8_lexer_' . $this->config['lexer'], $base_dir . 'lexer' . DIRECTORY_SEPARATOR . 'lexer_' . $this->config['lexer'] . '.php') === FALSE)
        return;

    $class = 'b8_lexer_' . $this->config['lexer'];
    $this->_lexer = new $class(
        array(
            'min_size' => $this->config['min_size'],
            'max_size' => $this->config['max_size'],
            'allow_numbers' => $this->config['allow_numbers']
        )
    );

}
|
||||||
|
|
||||||
|
/**
 * Load a class file if a class has not been defined yet.
 *
 * The file is only included when it can be resolved; a missing file is
 * reported as FALSE instead of aborting the script with the fatal error
 * that require_once raises for unresolvable paths.
 *
 * @access public
 * @param string $class_name Name of the class that $class_file is expected to define
 * @param string $class_file Path of the file to include
 * @return boolean Returns TRUE if everything is okay, otherwise FALSE.
 */

public function load_class($class_name, $class_file)
{

    # Nothing to do if the class is already known (autoloading is not triggered)
    if(class_exists($class_name, FALSE) === TRUE)
        return TRUE;

    # Resolve the path the same way an include would, so that relative
    # paths keep working while missing files no longer kill the script
    if(stream_resolve_include_path($class_file) === FALSE)
        return FALSE;

    $included = require_once $class_file;

    # The file must really have defined the class we asked for
    if($included === FALSE or class_exists($class_name, FALSE) === FALSE)
        return FALSE;

    return TRUE;

}
|
||||||
|
|
||||||
|
/**
 * Validates the class has all it needs to work.
 *
 * Checks that a storage backend exists, connects it if it is not connected
 * yet, and finally checks that a lexer exists.
 *
 * @access public
 * @return mixed Returns TRUE if everything is okay, otherwise an error code
 *               (a STARTUP_FAIL_* constant or whatever the backend's connect() returned).
 */

public function validate()
{

    $backend = $this->_database;

    # Without a storage backend nothing can work
    if($backend === NULL)
        return self::STARTUP_FAIL_DATABASE;

    # Connect the database backend if we aren't connected yet
    if($backend->connected === FALSE) {

        $status = $backend->connect();

        # Pass the backend's own error code on to the caller
        if($status !== TRUE)
            return $status;

    }

    return ($this->_lexer === NULL) ? self::STARTUP_FAIL_LEXER : TRUE;

}
|
||||||
|
|
||||||
|
/**
 * Classifies a text
 *
 * Rates every token of the text, picks the most significant ones (at most
 * config['use_relevant'], each deviating more than config['min_dev'] from
 * the neutral 0.5) and combines their ratings into one probability.
 *
 * @access public
 * @package default
 * @param string $text
 * @return mixed The rating as a float between 0 (ham) and 1 (spam), or the
 *               error code returned by validate() or by the lexer.
 */

public function classify($text)
{

    # Validate the startup

    $started_up = $this->validate();

    if($started_up !== TRUE)
        return $started_up;

    # Get the internal database variables, containing the number of ham and
    # spam texts so the spam probability can be calculated in relation to them
    $internals = $this->_database->get_internals();

    # Calculate the spamminess of all tokens

    # Get all tokens we want to rate

    $tokens = $this->_lexer->get_tokens($text);

    # Check if the lexer failed
    # (if so, $tokens will be a lexer error code, if not, $tokens will be an array)
    if(!is_array($tokens))
        return $tokens;

    # Fetch all available data for the token set from the database
    $this->_token_data = $this->_database->get(array_keys($tokens));

    # Calculate the spamminess and importance for each token (or a degenerated form of it)

    $word_count = array();
    $rating = array();
    $importance = array();

    foreach($tokens as $word => $count) {

        $word_count[$word] = $count;

        $rating[$word] = $this->_get_probability($word, $internals['texts_ham'], $internals['texts_spam']);

        # The importance is the distance from the neutral rating 0.5
        $importance[$word] = abs(0.5 - $rating[$word]);

    }

    # Order by importance
    arsort($importance);

    # Get the most interesting tokens (use all if we have less than the given number)

    $relevant = array();
    $considered = 0;

    # A plain foreach replaces the former each() loop; each() was removed in PHP 8.0
    foreach($importance as $word => $deviation) {

        # Only look at the configured number of most important tokens
        if($considered >= $this->config['use_relevant'])
            break;

        $considered++;

        # If the token's rating is relevant enough, use it
        if($deviation > $this->config['min_dev']) {

            # Tokens that appear more than once also count more than once
            for($x = 0, $l = $word_count[$word]; $x < $l; $x++)
                $relevant[] = $rating[$word];

        }

    }

    # Calculate the spamminess of the text (thanks to Mr. Robinson ;-)
    # We set both hamminess and spamminess to 1 for the first multiplying
    $hamminess = 1;
    $spamminess = 1;

    # Consider all relevant ratings
    foreach($relevant as $value) {
        $hamminess *= (1.0 - $value);
        $spamminess *= $value;
    }

    # Get the number of relevant ratings
    $n = count($relevant);

    # If no token was good for calculation, we really don't know how
    # to rate this text; so we assume a spam and ham probability of 0.5
    if($n === 0) {
        $hamminess = 0.5;
        $spamminess = 0.5;
        $n = 1;
    }

    # Calculate the combined rating

    # The actual hamminess and spamminess
    $hamminess = 1 - pow($hamminess, (1 / $n));
    $spamminess = 1 - pow($spamminess, (1 / $n));

    # Calculate the combined indicator
    $probability = ($hamminess - $spamminess) / ($hamminess + $spamminess);

    # We want a value between 0 and 1, not between -1 and +1, so ...
    $probability = (1 + $probability) / 2;

    # Alea iacta est
    return $probability;

}
|
||||||
|
|
||||||
|
/**
 * Calculate the spamminess of a single token also considering "degenerated" versions
 *
 * Looks the token up in the data fetched by classify(): an exact match is
 * rated directly; otherwise the degenerated version that deviates most
 * from 0.5 is used; a completely unknown token gets config['rob_x'].
 *
 * @access private
 * @param string $word
 * @param string $texts_ham
 * @param string $texts_spam
 * @return float The rating of the token
 */

private function _get_probability($word, $texts_ham, $texts_spam)
{

    $known = $this->_token_data;

    # The token itself was in the database, so its data can be used as-is
    if(isset($known['tokens'][$word]))
        return $this->_calc_probability($known['tokens'][$word], $texts_ham, $texts_spam);

    # Neither the token nor a similar word is known: use the default
    # rating for completely unknown tokens
    if(!isset($known['degenerates'][$word]))
        return $this->config['rob_x'];

    # We found similar words; rate each one and keep the most important.
    # The starting point 0.5 simply says nothing.
    $best = 0.5;

    foreach($known['degenerates'][$word] as $counts) {

        $candidate = $this->_calc_probability($counts, $texts_ham, $texts_spam);

        # Is it further away from neutral than what we have so far?
        if(abs(0.5 - $candidate) > abs(0.5 - $best))
            $best = $candidate;

    }

    return $best;

}
|
||||||
|
|
||||||
|
/**
 * Do the actual spamminess calculation of a single token
 *
 * @access private
 * @param array $data Token counters with the keys 'count_ham' and 'count_spam'
 * @param string $texts_ham Number of learned ham texts
 * @param string $texts_spam Number of learned spam texts
 * @return float The rating of the token; config['rob_x'] if the counters
 *               carry no information at all
 */

private function _calc_probability($data, $texts_ham, $texts_spam)
{

    # Calculate the basic probability by Mr. Graham

    # But: consider the number of ham and spam texts saved instead of the
    # number of entries where the token appeared to calculate a relative
    # spamminess because we count tokens appearing multiple times not just
    # once but as often as they appear in the learned texts

    $rel_ham = $data['count_ham'];
    $rel_spam = $data['count_spam'];

    if($texts_ham > 0)
        $rel_ham = $data['count_ham'] / $texts_ham;

    if($texts_spam > 0)
        $rel_spam = $data['count_spam'] / $texts_spam;

    $rel_sum = $rel_ham + $rel_spam;

    # With both relative counts at zero the division below would fail
    # (DivisionByZeroError as of PHP 8). Such a token tells us nothing,
    # so it gets the rating of an unknown token.
    if((float) $rel_sum === 0.0)
        return $this->config['rob_x'];

    $rating = $rel_spam / $rel_sum;

    # Calculate the better probability proposed by Mr. Robinson
    $all = $data['count_ham'] + $data['count_spam'];
    return (($this->config['rob_s'] * $this->config['rob_x']) + ($all * $rating)) / ($this->config['rob_s'] + $all);

}
|
||||||
|
|
||||||
|
/**
 * Check the validity of the category of a request
 *
 * @access private
 * @param string $category
 * @return boolean TRUE if $category is identical to b8::HAM or b8::SPAM, otherwise FALSE
 */

private function _check_category($category)
{
    # Strict lookup, so only the exact constant values are accepted
    return in_array($category, array(self::HAM, self::SPAM), TRUE);
}
|
||||||
|
|
||||||
|
/**
 * Learn a reference text
 *
 * Thin wrapper that hands the text to _process_text() with the LEARN action.
 *
 * @access public
 * @param string $text
 * @param const $category Either b8::SPAM or b8::HAM
 * @return mixed Whatever _process_text() returns
 */

public function learn($text, $category)
{
    $outcome = $this->_process_text($text, $category, self::LEARN);

    return $outcome;
}
|
||||||
|
|
||||||
|
/**
 * Unlearn a reference text
 *
 * Thin wrapper that hands the text to _process_text() with the UNLEARN action.
 *
 * @access public
 * @param string $text
 * @param const $category Either b8::SPAM or b8::HAM
 * @return mixed Whatever _process_text() returns
 */

public function unlearn($text, $category)
{
    $outcome = $this->_process_text($text, $category, self::UNLEARN);

    return $outcome;
}
|
||||||
|
|
||||||
|
/**
 * Does the actual interaction with the storage backend for learning or unlearning texts
 *
 * @access private
 * @param string $text
 * @param const $category Either b8::SPAM or b8::HAM
 * @param const $action Either b8::LEARN or b8::UNLEARN
 * @return mixed An error code from validate(), b8::TRAINER_CATEGORY_FAIL,
 *               a lexer error code, or the storage backend's process_text() result
 */

private function _process_text($text, $category, $action)
{

    # Make sure backend and lexer are usable before doing anything
    $ready = $this->validate();

    if($ready !== TRUE)
        return $ready;

    # Refuse categories other than ham and spam
    if(!$this->_check_category($category))
        return self::TRAINER_CATEGORY_FAIL;

    # Split the text into tokens
    $token_list = $this->_lexer->get_tokens($text);

    # On failure the lexer hands back an error code instead of an array
    if(is_array($token_list) === FALSE)
        return $token_list;

    # Let the storage backend do the learning or unlearning
    return $this->_database->process_text($token_list, $category, $action);

}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
|
@ -71,10 +71,10 @@ abstract class b8_storage_base
|
||||||
* @return mixed Returns TRUE if everything is okay, otherwise an error code.
|
* @return mixed Returns TRUE if everything is okay, otherwise an error code.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
protected function check_database()
|
protected function check_database($uid)
|
||||||
{
|
{
|
||||||
|
|
||||||
$internals = $this->get_internals();
|
$internals = $this->get_internals($uid);
|
||||||
|
|
||||||
if(isset($internals['dbversion'])) {
|
if(isset($internals['dbversion'])) {
|
||||||
if($internals['dbversion'] == "2") {
|
if($internals['dbversion'] == "2") {
|
||||||
|
@ -122,7 +122,7 @@ abstract class b8_storage_base
|
||||||
* @return array Returns an array of all internals.
|
* @return array Returns an array of all internals.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public function get_internals()
|
public function get_internals($uid)
|
||||||
{
|
{
|
||||||
|
|
||||||
$internals = $this->_get_query(
|
$internals = $this->_get_query(
|
||||||
|
@ -130,7 +130,8 @@ abstract class b8_storage_base
|
||||||
self::INTERNALS_TEXTS_HAM,
|
self::INTERNALS_TEXTS_HAM,
|
||||||
self::INTERNALS_TEXTS_SPAM,
|
self::INTERNALS_TEXTS_SPAM,
|
||||||
self::INTERNALS_DBVERSION
|
self::INTERNALS_DBVERSION
|
||||||
)
|
),
|
||||||
|
$uid
|
||||||
);
|
);
|
||||||
|
|
||||||
return array(
|
return array(
|
||||||
|
@ -149,7 +150,7 @@ abstract class b8_storage_base
|
||||||
* @return mixed Returns FALSE on failure, otherwise returns array of returned data in the format array('tokens' => array(token => count), 'degenerates' => array(token => array(degenerate => count))).
|
* @return mixed Returns FALSE on failure, otherwise returns array of returned data in the format array('tokens' => array(token => count), 'degenerates' => array(token => array(degenerate => count))).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public function get($tokens)
|
public function get($tokens, $uid)
|
||||||
{
|
{
|
||||||
|
|
||||||
# Validate the startup
|
# Validate the startup
|
||||||
|
@ -160,7 +161,7 @@ abstract class b8_storage_base
|
||||||
return $started_up;
|
return $started_up;
|
||||||
|
|
||||||
# First we see what we have in the database.
|
# First we see what we have in the database.
|
||||||
$token_data = $this->_get_query($tokens);
|
$token_data = $this->_get_query($tokens, $uid);
|
||||||
|
|
||||||
# Check if we have to degenerate some tokens
|
# Check if we have to degenerate some tokens
|
||||||
|
|
||||||
|
@ -203,7 +204,7 @@ abstract class b8_storage_base
|
||||||
$return_data_tokens[$token] = $this->_parse_count($token_data[$token]);
|
$return_data_tokens[$token] = $this->_parse_count($token_data[$token]);
|
||||||
|
|
||||||
# ... and update it's lastseen parameter
|
# ... and update it's lastseen parameter
|
||||||
$this->_update($token, "{$return_data_tokens[$token]['count_ham']} {$return_data_tokens[$token]['count_spam']} " . $this->b8_config['today']);
|
$this->_update($token, "{$return_data_tokens[$token]['count_ham']} {$return_data_tokens[$token]['count_spam']} " . $this->b8_config['today'], $uid );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -224,7 +225,7 @@ abstract class b8_storage_base
|
||||||
$return_data_degenerates[$token][$degenerate] = $this->_parse_count($token_data[$degenerate]);
|
$return_data_degenerates[$token][$degenerate] = $this->_parse_count($token_data[$degenerate]);
|
||||||
|
|
||||||
# ... and update it's lastseen parameter
|
# ... and update it's lastseen parameter
|
||||||
$this->_update($degenerate, "{$return_data_degenerates[$token][$degenerate]['count_ham']} {$return_data_degenerates[$token][$degenerate]['count_spam']} " . $this->b8_config['today']);
|
$this->_update($degenerate, "{$return_data_degenerates[$token][$degenerate]['count_ham']} {$return_data_degenerates[$token][$degenerate]['count_spam']} " . $this->b8_config['today'], $uid);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -258,7 +259,7 @@ abstract class b8_storage_base
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public function process_text($tokens, $category, $action)
|
public function process_text($tokens, $category, $action, $uid)
|
||||||
{
|
{
|
||||||
|
|
||||||
# Validate the startup
|
# Validate the startup
|
||||||
|
@ -271,10 +272,10 @@ abstract class b8_storage_base
|
||||||
# No matter what we do, we first have to check what data we have.
|
# No matter what we do, we first have to check what data we have.
|
||||||
|
|
||||||
# First get the internals, including the ham texts and spam texts counter
|
# First get the internals, including the ham texts and spam texts counter
|
||||||
$internals = $this->get_internals();
|
$internals = $this->get_internals($uid);
|
||||||
|
|
||||||
# Then, fetch all data for all tokens we have (and update their lastseen parameters)
|
# Then, fetch all data for all tokens we have (and update their lastseen parameters)
|
||||||
$token_data = $this->_get_query(array_keys($tokens));
|
$token_data = $this->_get_query(array_keys($tokens), $uid);
|
||||||
|
|
||||||
# Process all tokens to learn/unlearn
|
# Process all tokens to learn/unlearn
|
||||||
|
|
||||||
|
@ -315,7 +316,7 @@ abstract class b8_storage_base
|
||||||
|
|
||||||
# Now let's see if we have to update or delete the token
|
# Now let's see if we have to update or delete the token
|
||||||
if($count_ham !== 0 or $count_spam !== 0)
|
if($count_ham !== 0 or $count_spam !== 0)
|
||||||
$this->_update($token, "$count_ham $count_spam " . $this->b8_config['today']);
|
$this->_update($token, "$count_ham $count_spam " . $this->b8_config['today'], $uid);
|
||||||
else
|
else
|
||||||
$this->_del($token);
|
$this->_del($token);
|
||||||
|
|
||||||
|
@ -335,7 +336,7 @@ abstract class b8_storage_base
|
||||||
|
|
||||||
$data .= $this->b8_config['today'];
|
$data .= $this->b8_config['today'];
|
||||||
|
|
||||||
$this->_put($token, $data);
|
$this->_put($token, $data, $uid);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -349,12 +350,12 @@ abstract class b8_storage_base
|
||||||
|
|
||||||
if($category === b8::HAM) {
|
if($category === b8::HAM) {
|
||||||
$internals['texts_ham']++;
|
$internals['texts_ham']++;
|
||||||
$this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham']);
|
$this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham'], $uid);
|
||||||
}
|
}
|
||||||
|
|
||||||
elseif($category === b8::SPAM) {
|
elseif($category === b8::SPAM) {
|
||||||
$internals['texts_spam']++;
|
$internals['texts_spam']++;
|
||||||
$this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam']);
|
$this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam'], $uid);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -368,7 +369,7 @@ abstract class b8_storage_base
|
||||||
if($internals['texts_ham'] < 0)
|
if($internals['texts_ham'] < 0)
|
||||||
$internals['texts_ham'] = 0;
|
$internals['texts_ham'] = 0;
|
||||||
|
|
||||||
$this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham']);
|
$this->_update(self::INTERNALS_TEXTS_HAM, $internals['texts_ham'], $uid);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -379,7 +380,7 @@ abstract class b8_storage_base
|
||||||
if($internals['texts_spam'] < 0)
|
if($internals['texts_spam'] < 0)
|
||||||
$internals['texts_spam'] = 0;
|
$internals['texts_spam'] = 0;
|
||||||
|
|
||||||
$this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam']);
|
$this->_update(self::INTERNALS_TEXTS_SPAM, $internals['texts_spam'], $uid);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
395
library/spam/b8/storage/storage_base.php.ORIG
Normal file
395
library/spam/b8/storage/storage_base.php.ORIG
Normal file
|
@ -0,0 +1,395 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
# Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de>
|
||||||
|
#
|
||||||
|
# This file is part of the b8 package
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU Lesser General Public License as published by
|
||||||
|
# the Free Software Foundation in version 2.1 of the License.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||||
|
# License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
|
# along with this program; if not, write to the Free Software Foundation,
|
||||||
|
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Functions used by all storage backends
|
||||||
|
* Copyright (C) 2010 Tobias Leupold <tobias.leupold@web.de>
|
||||||
|
*
|
||||||
|
* @license LGPL
|
||||||
|
* @access public
|
||||||
|
* @package b8
|
||||||
|
* @author Tobias Leupold
|
||||||
|
*/
|
||||||
|
|
||||||
|
abstract class b8_storage_base
|
||||||
|
{
|
||||||
|
|
||||||
|
public $connected = FALSE;
|
||||||
|
|
||||||
|
protected $_degenerator = NULL;
|
||||||
|
|
||||||
|
const INTERNALS_TEXTS_HAM = 'bayes*texts.ham';
|
||||||
|
const INTERNALS_TEXTS_SPAM = 'bayes*texts.spam';
|
||||||
|
const INTERNALS_DBVERSION = 'bayes*dbversion';
|
||||||
|
|
||||||
|
const BACKEND_NOT_CONNECTED = 'BACKEND_NOT_CONNECTED';
|
||||||
|
const DATABASE_WRONG_VERSION = 'DATABASE_WRONG_VERSION';
|
||||||
|
const DATABASE_NOT_B8 = 'DATABASE_NOT_B8';
|
||||||
|
|
||||||
|
/**
 * Validates the class has all it needs to work.
 *
 * Also (re)creates the degenerator named in $this->b8_config on every call.
 *
 * @access protected
 * @return mixed Returns TRUE if everything is okay, otherwise an error code.
 */

protected function validate()
{

    # The degenerator is created here rather than in the constructors of
    # the individual storage backends, which would mean duplicated code.
    $degenerator_class = 'b8_degenerator_' . $this->b8_config['degenerator'];
    $this->_degenerator = new $degenerator_class();

    return ($this->connected === TRUE) ? TRUE : self::BACKEND_NOT_CONNECTED;

}
|
||||||
|
|
||||||
|
/**
 * Checks if a b8 database is used and if it's version is okay
 *
 * On any failure the backend is marked as not connected.
 *
 * @access protected
 * @return mixed Returns TRUE if everything is okay, otherwise an error code
 *               (DATABASE_WRONG_VERSION or DATABASE_NOT_B8).
 */

protected function check_database()
{

    $internals = $this->get_internals();

    $version_known = isset($internals['dbversion']);

    # Version 2 is the only database layout accepted here
    if($version_known and $internals['dbversion'] == "2")
        return TRUE;

    # Anything else makes the backend unusable
    $this->connected = FALSE;

    # A version entry that doesn't match means a b8 database of another
    # version; no version entry at all means this is no b8 database
    return $version_known ? self::DATABASE_WRONG_VERSION : self::DATABASE_NOT_B8;

}
|
||||||
|
|
||||||
|
/**
 * Parses the "count" data of a token.
 *
 * The record is a space separated string whose first two fields are the
 * ham and the spam counter. Any further field is ignored. Missing fields
 * count as 0 instead of raising an "undefined offset" notice.
 *
 * @access private
 * @param string $data
 * @return array Returns an array of the parsed data: array('count_ham' => int, 'count_spam' => int).
 */

private function _parse_count($data)
{

    # Only the two counters are needed; pad short records with zeros
    $fields = array_pad(explode(' ', $data, 3), 2, 0);

    return array(
        'count_ham' => (int) $fields[0],
        'count_spam' => (int) $fields[1]
    );

}
|
||||||
|
|
||||||
|
/**
 * Get the database's internal variables.
 *
 * Entries that are missing from the query result are reported as 0.
 *
 * @access public
 * @return array Returns an array of all internals: array('texts_ham' => int, 'texts_spam' => int, 'dbversion' => int).
 */

public function get_internals()
{

	# Maps the keys of the returned array to the tokens they are stored under
	$internal_tokens = array(
		'texts_ham'  => self::INTERNALS_TEXTS_HAM,
		'texts_spam' => self::INTERNALS_TEXTS_SPAM,
		'dbversion'  => self::INTERNALS_DBVERSION
	);

	$stored = $this->_get_query(array_values($internal_tokens));

	$internals = array();

	# Check for existence before casting, so a fresh or foreign database
	# doesn't raise undefined index notices
	foreach($internal_tokens as $name => $token)
		$internals[$name] = isset($stored[$token]) ? (int) $stored[$token] : 0;

	return $internals;

}
|
||||||
|
|
||||||
|
/**
 * Get all data about a list of tokens from the database.
 *
 * Tokens that are not stored are degenerated and the degenerated forms are
 * looked up instead. The lastseen parameter of every token or degenerate that
 * was found is refreshed and the changes are committed.
 *
 * @access public
 * @param array $tokens
 * @return mixed Returns the error code of validate() if the backend is not usable, otherwise
 *               an array in the format array('tokens' => array(token => count),
 *               'degenerates' => array(token => array(degenerate => count))).
 */

public function get($tokens)
{

	# Validate the startup
	$started_up = $this->validate();

	if($started_up !== TRUE)
		return $started_up;

	# First we see what we have in the database.
	$token_data = $this->_get_query($tokens);

	# Check if we have to degenerate some tokens
	$missing_tokens = array();

	foreach($tokens as $token) {
		if(!isset($token_data[$token]))
			$missing_tokens[] = $token;
	}

	if(count($missing_tokens) > 0) {

		# Generate a list of degenerated tokens for the missing tokens ...
		$degenerates = $this->_degenerator->degenerate($missing_tokens);

		$degenerates_list = array();

		foreach($degenerates as $token_degenerates)
			$degenerates_list = array_merge($degenerates_list, $token_degenerates);

		# ... and look them up. The union operator is used on purpose:
		# array_merge() would renumber integer-like token keys (e.g. "2011"),
		# so tokens that were already found could not be looked up anymore.
		$token_data = $token_data + $this->_get_query($degenerates_list);

	}

	# Here, we have all available data in $token_data.

	$return_data_tokens = array();
	$return_data_degenerates = array();

	foreach($tokens as $token) {

		if(isset($token_data[$token]) === TRUE) {

			# The token was found in the database. Add the data ...
			$return_data_tokens[$token] = $this->_parse_count($token_data[$token]);

			# ... and update its lastseen parameter
			$this->_update($token, "{$return_data_tokens[$token]['count_ham']} {$return_data_tokens[$token]['count_spam']} " . $this->b8_config['today']);

		}

		else {

			# The token was not found, so we look if we can return data for
			# degenerated forms of it

			foreach($this->_degenerator->degenerates[$token] as $degenerate) {

				if(isset($token_data[$degenerate]) === TRUE) {

					# A degeneration of the token was found in the database. Add the data ...
					$return_data_degenerates[$token][$degenerate] = $this->_parse_count($token_data[$degenerate]);

					# ... and update its lastseen parameter
					$this->_update($degenerate, "{$return_data_degenerates[$token][$degenerate]['count_ham']} {$return_data_degenerates[$token][$degenerate]['count_spam']} " . $this->b8_config['today']);

				}

			}

		}

	}

	# Now, all token data directly found in the database is in $return_data_tokens
	# and all data for degenerated versions is in $return_data_degenerates

	# First, we commit the changes to the lastseen parameters
	$this->_commit();

	# Then, we return what we have
	return array(
		'tokens'      => $return_data_tokens,
		'degenerates' => $return_data_degenerates
	);

}
|
||||||
|
|
||||||
|
/**
 * Stores or deletes a list of tokens from the given category.
 *
 * Existing tokens get their ham or spam counter changed by the number of
 * occurrences and are deleted once both counters reach zero. Unknown tokens
 * are only stored when learning. Finally, the counter of learned ham or spam
 * texts is adjusted and all changes are committed.
 *
 * @access public
 * @param array $tokens Format: array(token => number of occurrences)
 * @param const $category Either b8::HAM or b8::SPAM
 * @param const $action Either b8::LEARN or b8::UNLEARN
 * @return mixed Returns the error code of validate() if the backend is not usable, otherwise nothing.
 */

public function process_text($tokens, $category, $action)
{

	# Validate the startup
	$started_up = $this->validate();

	if($started_up !== TRUE)
		return $started_up;

	# No matter what we do, we first have to check what data we have.

	# First get the internals, including the ham texts and spam texts counter
	$internals = $this->get_internals();

	# Then, fetch all data for all tokens we have
	$token_data = $this->_get_query(array_keys($tokens));

	# Process all tokens to learn/unlearn

	foreach($tokens as $token => $count) {

		if(isset($token_data[$token])) {

			# We already have this token, so update its data

			$counts = $this->_parse_count($token_data[$token]);
			$count_ham = $counts['count_ham'];
			$count_spam = $counts['count_spam'];

			# Increase or decrease the right counter

			if($action === b8::LEARN) {
				if($category === b8::HAM)
					$count_ham += $count;
				elseif($category === b8::SPAM)
					$count_spam += $count;
			}

			elseif($action == b8::UNLEARN) {
				if($category === b8::HAM)
					$count_ham -= $count;
				elseif($category === b8::SPAM)
					$count_spam -= $count;
			}

			# We don't want to have negative values
			$count_ham = max(0, $count_ham);
			$count_spam = max(0, $count_spam);

			# Now let's see if we have to update or delete the token
			if($count_ham !== 0 or $count_spam !== 0)
				$this->_update($token, "$count_ham $count_spam " . $this->b8_config['today']);
			else
				$this->_del($token);

		}

		elseif($action === b8::LEARN) {

			# We don't have the token. If we unlearn a text, we can't delete it
			# as we don't have it anyway, so just do something if we learn a text.
			# The number of occurrences is stored, so a new token is counted the
			# same way as an existing one. An unknown category stores nothing.

			if($category === b8::HAM)
				$this->_put($token, "$count 0 " . $this->b8_config['today']);
			elseif($category === b8::SPAM)
				$this->_put($token, "0 $count " . $this->b8_config['today']);

		}

	}

	# Now, all tokens have been processed, so let's update the right text counter

	if($category === b8::HAM) {
		$counter_name = 'texts_ham';
		$counter_token = self::INTERNALS_TEXTS_HAM;
	}
	elseif($category === b8::SPAM) {
		$counter_name = 'texts_spam';
		$counter_token = self::INTERNALS_TEXTS_SPAM;
	}
	else
		$counter_name = NULL;

	if($counter_name !== NULL) {

		if($action === b8::LEARN) {
			$internals[$counter_name]++;
			$this->_update($counter_token, $internals[$counter_name]);
		}

		elseif($action == b8::UNLEARN) {
			# The text counter must not become negative either
			$internals[$counter_name] = max(0, $internals[$counter_name] - 1);
			$this->_update($counter_token, $internals[$counter_name]);
		}

	}

	# We're done and can commit all changes to the database now
	$this->_commit();

}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
351
library/spam/b8/storage/storage_frndc.php
Normal file
351
library/spam/b8/storage/storage_frndc.php
Normal file
|
@ -0,0 +1,351 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
# Copyright (C) 2006-2011 Tobias Leupold <tobias.leupold@web.de>
|
||||||
|
#
|
||||||
|
# This file is part of the b8 package
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU Lesser General Public License as published by
|
||||||
|
# the Free Software Foundation in version 2.1 of the License.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||||
|
# License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
|
# along with this program; if not, write to the Free Software Foundation,
|
||||||
|
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The MySQL abstraction layer for communicating with the database.
|
||||||
|
* Copyright (C) 2009 Oliver Lillie (aka buggedcom)
|
||||||
|
* Copyright (C) 2010-2011 Tobias Leupold <tobias.leupold@web.de>
|
||||||
|
*
|
||||||
|
* @license LGPL
|
||||||
|
* @access public
|
||||||
|
* @package b8
|
||||||
|
* @author Oliver Lillie (aka buggedcom) (original PHP 5 port and optimizations)
|
||||||
|
* @author Tobias Leupold
|
||||||
|
*/
|
||||||
|
|
||||||
|
class b8_storage_mysql extends b8_storage_base
|
||||||
|
{
|
||||||
|
|
||||||
|
public $config = array(
|
||||||
|
'database' => 'b8_wordlist',
|
||||||
|
'table_name' => 'b8_wordlist',
|
||||||
|
'host' => 'localhost',
|
||||||
|
'user' => FALSE,
|
||||||
|
'pass' => FALSE,
|
||||||
|
'connection' => NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
public $b8_config = array(
|
||||||
|
'degenerator' => NULL,
|
||||||
|
'today' => NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
private $_connection = NULL;
|
||||||
|
private $_deletes = array();
|
||||||
|
private $_puts = array();
|
||||||
|
private $_updates = array();
|
||||||
|
|
||||||
|
const DATABASE_CONNECTION_FAIL = 'DATABASE_CONNECTION_FAIL';
|
||||||
|
const DATABASE_CONNECTION_ERROR = 'DATABASE_CONNECTION_ERROR';
|
||||||
|
const DATABASE_CONNECTION_BAD_RESOURCE = 'DATABASE_CONNECTION_BAD_RESOURCE';
|
||||||
|
const DATABASE_SELECT_ERROR = 'DATABASE_SELECT_ERROR';
|
||||||
|
const DATABASE_TABLE_ACCESS_FAIL = 'DATABASE_TABLE_ACCESS_FAIL';
|
||||||
|
const DATABASE_WRONG_VERSION = 'DATABASE_WRONG_VERSION';
|
||||||
|
|
||||||
|
/**
 * Constructs the database layer.
 *
 * Copies the recognized entries of $config into $this->config. A passed
 * 'connection' that is not a MySQL link resource is stored as FALSE.
 *
 * @access public
 * @param array $config Storage settings (table_name, host, user, pass, database, connection)
 * @param string $degenerator Stored as the 'degenerator' entry of the b8 config
 * @param string $today Stored as the 'today' entry of the b8 config
 */

function __construct($config, $degenerator, $today)
{

	# Pass some variables of the main b8 config to this class
	$this->b8_config['today'] = $today;
	$this->b8_config['degenerator'] = $degenerator;

	# Nothing to validate if no config items were passed
	if(count($config) <= 0)
		return;

	foreach($config as $name => $value) {

		switch($name) {

			case 'table_name':
			case 'host':
			case 'user':
			case 'pass':
			case 'database':
				$this->config[$name] = (string) $value;
				break;

			case 'connection':

				# Without a passed connection, the default (NULL) is kept
				if($value === NULL)
					break;

				# Only MySQL link resources are accepted, everything else is marked as bad
				$is_mysql_link = FALSE;

				if(is_resource($value) === TRUE) {
					$resource_type = get_resource_type($value);
					$is_mysql_link = ($resource_type === 'mysql link' || $resource_type === 'mysql link persistent');
				}

				$this->config['connection'] = $is_mysql_link ? $value : FALSE;

				break;

		}

	}

}
|
||||||
|
|
||||||
|
/**
 * Commits pending changes and closes the database connection.
 *
 * @access public
 * @return void
 */

function __destruct()
{

	# Only act if a connection has been set up
	if($this->_connection !== NULL) {

		# Commit any changes before closing
		$this->_commit();

		# A link resource that was passed in stays open, only a connection
		# created by b8 itself is closed
		if($this->config['connection'] === NULL)
			mysql_close($this->_connection);

		$this->connected = FALSE;

	}

}
|
||||||
|
|
||||||
|
/**
 * Connect to the database and do some checks.
 *
 * Either uses the link resource passed via the config or opens an own
 * connection, checks that the wordlist table can be accessed and finally
 * lets check_database() verify the database.
 *
 * @access public
 * @return mixed Returns TRUE on a successful database connection, otherwise an error code
 *               (for select and table access errors followed by ": " and the MySQL error message).
 */

public function connect()
{

	# Are we already connected?
	if($this->connected === TRUE)
		return TRUE;

	# Are we using an existing passed resource?
	if($this->config['connection'] === FALSE) {
		# ... yes we are, but the connection is not a resource, so return an error
		$this->connected = FALSE;
		return self::DATABASE_CONNECTION_BAD_RESOURCE;
	}

	if($this->config['connection'] === NULL) {

		# ... no we aren't so we have to connect.
		$link = mysql_connect($this->config['host'], $this->config['user'], $this->config['pass']);

		if(!$link) {
			# Keep $_connection at NULL, so the destructor doesn't try to
			# commit to or close a link that was never opened
			$this->_connection = NULL;
			$this->connected = FALSE;
			return self::DATABASE_CONNECTION_ERROR;
		}

		$this->_connection = $link;

		if(mysql_select_db($this->config['database'], $this->_connection) === FALSE) {
			$this->connected = FALSE;
			return self::DATABASE_SELECT_ERROR . ": " . mysql_error($this->_connection);
		}

	}

	else {
		# ... yes we are
		$this->_connection = $this->config['connection'];
	}

	# Just in case ...
	if($this->_connection === NULL) {
		$this->connected = FALSE;
		return self::DATABASE_CONNECTION_FAIL;
	}

	# Check to see if the wordlist table exists
	$description = mysql_query('DESCRIBE ' . $this->config['table_name'], $this->_connection);

	if($description === FALSE) {
		$this->connected = FALSE;
		return self::DATABASE_TABLE_ACCESS_FAIL . ": " . mysql_error($this->_connection);
	}

	# The description itself is not needed, only the fact that it could be fetched
	if(is_resource($description) === TRUE)
		mysql_free_result($description);

	# Everything is okay and connected
	$this->connected = TRUE;

	# Let's see if this is a b8 database and the version is okay
	return $this->check_database();

}
|
||||||
|
|
||||||
|
/**
 * Does the actual interaction with the database when fetching data.
 *
 * @access protected
 * @param array $tokens
 * @return array Returns an array of the returned data in the format array(token => data)
 *               or an empty array if there was no data, no token was passed or the query failed.
 */

protected function _get_query($tokens)
{

	# Without tokens, there's nothing to look up
	if(count($tokens) === 0)
		return array();

	# Construct the query ...

	$escaped_tokens = array();

	foreach($tokens as $token)
		$escaped_tokens[] = mysql_real_escape_string($token, $this->_connection);

	$where = 'token IN ("' . implode('", "', $escaped_tokens) . '")';

	# ... and fetch the data

	$result = mysql_query('
		SELECT token, count
		FROM ' . $this->config['table_name'] . '
		WHERE ' . $where . ';
	', $this->_connection);

	# A failed query yields no data (and no result that could be read or freed)
	if($result === FALSE)
		return array();

	$data = array();

	while($row = mysql_fetch_array($result, MYSQL_ASSOC))
		$data[$row['token']] = $row['count'];

	mysql_free_result($result);

	return $data;

}
|
||||||
|
|
||||||
|
/**
 * Queues a new token to be stored in the database on the next commit.
 *
 * @access protected
 * @param string $token
 * @param string $count
 * @return void
 */

protected function _put($token, $count)
{

	$escaped_token = mysql_real_escape_string($token, $this->_connection);
	$escaped_count = mysql_real_escape_string($count, $this->_connection);

	# Stored as a ready-to-use value tuple for the INSERT query
	$this->_puts[] = '("' . $escaped_token . '", "' . $escaped_count . '")';

}
|
||||||
|
|
||||||
|
/**
 * Queues new data for an existing token to be written on the next commit.
 *
 * @access protected
 * @param string $token
 * @param string $count
 * @return void
 */

protected function _update($token, $count)
{

	$escaped_token = mysql_real_escape_string($token, $this->_connection);
	$escaped_count = mysql_real_escape_string($count, $this->_connection);

	# Stored as a ready-to-use value tuple for the INSERT ... ON DUPLICATE KEY UPDATE query
	$this->_updates[] = '("' . $escaped_token . '", "' . $escaped_count . '")';

}
|
||||||
|
|
||||||
|
/**
 * Queues a token to be removed from the database on the next commit.
 *
 * @access protected
 * @param string $token
 * @return void
 */

protected function _del($token)
{

	# The token is escaped here, as the commit uses the queue as it is
	$this->_deletes[] = mysql_real_escape_string($token, $this->_connection);

}
|
||||||
|
|
||||||
|
/**
 * Commits any modification queries.
 *
 * Sends the queued deletes, puts and updates (in this order) to the database
 * and empties each queue afterwards. The entries of the queues are already
 * escaped by _del(), _put() and _update().
 *
 * @access protected
 * @return void
 */

protected function _commit()
{

	# mysql_query() returns a boolean and no result resource for DELETE and
	# INSERT statements, so there's nothing to free after the queries.

	if(count($this->_deletes) > 0) {

		mysql_query('
			DELETE FROM ' . $this->config['table_name'] . '
			WHERE token IN ("' . implode('", "', $this->_deletes) . '");
		', $this->_connection);

		$this->_deletes = array();

	}

	if(count($this->_puts) > 0) {

		mysql_query('
			INSERT INTO ' . $this->config['table_name'] . '(token, count)
			VALUES ' . implode(', ', $this->_puts) . ';', $this->_connection);

		$this->_puts = array();

	}

	if(count($this->_updates) > 0) {

		# Tokens that already exist just get their count replaced
		mysql_query('
			INSERT INTO ' . $this->config['table_name'] . '(token, count)
			VALUES ' . implode(', ', $this->_updates) . '
			ON DUPLICATE KEY UPDATE ' . $this->config['table_name'] . '.count = VALUES(count);', $this->_connection);

		$this->_updates = array();

	}

}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
Loading…
Reference in a new issue