Code Coverage |
||||||||||
Classes and Traits |
Functions and Methods |
Lines |
||||||||
Total | |
0.00% |
0 / 1 |
|
0.00% |
0 / 2 |
CRAP | |
0.00% |
0 / 16 |
PhpStemmer | |
0.00% |
0 / 1 |
|
0.00% |
0 / 2 |
20 | |
0.00% |
0 / 16 |
__construct($lang, $charset = 'utf-8') | |
0.00% |
0 / 1 |
6 | |
0.00% |
0 / 9 |
|||
normalize(array $tokens) | |
0.00% |
0 / 1 |
6 | |
0.00% |
0 / 7 |
<?php | |
/** | |
* This file is part of the Statistical Classifier package. | |
* | |
* (c) Cam Spiers <camspiers@gmail.com> | |
* | |
* For the full copyright and license information, please view the LICENSE | |
* file that was distributed with this source code. | |
*/ | |
namespace Camspiers\StatisticalClassifier\Normalizer\Token; | |
/**
 * Token normalizer that replaces every token with the result of `stemword()`.
 *
 * NOTE(review): `stemword()` is not defined in this file; presumably it is
 * provided by the php-stemmer extension linked below - confirm it is loaded
 * wherever this class is used.
 *
 * @author Cam Spiers <camspiers@gmail.com>
 * @package Statistical Classifier
 * @see https://github.com/hthetiot/php-stemmer.git
 */
class PhpStemmer implements NormalizerInterface
{
    /**
     * Language names accepted by the constructor (all lower case).
     *
     * @var array
     */
    protected $availableLanguages = array(
        'danish', 'dutch', 'english', 'finnish', 'french', 'german', 'hungarian',
        'italian', 'norwegian', 'porter', 'portuguese', 'romanian', 'russian',
        'spanish', 'swedish', 'turkish',
    );

    /**
     * Charset name passed to `stemword()`, stored upper-cased with
     * underscores instead of hyphens.
     *
     * @var string
     */
    protected $charset;

    /**
     * Lower-cased language name passed to `stemword()`.
     *
     * @var string
     */
    protected $lang;

    /**
     * Validates the language and stores the normalized language and charset.
     *
     * The language is matched case-insensitively against the list of
     * available languages. The charset has every '-' replaced by '_' and is
     * then upper-cased, so the default 'utf-8' is stored as 'UTF_8'.
     *
     * @param string $lang    Language name, e.g. 'english'
     * @param string $charset Charset name, e.g. 'utf-8'
     *
     * @throws \InvalidArgumentException When $lang is not an available language
     */
    public function __construct($lang, $charset = 'utf-8')
    {
        $lang = strtolower($lang);

        // Strict comparison: only an exact string match counts as supported.
        if (!in_array($lang, $this->availableLanguages, true)) {
            throw new \InvalidArgumentException("Invalid language $lang");
        }

        $this->charset = strtoupper(str_replace('-', '_', $charset));
        $this->lang = $lang;
    }

    /**
     * Stems every token, keeping the array keys of the input unchanged.
     *
     * @param array $tokens Tokens to stem
     *
     * @return array The same keys, each mapped to `stemword()`'s result
     */
    public function normalize(array $tokens)
    {
        foreach ($tokens as $k => $token) {
            $tokens[$k] = stemword($token, $this->lang, $this->charset);
        }

        return $tokens;
    }
}