Code Coverage
 
Classes and Traits
Functions and Methods
Lines
Total
0.00%
0 / 1
0.00%
0 / 2
CRAP
0.00%
0 / 16
PhpStemmer
0.00%
0 / 1
0.00%
0 / 2
20
0.00%
0 / 16
 __construct($lang, $charset = 'utf-8')
0.00%
0 / 1
6
0.00%
0 / 9
 normalize(array $tokens)
0.00%
0 / 1
6
0.00%
0 / 7
<?php
/**
* This file is part of the Statistical Classifier package.
*
* (c) Cam Spiers <camspiers@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Camspiers\StatisticalClassifier\Normalizer\Token;
/**
 * Token normalizer that replaces every token with its stem.
 *
 * The stemming itself is delegated to stemword(), which is not defined in
 * this file. NOTE(review): presumably it is provided by the php-stemmer
 * extension referenced below; confirm it is loaded wherever this class is used.
 *
 * @author Cam Spiers <camspiers@gmail.com>
 * @package Statistical Classifier
 * @see https://github.com/hthetiot/php-stemmer.git
 */
class PhpStemmer implements NormalizerInterface
{
    /**
     * Language names accepted by the constructor (all lower case).
     *
     * @var array
     */
    protected $availableLanguages = array('danish', 'dutch', 'english', 'finnish', 'french', 'german', 'hungarian',
                                          'italian', 'norwegian', 'porter', 'portuguese', 'romanian', 'russian',
                                          'spanish', 'swedish', 'turkish');

    /**
     * Charset identifier passed to stemword(), stored upper-cased with
     * dashes replaced by underscores (e.g. 'utf-8' is stored as 'UTF_8').
     *
     * @var string
     */
    protected $charset;

    /**
     * Lower-cased language name passed to stemword().
     *
     * @var string
     */
    protected $lang;

    /**
     * Validates the language and stores the settings used for stemming.
     *
     * @param string $lang    Language name; matched case-insensitively against
     *                        the list of available languages
     * @param string $charset Charset of the tokens; dashes are converted to
     *                        underscores and the result is upper-cased
     *
     * @throws \InvalidArgumentException When the language is not available
     */
    public function __construct($lang, $charset = 'utf-8')
    {
        $lang = strtolower($lang);

        // $lang is always a string at this point, so a strict comparison is
        // safe and avoids the pitfalls of loose comparison in in_array().
        if (! in_array($lang, $this->availableLanguages, true)) {
            throw new \InvalidArgumentException("Invalid language $lang");
        }

        $this->charset = strtoupper(str_replace('-', '_', $charset));
        $this->lang = $lang;
    }

    /**
     * Stems every token using the configured language and charset.
     *
     * Array keys are preserved; each value is replaced by the result of
     * stemword() for that value.
     *
     * {@inheritdoc}
     */
    public function normalize(array $tokens)
    {
        foreach ($tokens as $k => $token) {
            $tokens[$k] = stemword($token, $this->lang, $this->charset);
        }

        return $tokens;
    }
}