Code Coverage for /Users/cameron/Sites/statistical-classifier-pages/statistical-classifier/src/Camspiers/StatisticalClassifier/Normalizer/Token/PhpStemmer.php

1	<?php
2
3	/**
4	* This file is part of the Statistical Classifier package.
5	*
6	* (c) Cam Spiers <camspiers@gmail.com>
7	*
8	* For the full copyright and license information, please view the LICENSE
9	* file that was distributed with this source code.
10	*/
11
12	namespace Camspiers\StatisticalClassifier\Normalizer\Token;
13
/**
 * Token normalizer that stems every token by calling stemword().
 *
 * stemword() is not defined in this file; presumably it is provided by the
 * php-stemmer extension referenced below (confirm it is loaded at runtime).
 *
 * @author Cam Spiers <camspiers@gmail.com>
 * @package Statistical Classifier
 * @see https://github.com/hthetiot/php-stemmer.git
 */
class PhpStemmer implements NormalizerInterface
{
    /**
     * Language names accepted by the constructor, all lower-case.
     *
     * @var array
     */
    protected $availableLanguages = array(
        'danish',
        'dutch',
        'english',
        'finnish',
        'french',
        'german',
        'hungarian',
        'italian',
        'norwegian',
        'porter',
        'portuguese',
        'romanian',
        'russian',
        'spanish',
        'swedish',
        'turkish',
    );

    /**
     * Charset name handed to stemword(), stored upper-cased with hyphens
     * replaced by underscores (e.g. 'UTF_8').
     *
     * @var string
     */
    protected $charset;

    /**
     * Lower-cased language name handed to stemword().
     *
     * @var string
     */
    protected $lang;

    /**
     * Validates the language and stores the normalised language and charset.
     *
     * @param string $lang    Language name, matched case-insensitively against
     *                        the available languages
     * @param string $charset Charset name; hyphens become underscores and the
     *                        result is upper-cased before it is stored
     *
     * @throws \InvalidArgumentException When $lang is not an available language
     */
    public function __construct($lang, $charset = 'utf-8')
    {
        $lang = strtolower($lang);

        // Strict comparison so only an exact string match is accepted,
        // never a loosely-equal value.
        if (!in_array($lang, $this->availableLanguages, true)) {
            throw new \InvalidArgumentException("Invalid language $lang");
        }

        // 'utf-8' becomes 'UTF_8' -- presumably the spelling stemword()
        // expects; verify against the extension's documentation.
        $this->charset = strtoupper(str_replace('-', '_', $charset));
        $this->lang = $lang;
    }

    /**
     * {@inheritdoc}
     *
     * Each token is replaced by the result of stemword() for the configured
     * language and charset. Array keys are preserved.
     */
    public function normalize(array $tokens)
    {
        foreach ($tokens as $k => $token) {
            $tokens[$k] = stemword($token, $this->lang, $this->charset);
        }

        return $tokens;
    }
}