Code Coverage |
||||||||||
Classes and Traits |
Functions and Methods |
Lines |
||||||||
Total | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
CRAP | |
84.21% |
16 / 19 |
DocumentLength | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6.14 | |
84.21% |
16 / 19 |
__invoke($tfidf) | |
0.00% |
0 / 1 |
6.14 | |
84.21% |
16 / 19 |
<?php | |
/** | |
* This file is part of the Statistical Classifier package. | |
* | |
* (c) Cam Spiers <camspiers@gmail.com> | |
* | |
* For the full copyright and license information, please view the LICENSE | |
* file that was distributed with this source code. | |
*/ | |
namespace Camspiers\StatisticalClassifier\Transform; | |
/** | |
* @author Cam Spiers <camspiers@gmail.com> | |
* @package Statistical Classifier | |
*/ | |
class DocumentLength
{
    /**
     * Rescales the weights of every document so that the document's
     * Euclidean (L2) norm becomes 1.
     *
     * The input is traversed as category => document index => token => weight.
     * For each document the square root of the sum of squared weights is
     * computed, and every weight of that document is divided by it.
     *
     * @param  mixed $tfidf nested structure as described above (presumably
     *                      plain arrays - confirm against callers)
     * @return mixed the value of $tfidf with each weight replaced by its
     *               normalised counterpart
     * @throws \RuntimeException as soon as a document whose norm is zero is
     *                           encountered (e.g. no tokens or only zero weights)
     */
    public function __invoke($tfidf)
    {
        // Start from the input so categories without documents are carried over untouched.
        $normalised = $tfidf;

        foreach ($tfidf as $label => $docs) {
            foreach ($docs as $docKey => $weights) {
                $sumOfSquares = 0;
                foreach ($weights as $weight) {
                    $sumOfSquares += $weight * $weight;
                }

                $norm = sqrt($sumOfSquares);

                // Guard first: a zero norm cannot be used as a divisor.
                if ($norm == 0) {
                    throw new \RuntimeException("Cannot divide by 0 in DocumentLength transform");
                }

                foreach ($weights as $term => $weight) {
                    $normalised[$label][$docKey][$term] = $weight / $norm;
                }
            }
        }

        return $normalised;
    }
}