blob: b02c2501bfaca52c8c34586352ee25cf5923f6f2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
package ixee.cryptopals.utils
import ByteUtils._
/** Heuristics for deciding whether a byte sequence / string is plausible
  * plain text, and for scoring it against a reference frequency table.
  */
object TextScorer {

  /** Bytes below 0x20 that are still accepted as part of text:
    * backspace (0x08), tab (0x09), line feed (0x0a), vertical tab (0x0b)
    * and carriage return (0x0d).
    */
  val controlChars: Seq[Byte] = Seq(
    0x08, 0x09, 0x0a, 0x0b, 0x0d
  ).map(_.toByte)

  /** Tells whether `s` consists only of bytes acceptable as text.
    *
    * A byte is accepted when masking it with 0x80 via `@&` yields 0 and it
    * is either >= 0x20 or listed in [[controlChars]]. An empty input is
    * accepted.
    *
    * NOTE(review): `@&` comes from `ByteUtils`; it is presumably a bitwise
    * AND on bytes, i.e. this is a "high bit clear" check -- confirm there.
    *
    * @param s the bytes to inspect
    * @return `true` if every byte passes both checks
    */
  def isText(s: Iterable[Byte]): Boolean =
    // Single pass; equivalent to checking the mask on all bytes and then the
    // control-character rule on all bytes, without an intermediate collection.
    s.forall { b =>
      (b @& 0x80.toByte) == 0 && (b >= 0x20 || controlChars.contains(b))
    }

  /** Scores `s` against `Frequencies.cornell40kSample`.
    *
    * @param s the candidate bytes
    * @return `-1` when `s` fails [[isText]]; otherwise the value of
    *         [[scoreBy]] for the decoded text
    */
  def score(s: Seq[Byte]): Double =
    if (!isText(s))
      -1.0 // sentinel: not English text!
    else
      // isText has accepted the bytes, so decode them as ASCII explicitly
      // instead of relying on the JVM's platform default charset.
      scoreBy(
        new String(s.toArray, java.nio.charset.StandardCharsets.US_ASCII),
        Frequencies.cornell40kSample
      )

  /** Compares the frequency map of `s` with the reference map `fm`.
    *
    * @param s  the text to analyse
    * @param fm the reference frequency map
    * @return the result of `fm diff FrequencyMap.of(s)`; the scale and
    *         direction of that value are defined by `FrequencyMap.diff`
    */
  def scoreBy(s: String, fm: FrequencyMap): Double =
    fm diff FrequencyMap.of(s)

  /** Tells whether `s` looks like English according to `fm`.
    *
    * @param s  the text to inspect
    * @param fm the reference frequency map (implicit)
    * @return `false` if `s` contains any character outside the 7-bit range
    *         or fails [[isText]]; otherwise the result of
    *         `FrequencyMap.of(s).likelyFitting(fm)`
    */
  def looksEnglish(s: String)(implicit fm: FrequencyMap): Boolean =
    // Char.toByte keeps only the low 8 bits, so a character such as U+0141
    // would turn into 0x41 ('A') and slip through isText. Reject anything
    // outside the 7-bit range before narrowing.
    s.forall(_ < 0x80) &&
      isText(s.toCharArray.map(_.toByte)) &&
      FrequencyMap.of(s).likelyFitting(fm)
}
|