summaryrefslogtreecommitdiff
path: root/src/utils/TextScorer.scala
blob: b02c2501bfaca52c8c34586352ee25cf5923f6f2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
package ixee.cryptopals.utils

import ByteUtils._

/** Heuristics for deciding whether a run of bytes (or a string) is plausible
  * plain text, and for scoring it against a reference letter-frequency table.
  */
object TextScorer {
  import java.nio.charset.StandardCharsets

  /** Bytes below 0x20 that are still accepted as text: backspace (0x08),
    * horizontal tab (0x09), line feed (0x0a), vertical tab (0x0b) and
    * carriage return (0x0d). Every other byte below 0x20 — including form
    * feed (0x0c) — makes [[isText]] reject the input.
    */
  val controlChars: Seq[Byte] = Seq(
    0x08, 0x09, 0x0a, 0x0b, 0x0d
  ).map(_.toByte)

  /** Returns `true` when every byte in `s` has its high bit clear and every
    * byte below 0x20 is one of [[controlChars]]. An empty input is accepted.
    *
    * The check is done in a single pass without building an intermediate
    * filtered collection.
    *
    * @param s the bytes to inspect
    */
  def isText(s: Iterable[Byte]): Boolean =
    s.forall { b =>
      // `@&` is not a standard Byte method; presumably the byte-wise AND
      // supplied by the ByteUtils import — confirm there.
      (b @& 0x80.toByte) == 0 &&
      (b >= 0x20 || controlChars.contains(b))
    }

  /** Scores `s` against `Frequencies.cornell40kSample` via [[scoreBy]].
    *
    * NOTE(review): the meaning of the number (and whether the `-1` sentinel
    * sorts sensibly relative to real scores) depends on `FrequencyMap.diff`,
    * which is not visible from this file — confirm against callers.
    *
    * @param s candidate bytes
    * @return `-1` when `s` fails [[isText]]; otherwise the value of [[scoreBy]]
    */
  def score(s: Seq[Byte]): Double =
    if (!isText(s))
      -1.0  // not English text!
    else
      // isText guarantees 7-bit input, so decode explicitly as US-ASCII rather
      // than relying on the platform default charset.
      scoreBy(new String(s.toArray, StandardCharsets.US_ASCII), Frequencies.cornell40kSample)

  /** Builds the frequency map of `s` and returns `fm diff` that map.
    *
    * @param s  text to analyse
    * @param fm reference frequency map
    */
  def scoreBy(s: String, fm: FrequencyMap): Double =
    fm.diff(FrequencyMap.of(s))

  /** Returns `true` when `s` consists only of characters accepted by
    * [[isText]] and its frequency map is `likelyFitting` the implicit
    * reference map.
    *
    * Characters at or above 0x80 are rejected before narrowing to `Byte`:
    * `Char.toByte` keeps only the low 8 bits, so without this guard a
    * character such as U+0141 would be mistaken for 0x41 ('A').
    *
    * @param s  candidate text
    * @param fm reference frequency map
    */
  def looksEnglish(s: String)(implicit fm: FrequencyMap): Boolean =
    s.forall(_ < 0x80) &&
      isText(s.toCharArray.map(_.toByte)) &&
      FrequencyMap.of(s).likelyFitting(fm)
}