package ixee.cryptopals.utils

import ByteUtils._

/** Heuristics for deciding whether a byte sequence (or string) is plausible
  * plain text, and for scoring it against a reference frequency map.
  */
object TextScorer {

  /** Byte values below 0x20 that are still accepted as text:
    * 0x08, 0x09 (tab), 0x0a (line feed), 0x0b and 0x0d (carriage return).
    */
  val controlChars: Seq[Byte] = Seq(
    0x08, 0x09, 0x0a, 0x0b, 0x0d
  ).map(_.toByte)

  /** Returns true when every byte passes the high-bit test and every byte
    * below 0x20 is one of [[controlChars]].
    *
    * NOTE(review): `@&` comes from `ByteUtils` and is presumably a byte-wise
    * AND, making the first test "high bit is clear" — confirm against
    * `ByteUtils`.
    *
    * @param s the bytes to inspect; an empty input yields true
    */
  def isText(s: Iterable[Byte]): Boolean =
    s.forall(b => (b @& 0x80.toByte) == 0) &&
      s.filter(_ < 0x20).forall(b => controlChars.contains(b))

  /** Scores `s` against `Frequencies.cornell40kSample`.
    *
    * @param s raw bytes of the candidate text
    * @return -1 when `s` fails [[isText]]; otherwise the result of [[scoreBy]]
    */
  def score(s: Seq[Byte]): Double =
    if (!isText(s)) -1 // not English text!
    else {
      // isText has rejected every byte with the high bit set, so decoding as
      // US-ASCII is exact and does not depend on the platform default charset.
      val text = new String(s.toArray, java.nio.charset.StandardCharsets.US_ASCII)
      scoreBy(text, Frequencies.cornell40kSample)
    }

  /** Builds the frequency map of `s` and returns `fm diff` of it.
    *
    * @param s  the text to score
    * @param fm the reference frequency map
    * @return whatever `FrequencyMap.diff` yields for the reference map and the
    *         map built from `s`
    */
  def scoreBy(s: String, fm: FrequencyMap): Double = {
    val sfm = FrequencyMap.of(s)
    fm diff sfm
  }

  /** Returns true when `s` is plain text and its frequency map is
    * `likelyFitting` the implicit reference map.
    *
    * Characters at or above 0x80 are rejected up front: narrowing a `Char` to
    * a `Byte` keeps only its low 8 bits, so a non-ASCII character such as
    * U+0141 would otherwise be mistaken for ASCII 0x41.
    *
    * @param s  the candidate text
    * @param fm the reference frequency map
    */
  def looksEnglish(s: String)(implicit fm: FrequencyMap): Boolean =
    s.forall(_ < 0x80) &&
      isText(s.toCharArray.map(_.toByte)) &&
      FrequencyMap.of(s).likelyFitting(fm)
}