/*
 * Provenance (from the patch header this file was extracted from):
 *   commit:  aa390776b26d794ab37aa13833fa7043aad16504
 *   author:  iximeow
 *   date:    Sun, 23 Nov 2014 03:09:27 -0800
 *   subject: Add in code for challenge 3
 *   path:    src/utils/TextScorer.scala (new file, rendered by cgit v1.1)
 */
package ixee.cryptopals.utils

import java.nio.charset.StandardCharsets

import ByteUtils._

/** Heuristics for deciding whether a run of bytes is plausible English text. */
object TextScorer {

  /**
   * Bytes below 0x20 that are still accepted as text:
   * backspace (0x08), tab (0x09), line feed (0x0a), vertical tab (0x0b)
   * and carriage return (0x0d). Form feed (0x0c) is not in the list.
   */
  val controlChars: Seq[Byte] = Seq(
    0x08, 0x09, 0x0a, 0x0b, 0x0d
  ).map(_.toByte)

  /**
   * Returns true when every byte passes the high-bit test and every byte
   * below 0x20 is one of [[controlChars]].
   *
   * NOTE(review): `@&` is not defined in this file; it presumably comes from
   * `ByteUtils._` and is assumed to be a byte-wise AND -- confirm.
   *
   * @param s bytes to inspect; an empty input is considered text
   */
  def isText(s: Iterable[Byte]): Boolean =
    s.forall { b =>
      // `@&` binds tighter than `==`; the parentheses only make that explicit.
      val highBitClear = (b @& 0x80.toByte) == 0
      highBitClear && (b >= 0x20 || controlChars.contains(b))
    }

  /**
   * Scores `s` against the `Frequencies.cornell40kSample` frequency map.
   *
   * @param s candidate plaintext bytes
   * @return -1 when `s` fails [[isText]] (i.e. it is not English text),
   *         otherwise the result of [[scoreBy]]
   */
  def score(s: Seq[Byte]): Double =
    if (isText(s)) {
      // Decode with an explicit charset instead of the platform default;
      // US-ASCII is what isText is meant to have validated.
      val decoded = new String(s.toArray, StandardCharsets.US_ASCII)
      scoreBy(decoded, Frequencies.cornell40kSample)
    } else {
      -1.0 // not English text!
    }

  /**
   * Builds a frequency map from `s` and returns `fm diff` of that map.
   * The meaning of the resulting number is defined by `FrequencyMap.diff`,
   * which lives outside this file.
   *
   * @param s  text to measure
   * @param fm reference frequency map to compare against
   */
  def scoreBy(s: String, fm: FrequencyMap): Double =
    fm.diff(FrequencyMap.of(s))

  /**
   * Returns true when `s` passes the text check and its frequency map is
   * reported by `likelyFitting` (defined on `FrequencyMap`, outside this file)
   * as fitting the implicit reference map.
   *
   * @param s  candidate string
   * @param fm reference frequency map
   */
  def looksEnglish(s: String)(implicit fm: FrequencyMap): Boolean = {
    // Check the chars before narrowing them: Char.toByte keeps only the low
    // 8 bits, so a character such as U+0141 would otherwise be mistaken for
    // 'A' (0x41) and slip through isText.
    val fitsInSevenBits = s.forall(_ < 0x80)
    fitsInSevenBits &&
      isText(s.toCharArray.map(_.toByte)) &&
      FrequencyMap.of(s).likelyFitting(fm)
  }
}