summaryrefslogtreecommitdiff
path: root/src/utils/TextScorer.scala
diff options
context:
space:
mode:
Diffstat (limited to 'src/utils/TextScorer.scala')
-rw-r--r--src/utils/TextScorer.scala32
1 files changed, 32 insertions, 0 deletions
diff --git a/src/utils/TextScorer.scala b/src/utils/TextScorer.scala
new file mode 100644
index 0000000..b02c250
--- /dev/null
+++ b/src/utils/TextScorer.scala
@@ -0,0 +1,32 @@
+package ixee.cryptopals.utils
+
+import ByteUtils._
+
+/** Heuristics for deciding whether bytes or strings look like plain English text. */
+object TextScorer {
+  import java.nio.charset.StandardCharsets
+
+  /** Bytes below 0x20 that are still accepted as text: BS, TAB, LF, VT and CR. */
+  val controlChars: Seq[Byte] = Seq(
+    0x08, 0x09, 0x0a, 0x0b, 0x0d
+  ).map(_.toByte)
+
+  /**
+   * Tests whether every byte of `s` is acceptable as text.
+   *
+   * A byte is accepted when masking it with 0x80 through `@&` (from ByteUtils,
+   * presumably a bitwise AND -- confirm) yields 0, and it is either at least
+   * 0x20 or listed in [[controlChars]]. DEL (0x7f) is therefore accepted.
+   * An empty input counts as text.
+   *
+   * @param s the bytes to inspect
+   * @return true when all bytes pass both checks
+   */
+  def isText(s: Iterable[Byte]): Boolean =
+    // Single pass; logically the same as running the two checks separately.
+    s.forall { b =>
+      (b @& 0x80.toByte) == 0 && (b >= 0x20 || controlChars.contains(b))
+    }
+
+  /**
+   * Scores a byte sequence against `Frequencies.cornell40kSample`.
+   *
+   * @param s the candidate bytes
+   * @return -1 when `s` fails [[isText]]; otherwise the result of [[scoreBy]]
+   */
+  def score(s: Seq[Byte]): Double =
+    if (!isText(s))
+      -1.0 // not English text!
+    else
+      // isText only lets 7-bit bytes through, so decode explicitly as ASCII
+      // instead of depending on the platform default charset.
+      scoreBy(new String(s.toArray, StandardCharsets.US_ASCII), Frequencies.cornell40kSample)
+
+  /**
+   * Compares the frequency map built from `s` with a reference map.
+   *
+   * @param s  the text to measure
+   * @param fm the reference frequency map
+   * @return the value of `fm diff FrequencyMap.of(s)`
+   */
+  def scoreBy(s: String, fm: FrequencyMap): Double =
+    fm.diff(FrequencyMap.of(s))
+
+  /**
+   * Decides whether `s` looks like English according to the implicit
+   * reference frequency map.
+   *
+   * @param s  the candidate string
+   * @param fm the reference frequency map
+   * @return false when `s` contains a character at or above 0x80 or fails
+   *         [[isText]]; otherwise the result of `likelyFitting`
+   */
+  def looksEnglish(s: String)(implicit fm: FrequencyMap): Boolean =
+    // Reject non-ASCII characters before narrowing: Char.toByte keeps only the
+    // low 8 bits, so U+0141 would otherwise be mistaken for 'A' (0x41).
+    s.forall(_ < 0x80) &&
+      isText(s.toCharArray.map(_.toByte)) &&
+      FrequencyMap.of(s).likelyFitting(fm)
+}
+