From aa390776b26d794ab37aa13833fa7043aad16504 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 23 Nov 2014 03:09:27 -0800 Subject: Add in code for challenge 3 --- src/utils/FrequencyMap.scala | 91 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 src/utils/FrequencyMap.scala (limited to 'src/utils/FrequencyMap.scala') diff --git a/src/utils/FrequencyMap.scala b/src/utils/FrequencyMap.scala new file mode 100644 index 0000000..0bea25c --- /dev/null +++ b/src/utils/FrequencyMap.scala @@ -0,0 +1,91 @@ +package ixee.cryptopals.utils + +import ConversionUtils.tup + +class FrequencyMap(mapargs: Map[Char, Double]) { + sealed trait DiffResult + object Inconclusive extends DiffResult + + val FittingThreshold: Double = 0.95 + + val mappings = mapargs map tup(_.toLower -> _) + + def at(c: Char): Option[Double] = + if (c.isLower) + mappings.get(c) + else // TODO remove the bias here + mappings.get(c.toLower).map(_ * 0.002) // bias HARD against uppercase spam. should make this a per-map setting. + + def diff(other: FrequencyMap): Double = { + // pseudoscience here + /* + * Idea... + * sum difference squares between other and this + * discarding characters that aren't present in other if other's length is < X + * + * .. divide by other.totalCount? + */ + + // since this doesn't have sample counts, can't do switching based on other.sampleCount + // so just do it. + + other.mappings.foldLeft(0.0) { (confidence: Double, next: (Char, Double)) => + confidence + tup(diffAt _)(next) + } + } + + // TODO: don't hardcode these.. + def diffAt(c: Char, charFreq: Double) = + c match { + case ' ' => 0 + case '{' | '}' | '`' | '|' => + squared(0.000001 - charFreq) // { } | and ` are very unlikely irl + case '[' | ']' => + squared(0.000002 - charFreq) // [ ] are more likely + case '"' | '\''=> + squared(0.00001 - charFreq) // " or ' are 100% unlikely 90% of the time + case '~' | '+' | '=' | '<' | '>' | '/' | '\\' => + squared(0.000004 - charFreq) // math is weird kids + case ';' | ':' | '-' | '*' | '(' | '&' | ')' | '_' => + squared(0.000009 - charFreq) // getting into more common punctuation + case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => + squared(0.00001 - charFreq) // numbers are KINDA uncommon + case '$' | '%' | '#' | '@' | '!' => + squared(0.00003 - charFreq) // more punctuation... + case '.' | ',' => + squared(0.00007 - charFreq) // and the last of the punctuations + case '\n' | '\r' => + squared(0.0000002 - charFreq) // explicit \r \n is rare in freeform text. + case _ => + this.at(c).map(_ - charFreq).map(squared).getOrElse(0.0) + } + + def squared(x: Double) = x * x + + def likelyFitting(other: FrequencyMap) = 1 - (other diff this) > FittingThreshold + + override def toString = mapargs.toString +} + +class SampledFrequencyMap(fm: FrequencyMap, samples: Int) extends FrequencyMap(fm.mappings) { + // come back to this +} + +object FrequencyMap { + def apply(mapargs: (Char, Double)*): FrequencyMap = FrequencyMap(mapargs.toMap) + def apply(mapargs: Map[Char, Double]): FrequencyMap = new FrequencyMap(mapargs) + + def of(s: String) = { + def count[A](m: Map[A, Int], c: A): Map[A, Int] = + m.get(c) match { + case Some(count) => m + (c -> (count + 1)) + case None => m + (c -> 1) + } + + FrequencyMap( + s + .foldLeft(Map[Char, Int]())(count _) + .map(tup(_ -> _ / s.length.toDouble)) + ) + } +} -- cgit v1.1