package ixee.cryptopals.utils

import FunctionUtils.tup

/**
 * Case-folded table of character frequencies, used to score how closely the
 * character distribution of one text matches another.
 *
 * @param mapargs character -> relative frequency. Keys are lower-cased when
 *                [[mappings]] is built; entries whose keys collide after
 *                lower-casing (e.g. 'A' and 'a') have their frequencies summed.
 */
class FrequencyMap(mapargs: Map[Char, Double]) {
  // Neither of these is referenced by any other code in this file.
  sealed trait DiffResult
  object Inconclusive extends DiffResult

  /** [[likelyFitting]] reports a fit when `1 - diff` is strictly above this value. */
  val FittingThreshold: Double = 0.95

  // Multiplier applied by `at` when the queried character had to be lower-cased.
  // TODO: should be a per-map setting.
  private val UppercaseBias: Double = 0.0004

  /**
   * Frequencies keyed by lower-cased character.
   *
   * Colliding keys are summed rather than overwritten, so a map built from
   * mixed-case text keeps its full frequency mass.
   */
  val mappings: Map[Char, Double] =
    mapargs.foldLeft(Map.empty[Char, Double]) { case (acc, (c, freq)) =>
      val key = c.toLower
      acc.updated(key, acc.getOrElse(key, 0.0) + freq)
    }

  /**
   * Looks up the frequency recorded for `c`.
   *
   * Characters that differ from their lower-case form are looked up under the
   * lower-case key and scaled by a heavy penalty (bias HARD against uppercase
   * spam). Every other character (lower-case letters, digits, punctuation,
   * whitespace) is returned unscaled.
   *
   * @param c character to look up
   * @return the (possibly penalised) frequency, or `None` when the map has no
   *         entry for the character
   */
  def at(c: Char): Option[Double] = {
    val folded = c.toLower
    if (folded == c) mappings.get(c)
    else mappings.get(folded).map(_ * UppercaseBias) // TODO remove the bias here
  }

  /**
   * Scores `other` against this map: the sum of [[diffAt]] over every entry of
   * `other.mappings`. Lower means closer.
   *
   * @param other the map whose entries are scored
   * @return the accumulated per-character difference; 0.0 when `other` is empty
   */
  def diff(other: FrequencyMap): Double = {
    // pseudoscience here
    /*
     * Idea...
     * sum difference squares between other and this,
     * discarding characters that aren't present in other if other's length is < X
     *
     * .. divide by other.totalCount?
     */
    // Since this doesn't have sample counts, can't switch on other.sampleCount,
    // so just sum over everything.
    other.mappings.foldLeft(0.0) { (total: Double, entry: (Char, Double)) =>
      total + tup(diffAt _)(entry)
    }
  }

  /**
   * Distance between an observed frequency and the frequency expected for `c`.
   *
   * For the punctuation, digit and whitespace characters listed below the
   * expected frequency is a hard-coded constant, or the result is a flat 1.0.
   * Any other character is compared against `at(c)`; when this map has no entry
   * for it, 0.4 is used as the squared difference. The result is the square
   * root of the squared difference.
   *
   * @param c        character being scored
   * @param charFreq observed frequency of `c`
   * @return non-negative distance for this character
   */
  // TODO: don't hardcode these..
  def diffAt(c: Char, charFreq: Double): Double = {
    val squaredDifference: Double = c match {
      case ' ' =>
        squared(0.09 - charFreq)
      case '{' | '}' | '`' | '|' | '^' =>
        squared(0.000001 - charFreq) // { } | and ` are very unlikely irl
      case '[' | ']' =>
        squared(0.0000015 - charFreq) // [ ] are more likely
      case '"' | '\'' =>
        squared(0.00001 - charFreq) // " or ' are 100% unlikely 90% of the time
      case '~' | '+' | '=' | '<' | '>' | '/' | '\\' =>
        1.0 // flat penalty; was squared(0.00000125 - charFreq). math is weird kids
      case ';' | ':' | '-' | '*' | '(' | '&' | ')' | '_' =>
        squared(0.000003 - charFreq) // getting into more common punctuation
      case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
        squared(0.000002 - charFreq) // numbers are KINDA uncommon
      case '$' | '%' | '#' | '@' =>
        1.0 // flat penalty; was squared(0.00002 - charFreq). more punctuation...
      case '!' =>
        squared(0.6 - charFreq)
      case '.' | ',' =>
        squared(0.00007 - charFreq) // and the last of the punctuations
      case '\n' | '\r' =>
        squared(0.000001 - charFreq) // explicit \r \n is rare in freeform text.
      case _ =>
        this.at(c).map(_ - charFreq).map(squared).getOrElse(0.4)
    }
    Math.pow(squaredDifference, 0.5)
  }

  /** Returns `x` multiplied by itself. */
  def squared(x: Double): Double = x * x

  /**
   * Whether this map is a close enough match for `other`.
   *
   * Calls `other.diff(this)`, so `other` supplies the expected frequencies and
   * this map's entries are the ones being scored.
   *
   * @param other the map to compare against
   * @return true when `1 - other.diff(this)` is strictly greater than
   *         [[FittingThreshold]]
   */
  def likelyFitting(other: FrequencyMap): Boolean =
    1 - other.diff(this) > FittingThreshold

  /** Renders the map exactly as supplied to the constructor (keys not case-folded). */
  override def toString: String = mapargs.toString
}

/**
 * A [[FrequencyMap]] built from another map's case-folded entries, together
 * with a sample count. Currently adds no behaviour of its own.
 */
class SampledFrequencyMap(fm: FrequencyMap, samples: Int) extends FrequencyMap(fm.mappings) {
  // come back to this
}

/** Factory methods for [[FrequencyMap]]. */
object FrequencyMap {
  /** Builds a map from individual `char -> frequency` pairs. */
  def apply(mapargs: (Char, Double)*): FrequencyMap = FrequencyMap(mapargs.toMap)

  /** Builds a map from an existing `char -> frequency` table. */
  def apply(mapargs: Map[Char, Double]): FrequencyMap = new FrequencyMap(mapargs)

  /**
   * Builds a map of relative character frequencies for `s`: each distinct
   * character's occurrence count divided by the length of `s`.
   *
   * @param s text to analyse; an empty string yields an empty map
   * @return the frequency map of `s`
   */
  def of(s: String): FrequencyMap = {
    val length = s.length.toDouble
    val counts = s.foldLeft(Map.empty[Char, Int]) { (seen, c) =>
      seen.updated(c, seen.getOrElse(c, 0) + 1)
    }
    // `counts` is empty for an empty string, so the division by zero never runs.
    FrequencyMap(counts.map { case (c, n) => c -> n / length })
  }
}