1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
package ixee.cryptopals.utils
import FunctionUtils.tup
/**
 * Character-frequency table used to score how closely one text sample matches
 * another distribution.
 *
 * Keys are folded to lower case on construction. Entries whose keys collide
 * after folding (for example 'A' and 'a') have their frequencies summed, so no
 * part of the input distribution is silently discarded.
 *
 * @param mapargs character -> relative frequency, exactly as supplied by the
 *                caller (this unfolded map is what `toString` prints)
 */
class FrequencyMap(mapargs: Map[Char, Double]) {
  sealed trait DiffResult
  object Inconclusive extends DiffResult

  /** Minimum value of `1 - diff` for [[likelyFitting]] to report a match. */
  val FittingThreshold: Double = 0.95

  // Factor applied to the stored frequency when `at` is queried with a
  // character that is not already in its lower-case form.
  // TODO: make this a per-map setting (or remove the bias entirely).
  private val UppercaseBias: Double = 0.0004

  /**
   * Lower-cased view of `mapargs`.
   *
   * Built with a fold rather than `map` so that two keys folding to the same
   * lower-case character accumulate instead of overwriting each other.
   */
  val mappings: Map[Char, Double] =
    mapargs.foldLeft(Map.empty[Char, Double]) { case (folded, (c, freq)) =>
      val key = c.toLower
      folded.updated(key, folded.getOrElse(key, 0.0) + freq)
    }

  /**
   * Frequency recorded for `c`.
   *
   * Characters that are unchanged by `toLower` (lower-case letters, digits,
   * whitespace, punctuation, ...) are looked up directly. Characters that do
   * change under `toLower` are looked up by their lower-case form and scaled
   * by the uppercase bias, which penalises upper-case-heavy text.
   *
   * @param c the character to look up
   * @return the (possibly biased) frequency, or `None` if the character is absent
   */
  def at(c: Char): Option[Double] = {
    val lower = c.toLower
    if (lower == c) mappings.get(c)
    else mappings.get(lower).map(_ * UppercaseBias)
  }

  /**
   * Distance between `other` and this map: the sum of [[diffAt]] over every
   * entry of `other.mappings`. Smaller means closer.
   *
   * This is a heuristic. Characters present here but absent from `other`
   * contribute nothing, and no sample-size weighting is applied because a
   * plain FrequencyMap carries no sample count.
   *
   * @param other the map whose entries are scored
   * @return the accumulated per-character error
   */
  def diff(other: FrequencyMap): Double =
    other.mappings.foldLeft(0.0) { case (total, (c, freq)) =>
      total + diffAt(c, freq)
    }

  /**
   * Error contribution of a single character observed with frequency `charFreq`.
   *
   * Most punctuation, digits and whitespace are scored against hard-coded
   * expected frequencies; a few symbol groups always cost a flat 1. Any other
   * character is scored against this map via [[at]], or against a squared
   * error of 0.4 when this map has no entry for it. The squared error is then
   * raised to the power 0.5, i.e. the result is the absolute difference (or
   * the square root of the flat penalty).
   *
   * TODO: don't hardcode these expectations.
   *
   * @param c        the observed character
   * @param charFreq the observed relative frequency of `c`
   * @return the non-negative error for this character
   */
  def diffAt(c: Char, charFreq: Double): Double = {
    val squaredError: Double = c match {
      case ' ' =>
        squared(0.09 - charFreq)
      case '{' | '}' | '`' | '|' | '^' =>
        squared(0.000001 - charFreq) // { } | and ` are very unlikely irl
      case '[' | ']' =>
        squared(0.0000015 - charFreq) // [ ] are more likely
      case '"' | '\'' =>
        squared(0.00001 - charFreq) // quotes are rare most of the time
      case '~' | '+' | '=' | '<' | '>' | '/' | '\\' =>
        1.0 // flat penalty
      case ';' | ':' | '-' | '*' | '(' | '&' | ')' | '_' =>
        squared(0.000003 - charFreq) // getting into more common punctuation
      case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
        squared(0.000002 - charFreq) // numbers are KINDA uncommon
      case '$' | '%' | '#' | '@' =>
        1.0 // flat penalty
      case '!' =>
        // NOTE(review): an expected frequency of 0.6 looks like a deliberate
        // penalty rather than a real estimate - confirm.
        squared(0.6 - charFreq)
      case '.' | ',' =>
        squared(0.00007 - charFreq) // and the last of the punctuations
      case '\n' | '\r' =>
        squared(0.000001 - charFreq) // explicit \r \n is rare in freeform text.
      case _ =>
        this.at(c).map(expected => squared(expected - charFreq)).getOrElse(0.4)
    }
    Math.pow(squaredError, 0.5)
  }

  /** Returns `x` multiplied by itself. */
  def squared(x: Double): Double = x * x

  /**
   * Whether this map is close enough to `other` to be considered a fit.
   *
   * The distance is computed as `other.diff(this)`, so it is this map's
   * entries that are iterated and `other` that supplies the expectations.
   *
   * @param other the map to compare against
   * @return true when `1 - distance` exceeds [[FittingThreshold]]
   */
  def likelyFitting(other: FrequencyMap): Boolean =
    1 - other.diff(this) > FittingThreshold

  /** String form of the original, unfolded constructor argument. */
  override def toString: String = mapargs.toString
}
/**
 * A [[FrequencyMap]] constructed from the `mappings` of an existing map `fm`,
 * together with a `samples` value (presumably the number of characters the
 * frequencies were measured over - confirm against callers).
 *
 * NOTE(review): `samples` is accepted but not referenced anywhere in this
 * class yet; the class currently behaves exactly like a FrequencyMap built
 * from `fm.mappings`.
 */
class SampledFrequencyMap(fm: FrequencyMap, samples: Int) extends FrequencyMap(fm.mappings) {
// come back to this
}
/** Factory methods for building [[FrequencyMap]] instances. */
object FrequencyMap {

  /** Builds a map from explicit (character, frequency) pairs. */
  def apply(mapargs: (Char, Double)*): FrequencyMap = apply(mapargs.toMap)

  /** Builds a map from an existing character -> frequency table. */
  def apply(mapargs: Map[Char, Double]): FrequencyMap = new FrequencyMap(mapargs)

  /**
   * Measures the relative frequency of every character in `s`.
   *
   * Each distinct character is mapped to its occurrence count divided by the
   * length of `s`. An empty string yields an empty map.
   *
   * @param s the text to sample
   * @return a FrequencyMap of the observed character frequencies
   */
  def of(s: String): FrequencyMap = {
    val total = s.length.toDouble
    // Tally occurrences in a single left-to-right pass over the text.
    val occurrences = s.foldLeft(Map.empty[Char, Int]) { (seen, ch) =>
      seen + (ch -> (seen.getOrElse(ch, 0) + 1))
    }
    FrequencyMap(occurrences.map(tup(_ -> _ / total)))
  }
}
|