summaryrefslogtreecommitdiff
path: root/src/utils/Frequencies.scala
blob: 63d74ace1c67eb4dc75424519518c453b8b00fa7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
package ixee.cryptopals.utils

/** Reference letter-frequency tables for English text, keyed by the lowercase
  * letters 'a' through 'z'.
  *
  * Every table is built by passing (letter -> weight) pairs to `FrequencyMap`.
  * The weights are written as decimals below 1 (e.g. 0.12702 for 'e'),
  * presumably fractions of all letters rather than percentages -- confirm
  * against the cited sources before mixing in other data.
  */
object Frequencies {
  /** Letter frequencies taken from the Cornell page below; entries are listed
    * from the largest weight to the smallest.
    *
    * Source:
    * http://www.math.cornell.edu/~mec/2003-2004/cryptography/subs/frequencies.html
    */
  val cornell40kSample = FrequencyMap(
    'e' -> 0.1202,
    't' -> 0.0910,
    'a' -> 0.0812,
    'o' -> 0.0768,
    'i' -> 0.0731,
    'n' -> 0.0695,
    's' -> 0.0628,
    'r' -> 0.0602,
    'h' -> 0.0592,
    'd' -> 0.0432,
    'l' -> 0.0398,
    'u' -> 0.0288,
    'c' -> 0.0271,
    'm' -> 0.0261,
    'f' -> 0.0230,
    'y' -> 0.0211,
    'w' -> 0.0209,
    'g' -> 0.0203,
    'p' -> 0.0182,
    'b' -> 0.0149,
    'v' -> 0.0111,
    'k' -> 0.0069,
    'x' -> 0.0017,
    'q' -> 0.0011,
    'j' -> 0.0010,
    'z' -> 0.0007
  )

  /** Letter frequencies listed in alphabetical order.
    *
    * Source: http://en.algoritmy.net/article/40379/Letter-frequency-English
    */
  val cryptologicalMathematics = FrequencyMap(
    'a' -> 0.08167,
    'b' -> 0.01492,
    'c' -> 0.02782,
    'd' -> 0.04253,
    'e' -> 0.12702,
    'f' -> 0.02228,
    'g' -> 0.02015,
    'h' -> 0.06094,
    'i' -> 0.06966,
    'j' -> 0.00153,
    'k' -> 0.00772,
    'l' -> 0.04025,
    'm' -> 0.02406,
    'n' -> 0.06749,
    'o' -> 0.07507,
    'p' -> 0.01929,
    'q' -> 0.00095,
    'r' -> 0.05987,
    's' -> 0.06327,
    't' -> 0.09056,
    'u' -> 0.02758,
    'v' -> 0.00978,
    'w' -> 0.02360,
    'x' -> 0.00150,
    'y' -> 0.01974,
    'z' -> 0.00074
  )

  /** Frequencies of letters appearing as the first letter of a word, listed in
    * alphabetical order.
    *
    * Source: http://en.wikipedia.org/wiki/Letter_frequency#cite_note-13
    * Calculated from "Project Gutenberg Selections" available from the NLTK
    * Corpora. The original index,
    * http://nltk.googlecode.com/svn/trunk/nltk_data/index.xml
    * has since gone dead.
    */
  val firstLetterOfWord = FrequencyMap(
    'a' -> 0.11602,
    'b' -> 0.04702,
    'c' -> 0.03511,
    'd' -> 0.02670,
    'e' -> 0.02007,
    'f' -> 0.03779,
    'g' -> 0.01950,
    'h' -> 0.07232,
    'i' -> 0.06286,
    'j' -> 0.00597,
    'k' -> 0.00590,
    'l' -> 0.02705,
    'm' -> 0.04374,
    'n' -> 0.02365,
    'o' -> 0.06264,
    'p' -> 0.02545,
    'q' -> 0.00173,
    'r' -> 0.01653,
    's' -> 0.07755,
    't' -> 0.16671,
    'u' -> 0.01487,
    'v' -> 0.00649,
    'w' -> 0.06753,
    'x' -> 0.00017,
    'y' -> 0.01620,
    'z' -> 0.00034
  )

  /** The whole-text frequency tables, in the order they should be consulted.
    * `firstLetterOfWord` is not part of this list.
    *
    * Declared after the tables it references so that a strict `val` suffices:
    * vals in an object body are initialised top to bottom, and a strict val
    * placed above the tables would capture them before they are assigned.
    */
  val frequencies = Seq(
    cryptologicalMathematics,
    cornell40kSample
  )
}