summary refs log tree commit diff
path: root/src/utils/Frequencies.scala
diff options
context:
space:
mode:
Diffstat (limited to 'src/utils/Frequencies.scala')
-rw-r--r-- src/utils/Frequencies.scala 101
1 files changed, 101 insertions, 0 deletions
diff --git a/src/utils/Frequencies.scala b/src/utils/Frequencies.scala
new file mode 100644
index 0000000..63d74ac
--- /dev/null
+++ b/src/utils/Frequencies.scala
@@ -0,0 +1,101 @@
+package ixee.cryptopals.utils
+
+object Frequencies { // Letter-frequency tables: each pairs 'a'..'z' with a fractional frequency.
+ lazy val frequencies = Seq( // bundles two of the three tables defined in this object
+ cryptologicalMathematics,
+ cornell40kSample
+ )
+ // `frequencies` is lazy and refers to vals defined below it; a strict val would read them unset.
+ // Source: http://www.math.cornell.edu/~mec/2003-2004/cryptography/subs/frequencies.html
+ val cornell40kSample = FrequencyMap( // listed from most to least frequent; literals sum to 0.9999
+ 'e' -> 0.1202,
+ 't' -> 0.0910,
+ 'a' -> 0.0812,
+ 'o' -> 0.0768,
+ 'i' -> 0.0731,
+ 'n' -> 0.0695,
+ 's' -> 0.0628,
+ 'r' -> 0.0602,
+ 'h' -> 0.0592,
+ 'd' -> 0.0432,
+ 'l' -> 0.0398,
+ 'u' -> 0.0288,
+ 'c' -> 0.0271,
+ 'm' -> 0.0261,
+ 'f' -> 0.0230,
+ 'y' -> 0.0211,
+ 'w' -> 0.0209,
+ 'g' -> 0.0203,
+ 'p' -> 0.0182,
+ 'b' -> 0.0149,
+ 'v' -> 0.0111,
+ 'k' -> 0.0069,
+ 'x' -> 0.0017,
+ 'q' -> 0.0011,
+ 'j' -> 0.0010,
+ 'z' -> 0.0007
+ )
+
+ // Source: http://en.algoritmy.net/article/40379/Letter-frequency-English
+ val cryptologicalMathematics = FrequencyMap( // listed alphabetically; literals sum to 0.99999
+ 'a' -> 0.08167,
+ 'b' -> 0.01492,
+ 'c' -> 0.02782,
+ 'd' -> 0.04253,
+ 'e' -> 0.12702,
+ 'f' -> 0.02228,
+ 'g' -> 0.02015,
+ 'h' -> 0.06094,
+ 'i' -> 0.06966,
+ 'j' -> 0.00153,
+ 'k' -> 0.00772,
+ 'l' -> 0.04025,
+ 'm' -> 0.02406,
+ 'n' -> 0.06749,
+ 'o' -> 0.07507,
+ 'p' -> 0.01929,
+ 'q' -> 0.00095,
+ 'r' -> 0.05987,
+ 's' -> 0.06327,
+ 't' -> 0.09056,
+ 'u' -> 0.02758,
+ 'v' -> 0.00978,
+ 'w' -> 0.02360,
+ 'x' -> 0.00150,
+ 'y' -> 0.01974,
+ 'z' -> 0.00074
+ )
+ // NOTE(review): firstLetterOfWord below is not listed in `frequencies` — confirm that is intended.
+ // Source: http://en.wikipedia.org/wiki/Letter_frequency#cite_note-13
+ // Calculated from "Project Gutenberg Selections", available from the NLTK Corpora.
+ // The corpus index, http://nltk.googlecode.com/svn/trunk/nltk_data/index.xml,
+ // has since gone dead.
+ val firstLetterOfWord = FrequencyMap( // listed alphabetically; literals sum to 0.99991
+ 'a' -> 0.11602,
+ 'b' -> 0.04702,
+ 'c' -> 0.03511,
+ 'd' -> 0.02670,
+ 'e' -> 0.02007,
+ 'f' -> 0.03779,
+ 'g' -> 0.01950,
+ 'h' -> 0.07232,
+ 'i' -> 0.06286,
+ 'j' -> 0.00597,
+ 'k' -> 0.00590,
+ 'l' -> 0.02705,
+ 'm' -> 0.04374,
+ 'n' -> 0.02365,
+ 'o' -> 0.06264,
+ 'p' -> 0.02545,
+ 'q' -> 0.00173,
+ 'r' -> 0.01653,
+ 's' -> 0.07755,
+ 't' -> 0.16671,
+ 'u' -> 0.01487,
+ 'v' -> 0.00649,
+ 'w' -> 0.06753,
+ 'x' -> 0.00017,
+ 'y' -> 0.01620,
+ 'z' -> 0.00034
+ )
+}