summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoriximeow <me@iximeow.net>2014-11-23 03:09:27 -0800
committeriximeow <me@iximeow.net>2014-11-23 03:09:27 -0800
commitaa390776b26d794ab37aa13833fa7043aad16504 (patch)
tree5535ab96683e2db3ddbcd31d33b223982d080d0c
parent5dcdf12a091f42b3f80b49b442f9885bcd786972 (diff)
Add in code for challenge 3
-rw-r--r--src/solvers/XorDecrypt.scala29
-rw-r--r--src/utils/ByteUtils.scala56
-rw-r--r--src/utils/ConversionUtils.scala54
-rw-r--r--src/utils/Frequencies.scala101
-rw-r--r--src/utils/FrequencyMap.scala91
-rw-r--r--src/utils/TextScorer.scala32
6 files changed, 315 insertions, 48 deletions
diff --git a/src/solvers/XorDecrypt.scala b/src/solvers/XorDecrypt.scala
new file mode 100644
index 0000000..b7f7c43
--- /dev/null
+++ b/src/solvers/XorDecrypt.scala
@@ -0,0 +1,29 @@
+package ixee.cryptopals.solvers
+
+import ixee.cryptopals.utils._
+import ixee.cryptopals.utils.ByteUtils._
+
+object XorDecrypt {
+ implicit val freq = Frequencies.cornell40kSample
+
+ def tryBestDecrypt(ciphertext: Seq[Byte]) = {
+ val key = findBestSingleByteKey(ciphertext)
+ new String((ciphertext xor Stream.continually(key)).toArray)
+ }
+
+ def findBestSingleByteKey(ciphertext: Seq[Byte]): Byte =
+ candidates(ciphertext)
+ .minBy(_._1)._2.toByte
+
+ def candidates(ciphertext: Seq[Byte]) =
+ (0 until 256)
+ .map(_.toByte)
+ .map(x => TextScorer.score(ciphertext xor Stream.continually(x)))
+ .zipWithIndex
+ .filter(_._1 != -1.0)
+
+ def candidatesAsAscii(ciphertext: Seq[Byte]) =
+ candidates(ciphertext).sortBy(_._1).map(x =>
+ new String((ciphertext xor Stream.continually(x._2.toByte)).toArray)
+ )
+}
diff --git a/src/utils/ByteUtils.scala b/src/utils/ByteUtils.scala
index 85c6058..ffc2e20 100644
--- a/src/utils/ByteUtils.scala
+++ b/src/utils/ByteUtils.scala
@@ -1,5 +1,7 @@
package ixee.cryptopals.utils
+import ConversionUtils.tup
+
object ByteUtils {
trait SizedNumeric[T] {
def manifest: Manifest[T]
@@ -17,6 +19,7 @@ object ByteUtils {
def @<<(t: T, x: Int): T
def @|(a: T, b: T): T
def @&(a: T, b: T): T
+ def @^(a: T, b: T): T
def @+(a: T, b: T): T
}
@@ -25,6 +28,7 @@ object ByteUtils {
def @<<(t: Byte, x: Int): Byte = (t << x).toByte
def @|(a: Byte, b: Byte): Byte = (a | b).toByte
def @&(a: Byte, b: Byte): Byte = (a & b).toByte
+ def @^(a: Byte, b: Byte): Byte = (a ^ b).toByte
def @+(a: Byte, b: Byte): Byte = (a + b).toByte
}
@@ -33,6 +37,7 @@ object ByteUtils {
def @<<(t: Int, x: Int): Int = (t << x)
def @|(a: Int, b: Int): Int = a | b
def @&(a: Int, b: Int): Int = a & b
+ def @^(a: Int, b: Int): Int = a ^ b
def @+(a: Int, b: Int): Int = a + b
}
@@ -41,11 +46,13 @@ object ByteUtils {
def @<<(b: Int): T = implicitly[BitOps[T]].@<<(x, b)
def @|(b: T): T = implicitly[BitOps[T]].@|(x, b)
def @&(b: T): T = implicitly[BitOps[T]].@&(x, b)
+ def @^(b: T): T = implicitly[BitOps[T]].@^(x, b)
def @+(b: T): T = implicitly[BitOps[T]].@+(x, b)
}
implicit class SizedWithByteInfo[T : SizedNumeric](x: T) {
def byteSize = implicitly[SizedNumeric[T]].byteSize
+ def octetSize = byteSize * 2
def bitSize = implicitly[SizedNumeric[T]].bitSize
def toLong = implicitly[SizedNumeric[T]].toLong(x)
def liftedTo[U : SizedNumeric]: U = {
@@ -57,36 +64,12 @@ object ByteUtils {
uSize.buildFrom(x)
}
+ def hex: String = s"%0${octetSize}x" format x
def truncatedTo[U : SizedNumeric]: U = {
implicitly[SizedNumeric[U]].truncate(x)
}
}
- implicit class RichIterableByte(iter: Iterable[Byte]) {
- def to[T : SizedNumeric : BitOps]: T = {
- // Need to know length, so we must force the iter
- iter.hasDefiniteSize match {
- case true => throw new IllegalArgumentException("Argument is not finite!")
- case false => {
- val numeric = implicitly[SizedNumeric[T]]
- if(b.length < numeric.byteSize) {
- throw new RuntimeException("Byte input is not long enough")
- }
-
- var out: Long = b(0)
- for(i <- 1 until numeric.byteSize) {
- out << 8
- out = b(i) | out
- }
- numeric.fromLong(out)
- }
- }
- }
-
- def xor(other: Iterable[Byte]): Iterable[Byte] =
- iter.zip(other).map(_ ^@ _)
-
- }
def sizedNumeric[T : Manifest](bytes: Int)(from: Long => T)(to: T => Long) = new SizedNumeric[T] {
def manifest = implicitly[Manifest[T]]
def byteSize = bytes
@@ -99,25 +82,8 @@ object ByteUtils {
implicit val shortized = sizedNumeric(2) { x: Long => x.toShort } { _.toLong }
implicit val byteSized = sizedNumeric(1) { x: Long => x.toByte } { _.toLong }
- def toArrayBuf[T : SizedNumeric : BitOps](x: T): Array[Byte] = {
- val buf = new Array[Byte](x.byteSize)
- for(i <- 0 until x.byteSize) {
- val shiftAmount: Int = (x.byteSize - i - 1) << 3
- buf(i) = (x @>>> shiftAmount).truncatedTo[Byte]
- }
- buf
- }
-
- def toBinaryString[T : SizedNumeric](x: T)(implicit a: BitOps[T]): String = {
- val buf = new StringBuilder(x.bitSize)
- for(i <- 0 until x.bitSize) {
- val shiftAmount: Int = x.bitSize - i - 1
- buf.append((x @>>> shiftAmount) @& 0x01.liftedTo[T])
- }
- buf.toString()
- }
-
- def toHexString[T : SizedNumeric : BitOps](x: Iterable[T]): String = {
-
+ implicit class SeqByteOps(seq: Seq[Byte]) {
+ def xor(other: Seq[Byte]): Seq[Byte] =
+ seq.zip(other).map(tup(_ @^ _))
}
}
diff --git a/src/utils/ConversionUtils.scala b/src/utils/ConversionUtils.scala
index 26f2d24..09f3aab 100644
--- a/src/utils/ConversionUtils.scala
+++ b/src/utils/ConversionUtils.scala
@@ -1,10 +1,10 @@
package ixee.cryptopals.utils
-import ByteUtils.WithBitOpts
+import ByteUtils._
object ConversionUtils {
- def hexStr2Bytes(s: String): Iterator[Byte] =
- padToByte(s).grouped(2).map(byteStr2Byte)
+ def hexStr2Bytes(s: String): Seq[Byte] =
+ padToByte(s).grouped(2).map(byteStr2Byte).toSeq
def byteStr2Byte(str: String): Byte =
charSeq2Byte(str.toSeq)
@@ -34,5 +34,53 @@ object ConversionUtils {
hexStr2Bytes(s).toArray.toBase64
}
+
+ implicit class RichSeqByte(seq: Seq[Byte]) {
+ def to[T : SizedNumeric : BitOps]: T = {
+ val numeric = implicitly[SizedNumeric[T]]
+ if(seq.length < numeric.byteSize) {
+ throw new RuntimeException("Byte input is not long enough")
+ }
+
+ var out: Long = seq(0)
+ for(i <- 1 until numeric.byteSize) {
+ out << 8
+ out = seq(i) | out
+ }
+ numeric.fromLong(out)
+ }
+
+ def hex: String =
+ seq.map(_.hex).reduceLeft(_ + _)
+
+ def asAscii: String =
+ new String(seq.toArray)
+ }
+
+ implicit class RichStringBytes(s: String) {
+ def asBytes = s.toSeq.map(_.toByte)
+ }
+
+ // Because doing (_ f _).tupled confuses the inferencer...
+ def tup[A, B, C](f: (A, B) => C): ((A, B)) => C = f.tupled
+
+ // TODO: tailrec this, get rid of for
+ def toByteSeq[T : SizedNumeric : BitOps](x: T): Seq[Byte] = {
+ val buf = new Array[Byte](x.byteSize)
+ for(i <- 0 until x.byteSize) {
+ val shiftAmount = (x.byteSize - i - 1) << 3
+ buf(i) = (x @>>> shiftAmount).truncatedTo[Byte]
+ }
+ buf.toSeq
+ }
+
+ def toBinaryString[T : SizedNumeric](x: T)(implicit a: BitOps[T]): String = {
+ val buf = new StringBuilder(x.bitSize)
+ for(i <- 0 until x.bitSize) {
+ val shiftAmount: Int = x.bitSize - i - 1
+ buf.append((x @>>> shiftAmount) @& 0x01.liftedTo[T])
+ }
+ buf.toString()
+ }
}
diff --git a/src/utils/Frequencies.scala b/src/utils/Frequencies.scala
new file mode 100644
index 0000000..63d74ac
--- /dev/null
+++ b/src/utils/Frequencies.scala
@@ -0,0 +1,101 @@
+package ixee.cryptopals.utils
+
+object Frequencies {
+ lazy val frequencies = Seq(
+ cryptologicalMathematics,
+ cornell40kSample
+ )
+
+ // http://www.math.cornell.edu/~mec/2003-2004/cryptography/subs/frequencies.html
+ val cornell40kSample = FrequencyMap(
+ 'e' -> 0.1202,
+ 't' -> 0.0910,
+ 'a' -> 0.0812,
+ 'o' -> 0.0768,
+ 'i' -> 0.0731,
+ 'n' -> 0.0695,
+ 's' -> 0.0628,
+ 'r' -> 0.0602,
+ 'h' -> 0.0592,
+ 'd' -> 0.0432,
+ 'l' -> 0.0398,
+ 'u' -> 0.0288,
+ 'c' -> 0.0271,
+ 'm' -> 0.0261,
+ 'f' -> 0.0230,
+ 'y' -> 0.0211,
+ 'w' -> 0.0209,
+ 'g' -> 0.0203,
+ 'p' -> 0.0182,
+ 'b' -> 0.0149,
+ 'v' -> 0.0111,
+ 'k' -> 0.0069,
+ 'x' -> 0.0017,
+ 'q' -> 0.0011,
+ 'j' -> 0.0010,
+ 'z' -> 0.0007
+ )
+
+ // from http://en.algoritmy.net/article/40379/Letter-frequency-English
+ val cryptologicalMathematics = FrequencyMap(
+ 'a' -> 0.08167,
+ 'b' -> 0.01492,
+ 'c' -> 0.02782,
+ 'd' -> 0.04253,
+ 'e' -> 0.12702,
+ 'f' -> 0.02228,
+ 'g' -> 0.02015,
+ 'h' -> 0.06094,
+ 'i' -> 0.06966,
+ 'j' -> 0.00153,
+ 'k' -> 0.00772,
+ 'l' -> 0.04025,
+ 'm' -> 0.02406,
+ 'n' -> 0.06749,
+ 'o' -> 0.07507,
+ 'p' -> 0.01929,
+ 'q' -> 0.00095,
+ 'r' -> 0.05987,
+ 's' -> 0.06327,
+ 't' -> 0.09056,
+ 'u' -> 0.02758,
+ 'v' -> 0.00978,
+ 'w' -> 0.02360,
+ 'x' -> 0.00150,
+ 'y' -> 0.01974,
+ 'z' -> 0.00074
+ )
+
+ // http://en.wikipedia.org/wiki/Letter_frequency#cite_note-13
+ // Calculated from "Project Gutenberg Selections" available from the NLTK Corpora
+ // http://nltk.googlecode.com/svn/trunk/nltk_data/index.xml
+ // has since gone dead.
+ val firstLetterOfWord = FrequencyMap(
+ 'a' -> 0.11602,
+ 'b' -> 0.04702,
+ 'c' -> 0.03511,
+ 'd' -> 0.02670,
+ 'e' -> 0.02007,
+ 'f' -> 0.03779,
+ 'g' -> 0.01950,
+ 'h' -> 0.07232,
+ 'i' -> 0.06286,
+ 'j' -> 0.00597,
+ 'k' -> 0.00590,
+ 'l' -> 0.02705,
+ 'm' -> 0.04374,
+ 'n' -> 0.02365,
+ 'o' -> 0.06264,
+ 'p' -> 0.02545,
+ 'q' -> 0.00173,
+ 'r' -> 0.01653,
+ 's' -> 0.07755,
+ 't' -> 0.16671,
+ 'u' -> 0.01487,
+ 'v' -> 0.00649,
+ 'w' -> 0.06753,
+ 'x' -> 0.00017,
+ 'y' -> 0.01620,
+ 'z' -> 0.00034
+ )
+}
diff --git a/src/utils/FrequencyMap.scala b/src/utils/FrequencyMap.scala
new file mode 100644
index 0000000..0bea25c
--- /dev/null
+++ b/src/utils/FrequencyMap.scala
@@ -0,0 +1,91 @@
+package ixee.cryptopals.utils
+
+import ConversionUtils.tup
+
+class FrequencyMap(mapargs: Map[Char, Double]) {
+ sealed trait DiffResult
+ object Inconclusive extends DiffResult
+
+ val FittingThreshold: Double = 0.95
+
+ val mappings = mapargs map tup(_.toLower -> _)
+
+ def at(c: Char): Option[Double] =
+ if (c.isLower)
+ mappings.get(c)
+ else // TODO remove the bias here
+ mappings.get(c.toLower).map(_ * 0.002) // bias HARD against uppercase spam. should make this a per-map setting.
+
+ def diff(other: FrequencyMap): Double = {
+ // pseudoscience here
+ /*
+ * Idea...
+ * sum difference squares between other and this
+ * discarding characters that aren't present in other if other's length is < X
+ *
+ * .. divide by other.totalCount?
+ */
+
+ // since this doesn't have sample counts, can't do switching based on other.sampleCount
+ // so just do it.
+
+ other.mappings.foldLeft(0.0) { (confidence: Double, next: (Char, Double)) =>
+ confidence + tup(diffAt _)(next)
+ }
+ }
+
+ // TODO: don't hardcode these..
+ def diffAt(c: Char, charFreq: Double) =
+ c match {
+ case ' ' => 0
+ case '{' | '}' | '`' | '|' =>
+ squared(0.000001 - charFreq) // { } | and ` are very unlikely irl
+ case '[' | ']' =>
+ squared(0.000002 - charFreq) // [ ] are more likely
+ case '"' | '\''=>
+ squared(0.00001 - charFreq) // " or ' are 100% unlikely 90% of the time
+ case '~' | '+' | '=' | '<' | '>' | '/' | '\\' =>
+ squared(0.000004 - charFreq) // math is weird kids
+ case ';' | ':' | '-' | '*' | '(' | '&' | ')' | '_' =>
+ squared(0.000009 - charFreq) // getting into more common punctuation
+ case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
+ squared(0.00001 - charFreq) // numbers are KINDA uncommon
+ case '$' | '%' | '#' | '@' | '!' =>
+ squared(0.00003 - charFreq) // more punctuation...
+ case '.' | ',' =>
+ squared(0.00007 - charFreq) // and the last of the punctuations
+ case '\n' | '\r' =>
+ squared(0.0000002 - charFreq) // explicit \r \n is rare in freeform text.
+ case _ =>
+ this.at(c).map(_ - charFreq).map(squared).getOrElse(0.0)
+ }
+
+ def squared(x: Double) = x * x
+
+ def likelyFitting(other: FrequencyMap) = 1 - (other diff this) > FittingThreshold
+
+ override def toString = mapargs.toString
+}
+
+class SampledFrequencyMap(fm: FrequencyMap, samples: Int) extends FrequencyMap(fm.mappings) {
+ // come back to this
+}
+
+object FrequencyMap {
+ def apply(mapargs: (Char, Double)*): FrequencyMap = FrequencyMap(mapargs.toMap)
+ def apply(mapargs: Map[Char, Double]): FrequencyMap = new FrequencyMap(mapargs)
+
+ def of(s: String) = {
+ def count[A](m: Map[A, Int], c: A): Map[A, Int] =
+ m.get(c) match {
+ case Some(count) => m + (c -> (count + 1))
+ case None => m + (c -> 1)
+ }
+
+ FrequencyMap(
+ s
+ .foldLeft(Map[Char, Int]())(count _)
+ .map(tup(_ -> _ / s.length.toDouble))
+ )
+ }
+}
diff --git a/src/utils/TextScorer.scala b/src/utils/TextScorer.scala
new file mode 100644
index 0000000..b02c250
--- /dev/null
+++ b/src/utils/TextScorer.scala
@@ -0,0 +1,32 @@
+package ixee.cryptopals.utils
+
+import ByteUtils._
+
+object TextScorer {
+ val controlChars: Seq[Byte] = Seq(
+ 0x08, 0x09, 0x0a, 0x0b, 0x0d
+ ).map(_.toByte)
+
+ def isText(s: Iterable[Byte]): Boolean =
+ s.forall(_ @& 0x80.toByte == 0) &&
+ s.filter(_ < 0x20).forall(controlChars.contains _)
+
+ def score(s: Seq[Byte]): Double =
+ if (!isText(s))
+ -1 // not English text!
+ else
+ scoreBy(new String(s.toArray), Frequencies.cornell40kSample)
+ //score s
+
+ def scoreBy(s: String, fm: FrequencyMap): Double = {
+ val sfm = FrequencyMap.of(s)
+ fm diff sfm
+ }
+
+ def looksEnglish(s: String)(implicit fm: FrequencyMap): Boolean =
+ if (!isText(s.toCharArray.map(_.toByte)))
+ false
+ else
+ FrequencyMap.of(s).likelyFitting(fm)
+}
+