Add in code for challenge 3

author: iximeow <me@iximeow.net> 2014-11-23 03:09:27 -0800
committer: iximeow <me@iximeow.net> 2014-11-23 03:09:27 -0800
commit: aa390776b26d794ab37aa13833fa7043aad16504 (patch)
tree: 5535ab96683e2db3ddbcd31d33b223982d080d0c
parent: 5dcdf12a091f42b3f80b49b442f9885bcd786972 (diff)
6 files changed, 315 insertions, 48 deletions
diff --git a/src/solvers/XorDecrypt.scala b/src/solvers/XorDecrypt.scala
new file mode 100644
index 0000000..b7f7c43
--- /dev/null
+++ b/src/solvers/XorDecrypt.scala
@@ -0,0 +1,29 @@
+package ixee.cryptopals.solvers
+
+import ixee.cryptopals.utils._
+import ixee.cryptopals.utils.ByteUtils._
+
+object XorDecrypt {
+  implicit val freq = Frequencies.cornell40kSample
+
+  def tryBestDecrypt(ciphertext: Seq[Byte]) = {
+    val key = findBestSingleByteKey(ciphertext)
+    new String((ciphertext xor Stream.continually(key)).toArray)
+  }
+
+  def findBestSingleByteKey(ciphertext: Seq[Byte]): Byte =
+    candidates(ciphertext)
+      .minBy(_._1)._2.toByte
+
+  def candidates(ciphertext: Seq[Byte]) =
+    (0 until 256)
+      .map(_.toByte)
+      .map(x => TextScorer.score(ciphertext xor Stream.continually(x)))
+      .zipWithIndex
+      .filter(_._1 != -1.0)
+
+  def candidatesAsAscii(ciphertext: Seq[Byte]) =
+    candidates(ciphertext).sortBy(_._1).map(x =>
+      new String((ciphertext xor Stream.continually(x._2.toByte)).toArray)
+    )
+}
diff --git a/src/utils/ByteUtils.scala b/src/utils/ByteUtils.scala
index 85c6058..ffc2e20 100644
--- a/src/utils/ByteUtils.scala
+++ b/src/utils/ByteUtils.scala
@@ -1,5 +1,7 @@
 package ixee.cryptopals.utils
 
+import ConversionUtils.tup
+
 object ByteUtils {
   trait SizedNumeric[T] {
     def manifest: Manifest[T]
@@ -17,6 +19,7 @@ object ByteUtils {
     def @<<(t: T, x: Int): T
     def @|(a: T, b: T): T
     def @&(a: T, b: T): T
+    def @^(a: T, b: T): T
     def @+(a: T, b: T): T
   }
 
@@ -25,6 +28,7 @@ object ByteUtils {
     def @<<(t: Byte, x: Int): Byte = (t << x).toByte
     def @|(a: Byte, b: Byte): Byte = (a | b).toByte
     def @&(a: Byte, b: Byte): Byte = (a & b).toByte
+    def @^(a: Byte, b: Byte): Byte = (a ^ b).toByte
     def @+(a: Byte, b: Byte): Byte = (a + b).toByte
   }
 
@@ -33,6 +37,7 @@ object ByteUtils {
     def @<<(t: Int, x: Int): Int = (t << x)
     def @|(a: Int, b: Int): Int = a | b
     def @&(a: Int, b: Int): Int = a & b
+    def @^(a: Int, b: Int): Int = a ^ b
     def @+(a: Int, b: Int): Int = a + b
   }
 
@@ -41,11 +46,13 @@ object ByteUtils {
     def @<<(b: Int): T = implicitly[BitOps[T]].@<<(x, b)
     def @|(b: T): T = implicitly[BitOps[T]].@|(x, b)
     def @&(b: T): T = implicitly[BitOps[T]].@&(x, b)
+    def @^(b: T): T = implicitly[BitOps[T]].@^(x, b)
     def @+(b: T): T = implicitly[BitOps[T]].@+(x, b)
   }
 
   implicit class SizedWithByteInfo[T : SizedNumeric](x: T) {
     def byteSize = implicitly[SizedNumeric[T]].byteSize
+    def octetSize = byteSize * 2
     def bitSize = implicitly[SizedNumeric[T]].bitSize
     def toLong = implicitly[SizedNumeric[T]].toLong(x)
     def liftedTo[U : SizedNumeric]: U = {
@@ -57,36 +64,12 @@ object ByteUtils {
 
       uSize.buildFrom(x)
     }
+    def hex: String = s"%0${octetSize}x" format x
     def truncatedTo[U : SizedNumeric]: U = {
       implicitly[SizedNumeric[U]].truncate(x)
     }
   }
 
-  implicit class RichIterableByte(iter: Iterable[Byte]) {
-    def to[T : SizedNumeric : BitOps]: T = {
-      // Need to know length, so we must force the iter
-      iter.hasDefiniteSize match {
-        case true => throw new IllegalArgumentException("Argument is not finite!")
-        case false => {
-          val numeric = implicitly[SizedNumeric[T]]
-          if(b.length < numeric.byteSize) {
-            throw new RuntimeException("Byte input is not long enough")
-          }
-
-          var out: Long = b(0)
-          for(i <- 1 until numeric.byteSize) {
-            out << 8
-            out = b(i) | out
-          }
-          numeric.fromLong(out)
-        }
-      }
-    }
-
-    def xor(other: Iterable[Byte]): Iterable[Byte] =
-      iter.zip(other).map(_ ^@ _)
-
-  }
   def sizedNumeric[T : Manifest](bytes: Int)(from: Long => T)(to: T => Long) = new SizedNumeric[T] {
     def manifest = implicitly[Manifest[T]]
     def byteSize = bytes
@@ -99,25 +82,8 @@ object ByteUtils {
   implicit val shortized = sizedNumeric(2) { x: Long => x.toShort } { _.toLong }
   implicit val byteSized = sizedNumeric(1) { x: Long => x.toByte } { _.toLong }
 
-  def toArrayBuf[T : SizedNumeric : BitOps](x: T): Array[Byte] = {
-    val buf = new Array[Byte](x.byteSize)
-    for(i <- 0 until x.byteSize) {
-      val shiftAmount: Int = (x.byteSize - i - 1) << 3
-      buf(i) = (x @>>> shiftAmount).truncatedTo[Byte]
-    }
-    buf
-  }
-
-  def toBinaryString[T : SizedNumeric](x: T)(implicit a: BitOps[T]): String = {
-    val buf = new StringBuilder(x.bitSize)
-    for(i <- 0 until x.bitSize) {
-      val shiftAmount: Int = x.bitSize - i - 1
-      buf.append((x @>>> shiftAmount) @& 0x01.liftedTo[T])
-    }
-    buf.toString()
-  }
-
-  def toHexString[T : SizedNumeric : BitOps](x: Iterable[T]): String = {
-    
+  implicit class SeqByteOps(seq: Seq[Byte]) {
+    def xor(other: Seq[Byte]): Seq[Byte] =
+      seq.zip(other).map(tup(_ @^ _))
   }
 }
diff --git a/src/utils/ConversionUtils.scala b/src/utils/ConversionUtils.scala
index 26f2d24..09f3aab 100644
--- a/src/utils/ConversionUtils.scala
+++ b/src/utils/ConversionUtils.scala
@@ -1,10 +1,10 @@
 package ixee.cryptopals.utils
 
-import ByteUtils.WithBitOpts
+import ByteUtils._
 
 object ConversionUtils {
-  def hexStr2Bytes(s: String): Iterator[Byte] =
-    padToByte(s).grouped(2).map(byteStr2Byte)
+  def hexStr2Bytes(s: String): Seq[Byte] =
+    padToByte(s).grouped(2).map(byteStr2Byte).toSeq
 
   def byteStr2Byte(str: String): Byte =
     charSeq2Byte(str.toSeq)
@@ -34,5 +34,53 @@ object ConversionUtils {
 
     hexStr2Bytes(s).toArray.toBase64
   }
+
+  implicit class RichSeqByte(seq: Seq[Byte]) {
+    def to[T : SizedNumeric : BitOps]: T = {
+      val numeric = implicitly[SizedNumeric[T]]
+      if(seq.length < numeric.byteSize) {
+        throw new RuntimeException("Byte input is not long enough")
+      }
+
+      var out: Long = seq(0)
+      for(i <- 1 until numeric.byteSize) {
+        out << 8
+        out = seq(i) | out
+      }
+      numeric.fromLong(out)
+    }
+
+    def hex: String =
+      seq.map(_.hex).reduceLeft(_ + _)
+
+    def asAscii: String =
+      new String(seq.toArray)
+  }
+
+  implicit class RichStringBytes(s: String) {
+    def asBytes = s.toSeq.map(_.toByte)
+  }
+
+  // Because doing (_ f _).tupled confuses the inferencer...
+  def tup[A, B, C](f: (A, B) => C): ((A, B)) => C = f.tupled
+
+  // TODO: tailrec this, get rid of for
+  def toByteSeq[T : SizedNumeric : BitOps](x: T): Seq[Byte] = {
+    val buf = new Array[Byte](x.byteSize)
+    for(i <- 0 until x.byteSize) {
+      val shiftAmount = (x.byteSize - i - 1) << 3
+      buf(i) = (x @>>> shiftAmount).truncatedTo[Byte]
+    }
+    buf.toSeq
+  }
+
+  def toBinaryString[T : SizedNumeric](x: T)(implicit a: BitOps[T]): String = {
+    val buf = new StringBuilder(x.bitSize)
+    for(i <- 0 until x.bitSize) {
+      val shiftAmount: Int = x.bitSize - i - 1
+      buf.append((x @>>> shiftAmount) @& 0x01.liftedTo[T])
+    }
+    buf.toString()
+  }
 }
 
diff --git a/src/utils/Frequencies.scala b/src/utils/Frequencies.scala
new file mode 100644
index 0000000..63d74ac
--- /dev/null
+++ b/src/utils/Frequencies.scala
@@ -0,0 +1,101 @@
+package ixee.cryptopals.utils
+
+object Frequencies {
+  lazy val frequencies = Seq(
+    cryptologicalMathematics,
+    cornell40kSample
+  )
+
+  // http://www.math.cornell.edu/~mec/2003-2004/cryptography/subs/frequencies.html
+  val cornell40kSample = FrequencyMap(
+    'e' -> 0.1202,
+    't' -> 0.0910,
+    'a' -> 0.0812,
+    'o' -> 0.0768,
+    'i' -> 0.0731,
+    'n' -> 0.0695,
+    's' -> 0.0628,
+    'r' -> 0.0602,
+    'h' -> 0.0592,
+    'd' -> 0.0432,
+    'l' -> 0.0398,
+    'u' -> 0.0288,
+    'c' -> 0.0271,
+    'm' -> 0.0261,
+    'f' -> 0.0230,
+    'y' -> 0.0211,
+    'w' -> 0.0209,
+    'g' -> 0.0203,
+    'p' -> 0.0182,
+    'b' -> 0.0149,
+    'v' -> 0.0111,
+    'k' -> 0.0069,
+    'x' -> 0.0017,
+    'q' -> 0.0011,
+    'j' -> 0.0010,
+    'z' -> 0.0007
+  )
+
+  // from http://en.algoritmy.net/article/40379/Letter-frequency-English
+  val cryptologicalMathematics = FrequencyMap(
+    'a' -> 0.08167,
+    'b' -> 0.01492,
+    'c' -> 0.02782,
+    'd' -> 0.04253,
+    'e' -> 0.12702,
+    'f' -> 0.02228,
+    'g' -> 0.02015,
+    'h' -> 0.06094,
+    'i' -> 0.06966,
+    'j' -> 0.00153,
+    'k' -> 0.00772,
+    'l' -> 0.04025,
+    'm' -> 0.02406,
+    'n' -> 0.06749,
+    'o' -> 0.07507,
+    'p' -> 0.01929,
+    'q' -> 0.00095,
+    'r' -> 0.05987,
+    's' -> 0.06327,
+    't' -> 0.09056,
+    'u' -> 0.02758,
+    'v' -> 0.00978,
+    'w' -> 0.02360,
+    'x' -> 0.00150,
+    'y' -> 0.01974,
+    'z' -> 0.00074
+  )
+
+  // http://en.wikipedia.org/wiki/Letter_frequency#cite_note-13
+  // Calculated from "Project Gutenberg Selections" available from the NLTK Corpora
+  // http://nltk.googlecode.com/svn/trunk/nltk_data/index.xml
+  // has since gone dead.
+  val firstLetterOfWord = FrequencyMap(
+    'a' -> 0.11602,
+    'b' -> 0.04702,
+    'c' -> 0.03511,
+    'd' -> 0.02670,
+    'e' -> 0.02007,
+    'f' -> 0.03779,
+    'g' -> 0.01950,
+    'h' -> 0.07232,
+    'i' -> 0.06286,
+    'j' -> 0.00597,
+    'k' -> 0.00590,
+    'l' -> 0.02705,
+    'm' -> 0.04374,
+    'n' -> 0.02365,
+    'o' -> 0.06264,
+    'p' -> 0.02545,
+    'q' -> 0.00173,
+    'r' -> 0.01653,
+    's' -> 0.07755,
+    't' -> 0.16671,
+    'u' -> 0.01487,
+    'v' -> 0.00649,
+    'w' -> 0.06753,
+    'x' -> 0.00017,
+    'y' -> 0.01620,
+    'z' -> 0.00034
+  )
+}
diff --git a/src/utils/FrequencyMap.scala b/src/utils/FrequencyMap.scala
new file mode 100644
index 0000000..0bea25c
--- /dev/null
+++ b/src/utils/FrequencyMap.scala
@@ -0,0 +1,91 @@
+package ixee.cryptopals.utils
+
+import ConversionUtils.tup
+
+class FrequencyMap(mapargs: Map[Char, Double]) {
+  sealed trait DiffResult
+  object Inconclusive extends DiffResult
+
+  val FittingThreshold: Double = 0.95
+
+  val mappings = mapargs map tup(_.toLower -> _)
+
+  def at(c: Char): Option[Double] =
+    if (c.isLower)
+      mappings.get(c)
+    else                                    // TODO remove the bias here
+      mappings.get(c.toLower).map(_ * 0.002) // bias HARD against uppercase spam. should make this a per-map setting.
+
+  def diff(other: FrequencyMap): Double = {
+    // pseudoscience here
+    /*
+     * Idea...
+     *   sum difference squares between other and this
+     *   discarding characters that aren't present in other if other's length is < X
+     *
+     * .. divide by other.totalCount?
+    */
+
+    // since this doesn't have sample counts, can't do switching based on other.sampleCount
+    // so just do it.
+
+    other.mappings.foldLeft(0.0) { (confidence: Double, next: (Char, Double)) =>
+      confidence + tup(diffAt _)(next)
+    }
+  }
+
+  // TODO: don't hardcode these..
+  def diffAt(c: Char, charFreq: Double) =
+    c match {
+      case ' ' => 0
+      case '{' | '}' | '`' | '|' =>
+        squared(0.000001 - charFreq) // { } | and ` are very unlikely irl
+      case '[' | ']' =>
+        squared(0.000002 - charFreq) // [ ] are more likely
+      case '"' | '\''=>
+        squared(0.00001 - charFreq) // " or ' are 100% unlikely 90% of the time
+      case '~' | '+' | '=' | '<' | '>' | '/' | '\\' =>
+        squared(0.000004 - charFreq) // math is weird kids
+      case ';' | ':' | '-' | '*' | '(' | '&' | ')' | '_' =>
+        squared(0.000009 - charFreq) // getting into more common punctuation
+      case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
+        squared(0.00001 - charFreq) // numbers are KINDA uncommon
+      case '$' | '%' | '#' | '@' | '!' =>
+        squared(0.00003 - charFreq) // more punctuation...
+      case '.' | ',' =>
+        squared(0.00007 - charFreq) // and the last of the punctuations
+      case '\n' | '\r' =>
+        squared(0.0000002 - charFreq) // explicit \r \n is rare in freeform text.
+      case _ =>
+        this.at(c).map(_ - charFreq).map(squared).getOrElse(0.0)
+    }
+
+  def squared(x: Double) = x * x
+
+  def likelyFitting(other: FrequencyMap) = 1 - (other diff this) > FittingThreshold
+
+  override def toString = mapargs.toString
+}
+
+class SampledFrequencyMap(fm: FrequencyMap, samples: Int) extends FrequencyMap(fm.mappings) {
+  // come back to this
+}
+
+object FrequencyMap {
+  def apply(mapargs: (Char, Double)*): FrequencyMap = FrequencyMap(mapargs.toMap)
+  def apply(mapargs: Map[Char, Double]): FrequencyMap = new FrequencyMap(mapargs)
+
+  def of(s: String) = {
+    def count[A](m: Map[A, Int], c: A): Map[A, Int]  =
+      m.get(c) match {
+        case Some(count) => m + (c -> (count + 1))
+        case None => m + (c -> 1)
+      }
+
+    FrequencyMap(
+      s
+        .foldLeft(Map[Char, Int]())(count _)
+        .map(tup(_ -> _ / s.length.toDouble))
+    )
+  }
+}
diff --git a/src/utils/TextScorer.scala b/src/utils/TextScorer.scala
new file mode 100644
index 0000000..b02c250
--- /dev/null
+++ b/src/utils/TextScorer.scala
@@ -0,0 +1,32 @@
+package ixee.cryptopals.utils
+
+import ByteUtils._
+
+object TextScorer {
+  val controlChars: Seq[Byte] = Seq(
+    0x08, 0x09, 0x0a, 0x0b, 0x0d
+  ).map(_.toByte)
+
+  def isText(s: Iterable[Byte]): Boolean =
+    s.forall(_ @& 0x80.toByte == 0) &&
+    s.filter(_ < 0x20).forall(controlChars.contains _)
+
+  def score(s: Seq[Byte]): Double =
+    if (!isText(s))
+      -1  // not English text!
+    else
+      scoreBy(new String(s.toArray), Frequencies.cornell40kSample)
+      //score s
+
+  def scoreBy(s: String, fm: FrequencyMap): Double = {
+    val sfm = FrequencyMap.of(s)
+    fm diff sfm
+  }
+
+  def looksEnglish(s: String)(implicit fm: FrequencyMap): Boolean =
+    if (!isText(s.toCharArray.map(_.toByte)))
+      false
+    else
+      FrequencyMap.of(s).likelyFitting(fm)
+}
+
author	iximeow <me@iximeow.net>	2014-11-23 03:09:27 -0800
committer	iximeow <me@iximeow.net>	2014-11-23 03:09:27 -0800
commit	aa390776b26d794ab37aa13833fa7043aad16504 (patch)
tree	5535ab96683e2db3ddbcd31d33b223982d080d0c
parent	5dcdf12a091f42b3f80b49b442f9885bcd786972 (diff)