Ed Johns | 7 Jul 17:52 2015
Picon

[PATCH] Curve25519 encryption support (experimental)

Hello,

I was interested in trying your Curve25519 encryption support. I've downloaded the latest alpha code. I see that the patch exists.

Would you have a test case that I could use to get started with this patch?

Thanks

Ed Johns
_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel <at> gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
by NIIBE Yutaka | 7 Jul 04:09 2015
Picon

[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-231-g0a7547e

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  0a7547e487a8bc4e7ac9599c55579eb2e4a13f06 (commit)
      from  a36ee7501f68ad7ebcfe31f9659430b9d2c3ddd1 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 0a7547e487a8bc4e7ac9599c55579eb2e4a13f06
Author: NIIBE Yutaka <gniibe <at> fsij.org>
Date:   Mon Jul 6 12:01:00 2015 +0900

    ecc: fix memory leaks.

    cipher/ecc.c (ecc_generate): Fix memory leak on error of
    _gcry_pk_util_parse_flaglist and _gcry_ecc_eddsa_encodepoint.
    (ecc_check_secret_key): Fix memory leak on error of
    _gcry_ecc_update_curve_param.
    (ecc_sign, ecc_verify, ecc_encrypt_raw, ecc_decrypt_raw): Remove
    unnecessary sexp_release and fix memory leak on error of
    _gcry_ecc_fill_in_curve.
    (ecc_decrypt_raw): Fix double free of the point kG and memory leak
    on error of _gcry_ecc_os2ec.

diff --git a/cipher/ecc.c b/cipher/ecc.c
index 5ffe84b..f5bc50a 100644
--- a/cipher/ecc.c
+++ b/cipher/ecc.c
 <at>  <at>  -551,7 +551,6  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
     return GPG_ERR_NO_OBJ; /* No NBITS parameter. */

   rc = _gcry_ecc_fill_in_curve (nbits, curve_name, &E, &nbits);
-  xfree (curve_name); curve_name = NULL;
   if (rc)
     goto leave;

 <at>  <at>  -595,10 +594,9  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
                                         !!(flags & PUBKEY_FLAG_COMP),
                                         &encpk, &encpklen);
       if (rc)
-        return rc;
+        goto leave;
       public = mpi_new (0);
       mpi_set_opaque (public, encpk, encpklen*8);
-      encpk = NULL;
     }
   else
     {
 <at>  <at>  -691,6 +689,7  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
   mpi_free (Qx);
   mpi_free (Qy);
   _gcry_mpi_ec_free (ctx);
+  xfree (curve_name);
   sexp_release (curve_flags);
   sexp_release (curve_info);
   return rc;
 <at>  <at>  -744,7 +743,7  <at>  <at>  ecc_check_secret_key (gcry_sexp_t keyparms)
                                              &sk.E.p, &sk.E.a, &sk.E.b,
                                              &mpi_g, &sk.E.n, &sk.E.h);
           if (rc)
-            return rc;
+            goto leave;
         }
     }
   if (mpi_g)
 <at>  <at>  -877,7 +876,6  <at>  <at>  ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         goto leave;
     }
   /* Add missing parameters using the optional curve parameter.  */
-  sexp_release (l1);
   l1 = sexp_find_token (keyparms, "curve", 5);
   if (l1)
     {
 <at>  <at>  -886,7 +884,7  <at>  <at>  ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         {
           rc = _gcry_ecc_fill_in_curve (0, curvename, &sk.E, NULL);
           if (rc)
-            return rc;
+            goto leave;
         }
     }
   /* Guess required fields if a curve parameter has not been given.
 <at>  <at>  -1043,7 +1041,6  <at>  <at>  ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms)
         goto leave;
     }
   /* Add missing parameters using the optional curve parameter.  */
-  sexp_release (l1);
   l1 = sexp_find_token (s_keyparms, "curve", 5);
   if (l1)
     {
 <at>  <at>  -1052,7 +1049,7  <at>  <at>  ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms)
         {
           rc = _gcry_ecc_fill_in_curve (0, curvename, &pk.E, NULL);
           if (rc)
-            return rc;
+            goto leave;
         }
     }
   /* Guess required fields if a curve parameter has not been given.
 <at>  <at>  -1252,7 +1249,6  <at>  <at>  ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         goto leave;
     }
   /* Add missing parameters using the optional curve parameter.  */
-  sexp_release (l1);
   l1 = sexp_find_token (keyparms, "curve", 5);
   if (l1)
     {
 <at>  <at>  -1261,7 +1257,7  <at>  <at>  ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         {
           rc = _gcry_ecc_fill_in_curve (0, curvename, &pk.E, NULL);
           if (rc)
-            return rc;
+            goto leave;
         }
     }
   /* Guess required fields if a curve parameter has not been given.  */
 <at>  <at>  -1421,7 +1417,6  <at>  <at>  ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         goto leave;
     }
   /* Add missing parameters using the optional curve parameter.  */
-  sexp_release (l1);
   l1 = sexp_find_token (keyparms, "curve", 5);
   if (l1)
     {
 <at>  <at>  -1430,7 +1425,7  <at>  <at>  ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         {
           rc = _gcry_ecc_fill_in_curve (0, curvename, &sk.E, NULL);
           if (rc)
-            return rc;
+            goto leave;
         }
     }
   /* Guess required fields if a curve parameter has not been given.  */
 <at>  <at>  -1467,10 +1462,7  <at>  <at>  ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
    */
   rc = _gcry_ecc_os2ec (&kG, data_e);
   if (rc)
-    {
-      point_free (&kG);
-      return rc;
-    }
+    goto leave;

   ec = _gcry_mpi_ec_p_internal_new (sk.E.model, sk.E.dialect, 0,
                                     sk.E.p, sk.E.a, sk.E.b);

-----------------------------------------------------------------------

Summary of changes:
 cipher/ecc.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

hooks/post-receive
--

-- 
The GNU crypto library
http://git.gnupg.org

_______________________________________________
Gnupg-commits mailing list
Gnupg-commits <at> gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits
NIIBE Yutaka | 6 Jul 05:04 2015

[PATCH] ecc: fix memory leaks.

Hello,

While modifying ecc for Curve25519, I've found some errors in ecc.c.

Here are the changes.

    cipher/ecc.c (ecc_generate): Fix memory leak on error of
    _gcry_pk_util_parse_flaglist and _gcry_ecc_eddsa_encodepoint.
    (ecc_check_secret_key): Fix memory leak on error of
    _gcry_ecc_update_curve_param.
    (ecc_sign, ecc_verify, ecc_encrypt_raw, ecc_decrypt_raw): Remove
    unnecessary sexp_release and fix memory leak on error of
    _gcry_ecc_fill_in_curve.
    (ecc_decrypt_raw): Fix double free of the point kG and memory leak
    on error of _gcry_ecc_os2ec.

diff --git a/cipher/ecc.c b/cipher/ecc.c
index 5ffe84b..f5bc50a 100644
--- a/cipher/ecc.c
+++ b/cipher/ecc.c
 <at>  <at>  -551,7 +551,6  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
     return GPG_ERR_NO_OBJ; /* No NBITS parameter. */

   rc = _gcry_ecc_fill_in_curve (nbits, curve_name, &E, &nbits);
-  xfree (curve_name); curve_name = NULL;
   if (rc)
     goto leave;

 <at>  <at>  -595,10 +594,9  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
                                         !!(flags & PUBKEY_FLAG_COMP),
                                         &encpk, &encpklen);
       if (rc)
-        return rc;
+        goto leave;
       public = mpi_new (0);
       mpi_set_opaque (public, encpk, encpklen*8);
-      encpk = NULL;
     }
   else
     {
 <at>  <at>  -691,6 +689,7  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
   mpi_free (Qx);
   mpi_free (Qy);
   _gcry_mpi_ec_free (ctx);
+  xfree (curve_name);
   sexp_release (curve_flags);
   sexp_release (curve_info);
   return rc;
 <at>  <at>  -744,7 +743,7  <at>  <at>  ecc_check_secret_key (gcry_sexp_t keyparms)
                                              &sk.E.p, &sk.E.a, &sk.E.b,
                                              &mpi_g, &sk.E.n, &sk.E.h);
           if (rc)
-            return rc;
+            goto leave;
         }
     }
   if (mpi_g)
 <at>  <at>  -877,7 +876,6  <at>  <at>  ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         goto leave;
     }
   /* Add missing parameters using the optional curve parameter.  */
-  sexp_release (l1);
   l1 = sexp_find_token (keyparms, "curve", 5);
   if (l1)
     {
 <at>  <at>  -886,7 +884,7  <at>  <at>  ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         {
           rc = _gcry_ecc_fill_in_curve (0, curvename, &sk.E, NULL);
           if (rc)
-            return rc;
+            goto leave;
         }
     }
   /* Guess required fields if a curve parameter has not been given.
 <at>  <at>  -1043,7 +1041,6  <at>  <at>  ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms)
         goto leave;
     }
   /* Add missing parameters using the optional curve parameter.  */
-  sexp_release (l1);
   l1 = sexp_find_token (s_keyparms, "curve", 5);
   if (l1)
     {
 <at>  <at>  -1052,7 +1049,7  <at>  <at>  ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms)
         {
           rc = _gcry_ecc_fill_in_curve (0, curvename, &pk.E, NULL);
           if (rc)
-            return rc;
+            goto leave;
         }
     }
   /* Guess required fields if a curve parameter has not been given.
 <at>  <at>  -1252,7 +1249,6  <at>  <at>  ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         goto leave;
     }
   /* Add missing parameters using the optional curve parameter.  */
-  sexp_release (l1);
   l1 = sexp_find_token (keyparms, "curve", 5);
   if (l1)
     {
 <at>  <at>  -1261,7 +1257,7  <at>  <at>  ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         {
           rc = _gcry_ecc_fill_in_curve (0, curvename, &pk.E, NULL);
           if (rc)
-            return rc;
+            goto leave;
         }
     }
   /* Guess required fields if a curve parameter has not been given.  */
 <at>  <at>  -1421,7 +1417,6  <at>  <at>  ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         goto leave;
     }
   /* Add missing parameters using the optional curve parameter.  */
-  sexp_release (l1);
   l1 = sexp_find_token (keyparms, "curve", 5);
   if (l1)
     {
 <at>  <at>  -1430,7 +1425,7  <at>  <at>  ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         {
           rc = _gcry_ecc_fill_in_curve (0, curvename, &sk.E, NULL);
           if (rc)
-            return rc;
+            goto leave;
         }
     }
   /* Guess required fields if a curve parameter has not been given.  */
 <at>  <at>  -1467,10 +1462,7  <at>  <at>  ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
    */
   rc = _gcry_ecc_os2ec (&kG, data_e);
   if (rc)
-    {
-      point_free (&kG);
-      return rc;
-    }
+    goto leave;

   ec = _gcry_mpi_ec_p_internal_new (sk.E.model, sk.E.dialect, 0,
                                     sk.E.p, sk.E.a, sk.E.b);
--
NIIBE Yutaka | 3 Jul 11:27 2015

[PATCH] Curve25519 encryption support (experimental)

Hello,

This is a pretty immature, experimental patch for Curve25519 encryption
support.

Since the Montgomery curve is available in libgcrypt, it is used.

I assume that key generation is done with:

    (genkey(ecc(curve Curve25519)(flags eddsa)))

Then, '(flags eddsa)' means that the public key is in DJB format with
the prefix 0x40, like EdDSA.

I tested with a modified version of GnuPG 2.1.  I'm going to submit
the patch for GnuPG now.

Please note that this is highly experimental.  The format is not
yet decided.

diff --git a/cipher/ecc-common.h b/cipher/ecc-common.h
index f0d97ea..6b3b063 100644
--- a/cipher/ecc-common.h
+++ b/cipher/ecc-common.h
 <at>  <at>  -132,6 +132,8  <at>  <at>  gpg_err_code_t _gcry_ecc_eddsa_verify (gcry_mpi_t input,
                                        ECC_public_key *pk,
                                        gcry_mpi_t r, gcry_mpi_t s,
                                        int hashalgo, gcry_mpi_t pkmpi);
+gpg_err_code_t _gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx,
+                                           mpi_point_t result);

 /*-- ecc-gost.c --*/
 gpg_err_code_t _gcry_ecc_gost_sign (gcry_mpi_t input, ECC_secret_key *skey,
diff --git a/cipher/ecc-curves.c b/cipher/ecc-curves.c
index 9975bb4..5d855bd 100644
--- a/cipher/ecc-curves.c
+++ b/cipher/ecc-curves.c
 <at>  <at>  -40,7 +40,7  <at>  <at>  static const struct
   const char *other; /* Other name. */
 } curve_aliases[] =
   {
-  /*{ "Curve25519", "1.3.6.1.4.1.3029.1.5.1" },*/
+    { "Curve25519", "1.3.6.1.4.1.3029.1.5.1" },
     { "Ed25519",    "1.3.6.1.4.1.11591.15.1" },

     { "NIST P-192", "1.2.840.10045.3.1.1" }, /* X9.62 OID  */
 <at>  <at>  -129,6 +129,18  <at>  <at>  static const ecc_domain_parms_t domain_parms[] =
       "0x6666666666666666666666666666666666666666666666666666666666666658",
       "0x08"
     },
+    {
+      /* (y^2 = x^3 + 486662*x^2 + x) */
+      "Curve25519", 256, 0,
+      MPI_EC_MONTGOMERY, ECC_DIALECT_ED25519,
+      "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED",
+      "0x01DB41",
+      "0x01",
+      "0x1000000000000000000000000000000014DEF9DEA2F79CD65812631A5CF5D3ED",
+      "0x0000000000000000000000000000000000000000000000000000000000000009",
+      "0x20AE19A1B8A086B4E01EDD2C7748D14C923D4D7E6D7C61B229E9C5A27ECED3D9",
+      "0x08"
+    },
 #if 0 /* No real specs yet found.  */
     {
       /* x^2 + y^2 = 1 + 3617x^2y^2 mod 2^414 - 17 */
diff --git a/cipher/ecc-eddsa.c b/cipher/ecc-eddsa.c
index 4323d8e..72481ba 100644
--- a/cipher/ecc-eddsa.c
+++ b/cipher/ecc-eddsa.c
 <at>  <at>  -400,6 +400,51  <at>  <at>  _gcry_ecc_eddsa_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result,
 }

+gpg_err_code_t
+_gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result)
+{
+  unsigned char *rawmpi;
+  unsigned int rawmpilen;
+
+  if (mpi_is_opaque (pk))
+    {
+      const unsigned char *buf;
+
+      buf = mpi_get_opaque (pk, &rawmpilen);
+      if (!buf)
+        return GPG_ERR_INV_OBJ;
+      rawmpilen = (rawmpilen + 7)/8;
+
+      if (rawmpilen > 1 && (rawmpilen%2) && buf[0] == 0x40)
+        {
+          rawmpilen--;
+          buf++;
+        }
+
+      rawmpi = xtrymalloc (rawmpilen? rawmpilen:1);
+      if (!rawmpi)
+        return gpg_err_code_from_syserror ();
+      memcpy (rawmpi, buf, rawmpilen);
+      reverse_buffer (rawmpi, rawmpilen);
+    }
+  else
+    {
+      /* Note: Without using an opaque MPI it is not reliable possible
+         to find out whether the public key has been given in
+         uncompressed format.  Thus we expect native EdDSA format.  */
+      rawmpi = _gcry_mpi_get_buffer (pk, ctx->nbits/8, &rawmpilen, NULL);
+      if (!rawmpi)
+        return gpg_err_code_from_syserror ();
+    }
+
+  _gcry_mpi_set_buffer (result->x, rawmpi, rawmpilen, 0);
+  xfree (rawmpi);
+  mpi_set_ui (result->z, 1);
+
+  return 0;
+}
+
+
 /* Compute the A value as used by EdDSA.  The caller needs to provide
    the context EC and the actual secret D as an MPI.  The function
    returns a newly allocated 64 byte buffer at r_digest; the first 32
diff --git a/cipher/ecc.c b/cipher/ecc.c
index 5ffe84b..e5b3459 100644
--- a/cipher/ecc.c
+++ b/cipher/ecc.c
 <at>  <at>  -174,7 +174,10  <at>  <at>  nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx,
   point_init (&sk->Q);

   x = mpi_new (pbits);
-  y = mpi_new (pbits);
+  if (r_y == NULL)
+    y = NULL;
+  else
+    y = mpi_new (pbits);
   if (_gcry_mpi_ec_get_affine (x, y, &Q, ctx))
     log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q");

 <at>  <at>  -187,7 +190,7  <at>  <at>  nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx,
    * possibilities without any loss of security.  Note that we don't
    * do that for Ed25519 so that we do not violate the special
    * construction of the secret key.  */
-  if (E->dialect == ECC_DIALECT_ED25519)
+  if (E->dialect == ECC_DIALECT_ED25519 || r_y == NULL)
     point_set (&sk->Q, &Q);
   else
     {
 <at>  <at>  -231,7 +234,8  <at>  <at>  nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx,
     }

   *r_x = x;
-  *r_y = y;
+  if (r_y)
+    *r_y = y;

   point_free (&Q);
   /* Now we can test our keys (this should never fail!).  */
 <at>  <at>  -307,7 +311,7  <at>  <at>  test_ecdh_only_keys (ECC_secret_key *sk, unsigned int nbits)
   mpi_ec_t ec;

   if (DBG_CIPHER)
-    log_debug ("Testing key.\n");
+    log_debug ("Testing ECDH only key.\n");

   point_init (&R_);

 <at>  <at>  -572,7 +576,9  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)

   ctx = _gcry_mpi_ec_p_internal_new (E.model, E.dialect, 0, E.p, E.a, E.b);

-  if ((flags & PUBKEY_FLAG_EDDSA))
+  if (E.model == MPI_EC_MONTGOMERY)
+    rc = nist_generate_key (&sk, &E, ctx, flags, nbits, &Qx, NULL);
+  else if ((flags & PUBKEY_FLAG_EDDSA))
     rc = _gcry_ecc_eddsa_genkey (&sk, &E, ctx, flags);
   else
     rc = nist_generate_key (&sk, &E, ctx, flags, nbits, &Qx, &Qy);
 <at>  <at>  -582,26 +588,41  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
   /* Copy data to the result.  */
   Gx = mpi_new (0);
   Gy = mpi_new (0);
-  if (_gcry_mpi_ec_get_affine (Gx, Gy, &sk.E.G, ctx))
-    log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "G");
-  base = _gcry_ecc_ec2os (Gx, Gy, sk.E.p);
   if (sk.E.dialect == ECC_DIALECT_ED25519 && !(flags & PUBKEY_FLAG_NOCOMP))
     {
       unsigned char *encpk;
       unsigned int encpklen;

-      /* (Gx and Gy are used as scratch variables)  */
-      rc = _gcry_ecc_eddsa_encodepoint (&sk.Q, ctx, Gx, Gy,
-                                        !!(flags & PUBKEY_FLAG_COMP),
-                                        &encpk, &encpklen);
+      if (E.model != MPI_EC_MONTGOMERY)
+        /* (Gx and Gy are used as scratch variables)  */
+        rc = _gcry_ecc_eddsa_encodepoint (&sk.Q, ctx, Gx, Gy,
+                                          !!(flags & PUBKEY_FLAG_COMP),
+                                          &encpk, &encpklen);
+      else
+        {
+          int off = !!(flags & PUBKEY_FLAG_COMP);
+
+          encpk = _gcry_mpi_get_buffer_extra (Qx, ctx->nbits/8, off?-1:0,
+                                              &encpklen, NULL);
+          if (encpk == NULL)
+            rc = gpg_err_code_from_syserror ();
+          else
+            {
+              if (off)
+                encpk[0] = 0x40;
+              encpklen += off;
+            }
+        }
       if (rc)
         return rc;
       public = mpi_new (0);
       mpi_set_opaque (public, encpk, encpklen*8);
-      encpk = NULL;
     }
   else
     {
+      if (_gcry_mpi_ec_get_affine (Gx, Gy, &sk.E.G, ctx))
+        log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "G");
+      base = _gcry_ecc_ec2os (Gx, Gy, sk.E.p);
       if (!Qx)
         {
           /* This is the case for a key from _gcry_ecc_eddsa_generate
 <at>  <at>  -1216,6 +1237,18  <at>  <at>  ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
   gcry_mpi_t data = NULL;
   ECC_public_key pk;
   mpi_ec_t ec = NULL;
+  int flags;
+
+  /* Look for flags. */
+  l1 = sexp_find_token (keyparms, "flags", 0);
+  if (l1)
+    {
+      rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL);
+      if (rc)
+        goto leave;
+    }
+  sexp_release (l1);
+  l1 = NULL;

   memset (&pk, 0, sizeof pk);
   _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT,
 <at>  <at>  -1239,7 +1272,9  <at>  <at>  ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
   /*
    * Extract the key.
    */
-  rc = sexp_extract_param (keyparms, NULL, "-p?a?b?g?n?h?+q",
+  rc = sexp_extract_param (keyparms, NULL,
+                           (flags & PUBKEY_FLAG_EDDSA)?
+                           "-p?a?b?g?n?h?/q" : "-p?a?b?g?n?h?+q",
                            &pk.E.p, &pk.E.a, &pk.E.b, &mpi_g, &pk.E.n, &pk.E.h,
                            &mpi_q, NULL);
   if (rc)
 <at>  <at>  -1252,7 +1287,6  <at>  <at>  ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         goto leave;
     }
   /* Add missing parameters using the optional curve parameter.  */
-  sexp_release (l1);
   l1 = sexp_find_token (keyparms, "curve", 5);
   if (l1)
     {
 <at>  <at>  -1261,7 +1295,7  <at>  <at>  ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         {
           rc = _gcry_ecc_fill_in_curve (0, curvename, &pk.E, NULL);
           if (rc)
-            return rc;
+            goto leave;
         }
     }
   /* Guess required fields if a curve parameter has not been given.  */
 <at>  <at>  -1292,42 +1326,73  <at>  <at>  ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
       goto leave;
     }

+  /* Compute the encrypted value.  */
+  ec = _gcry_mpi_ec_p_internal_new (pk.E.model, pk.E.dialect, 0,
+                                    pk.E.p, pk.E.a, pk.E.b);
+
   /* Convert the public key.  */
   if (mpi_q)
     {
       point_init (&pk.Q);
-      rc = _gcry_ecc_os2ec (&pk.Q, mpi_q);
+      if (ec->model == MPI_EC_MONTGOMERY)
+        rc = _gcry_ecc_mont_decodepoint (mpi_q, ec, &pk.Q);
+      else
+        rc = _gcry_ecc_os2ec (&pk.Q, mpi_q);
       if (rc)
         goto leave;
     }

-  /* Compute the encrypted value.  */
-  ec = _gcry_mpi_ec_p_internal_new (pk.E.model, pk.E.dialect, 0,
-                                    pk.E.p, pk.E.a, pk.E.b);
-
   /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so */
   {
     mpi_point_struct R;  /* Result that we return.  */
     gcry_mpi_t x, y;
+    unsigned char *rawmpi;
+    unsigned int rawmpilen;

     x = mpi_new (0);
-    y = mpi_new (0);
+    if (ec->model == MPI_EC_MONTGOMERY)
+      y = NULL;
+    else
+      y = mpi_new (0);

     point_init (&R);

     /* R = kQ  <=>  R = kdG  */
     _gcry_mpi_ec_mul_point (&R, data, &pk.Q, ec);
-
     if (_gcry_mpi_ec_get_affine (x, y, &R, ec))
       log_fatal ("ecdh: Failed to get affine coordinates for kdG\n");
-    mpi_s = _gcry_ecc_ec2os (x, y, pk.E.p);
+    if (y)
+      mpi_s = _gcry_ecc_ec2os (x, y, pk.E.p);
+    else
+      {
+        rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL);
+        if (!rawmpi)
+          rc = gpg_err_code_from_syserror ();
+        else
+          {
+            mpi_s = mpi_new (0);
+            mpi_set_opaque (mpi_s, rawmpi, rawmpilen*8);
+          }
+      }

     /* R = kG */
     _gcry_mpi_ec_mul_point (&R, data, &pk.E.G, ec);

     if (_gcry_mpi_ec_get_affine (x, y, &R, ec))
       log_fatal ("ecdh: Failed to get affine coordinates for kG\n");
-    mpi_e = _gcry_ecc_ec2os (x, y, pk.E.p);
+    if (y)
+      mpi_e = _gcry_ecc_ec2os (x, y, pk.E.p);
+    else
+      {
+        rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL);
+        if (!rawmpi)
+          rc = gpg_err_code_from_syserror ();
+        else
+          {
+            mpi_e = mpi_new (0);
+            mpi_set_opaque (mpi_e, rawmpi, rawmpilen*8);
+          }
+      }

     mpi_free (x);
     mpi_free (y);
 <at>  <at>  -1335,7 +1400,8  <at>  <at>  ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
     point_free (&R);
   }

-  rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e);
+  if (!rc)
+    rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e);

  leave:
   _gcry_mpi_release (pk.E.p);
 <at>  <at>  -1351,6 +1417,7  <at>  <at>  ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
   _gcry_mpi_release (mpi_s);
   _gcry_mpi_release (mpi_e);
   xfree (curvename);
+  sexp_release (l1);
   _gcry_mpi_ec_free (ec);
   _gcry_pk_util_free_encoding_ctx (&ctx);
   if (DBG_CIPHER)
 <at>  <at>  -1380,6 +1447,7  <at>  <at>  ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
   mpi_point_struct kG;
   mpi_point_struct R;
   gcry_mpi_t r = NULL;
+  int flags = 0;

   memset (&sk, 0, sizeof sk);
   point_init (&kG);
 <at>  <at>  -1388,6 +1456,17  <at>  <at>  ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
   _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT,
                                    ecc_get_nbits (keyparms));

+  /* Look for flags. */
+  l1 = sexp_find_token (keyparms, "flags", 0);
+  if (l1)
+    {
+      rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL);
+      if (rc)
+        goto leave;
+    }
+  sexp_release (l1);
+  l1 = NULL;
+
   /*
    * Extract the data.
    */
 <at>  <at>  -1430,7 +1509,7  <at>  <at>  ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         {
           rc = _gcry_ecc_fill_in_curve (0, curvename, &sk.E, NULL);
           if (rc)
-            return rc;
+            goto leave;
         }
     }
   /* Guess required fields if a curve parameter has not been given.  */
 <at>  <at>  -1462,18 +1541,19  <at>  <at>  ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
     }

+  ec = _gcry_mpi_ec_p_internal_new (sk.E.model, sk.E.dialect, 0,
+                                    sk.E.p, sk.E.a, sk.E.b);
+
   /*
    * Compute the plaintext.
    */
-  rc = _gcry_ecc_os2ec (&kG, data_e);
+  if (ec->model == MPI_EC_MONTGOMERY)
+    rc = _gcry_ecc_mont_decodepoint (data_e, ec, &kG);
+  else
+    rc = _gcry_ecc_os2ec (&kG, data_e);
   if (rc)
-    {
-      point_free (&kG);
-      return rc;
-    }
+    return rc;

-  ec = _gcry_mpi_ec_p_internal_new (sk.E.model, sk.E.dialect, 0,
-                                    sk.E.p, sk.E.a, sk.E.b);

   /* R = dkG */
   _gcry_mpi_ec_mul_point (&R, sk.d, &kG, ec);
 <at>  <at>  -1483,12 +1563,30  <at>  <at>  ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
     gcry_mpi_t x, y;

     x = mpi_new (0);
-    y = mpi_new (0);
+    if (ec->model == MPI_EC_MONTGOMERY)
+      y = NULL;
+    else
+      y = mpi_new (0);

     if (_gcry_mpi_ec_get_affine (x, y, &R, ec))
       log_fatal ("ecdh: Failed to get affine coordinates\n");

-    r = _gcry_ecc_ec2os (x, y, sk.E.p);
+    if (y)
+      r = _gcry_ecc_ec2os (x, y, sk.E.p);
+    else
+      {
+        unsigned char *rawmpi;
+        unsigned int rawmpilen;
+
+        rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL);
+        if (!rawmpi)
+          rc = gpg_err_code_from_syserror ();
+        else
+          {
+            r = mpi_new (0);
+            mpi_set_opaque (r, rawmpi, rawmpilen*8);
+          }
+      }
     if (!r)
       rc = gpg_err_code_from_syserror ();
     else
--
by NIIBE Yutaka | 11 Jun 09:28 2015
Picon

[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-230-ga36ee75

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  a36ee7501f68ad7ebcfe31f9659430b9d2c3ddd1 (commit)
      from  2bddd947fd1c11b4ec461576db65a5e34fea1b07 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit a36ee7501f68ad7ebcfe31f9659430b9d2c3ddd1
Author: NIIBE Yutaka <gniibe <at> fsij.org>
Date:   Thu Jun 11 16:19:49 2015 +0900

    mpi: Support FreeBSD 10 or later.

    * mpi/config.links: Include FreeBSD 10 to 29.

    --

    Thanks to Yuta SATOH.

    GnuPG-bug-id: 1936, 1974

diff --git a/mpi/config.links b/mpi/config.links
index 2fb5e8a..3ead4f0 100644
--- a/mpi/config.links
+++ b/mpi/config.links
 <at>  <at>  -50,11 +50,12  <at>  <at>  case "${host}" in
        path=""
        mpi_cpu_arch="x86"
        ;;
-    i[3467]86*-*-openbsd*      | \
-    i[3467]86*-*-freebsd*-elf  | \
-    i[3467]86*-*-freebsd[3-9]* | \
-    i[3467]86*-*-freebsdelf*   | \
-    i[3467]86*-*-netbsd*       | \
+    i[3467]86*-*-openbsd*         | \
+    i[3467]86*-*-freebsd*-elf     | \
+    i[3467]86*-*-freebsd[3-9]*    | \
+    i[3467]86*-*-freebsd[12][0-9]*| \
+    i[3467]86*-*-freebsdelf*      | \
+    i[3467]86*-*-netbsd*          | \
     i[3467]86*-*-k*bsd*)
        echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/i386/syntax.h	   >>./mpi/asm-syntax.h
 <at>  <at>  -64,6 +65,7  <at>  <at>  case "${host}" in
     i586*-*-openbsd*         | \
     i586*-*-freebsd*-elf     | \
     i586*-*-freebsd[3-9]*    | \
+    i586*-*-freebsd[12][0-9]*| \
     i586*-*-freebsdelf*      | \
     i586*-*-netbsd*	     | \
     i586*-*-k*bsd*	     | \

-----------------------------------------------------------------------

Summary of changes:
 mpi/config.links | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

hooks/post-receive
--

-- 
The GNU crypto library
http://git.gnupg.org

_______________________________________________
Gnupg-commits mailing list
Gnupg-commits <at> gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits
NIIBE Yutaka | 11 Jun 09:27 2015

FreeBSD 10 or later

Hello,

Here is a patch for FreeBSD 10 problem.

This is an obvious fix.  I'm going to push this now to master.
I'll backport it to 1.6.x, too.

    mpi: Support FreeBSD 10 or later.

    * mpi/config.links: Include FreeBSD 10 to 29.

    --

    Thanks to Yuta SATOH.

    GnuPG-bug-id: 1936, 1974

diff --git a/mpi/config.links b/mpi/config.links
index 2fb5e8a..3ead4f0 100644
--- a/mpi/config.links
+++ b/mpi/config.links
 <at>  <at>  -50,11 +50,12  <at>  <at>  case "${host}" in
        path=""
        mpi_cpu_arch="x86"
        ;;
-    i[3467]86*-*-openbsd*      | \
-    i[3467]86*-*-freebsd*-elf  | \
-    i[3467]86*-*-freebsd[3-9]* | \
-    i[3467]86*-*-freebsdelf*   | \
-    i[3467]86*-*-netbsd*       | \
+    i[3467]86*-*-openbsd*         | \
+    i[3467]86*-*-freebsd*-elf     | \
+    i[3467]86*-*-freebsd[3-9]*    | \
+    i[3467]86*-*-freebsd[12][0-9]*| \
+    i[3467]86*-*-freebsdelf*      | \
+    i[3467]86*-*-netbsd*          | \
     i[3467]86*-*-k*bsd*)
        echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/i386/syntax.h	   >>./mpi/asm-syntax.h
 <at>  <at>  -64,6 +65,7  <at>  <at>  case "${host}" in
     i586*-*-openbsd*         | \
     i586*-*-freebsd*-elf     | \
     i586*-*-freebsd[3-9]*    | \
+    i586*-*-freebsd[12][0-9]*| \
     i586*-*-freebsdelf*      | \
     i586*-*-netbsd*	     | \
     i586*-*-k*bsd*	     | \
--
by Werner Koch | 21 May 16:58 2015
Picon

[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-229-g2bddd94

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  2bddd947fd1c11b4ec461576db65a5e34fea1b07 (commit)
       via  102d68b3bd77813a3ff989526855bb1e283bf9d7 (commit)
       via  8124e357b732a719696bfd5271def4e528f2a1e1 (commit)
      from  9b0c6c8141ae9bd056392a3f6b5704b505fc8501 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 2bddd947fd1c11b4ec461576db65a5e34fea1b07
Author: Werner Koch <wk <at> gnupg.org>
Date:   Thu May 21 16:24:36 2015 +0200

    ecc: Add key generation flag "no-keytest".

    * src/cipher.h (PUBKEY_FLAG_NO_KEYTEST): New.
    * cipher/pubkey-util.c (_gcry_pk_util_parse_flaglist): Add flag
    "no-keytest".  Return an error for invalid flags of length 10.

    * cipher/ecc.c (nist_generate_key): Replace arg random_level by flags
    and set random level depending on flags.
    * cipher/ecc-eddsa.c (_gcry_ecc_eddsa_genkey): Ditto.
    * cipher/ecc.c (ecc_generate): Pass flags to generate function and
    remove var random_level.
    (nist_generate_key): Implement "no-keytest" flag.

    * tests/keygen.c (check_ecc_keys): Add tests for transient-key and
    no-keytest.
    --

    After key creation we usually run a test to check whether the keys
    really work.  However for transient keys this might be too time
    consuming and given that a failed test would anyway abort the process
    the optional use of a flag to skip the test is appropriate.

    Using Ed25519 for EdDSA and the "no-keytest" flags halves the time to
    create such a key.  This was measured by looping the last test from
    check_ecc_keys() 1000 times with and without the flag.

    Due to a bug in the flags parser unknown flags with a length of 10
    characters were not detected.  Thus the "no-keytest" flag can be
    employed by all software even for libraries before this.  That bug is
    however solved with this version.

    Signed-off-by: Werner Koch <wk <at> gnupg.org>

diff --git a/NEWS b/NEWS
index 4c74533..d90ee6d 100644
--- a/NEWS
+++ b/NEWS
 <at>  <at>  -23,6 +23,10  <at>  <at>  Noteworthy changes in version 1.7.0 (unreleased)

  * Added OCB mode.

+ * New flag "no-keytest" for ECC key generation.  Due to a bug in the
+   parser that flag will also be accepted but ignored by older version
+   of Libgcrypt.
+
  * Interface changes relative to the 1.6.0 release:
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  gcry_mac_get_algo               NEW.
diff --git a/cipher/ecc-common.h b/cipher/ecc-common.h
index 83bf20d..f0d97ea 100644
--- a/cipher/ecc-common.h
+++ b/cipher/ecc-common.h
 <at>  <at>  -123,7 +123,7  <at>  <at>  gpg_err_code_t _gcry_ecc_eddsa_compute_h_d (unsigned char **r_digest,
 gpg_err_code_t _gcry_ecc_eddsa_genkey (ECC_secret_key *sk,
                                        elliptic_curve_t *E,
                                        mpi_ec_t ctx,
-                                       gcry_random_level_t random_level);
+                                       int flags);
 gpg_err_code_t _gcry_ecc_eddsa_sign (gcry_mpi_t input,
                                      ECC_secret_key *sk,
                                      gcry_mpi_t r_r, gcry_mpi_t s,
diff --git a/cipher/ecc-eddsa.c b/cipher/ecc-eddsa.c
index a12ebab..4323d8e 100644
--- a/cipher/ecc-eddsa.c
+++ b/cipher/ecc-eddsa.c
 <at>  <at>  -465,15 +465,28  <at>  <at>  _gcry_ecc_eddsa_compute_h_d (unsigned char **r_digest,
 }

 
-/* Ed25519 version of the key generation.  */
+/**
+ * _gcry_ecc_eddsa_genkey - EdDSA version of the key generation.
+ *
+ *  <at> sk:  A struct to receive the secret key.
+ *  <at> E:   Parameters of the curve.
+ *  <at> ctx: Elliptic curve computation context.
+ *  <at> flags: Flags controlling aspects of the creation.
+ *
+ * Return: An error code.
+ *
+ * The only  <at> flags bit used by this function is %PUBKEY_FLAG_TRANSIENT
+ * to use a faster RNG.
+ */
 gpg_err_code_t
 _gcry_ecc_eddsa_genkey (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx,
-                        gcry_random_level_t random_level)
+                        int flags)
 {
   gpg_err_code_t rc;
   int b = 256/8;             /* The only size we currently support.  */
   gcry_mpi_t a, x, y;
   mpi_point_struct Q;
+  gcry_random_level_t random_level;
   char *dbuf;
   size_t dlen;
   gcry_buffer_t hvec[1];
 <at>  <at>  -482,6 +495,11  <at>  <at>  _gcry_ecc_eddsa_genkey (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx,
   point_init (&Q);
   memset (hvec, 0, sizeof hvec);

+  if ((flags & PUBKEY_FLAG_TRANSIENT_KEY))
+    random_level = GCRY_STRONG_RANDOM;
+  else
+    random_level = GCRY_VERY_STRONG_RANDOM;
+
   a = mpi_snew (0);
   x = mpi_new (0);
   y = mpi_new (0);
diff --git a/cipher/ecc.c b/cipher/ecc.c
index 262fcd8..5ffe84b 100644
--- a/cipher/ecc.c
+++ b/cipher/ecc.c
 <at>  <at>  -1,6 +1,6  <at>  <at> 
 /* ecc.c  -  Elliptic Curve Cryptography
  * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
- * Copyright (C) 2013 g10 Code GmbH
+ * Copyright (C) 2013, 2015 g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
 <at>  <at>  -106,12 +106,11  <at>  <at>  _gcry_register_pk_ecc_progress (void (*cb) (void *, const char *,

 
 /**
- * nist_generate_key - Standard version of the key generation.
- *
+ * nist_generate_key - Standard version of the ECC key generation.
  *  <at> sk:  A struct to receive the secret key.
  *  <at> E:   Parameters of the curve.
  *  <at> ctx: Elliptic curve computation context.
- *  <at> random_level: The quality of the random.
+ *  <at> flags: Flags controlling aspects of the creation.
  *  <at> nbits: Only for testing
  *  <at> r_x: On success this receives an allocated MPI with the affine
  *       x-coordinate of the poblic key.  On error NULL is stored.
 <at>  <at>  -119,19 +118,29  <at>  <at>  _gcry_register_pk_ecc_progress (void (*cb) (void *, const char *,
  *
  * Return: An error code.
  *
+ * The  <at> flags bits used by this function are %PUBKEY_FLAG_TRANSIENT to
+ * use a faster RNG, and %PUBKEY_FLAG_NO_KEYTEST to skip the assertion
+ * that the key works as expected.
+ *
  * FIXME: Check whether N is needed.
  */
 static gpg_err_code_t
 nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx,
-                   gcry_random_level_t random_level, unsigned int nbits,
+                   int flags, unsigned int nbits,
                    gcry_mpi_t *r_x, gcry_mpi_t *r_y)
 {
   mpi_point_struct Q;
+  gcry_random_level_t random_level;
   gcry_mpi_t x, y;
   const unsigned int pbits = mpi_get_nbits (E->p);

   point_init (&Q);

+  if ((flags & PUBKEY_FLAG_TRANSIENT_KEY))
+    random_level = GCRY_STRONG_RANDOM;
+  else
+    random_level = GCRY_VERY_STRONG_RANDOM;
+
   /* Generate a secret.  */
   if (ctx->dialect == ECC_DIALECT_ED25519)
     {
 <at>  <at>  -226,7 +235,9  <at>  <at>  nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx,

   point_free (&Q);
   /* Now we can test our keys (this should never fail!).  */
-  if (sk->E.model != MPI_EC_MONTGOMERY)
+  if ((flags & PUBKEY_FLAG_NO_KEYTEST))
+    ; /* User requested to skip the test.  */
+  else if (sk->E.model != MPI_EC_MONTGOMERY)
     test_keys (sk, nbits - 64);
   else
     test_ecdh_only_keys (sk, nbits - 64);
 <at>  <at>  -492,7 +503,6  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
   gcry_mpi_t Qy = NULL;
   char *curve_name = NULL;
   gcry_sexp_t l1;
-  gcry_random_level_t random_level;
   mpi_ec_t ctx = NULL;
   gcry_sexp_t curve_info = NULL;
   gcry_sexp_t curve_flags = NULL;
 <at>  <at>  -560,17 +570,12  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
       log_printpnt ("ecgen curve G", &E.G, NULL);
     }

-  if ((flags & PUBKEY_FLAG_TRANSIENT_KEY))
-    random_level = GCRY_STRONG_RANDOM;
-  else
-    random_level = GCRY_VERY_STRONG_RANDOM;
-
   ctx = _gcry_mpi_ec_p_internal_new (E.model, E.dialect, 0, E.p, E.a, E.b);

   if ((flags & PUBKEY_FLAG_EDDSA))
-    rc = _gcry_ecc_eddsa_genkey (&sk, &E, ctx, random_level);
+    rc = _gcry_ecc_eddsa_genkey (&sk, &E, ctx, flags);
   else
-    rc = nist_generate_key (&sk, &E, ctx, random_level, nbits, &Qx, &Qy);
+    rc = nist_generate_key (&sk, &E, ctx, flags, nbits, &Qx, &Qy);
   if (rc)
     goto leave;

diff --git a/cipher/pubkey-util.c b/cipher/pubkey-util.c
index 514f1eb..afa3454 100644
--- a/cipher/pubkey-util.c
+++ b/cipher/pubkey-util.c
 <at>  <at>  -1,7 +1,7  <at>  <at> 
 /* pubkey-util.c - Supporting functions for all pubkey modules.
  * Copyright (C) 1998, 1999, 2000, 2002, 2003, 2005,
  *               2007, 2008, 2011 Free Software Foundation, Inc.
- * Copyright (C) 2013  g10 Code GmbH
+ * Copyright (C) 2013, 2015 g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
 <at>  <at>  -155,6 +155,10  <at>  <at>  _gcry_pk_util_parse_flaglist (gcry_sexp_t list,
         case 10:
           if (!memcmp (s, "igninvflag", 10))
             igninvflag = 1;
+          else if (!memcmp (s, "no-keytest", 10))
+            flags |= PUBKEY_FLAG_NO_KEYTEST;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
           break;

         case 11:
diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi
index ab4f685..f13695a 100644
--- a/doc/gcrypt.texi
+++ b/doc/gcrypt.texi
 <at>  <at>  -2327,6 +2327,13  <at>  <at>  random number generator.  This flag may be used for keys which are
 only used for a short time or per-message and do not require full
 cryptographic strength.

+ <at> item no-keytest
+ <at> cindex no-keytest
+This flag skips internal failsafe tests to assert that a generated key
+is properly working.  It currently has an effect only for standard ECC
+key generation.  It is mostly useful along with transient-key to
+achieve fastest ECC key generation.
+
  <at> item use-x931
  <at> cindex X9.31
 Force the use of the ANSI X9.31 key generation algorithm instead of
diff --git a/src/cipher.h b/src/cipher.h
index 7ad0b2c..ef183fd 100644
--- a/src/cipher.h
+++ b/src/cipher.h
 <at>  <at>  -40,6 +40,7  <at>  <at> 
 #define PUBKEY_FLAG_NOCOMP         (1 << 11)
 #define PUBKEY_FLAG_EDDSA          (1 << 12)
 #define PUBKEY_FLAG_GOST           (1 << 13)
+#define PUBKEY_FLAG_NO_KEYTEST     (1 << 14)

 
 enum pk_operation
diff --git a/tests/keygen.c b/tests/keygen.c
index 4aff9c9..8b9a1d5 100644
--- a/tests/keygen.c
+++ b/tests/keygen.c
 <at>  <at>  -1,5 +1,6  <at>  <at> 
 /* keygen.c  -  key generation regression tests
  * Copyright (C) 2003, 2005, 2012 Free Software Foundation, Inc.
+ * Copyright (C) 2013, 2015 g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
 <at>  <at>  -14,8 +15,7  <at>  <at> 
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
  */

 #ifdef HAVE_CONFIG_H
 <at>  <at>  -432,7 +432,43  <at>  <at>  check_ecc_keys (void)
     show_sexp ("ECC key:\n", key);

   check_generated_ecc_key (key);
+  gcry_sexp_release (key);
+
+
+  if (verbose)
+    show ("creating ECC key using curve Ed25519 for ECDSA (transient-key)\n");
+  rc = gcry_sexp_build (&keyparm, NULL,
+                        "(genkey(ecc(curve Ed25519)(flags transient-key)))");
+  if (rc)
+    die ("error creating S-expression: %s\n", gpg_strerror (rc));
+  rc = gcry_pk_genkey (&key, keyparm);
+  gcry_sexp_release (keyparm);
+  if (rc)
+    die ("error generating ECC key using curve Ed25519 for ECDSA"
+         " (transient-key): %s\n",
+         gpg_strerror (rc));
+  if (verbose > 1)
+    show_sexp ("ECC key:\n", key);
+  check_generated_ecc_key (key);
+  gcry_sexp_release (key);

+  if (verbose)
+    show ("creating ECC key using curve Ed25519 for ECDSA "
+          "(transient-key no-keytest)\n");
+  rc = gcry_sexp_build (&keyparm, NULL,
+                        "(genkey(ecc(curve Ed25519)"
+                        "(flags transient-key no-keytest)))");
+  if (rc)
+    die ("error creating S-expression: %s\n", gpg_strerror (rc));
+  rc = gcry_pk_genkey (&key, keyparm);
+  gcry_sexp_release (keyparm);
+  if (rc)
+    die ("error generating ECC key using curve Ed25519 for ECDSA"
+         " (transient-key no-keytest): %s\n",
+         gpg_strerror (rc));
+  if (verbose > 1)
+    show_sexp ("ECC key:\n", key);
+  check_generated_ecc_key (key);
   gcry_sexp_release (key);
 }

commit 102d68b3bd77813a3ff989526855bb1e283bf9d7
Author: Werner Koch <wk <at> gnupg.org>
Date:   Thu May 21 11:12:42 2015 +0200

    ecc: Avoid double conversion to affine coordinates in keygen.

    * cipher/ecc.c (nist_generate_key): Add args r_x and r_y.
    (ecc_generate): Rename vars.  Convert to affine coordinates only if
    not returned by the lower level generation function.
    --

    nist_generate_key already needs to convert to affine coordinates to
    implement Jivsov's trick.  Thus we can return them and avoid calling
    it in ecc_generate again.

    Signed-off-by: Werner Koch <wk <at> gnupg.org>

diff --git a/cipher/ecc.c b/cipher/ecc.c
index 2f5e401..262fcd8 100644
--- a/cipher/ecc.c
+++ b/cipher/ecc.c
 <at>  <at>  -105,12 +105,30  <at>  <at>  _gcry_register_pk_ecc_progress (void (*cb) (void *, const char *,

 
 
-/* Standard version of the key generation.  */
+/**
+ * nist_generate_key - Standard version of the key generation.
+ *
+ *  <at> sk:  A struct to receive the secret key.
+ *  <at> E:   Parameters of the curve.
+ *  <at> ctx: Elliptic curve computation context.
+ *  <at> random_level: The quality of the random.
+ *  <at> nbits: Only for testing
+ *  <at> r_x: On success this receives an allocated MPI with the affine
+ *       x-coordinate of the poblic key.  On error NULL is stored.
+ *  <at> r_y: Ditto for the y-coordinate.
+ *
+ * Return: An error code.
+ *
+ * FIXME: Check whether N is needed.
+ */
 static gpg_err_code_t
 nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx,
-                   gcry_random_level_t random_level, unsigned int nbits)
+                   gcry_random_level_t random_level, unsigned int nbits,
+                   gcry_mpi_t *r_x, gcry_mpi_t *r_y)
 {
   mpi_point_struct Q;
+  gcry_mpi_t x, y;
+  const unsigned int pbits = mpi_get_nbits (E->p);

   point_init (&Q);

 <at>  <at>  -146,6 +164,11  <at>  <at>  nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx,
   sk->E.h = mpi_copy (E->h);
   point_init (&sk->Q);

+  x = mpi_new (pbits);
+  y = mpi_new (pbits);
+  if (_gcry_mpi_ec_get_affine (x, y, &Q, ctx))
+    log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q");
+
   /* We want the Q=(x,y) be a "compliant key" in terms of the
    * http://tools.ietf.org/html/draft-jivsov-ecc-compact, which simply
    * means that we choose either Q=(x,y) or -Q=(x,p-y) such that we
 <at>  <at>  -159,16 +182,10  <at>  <at>  nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx,
     point_set (&sk->Q, &Q);
   else
     {
-      gcry_mpi_t x, y, negative;
-      const unsigned int pbits = mpi_get_nbits (E->p);
+      gcry_mpi_t negative;

-      x = mpi_new (pbits);
-      y = mpi_new (pbits);
       negative = mpi_new (pbits);

-      if (_gcry_mpi_ec_get_affine (x, y, &Q, ctx))
-        log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q");
-
       if (E->model == MPI_EC_WEIERSTRASS)
         mpi_sub (negative, E->p, y);      /* negative = p - y */
       else
 <at>  <at>  -178,12 +195,18  <at>  <at>  nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx,
         {
           /* We need to end up with -Q; this assures that new Q's y is
              the smallest one */
-          mpi_sub (sk->d, E->n, sk->d);   /* d = order - d */
           if (E->model == MPI_EC_WEIERSTRASS)
-            mpi_point_snatch_set (&sk->Q, x, negative,
-                                       mpi_alloc_set_ui (1));
+            {
+              mpi_free (y);
+              y = negative;
+            }
           else
-            mpi_point_snatch_set (&sk->Q, negative, y, mpi_alloc_set_ui (1));
+            {
+              mpi_free (x);
+              x = negative;
+            }
+          mpi_sub (sk->d, E->n, sk->d);   /* d = order - d */
+          mpi_point_set (&sk->Q, x, y, mpi_const (MPI_C_ONE));

           if (DBG_CIPHER)
             log_debug ("ecgen converted Q to a compliant point\n");
 <at>  <at>  -191,23 +214,16  <at>  <at>  nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx,
       else /* p - y >= p */
         {
           /* No change is needed exactly 50% of the time: just copy. */
+          mpi_free (negative);
           point_set (&sk->Q, &Q);
           if (DBG_CIPHER)
             log_debug ("ecgen didn't need to convert Q to a compliant point\n");
-
-          mpi_free (negative);
-          if (E->model == MPI_EC_WEIERSTRASS)
-            mpi_free (x);
-          else
-            mpi_free (y);
         }
-
-      if (E->model == MPI_EC_WEIERSTRASS)
-        mpi_free (y);
-      else
-        mpi_free (x);
     }

+  *r_x = x;
+  *r_y = y;
+
   point_free (&Q);
   /* Now we can test our keys (this should never fail!).  */
   if (sk->E.model != MPI_EC_MONTGOMERY)
 <at>  <at>  -470,8 +486,10  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
   unsigned int nbits;
   elliptic_curve_t E;
   ECC_secret_key sk;
-  gcry_mpi_t x = NULL;
-  gcry_mpi_t y = NULL;
+  gcry_mpi_t Gx = NULL;
+  gcry_mpi_t Gy = NULL;
+  gcry_mpi_t Qx = NULL;
+  gcry_mpi_t Qy = NULL;
   char *curve_name = NULL;
   gcry_sexp_t l1;
   gcry_random_level_t random_level;
 <at>  <at>  -548,26 +566,27  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
     random_level = GCRY_VERY_STRONG_RANDOM;

   ctx = _gcry_mpi_ec_p_internal_new (E.model, E.dialect, 0, E.p, E.a, E.b);
-  x = mpi_new (0);
-  y = mpi_new (0);

   if ((flags & PUBKEY_FLAG_EDDSA))
     rc = _gcry_ecc_eddsa_genkey (&sk, &E, ctx, random_level);
   else
-    rc = nist_generate_key (&sk, &E, ctx, random_level, nbits);
+    rc = nist_generate_key (&sk, &E, ctx, random_level, nbits, &Qx, &Qy);
   if (rc)
     goto leave;

   /* Copy data to the result.  */
-  if (_gcry_mpi_ec_get_affine (x, y, &sk.E.G, ctx))
+  Gx = mpi_new (0);
+  Gy = mpi_new (0);
+  if (_gcry_mpi_ec_get_affine (Gx, Gy, &sk.E.G, ctx))
     log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "G");
-  base = _gcry_ecc_ec2os (x, y, sk.E.p);
+  base = _gcry_ecc_ec2os (Gx, Gy, sk.E.p);
   if (sk.E.dialect == ECC_DIALECT_ED25519 && !(flags & PUBKEY_FLAG_NOCOMP))
     {
       unsigned char *encpk;
       unsigned int encpklen;

-      rc = _gcry_ecc_eddsa_encodepoint (&sk.Q, ctx, x, y,
+      /* (Gx and Gy are used as scratch variables)  */
+      rc = _gcry_ecc_eddsa_encodepoint (&sk.Q, ctx, Gx, Gy,
                                         !!(flags & PUBKEY_FLAG_COMP),
                                         &encpk, &encpklen);
       if (rc)
 <at>  <at>  -578,9 +597,16  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
     }
   else
     {
-      if (_gcry_mpi_ec_get_affine (x, y, &sk.Q, ctx))
-        log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q");
-      public = _gcry_ecc_ec2os (x, y, sk.E.p);
+      if (!Qx)
+        {
+          /* This is the case for a key from _gcry_ecc_eddsa_generate
+             with no compression.  */
+          Qx = mpi_new (0);
+          Qy = mpi_new (0);
+          if (_gcry_mpi_ec_get_affine (Qx, Qy, &sk.Q, ctx))
+            log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q");
+        }
+      public = _gcry_ecc_ec2os (Qx, Qy, sk.E.p);
     }
   secret = sk.d; sk.d = NULL;
   if (E.name)
 <at>  <at>  -614,7 +640,8  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
                      curve_info, curve_flags,
                      sk.E.p, sk.E.a, sk.E.b, base, sk.E.n, sk.E.h, public,
                      curve_info, curve_flags,
-                     sk.E.p, sk.E.a, sk.E.b, base, sk.E.n, sk.E.h, public, secret);
+                     sk.E.p, sk.E.a, sk.E.b, base, sk.E.n, sk.E.h, public,
+                                                                   secret);
   else
     rc = sexp_build (r_skey, NULL,
                      "(key-data"
 <at>  <at>  -654,8 +681,10  <at>  <at>  ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
     mpi_free (sk.d);
   }
   _gcry_ecc_curve_free (&E);
-  mpi_free (x);
-  mpi_free (y);
+  mpi_free (Gx);
+  mpi_free (Gy);
+  mpi_free (Qx);
+  mpi_free (Qy);
   _gcry_mpi_ec_free (ctx);
   sexp_release (curve_flags);
   sexp_release (curve_info);

commit 8124e357b732a719696bfd5271def4e528f2a1e1
Author: Werner Koch <wk <at> gnupg.org>
Date:   Mon May 4 16:46:02 2015 +0200

    random: Change initial extra seeding from 2400 bits to 128 bits.

    * random/random-csprng.c (read_pool): Reduce initial seeding.
    --

    See discussion starting at
     https://lists.gnupg.org/pipermail/gnupg-devel/2015-April/029750.html
    and also in May.

    Signed-off-by: Werner Koch <wk <at> gnupg.org>

diff --git a/random/random-csprng.c b/random/random-csprng.c
index 332744b..da50fda 100644
--- a/random/random-csprng.c
+++ b/random/random-csprng.c
 <at>  <at>  -973,8 +973,8  <at>  <at>  read_pool (byte *buffer, size_t length, int level)

       pool_balance = 0;
       needed = length - pool_balance;
-      if (needed < POOLSIZE/2)
-        needed = POOLSIZE/2;
+      if (needed < 16)  /* At least 128 bits.  */
+        needed = 16;
       else if( needed > POOLSIZE )
         BUG ();
       read_random_source (RANDOM_ORIGIN_EXTRAPOLL, needed,

-----------------------------------------------------------------------

Summary of changes:
 NEWS                   |   4 ++
 cipher/ecc-common.h    |   2 +-
 cipher/ecc-eddsa.c     |  22 ++++++++-
 cipher/ecc.c           | 128 +++++++++++++++++++++++++++++++------------------
 cipher/pubkey-util.c   |   6 ++-
 doc/gcrypt.texi        |   7 +++
 random/random-csprng.c |   4 +-
 src/cipher.h           |   1 +
 tests/keygen.c         |  40 +++++++++++++++-
 9 files changed, 159 insertions(+), 55 deletions(-)

hooks/post-receive
--

-- 
The GNU crypto library
http://git.gnupg.org

_______________________________________________
Gnupg-commits mailing list
Gnupg-commits <at> gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits
Christian Grothoff | 19 May 13:56 2015

Re: triple DH

Hi!

Bart just prompted me to look over libgcrypt's key generation for EdDSA
vs. ECDHE (again).
I noticed two odd things.  First, in 'ecc.c::nist_generate_key' you do
(for EdDSA):

      rndbuf = _gcry_random_bytes_secure (32, random_level);
      rndbuf[0] &= 0x7f;  /* Clear bit 255. */
      rndbuf[0] |= 0x40;  /* Set bit 254.   */
      rndbuf[31] &= 0xf8; /* Clear bits 2..0 so that d mod 8 == 0  */
      _gcry_mpi_set_buffer (sk->d, rndbuf, 32, 0);

The bit operations may seem to follow the EdDSA spec, but that's
actually not the case. Those
bit operations must be done AFTER the hashing, and you do those there as
well, in ecc-eddsa.c:508:

 reverse_buffer (hash_d, 32);  /* Only the first half of the hash.  */
  hash_d[0] = (hash_d[0] & 0x7f) | 0x40;
  hash_d[31] &= 0xf8;
  _gcry_mpi_set_buffer (a, hash_d, 32, 0);

So in ecc.c::nist_generate_key() they seem to be misplaced and just
drain a bit of
entropy from the key generation process (effectively reducing key size
from 256 bits
of entropy to 251).

Now, what I was actually trying to do was establish why ECDHE key
generation is 3x
slower than EdDSA key generation (both on Ed25519).  We use the
following code:

// Slow 'ECDHE' version:
  if (0 != (rc = gcry_sexp_build (&s_keyparam, NULL,
                                  "(genkey(ecc(curve Ed25519)"
                                  "(flags)))")))
  {
    LOG_GCRY (GNUNET_ERROR_TYPE_ERROR, "gcry_sexp_build", rc);
    return NULL;
  }
  if (0 != (rc = gcry_pk_genkey (&priv_sexp, s_keyparam)))
  {
    LOG_GCRY (GNUNET_ERROR_TYPE_ERROR, "gcry_pk_genkey", rc);
    gcry_sexp_release (s_keyparam);
    return NULL;
  }

// Fast 'EdDSA' version:
  if (0 != (rc = gcry_sexp_build (&s_keyparam, NULL,
                                  "(genkey(ecc(curve Ed25519)"
                                  "(flags eddsa)))")))
  {
    LOG_GCRY (GNUNET_ERROR_TYPE_ERROR, "gcry_sexp_build", rc);
    return NULL;
  }
  if (0 != (rc = gcry_pk_genkey (&priv_sexp, s_keyparam)))
  {
    LOG_GCRY (GNUNET_ERROR_TYPE_ERROR, "gcry_pk_genkey", rc);
    gcry_sexp_release (s_keyparam);
    return NULL;
  }

The benchmarking results are rather dramatic:
On 05/19/2015 01:22 PM, Bart Polot wrote:
> Still happens in svn head:
>
> [bart <at> voyager ~/g/src/util] (master *% u+1)$ ./perf_crypto_asymmetric
>                   Init:     54 µs
>  EdDSA      create key:   3502 µs <---
>  EdDSA      get pubilc:   3395 µs
>  EdDSA   sign HashCode:   7924 µs
>  EdDSA verify HashCode:   6731 µs
>   ECDH      create key:  11054 µs <---
>   ECDH      get public:   2353 µs
>   ECDH           do DH:   2684 µs
> [bart <at> voyager ~/g/src/util] (master *% u+1)$ pacman -Q libgcrypt
> libgcrypt 1.6.3-2
> [bart <at> voyager ~/g/src/util] (master *% u+1)$
>
>

Why is this? In ecc.c:158, we see that

 if (E->dialect == ECC_DIALECT_ED25519)
    point_set (&sk->Q, &Q);
  else
    {
    // ... lots of code
    }

the key generation logic diverges here.  The reason is that for NIST
curves (and other non-Curve25519 curves)
some logic is needed to ensure that Q has the right sign.  So I
understand why this code is there,
but why is it needed on Curve25519? AFAIK for ECDHE on Curve25519 we
still don't need this.

If I set the 'eddsa' flag when generating the ECDHE key, everything
still works fine (done so in GNUnet
SVN 35742), so that's an easy workaround. Still, feels 'wrong' to use
such a hack.

Happy hacking!

Christian

_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel <at> gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
by Jussi Kivilinna | 17 May 15:17 2015
Picon

[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-226-g9b0c6c8

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  9b0c6c8141ae9bd056392a3f6b5704b505fc8501 (commit)
       via  eb0ed576893b6c7990dbcb568510f831d246cea6 (commit)
       via  12bc93ca8187b8061c2e705427ef22f5a71d29b0 (commit)
       via  8d7de4dbf7732c6eb9e9853ad7c19c89075ace6f (commit)
       via  b65e9e71d5ee992db5c96793c6af999545daad28 (commit)
       via  9597cfddf03c467825da152be5ca0d12a8c30d88 (commit)
       via  6a6646df80386204675d8b149ab60e74d7ca124c (commit)
       via  9a4fb3709864bf3e3918800d44ff576590cd4e92 (commit)
       via  e05682093ffb003b589a697428d918d755ac631d (commit)
       via  c46b015bedba7ce0db68929bd33a86a54ab3d919 (commit)
       via  ee8fc4edcb3466b03246c8720b90731bf274ff1d (commit)
      from  bac42c68b069f17abcca810a21439c7233815747 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 9b0c6c8141ae9bd056392a3f6b5704b505fc8501
Author: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
Date:   Thu May 14 13:07:34 2015 +0300

    Enable AMD64 Twofish implementation on WIN64
    
    * cipher/twofish-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/twofish.c (USE_AMD64_ASM): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (call_sysv_fn): New.
    (twofish_amd64_encrypt_block, twofish_amd64_decrypt_block)
    (twofish_amd64_ctr_enc, twofish_amd64_cbc_dec)
    (twofish_amd64_cfb_dec): New wrapper functions for AMD64
    assembly functions.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>

diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S
index a225307..ea88b94 100644
--- a/cipher/twofish-amd64.S
+++ b/cipher/twofish-amd64.S
 <at>  <at>  -20,7 +20,14  <at>  <at> 
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_TWOFISH)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH)
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
 
 #ifdef __PIC__
 #  define RIP %rip
 <at>  <at>  -166,7 +173,7  <at>  <at> 
 
 .align 8
 .globl _gcry_twofish_amd64_encrypt_block
-.type   _gcry_twofish_amd64_encrypt_block, <at> function;
+ELF(.type   _gcry_twofish_amd64_encrypt_block, <at> function;)
 
 _gcry_twofish_amd64_encrypt_block:
 	/* input:
 <at>  <at>  -205,11 +212,11  <at>  <at>  _gcry_twofish_amd64_encrypt_block:
 	addq $(3 * 8), %rsp;
 
 	ret;
-.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;
+ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
 
 .align 8
 .globl _gcry_twofish_amd64_decrypt_block
-.type   _gcry_twofish_amd64_decrypt_block, <at> function;
+ELF(.type   _gcry_twofish_amd64_decrypt_block, <at> function;)
 
 _gcry_twofish_amd64_decrypt_block:
 	/* input:
 <at>  <at>  -248,7 +255,7  <at>  <at>  _gcry_twofish_amd64_decrypt_block:
 	addq $(3 * 8), %rsp;
 
 	ret;
-.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;
+ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
 
 #undef CTX
 
 <at>  <at>  -462,7 +469,7  <at>  <at>  _gcry_twofish_amd64_decrypt_block:
 	outunpack3(RAB, 2);
 
 .align 8
-.type __twofish_enc_blk3, <at> function;
+ELF(.type __twofish_enc_blk3, <at> function;)
 
 __twofish_enc_blk3:
 	/* input:
 <at>  <at>  -485,10 +492,10  <at>  <at>  __twofish_enc_blk3:
 	outunpack_enc3();
 
 	ret;
-.size __twofish_enc_blk3,.-__twofish_enc_blk3;
+ELF(.size __twofish_enc_blk3,.-__twofish_enc_blk3;)
 
 .align 8
-.type  __twofish_dec_blk3, <at> function;
+ELF(.type  __twofish_dec_blk3, <at> function;)
 
 __twofish_dec_blk3:
 	/* input:
 <at>  <at>  -511,11 +518,11  <at>  <at>  __twofish_dec_blk3:
 	outunpack_dec3();
 
 	ret;
-.size __twofish_dec_blk3,.-__twofish_dec_blk3;
+ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;)
 
 .align 8
 .globl _gcry_twofish_amd64_ctr_enc
-.type   _gcry_twofish_amd64_ctr_enc, <at> function;
+ELF(.type   _gcry_twofish_amd64_ctr_enc, <at> function;)
 _gcry_twofish_amd64_ctr_enc:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -593,11 +600,11  <at>  <at>  _gcry_twofish_amd64_ctr_enc:
 	addq $(8 * 8), %rsp;
 
 	ret;
-.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;
+ELF(.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;)
 
 .align 8
 .globl _gcry_twofish_amd64_cbc_dec
-.type   _gcry_twofish_amd64_cbc_dec, <at> function;
+ELF(.type   _gcry_twofish_amd64_cbc_dec, <at> function;)
 _gcry_twofish_amd64_cbc_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -659,11 +666,11  <at>  <at>  _gcry_twofish_amd64_cbc_dec:
 	addq $(9 * 8), %rsp;
 
 	ret;
-.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;
+ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;)
 
 .align 8
 .globl _gcry_twofish_amd64_cfb_dec
-.type   _gcry_twofish_amd64_cfb_dec, <at> function;
+ELF(.type   _gcry_twofish_amd64_cfb_dec, <at> function;)
 _gcry_twofish_amd64_cfb_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -725,7 +732,7  <at>  <at>  _gcry_twofish_amd64_cfb_dec:
 	addq $(8 * 8), %rsp;
 
 	ret;
-.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;
+ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;)
 
 #endif /*USE_TWOFISH*/
 #endif /*__x86_64*/
diff --git a/cipher/twofish.c b/cipher/twofish.c
index ecd76e3..ce83fad 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
 <at>  <at>  -53,7 +53,8  <at>  <at> 
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
 #undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64_ASM 1
 #endif
 
 <at>  <at>  -754,6 +755,77  <at>  <at>  extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out,
 extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out,
 					const byte *in, byte *iv);
 
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+static inline void
+call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
+              const void *arg3, const void *arg4)
+{
+  /* Call SystemV ABI function without storing non-volatile XMM registers,
+   * as target function does not use vector instruction sets. */
+  asm volatile ("callq *%0\n\t"
+                : "+a" (fn),
+                  "+D" (arg1),
+                  "+S" (arg2),
+                  "+d" (arg3),
+                  "+c" (arg4)
+                :
+                : "cc", "memory", "r8", "r9", "r10", "r11");
+}
+#endif
+
+static inline void
+twofish_amd64_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn(_gcry_twofish_amd64_encrypt_block, c, out, in, NULL);
+#else
+  _gcry_twofish_amd64_encrypt_block(c, out, in);
+#endif
+}
+
+static inline void
+twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn(_gcry_twofish_amd64_decrypt_block, c, out, in, NULL);
+#else
+  _gcry_twofish_amd64_decrypt_block(c, out, in);
+#endif
+}
+
+static inline void
+twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in,
+                      byte *ctr)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn(_gcry_twofish_amd64_ctr_enc, c, out, in, ctr);
+#else
+  _gcry_twofish_amd64_ctr_enc(c, out, in, ctr);
+#endif
+}
+
+static inline void
+twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, const byte *in,
+                      byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn(_gcry_twofish_amd64_cbc_dec, c, out, in, iv);
+#else
+  _gcry_twofish_amd64_cbc_dec(c, out, in, iv);
+#endif
+}
+
+static inline void
+twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in,
+                      byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn(_gcry_twofish_amd64_cfb_dec, c, out, in, iv);
+#else
+  _gcry_twofish_amd64_cfb_dec(c, out, in, iv);
+#endif
+}
+
 #elif defined(USE_ARM_ASM)
 
 /* Assembly implementations of Twofish. */
 <at>  <at>  -833,7 +905,7  <at>  <at>  static unsigned int
 twofish_encrypt (void *context, byte *out, const byte *in)
 {
   TWOFISH_context *ctx = context;
-  _gcry_twofish_amd64_encrypt_block(ctx, out, in);
+  twofish_amd64_encrypt_block(ctx, out, in);
   return /*burn_stack*/ (4*sizeof (void*));
 }
 
 <at>  <at>  -900,7 +972,7  <at>  <at>  static unsigned int
 twofish_decrypt (void *context, byte *out, const byte *in)
 {
   TWOFISH_context *ctx = context;
-  _gcry_twofish_amd64_decrypt_block(ctx, out, in);
+  twofish_amd64_decrypt_block(ctx, out, in);
   return /*burn_stack*/ (4*sizeof (void*));
 }
 
 <at>  <at>  -980,7 +1052,7  <at>  <at>  _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
     /* Process data in 3 block chunks. */
     while (nblocks >= 3)
       {
-        _gcry_twofish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+        twofish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
 
         nblocks -= 3;
         outbuf += 3 * TWOFISH_BLOCKSIZE;
 <at>  <at>  -1038,7 +1110,7  <at>  <at>  _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
     /* Process data in 3 block chunks. */
     while (nblocks >= 3)
       {
-        _gcry_twofish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+        twofish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
 
         nblocks -= 3;
         outbuf += 3 * TWOFISH_BLOCKSIZE;
 <at>  <at>  -1087,7 +1159,7  <at>  <at>  _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
     /* Process data in 3 block chunks. */
     while (nblocks >= 3)
       {
-        _gcry_twofish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+        twofish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
 
         nblocks -= 3;
         outbuf += 3 * TWOFISH_BLOCKSIZE;

commit eb0ed576893b6c7990dbcb568510f831d246cea6
Author: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
Date:   Thu May 14 13:07:48 2015 +0300

    Enable AMD64 Serpent implementations on WIN64
    
    * cipher/serpent-avx2-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/serpent-sse2-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/serpent.c (USE_SSE2, USE_AVX2): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    [USE_SSE2 || USE_AVX2] (ASM_FUNC_ABI): New.
    (_gcry_serpent_sse2_ctr_enc, _gcry_serpent_sse2_cbc_dec)
    (_gcry_serpent_sse2_cfb_dec, _gcry_serpent_avx2_ctr_enc)
    (_gcry_serpent_avx2_cbc_dec, _gcry_serpent_avx2_cfb_dec): Add
    ASM_FUNC_ABI.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>

diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S
index 03d29ae..3f59f06 100644
--- a/cipher/serpent-avx2-amd64.S
+++ b/cipher/serpent-avx2-amd64.S
 <at>  <at>  -20,9 +20,16  <at>  <at> 
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_SERPENT) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) && \
     defined(ENABLE_AVX2_SUPPORT)
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 #ifdef __PIC__
 #  define RIP (%rip)
 #else
 <at>  <at>  -404,7 +411,7  <at>  <at> 
 .text
 
 .align 8
-.type   __serpent_enc_blk16, <at> function;
+ELF(.type   __serpent_enc_blk16, <at> function;)
 __serpent_enc_blk16:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -489,10 +496,10  <at>  <at>  __serpent_enc_blk16:
 	transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
 
 	ret;
-.size __serpent_enc_blk16,.-__serpent_enc_blk16;
+ELF(.size __serpent_enc_blk16,.-__serpent_enc_blk16;)
 
 .align 8
-.type   __serpent_dec_blk16, <at> function;
+ELF(.type   __serpent_dec_blk16, <at> function;)
 __serpent_dec_blk16:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -579,7 +586,7  <at>  <at>  __serpent_dec_blk16:
 	transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
 
 	ret;
-.size __serpent_dec_blk16,.-__serpent_dec_blk16;
+ELF(.size __serpent_dec_blk16,.-__serpent_dec_blk16;)
 
 #define inc_le128(x, minus_one, tmp) \
 	vpcmpeqq minus_one, x, tmp; \
 <at>  <at>  -589,7 +596,7  <at>  <at>  __serpent_dec_blk16:
 
 .align 8
 .globl _gcry_serpent_avx2_ctr_enc
-.type   _gcry_serpent_avx2_ctr_enc, <at> function;
+ELF(.type   _gcry_serpent_avx2_ctr_enc, <at> function;)
 _gcry_serpent_avx2_ctr_enc:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -695,11 +702,11  <at>  <at>  _gcry_serpent_avx2_ctr_enc:
 	vzeroall;
 
 	ret
-.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;
+ELF(.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;)
 
 .align 8
 .globl _gcry_serpent_avx2_cbc_dec
-.type   _gcry_serpent_avx2_cbc_dec, <at> function;
+ELF(.type   _gcry_serpent_avx2_cbc_dec, <at> function;)
 _gcry_serpent_avx2_cbc_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -746,11 +753,11  <at>  <at>  _gcry_serpent_avx2_cbc_dec:
 	vzeroall;
 
 	ret
-.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;
+ELF(.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;)
 
 .align 8
 .globl _gcry_serpent_avx2_cfb_dec
-.type   _gcry_serpent_avx2_cfb_dec, <at> function;
+ELF(.type   _gcry_serpent_avx2_cfb_dec, <at> function;)
 _gcry_serpent_avx2_cfb_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -799,7 +806,7  <at>  <at>  _gcry_serpent_avx2_cfb_dec:
 	vzeroall;
 
 	ret
-.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;
+ELF(.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;)
 
 .data
 .align 16
diff --git a/cipher/serpent-sse2-amd64.S b/cipher/serpent-sse2-amd64.S
index 395f660..adbf4e2 100644
--- a/cipher/serpent-sse2-amd64.S
+++ b/cipher/serpent-sse2-amd64.S
 <at>  <at>  -20,7 +20,14  <at>  <at> 
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_SERPENT)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT)
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
 
 #ifdef __PIC__
 #  define RIP (%rip)
 <at>  <at>  -427,7 +434,7  <at>  <at> 
 .text
 
 .align 8
-.type   __serpent_enc_blk8, <at> function;
+ELF(.type   __serpent_enc_blk8, <at> function;)
 __serpent_enc_blk8:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -512,10 +519,10  <at>  <at>  __serpent_enc_blk8:
 	transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
 
 	ret;
-.size __serpent_enc_blk8,.-__serpent_enc_blk8;
+ELF(.size __serpent_enc_blk8,.-__serpent_enc_blk8;)
 
 .align 8
-.type   __serpent_dec_blk8, <at> function;
+ELF(.type   __serpent_dec_blk8, <at> function;)
 __serpent_dec_blk8:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -602,11 +609,11  <at>  <at>  __serpent_dec_blk8:
 	transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
 
 	ret;
-.size __serpent_dec_blk8,.-__serpent_dec_blk8;
+ELF(.size __serpent_dec_blk8,.-__serpent_dec_blk8;)
 
 .align 8
 .globl _gcry_serpent_sse2_ctr_enc
-.type   _gcry_serpent_sse2_ctr_enc, <at> function;
+ELF(.type   _gcry_serpent_sse2_ctr_enc, <at> function;)
 _gcry_serpent_sse2_ctr_enc:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -732,11 +739,11  <at>  <at>  _gcry_serpent_sse2_ctr_enc:
 	pxor RNOT, RNOT;
 
 	ret
-.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;
+ELF(.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;)
 
 .align 8
 .globl _gcry_serpent_sse2_cbc_dec
-.type   _gcry_serpent_sse2_cbc_dec, <at> function;
+ELF(.type   _gcry_serpent_sse2_cbc_dec, <at> function;)
 _gcry_serpent_sse2_cbc_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -793,11 +800,11  <at>  <at>  _gcry_serpent_sse2_cbc_dec:
 	pxor RNOT, RNOT;
 
 	ret
-.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;
+ELF(.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;)
 
 .align 8
 .globl _gcry_serpent_sse2_cfb_dec
-.type   _gcry_serpent_sse2_cfb_dec, <at> function;
+ELF(.type   _gcry_serpent_sse2_cfb_dec, <at> function;)
 _gcry_serpent_sse2_cfb_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -857,7 +864,7  <at>  <at>  _gcry_serpent_sse2_cfb_dec:
 	pxor RNOT, RNOT;
 
 	ret
-.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;
+ELF(.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;)
 
 #endif /*defined(USE_SERPENT)*/
 #endif /*__x86_64*/
diff --git a/cipher/serpent.c b/cipher/serpent.c
index 0be49da..7d0e112 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
 <at>  <at>  -34,13 +34,15  <at>  <at> 
 
 /* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
 #undef USE_SSE2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_SSE2 1
 #endif
 
 /* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
 #undef USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # if defined(ENABLE_AVX2_SUPPORT)
 #  define USE_AVX2 1
 # endif
 <at>  <at>  -86,6 +88,18  <at>  <at>  typedef struct serpent_context
 } serpent_context_t;
 
 
+/* Assembly implementations use the SystemV ABI; on Win64, ABI conversion and
+ * additional stack to store XMM6-XMM15 are needed. */
+#undef ASM_FUNC_ABI
+#if defined(USE_SSE2) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+# else
+#  define ASM_FUNC_ABI
+# endif
+#endif
+
+
 #ifdef USE_SSE2
 /* Assembler implementations of Serpent using SSE2.  Process 8 block in
    parallel.
 <at>  <at>  -93,17 +107,17  <at>  <at>  typedef struct serpent_context
 extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
-				       unsigned char *ctr);
+				       unsigned char *ctr) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
-				       unsigned char *iv);
+				       unsigned char *iv) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
-				       unsigned char *iv);
+				       unsigned char *iv) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AVX2
 <at>  <at>  -113,17 +127,17  <at>  <at>  extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
 extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
-				       unsigned char *ctr);
+				       unsigned char *ctr) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
-				       unsigned char *iv);
+				       unsigned char *iv) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
-				       unsigned char *iv);
+				       unsigned char *iv) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_NEON

commit 12bc93ca8187b8061c2e705427ef22f5a71d29b0
Author: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
Date:   Thu May 14 12:37:21 2015 +0300

    Enable AMD64 Salsa20 implementation on WIN64
    
    * cipher/salsa20-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/salsa20.c (USE_AMD64): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    [USE_AMD64] (ASM_FUNC_ABI, ASM_EXTRA_STACK): New.
    (_gcry_salsa20_amd64_keysetup, _gcry_salsa20_amd64_ivsetup)
    (_gcry_salsa20_amd64_encrypt_blocks): Add ASM_FUNC_ABI.
    [USE_AMD64] (salsa20_core): Add ASM_EXTRA_STACK.
    (salsa20_do_encrypt_stream) [USE_AMD64]: Add ASM_EXTRA_STACK.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>

diff --git a/cipher/salsa20-amd64.S b/cipher/salsa20-amd64.S
index 7046dbb..470c32a 100644
--- a/cipher/salsa20-amd64.S
+++ b/cipher/salsa20-amd64.S
 <at>  <at>  -25,13 +25,20  <at>  <at> 
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_SALSA20)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SALSA20)
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
 
 .text
 
 .align 8
 .globl _gcry_salsa20_amd64_keysetup
-.type  _gcry_salsa20_amd64_keysetup, <at> function;
+ELF(.type  _gcry_salsa20_amd64_keysetup, <at> function;)
 _gcry_salsa20_amd64_keysetup:
 	movl   0(%rsi),%r8d
 	movl   4(%rsi),%r9d
 <at>  <at>  -83,7 +90,7  <at>  <at>  _gcry_salsa20_amd64_keysetup:
 
 .align 8
 .globl _gcry_salsa20_amd64_ivsetup
-.type  _gcry_salsa20_amd64_ivsetup, <at> function;
+ELF(.type  _gcry_salsa20_amd64_ivsetup, <at> function;)
 _gcry_salsa20_amd64_ivsetup:
 	movl   0(%rsi),%r8d
 	movl   4(%rsi),%esi
 <at>  <at>  -97,7 +104,7  <at>  <at>  _gcry_salsa20_amd64_ivsetup:
 
 .align 8
 .globl _gcry_salsa20_amd64_encrypt_blocks
-.type  _gcry_salsa20_amd64_encrypt_blocks, <at> function;
+ELF(.type  _gcry_salsa20_amd64_encrypt_blocks, <at> function;)
 _gcry_salsa20_amd64_encrypt_blocks:
 	/*
 	 * Modifications to original implementation:
 <at>  <at>  -918,7 +925,7  <at>  <at>  _gcry_salsa20_amd64_encrypt_blocks:
 	add  $64,%rdi
 	add  $64,%rsi
 	jmp .L_bytes_are_64_128_or_192
-.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;
+ELF(.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;)
 
 #endif /*defined(USE_SALSA20)*/
 #endif /*__x86_64*/
diff --git a/cipher/salsa20.c b/cipher/salsa20.c
index d75fe51..fa3d23b 100644
--- a/cipher/salsa20.c
+++ b/cipher/salsa20.c
 <at>  <at>  -43,7 +43,8  <at>  <at> 
 
 /* USE_AMD64 indicates whether to compile with AMD64 code. */
 #undef USE_AMD64
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64 1
 #endif
 
 <at>  <at>  -118,12 +119,25  <at>  <at>  static const char *selftest (void);
 
 
 #ifdef USE_AMD64
+
+/* Assembly implementations use the SystemV ABI; on Win64, ABI conversion and
+ * additional stack to store XMM6-XMM15 are needed. */
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
+
 /* AMD64 assembly implementations of Salsa20. */
-void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits);
-void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv);
+void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits)
+                                 ASM_FUNC_ABI;
+void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv)
+                                ASM_FUNC_ABI;
 unsigned int
 _gcry_salsa20_amd64_encrypt_blocks(u32 *ctxinput, const void *src, void *dst,
-                                   size_t len, int rounds);
+                                   size_t len, int rounds) ASM_FUNC_ABI;
 
 static void
 salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen)
 <at>  <at>  -141,7 +155,8  <at>  <at>  static unsigned int
 salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds)
 {
   memset(dst, 0, SALSA20_BLOCK_SIZE);
-  return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds);
+  return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds)
+         + ASM_EXTRA_STACK;
 }
 
 #else /* USE_AMD64 */
 <at>  <at>  -418,6 +433,7  <at>  <at>  salsa20_do_encrypt_stream (SALSA20_context_t *ctx,
       size_t nblocks = length / SALSA20_BLOCK_SIZE;
       burn = _gcry_salsa20_amd64_encrypt_blocks(ctx->input, inbuf, outbuf,
                                                 nblocks, rounds);
+      burn += ASM_EXTRA_STACK;
       length -= SALSA20_BLOCK_SIZE * nblocks;
       outbuf += SALSA20_BLOCK_SIZE * nblocks;
       inbuf  += SALSA20_BLOCK_SIZE * nblocks;

commit 8d7de4dbf7732c6eb9e9853ad7c19c89075ace6f
Author: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
Date:   Thu May 14 12:39:39 2015 +0300

    Enable AMD64 Poly1305 implementations on WIN64
    
    * cipher/poly1305-avx2-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/poly1305-sse2-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/poly1305-internal.h (POLY1305_SYSV_FUNC_ABI): New.
    (POLY1305_USE_SSE2, POLY1305_USE_AVX2): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (OPS_FUNC_ABI): New.
    (poly1305_ops_t): Use OPS_FUNC_ABI.
    * cipher/poly1305.c (_gcry_poly1305_amd64_sse2_init_ext)
    (_gcry_poly1305_amd64_sse2_finish_ext)
    (_gcry_poly1305_amd64_sse2_blocks, _gcry_poly1305_amd64_avx2_init_ext)
    (_gcry_poly1305_amd64_avx2_finish_ext)
    (_gcry_poly1305_amd64_avx2_blocks, _gcry_poly1305_armv7_neon_init_ext)
    (_gcry_poly1305_armv7_neon_finish_ext)
    (_gcry_poly1305_armv7_neon_blocks, poly1305_init_ext_ref32)
    (poly1305_blocks_ref32, poly1305_finish_ext_ref32)
    (poly1305_init_ext_ref8, poly1305_blocks_ref8)
    (poly1305_finish_ext_ref8): Use OPS_FUNC_ABI.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>

diff --git a/cipher/poly1305-avx2-amd64.S b/cipher/poly1305-avx2-amd64.S
index 0ba7e76..9362a5a 100644
--- a/cipher/poly1305-avx2-amd64.S
+++ b/cipher/poly1305-avx2-amd64.S
 <at>  <at>  -25,15 +25,23  <at>  <at> 
 
 #include <config.h>
 
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(ENABLE_AVX2_SUPPORT)
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
 .text
 
 
 .align 8
 .globl _gcry_poly1305_amd64_avx2_init_ext
-.type  _gcry_poly1305_amd64_avx2_init_ext, <at> function;
+ELF(.type  _gcry_poly1305_amd64_avx2_init_ext, <at> function;)
 _gcry_poly1305_amd64_avx2_init_ext:
 .Lpoly1305_init_ext_avx2_local:
 	xor %edx, %edx
 <at>  <at>  -391,12 +399,12  <at>  <at>  _gcry_poly1305_amd64_avx2_init_ext:
 	popq %r13
 	popq %r12
 	ret
-.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext;
+ELF(.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext;)
 
 
 .align 8
 .globl _gcry_poly1305_amd64_avx2_blocks
-.type  _gcry_poly1305_amd64_avx2_blocks, <at> function;
+ELF(.type  _gcry_poly1305_amd64_avx2_blocks, <at> function;)
 _gcry_poly1305_amd64_avx2_blocks:
 .Lpoly1305_blocks_avx2_local:
 	vzeroupper
 <at>  <at>  -717,12 +725,12  <at>  <at>  _gcry_poly1305_amd64_avx2_blocks:
 	leave
 	addq $8, %rax
 	ret
-.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks;
+ELF(.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks;)
 
 
 .align 8
 .globl _gcry_poly1305_amd64_avx2_finish_ext
-.type  _gcry_poly1305_amd64_avx2_finish_ext, <at> function;
+ELF(.type  _gcry_poly1305_amd64_avx2_finish_ext, <at> function;)
 _gcry_poly1305_amd64_avx2_finish_ext:
 .Lpoly1305_finish_ext_avx2_local:
 	vzeroupper
 <at>  <at>  -949,6 +957,6  <at>  <at>  _gcry_poly1305_amd64_avx2_finish_ext:
 	popq %rbp
 	addq $(8*5), %rax
 ret
-.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext;
+ELF(.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext;)
 
 #endif
diff --git a/cipher/poly1305-internal.h b/cipher/poly1305-internal.h
index dfc0c04..bcbe5df 100644
--- a/cipher/poly1305-internal.h
+++ b/cipher/poly1305-internal.h
 <at>  <at>  -44,24 +44,30  <at>  <at> 
 #define POLY1305_REF_ALIGNMENT sizeof(void *)
 
 
+#undef POLY1305_SYSV_FUNC_ABI
+
 /* POLY1305_USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
 #undef POLY1305_USE_SSE2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define POLY1305_USE_SSE2 1
 # define POLY1305_SSE2_BLOCKSIZE 32
 # define POLY1305_SSE2_STATESIZE 248
 # define POLY1305_SSE2_ALIGNMENT 16
+# define POLY1305_SYSV_FUNC_ABI 1
 #endif
 
 
 /* POLY1305_USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
 #undef POLY1305_USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(ENABLE_AVX2_SUPPORT)
 # define POLY1305_USE_AVX2 1
 # define POLY1305_AVX2_BLOCKSIZE 64
 # define POLY1305_AVX2_STATESIZE 328
 # define POLY1305_AVX2_ALIGNMENT 32
+# define POLY1305_SYSV_FUNC_ABI 1
 #endif
 
 
 <at>  <at>  -112,6 +118,17  <at>  <at> 
 #endif
 
 
+/* Assembly implementations use the SystemV ABI; on Win64, ABI conversion and
+ * additional stack to store XMM6-XMM15 are needed. */
+#undef OPS_FUNC_ABI
+#if defined(POLY1305_SYSV_FUNC_ABI) && \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)
+# define OPS_FUNC_ABI __attribute__((sysv_abi))
+#else
+# define OPS_FUNC_ABI
+#endif
+
+
 typedef struct poly1305_key_s
 {
   byte b[POLY1305_KEYLEN];
 <at>  <at>  -121,10 +138,10  <at>  <at>  typedef struct poly1305_key_s
 typedef struct poly1305_ops_s
 {
   size_t block_size;
-  void (*init_ext) (void *ctx, const poly1305_key_t * key);
-  unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes);
+  void (*init_ext) (void *ctx, const poly1305_key_t * key) OPS_FUNC_ABI;
+  unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes) OPS_FUNC_ABI;
   unsigned int (*finish_ext) (void *ctx, const byte * m, size_t remaining,
-			      byte mac[POLY1305_TAGLEN]);
+			      byte mac[POLY1305_TAGLEN]) OPS_FUNC_ABI;
 } poly1305_ops_t;
 
 
diff --git a/cipher/poly1305-sse2-amd64.S b/cipher/poly1305-sse2-amd64.S
index 106b119..219eb07 100644
--- a/cipher/poly1305-sse2-amd64.S
+++ b/cipher/poly1305-sse2-amd64.S
 <at>  <at>  -25,14 +25,22  <at>  <at> 
 
 #include <config.h>
 
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 
 .text
 
 
 .align 8
 .globl _gcry_poly1305_amd64_sse2_init_ext
-.type  _gcry_poly1305_amd64_sse2_init_ext, <at> function;
+ELF(.type  _gcry_poly1305_amd64_sse2_init_ext, <at> function;)
 _gcry_poly1305_amd64_sse2_init_ext:
 .Lpoly1305_init_ext_x86_local:
 	xor %edx, %edx
 <at>  <at>  -273,12 +281,12  <at>  <at>  _gcry_poly1305_amd64_sse2_init_ext:
 	popq %r13
 	popq %r12
 	ret
-.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext;
+ELF(.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext;)
 
 
 .align 8
 .globl _gcry_poly1305_amd64_sse2_finish_ext
-.type  _gcry_poly1305_amd64_sse2_finish_ext, <at> function;
+ELF(.type  _gcry_poly1305_amd64_sse2_finish_ext, <at> function;)
 _gcry_poly1305_amd64_sse2_finish_ext:
 .Lpoly1305_finish_ext_x86_local:
 	pushq %rbp
 <at>  <at>  -424,12 +432,12  <at>  <at>  _gcry_poly1305_amd64_sse2_finish_ext:
 	popq %rbp
 	addq $8, %rax
 	ret
-.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext;
+ELF(.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext;)
 
 
 .align 8
 .globl _gcry_poly1305_amd64_sse2_blocks
-.type  _gcry_poly1305_amd64_sse2_blocks, <at> function;
+ELF(.type  _gcry_poly1305_amd64_sse2_blocks, <at> function;)
 _gcry_poly1305_amd64_sse2_blocks:
 .Lpoly1305_blocks_x86_local:
 	pushq %rbp
 <at>  <at>  -1030,6 +1038,6  <at>  <at>  _gcry_poly1305_amd64_sse2_blocks:
 	pxor %xmm8, %xmm8
 	pxor %xmm0, %xmm0
 	ret
-.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks;
+ELF(.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks;)
 
 #endif
diff --git a/cipher/poly1305.c b/cipher/poly1305.c
index 28dbbf8..1adf0e7 100644
--- a/cipher/poly1305.c
+++ b/cipher/poly1305.c
 <at>  <at>  -40,12 +40,13  <at>  <at>  static const char *selftest (void);
 
 #ifdef POLY1305_USE_SSE2
 
-void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key)
+                                       OPS_FUNC_ABI;
 unsigned int _gcry_poly1305_amd64_sse2_finish_ext(void *state, const byte *m,
 						  size_t remaining,
-						  byte mac[16]);
+						  byte mac[16]) OPS_FUNC_ABI;
 unsigned int _gcry_poly1305_amd64_sse2_blocks(void *ctx, const byte *m,
-					      size_t bytes);
+					      size_t bytes) OPS_FUNC_ABI;
 
 static const poly1305_ops_t poly1305_amd64_sse2_ops = {
   POLY1305_SSE2_BLOCKSIZE,
 <at>  <at>  -59,12 +60,13  <at>  <at>  static const poly1305_ops_t poly1305_amd64_sse2_ops = {
 
 #ifdef POLY1305_USE_AVX2
 
-void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key)
+                                       OPS_FUNC_ABI;
 unsigned int _gcry_poly1305_amd64_avx2_finish_ext(void *state, const byte *m,
 						  size_t remaining,
-						  byte mac[16]);
+						  byte mac[16]) OPS_FUNC_ABI;
 unsigned int _gcry_poly1305_amd64_avx2_blocks(void *ctx, const byte *m,
-					      size_t bytes);
+					      size_t bytes) OPS_FUNC_ABI;
 
 static const poly1305_ops_t poly1305_amd64_avx2_ops = {
   POLY1305_AVX2_BLOCKSIZE,
 <at>  <at>  -78,12 +80,13  <at>  <at>  static const poly1305_ops_t poly1305_amd64_avx2_ops = {
 
 #ifdef POLY1305_USE_NEON
 
-void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key)
+                                       OPS_FUNC_ABI;
 unsigned int _gcry_poly1305_armv7_neon_finish_ext(void *state, const byte *m,
 						  size_t remaining,
-						  byte mac[16]);
+						  byte mac[16]) OPS_FUNC_ABI;
 unsigned int _gcry_poly1305_armv7_neon_blocks(void *ctx, const byte *m,
-					      size_t bytes);
+					      size_t bytes) OPS_FUNC_ABI;
 
 static const poly1305_ops_t poly1305_armv7_neon_ops = {
   POLY1305_NEON_BLOCKSIZE,
 <at>  <at>  -110,7 +113,7  <at>  <at>  typedef struct poly1305_state_ref32_s
 } poly1305_state_ref32_t;
 
 
-static void
+static OPS_FUNC_ABI void
 poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key)
 {
   poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state;
 <at>  <at>  -142,7 +145,7  <at>  <at>  poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key)
 }
 
 
-static unsigned int
+static OPS_FUNC_ABI unsigned int
 poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes)
 {
   poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state;
 <at>  <at>  -230,7 +233,7  <at>  <at>  poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes)
 }
 
 
-static unsigned int
+static OPS_FUNC_ABI unsigned int
 poly1305_finish_ext_ref32 (void *state, const byte * m,
 			   size_t remaining, byte mac[POLY1305_TAGLEN])
 {
 <at>  <at>  -370,7 +373,7  <at>  <at>  typedef struct poly1305_state_ref8_t
 } poly1305_state_ref8_t;
 
 
-static void
+static OPS_FUNC_ABI void
 poly1305_init_ext_ref8 (void *state, const poly1305_key_t * key)
 {
   poly1305_state_ref8_t *st = (poly1305_state_ref8_t *) state;
 <at>  <at>  -471,7 +474,7  <at>  <at>  poly1305_freeze_ref8 (byte h[17])
 }
 
 
-static unsigned int
+static OPS_FUNC_ABI unsigned int
 poly1305_blocks_ref8 (void *state, const byte * m, size_t bytes)
 {
   poly1305_state_ref8_t *st = (poly1305_state_ref8_t *) state;
 <at>  <at>  -519,7 +522,7  <at>  <at>  poly1305_blocks_ref8 (void *state, const byte * m, size_t bytes)
 }
 
 
-static unsigned int
+static OPS_FUNC_ABI unsigned int
 poly1305_finish_ext_ref8 (void *state, const byte * m, size_t remaining,
 			  byte mac[POLY1305_TAGLEN])
 {

commit b65e9e71d5ee992db5c96793c6af999545daad28
Author: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
Date:   Thu May 14 10:31:18 2015 +0300

    Enable AMD64 3DES implementation on WIN64
    
    * cipher/des-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/des.c (USE_AMD64_ASM): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (call_sysv_fn): New.
    (tripledes_ecb_crypt) [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]: Call
    assembly function through 'call_sysv_fn'.
    (tripledes_amd64_ctr_enc, tripledes_amd64_cbc_dec)
    (tripledes_amd64_cfb_dec): New wrapper functions for bulk
    assembly functions.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>

diff --git a/cipher/des-amd64.S b/cipher/des-amd64.S
index e8b2c56..307d211 100644
--- a/cipher/des-amd64.S
+++ b/cipher/des-amd64.S
 <at>  <at>  -20,7 +20,8  <at>  <at> 
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(USE_DES) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(USE_DES) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 
 #ifdef __PIC__
 #  define RIP (%rip)
 <at>  <at>  -28,6 +29,12  <at>  <at> 
 #  define RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .text
 
 #define s1 0
 <at>  <at>  -185,7 +192,7  <at>  <at> 
 
 .align 8
 .globl _gcry_3des_amd64_crypt_block
-.type  _gcry_3des_amd64_crypt_block, <at> function;
+ELF(.type  _gcry_3des_amd64_crypt_block, <at> function;)
 
 _gcry_3des_amd64_crypt_block:
 	/* input:
 <at>  <at>  -271,7 +278,7  <at>  <at>  _gcry_3des_amd64_crypt_block:
 	popq %rbp;
 
 	ret;
-.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;
+ELF(.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;)
 
 /***********************************************************************
  * 3-way 3DES
 <at>  <at>  -458,7 +465,7  <at>  <at>  _gcry_3des_amd64_crypt_block:
 	movl   right##d, 4(io);
 
 .align 8
-.type  _gcry_3des_amd64_crypt_blk3, <at> function;
+ELF(.type  _gcry_3des_amd64_crypt_blk3, <at> function;)
 _gcry_3des_amd64_crypt_blk3:
 	/* input:
 	 *  %rdi: round keys, CTX
 <at>  <at>  -528,11 +535,11  <at>  <at>  _gcry_3des_amd64_crypt_blk3:
 	final_permutation3(RR, RL);
 
 	ret;
-.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;
+ELF(.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;)
 
 .align 8
 .globl  _gcry_3des_amd64_cbc_dec
-.type   _gcry_3des_amd64_cbc_dec, <at> function;
+ELF(.type   _gcry_3des_amd64_cbc_dec, <at> function;)
 _gcry_3des_amd64_cbc_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -604,11 +611,11  <at>  <at>  _gcry_3des_amd64_cbc_dec:
 	popq %rbp;
 
 	ret;
-.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;
+ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
 
 .align 8
 .globl  _gcry_3des_amd64_ctr_enc
-.type   _gcry_3des_amd64_ctr_enc, <at> function;
+ELF(.type   _gcry_3des_amd64_ctr_enc, <at> function;)
 _gcry_3des_amd64_ctr_enc:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -682,11 +689,11  <at>  <at>  _gcry_3des_amd64_ctr_enc:
 	popq %rbp;
 
 	ret;
-.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;
+ELF(.size _gcry_3des_amd64_ctr_enc,.-_gcry_3des_amd64_ctr_enc;)
 
 .align 8
 .globl  _gcry_3des_amd64_cfb_dec
-.type   _gcry_3des_amd64_cfb_dec, <at> function;
+ELF(.type   _gcry_3des_amd64_cfb_dec, <at> function;)
 _gcry_3des_amd64_cfb_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -757,7 +764,7  <at>  <at>  _gcry_3des_amd64_cfb_dec:
 	popq %rbx;
 	popq %rbp;
 	ret;
-.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;
+ELF(.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;)
 
 .data
 .align 16
diff --git a/cipher/des.c b/cipher/des.c
index d4863d1..be62763 100644
--- a/cipher/des.c
+++ b/cipher/des.c
 <at>  <at>  -127,7 +127,8  <at>  <at> 
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
 #undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64_ASM 1
 #endif
 
 <at>  <at>  -771,6 +772,24  <at>  <at>  extern void _gcry_3des_amd64_cfb_dec(const void *keys, byte *out,
 
 #define TRIPLEDES_ECB_BURN_STACK (8 * sizeof(void *))
 
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+static inline void
+call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
+              const void *arg3, const void *arg4)
+{
+  /* Call SystemV ABI function without storing non-volatile XMM registers,
+   * as target function does not use vector instruction sets. */
+  asm volatile ("callq *%0\n\t"
+                : "+a" (fn),
+                  "+D" (arg1),
+                  "+S" (arg2),
+                  "+d" (arg3),
+                  "+c" (arg4)
+                :
+                : "cc", "memory", "r8", "r9", "r10", "r11");
+}
+#endif
+
 /*
  * Electronic Codebook Mode Triple-DES encryption/decryption of data
  * according to 'mode'.  Sometimes this mode is named 'EDE' mode
 <at>  <at>  -784,11 +803,45  <at>  <at>  tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from,
 
   keys = mode ? ctx->decrypt_subkeys : ctx->encrypt_subkeys;
 
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_3des_amd64_crypt_block, keys, to, from, NULL);
+#else
   _gcry_3des_amd64_crypt_block(keys, to, from);
+#endif
 
   return 0;
 }
 
+static inline void
+tripledes_amd64_ctr_enc(const void *keys, byte *out, const byte *in, byte *ctr)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_3des_amd64_ctr_enc, keys, out, in, ctr);
+#else
+  _gcry_3des_amd64_ctr_enc(keys, out, in, ctr);
+#endif
+}
+
+static inline void
+tripledes_amd64_cbc_dec(const void *keys, byte *out, const byte *in, byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_3des_amd64_cbc_dec, keys, out, in, iv);
+#else
+  _gcry_3des_amd64_cbc_dec(keys, out, in, iv);
+#endif
+}
+
+static inline void
+tripledes_amd64_cfb_dec(const void *keys, byte *out, const byte *in, byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_3des_amd64_cfb_dec, keys, out, in, iv);
+#else
+  _gcry_3des_amd64_cfb_dec(keys, out, in, iv);
+#endif
+}
+
 #else /*USE_AMD64_ASM*/
 
 #define TRIPLEDES_ECB_BURN_STACK 32
 <at>  <at>  -871,7 +924,7  <at>  <at>  _gcry_3des_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
     /* Process data in 3 block chunks. */
     while (nblocks >= 3)
       {
-        _gcry_3des_amd64_ctr_enc(ctx->encrypt_subkeys, outbuf, inbuf, ctr);
+        tripledes_amd64_ctr_enc(ctx->encrypt_subkeys, outbuf, inbuf, ctr);
 
         nblocks -= 3;
         outbuf += 3 * DES_BLOCKSIZE;
 <at>  <at>  -926,7 +979,7  <at>  <at>  _gcry_3des_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
     /* Process data in 3 block chunks. */
     while (nblocks >= 3)
       {
-        _gcry_3des_amd64_cbc_dec(ctx->decrypt_subkeys, outbuf, inbuf, iv);
+        tripledes_amd64_cbc_dec(ctx->decrypt_subkeys, outbuf, inbuf, iv);
 
         nblocks -= 3;
         outbuf += 3 * DES_BLOCKSIZE;
 <at>  <at>  -974,7 +1027,7  <at>  <at>  _gcry_3des_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
     /* Process data in 3 block chunks. */
     while (nblocks >= 3)
       {
-        _gcry_3des_amd64_cfb_dec(ctx->encrypt_subkeys, outbuf, inbuf, iv);
+        tripledes_amd64_cfb_dec(ctx->encrypt_subkeys, outbuf, inbuf, iv);
 
         nblocks -= 3;
         outbuf += 3 * DES_BLOCKSIZE;

commit 9597cfddf03c467825da152be5ca0d12a8c30d88
Author: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
Date:   Tue May 5 21:02:43 2015 +0300

    Enable AMD64 ChaCha20 implementations on WIN64
    
    * cipher/chacha20-avx2-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/chacha20-sse2-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/chacha20-ssse3-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/chacha20.c (USE_SSE2, USE_SSSE3, USE_AVX2): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ASM_FUNC_ABI, ASM_EXTRA_STACK): New.
    (chacha20_blocks_t, _gcry_chacha20_amd64_sse2_blocks)
    (_gcry_chacha20_amd64_ssse3_blocks, _gcry_chacha20_amd64_avx2_blocks)
    (_gcry_chacha20_armv7_neon_blocks, chacha20_blocks): Add ASM_FUNC_ABI.
    (chacha20_core): Add ASM_EXTRA_STACK.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>

diff --git a/cipher/chacha20-avx2-amd64.S b/cipher/chacha20-avx2-amd64.S
index 1f33de8..12bed35 100644
--- a/cipher/chacha20-avx2-amd64.S
+++ b/cipher/chacha20-avx2-amd64.S
 <at>  <at>  -26,7 +26,8  <at>  <at> 
 #ifdef __x86_64__
 #include <config.h>
 
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(ENABLE_AVX2_SUPPORT) && USE_CHACHA20
 
 #ifdef __PIC__
 <at>  <at>  -35,11 +36,17  <at>  <at> 
 #  define RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .text
 
 .align 8
 .globl _gcry_chacha20_amd64_avx2_blocks
-.type  _gcry_chacha20_amd64_avx2_blocks, <at> function;
+ELF(.type  _gcry_chacha20_amd64_avx2_blocks, <at> function;)
 _gcry_chacha20_amd64_avx2_blocks:
 .Lchacha_blocks_avx2_local:
 	vzeroupper
 <at>  <at>  -938,7 +945,7  <at>  <at>  _gcry_chacha20_amd64_avx2_blocks:
 	vzeroall
 	movl $(63 + 512), %eax
 	ret
-.size _gcry_chacha20_amd64_avx2_blocks,.-_gcry_chacha20_amd64_avx2_blocks;
+ELF(.size _gcry_chacha20_amd64_avx2_blocks,.-_gcry_chacha20_amd64_avx2_blocks;)
 
 .data
 .align 16
diff --git a/cipher/chacha20-sse2-amd64.S b/cipher/chacha20-sse2-amd64.S
index 4811f40..2b9842c 100644
--- a/cipher/chacha20-sse2-amd64.S
+++ b/cipher/chacha20-sse2-amd64.S
 <at>  <at>  -26,13 +26,20  <at>  <at> 
 #ifdef __x86_64__
 #include <config.h>
 
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && USE_CHACHA20
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && USE_CHACHA20
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
 
 .text
 
 .align 8
 .globl _gcry_chacha20_amd64_sse2_blocks
-.type  _gcry_chacha20_amd64_sse2_blocks, <at> function;
+ELF(.type  _gcry_chacha20_amd64_sse2_blocks, <at> function;)
 _gcry_chacha20_amd64_sse2_blocks:
 .Lchacha_blocks_sse2_local:
 	pushq %rbx
 <at>  <at>  -646,7 +653,7  <at>  <at>  _gcry_chacha20_amd64_sse2_blocks:
 	pxor %xmm8, %xmm8
 	pxor %xmm0, %xmm0
 	ret
-.size _gcry_chacha20_amd64_sse2_blocks,.-_gcry_chacha20_amd64_sse2_blocks;
+ELF(.size _gcry_chacha20_amd64_sse2_blocks,.-_gcry_chacha20_amd64_sse2_blocks;)
 
 #endif /*defined(USE_CHACHA20)*/
 #endif /*__x86_64*/
diff --git a/cipher/chacha20-ssse3-amd64.S b/cipher/chacha20-ssse3-amd64.S
index 50c2ff8..a1a843f 100644
--- a/cipher/chacha20-ssse3-amd64.S
+++ b/cipher/chacha20-ssse3-amd64.S
 <at>  <at>  -26,7 +26,8  <at>  <at> 
 #ifdef __x86_64__
 #include <config.h>
 
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_GCC_INLINE_ASM_SSSE3) && USE_CHACHA20
 
 #ifdef __PIC__
 <at>  <at>  -35,11 +36,17  <at>  <at> 
 #  define RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .text
 
 .align 8
 .globl _gcry_chacha20_amd64_ssse3_blocks
-.type  _gcry_chacha20_amd64_ssse3_blocks, <at> function;
+ELF(.type  _gcry_chacha20_amd64_ssse3_blocks, <at> function;)
 _gcry_chacha20_amd64_ssse3_blocks:
 .Lchacha_blocks_ssse3_local:
 	pushq %rbx
 <at>  <at>  -614,7 +621,7  <at>  <at>  _gcry_chacha20_amd64_ssse3_blocks:
 	pxor %xmm8, %xmm8
 	pxor %xmm0, %xmm0
 	ret
-.size _gcry_chacha20_amd64_ssse3_blocks,.-_gcry_chacha20_amd64_ssse3_blocks;
+ELF(.size _gcry_chacha20_amd64_ssse3_blocks,.-_gcry_chacha20_amd64_ssse3_blocks;)
 
 .data
 .align 16;
diff --git a/cipher/chacha20.c b/cipher/chacha20.c
index 2eaeffd..e25e239 100644
--- a/cipher/chacha20.c
+++ b/cipher/chacha20.c
 <at>  <at>  -50,20 +50,23  <at>  <at> 
 
 /* USE_SSE2 indicates whether to compile with Intel SSE2 code. */
 #undef USE_SSE2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_SSE2 1
 #endif
 
 /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
 #undef USE_SSSE3
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_GCC_INLINE_ASM_SSSE3)
 # define USE_SSSE3 1
 #endif
 
 /* USE_AVX2 indicates whether to compile with Intel AVX2 code. */
 #undef USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(ENABLE_AVX2_SUPPORT)
 # define USE_AVX2 1
 #endif
 <at>  <at>  -82,8 +85,23  <at>  <at> 
 struct CHACHA20_context_s;
 
 
+/* Assembly implementations use the SystemV ABI; on Win64 this requires ABI
+ * conversion and additional stack to store XMM6-XMM15. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if (defined(USE_SSE2) || defined(USE_SSSE3) || defined(USE_AVX2)) && \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
+
+
 typedef unsigned int (* chacha20_blocks_t)(u32 *state, const byte *src,
-                                           byte *dst, size_t bytes);
+                                           byte *dst,
+                                           size_t bytes) ASM_FUNC_ABI;
 
 typedef struct CHACHA20_context_s
 {
 <at>  <at>  -97,28 +115,32  <at>  <at>  typedef struct CHACHA20_context_s
 #ifdef USE_SSE2
 
 unsigned int _gcry_chacha20_amd64_sse2_blocks(u32 *state, const byte *in,
-                                              byte *out, size_t bytes);
+                                              byte *out,
+                                              size_t bytes) ASM_FUNC_ABI;
 
 #endif /* USE_SSE2 */
 
 #ifdef USE_SSSE3
 
 unsigned int _gcry_chacha20_amd64_ssse3_blocks(u32 *state, const byte *in,
-                                               byte *out, size_t bytes);
+                                               byte *out,
+                                               size_t bytes) ASM_FUNC_ABI;
 
 #endif /* USE_SSSE3 */
 
 #ifdef USE_AVX2
 
 unsigned int _gcry_chacha20_amd64_avx2_blocks(u32 *state, const byte *in,
-                                              byte *out, size_t bytes);
+                                              byte *out,
+                                              size_t bytes) ASM_FUNC_ABI;
 
 #endif /* USE_AVX2 */
 
 #ifdef USE_NEON
 
 unsigned int _gcry_chacha20_armv7_neon_blocks(u32 *state, const byte *in,
-                                              byte *out, size_t bytes);
+                                              byte *out,
+                                              size_t bytes) ASM_FUNC_ABI;
 
 #endif /* USE_NEON */
 
 <at>  <at>  -141,7 +163,7  <at>  <at>  static const char *selftest (void);
 
 
 #ifndef USE_SSE2
-static unsigned int
+ASM_FUNC_ABI static unsigned int
 chacha20_blocks (u32 *state, const byte *src, byte *dst, size_t bytes)
 {
   u32 pad[CHACHA20_INPUT_LENGTH];
 <at>  <at>  -269,7 +291,8  <at>  <at>  chacha20_blocks (u32 *state, const byte *src, byte *dst, size_t bytes)
 static unsigned int
 chacha20_core(u32 *dst, struct CHACHA20_context_s *ctx)
 {
-  return ctx->blocks(ctx->input, NULL, (byte *)dst, CHACHA20_BLOCK_SIZE);
+  return ctx->blocks(ctx->input, NULL, (byte *)dst, CHACHA20_BLOCK_SIZE)
+         + ASM_EXTRA_STACK;
 }
 
 

commit 6a6646df80386204675d8b149ab60e74d7ca124c
Author: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
Date:   Tue May 5 20:46:10 2015 +0300

    Enable AMD64 CAST5 implementation on WIN64
    
    * cipher/cast5-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (RIP): Remove.
    (GET_EXTERN_POINTER): Use 'leaq' version on WIN64.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/cast5.c (USE_AMD64_ASM): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (call_sysv_fn): New.
    (do_encrypt_block, do_decrypt_block)
    [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]: Call assembly
    function through 'call_sysv_fn'.
    (cast5_amd64_ctr_enc, cast5_amd64_cbc_dec)
    (cast5_amd64_cfb_dec): New wrapper functions for bulk
    assembly functions.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>

diff --git a/cipher/cast5-amd64.S b/cipher/cast5-amd64.S
index 41fbb74..a5f078e 100644
--- a/cipher/cast5-amd64.S
+++ b/cipher/cast5-amd64.S
 <at>  <at>  -20,14 +20,19  <at>  <at> 
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_CAST5)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_CAST5)
 
-#ifdef __PIC__
-#  define RIP %rip
+#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__)
+#  define GET_EXTERN_POINTER(name, reg) leaq name, reg
+#else
 #  define GET_EXTERN_POINTER(name, reg) movq name <at> GOTPCREL(%rip), reg
+#endif
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
 #else
-#  define RIP
-#  define GET_EXTERN_POINTER(name, reg) leaq name, reg
+# define ELF(...) /*_*/
 #endif
 
 .text
 <at>  <at>  -180,7 +185,7  <at>  <at> 
 
 .align 8
 .globl _gcry_cast5_amd64_encrypt_block
-.type   _gcry_cast5_amd64_encrypt_block, <at> function;
+ELF(.type   _gcry_cast5_amd64_encrypt_block, <at> function;)
 
 _gcry_cast5_amd64_encrypt_block:
 	/* input:
 <at>  <at>  -216,11 +221,11  <at>  <at>  _gcry_cast5_amd64_encrypt_block:
 	popq %rbx;
 	popq %rbp;
 	ret;
-.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;
+ELF(.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;)
 
 .align 8
 .globl _gcry_cast5_amd64_decrypt_block
-.type   _gcry_cast5_amd64_decrypt_block, <at> function;
+ELF(.type   _gcry_cast5_amd64_decrypt_block, <at> function;)
 
 _gcry_cast5_amd64_decrypt_block:
 	/* input:
 <at>  <at>  -256,7 +261,7  <at>  <at>  _gcry_cast5_amd64_decrypt_block:
 	popq %rbx;
 	popq %rbp;
 	ret;
-.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;
+ELF(.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;)
 
 /**********************************************************************
   4-way cast5, four blocks parallel
 <at>  <at>  -359,7 +364,7  <at>  <at>  _gcry_cast5_amd64_decrypt_block:
 	rorq $32,		d;
 
 .align 8
-.type   __cast5_enc_blk4, <at> function;
+ELF(.type   __cast5_enc_blk4, <at> function;)
 
 __cast5_enc_blk4:
 	/* input:
 <at>  <at>  -384,10 +389,10  <at>  <at>  __cast5_enc_blk4:
 
 	outbswap_block4(RLR0, RLR1, RLR2, RLR3);
 	ret;
-.size __cast5_enc_blk4,.-__cast5_enc_blk4;
+ELF(.size __cast5_enc_blk4,.-__cast5_enc_blk4;)
 
 .align 8
-.type   __cast5_dec_blk4, <at> function;
+ELF(.type   __cast5_dec_blk4, <at> function;)
 
 __cast5_dec_blk4:
 	/* input:
 <at>  <at>  -414,11 +419,11  <at>  <at>  __cast5_dec_blk4:
 
 	outbswap_block4(RLR0, RLR1, RLR2, RLR3);
 	ret;
-.size __cast5_dec_blk4,.-__cast5_dec_blk4;
+ELF(.size __cast5_dec_blk4,.-__cast5_dec_blk4;)
 
 .align 8
 .globl _gcry_cast5_amd64_ctr_enc
-.type   _gcry_cast5_amd64_ctr_enc, <at> function;
+ELF(.type   _gcry_cast5_amd64_ctr_enc, <at> function;)
 _gcry_cast5_amd64_ctr_enc:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -472,11 +477,11  <at>  <at>  _gcry_cast5_amd64_ctr_enc:
 	popq %rbx;
 	popq %rbp;
 	ret
-.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;
+ELF(.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;)
 
 .align 8
 .globl _gcry_cast5_amd64_cbc_dec
-.type   _gcry_cast5_amd64_cbc_dec, <at> function;
+ELF(.type   _gcry_cast5_amd64_cbc_dec, <at> function;)
 _gcry_cast5_amd64_cbc_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -526,11 +531,11  <at>  <at>  _gcry_cast5_amd64_cbc_dec:
 	popq %rbp;
 	ret;
 
-.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;
+ELF(.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;)
 
 .align 8
 .globl _gcry_cast5_amd64_cfb_dec
-.type   _gcry_cast5_amd64_cfb_dec, <at> function;
+ELF(.type   _gcry_cast5_amd64_cfb_dec, <at> function;)
 _gcry_cast5_amd64_cfb_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -581,7 +586,7  <at>  <at>  _gcry_cast5_amd64_cfb_dec:
 	popq %rbp;
 	ret;
 
-.size _gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;
+ELF(.size _gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;)
 
 #endif /*defined(USE_CAST5)*/
 #endif /*__x86_64*/
diff --git a/cipher/cast5.c b/cipher/cast5.c
index 115e1e6..94dcee7 100644
--- a/cipher/cast5.c
+++ b/cipher/cast5.c
 <at>  <at>  -48,7 +48,8  <at>  <at> 
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
 #undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64_ASM 1
 #endif
 
 <at>  <at>  -372,16 +373,72  <at>  <at>  extern void _gcry_cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out,
 extern void _gcry_cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out,
 				      const byte *in, byte *iv);
 
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+static inline void
+call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
+              const void *arg3, const void *arg4)
+{
+  /* Call SystemV ABI function without storing non-volatile XMM registers,
+   * as target function does not use vector instruction sets. */
+  asm volatile ("callq *%0\n\t"
+                : "+a" (fn),
+                  "+D" (arg1),
+                  "+S" (arg2),
+                  "+d" (arg3),
+                  "+c" (arg4)
+                :
+                : "cc", "memory", "r8", "r9", "r10", "r11");
+}
+#endif
+
 static void
 do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
 {
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_cast5_amd64_encrypt_block, context, outbuf, inbuf, NULL);
+#else
   _gcry_cast5_amd64_encrypt_block (context, outbuf, inbuf);
+#endif
 }
 
 static void
 do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
 {
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_cast5_amd64_decrypt_block, context, outbuf, inbuf, NULL);
+#else
   _gcry_cast5_amd64_decrypt_block (context, outbuf, inbuf);
+#endif
+}
+
+static void
+cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out, const byte *in, byte *ctr)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_cast5_amd64_ctr_enc, ctx, out, in, ctr);
+#else
+  _gcry_cast5_amd64_ctr_enc (ctx, out, in, ctr);
+#endif
+}
+
+static void
+cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_cast5_amd64_cbc_dec, ctx, out, in, iv);
+#else
+  _gcry_cast5_amd64_cbc_dec (ctx, out, in, iv);
+#endif
+}
+
+static void
+cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_cast5_amd64_cfb_dec, ctx, out, in, iv);
+#else
+  _gcry_cast5_amd64_cfb_dec (ctx, out, in, iv);
+#endif
 }
 
 static unsigned int
 <at>  <at>  -396,7 +453,7  <at>  <at>  static unsigned int
 decrypt_block (void *context, byte *outbuf, const byte *inbuf)
 {
   CAST5_context *c = (CAST5_context *) context;
-  _gcry_cast5_amd64_decrypt_block (c, outbuf, inbuf);
+  do_decrypt_block (c, outbuf, inbuf);
   return /*burn_stack*/ (2*8);
 }
 
 <at>  <at>  -582,7 +639,7  <at>  <at>  _gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
     /* Process data in 4 block chunks. */
     while (nblocks >= 4)
       {
-        _gcry_cast5_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+        cast5_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
 
         nblocks -= 4;
         outbuf += 4 * CAST5_BLOCKSIZE;
 <at>  <at>  -651,7 +708,7  <at>  <at>  _gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
     /* Process data in 4 block chunks. */
     while (nblocks >= 4)
       {
-        _gcry_cast5_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+        cast5_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
 
         nblocks -= 4;
         outbuf += 4 * CAST5_BLOCKSIZE;
 <at>  <at>  -710,7 +767,7  <at>  <at>  _gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
     /* Process data in 4 block chunks. */
     while (nblocks >= 4)
       {
-        _gcry_cast5_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+        cast5_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
 
         nblocks -= 4;
         outbuf += 4 * CAST5_BLOCKSIZE;

commit 9a4fb3709864bf3e3918800d44ff576590cd4e92
Author: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
Date:   Thu May 14 13:33:07 2015 +0300

    Enable AMD64 Camellia implementations on WIN64
    
    * cipher/camellia-aesni-avx-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/camellia-aesni-avx2-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/camellia-glue.c (USE_AESNI_AVX, USE_AESNI_AVX2): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    [USE_AESNI_AVX || USE_AESNI_AVX2] (ASM_FUNC_ABI, ASM_EXTRA_STACK): New.
    (_gcry_camellia_aesni_avx_ctr_enc, _gcry_camellia_aesni_avx_cbc_dec)
    (_gcry_camellia_aesni_avx_cfb_dec, _gcry_camellia_aesni_avx_keygen)
    (_gcry_camellia_aesni_avx2_ctr_enc, _gcry_camellia_aesni_avx2_cbc_dec)
    (_gcry_camellia_aesni_avx2_cfb_dec): Add ASM_FUNC_ABI.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>

diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S
index 6d157a7..c047a21 100644
--- a/cipher/camellia-aesni-avx-amd64.S
+++ b/cipher/camellia-aesni-avx-amd64.S
 <at>  <at>  -20,7 +20,8  <at>  <at> 
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
 
 #ifdef __PIC__
 <at>  <at>  -29,6 +30,12  <at>  <at> 
 #  define RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
 /* struct CAMELLIA_context: */
 <at>  <at>  -769,7 +776,7  <at>  <at> 
 .text
 
 .align 8
-.type   __camellia_enc_blk16, <at> function;
+ELF(.type   __camellia_enc_blk16, <at> function;)
 
 __camellia_enc_blk16:
 	/* input:
 <at>  <at>  -853,10 +860,10  <at>  <at>  __camellia_enc_blk16:
 		     %xmm15, %rax, %rcx, 24);
 
 	jmp .Lenc_done;
-.size __camellia_enc_blk16,.-__camellia_enc_blk16;
+ELF(.size __camellia_enc_blk16,.-__camellia_enc_blk16;)
 
 .align 8
-.type   __camellia_dec_blk16, <at> function;
+ELF(.type   __camellia_dec_blk16, <at> function;)
 
 __camellia_dec_blk16:
 	/* input:
 <at>  <at>  -938,7 +945,7  <at>  <at>  __camellia_dec_blk16:
 	      ((key_table + (24) * 8) + 4)(CTX));
 
 	jmp .Ldec_max24;
-.size __camellia_dec_blk16,.-__camellia_dec_blk16;
+ELF(.size __camellia_dec_blk16,.-__camellia_dec_blk16;)
 
 #define inc_le128(x, minus_one, tmp) \
 	vpcmpeqq minus_one, x, tmp; \
 <at>  <at>  -948,7 +955,7  <at>  <at>  __camellia_dec_blk16:
 
 .align 8
 .globl _gcry_camellia_aesni_avx_ctr_enc
-.type   _gcry_camellia_aesni_avx_ctr_enc, <at> function;
+ELF(.type   _gcry_camellia_aesni_avx_ctr_enc, <at> function;)
 
 _gcry_camellia_aesni_avx_ctr_enc:
 	/* input:
 <at>  <at>  -1062,11 +1069,11  <at>  <at>  _gcry_camellia_aesni_avx_ctr_enc:
 
 	leave;
 	ret;
-.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;
+ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;)
 
 .align 8
 .globl _gcry_camellia_aesni_avx_cbc_dec
-.type   _gcry_camellia_aesni_avx_cbc_dec, <at> function;
+ELF(.type   _gcry_camellia_aesni_avx_cbc_dec, <at> function;)
 
 _gcry_camellia_aesni_avx_cbc_dec:
 	/* input:
 <at>  <at>  -1130,11 +1137,11  <at>  <at>  _gcry_camellia_aesni_avx_cbc_dec:
 
 	leave;
 	ret;
-.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;
+ELF(.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;)
 
 .align 8
 .globl _gcry_camellia_aesni_avx_cfb_dec
-.type   _gcry_camellia_aesni_avx_cfb_dec, <at> function;
+ELF(.type   _gcry_camellia_aesni_avx_cfb_dec, <at> function;)
 
 _gcry_camellia_aesni_avx_cfb_dec:
 	/* input:
 <at>  <at>  -1202,7 +1209,7  <at>  <at>  _gcry_camellia_aesni_avx_cfb_dec:
 
 	leave;
 	ret;
-.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;
+ELF(.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;)
 
 /*
  * IN:
 <at>  <at>  -1309,7 +1316,7  <at>  <at>  _gcry_camellia_aesni_avx_cfb_dec:
 .text
 
 .align 8
-.type  __camellia_avx_setup128, <at> function;
+ELF(.type  __camellia_avx_setup128, <at> function;)
 __camellia_avx_setup128:
 	/* input:
 	 *	%rdi: ctx, CTX; subkey storage at key_table(CTX)
 <at>  <at>  -1650,10 +1657,10  <at>  <at>  __camellia_avx_setup128:
 	vzeroall;
 
 	ret;
-.size __camellia_avx_setup128,.-__camellia_avx_setup128;
+ELF(.size __camellia_avx_setup128,.-__camellia_avx_setup128;)
 
 .align 8
-.type  __camellia_avx_setup256, <at> function;
+ELF(.type  __camellia_avx_setup256, <at> function;)
 
 __camellia_avx_setup256:
 	/* input:
 <at>  <at>  -2127,11 +2134,11  <at>  <at>  __camellia_avx_setup256:
 	vzeroall;
 
 	ret;
-.size __camellia_avx_setup256,.-__camellia_avx_setup256;
+ELF(.size __camellia_avx_setup256,.-__camellia_avx_setup256;)
 
 .align 8
 .globl _gcry_camellia_aesni_avx_keygen
-.type  _gcry_camellia_aesni_avx_keygen, <at> function;
+ELF(.type  _gcry_camellia_aesni_avx_keygen, <at> function;)
 
 _gcry_camellia_aesni_avx_keygen:
 	/* input:
 <at>  <at>  -2159,7 +2166,7  <at>  <at>  _gcry_camellia_aesni_avx_keygen:
 	vpor %xmm2, %xmm1, %xmm1;
 
 	jmp __camellia_avx_setup256;
-.size _gcry_camellia_aesni_avx_keygen,.-_gcry_camellia_aesni_avx_keygen;
+ELF(.size _gcry_camellia_aesni_avx_keygen,.-_gcry_camellia_aesni_avx_keygen;)
 
 #endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)*/
 #endif /*__x86_64*/
diff --git a/cipher/camellia-aesni-avx2-amd64.S b/cipher/camellia-aesni-avx2-amd64.S
index 25f48bc..a3fa229 100644
--- a/cipher/camellia-aesni-avx2-amd64.S
+++ b/cipher/camellia-aesni-avx2-amd64.S
 <at>  <at>  -20,7 +20,8  <at>  <at> 
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
 
 #ifdef __PIC__
 <at>  <at>  -29,6 +30,12  <at>  <at> 
 #  define RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
 /* struct CAMELLIA_context: */
 <at>  <at>  -748,7 +755,7  <at>  <at> 
 .text
 
 .align 8
-.type   __camellia_enc_blk32, <at> function;
+ELF(.type   __camellia_enc_blk32, <at> function;)
 
 __camellia_enc_blk32:
 	/* input:
 <at>  <at>  -832,10 +839,10  <at>  <at>  __camellia_enc_blk32:
 		     %ymm15, %rax, %rcx, 24);
 
 	jmp .Lenc_done;
-.size __camellia_enc_blk32,.-__camellia_enc_blk32;
+ELF(.size __camellia_enc_blk32,.-__camellia_enc_blk32;)
 
 .align 8
-.type   __camellia_dec_blk32, <at> function;
+ELF(.type   __camellia_dec_blk32, <at> function;)
 
 __camellia_dec_blk32:
 	/* input:
 <at>  <at>  -917,7 +924,7  <at>  <at>  __camellia_dec_blk32:
 	      ((key_table + (24) * 8) + 4)(CTX));
 
 	jmp .Ldec_max24;
-.size __camellia_dec_blk32,.-__camellia_dec_blk32;
+ELF(.size __camellia_dec_blk32,.-__camellia_dec_blk32;)
 
 #define inc_le128(x, minus_one, tmp) \
 	vpcmpeqq minus_one, x, tmp; \
 <at>  <at>  -927,7 +934,7  <at>  <at>  __camellia_dec_blk32:
 
 .align 8
 .globl _gcry_camellia_aesni_avx2_ctr_enc
-.type   _gcry_camellia_aesni_avx2_ctr_enc, <at> function;
+ELF(.type   _gcry_camellia_aesni_avx2_ctr_enc, <at> function;)
 
 _gcry_camellia_aesni_avx2_ctr_enc:
 	/* input:
 <at>  <at>  -1111,11 +1118,11  <at>  <at>  _gcry_camellia_aesni_avx2_ctr_enc:
 
 	leave;
 	ret;
-.size _gcry_camellia_aesni_avx2_ctr_enc,.-_gcry_camellia_aesni_avx2_ctr_enc;
+ELF(.size _gcry_camellia_aesni_avx2_ctr_enc,.-_gcry_camellia_aesni_avx2_ctr_enc;)
 
 .align 8
 .globl _gcry_camellia_aesni_avx2_cbc_dec
-.type   _gcry_camellia_aesni_avx2_cbc_dec, <at> function;
+ELF(.type   _gcry_camellia_aesni_avx2_cbc_dec, <at> function;)
 
 _gcry_camellia_aesni_avx2_cbc_dec:
 	/* input:
 <at>  <at>  -1183,11 +1190,11  <at>  <at>  _gcry_camellia_aesni_avx2_cbc_dec:
 
 	leave;
 	ret;
-.size _gcry_camellia_aesni_avx2_cbc_dec,.-_gcry_camellia_aesni_avx2_cbc_dec;
+ELF(.size _gcry_camellia_aesni_avx2_cbc_dec,.-_gcry_camellia_aesni_avx2_cbc_dec;)
 
 .align 8
 .globl _gcry_camellia_aesni_avx2_cfb_dec
-.type   _gcry_camellia_aesni_avx2_cfb_dec, <at> function;
+ELF(.type   _gcry_camellia_aesni_avx2_cfb_dec, <at> function;)
 
 _gcry_camellia_aesni_avx2_cfb_dec:
 	/* input:
 <at>  <at>  -1257,7 +1264,7  <at>  <at>  _gcry_camellia_aesni_avx2_cfb_dec:
 
 	leave;
 	ret;
-.size _gcry_camellia_aesni_avx2_cfb_dec,.-_gcry_camellia_aesni_avx2_cfb_dec;
+ELF(.size _gcry_camellia_aesni_avx2_cfb_dec,.-_gcry_camellia_aesni_avx2_cfb_dec;)
 
 #endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)*/
 #endif /*__x86_64*/
diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index f18d135..5032321 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
 <at>  <at>  -75,7 +75,8  <at>  <at> 
 /* USE_AESNI inidicates whether to compile with Intel AES-NI/AVX code. */
 #undef USE_AESNI_AVX
 #if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
-# if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 #  define USE_AESNI_AVX 1
 # endif
 #endif
 <at>  <at>  -83,7 +84,8  <at>  <at> 
 /* USE_AESNI_AVX2 inidicates whether to compile with Intel AES-NI/AVX2 code. */
 #undef USE_AESNI_AVX2
 #if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
-# if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 #  define USE_AESNI_AVX2 1
 # endif
 #endif
 <at>  <at>  -100,6 +102,20  <at>  <at>  typedef struct
 #endif /*USE_AESNI_AVX2*/
 } CAMELLIA_context;
 
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
 #ifdef USE_AESNI_AVX
 /* Assembler implementations of Camellia using AES-NI and AVX.  Process data
    in 16 block same time.
 <at>  <at>  -107,21 +123,21  <at>  <at>  typedef struct
 extern void _gcry_camellia_aesni_avx_ctr_enc(CAMELLIA_context *ctx,
 					     unsigned char *out,
 					     const unsigned char *in,
-					     unsigned char *ctr);
+					     unsigned char *ctr) ASM_FUNC_ABI;
 
 extern void _gcry_camellia_aesni_avx_cbc_dec(CAMELLIA_context *ctx,
 					     unsigned char *out,
 					     const unsigned char *in,
-					     unsigned char *iv);
+					     unsigned char *iv) ASM_FUNC_ABI;
 
 extern void _gcry_camellia_aesni_avx_cfb_dec(CAMELLIA_context *ctx,
 					     unsigned char *out,
 					     const unsigned char *in,
-					     unsigned char *iv);
+					     unsigned char *iv) ASM_FUNC_ABI;
 
 extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx,
 					    const unsigned char *key,
-					    unsigned int keylen);
+					    unsigned int keylen) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AESNI_AVX2
 <at>  <at>  -131,17 +147,17  <at>  <at>  extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx,
 extern void _gcry_camellia_aesni_avx2_ctr_enc(CAMELLIA_context *ctx,
 					      unsigned char *out,
 					      const unsigned char *in,
-					      unsigned char *ctr);
+					      unsigned char *ctr) ASM_FUNC_ABI;
 
 extern void _gcry_camellia_aesni_avx2_cbc_dec(CAMELLIA_context *ctx,
 					      unsigned char *out,
 					      const unsigned char *in,
-					      unsigned char *iv);
+					      unsigned char *iv) ASM_FUNC_ABI;
 
 extern void _gcry_camellia_aesni_avx2_cfb_dec(CAMELLIA_context *ctx,
 					      unsigned char *out,
 					      const unsigned char *in,
-					      unsigned char *iv);
+					      unsigned char *iv) ASM_FUNC_ABI;
 #endif
 
 static const char *selftest(void);
 <at>  <at>  -318,7 +334,7  <at>  <at>  _gcry_camellia_ctr_enc(void *context, unsigned char *ctr,
       if (did_use_aesni_avx2)
         {
           int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
-                                        2 * sizeof(void *);
+                                        2 * sizeof(void *) + ASM_EXTRA_STACK;
 
           if (burn_stack_depth < avx2_burn_stack_depth)
             burn_stack_depth = avx2_burn_stack_depth;
 <at>  <at>  -347,8 +363,11  <at>  <at>  _gcry_camellia_ctr_enc(void *context, unsigned char *ctr,
 
       if (did_use_aesni_avx)
         {
-          if (burn_stack_depth < 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *))
-            burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *);
+          int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+          if (burn_stack_depth < avx_burn_stack_depth)
+            burn_stack_depth = avx_burn_stack_depth;
         }
 
       /* Use generic code to handle smaller chunks... */
 <at>  <at>  -409,7 +428,7  <at>  <at>  _gcry_camellia_cbc_dec(void *context, unsigned char *iv,
       if (did_use_aesni_avx2)
         {
           int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
-                                        2 * sizeof(void *);
+                                        2 * sizeof(void *) + ASM_EXTRA_STACK;;
 
           if (burn_stack_depth < avx2_burn_stack_depth)
             burn_stack_depth = avx2_burn_stack_depth;
 <at>  <at>  -437,8 +456,11  <at>  <at>  _gcry_camellia_cbc_dec(void *context, unsigned char *iv,
 
       if (did_use_aesni_avx)
         {
-          if (burn_stack_depth < 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *))
-            burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *);
+          int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+          if (burn_stack_depth < avx_burn_stack_depth)
+            burn_stack_depth = avx_burn_stack_depth;
         }
 
       /* Use generic code to handle smaller chunks... */
 <at>  <at>  -491,7 +513,7  <at>  <at>  _gcry_camellia_cfb_dec(void *context, unsigned char *iv,
       if (did_use_aesni_avx2)
         {
           int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
-                                        2 * sizeof(void *);
+                                        2 * sizeof(void *) + ASM_EXTRA_STACK;
 
           if (burn_stack_depth < avx2_burn_stack_depth)
             burn_stack_depth = avx2_burn_stack_depth;
 <at>  <at>  -519,8 +541,11  <at>  <at>  _gcry_camellia_cfb_dec(void *context, unsigned char *iv,
 
       if (did_use_aesni_avx)
         {
-          if (burn_stack_depth < 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *))
-            burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *);
+          int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+          if (burn_stack_depth < avx_burn_stack_depth)
+            burn_stack_depth = avx_burn_stack_depth;
         }
 
       /* Use generic code to handle smaller chunks... */

commit e05682093ffb003b589a697428d918d755ac631d
Author: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
Date:   Sun May 3 17:28:40 2015 +0300

    Enable AMD64 Blowfish implementation on WIN64
    
    * cipher/blowfish-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/blowfish.c (USE_AMD64_ASM): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (call_sysv_fn): New.
    (do_encrypt, do_encrypt_block, do_decrypt_block)
    [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]: Call assembly
    function through 'call_sysv_fn'.
    (blowfish_amd64_ctr_enc, blowfish_amd64_cbc_dec)
    (blowfish_amd64_cfb_dec): New wrapper functions for bulk
    assembly functions.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>

diff --git a/cipher/blowfish-amd64.S b/cipher/blowfish-amd64.S
index 87b676f..21b63fc 100644
--- a/cipher/blowfish-amd64.S
+++ b/cipher/blowfish-amd64.S
 <at>  <at>  -20,7 +20,15  <at>  <at> 
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(USE_BLOWFISH) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(USE_BLOWFISH) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
 
 .text
 
 <at>  <at>  -120,7 +128,7  <at>  <at> 
 	movq RX0, 		(RIO);
 
 .align 8
-.type   __blowfish_enc_blk1, <at> function;
+ELF(.type   __blowfish_enc_blk1, <at> function;)
 
 __blowfish_enc_blk1:
 	/* input:
 <at>  <at>  -145,11 +153,11  <at>  <at>  __blowfish_enc_blk1:
 	movq %r11, %rbp;
 
 	ret;
-.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;
+ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;)
 
 .align 8
 .globl  _gcry_blowfish_amd64_do_encrypt
-.type   _gcry_blowfish_amd64_do_encrypt, <at> function;
+ELF(.type   _gcry_blowfish_amd64_do_encrypt, <at> function;)
 
 _gcry_blowfish_amd64_do_encrypt:
 	/* input:
 <at>  <at>  -171,11 +179,11  <at>  <at>  _gcry_blowfish_amd64_do_encrypt:
 	movl RX0d, (RX2);
 
 	ret;
-.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;
+ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;)
 
 .align 8
 .globl  _gcry_blowfish_amd64_encrypt_block
-.type   _gcry_blowfish_amd64_encrypt_block, <at> function;
+ELF(.type   _gcry_blowfish_amd64_encrypt_block, <at> function;)
 
 _gcry_blowfish_amd64_encrypt_block:
 	/* input:
 <at>  <at>  -195,11 +203,11  <at>  <at>  _gcry_blowfish_amd64_encrypt_block:
 	write_block();
 
 	ret;
-.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;
+ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;)
 
 .align 8
 .globl  _gcry_blowfish_amd64_decrypt_block
-.type   _gcry_blowfish_amd64_decrypt_block, <at> function;
+ELF(.type   _gcry_blowfish_amd64_decrypt_block, <at> function;)
 
 _gcry_blowfish_amd64_decrypt_block:
 	/* input:
 <at>  <at>  -231,7 +239,7  <at>  <at>  _gcry_blowfish_amd64_decrypt_block:
 	movq %r11, %rbp;
 
 	ret;
-.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;
+ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;)
 
 /**********************************************************************
   4-way blowfish, four blocks parallel
 <at>  <at>  -319,7 +327,7  <at>  <at>  _gcry_blowfish_amd64_decrypt_block:
 	bswapq 			RX3;
 
 .align 8
-.type   __blowfish_enc_blk4, <at> function;
+ELF(.type   __blowfish_enc_blk4, <at> function;)
 
 __blowfish_enc_blk4:
 	/* input:
 <at>  <at>  -343,10 +351,10  <at>  <at>  __blowfish_enc_blk4:
 	outbswap_block4();
 
 	ret;
-.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;
+ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;)
 
 .align 8
-.type   __blowfish_dec_blk4, <at> function;
+ELF(.type   __blowfish_dec_blk4, <at> function;)
 
 __blowfish_dec_blk4:
 	/* input:
 <at>  <at>  -372,11 +380,11  <at>  <at>  __blowfish_dec_blk4:
 	outbswap_block4();
 
 	ret;
-.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;
+ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;)
 
 .align 8
 .globl  _gcry_blowfish_amd64_ctr_enc
-.type   _gcry_blowfish_amd64_ctr_enc, <at> function;
+ELF(.type   _gcry_blowfish_amd64_ctr_enc, <at> function;)
 _gcry_blowfish_amd64_ctr_enc:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -429,11 +437,11  <at>  <at>  _gcry_blowfish_amd64_ctr_enc:
 	popq %rbp;
 
 	ret;
-.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;
+ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;)
 
 .align 8
 .globl  _gcry_blowfish_amd64_cbc_dec
-.type   _gcry_blowfish_amd64_cbc_dec, <at> function;
+ELF(.type   _gcry_blowfish_amd64_cbc_dec, <at> function;)
 _gcry_blowfish_amd64_cbc_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -477,11 +485,11  <at>  <at>  _gcry_blowfish_amd64_cbc_dec:
 	popq %rbp;
 
 	ret;
-.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;
+ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;)
 
 .align 8
 .globl  _gcry_blowfish_amd64_cfb_dec
-.type   _gcry_blowfish_amd64_cfb_dec, <at> function;
+ELF(.type   _gcry_blowfish_amd64_cfb_dec, <at> function;)
 _gcry_blowfish_amd64_cfb_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
 <at>  <at>  -527,7 +535,7  <at>  <at>  _gcry_blowfish_amd64_cfb_dec:
 	popq %rbx;
 	popq %rbp;
 	ret;
-.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;
+ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;)
 
 #endif /*defined(USE_BLOWFISH)*/
 #endif /*__x86_64*/
diff --git a/cipher/blowfish.c b/cipher/blowfish.c
index ae470d8..a3fc26c 100644
--- a/cipher/blowfish.c
+++ b/cipher/blowfish.c
 <at>  <at>  -45,7 +45,8  <at>  <at> 
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
 #undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     (BLOWFISH_ROUNDS == 16)
 # define USE_AMD64_ASM 1
 #endif
 <at>  <at>  -280,22 +281,87  <at>  <at>  extern void _gcry_blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out,
 extern void _gcry_blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out,
 					 const byte *in, byte *iv);
 
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+static inline void
+call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
+              const void *arg3, const void *arg4)
+{
+  /* Call SystemV ABI function without storing non-volatile XMM registers,
+   * as target function does not use vector instruction sets. */
+  asm volatile ("callq *%0\n\t"
+                : "+a" (fn),
+                  "+D" (arg1),
+                  "+S" (arg2),
+                  "+d" (arg3),
+                  "+c" (arg4)
+                :
+                : "cc", "memory", "r8", "r9", "r10", "r11");
+}
+#endif
+
 static void
 do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
 {
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_blowfish_amd64_do_encrypt, bc, ret_xl, ret_xr, NULL);
+#else
   _gcry_blowfish_amd64_do_encrypt (bc, ret_xl, ret_xr);
+#endif
 }
 
 static void
 do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
 {
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_blowfish_amd64_encrypt_block, context, outbuf, inbuf,
+                NULL);
+#else
   _gcry_blowfish_amd64_encrypt_block (context, outbuf, inbuf);
+#endif
 }
 
 static void
 do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
 {
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_blowfish_amd64_decrypt_block, context, outbuf, inbuf,
+                NULL);
+#else
   _gcry_blowfish_amd64_decrypt_block (context, outbuf, inbuf);
+#endif
+}
+
+static inline void
+blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in,
+                       byte *ctr)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_blowfish_amd64_ctr_enc, ctx, out, in, ctr);
+#else
+  _gcry_blowfish_amd64_ctr_enc(ctx, out, in, ctr);
+#endif
+}
+
+static inline void
+blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
+                       byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_blowfish_amd64_cbc_dec, ctx, out, in, iv);
+#else
+  _gcry_blowfish_amd64_cbc_dec(ctx, out, in, iv);
+#endif
+}
+
+static inline void
+blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
+                       byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_blowfish_amd64_cfb_dec, ctx, out, in, iv);
+#else
+  _gcry_blowfish_amd64_cfb_dec(ctx, out, in, iv);
+#endif
 }
 
 static unsigned int
 <at>  <at>  -605,7 +671,7  <at>  <at>  _gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
     /* Process data in 4 block chunks. */
     while (nblocks >= 4)
       {
-        _gcry_blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+        blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
 
         nblocks -= 4;
         outbuf += 4 * BLOWFISH_BLOCKSIZE;
 <at>  <at>  -674,7 +740,7  <at>  <at>  _gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
     /* Process data in 4 block chunks. */
     while (nblocks >= 4)
       {
-        _gcry_blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+        blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
 
         nblocks -= 4;
         outbuf += 4 * BLOWFISH_BLOCKSIZE;
 <at>  <at>  -734,7 +800,7  <at>  <at>  _gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
     /* Process data in 4 block chunks. */
     while (nblocks >= 4)
       {
-        _gcry_blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+        blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
 
         nblocks -= 4;
         outbuf += 4 * BLOWFISH_BLOCKSIZE;

commit c46b015bedba7ce0db68929bd33a86a54ab3d919
Author: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
Date:   Sun May 3 17:06:56 2015 +0300

    Enable AMD64 arcfour implementation on WIN64
    
    * cipher/arcfour-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/arcfour.c (USE_AMD64_ASM): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (encrypt_stream) [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]: Use
    assembly block to call AMD64 assembly function.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>

diff --git a/cipher/arcfour-amd64.S b/cipher/arcfour-amd64.S
index 8b8031a..2e52ea0 100644
--- a/cipher/arcfour-amd64.S
+++ b/cipher/arcfour-amd64.S
 <at>  <at>  -15,12 +15,19  <at>  <at> 
 
 #ifdef __x86_64__
 #include <config.h>
-#if defined(USE_ARCFOUR) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(USE_ARCFOUR) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
 
 .text
 .align 16
 .globl _gcry_arcfour_amd64
-.type _gcry_arcfour_amd64, <at> function
+ELF(.type _gcry_arcfour_amd64, <at> function)
 _gcry_arcfour_amd64:
 	push	%rbp
 	push	%rbx
 <at>  <at>  -91,7 +98,7  <at>  <at>  _gcry_arcfour_amd64:
 	pop	%rbp
 	ret
 .L__gcry_arcfour_amd64_end:
-.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64
+ELF(.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64)
 
 #endif
 #endif
diff --git a/cipher/arcfour.c b/cipher/arcfour.c
index 27537bf..44e8ef4 100644
--- a/cipher/arcfour.c
+++ b/cipher/arcfour.c
 <at>  <at>  -33,7 +33,8  <at>  <at> 
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
 #undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64_ASM 1
 #endif
 
 <at>  <at>  -53,7 +54,21  <at>  <at>  static void
 encrypt_stream (void *context,
                 byte *outbuf, const byte *inbuf, size_t length)
 {
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  const void *fn = _gcry_arcfour_amd64;
+  /* Call SystemV ABI function without storing non-volatile XMM registers,
+   * as target function does not use vector instruction sets. */
+  asm volatile ("callq *%0\n\t"
+                : "+a" (fn),
+                  "+D" (context),
+                  "+S" (length),
+                  "+d" (inbuf),
+                  "+c" (outbuf)
+                :
+                : "cc", "memory", "r8", "r9", "r10", "r11");
+#else
   _gcry_arcfour_amd64 (context, length, inbuf, outbuf );
+#endif
 }
 
 #else /*!USE_AMD64_ASM*/

commit ee8fc4edcb3466b03246c8720b90731bf274ff1d
Author: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
Date:   Thu May 14 10:02:51 2015 +0300

    Update documentation for Poly1305-ChaCha20 AEAD, RFC-7539
    
    * cipher/cipher-poly1305.c: Add RFC-7539 to header.
    * doc/gcrypt.texi: Update Poly1305 AEAD documentation with mention of
    RFC-7539; Drop Salsa from supported stream ciphers for Poly1305 AEAD.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>

diff --git a/cipher/cipher-poly1305.c b/cipher/cipher-poly1305.c
index f283333..965a7b6 100644
--- a/cipher/cipher-poly1305.c
+++ b/cipher/cipher-poly1305.c
 <at>  <at>  -1,4 +1,4  <at>  <at> 
-/* cipher-pol1305.c  -  Poly1305 based AEAD cipher mode
+/* cipher-poly1305.c  -  Poly1305 based AEAD cipher mode, RFC-7539
  * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
  *
  * This file is part of Libgcrypt.
diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi
index 8683ca8..ab4f685 100644
--- a/doc/gcrypt.texi
+++ b/doc/gcrypt.texi
 <at>  <at>  -1643,9 +1643,10  <at>  <at>  Associated Data (AEAD) block cipher mode, which is specified in
 'NIST Special Publication 800-38D'.
 
  <at> item  GCRY_CIPHER_MODE_POLY1305
- <at> cindex Poly1305 based AEAD mode
-Poly1305 is an Authenticated Encryption with Associated Data (AEAD)
-mode, which can be used with ChaCha20 and Salsa20 stream ciphers.
+ <at> cindex Poly1305 based AEAD mode with ChaCha20
+This mode implements the Poly1305 Authenticated Encryption with Associated
+Data (AEAD) mode according to RFC-7539. This mode can be used with ChaCha20
+stream cipher.
 
  <at> item  GCRY_CIPHER_MODE_OCB
  <at> cindex OCB, OCB3
 <at>  <at>  -1687,7 +1688,7  <at>  <at>  and the according constants.  Note that some modes are incompatible
 with some algorithms - in particular, stream mode
 ( <at> code{GCRY_CIPHER_MODE_STREAM}) only works with stream ciphers.
 Poly1305 AEAD mode ( <at> code{GCRY_CIPHER_MODE_POLY1305}) only works with
-ChaCha and Salsa stream ciphers. The block cipher modes
+ChaCha20 stream cipher. The block cipher modes
 ( <at> code{GCRY_CIPHER_MODE_ECB},  <at> code{GCRY_CIPHER_MODE_CBC},
  <at> code{GCRY_CIPHER_MODE_CFB},  <at> code{GCRY_CIPHER_MODE_OFB} and
  <at> code{GCRY_CIPHER_MODE_CTR}) will work with any block cipher

-----------------------------------------------------------------------

Summary of changes:
 cipher/arcfour-amd64.S             | 13 ++++--
 cipher/arcfour.c                   | 17 +++++++-
 cipher/blowfish-amd64.S            | 46 ++++++++++++---------
 cipher/blowfish.c                  | 74 +++++++++++++++++++++++++++++++--
 cipher/camellia-aesni-avx-amd64.S  | 41 +++++++++++--------
 cipher/camellia-aesni-avx2-amd64.S | 29 ++++++++-----
 cipher/camellia-glue.c             | 61 +++++++++++++++++++--------
 cipher/cast5-amd64.S               | 43 ++++++++++---------
 cipher/cast5.c                     | 67 +++++++++++++++++++++++++++---
 cipher/chacha20-avx2-amd64.S       | 13 ++++--
 cipher/chacha20-sse2-amd64.S       | 13 ++++--
 cipher/chacha20-ssse3-amd64.S      | 13 ++++--
 cipher/chacha20.c                  | 43 ++++++++++++++-----
 cipher/cipher-poly1305.c           |  2 +-
 cipher/des-amd64.S                 | 29 ++++++++-----
 cipher/des.c                       | 61 +++++++++++++++++++++++++--
 cipher/poly1305-avx2-amd64.S       | 22 ++++++----
 cipher/poly1305-internal.h         | 27 +++++++++---
 cipher/poly1305-sse2-amd64.S       | 22 ++++++----
 cipher/poly1305.c                  | 33 ++++++++-------
 cipher/salsa20-amd64.S             | 17 +++++---
 cipher/salsa20.c                   | 26 +++++++++---
 cipher/serpent-avx2-amd64.S        | 29 ++++++++-----
 cipher/serpent-sse2-amd64.S        | 29 ++++++++-----
 cipher/serpent.c                   | 30 ++++++++++----
 cipher/twofish-amd64.S             | 37 ++++++++++-------
 cipher/twofish.c                   | 84 +++++++++++++++++++++++++++++++++++---
 doc/gcrypt.texi                    |  9 ++--
 28 files changed, 699 insertions(+), 231 deletions(-)


hooks/post-receive
--

-- 
The GNU crypto library
http://git.gnupg.org


_______________________________________________
Gnupg-commits mailing list
Gnupg-commits <at> gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits
_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel <at> gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
Jussi Kivilinna | 14 May 13:11 2015
Picon
Picon

[PATCH 01/10] Enable AMD64 arcfour implementation on WIN64

* cipher/arcfour-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/arcfour.c (USE_AMD64_ASM): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(encrypt_stream) [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]: Use
assembly block to call AMD64 assembly function.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
---
 cipher/arcfour-amd64.S |   13 ++++++++++---
 cipher/arcfour.c       |   17 ++++++++++++++++-
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/cipher/arcfour-amd64.S b/cipher/arcfour-amd64.S
index 8b8031a..2e52ea0 100644
--- a/cipher/arcfour-amd64.S
+++ b/cipher/arcfour-amd64.S
 <at>  <at>  -15,12 +15,19  <at>  <at> 

 #ifdef __x86_64__
 #include <config.h>
-#if defined(USE_ARCFOUR) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(USE_ARCFOUR) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif

 .text
 .align 16
 .globl _gcry_arcfour_amd64
-.type _gcry_arcfour_amd64, <at> function
+ELF(.type _gcry_arcfour_amd64, <at> function)
 _gcry_arcfour_amd64:
 	push	%rbp
 	push	%rbx
 <at>  <at>  -91,7 +98,7  <at>  <at>  _gcry_arcfour_amd64:
 	pop	%rbp
 	ret
 .L__gcry_arcfour_amd64_end:
-.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64
+ELF(.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64)

 #endif
 #endif
diff --git a/cipher/arcfour.c b/cipher/arcfour.c
index 27537bf..44e8ef4 100644
--- a/cipher/arcfour.c
+++ b/cipher/arcfour.c
 <at>  <at>  -33,7 +33,8  <at>  <at> 

 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
 #undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64_ASM 1
 #endif

 <at>  <at>  -53,7 +54,21  <at>  <at>  static void
 encrypt_stream (void *context,
                 byte *outbuf, const byte *inbuf, size_t length)
 {
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  const void *fn = _gcry_arcfour_amd64;
+  /* Call SystemV ABI function without storing non-volatile XMM registers,
+   * as target function does not use vector instruction sets. */
+  asm volatile ("callq *%0\n\t"
+                : "+a" (fn),
+                  "+D" (context),
+                  "+S" (length),
+                  "+d" (inbuf),
+                  "+c" (outbuf)
+                :
+                : "cc", "memory", "r8", "r9", "r10", "r11");
+#else
   _gcry_arcfour_amd64 (context, length, inbuf, outbuf );
+#endif
 }

 #else /*!USE_AMD64_ASM*/
Jussi Kivilinna | 14 May 09:18 2015
Picon
Picon

[PATCH] Update documentation for Poly1305-ChaCha20 AEAD, RFC-7539

* cipher/cipher-poly1305.c: Add RFC-7539 to header.
* doc/gcrypt.texi: Update Poly1305 AEAD documentation with mention of
RFC-7539; Drop Salsa from supported stream ciphers for Poly1305 AEAD.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
---
 cipher/cipher-poly1305.c |    2 +-
 doc/gcrypt.texi          |    9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/cipher/cipher-poly1305.c b/cipher/cipher-poly1305.c
index f283333..965a7b6 100644
--- a/cipher/cipher-poly1305.c
+++ b/cipher/cipher-poly1305.c
 <at>  <at>  -1,4 +1,4  <at>  <at> 
-/* cipher-pol1305.c  -  Poly1305 based AEAD cipher mode
+/* cipher-poly1305.c  -  Poly1305 based AEAD cipher mode, RFC-7539
  * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna <at> iki.fi>
  *
  * This file is part of Libgcrypt.
diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi
index 8683ca8..ab4f685 100644
--- a/doc/gcrypt.texi
+++ b/doc/gcrypt.texi
 <at>  <at>  -1643,9 +1643,10  <at>  <at>  Associated Data (AEAD) block cipher mode, which is specified in
 'NIST Special Publication 800-38D'.

  <at> item  GCRY_CIPHER_MODE_POLY1305
- <at> cindex Poly1305 based AEAD mode
-Poly1305 is an Authenticated Encryption with Associated Data (AEAD)
-mode, which can be used with ChaCha20 and Salsa20 stream ciphers.
+ <at> cindex Poly1305 based AEAD mode with ChaCha20
+This mode implements the Poly1305 Authenticated Encryption with Associated
+Data (AEAD) mode according to RFC-7539. This mode can be used with ChaCha20
+stream cipher.

  <at> item  GCRY_CIPHER_MODE_OCB
  <at> cindex OCB, OCB3
 <at>  <at>  -1687,7 +1688,7  <at>  <at>  and the according constants.  Note that some modes are incompatible
 with some algorithms - in particular, stream mode
 ( <at> code{GCRY_CIPHER_MODE_STREAM}) only works with stream ciphers.
 Poly1305 AEAD mode ( <at> code{GCRY_CIPHER_MODE_POLY1305}) only works with
-ChaCha and Salsa stream ciphers. The block cipher modes
+ChaCha20 stream cipher. The block cipher modes
 ( <at> code{GCRY_CIPHER_MODE_ECB},  <at> code{GCRY_CIPHER_MODE_CBC},
  <at> code{GCRY_CIPHER_MODE_CFB},  <at> code{GCRY_CIPHER_MODE_OFB} and
  <at> code{GCRY_CIPHER_MODE_CTR}) will work with any block cipher

Gmane