Danny Crane | 5 Sep 02:17 2015

libgcrypt dependency issues

Hi,

In libgcrypt-1.5.3, I am looking for the function definition of gcry_mpi_powm. I find that the name is mapped in

src/visibility.h:#define gcry_mpi_powm               _gcry_mpi_powm

But I cannot find the definition of _gcry_mpi_powm. Could anyone point me to where this function is defined?
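
(To state my understanding: the #define appears to rename the function at
preprocessing time, so the implementation may well be written under the
public name and only compile down to _gcry_mpi_powm -- which would explain
why grep cannot find it.  A toy example of the pattern as I understand it,
my own sketch and not the libgcrypt sources:

#include <stdio.h>

#define public_fn internal_fn  /* like the mapping in visibility.h */

/* Written under the public name, but after preprocessing this defines
   internal_fn -- so grepping for "internal_fn" misses this definition.  */
int
public_fn (int x)
{
  return x + 1;
}

int
main (void)
{
  printf ("%d\n", internal_fn (41));  /* prints 42 */
  return 0;
}

Is that what is going on here?)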


Thanks,
_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
Werner Koch | 4 Sep 12:47 2015

[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-261-ge97c62a

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  e97c62a4a687b56d00a2d0a63e072a977f8eb81c (commit)
       via  e2785a2268702312529521df3bd2f4e6b43cea3a (commit)
      from  c17f84bd02d7ee93845e92e20f6ddba814961588 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit e97c62a4a687b56d00a2d0a63e072a977f8eb81c
Author: Werner Koch <wk@gnupg.org>
Date:   Fri Sep 4 12:39:56 2015 +0200

    w32: Avoid a few compiler warnings.

    * cipher/cipher-selftest.c (_gcry_selftest_helper_cbc)
    (_gcry_selftest_helper_cfb, _gcry_selftest_helper_ctr): Mark variable
    as unused.
    * random/rndw32.c (slow_gatherer): Avoid signed pointer mismatch
    warning.
    * src/secmem.c (init_pool): Avoid unused variable warning.
    * tests/random.c (writen, readn): Include only if needed.

    Signed-off-by: Werner Koch <wk@gnupg.org>

diff --git a/cipher/cipher-selftest.c b/cipher/cipher-selftest.c
index 470499f..cecbab7 100644
--- a/cipher/cipher-selftest.c
+++ b/cipher/cipher-selftest.c
@@ -131,6 +131,8 @@ _gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey_func,
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
               "%s-CBC-%d test failed (plaintext mismatch)", cipher,
 	      blocksize * 8);
+#else
+      (void)cipher; /* Not used.  */
 #endif
       return "selftest for CBC failed - see syslog for details";
     }
@@ -251,6 +253,8 @@ _gcry_selftest_helper_cfb (const char *cipher, gcry_cipher_setkey_t setkey_func,
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
               "%s-CFB-%d test failed (plaintext mismatch)", cipher,
 	      blocksize * 8);
+#else
+      (void)cipher; /* Not used.  */
 #endif
       return "selftest for CFB failed - see syslog for details";
     }
@@ -379,6 +383,8 @@ _gcry_selftest_helper_ctr (const char *cipher, gcry_cipher_setkey_t setkey_func,
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
               "%s-CTR-%d test failed (plaintext mismatch)", cipher,
 	      blocksize * 8);
+#else
+      (void)cipher; /* Not used.  */
 #endif
       return "selftest for CTR failed - see syslog for details";
     }
diff --git a/random/rndw32.c b/random/rndw32.c
index 4ab1bca..1325b18 100644
--- a/random/rndw32.c
+++ b/random/rndw32.c
@@ -513,7 +513,7 @@ slow_gatherer ( void (*add)(const void*, size_t, enum random_origins),

           status = RegQueryValueEx (hKey, "ProductType", 0, NULL,
                                     szValue, &dwSize);
-          if (status == ERROR_SUCCESS && stricmp (szValue, "WinNT"))
+          if (status == ERROR_SUCCESS && stricmp ((char*)szValue, "WinNT"))
             {
               /* Note: There are (at least) three cases for ProductType:
                  WinNT = NT Workstation, ServerNT = NT Server, LanmanNT =
diff --git a/src/secmem.c b/src/secmem.c
index d75c14c..2109bc2 100644
--- a/src/secmem.c
+++ b/src/secmem.c
@@ -363,8 +363,6 @@ lock_pool (void *p, size_t n)
 static void
 init_pool (size_t n)
 {
-  size_t pgsize;
-  long int pgsize_val;
   memblock_t *mb;

   pool_size = n;
@@ -372,48 +370,54 @@ init_pool (size_t n)
   if (disable_secmem)
     log_bug ("secure memory is disabled");

-#if defined(HAVE_SYSCONF) && defined(_SC_PAGESIZE)
-  pgsize_val = sysconf (_SC_PAGESIZE);
-#elif defined(HAVE_GETPAGESIZE)
-  pgsize_val = getpagesize ();
-#else
-  pgsize_val = -1;
-#endif
-  pgsize = (pgsize_val != -1 && pgsize_val > 0)? pgsize_val:DEFAULT_PAGE_SIZE;
-

 #if HAVE_MMAP
-  pool_size = (pool_size + pgsize - 1) & ~(pgsize - 1);
-#ifdef MAP_ANONYMOUS
-  pool = mmap (0, pool_size, PROT_READ | PROT_WRITE,
-	       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-#else /* map /dev/zero instead */
   {
-    int fd;
+    size_t pgsize;
+    long int pgsize_val;
+
+# if defined(HAVE_SYSCONF) && defined(_SC_PAGESIZE)
+    pgsize_val = sysconf (_SC_PAGESIZE);
+# elif defined(HAVE_GETPAGESIZE)
+    pgsize_val = getpagesize ();
+# else
+    pgsize_val = -1;
+# endif
+    pgsize = (pgsize_val != -1 && pgsize_val > 0)? pgsize_val:DEFAULT_PAGE_SIZE;
+
+    pool_size = (pool_size + pgsize - 1) & ~(pgsize - 1);
+# ifdef MAP_ANONYMOUS
+    pool = mmap (0, pool_size, PROT_READ | PROT_WRITE,
+                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+# else /* map /dev/zero instead */
+    {
+      int fd;

-    fd = open ("/dev/zero", O_RDWR);
-    if (fd == -1)
-      {
-	log_error ("can't open /dev/zero: %s\n", strerror (errno));
-	pool = (void *) -1;
-      }
+      fd = open ("/dev/zero", O_RDWR);
+      if (fd == -1)
+        {
+          log_error ("can't open /dev/zero: %s\n", strerror (errno));
+          pool = (void *) -1;
+        }
+      else
+        {
+          pool = mmap (0, pool_size,
+                       (PROT_READ | PROT_WRITE), MAP_PRIVATE, fd, 0);
+          close (fd);
+        }
+    }
+# endif
+    if (pool == (void *) -1)
+      log_info ("can't mmap pool of %u bytes: %s - using malloc\n",
+                (unsigned) pool_size, strerror (errno));
     else
       {
-	pool = mmap (0, pool_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
-        close (fd);
+        pool_is_mmapped = 1;
+        pool_okay = 1;
       }
   }
-#endif
-  if (pool == (void *) -1)
-    log_info ("can't mmap pool of %u bytes: %s - using malloc\n",
-	      (unsigned) pool_size, strerror (errno));
-  else
-    {
-      pool_is_mmapped = 1;
-      pool_okay = 1;
-    }
+#endif /*HAVE_MMAP*/

-#endif
   if (!pool_okay)
     {
       pool = malloc (pool_size);
diff --git a/tests/fipsdrv.c b/tests/fipsdrv.c
index eef2ddd..b3da2a3 100644
--- a/tests/fipsdrv.c
+++ b/tests/fipsdrv.c
@@ -2358,14 +2358,14 @@ main (int argc, char **argv)
             {
               if (!(++count % 1000))
                 fprintf (stderr, PGM ": %lu random bytes so far\n",
-                         (unsigned long int)count * sizeof buffer);
+                         (unsigned long int)(count * sizeof buffer));
             }
         }
       while (loop_mode);

       if (progress)
         fprintf (stderr, PGM ": %lu random bytes\n",
-                         (unsigned long int)count * sizeof buffer);
+                 (unsigned long int)(count * sizeof buffer));

       deinit_external_rng_test (context);
     }
diff --git a/tests/gchash.c b/tests/gchash.c
index 7a2aad6..7ff99e0 100644
--- a/tests/gchash.c
+++ b/tests/gchash.c
@@ -109,7 +109,7 @@ main (int argc, char **argv)
       h  = gcry_md_read(hd, 0);

       for (i = 0; i < gcry_md_get_algo_dlen (algo); i++)
-        printf("%02hhx", h[i]);
+        printf("%02x", h[i]);
       printf("  %s\n", *argv);

       gcry_md_reset(hd);
diff --git a/tests/random.c b/tests/random.c
index 10bf646..d7a624a 100644
--- a/tests/random.c
+++ b/tests/random.c
@@ -87,7 +87,7 @@ progress_cb (void *cb_data, const char *what, int printchar,
 }

 
-
+#ifndef HAVE_W32_SYSTEM
 static int
 writen (int fd, const void *buf, size_t nbytes)
 {
@@ -110,7 +110,10 @@ writen (int fd, const void *buf, size_t nbytes)

   return 0;
 }
+#endif /*!HAVE_W32_SYSTEM*/
+

+#ifndef HAVE_W32_SYSTEM
 static int
 readn (int fd, void *buf, size_t buflen, size_t *ret_nread)
 {
@@ -136,7 +139,7 @@ readn (int fd, void *buf, size_t buflen, size_t *ret_nread)
     *ret_nread = buflen - nleft;
   return 0;
 }
-
+#endif /*!HAVE_W32_SYSTEM*/

 
 /* Check that forking won't return the same random. */

commit e2785a2268702312529521df3bd2f4e6b43cea3a
Author: Werner Koch <wk@gnupg.org>
Date:   Fri Sep 4 12:32:16 2015 +0200

    w32: Fix alignment problem with AESNI on Windows >= 8

    * cipher/cipher-selftest.c (_gcry_cipher_selftest_alloc_ctx): New.
    * cipher/rijndael.c (selftest_basic_128, selftest_basic_192)
    (selftest_basic_256): Allocate context on the heap.
    --

    The stack alignment on Windows changed, and because ld seems to limit
    stack variables to an 8-byte alignment (we request 16), we get bus
    errors from the selftests if AESNI is in use.

    GnuPG-bug-id: 2085
    Signed-off-by: Werner Koch <wk@gnupg.org>
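
For illustration, the alignment trick used by the new
_gcry_cipher_selftest_alloc_ctx can be sketched in isolation (a simplified
standalone version, with plain calloc standing in for Libgcrypt's
xtrycalloc; this sketch is not part of the patch):

#include <stdint.h>
#include <stdlib.h>

/* Over-allocate by 16 bytes, then advance to the next 16-byte
   boundary.  The raw pointer is stored at R_MEM so the caller can
   free() it later; the aligned pointer is the return value.  */
static void *
alloc_aligned16 (size_t size, unsigned char **r_mem)
{
  unsigned char *mem = calloc (1, size + 16);

  if (!mem)
    return NULL;
  *r_mem = mem;
  return mem + ((16 - ((uintptr_t)mem & 15)) & 15);
}

A caller uses it the same way the selftests below use the real helper:
ctx = alloc_aligned16 (sizeof *ctx, &ctxmem); ... free (ctxmem);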

diff --git a/cipher/cipher-selftest.c b/cipher/cipher-selftest.c
index 852368a..470499f 100644
--- a/cipher/cipher-selftest.c
+++ b/cipher/cipher-selftest.c
@@ -44,6 +44,29 @@
 #endif

 
+/* Return an allocated buffer of size CONTEXT_SIZE with an alignment
+   of 16.  The caller must free that buffer using the address returned
+   at R_MEM.  Returns NULL and sets ERRNO on failure.  */
+void *
+_gcry_cipher_selftest_alloc_ctx (const int context_size, unsigned char **r_mem)
+{
+  int offs;
+  unsigned int ctx_aligned_size, memsize;
+
+  ctx_aligned_size = context_size + 15;
+  ctx_aligned_size -= ctx_aligned_size & 0xf;
+
+  memsize = ctx_aligned_size + 16;
+
+  *r_mem = xtrycalloc (1, memsize);
+  if (!*r_mem)
+    return NULL;
+
+  offs = (16 - ((uintptr_t)*r_mem & 15)) & 15;
+  return (void*)(*r_mem + offs);
+}
+
+
 /* Run the self-tests for <block cipher>-CBC-<block size>, tests bulk CBC
    decryption.  Returns NULL on success. */
 const char *
diff --git a/cipher/cipher-selftest.h b/cipher/cipher-selftest.h
index a70667a..a435080 100644
--- a/cipher/cipher-selftest.h
+++ b/cipher/cipher-selftest.h
@@ -40,6 +40,11 @@ typedef void (*gcry_cipher_bulk_ctr_enc_t)(void *context, unsigned char *iv,
 					   const void *inbuf_arg,
 					   size_t nblocks);

+/* Helper function to allocate an aligned context for selftests.  */
+void *_gcry_cipher_selftest_alloc_ctx (const int context_size,
+                                       unsigned char **r_mem);
+
+
 /* Helper function for bulk CBC decryption selftest */
 const char *
 _gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey,
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index eff59c2..0130924 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -1358,7 +1358,8 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
 static const char*
 selftest_basic_128 (void)
 {
-  RIJNDAEL_context ctx;
+  RIJNDAEL_context *ctx;
+  unsigned char *ctxmem;
   unsigned char scratch[16];

   /* The test vectors are from the AES supplied ones; more or less
@@ -1401,11 +1402,21 @@ selftest_basic_128 (void)
     };
 #endif

-  rijndael_setkey (&ctx, key_128, sizeof (key_128));
-  rijndael_encrypt (&ctx, scratch, plaintext_128);
+  /* Because gcc/ld can only align the CTX struct on 8 bytes on the
+     stack, we need to allocate that context on the heap.  */
+  ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem);
+  if (!ctx)
+    return "failed to allocate memory";
+
+  rijndael_setkey (ctx, key_128, sizeof (key_128));
+  rijndael_encrypt (ctx, scratch, plaintext_128);
   if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128)))
-     return "AES-128 test encryption failed.";
-  rijndael_decrypt (&ctx, scratch, scratch);
+    {
+      xfree (ctxmem);
+      return "AES-128 test encryption failed.";
+    }
+  rijndael_decrypt (ctx, scratch, scratch);
+  xfree (ctxmem);
   if (memcmp (scratch, plaintext_128, sizeof (plaintext_128)))
     return "AES-128 test decryption failed.";

@@ -1416,7 +1427,8 @@ selftest_basic_128 (void)
 static const char*
 selftest_basic_192 (void)
 {
-  RIJNDAEL_context ctx;
+  RIJNDAEL_context *ctx;
+  unsigned char *ctxmem;
   unsigned char scratch[16];

   static unsigned char plaintext_192[16] =
@@ -1436,11 +1448,18 @@ selftest_basic_192 (void)
       0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA
     };

-  rijndael_setkey (&ctx, key_192, sizeof(key_192));
-  rijndael_encrypt (&ctx, scratch, plaintext_192);
+  ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem);
+  if (!ctx)
+    return "failed to allocate memory";
+  rijndael_setkey (ctx, key_192, sizeof(key_192));
+  rijndael_encrypt (ctx, scratch, plaintext_192);
   if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192)))
-    return "AES-192 test encryption failed.";
-  rijndael_decrypt (&ctx, scratch, scratch);
+    {
+      xfree (ctxmem);
+      return "AES-192 test encryption failed.";
+    }
+  rijndael_decrypt (ctx, scratch, scratch);
+  xfree (ctxmem);
   if (memcmp (scratch, plaintext_192, sizeof (plaintext_192)))
     return "AES-192 test decryption failed.";

@@ -1452,7 +1471,8 @@ selftest_basic_192 (void)
 static const char*
 selftest_basic_256 (void)
 {
-  RIJNDAEL_context ctx;
+  RIJNDAEL_context *ctx;
+  unsigned char *ctxmem;
   unsigned char scratch[16];

   static unsigned char plaintext_256[16] =
@@ -1473,11 +1493,18 @@ selftest_basic_256 (void)
       0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3
     };

-  rijndael_setkey (&ctx, key_256, sizeof(key_256));
-  rijndael_encrypt (&ctx, scratch, plaintext_256);
+  ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem);
+  if (!ctx)
+    return "failed to allocate memory";
+  rijndael_setkey (ctx, key_256, sizeof(key_256));
+  rijndael_encrypt (ctx, scratch, plaintext_256);
   if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256)))
-    return "AES-256 test encryption failed.";
-  rijndael_decrypt (&ctx, scratch, scratch);
+    {
+      xfree (ctxmem);
+      return "AES-256 test encryption failed.";
+    }
+  rijndael_decrypt (ctx, scratch, scratch);
+  xfree (ctxmem);
   if (memcmp (scratch, plaintext_256, sizeof (plaintext_256)))
     return "AES-256 test decryption failed.";

-----------------------------------------------------------------------

Summary of changes:
 cipher/cipher-selftest.c | 29 +++++++++++++++++++
 cipher/cipher-selftest.h |  5 ++++
 cipher/rijndael.c        | 57 +++++++++++++++++++++++++++----------
 random/rndw32.c          |  2 +-
 src/secmem.c             | 74 +++++++++++++++++++++++++-----------------------
 tests/fipsdrv.c          |  4 +--
 tests/gchash.c           |  2 +-
 tests/random.c           |  7 +++--
 8 files changed, 124 insertions(+), 56 deletions(-)

hooks/post-receive
--

-- 
The GNU crypto library
http://git.gnupg.org

_______________________________________________
Gnupg-commits mailing list
Gnupg-commits@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits
Werner Koch | 1 Sep 07:35 2015

[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-259-gc17f84b

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  c17f84bd02d7ee93845e92e20f6ddba814961588 (commit)
       via  dd87639abd38afc91a6f27af33f0ba17402ad02d (commit)
      from  a785cc3db0c4e8eb8ebbf784b833a40d2c42ec3e (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit c17f84bd02d7ee93845e92e20f6ddba814961588
Author: Werner Koch <wk@gnupg.org>
Date:   Mon Aug 31 23:13:27 2015 +0200

    rsa: Add verify after sign to avoid Lenstra's CRT attack.

    * cipher/rsa.c (rsa_sign): Check the CRT.
    --

    Failures in the computation of the CRT (e.g. due to faulty hardware)
    can lead to a leak of the private key.  The standard precaution
    against this is to verify the signature after signing.  GnuPG does
    this itself and even has an option to disable it.  However, the low
    performance impact of this extra precaution suggests that it should
    always be done, and Libgcrypt is the right place for it.  For
    decryption this is not done, because the application will detect the
    failure due to garbled plaintext, and in any case no key-derived
    material will be sent to the user.

    Signed-off-by: Werner Koch <wk@gnupg.org>
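
The added check is cheap because it needs only the public exponent.  A toy
illustration of the sign-then-verify idea with textbook numbers (no CRT and
nothing from the actual Libgcrypt code):

#include <stdio.h>
#include <stdint.h>

/* Square-and-multiply modular exponentiation. */
static uint64_t
modpow (uint64_t b, uint64_t e, uint64_t n)
{
  uint64_t r = 1;

  for (b %= n; e; e >>= 1)
    {
      if (e & 1)
        r = r * b % n;
      b = b * b % n;
    }
  return r;
}

int
main (void)
{
  const uint64_t n = 3233, e = 17, d = 2753;  /* toy key: n = 61*53 */
  uint64_t m = 65;
  uint64_t sig = modpow (m, d, n);  /* sign; a CRT fault would corrupt this */

  /* Verify before releasing the signature: sig^e mod n must give back
     the input.  A corrupted signature is thus never handed out.  */
  if (modpow (sig, e, n) != m)
    puts ("BAD SIGNATURE - not released");
  else
    printf ("signature %llu ok\n", (unsigned long long)sig);
  return 0;
}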

diff --git a/cipher/rsa.c b/cipher/rsa.c
index e4f73d5..45a481b 100644
--- a/cipher/rsa.c
+++ b/cipher/rsa.c
@@ -1112,7 +1112,9 @@ rsa_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
   struct pk_encoding_ctx ctx;
   gcry_mpi_t data = NULL;
   RSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL, NULL};
+  RSA_public_key pk;
   gcry_mpi_t sig = NULL;
+  gcry_mpi_t result = NULL;

   _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN,
                                    rsa_get_nbits (keyparms));
@@ -1148,11 +1150,25 @@ rsa_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
         }
     }

-  /* Do RSA computation and build the result.  */
+  /* Do RSA computation.  */
   sig = mpi_new (0);
   secret (sig, data, &sk);
   if (DBG_CIPHER)
     log_printmpi ("rsa_sign    res", sig);
+
+  /* Check that the created signature is good.  This detects a failure
+     of the CRT algorithm  (Lenstra's attack on RSA's use of the CRT).  */
+  result = mpi_new (0);
+  pk.n = sk.n;
+  pk.e = sk.e;
+  public (result, sig, &pk);
+  if (mpi_cmp (result, data))
+    {
+      rc = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
+
+  /* Convert the result.  */
   if ((ctx.flags & PUBKEY_FLAG_FIXEDLEN))
     {
       /* We need to make sure to return the correct length to avoid
@@ -1172,6 +1188,7 @@ rsa_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)

 
  leave:
+  _gcry_mpi_release (result);
   _gcry_mpi_release (sig);
   _gcry_mpi_release (sk.n);
   _gcry_mpi_release (sk.e);

commit dd87639abd38afc91a6f27af33f0ba17402ad02d
Author: Werner Koch <wk@gnupg.org>
Date:   Mon Aug 31 22:41:12 2015 +0200

    Add pubkey algo id for EdDSA.

    * src/gcrypt.h.in (GCRY_PK_EDDSA): New.
    --

    These ids are not actually used by Libgcrypt but other software makes
    use of such algorithm ids.  Thus we provide them here.

    Signed-off-by: Werner Koch <wk@gnupg.org>

diff --git a/NEWS b/NEWS
index d90ee6d..22565ed 100644
--- a/NEWS
+++ b/NEWS
@@ -39,6 +39,7 @@ Noteworthy changes in version 1.7.0 (unreleased)
  GCRY_CIPHER_MODE_OCB            NEW.
  GCRYCTL_SET_TAGLEN              NEW.
  gcry_cipher_final               NEW macro.
+ GCRY_PK_EDDSA                   NEW constant.

 
 Noteworthy changes in version 1.6.0 (2013-12-16)
diff --git a/src/gcrypt.h.in b/src/gcrypt.h.in
index 884034c..585da6a 100644
--- a/src/gcrypt.h.in
+++ b/src/gcrypt.h.in
@@ -1053,8 +1053,9 @@ enum gcry_pk_algos
     GCRY_PK_DSA   = 17,     /* Digital Signature Algorithm.  */
     GCRY_PK_ECC   = 18,     /* Generic ECC.  */
     GCRY_PK_ELG   = 20,     /* Elgamal       */
-    GCRY_PK_ECDSA = 301,    /* (deprecated: use 18).  */
-    GCRY_PK_ECDH  = 302     /* (deprecated: use 18).  */
+    GCRY_PK_ECDSA = 301,    /* (only for external use).  */
+    GCRY_PK_ECDH  = 302,    /* (only for external use).  */
+    GCRY_PK_EDDSA = 303     /* (only for external use).  */
   };

 /* Flags describing usage capabilities of a PK algorithm. */

-----------------------------------------------------------------------

Summary of changes:
 NEWS            |  1 +
 cipher/rsa.c    | 19 ++++++++++++++++++-
 src/gcrypt.h.in |  5 +++--
 3 files changed, 22 insertions(+), 3 deletions(-)

hooks/post-receive
--

-- 
The GNU crypto library
http://git.gnupg.org

_______________________________________________
Gnupg-commits mailing list
Gnupg-commits@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits
Werner Koch | 25 Aug 21:15 2015

[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-257-ga785cc3

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  a785cc3db0c4e8eb8ebbf784b833a40d2c42ec3e (commit)
      from  fb3cb47b0a29d3e73150297aa4495c20915e4a75 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit a785cc3db0c4e8eb8ebbf784b833a40d2c42ec3e
Author: Werner Koch <wk@gnupg.org>
Date:   Tue Aug 25 21:11:05 2015 +0200

    Add configure option --enable-build-timestamp.

    * configure.ac (BUILD_TIMESTAMP): Set to "<none>" by default.
    --

    This is based on
    libgpg-error commit d620005fd1a655d591fccb44639e22ea445e4554
    but changed to be disabled by default.  Check there for some
    background.

    Signed-off-by: Werner Koch <wk@gnupg.org>
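
In practical terms the option gives three behaviours: a plain ./configure
now records "<none>" as the timestamp, ./configure --enable-build-timestamp
stamps the current UTC time as before, and ./configure
--enable-build-timestamp=2015-08-25T21:00+0000 (or any other fixed string)
records exactly that value, which is what a reproducible build wants.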

diff --git a/configure.ac b/configure.ac
index 48e2179..2acfa36 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2272,7 +2272,16 @@ changequote([,])dnl
 BUILD_FILEVERSION="${BUILD_FILEVERSION}mym4_revision_dec"
 AC_SUBST(BUILD_FILEVERSION)

-BUILD_TIMESTAMP=`date -u +%Y-%m-%dT%H:%M+0000 2>/dev/null || date`
+AC_ARG_ENABLE([build-timestamp],
+  AC_HELP_STRING([--enable-build-timestamp],
+                 [set an explicit build timestamp for reproducibility.
+                  (default is the current time in ISO-8601 format)]),
+     [if test "$enableval" = "yes"; then
+        BUILD_TIMESTAMP=`date -u +%Y-%m-%dT%H:%M+0000 2>/dev/null || date`
+      else
+        BUILD_TIMESTAMP="$enableval"
+      fi],
+     [BUILD_TIMESTAMP="<none>"])
 AC_SUBST(BUILD_TIMESTAMP)
 AC_DEFINE_UNQUOTED(BUILD_TIMESTAMP, "$BUILD_TIMESTAMP",
                    [The time this package was configured for a build])

-----------------------------------------------------------------------

Summary of changes:
 configure.ac | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

hooks/post-receive
--

-- 
The GNU crypto library
http://git.gnupg.org

_______________________________________________
Gnupg-commits mailing list
Gnupg-commits@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits
Werner Koch | 23 Aug 17:22 2015

[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-256-gfb3cb47

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  fb3cb47b0a29d3e73150297aa4495c20915e4a75 (commit)
      from  65639ecaaeba642e40487446c40d045482001285 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit fb3cb47b0a29d3e73150297aa4495c20915e4a75
Author: Werner Koch <wk@gnupg.org>
Date:   Sun Aug 23 17:20:18 2015 +0200

    tests: Add missing files for the make distcheck target.

    * tests/Makefile.am (EXTRA_DIST): Add sha3-x test vector files.

    Signed-off-by: Werner Koch <wk@gnupg.org>

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 9f8839a..a5c10dd 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -57,7 +57,8 @@ noinst_HEADERS = t-common.h

 EXTRA_DIST = README rsa-16k.key cavs_tests.sh cavs_driver.pl \
 	     pkcs1v2-oaep.h pkcs1v2-pss.h pkcs1v2-v15c.h pkcs1v2-v15s.h \
-	     t-ed25519.inp stopwatch.h hashtest-256g.in
+	     t-ed25519.inp stopwatch.h hashtest-256g.in \
+	     sha3-224.h sha3-256.h sha3-384.h sha3-512.h

 LDADD = $(standard_ldadd) $(GPG_ERROR_LIBS)
 t_lock_LDADD = $(standard_ldadd) $(GPG_ERROR_MT_LIBS)

-----------------------------------------------------------------------

Summary of changes:
 tests/Makefile.am | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

hooks/post-receive
--

-- 
The GNU crypto library
http://git.gnupg.org

_______________________________________________
Gnupg-commits mailing list
Gnupg-commits@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits
Werner Koch | 19 Aug 12:46 2015

[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-255-g65639ec

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  65639ecaaeba642e40487446c40d045482001285 (commit)
      from  48822ae0b436bcea0fe92dbf0d88475ba3179320 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 65639ecaaeba642e40487446c40d045482001285
Author: Werner Koch <wk@gnupg.org>
Date:   Wed Aug 19 12:43:43 2015 +0200

    Change SHA-3 algorithm ids

    * src/gcrypt.h.in (GCRY_MD_SHA3_224, GCRY_MD_SHA3_256)
    (GCRY_MD_SHA3_384, GCRY_MD_SHA3_512): Change values.
    --

    By using algorithm ids outside of the RFC-4880 range we make debugging
    of GnuPG easier.

    Signed-off-by: Werner Koch <wk@gnupg.org>

diff --git a/src/gcrypt.h.in b/src/gcrypt.h.in
index 4b4646b..884034c 100644
--- a/src/gcrypt.h.in
+++ b/src/gcrypt.h.in
@@ -1144,7 +1144,7 @@ gcry_error_t gcry_pubkey_get_sexp (gcry_sexp_t *r_sexp,
  ************************************/

 /* Algorithm IDs for the hash functions we know about. Not all of them
-   are implemnted. */
+   are implemented. */
 enum gcry_md_algos
   {
     GCRY_MD_NONE    = 0,
@@ -1158,10 +1158,6 @@ enum gcry_md_algos
     GCRY_MD_SHA384  = 9,
     GCRY_MD_SHA512  = 10,
     GCRY_MD_SHA224  = 11,
-    GCRY_MD_SHA3_224= 12,
-    GCRY_MD_SHA3_256= 13,
-    GCRY_MD_SHA3_384= 14,
-    GCRY_MD_SHA3_512= 15,

     GCRY_MD_MD4           = 301,
     GCRY_MD_CRC32         = 302,
@@ -1173,7 +1169,11 @@ enum gcry_md_algos
     GCRY_MD_GOSTR3411_94  = 308, /* GOST R 34.11-94.  */
     GCRY_MD_STRIBOG256    = 309, /* GOST R 34.11-2012, 256 bit.  */
     GCRY_MD_STRIBOG512    = 310, /* GOST R 34.11-2012, 512 bit.  */
-    GCRY_MD_GOSTR3411_CP  = 311  /* GOST R 34.11-94 with CryptoPro-A S-Box.  */
+    GCRY_MD_GOSTR3411_CP  = 311,  /* GOST R 34.11-94 with CryptoPro-A S-Box.  */
+    GCRY_MD_SHA3_224      = 312,
+    GCRY_MD_SHA3_256      = 313,
+    GCRY_MD_SHA3_384      = 314,
+    GCRY_MD_SHA3_512      = 315
   };

 /* Flags used with the open function.  */

-----------------------------------------------------------------------

Summary of changes:
 src/gcrypt.h.in | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

hooks/post-receive
--

-- 
The GNU crypto library
http://git.gnupg.org

_______________________________________________
Gnupg-commits mailing list
Gnupg-commits@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits
Adrya Stembridge | 18 Aug 16:45 2015

Libgcrypt warning: MD5 used - FIPS mode inactivated

I'm at my wit's end with an odd problem involving libgcrypt and am hoping the list can offer insight or assistance.

In summary: 
I recently activated the FIPS module on a CentOS 6.7 machine and immediately began seeing libgcrypt warnings when using certain resources (http and tsql, for example). This only occurs on one system; another CentOS 6.7 machine using FIPS does not show the libgcrypt warnings.

What could be causing libgcrypt to use MD5 when FIPS is enabled? Is it possible to force libgcrypt to use SHA instead of MD5?
 

Details below... 

Steps to reproduce: 

Enable the OpenSSH FIPS 140-2 module using these instructions.

1) edit /etc/sysconfig/prelink and set PRELINKING=NO. Issue prelink -u -a at a prompt.
2) yum install dracut-fips
3) dracut -f
4) add "fips=1" and "boot=/dev/sda3" to the kernel line of grub.conf. df /boot revealed the correct boot partition.
5) ensure /etc/ssh/sshd_config is configured with:

Protocol 2
Ciphers aes128-ctr,aes192-ctr,aes256-ctr,aes128-cbc,3des-cbc,aes192-cbc,aes256-cbc
Macs hmac-sha1,hmac-sha2-256,hmac-sha2-512


After rebooting, I confirmed that FIPS mode is enabled by using openssl md5 somefile (fails) and openssl sha1 somefile (succeeds). Also:

$ cat /proc/sys/crypto/fips_enabled
1
Finally, knowing that FIPS is enabled, I attempted to connect to a remote SQL Server instance with a config that worked prior to enabling FIPS:

[mybox ~]# tsql -S egServer80 -U myusername
Password:
locale is "en_US.UTF-8"
locale charset is "UTF-8"
using default charset "UTF-8"
Error 20002 (severity 9):
    Adaptive Server connection failed
There was a problem connecting to the server
I checked the log files and found this:

tsql: Libgcrypt warning: MD5 used - FIPS mode inactivated

Enabling debug in freetds yielded this additional error:

14:56:46.617196 3577 (net.c:1366): handshake failed: GnuTLS internal error.

Additional Information: 
Backing out the FIPS module (removing fips=1 from grub.conf) and rebooting sets things back to normal (I was able to tsql into my SQL Server instance again).

I can reproduce the same libgcrypt/tsql error without enabling the FIPS 140-2 module in grub, by creating an empty file /etc/gcrypt/fips_enabled. Removing this file sets the system back to normal, and tsql works again.

CentOS version 6.7
libgcrypt version 1.4.5
freetds version 0.91
openssl version 1.0.1e

As before, a second CentOS 6.7 machine with FIPS enabled and an identical freetds config, connecting to the same external resource, does not have the libgcrypt warnings.
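
For what it is worth, a minimal program like the following should show
whether libgcrypt itself believes it is in FIPS mode on each box (my own
sketch; as far as I can tell from gcrypt.h, gcry_fips_mode_active() is the
intended way to query this):

#include <stdio.h>
#include <gcrypt.h>

int
main (void)
{
  /* The version check is required before any other libgcrypt call. */
  if (!gcry_check_version (NULL))
    {
      fputs ("libgcrypt version mismatch\n", stderr);
      return 1;
    }
  gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0);

  printf ("FIPS mode: %s\n",
          gcry_fips_mode_active () ? "active" : "inactive");
  return 0;
}

(Built with: gcc test-fips.c $(libgcrypt-config --cflags --libs).)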


Many thanks.
_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
Jussi Kivilinna | 11 Aug 22:12 2015

[PATCH] Simplify OCB offset calculation for parallel implementations

* cipher/camellia-glue.c (_gcry_camellia_ocb_crypt)
(_gcry_camellia_ocb_auth): Precalculate Ls array always, instead of
just if 'blkn % <parallel blocks> == 0'.
* cipher/serpent.c (_gcry_serpent_ocb_crypt)
(_gcry_serpent_ocb_auth): Ditto.
* cipher/rijndael-aesni.c (get_l): Remove low-bit checks.
(aes_ocb_enc, aes_ocb_dec, _gcry_aes_aesni_ocb_auth): Handle leading
blocks until block counter is multiple of 4, so that parallel block
processing loop can use 'c->u_mode.ocb.L' array directly.
* tests/basic.c (check_ocb_cipher_largebuf): Rename to...
(check_ocb_cipher_largebuf_split): ...this and add option to process
large buffer as two split buffers.
(check_ocb_cipher_largebuf): New.
--

The patch simplifies the source and reduces object size.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
 cipher/camellia-glue.c  |  254 +++++++++------------
 cipher/rijndael-aesni.c |  562 ++++++++++++++++++++++++-----------------------
 cipher/serpent.c        |  370 +++++++++++++------------------
 tests/basic.c           |   48 ++++
 4 files changed, 586 insertions(+), 648 deletions(-)
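
The control flow is the same in every function the patch touches: consume
leading blocks one at a time until the block counter is a multiple of the
parallel width, then run the bulk loop against the fixed, precalculated
offset table.  A stripped-down sketch of that shape (illustrative only;
W, process_one and process_four are stand-ins, not names from the patch):

#include <stddef.h>

enum { W = 4 };  /* parallel width, e.g. 4 blocks on the AES-NI path */

static void process_one  (unsigned long i) { (void)i; /* one block */ }
static void process_four (unsigned long i) { (void)i; /* W blocks  */ }

void
ocb_crypt_sketch (unsigned long *n, size_t nblocks)
{
  /* Leading blocks: advance until *n is a multiple of W, so the bulk
     loop below can index the precalculated L table directly.  */
  for (; nblocks && *n % W; nblocks--)
    process_one (++*n);

  /* Bulk loop: the counter stays W-aligned throughout. */
  for (; nblocks >= W; nblocks -= W)
    {
      *n += W;
      process_four (*n);
    }

  /* Trailing blocks, one at a time again. */
  for (; nblocks; nblocks--)
    process_one (++*n);
}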

diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 2d5dd20..dee0169 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -631,58 +631,47 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       int did_use_aesni_avx2 = 0;
       const void *Ls[32];
+      unsigned int n = 32 - (blkn % 32);
+      const void **l;
       int i;

-      if (blkn % 32 == 0)
+      if (nblocks >= 32)
 	{
 	  for (i = 0; i < 32; i += 8)
 	    {
-	      Ls[i + 0] = c->u_mode.ocb.L[0];
-	      Ls[i + 1] = c->u_mode.ocb.L[1];
-	      Ls[i + 2] = c->u_mode.ocb.L[0];
-	      Ls[i + 3] = c->u_mode.ocb.L[2];
-	      Ls[i + 4] = c->u_mode.ocb.L[0];
-	      Ls[i + 5] = c->u_mode.ocb.L[1];
-	      Ls[i + 6] = c->u_mode.ocb.L[0];
+	      Ls[(i + 0 + n) % 32] = c->u_mode.ocb.L[0];
+	      Ls[(i + 1 + n) % 32] = c->u_mode.ocb.L[1];
+	      Ls[(i + 2 + n) % 32] = c->u_mode.ocb.L[0];
+	      Ls[(i + 3 + n) % 32] = c->u_mode.ocb.L[2];
+	      Ls[(i + 4 + n) % 32] = c->u_mode.ocb.L[0];
+	      Ls[(i + 5 + n) % 32] = c->u_mode.ocb.L[1];
+	      Ls[(i + 6 + n) % 32] = c->u_mode.ocb.L[0];
 	    }

-	  Ls[7] = c->u_mode.ocb.L[3];
-	  Ls[15] = c->u_mode.ocb.L[4];
-	  Ls[23] = c->u_mode.ocb.L[3];
-	}
+	  Ls[(7 + n) % 32] = c->u_mode.ocb.L[3];
+	  Ls[(15 + n) % 32] = c->u_mode.ocb.L[4];
+	  Ls[(23 + n) % 32] = c->u_mode.ocb.L[3];
+	  l = &Ls[(31 + n) % 32];

-      /* Process data in 32 block chunks. */
-      while (nblocks >= 32)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 32 == 0)
+	  /* Process data in 32 block chunks. */
+	  while (nblocks >= 32)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 32;
-	      Ls[31] = ocb_get_l(c, l_tmp, blkn);
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 32);
+
+	      if (encrypt)
+		_gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+						  c->u_ctr.ctr, Ls);
+	      else
+		_gcry_camellia_aesni_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+						  c->u_ctr.ctr, Ls);
+
+	      nblocks -= 32;
+	      outbuf += 32 * CAMELLIA_BLOCK_SIZE;
+	      inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
+	      did_use_aesni_avx2 = 1;
 	    }
-	  else
-	    {
-	      for (i = 0; i < 32; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
-
-	  if (encrypt)
-	    _gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
-					      c->u_ctr.ctr, Ls);
-	  else
-	    _gcry_camellia_aesni_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
-					      c->u_ctr.ctr, Ls);
-
-	  nblocks -= 32;
-	  outbuf += 32 * CAMELLIA_BLOCK_SIZE;
-	  inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
-	  did_use_aesni_avx2 = 1;
 	}

       if (did_use_aesni_avx2)
@@ -703,56 +692,45 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       int did_use_aesni_avx = 0;
       const void *Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      const void **l;
       int i;

-      if (blkn % 16 == 0)
+      if (nblocks >= 16)
 	{
 	  for (i = 0; i < 16; i += 8)
 	    {
-	      Ls[i + 0] = c->u_mode.ocb.L[0];
-	      Ls[i + 1] = c->u_mode.ocb.L[1];
-	      Ls[i + 2] = c->u_mode.ocb.L[0];
-	      Ls[i + 3] = c->u_mode.ocb.L[2];
-	      Ls[i + 4] = c->u_mode.ocb.L[0];
-	      Ls[i + 5] = c->u_mode.ocb.L[1];
-	      Ls[i + 6] = c->u_mode.ocb.L[0];
+	      Ls[(i + 0 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 1 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 2 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 3 + n) % 16] = c->u_mode.ocb.L[2];
+	      Ls[(i + 4 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 5 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 6 + n) % 16] = c->u_mode.ocb.L[0];
 	    }

-	  Ls[7] = c->u_mode.ocb.L[3];
-	}
+	  Ls[(7 + n) % 16] = c->u_mode.ocb.L[3];
+	  l = &Ls[(15 + n) % 16];

-      /* Process data in 16 block chunks. */
-      while (nblocks >= 16)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 16 == 0)
+	  /* Process data in 16 block chunks. */
+	  while (nblocks >= 16)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      Ls[15] = ocb_get_l(c, l_tmp, blkn);
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 16);
+
+	      if (encrypt)
+		_gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+						c->u_ctr.ctr, Ls);
+	      else
+		_gcry_camellia_aesni_avx_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+						c->u_ctr.ctr, Ls);
+
+	      nblocks -= 16;
+	      outbuf += 16 * CAMELLIA_BLOCK_SIZE;
+	      inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
+	      did_use_aesni_avx = 1;
 	    }
-	  else
-	    {
-	      for (i = 0; i < 16; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
-
-	  if (encrypt)
-	    _gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
-					    c->u_ctr.ctr, Ls);
-	  else
-	    _gcry_camellia_aesni_avx_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
-					    c->u_ctr.ctr, Ls);
-
-	  nblocks -= 16;
-	  outbuf += 16 * CAMELLIA_BLOCK_SIZE;
-	  inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
-	  did_use_aesni_avx = 1;
 	}

       if (did_use_aesni_avx)
@@ -803,53 +781,43 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       int did_use_aesni_avx2 = 0;
       const void *Ls[32];
+      unsigned int n = 32 - (blkn % 32);
+      const void **l;
       int i;

-      if (blkn % 32 == 0)
+      if (nblocks >= 32)
 	{
 	  for (i = 0; i < 32; i += 8)
 	    {
-	      Ls[i + 0] = c->u_mode.ocb.L[0];
-	      Ls[i + 1] = c->u_mode.ocb.L[1];
-	      Ls[i + 2] = c->u_mode.ocb.L[0];
-	      Ls[i + 3] = c->u_mode.ocb.L[2];
-	      Ls[i + 4] = c->u_mode.ocb.L[0];
-	      Ls[i + 5] = c->u_mode.ocb.L[1];
-	      Ls[i + 6] = c->u_mode.ocb.L[0];
+	      Ls[(i + 0 + n) % 32] = c->u_mode.ocb.L[0];
+	      Ls[(i + 1 + n) % 32] = c->u_mode.ocb.L[1];
+	      Ls[(i + 2 + n) % 32] = c->u_mode.ocb.L[0];
+	      Ls[(i + 3 + n) % 32] = c->u_mode.ocb.L[2];
+	      Ls[(i + 4 + n) % 32] = c->u_mode.ocb.L[0];
+	      Ls[(i + 5 + n) % 32] = c->u_mode.ocb.L[1];
+	      Ls[(i + 6 + n) % 32] = c->u_mode.ocb.L[0];
 	    }

-	  Ls[7] = c->u_mode.ocb.L[3];
-	  Ls[15] = c->u_mode.ocb.L[4];
-	  Ls[23] = c->u_mode.ocb.L[3];
-	}
+	  Ls[(7 + n) % 32] = c->u_mode.ocb.L[3];
+	  Ls[(15 + n) % 32] = c->u_mode.ocb.L[4];
+	  Ls[(23 + n) % 32] = c->u_mode.ocb.L[3];
+	  l = &Ls[(31 + n) % 32];

-      /* Process data in 32 block chunks. */
-      while (nblocks >= 32)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 32 == 0)
+	  /* Process data in 32 block chunks. */
+	  while (nblocks >= 32)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 32;
-	      Ls[31] = ocb_get_l(c, l_tmp, blkn);
-	    }
-	  else
-	    {
-	      for (i = 0; i < 32; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 32);

-	  _gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
-					    c->u_mode.ocb.aad_sum, Ls);
+	      _gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf,
+						 c->u_mode.ocb.aad_offset,
+						 c->u_mode.ocb.aad_sum, Ls);

-	  nblocks -= 32;
-	  abuf += 32 * CAMELLIA_BLOCK_SIZE;
-	  did_use_aesni_avx2 = 1;
+	      nblocks -= 32;
+	      abuf += 32 * CAMELLIA_BLOCK_SIZE;
+	      did_use_aesni_avx2 = 1;
+	    }
 	}

       if (did_use_aesni_avx2)
@@ -870,51 +838,41 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       int did_use_aesni_avx = 0;
       const void *Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      const void **l;
       int i;

-      if (blkn % 16 == 0)
+      if (nblocks >= 16)
 	{
 	  for (i = 0; i < 16; i += 8)
 	    {
-	      Ls[i + 0] = c->u_mode.ocb.L[0];
-	      Ls[i + 1] = c->u_mode.ocb.L[1];
-	      Ls[i + 2] = c->u_mode.ocb.L[0];
-	      Ls[i + 3] = c->u_mode.ocb.L[2];
-	      Ls[i + 4] = c->u_mode.ocb.L[0];
-	      Ls[i + 5] = c->u_mode.ocb.L[1];
-	      Ls[i + 6] = c->u_mode.ocb.L[0];
+	      Ls[(i + 0 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 1 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 2 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 3 + n) % 16] = c->u_mode.ocb.L[2];
+	      Ls[(i + 4 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 5 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 6 + n) % 16] = c->u_mode.ocb.L[0];
 	    }

-	  Ls[7] = c->u_mode.ocb.L[3];
-	}
+	  Ls[(7 + n) % 16] = c->u_mode.ocb.L[3];
+	  l = &Ls[(15 + n) % 16];

-      /* Process data in 16 block chunks. */
-      while (nblocks >= 16)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 16 == 0)
+	  /* Process data in 16 block chunks. */
+	  while (nblocks >= 16)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      Ls[15] = ocb_get_l(c, l_tmp, blkn);
-	    }
-	  else
-	    {
-	      for (i = 0; i < 16; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 16);

-	  _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
-					    c->u_mode.ocb.aad_sum, Ls);
+	      _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf,
+						c->u_mode.ocb.aad_offset,
+						c->u_mode.ocb.aad_sum, Ls);

-	  nblocks -= 16;
-	  abuf += 16 * CAMELLIA_BLOCK_SIZE;
-	  did_use_aesni_avx = 1;
+	      nblocks -= 16;
+	      abuf += 16 * CAMELLIA_BLOCK_SIZE;
+	      did_use_aesni_avx = 1;
+	    }
 	}

       if (did_use_aesni_avx)
diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 882cc79..be57b3d 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -1307,11 +1307,7 @@ get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i, unsigned char *iv,
   const unsigned char *l;
   unsigned int ntz;

-  if (i & 1)
-    return c->u_mode.ocb.L[0];
-  else if (i & 2)
-    return c->u_mode.ocb.L[1];
-  else if (i & 0xffffffffU)
+  if (i & 0xffffffffU)
     {
       asm ("rep;bsf %k[low], %k[ntz]\n\t"
            : [ntz] "=r" (ntz)
@@ -1376,7 +1372,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
   u64 n = c->u_mode.ocb.data_nblocks;
-  const unsigned char *l[4] = {};
+  const unsigned char *l;
   aesni_prepare_2_6_variable;

   aesni_prepare ();
@@ -1390,103 +1386,112 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
                   [ctr] "m" (*c->u_ctr.ctr)
                 : "memory" );

-  if (nblocks > 3)
+
+  for ( ;nblocks && n % 4; nblocks-- )
+    {
+      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm0,   %%xmm6\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      do_aesni_enc (ctx);
+
+      asm volatile ("pxor   %%xmm5, %%xmm0\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    :
+                    : "memory" );
+
+      inbuf += BLOCKSIZE;
+      outbuf += BLOCKSIZE;
+    }
+
+  for ( ;nblocks > 3 ; nblocks -= 4 )
     {
-      if (n % 4 == 0)
-	{
-	  l[0] = c->u_mode.ocb.L[0];
-	  l[1] = c->u_mode.ocb.L[1];
-	  l[2] = c->u_mode.ocb.L[0];
-	}
-
-      for ( ;nblocks > 3 ; nblocks -= 4 )
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (n % 4 == 0)
-	    {
-	      n += 4;
-	      l[3] = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
-	    }
-	  else
-	    {
-	      l[0] = get_l(c, l_tmp.x1, n + 1, c->u_iv.iv, c->u_ctr.ctr);
-	      l[1] = get_l(c, l_tmp.x1, n + 2, c->u_iv.iv, c->u_ctr.ctr);
-	      l[2] = get_l(c, l_tmp.x1, n + 3, c->u_iv.iv, c->u_ctr.ctr);
-	      l[3] = get_l(c, l_tmp.x1, n + 4, c->u_iv.iv, c->u_ctr.ctr);
-	      n += 4;
-	    }
-
-	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-	  /* Checksum_i = Checksum_{i-1} xor P_i  */
-	  /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
-	  asm volatile ("movdqu %[l0],     %%xmm0\n\t"
-			"movdqu %[inbuf0], %%xmm1\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm1,    %%xmm6\n\t"
-			"pxor   %%xmm5,    %%xmm1\n\t"
-			"movdqu %%xmm5,    %[outbuf0]\n\t"
-			: [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
-			: [l0] "m" (*l[0]),
-			  [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l1],     %%xmm0\n\t"
-			"movdqu %[inbuf1], %%xmm2\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm2,    %%xmm6\n\t"
-			"pxor   %%xmm5,    %%xmm2\n\t"
-			"movdqu %%xmm5,    %[outbuf1]\n\t"
-			: [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
-			: [l1] "m" (*l[1]),
-			  [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l2],     %%xmm0\n\t"
-			"movdqu %[inbuf2], %%xmm3\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm3,    %%xmm6\n\t"
-			"pxor   %%xmm5,    %%xmm3\n\t"
-			"movdqu %%xmm5,    %[outbuf2]\n\t"
-			: [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
-			: [l2] "m" (*l[2]),
-			  [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l3],     %%xmm0\n\t"
-			"movdqu %[inbuf3], %%xmm4\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm4,    %%xmm6\n\t"
-			"pxor   %%xmm5,    %%xmm4\n\t"
-			:
-			: [l3] "m" (*l[3]),
-			  [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
-			: "memory" );
-
-	  do_aesni_enc_vec4 (ctx);
-
-	  asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
-			"pxor   %%xmm0,    %%xmm1\n\t"
-			"movdqu %%xmm1,    %[outbuf0]\n\t"
-			"movdqu %[outbuf1],%%xmm0\n\t"
-			"pxor   %%xmm0,    %%xmm2\n\t"
-			"movdqu %%xmm2,    %[outbuf1]\n\t"
-			"movdqu %[outbuf2],%%xmm0\n\t"
-			"pxor   %%xmm0,    %%xmm3\n\t"
-			"movdqu %%xmm3,    %[outbuf2]\n\t"
-			"pxor   %%xmm5,    %%xmm4\n\t"
-			"movdqu %%xmm4,    %[outbuf3]\n\t"
-			: [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
-			  [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
-			  [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
-			  [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
-			:
-			: "memory" );
-
-	  outbuf += 4*BLOCKSIZE;
-	  inbuf  += 4*BLOCKSIZE;
-	}
+      /* l_tmp will be used only every 65536-th block. */
+      n += 4;
+      l = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+      asm volatile ("movdqu %[l0],     %%xmm0\n\t"
+		    "movdqu %[inbuf0], %%xmm1\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm1,    %%xmm6\n\t"
+		    "pxor   %%xmm5,    %%xmm1\n\t"
+		    "movdqu %%xmm5,    %[outbuf0]\n\t"
+		    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+		    : [l0] "m" (*c->u_mode.ocb.L[0]),
+		      [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l1],     %%xmm0\n\t"
+		    "movdqu %[inbuf1], %%xmm2\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm2,    %%xmm6\n\t"
+		    "pxor   %%xmm5,    %%xmm2\n\t"
+		    "movdqu %%xmm5,    %[outbuf1]\n\t"
+		    : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+		    : [l1] "m" (*c->u_mode.ocb.L[1]),
+		      [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l2],     %%xmm0\n\t"
+		    "movdqu %[inbuf2], %%xmm3\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm3,    %%xmm6\n\t"
+		    "pxor   %%xmm5,    %%xmm3\n\t"
+		    "movdqu %%xmm5,    %[outbuf2]\n\t"
+		    : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
+		    : [l2] "m" (*c->u_mode.ocb.L[0]),
+		      [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l3],     %%xmm0\n\t"
+		    "movdqu %[inbuf3], %%xmm4\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm4,    %%xmm6\n\t"
+		    "pxor   %%xmm5,    %%xmm4\n\t"
+		    :
+		    : [l3] "m" (*l),
+		      [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+		    : "memory" );
+
+      do_aesni_enc_vec4 (ctx);
+
+      asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm1\n\t"
+		    "movdqu %%xmm1,    %[outbuf0]\n\t"
+		    "movdqu %[outbuf1],%%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm2\n\t"
+		    "movdqu %%xmm2,    %[outbuf1]\n\t"
+		    "movdqu %[outbuf2],%%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm3\n\t"
+		    "movdqu %%xmm3,    %[outbuf2]\n\t"
+		    "pxor   %%xmm5,    %%xmm4\n\t"
+		    "movdqu %%xmm4,    %[outbuf3]\n\t"
+		    : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
+		      [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
+		      [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
+		      [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+		    :
+		    : "memory" );
+
+      outbuf += 4*BLOCKSIZE;
+      inbuf  += 4*BLOCKSIZE;
     }

   for ( ;nblocks; nblocks-- )
     {
-      l[0] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);

       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Checksum_i = Checksum_{i-1} xor P_i  */
@@ -1497,7 +1502,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
                     "pxor   %%xmm0,   %%xmm6\n\t"
                     "pxor   %%xmm5,   %%xmm0\n\t"
                     :
-                    : [l] "m" (*l[0]),
+                    : [l] "m" (*l),
                       [inbuf] "m" (*inbuf)
                     : "memory" );

@@ -1537,7 +1542,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
   u64 n = c->u_mode.ocb.data_nblocks;
-  const unsigned char *l[4] = {};
+  const unsigned char *l;
   aesni_prepare_2_6_variable;

   aesni_prepare ();
@@ -1551,103 +1556,111 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
                   [ctr] "m" (*c->u_ctr.ctr)
                 : "memory" );

-  if (nblocks > 3)
+  for ( ;nblocks && n % 4; nblocks-- )
+    {
+      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      do_aesni_dec (ctx);
+
+      asm volatile ("pxor   %%xmm5, %%xmm0\n\t"
+                    "pxor   %%xmm0, %%xmm6\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    :
+                    : "memory" );
+
+      inbuf += BLOCKSIZE;
+      outbuf += BLOCKSIZE;
+    }
+
+  for ( ;nblocks > 3 ; nblocks -= 4 )
     {
-      if (n % 4 == 0)
-	{
-	  l[0] = c->u_mode.ocb.L[0];
-	  l[1] = c->u_mode.ocb.L[1];
-	  l[2] = c->u_mode.ocb.L[0];
-	}
-
-      for ( ;nblocks > 3 ; nblocks -= 4 )
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (n % 4 == 0)
-	    {
-	      n += 4;
-	      l[3] = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
-	    }
-	  else
-	    {
-	      l[0] = get_l(c, l_tmp.x1, n + 1, c->u_iv.iv, c->u_ctr.ctr);
-	      l[1] = get_l(c, l_tmp.x1, n + 2, c->u_iv.iv, c->u_ctr.ctr);
-	      l[2] = get_l(c, l_tmp.x1, n + 3, c->u_iv.iv, c->u_ctr.ctr);
-	      l[3] = get_l(c, l_tmp.x1, n + 4, c->u_iv.iv, c->u_ctr.ctr);
-	      n += 4;
-	    }
-
-	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-	  /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
-	  /* Checksum_i = Checksum_{i-1} xor P_i  */
-	  asm volatile ("movdqu %[l0],     %%xmm0\n\t"
-			"movdqu %[inbuf0], %%xmm1\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm1\n\t"
-			"movdqu %%xmm5,    %[outbuf0]\n\t"
-			: [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
-			: [l0] "m" (*l[0]),
-			  [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l1],     %%xmm0\n\t"
-			"movdqu %[inbuf1], %%xmm2\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm2\n\t"
-			"movdqu %%xmm5,    %[outbuf1]\n\t"
-			: [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
-			: [l1] "m" (*l[1]),
-			  [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l2],     %%xmm0\n\t"
-			"movdqu %[inbuf2], %%xmm3\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm3\n\t"
-			"movdqu %%xmm5,    %[outbuf2]\n\t"
-			: [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
-			: [l2] "m" (*l[2]),
-			  [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l3],     %%xmm0\n\t"
-			"movdqu %[inbuf3], %%xmm4\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm4\n\t"
-			:
-			: [l3] "m" (*l[3]),
-			  [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
-			: "memory" );
-
-	  do_aesni_dec_vec4 (ctx);
-
-	  asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
-			"pxor   %%xmm0,    %%xmm1\n\t"
-			"movdqu %%xmm1,    %[outbuf0]\n\t"
-			"movdqu %[outbuf1],%%xmm0\n\t"
-			"pxor   %%xmm0,    %%xmm2\n\t"
-			"movdqu %%xmm2,    %[outbuf1]\n\t"
-			"movdqu %[outbuf2],%%xmm0\n\t"
-			"pxor   %%xmm0,    %%xmm3\n\t"
-			"movdqu %%xmm3,    %[outbuf2]\n\t"
-			"pxor   %%xmm5,    %%xmm4\n\t"
-			"movdqu %%xmm4,    %[outbuf3]\n\t"
-			"pxor   %%xmm1,    %%xmm6\n\t"
-			"pxor   %%xmm2,    %%xmm6\n\t"
-			"pxor   %%xmm3,    %%xmm6\n\t"
-			"pxor   %%xmm4,    %%xmm6\n\t"
-			: [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
-			  [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
-			  [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
-			  [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
-			:
-			: "memory" );
-
-	  outbuf += 4*BLOCKSIZE;
-	  inbuf  += 4*BLOCKSIZE;
-	}
+      /* l_tmp will be used only every 65536-th block. */
+      n += 4;
+      l = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      asm volatile ("movdqu %[l0],     %%xmm0\n\t"
+		    "movdqu %[inbuf0], %%xmm1\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm1\n\t"
+		    "movdqu %%xmm5,    %[outbuf0]\n\t"
+		    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+		    : [l0] "m" (*c->u_mode.ocb.L[0]),
+		      [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l1],     %%xmm0\n\t"
+		    "movdqu %[inbuf1], %%xmm2\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm2\n\t"
+		    "movdqu %%xmm5,    %[outbuf1]\n\t"
+		    : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+		    : [l1] "m" (*c->u_mode.ocb.L[1]),
+		      [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l2],     %%xmm0\n\t"
+		    "movdqu %[inbuf2], %%xmm3\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm3\n\t"
+		    "movdqu %%xmm5,    %[outbuf2]\n\t"
+		    : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
+		    : [l2] "m" (*c->u_mode.ocb.L[0]),
+		      [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l3],     %%xmm0\n\t"
+		    "movdqu %[inbuf3], %%xmm4\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm4\n\t"
+		    :
+		    : [l3] "m" (*l),
+		      [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+		    : "memory" );
+
+      do_aesni_dec_vec4 (ctx);
+
+      asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm1\n\t"
+		    "movdqu %%xmm1,    %[outbuf0]\n\t"
+		    "movdqu %[outbuf1],%%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm2\n\t"
+		    "movdqu %%xmm2,    %[outbuf1]\n\t"
+		    "movdqu %[outbuf2],%%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm3\n\t"
+		    "movdqu %%xmm3,    %[outbuf2]\n\t"
+		    "pxor   %%xmm5,    %%xmm4\n\t"
+		    "movdqu %%xmm4,    %[outbuf3]\n\t"
+		    "pxor   %%xmm1,    %%xmm6\n\t"
+		    "pxor   %%xmm2,    %%xmm6\n\t"
+		    "pxor   %%xmm3,    %%xmm6\n\t"
+		    "pxor   %%xmm4,    %%xmm6\n\t"
+		    : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
+		      [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
+		      [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
+		      [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+		    :
+		    : "memory" );
+
+      outbuf += 4*BLOCKSIZE;
+      inbuf  += 4*BLOCKSIZE;
     }

   for ( ;nblocks; nblocks-- )
     {
-      l[0] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);

       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
@@ -1657,7 +1670,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
                     "pxor   %%xmm1,   %%xmm5\n\t"
                     "pxor   %%xmm5,   %%xmm0\n\t"
                     :
-                    : [l] "m" (*l[0]),
+                    : [l] "m" (*l),
                       [inbuf] "m" (*inbuf)
                     : "memory" );

@@ -1708,7 +1721,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
   u64 n = c->u_mode.ocb.aad_nblocks;
-  const unsigned char *l[4] = {};
+  const unsigned char *l;
   aesni_prepare_2_6_variable;

   aesni_prepare ();
@@ -1722,90 +1735,91 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                   [ctr] "m" (*c->u_mode.ocb.aad_sum)
                 : "memory" );

-  if (nblocks > 3)
+  for ( ;nblocks && n % 4; nblocks-- )
+    {
+      l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+                c->u_mode.ocb.aad_sum);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[abuf],  %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [abuf] "m" (*abuf)
+                    : "memory" );
+
+      do_aesni_enc (ctx);
+
+      asm volatile ("pxor   %%xmm0,   %%xmm6\n\t"
+                    :
+                    :
+                    : "memory" );
+
+      abuf += BLOCKSIZE;
+    }
+
+  for ( ;nblocks > 3 ; nblocks -= 4 )
     {
-      if (n % 4 == 0)
-	{
-	  l[0] = c->u_mode.ocb.L[0];
-	  l[1] = c->u_mode.ocb.L[1];
-	  l[2] = c->u_mode.ocb.L[0];
-	}
-
-      for ( ;nblocks > 3 ; nblocks -= 4 )
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (n % 4 == 0)
-	    {
-	      n += 4;
-	      l[3] = get_l(c, l_tmp.x1, n, c->u_mode.ocb.aad_offset,
-			  c->u_mode.ocb.aad_sum);
-	    }
-	  else
-	    {
-	      l[0] = get_l(c, l_tmp.x1, n + 1, c->u_mode.ocb.aad_offset,
-			  c->u_mode.ocb.aad_sum);
-	      l[1] = get_l(c, l_tmp.x1, n + 2, c->u_mode.ocb.aad_offset,
-			  c->u_mode.ocb.aad_sum);
-	      l[2] = get_l(c, l_tmp.x1, n + 3, c->u_mode.ocb.aad_offset,
-			  c->u_mode.ocb.aad_sum);
-	      l[3] = get_l(c, l_tmp.x1, n + 4, c->u_mode.ocb.aad_offset,
-			  c->u_mode.ocb.aad_sum);
-	      n += 4;
-	    }
-
-	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-	  /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
-	  asm volatile ("movdqu %[l0],     %%xmm0\n\t"
-			"movdqu %[abuf0],  %%xmm1\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm1\n\t"
-			:
-			: [l0] "m" (*l[0]),
-			  [abuf0] "m" (*(abuf + 0 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l1],     %%xmm0\n\t"
-			"movdqu %[abuf1],  %%xmm2\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm2\n\t"
-			:
-			: [l1] "m" (*l[1]),
-			  [abuf1] "m" (*(abuf + 1 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l2],     %%xmm0\n\t"
-			"movdqu %[abuf2],  %%xmm3\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm3\n\t"
-			:
-			: [l2] "m" (*l[2]),
-			  [abuf2] "m" (*(abuf + 2 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l3],     %%xmm0\n\t"
-			"movdqu %[abuf3],  %%xmm4\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm4\n\t"
-			:
-			: [l3] "m" (*l[3]),
-			  [abuf3] "m" (*(abuf + 3 * BLOCKSIZE))
-			: "memory" );
-
-	  do_aesni_enc_vec4 (ctx);
-
-	  asm volatile ("pxor   %%xmm1,   %%xmm6\n\t"
-			"pxor   %%xmm2,   %%xmm6\n\t"
-			"pxor   %%xmm3,   %%xmm6\n\t"
-			"pxor   %%xmm4,   %%xmm6\n\t"
-			:
-			:
-			: "memory" );
-
-	  abuf += 4*BLOCKSIZE;
-	}
+      /* l_tmp will be used only every 65536-th block. */
+      n += 4;
+      l = get_l(c, l_tmp.x1, n, c->u_mode.ocb.aad_offset,
+		c->u_mode.ocb.aad_sum);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      asm volatile ("movdqu %[l0],     %%xmm0\n\t"
+		    "movdqu %[abuf0],  %%xmm1\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm1\n\t"
+		    :
+		    : [l0] "m" (*c->u_mode.ocb.L[0]),
+		      [abuf0] "m" (*(abuf + 0 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l1],     %%xmm0\n\t"
+		    "movdqu %[abuf1],  %%xmm2\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm2\n\t"
+		    :
+		    : [l1] "m" (*c->u_mode.ocb.L[1]),
+		      [abuf1] "m" (*(abuf + 1 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l2],     %%xmm0\n\t"
+		    "movdqu %[abuf2],  %%xmm3\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm3\n\t"
+		    :
+		    : [l2] "m" (*c->u_mode.ocb.L[0]),
+		      [abuf2] "m" (*(abuf + 2 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l3],     %%xmm0\n\t"
+		    "movdqu %[abuf3],  %%xmm4\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm4\n\t"
+		    :
+		    : [l3] "m" (*l),
+		      [abuf3] "m" (*(abuf + 3 * BLOCKSIZE))
+		    : "memory" );
+
+      do_aesni_enc_vec4 (ctx);
+
+      asm volatile ("pxor   %%xmm1,   %%xmm6\n\t"
+		    "pxor   %%xmm2,   %%xmm6\n\t"
+		    "pxor   %%xmm3,   %%xmm6\n\t"
+		    "pxor   %%xmm4,   %%xmm6\n\t"
+		    :
+		    :
+		    : "memory" );
+
+      abuf += 4*BLOCKSIZE;
     }

   for ( ;nblocks; nblocks-- )
     {
-      l[0] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
-                   c->u_mode.ocb.aad_sum);
+      l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+                c->u_mode.ocb.aad_sum);

       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
@@ -1814,7 +1828,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                     "pxor   %%xmm1,   %%xmm5\n\t"
                     "pxor   %%xmm5,   %%xmm0\n\t"
                     :
-                    : [l] "m" (*l[0]),
+                    : [l] "m" (*l),
                       [abuf] "m" (*abuf)
                     : "memory" );

diff --git a/cipher/serpent.c b/cipher/serpent.c
index a47a1b7..fc3afa6 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -1250,56 +1250,45 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       int did_use_avx2 = 0;
       const void *Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      const void **l;
       int i;

-      if (blkn % 16 == 0)
+      if (nblocks >= 16)
 	{
 	  for (i = 0; i < 16; i += 8)
 	    {
-	      Ls[i + 0] = c->u_mode.ocb.L[0];
-	      Ls[i + 1] = c->u_mode.ocb.L[1];
-	      Ls[i + 2] = c->u_mode.ocb.L[0];
-	      Ls[i + 3] = c->u_mode.ocb.L[2];
-	      Ls[i + 4] = c->u_mode.ocb.L[0];
-	      Ls[i + 5] = c->u_mode.ocb.L[1];
-	      Ls[i + 6] = c->u_mode.ocb.L[0];
+	      Ls[(i + 0 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 1 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 2 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 3 + n) % 16] = c->u_mode.ocb.L[2];
+	      Ls[(i + 4 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 5 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 6 + n) % 16] = c->u_mode.ocb.L[0];
 	    }

-	  Ls[7] = c->u_mode.ocb.L[3];
-	}
+	  Ls[(7 + n) % 16] = c->u_mode.ocb.L[3];
+	  l = &Ls[(15 + n) % 16];

-      /* Process data in 16 block chunks. */
-      while (nblocks >= 16)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 16 == 0)
+	  /* Process data in 16 block chunks. */
+	  while (nblocks >= 16)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      Ls[15] = ocb_get_l(c, l_tmp, blkn);
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 16);
+
+	      if (encrypt)
+		_gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+					  c->u_ctr.ctr, Ls);
+	      else
+		_gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+					  c->u_ctr.ctr, Ls);
+
+	      nblocks -= 16;
+	      outbuf += 16 * sizeof(serpent_block_t);
+	      inbuf  += 16 * sizeof(serpent_block_t);
+	      did_use_avx2 = 1;
 	    }
-	  else
-	    {
-	      for (i = 0; i < 16; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
-
-	  if (encrypt)
-	    _gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
-				      c->u_ctr.ctr, Ls);
-	  else
-	    _gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
-				      c->u_ctr.ctr, Ls);
-
-	  nblocks -= 16;
-	  outbuf += 16 * sizeof(serpent_block_t);
-	  inbuf  += 16 * sizeof(serpent_block_t);
-	  did_use_avx2 = 1;
 	}

       if (did_use_avx2)
@@ -1317,51 +1306,39 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   {
     int did_use_sse2 = 0;
     const void *Ls[8];
-    int i;
+    unsigned int n = 8 - (blkn % 8);
+    const void **l;

-    if (blkn % 8 == 0)
+    if (nblocks >= 8)
       {
-	Ls[0] = c->u_mode.ocb.L[0];
-	Ls[1] = c->u_mode.ocb.L[1];
-	Ls[2] = c->u_mode.ocb.L[0];
-	Ls[3] = c->u_mode.ocb.L[2];
-	Ls[4] = c->u_mode.ocb.L[0];
-	Ls[5] = c->u_mode.ocb.L[1];
-	Ls[6] = c->u_mode.ocb.L[0];
-      }
-
-    /* Process data in 8 block chunks. */
-    while (nblocks >= 8)
-      {
-	/* l_tmp will be used only every 65536-th block. */
-	if (blkn % 8 == 0)
+	Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+	Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+	Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+	Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+	Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+	Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+	Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+	l = &Ls[(7 + n) % 8];
+
+	/* Process data in 8 block chunks. */
+	while (nblocks >= 8)
 	  {
+	    /* l_tmp will be used only every 65536-th block. */
 	    blkn += 8;
-	    Ls[7] = ocb_get_l(c, l_tmp, blkn);
-	  }
-	else
-	  {
-	    for (i = 0; i < 8; i += 4)
-	      {
-		Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		blkn += 4;
-	      }
+	    *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
+
+	    if (encrypt)
+	      _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+					  c->u_ctr.ctr, Ls);
+	    else
+	      _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+					  c->u_ctr.ctr, Ls);
+
+	    nblocks -= 8;
+	    outbuf += 8 * sizeof(serpent_block_t);
+	    inbuf  += 8 * sizeof(serpent_block_t);
+	    did_use_sse2 = 1;
 	  }
-
-	if (encrypt)
-	  _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
-				      c->u_ctr.ctr, Ls);
-	else
-	  _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
-				      c->u_ctr.ctr, Ls);
-
-	nblocks -= 8;
-	outbuf += 8 * sizeof(serpent_block_t);
-	inbuf  += 8 * sizeof(serpent_block_t);
-	did_use_sse2 = 1;
       }

     if (did_use_sse2)
@@ -1380,51 +1357,39 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       int did_use_neon = 0;
       const void *Ls[8];
-      int i;
+      unsigned int n = 8 - (blkn % 8);
+      const void **l;

-      if (blkn % 8 == 0)
+      if (nblocks >= 8)
 	{
-	  Ls[0] = c->u_mode.ocb.L[0];
-	  Ls[1] = c->u_mode.ocb.L[1];
-	  Ls[2] = c->u_mode.ocb.L[0];
-	  Ls[3] = c->u_mode.ocb.L[2];
-	  Ls[4] = c->u_mode.ocb.L[0];
-	  Ls[5] = c->u_mode.ocb.L[1];
-	  Ls[6] = c->u_mode.ocb.L[0];
-	}
-
-      /* Process data in 8 block chunks. */
-      while (nblocks >= 8)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 8 == 0)
+	  Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+	  Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+	  Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+	  Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+	  Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+	  Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+	  Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+	  l = &Ls[(7 + n) % 8];
+
+	  /* Process data in 8 block chunks. */
+	  while (nblocks >= 8)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 8;
-	      Ls[7] = ocb_get_l(c, l_tmp, blkn);
-	    }
-	  else
-	    {
-	      for (i = 0; i < 8; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
+
+	      if (encrypt)
+		_gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+					  c->u_ctr.ctr, Ls);
+	      else
+		_gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+					  c->u_ctr.ctr, Ls);
+
+	      nblocks -= 8;
+	      outbuf += 8 * sizeof(serpent_block_t);
+	      inbuf  += 8 * sizeof(serpent_block_t);
+	      did_use_neon = 1;
 	    }
-
-	  if (encrypt)
-	    _gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
-				       c->u_ctr.ctr, Ls);
-	  else
-	    _gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
-				       c->u_ctr.ctr, Ls);
-
-	  nblocks -= 8;
-	  outbuf += 8 * sizeof(serpent_block_t);
-	  inbuf  += 8 * sizeof(serpent_block_t);
-	  did_use_neon = 1;
 	}

       if (did_use_neon)
@@ -1471,51 +1436,40 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       int did_use_avx2 = 0;
       const void *Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      const void **l;
       int i;

-      if (blkn % 16 == 0)
+      if (nblocks >= 16)
 	{
 	  for (i = 0; i < 16; i += 8)
 	    {
-	      Ls[i + 0] = c->u_mode.ocb.L[0];
-	      Ls[i + 1] = c->u_mode.ocb.L[1];
-	      Ls[i + 2] = c->u_mode.ocb.L[0];
-	      Ls[i + 3] = c->u_mode.ocb.L[2];
-	      Ls[i + 4] = c->u_mode.ocb.L[0];
-	      Ls[i + 5] = c->u_mode.ocb.L[1];
-	      Ls[i + 6] = c->u_mode.ocb.L[0];
+	      Ls[(i + 0 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 1 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 2 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 3 + n) % 16] = c->u_mode.ocb.L[2];
+	      Ls[(i + 4 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 5 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 6 + n) % 16] = c->u_mode.ocb.L[0];
 	    }

-	  Ls[7] = c->u_mode.ocb.L[3];
-	}
+	  Ls[(7 + n) % 16] = c->u_mode.ocb.L[3];
+	  l = &Ls[(15 + n) % 16];

-      /* Process data in 16 block chunks. */
-      while (nblocks >= 16)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 16 == 0)
+	  /* Process data in 16 block chunks. */
+	  while (nblocks >= 16)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      Ls[15] = ocb_get_l(c, l_tmp, blkn);
-	    }
-	  else
-	    {
-	      for (i = 0; i < 16; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 16);

-	  _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
-				      c->u_mode.ocb.aad_sum, Ls);
+	      _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+					  c->u_mode.ocb.aad_sum, Ls);

-	  nblocks -= 16;
-	  abuf += 16 * sizeof(serpent_block_t);
-	  did_use_avx2 = 1;
+	      nblocks -= 16;
+	      abuf += 16 * sizeof(serpent_block_t);
+	      did_use_avx2 = 1;
+	    }
 	}

       if (did_use_avx2)
@@ -1533,46 +1487,34 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
   {
     int did_use_sse2 = 0;
     const void *Ls[8];
-    int i;
+    unsigned int n = 8 - (blkn % 8);
+    const void **l;

-    if (blkn % 8 == 0)
+    if (nblocks >= 8)
       {
-	Ls[0] = c->u_mode.ocb.L[0];
-	Ls[1] = c->u_mode.ocb.L[1];
-	Ls[2] = c->u_mode.ocb.L[0];
-	Ls[3] = c->u_mode.ocb.L[2];
-	Ls[4] = c->u_mode.ocb.L[0];
-	Ls[5] = c->u_mode.ocb.L[1];
-	Ls[6] = c->u_mode.ocb.L[0];
-      }
-
-    /* Process data in 8 block chunks. */
-    while (nblocks >= 8)
-      {
-	/* l_tmp will be used only every 65536-th block. */
-	if (blkn % 8 == 0)
+	Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+	Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+	Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+	Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+	Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+	Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+	Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+	l = &Ls[(7 + n) % 8];
+
+	/* Process data in 8 block chunks. */
+	while (nblocks >= 8)
 	  {
+	    /* l_tmp will be used only every 65536-th block. */
 	    blkn += 8;
-	    Ls[7] = ocb_get_l(c, l_tmp, blkn);
-	  }
-	else
-	  {
-	    for (i = 0; i < 8; i += 4)
-	      {
-		Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		blkn += 4;
-	      }
-	  }
+	    *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);

-	_gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
-				    c->u_mode.ocb.aad_sum, Ls);
+	    _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+					c->u_mode.ocb.aad_sum, Ls);

-	nblocks -= 8;
-	abuf += 8 * sizeof(serpent_block_t);
-	did_use_sse2 = 1;
+	    nblocks -= 8;
+	    abuf += 8 * sizeof(serpent_block_t);
+	    did_use_sse2 = 1;
+	  }
       }

     if (did_use_sse2)
@@ -1591,46 +1533,34 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       int did_use_neon = 0;
       const void *Ls[8];
-      int i;
-
-      if (blkn % 8 == 0)
-	{
-	  Ls[0] = c->u_mode.ocb.L[0];
-	  Ls[1] = c->u_mode.ocb.L[1];
-	  Ls[2] = c->u_mode.ocb.L[0];
-	  Ls[3] = c->u_mode.ocb.L[2];
-	  Ls[4] = c->u_mode.ocb.L[0];
-	  Ls[5] = c->u_mode.ocb.L[1];
-	  Ls[6] = c->u_mode.ocb.L[0];
-	}
+      unsigned int n = 8 - (blkn % 8);
+      const void **l;

-      /* Process data in 8 block chunks. */
-      while (nblocks >= 8)
+      if (nblocks >= 8)
 	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 8 == 0)
+	  Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+	  Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+	  Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+	  Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+	  Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+	  Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+	  Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+	  l = &Ls[(7 + n) % 8];
+
+	  /* Process data in 8 block chunks. */
+	  while (nblocks >= 8)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 8;
-	      Ls[7] = ocb_get_l(c, l_tmp, blkn);
-	    }
-	  else
-	    {
-	      for (i = 0; i < 8; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);

-	  _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
-				      c->u_mode.ocb.aad_sum, Ls);
+	      _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+					  c->u_mode.ocb.aad_sum, Ls);

-	  nblocks -= 8;
-	  abuf += 8 * sizeof(serpent_block_t);
-	  did_use_neon = 1;
+	      nblocks -= 8;
+	      abuf += 8 * sizeof(serpent_block_t);
+	      did_use_neon = 1;
+	    }
 	}

       if (did_use_neon)
diff --git a/tests/basic.c b/tests/basic.c
index c1aa76a..4ea91a9 100644
--- a/tests/basic.c
+++ b/tests/basic.c
@@ -3153,7 +3153,8 @@ do_check_ocb_cipher (int inplace)

 
 static void
-check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
+check_ocb_cipher_largebuf_split (int algo, int keylen, const char *tagexpect,
+				 unsigned int splitpos)
 {
   static const unsigned char key[32] =
         "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
@@ -3219,7 +3220,14 @@ check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
       goto out_free;
     }

-  err = gcry_cipher_authenticate (hde, inbuf, buflen);
+  if (splitpos)
+    {
+      err = gcry_cipher_authenticate (hde, inbuf, splitpos);
+    }
+  if (!err)
+    {
+      err = gcry_cipher_authenticate (hde, inbuf + splitpos, buflen - splitpos);
+    }
   if (err)
     {
       fail ("cipher-ocb, gcry_cipher_authenticate failed (large, algo %d): %s\n",
@@ -3229,10 +3237,18 @@ check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
       goto out_free;
     }

-  err = gcry_cipher_final (hde);
+  if (splitpos)
+    {
+      err = gcry_cipher_encrypt (hde, outbuf, splitpos, inbuf, splitpos);
+    }
   if (!err)
     {
-      err = gcry_cipher_encrypt (hde, outbuf, buflen, inbuf, buflen);
+      err = gcry_cipher_final (hde);
+      if (!err)
+	{
+	  err = gcry_cipher_encrypt (hde, outbuf + splitpos, buflen - splitpos,
+				    inbuf + splitpos, buflen - splitpos);
+	}
     }
   if (err)
     {
@@ -3267,10 +3283,18 @@ check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
     }

   /* Now for the decryption.  */
-  err = gcry_cipher_final (hdd);
+  if (splitpos)
+    {
+      err = gcry_cipher_decrypt (hdd, outbuf, splitpos, NULL, 0);
+    }
   if (!err)
     {
-      err = gcry_cipher_decrypt (hdd, outbuf, buflen, NULL, 0);
+      err = gcry_cipher_final (hdd);
+      if (!err)
+	{
+	  err = gcry_cipher_decrypt (hdd, outbuf + splitpos, buflen - splitpos,
+				     NULL, 0);
+	}
     }
   if (err)
     {
@@ -3319,6 +3343,18 @@ out_free:

 
 static void
+check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
+{
+  unsigned int split;
+
+  for (split = 0; split < 32 * 16; split = split * 2 + 16)
+    {
+      check_ocb_cipher_largebuf_split(algo, keylen, tagexpect, split);
+    }
+}
+
+
+static void
 check_ocb_cipher (void)
 {
   /* Check OCB cipher with separate destination and source buffers for
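
For reference, the split positions that the new check_ocb_cipher_largebuf
wrapper above feeds to check_ocb_cipher_largebuf_split can be listed with a
small standalone sketch (not part of the patch; it assumes the 16-byte block
size of the ciphers these large-buffer tests cover):

#include <stdio.h>

int
main (void)
{
  unsigned int split;

  /* Same progression as in the patch: 0, 16, 48, 112, 240 and 496
     bytes, i.e. 0, 1, 3, 7, 15 and 31 blocks, so the split lands on
     both aligned and unaligned block counts for the 4/8/16/32-block
     parallel code paths.  */
  for (split = 0; split < 32 * 16; split = split * 2 + 16)
    printf ("splitpos = %u bytes (%u blocks)\n", split, split / 16);
  return 0;
}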
by Jussi Kivilinna | 12 Aug 17:19 2015

[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-253-g24ebf53

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  24ebf53f1e8a8afa27dcd768339bda70a740bb03 (commit)
       via  e11895da1f4af9782d89e92ba2e6b1a63235b54b (commit)
      from  80321eb3a63a20f86734d6eebb3f419c0ec895aa (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 24ebf53f1e8a8afa27dcd768339bda70a740bb03
Author: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date:   Tue Aug 11 07:22:16 2015 +0300

    Simplify OCB offset calculation for parallel implementations
    
    * cipher/camellia-glue.c (_gcry_camellia_ocb_crypt)
    (_gcry_camellia_ocb_auth): Precalculate Ls array always, instead of
    just if 'blkn % <parallel blocks> == 0'.
    * cipher/serpent.c (_gcry_serpent_ocb_crypt)
    (_gcry_serpent_ocb_auth): Ditto.
    * cipher/rijndael-aesni.c (get_l): Remove low-bit checks.
    (aes_ocb_enc, aes_ocb_dec, _gcry_aes_aesni_ocb_auth): Handle leading
    blocks until block counter is multiple of 4, so that parallel block
    processing loop can use 'c->u_mode.ocb.L' array directly.
    * tests/basic.c (check_ocb_cipher_largebuf): Rename to...
    (check_ocb_cipher_largebuf_split): ...this and add option to process
    large buffer as two split buffers.
    (check_ocb_cipher_largebuf): New.
    --
    
    Patch simplifies the source and reduces object size.
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
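
The rotation in the new Ls setup can be checked in isolation.  Below is a
minimal standalone sketch, not part of the patch: it stores plain ntz values
where the real code stores pointers into c->u_mode.ocb.L[] (or l_tmp), and
the helper ntz64 and the counter value 21 are made up for the demonstration.
It fills a 16-entry ring rotated by n = 16 - (blkn % 16), exactly as the
patch does, and checks every slot against a directly computed ntz:

#include <assert.h>
#include <stdio.h>

/* Trailing-zero count of a non-zero value (what 'rep;bsf' computes). */
static unsigned int
ntz64 (unsigned long long x)
{
  unsigned int n = 0;
  while ((x & 1) == 0)
    {
      x >>= 1;
      n++;
    }
  return n;
}

int
main (void)
{
  unsigned long long blkn = 21;  /* hypothetical unaligned block counter */
  unsigned int n = 16 - (unsigned int)(blkn % 16);
  unsigned int Ls[16];
  unsigned int i, k;

  /* The ntz pattern of 16 consecutive block numbers is fixed except in
     one position; rotating by n lines slot 0 up with block blkn + 1.  */
  for (i = 0; i < 16; i += 8)
    {
      Ls[(i + 0 + n) % 16] = 0;
      Ls[(i + 1 + n) % 16] = 1;
      Ls[(i + 2 + n) % 16] = 0;
      Ls[(i + 3 + n) % 16] = 2;
      Ls[(i + 4 + n) % 16] = 0;
      Ls[(i + 5 + n) % 16] = 1;
      Ls[(i + 6 + n) % 16] = 0;
    }
  Ls[(7 + n) % 16] = 3;

  /* The one chunk-dependent entry, the block index divisible by 16,
     always lands in slot (15 + n) % 16; the patch refreshes only this
     slot (via ocb_get_l/get_l) once per chunk.  */
  Ls[(15 + n) % 16] = ntz64 ((blkn + 16) - (blkn + 16) % 16);

  for (k = 1; k <= 16; k++)
    assert (Ls[k - 1] == ntz64 (blkn + k));
  puts ("rotated Ls table matches ntz(blkn + k) for k = 1..16");
  return 0;
}

In the serpent and camellia glue this rotation alone absorbs an unaligned
counter; the AES-NI code instead processes leading blocks until the counter
is a multiple of 4 and then indexes c->u_mode.ocb.L[] directly, as the log
above describes.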

diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 2d5dd20..dee0169 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -631,58 +631,47 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       int did_use_aesni_avx2 = 0;
       const void *Ls[32];
+      unsigned int n = 32 - (blkn % 32);
+      const void **l;
       int i;
 
-      if (blkn % 32 == 0)
+      if (nblocks >= 32)
 	{
 	  for (i = 0; i < 32; i += 8)
 	    {
-	      Ls[i + 0] = c->u_mode.ocb.L[0];
-	      Ls[i + 1] = c->u_mode.ocb.L[1];
-	      Ls[i + 2] = c->u_mode.ocb.L[0];
-	      Ls[i + 3] = c->u_mode.ocb.L[2];
-	      Ls[i + 4] = c->u_mode.ocb.L[0];
-	      Ls[i + 5] = c->u_mode.ocb.L[1];
-	      Ls[i + 6] = c->u_mode.ocb.L[0];
+	      Ls[(i + 0 + n) % 32] = c->u_mode.ocb.L[0];
+	      Ls[(i + 1 + n) % 32] = c->u_mode.ocb.L[1];
+	      Ls[(i + 2 + n) % 32] = c->u_mode.ocb.L[0];
+	      Ls[(i + 3 + n) % 32] = c->u_mode.ocb.L[2];
+	      Ls[(i + 4 + n) % 32] = c->u_mode.ocb.L[0];
+	      Ls[(i + 5 + n) % 32] = c->u_mode.ocb.L[1];
+	      Ls[(i + 6 + n) % 32] = c->u_mode.ocb.L[0];
 	    }
 
-	  Ls[7] = c->u_mode.ocb.L[3];
-	  Ls[15] = c->u_mode.ocb.L[4];
-	  Ls[23] = c->u_mode.ocb.L[3];
-	}
+	  Ls[(7 + n) % 32] = c->u_mode.ocb.L[3];
+	  Ls[(15 + n) % 32] = c->u_mode.ocb.L[4];
+	  Ls[(23 + n) % 32] = c->u_mode.ocb.L[3];
+	  l = &Ls[(31 + n) % 32];
 
-      /* Process data in 32 block chunks. */
-      while (nblocks >= 32)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 32 == 0)
+	  /* Process data in 32 block chunks. */
+	  while (nblocks >= 32)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 32;
-	      Ls[31] = ocb_get_l(c, l_tmp, blkn);
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 32);
+
+	      if (encrypt)
+		_gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+						  c->u_ctr.ctr, Ls);
+	      else
+		_gcry_camellia_aesni_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+						  c->u_ctr.ctr, Ls);
+
+	      nblocks -= 32;
+	      outbuf += 32 * CAMELLIA_BLOCK_SIZE;
+	      inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
+	      did_use_aesni_avx2 = 1;
 	    }
-	  else
-	    {
-	      for (i = 0; i < 32; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
-
-	  if (encrypt)
-	    _gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
-					      c->u_ctr.ctr, Ls);
-	  else
-	    _gcry_camellia_aesni_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
-					      c->u_ctr.ctr, Ls);
-
-	  nblocks -= 32;
-	  outbuf += 32 * CAMELLIA_BLOCK_SIZE;
-	  inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
-	  did_use_aesni_avx2 = 1;
 	}
 
       if (did_use_aesni_avx2)
@@ -703,56 +692,45 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       int did_use_aesni_avx = 0;
       const void *Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      const void **l;
       int i;
 
-      if (blkn % 16 == 0)
+      if (nblocks >= 16)
 	{
 	  for (i = 0; i < 16; i += 8)
 	    {
-	      Ls[i + 0] = c->u_mode.ocb.L[0];
-	      Ls[i + 1] = c->u_mode.ocb.L[1];
-	      Ls[i + 2] = c->u_mode.ocb.L[0];
-	      Ls[i + 3] = c->u_mode.ocb.L[2];
-	      Ls[i + 4] = c->u_mode.ocb.L[0];
-	      Ls[i + 5] = c->u_mode.ocb.L[1];
-	      Ls[i + 6] = c->u_mode.ocb.L[0];
+	      Ls[(i + 0 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 1 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 2 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 3 + n) % 16] = c->u_mode.ocb.L[2];
+	      Ls[(i + 4 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 5 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 6 + n) % 16] = c->u_mode.ocb.L[0];
 	    }
 
-	  Ls[7] = c->u_mode.ocb.L[3];
-	}
+	  Ls[(7 + n) % 16] = c->u_mode.ocb.L[3];
+	  l = &Ls[(15 + n) % 16];
 
-      /* Process data in 16 block chunks. */
-      while (nblocks >= 16)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 16 == 0)
+	  /* Process data in 16 block chunks. */
+	  while (nblocks >= 16)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      Ls[15] = ocb_get_l(c, l_tmp, blkn);
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 16);
+
+	      if (encrypt)
+		_gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+						c->u_ctr.ctr, Ls);
+	      else
+		_gcry_camellia_aesni_avx_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+						c->u_ctr.ctr, Ls);
+
+	      nblocks -= 16;
+	      outbuf += 16 * CAMELLIA_BLOCK_SIZE;
+	      inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
+	      did_use_aesni_avx = 1;
 	    }
-	  else
-	    {
-	      for (i = 0; i < 16; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
-
-	  if (encrypt)
-	    _gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
-					    c->u_ctr.ctr, Ls);
-	  else
-	    _gcry_camellia_aesni_avx_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
-					    c->u_ctr.ctr, Ls);
-
-	  nblocks -= 16;
-	  outbuf += 16 * CAMELLIA_BLOCK_SIZE;
-	  inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
-	  did_use_aesni_avx = 1;
 	}
 
       if (did_use_aesni_avx)
@@ -803,53 +781,43 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       int did_use_aesni_avx2 = 0;
       const void *Ls[32];
+      unsigned int n = 32 - (blkn % 32);
+      const void **l;
       int i;
 
-      if (blkn % 32 == 0)
+      if (nblocks >= 32)
 	{
 	  for (i = 0; i < 32; i += 8)
 	    {
-	      Ls[i + 0] = c->u_mode.ocb.L[0];
-	      Ls[i + 1] = c->u_mode.ocb.L[1];
-	      Ls[i + 2] = c->u_mode.ocb.L[0];
-	      Ls[i + 3] = c->u_mode.ocb.L[2];
-	      Ls[i + 4] = c->u_mode.ocb.L[0];
-	      Ls[i + 5] = c->u_mode.ocb.L[1];
-	      Ls[i + 6] = c->u_mode.ocb.L[0];
+	      Ls[(i + 0 + n) % 32] = c->u_mode.ocb.L[0];
+	      Ls[(i + 1 + n) % 32] = c->u_mode.ocb.L[1];
+	      Ls[(i + 2 + n) % 32] = c->u_mode.ocb.L[0];
+	      Ls[(i + 3 + n) % 32] = c->u_mode.ocb.L[2];
+	      Ls[(i + 4 + n) % 32] = c->u_mode.ocb.L[0];
+	      Ls[(i + 5 + n) % 32] = c->u_mode.ocb.L[1];
+	      Ls[(i + 6 + n) % 32] = c->u_mode.ocb.L[0];
 	    }
 
-	  Ls[7] = c->u_mode.ocb.L[3];
-	  Ls[15] = c->u_mode.ocb.L[4];
-	  Ls[23] = c->u_mode.ocb.L[3];
-	}
+	  Ls[(7 + n) % 32] = c->u_mode.ocb.L[3];
+	  Ls[(15 + n) % 32] = c->u_mode.ocb.L[4];
+	  Ls[(23 + n) % 32] = c->u_mode.ocb.L[3];
+	  l = &Ls[(31 + n) % 32];
 
-      /* Process data in 32 block chunks. */
-      while (nblocks >= 32)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 32 == 0)
+	  /* Process data in 32 block chunks. */
+	  while (nblocks >= 32)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 32;
-	      Ls[31] = ocb_get_l(c, l_tmp, blkn);
-	    }
-	  else
-	    {
-	      for (i = 0; i < 32; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 32);
 
-	  _gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
-					    c->u_mode.ocb.aad_sum, Ls);
+	      _gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf,
+						 c->u_mode.ocb.aad_offset,
+						 c->u_mode.ocb.aad_sum, Ls);
 
-	  nblocks -= 32;
-	  abuf += 32 * CAMELLIA_BLOCK_SIZE;
-	  did_use_aesni_avx2 = 1;
+	      nblocks -= 32;
+	      abuf += 32 * CAMELLIA_BLOCK_SIZE;
+	      did_use_aesni_avx2 = 1;
+	    }
 	}
 
       if (did_use_aesni_avx2)
@@ -870,51 +838,41 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       int did_use_aesni_avx = 0;
       const void *Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      const void **l;
       int i;
 
-      if (blkn % 16 == 0)
+      if (nblocks >= 16)
 	{
 	  for (i = 0; i < 16; i += 8)
 	    {
-	      Ls[i + 0] = c->u_mode.ocb.L[0];
-	      Ls[i + 1] = c->u_mode.ocb.L[1];
-	      Ls[i + 2] = c->u_mode.ocb.L[0];
-	      Ls[i + 3] = c->u_mode.ocb.L[2];
-	      Ls[i + 4] = c->u_mode.ocb.L[0];
-	      Ls[i + 5] = c->u_mode.ocb.L[1];
-	      Ls[i + 6] = c->u_mode.ocb.L[0];
+	      Ls[(i + 0 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 1 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 2 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 3 + n) % 16] = c->u_mode.ocb.L[2];
+	      Ls[(i + 4 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 5 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 6 + n) % 16] = c->u_mode.ocb.L[0];
 	    }
 
-	  Ls[7] = c->u_mode.ocb.L[3];
-	}
+	  Ls[(7 + n) % 16] = c->u_mode.ocb.L[3];
+	  l = &Ls[(15 + n) % 16];
 
-      /* Process data in 16 block chunks. */
-      while (nblocks >= 16)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 16 == 0)
+	  /* Process data in 16 block chunks. */
+	  while (nblocks >= 16)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      Ls[15] = ocb_get_l(c, l_tmp, blkn);
-	    }
-	  else
-	    {
-	      for (i = 0; i < 16; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 16);
 
-	  _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
-					    c->u_mode.ocb.aad_sum, Ls);
+	      _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf,
+						c->u_mode.ocb.aad_offset,
+						c->u_mode.ocb.aad_sum, Ls);
 
-	  nblocks -= 16;
-	  abuf += 16 * CAMELLIA_BLOCK_SIZE;
-	  did_use_aesni_avx = 1;
+	      nblocks -= 16;
+	      abuf += 16 * CAMELLIA_BLOCK_SIZE;
+	      did_use_aesni_avx = 1;
+	    }
 	}
 
       if (did_use_aesni_avx)
diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 6678785..5c85903 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -1338,11 +1338,7 @@ get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i, unsigned char *iv,
   const unsigned char *l;
   unsigned int ntz;
 
-  if (i & 1)
-    return c->u_mode.ocb.L[0];
-  else if (i & 2)
-    return c->u_mode.ocb.L[1];
-  else if (i & 0xffffffffU)
+  if (i & 0xffffffffU)
     {
       asm ("rep;bsf %k[low], %k[ntz]\n\t"
            : [ntz] "=r" (ntz)
@@ -1407,7 +1403,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
   u64 n = c->u_mode.ocb.data_nblocks;
-  const unsigned char *l[4] = {};
+  const unsigned char *l;
   aesni_prepare_2_6_variable;
 
   aesni_prepare ();
@@ -1421,103 +1417,112 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
                   [ctr] "m" (*c->u_ctr.ctr)
                 : "memory" );
 
-  if (nblocks > 3)
+
+  for ( ;nblocks && n % 4; nblocks-- )
+    {
+      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm0,   %%xmm6\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      do_aesni_enc (ctx);
+
+      asm volatile ("pxor   %%xmm5, %%xmm0\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    :
+                    : "memory" );
+
+      inbuf += BLOCKSIZE;
+      outbuf += BLOCKSIZE;
+    }
+
+  for ( ;nblocks > 3 ; nblocks -= 4 )
     {
-      if (n % 4 == 0)
-	{
-	  l[0] = c->u_mode.ocb.L[0];
-	  l[1] = c->u_mode.ocb.L[1];
-	  l[2] = c->u_mode.ocb.L[0];
-	}
-
-      for ( ;nblocks > 3 ; nblocks -= 4 )
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (n % 4 == 0)
-	    {
-	      n += 4;
-	      l[3] = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
-	    }
-	  else
-	    {
-	      l[0] = get_l(c, l_tmp.x1, n + 1, c->u_iv.iv, c->u_ctr.ctr);
-	      l[1] = get_l(c, l_tmp.x1, n + 2, c->u_iv.iv, c->u_ctr.ctr);
-	      l[2] = get_l(c, l_tmp.x1, n + 3, c->u_iv.iv, c->u_ctr.ctr);
-	      l[3] = get_l(c, l_tmp.x1, n + 4, c->u_iv.iv, c->u_ctr.ctr);
-	      n += 4;
-	    }
-
-	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-	  /* Checksum_i = Checksum_{i-1} xor P_i  */
-	  /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
-	  asm volatile ("movdqu %[l0],     %%xmm0\n\t"
-			"movdqu %[inbuf0], %%xmm1\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm1,    %%xmm6\n\t"
-			"pxor   %%xmm5,    %%xmm1\n\t"
-			"movdqu %%xmm5,    %[outbuf0]\n\t"
-			: [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
-			: [l0] "m" (*l[0]),
-			  [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l1],     %%xmm0\n\t"
-			"movdqu %[inbuf1], %%xmm2\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm2,    %%xmm6\n\t"
-			"pxor   %%xmm5,    %%xmm2\n\t"
-			"movdqu %%xmm5,    %[outbuf1]\n\t"
-			: [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
-			: [l1] "m" (*l[1]),
-			  [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l2],     %%xmm0\n\t"
-			"movdqu %[inbuf2], %%xmm3\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm3,    %%xmm6\n\t"
-			"pxor   %%xmm5,    %%xmm3\n\t"
-			"movdqu %%xmm5,    %[outbuf2]\n\t"
-			: [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
-			: [l2] "m" (*l[2]),
-			  [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l3],     %%xmm0\n\t"
-			"movdqu %[inbuf3], %%xmm4\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm4,    %%xmm6\n\t"
-			"pxor   %%xmm5,    %%xmm4\n\t"
-			:
-			: [l3] "m" (*l[3]),
-			  [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
-			: "memory" );
-
-	  do_aesni_enc_vec4 (ctx);
-
-	  asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
-			"pxor   %%xmm0,    %%xmm1\n\t"
-			"movdqu %%xmm1,    %[outbuf0]\n\t"
-			"movdqu %[outbuf1],%%xmm0\n\t"
-			"pxor   %%xmm0,    %%xmm2\n\t"
-			"movdqu %%xmm2,    %[outbuf1]\n\t"
-			"movdqu %[outbuf2],%%xmm0\n\t"
-			"pxor   %%xmm0,    %%xmm3\n\t"
-			"movdqu %%xmm3,    %[outbuf2]\n\t"
-			"pxor   %%xmm5,    %%xmm4\n\t"
-			"movdqu %%xmm4,    %[outbuf3]\n\t"
-			: [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
-			  [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
-			  [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
-			  [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
-			:
-			: "memory" );
-
-	  outbuf += 4*BLOCKSIZE;
-	  inbuf  += 4*BLOCKSIZE;
-	}
+      /* l_tmp will be used only every 65536-th block. */
+      n += 4;
+      l = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+      asm volatile ("movdqu %[l0],     %%xmm0\n\t"
+		    "movdqu %[inbuf0], %%xmm1\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm1,    %%xmm6\n\t"
+		    "pxor   %%xmm5,    %%xmm1\n\t"
+		    "movdqu %%xmm5,    %[outbuf0]\n\t"
+		    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+		    : [l0] "m" (*c->u_mode.ocb.L[0]),
+		      [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l1],     %%xmm0\n\t"
+		    "movdqu %[inbuf1], %%xmm2\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm2,    %%xmm6\n\t"
+		    "pxor   %%xmm5,    %%xmm2\n\t"
+		    "movdqu %%xmm5,    %[outbuf1]\n\t"
+		    : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+		    : [l1] "m" (*c->u_mode.ocb.L[1]),
+		      [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l2],     %%xmm0\n\t"
+		    "movdqu %[inbuf2], %%xmm3\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm3,    %%xmm6\n\t"
+		    "pxor   %%xmm5,    %%xmm3\n\t"
+		    "movdqu %%xmm5,    %[outbuf2]\n\t"
+		    : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
+		    : [l2] "m" (*c->u_mode.ocb.L[0]),
+		      [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l3],     %%xmm0\n\t"
+		    "movdqu %[inbuf3], %%xmm4\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm4,    %%xmm6\n\t"
+		    "pxor   %%xmm5,    %%xmm4\n\t"
+		    :
+		    : [l3] "m" (*l),
+		      [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+		    : "memory" );
+
+      do_aesni_enc_vec4 (ctx);
+
+      asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm1\n\t"
+		    "movdqu %%xmm1,    %[outbuf0]\n\t"
+		    "movdqu %[outbuf1],%%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm2\n\t"
+		    "movdqu %%xmm2,    %[outbuf1]\n\t"
+		    "movdqu %[outbuf2],%%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm3\n\t"
+		    "movdqu %%xmm3,    %[outbuf2]\n\t"
+		    "pxor   %%xmm5,    %%xmm4\n\t"
+		    "movdqu %%xmm4,    %[outbuf3]\n\t"
+		    : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
+		      [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
+		      [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
+		      [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+		    :
+		    : "memory" );
+
+      outbuf += 4*BLOCKSIZE;
+      inbuf  += 4*BLOCKSIZE;
     }
 
   for ( ;nblocks; nblocks-- )
     {
-      l[0] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Checksum_i = Checksum_{i-1} xor P_i  */
@@ -1528,7 +1533,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
                     "pxor   %%xmm0,   %%xmm6\n\t"
                     "pxor   %%xmm5,   %%xmm0\n\t"
                     :
-                    : [l] "m" (*l[0]),
+                    : [l] "m" (*l),
                       [inbuf] "m" (*inbuf)
                     : "memory" );
 
@@ -1568,7 +1573,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
   u64 n = c->u_mode.ocb.data_nblocks;
-  const unsigned char *l[4] = {};
+  const unsigned char *l;
   aesni_prepare_2_6_variable;
 
   aesni_prepare ();
@@ -1582,103 +1587,111 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
                   [ctr] "m" (*c->u_ctr.ctr)
                 : "memory" );
 
-  if (nblocks > 3)
+  for ( ;nblocks && n % 4; nblocks-- )
+    {
+      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      do_aesni_dec (ctx);
+
+      asm volatile ("pxor   %%xmm5, %%xmm0\n\t"
+                    "pxor   %%xmm0, %%xmm6\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    :
+                    : "memory" );
+
+      inbuf += BLOCKSIZE;
+      outbuf += BLOCKSIZE;
+    }
+
+  for ( ;nblocks > 3 ; nblocks -= 4 )
     {
-      if (n % 4 == 0)
-	{
-	  l[0] = c->u_mode.ocb.L[0];
-	  l[1] = c->u_mode.ocb.L[1];
-	  l[2] = c->u_mode.ocb.L[0];
-	}
-
-      for ( ;nblocks > 3 ; nblocks -= 4 )
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (n % 4 == 0)
-	    {
-	      n += 4;
-	      l[3] = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
-	    }
-	  else
-	    {
-	      l[0] = get_l(c, l_tmp.x1, n + 1, c->u_iv.iv, c->u_ctr.ctr);
-	      l[1] = get_l(c, l_tmp.x1, n + 2, c->u_iv.iv, c->u_ctr.ctr);
-	      l[2] = get_l(c, l_tmp.x1, n + 3, c->u_iv.iv, c->u_ctr.ctr);
-	      l[3] = get_l(c, l_tmp.x1, n + 4, c->u_iv.iv, c->u_ctr.ctr);
-	      n += 4;
-	    }
-
-	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-	  /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
-	  /* Checksum_i = Checksum_{i-1} xor P_i  */
-	  asm volatile ("movdqu %[l0],     %%xmm0\n\t"
-			"movdqu %[inbuf0], %%xmm1\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm1\n\t"
-			"movdqu %%xmm5,    %[outbuf0]\n\t"
-			: [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
-			: [l0] "m" (*l[0]),
-			  [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l1],     %%xmm0\n\t"
-			"movdqu %[inbuf1], %%xmm2\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm2\n\t"
-			"movdqu %%xmm5,    %[outbuf1]\n\t"
-			: [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
-			: [l1] "m" (*l[1]),
-			  [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l2],     %%xmm0\n\t"
-			"movdqu %[inbuf2], %%xmm3\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm3\n\t"
-			"movdqu %%xmm5,    %[outbuf2]\n\t"
-			: [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
-			: [l2] "m" (*l[2]),
-			  [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l3],     %%xmm0\n\t"
-			"movdqu %[inbuf3], %%xmm4\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm4\n\t"
-			:
-			: [l3] "m" (*l[3]),
-			  [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
-			: "memory" );
-
-	  do_aesni_dec_vec4 (ctx);
-
-	  asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
-			"pxor   %%xmm0,    %%xmm1\n\t"
-			"movdqu %%xmm1,    %[outbuf0]\n\t"
-			"movdqu %[outbuf1],%%xmm0\n\t"
-			"pxor   %%xmm0,    %%xmm2\n\t"
-			"movdqu %%xmm2,    %[outbuf1]\n\t"
-			"movdqu %[outbuf2],%%xmm0\n\t"
-			"pxor   %%xmm0,    %%xmm3\n\t"
-			"movdqu %%xmm3,    %[outbuf2]\n\t"
-			"pxor   %%xmm5,    %%xmm4\n\t"
-			"movdqu %%xmm4,    %[outbuf3]\n\t"
-			"pxor   %%xmm1,    %%xmm6\n\t"
-			"pxor   %%xmm2,    %%xmm6\n\t"
-			"pxor   %%xmm3,    %%xmm6\n\t"
-			"pxor   %%xmm4,    %%xmm6\n\t"
-			: [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
-			  [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
-			  [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
-			  [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
-			:
-			: "memory" );
-
-	  outbuf += 4*BLOCKSIZE;
-	  inbuf  += 4*BLOCKSIZE;
-	}
+      /* l_tmp will be used only every 65536-th block. */
+      n += 4;
+      l = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      asm volatile ("movdqu %[l0],     %%xmm0\n\t"
+		    "movdqu %[inbuf0], %%xmm1\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm1\n\t"
+		    "movdqu %%xmm5,    %[outbuf0]\n\t"
+		    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+		    : [l0] "m" (*c->u_mode.ocb.L[0]),
+		      [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l1],     %%xmm0\n\t"
+		    "movdqu %[inbuf1], %%xmm2\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm2\n\t"
+		    "movdqu %%xmm5,    %[outbuf1]\n\t"
+		    : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+		    : [l1] "m" (*c->u_mode.ocb.L[1]),
+		      [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l2],     %%xmm0\n\t"
+		    "movdqu %[inbuf2], %%xmm3\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm3\n\t"
+		    "movdqu %%xmm5,    %[outbuf2]\n\t"
+		    : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
+		    : [l2] "m" (*c->u_mode.ocb.L[0]),
+		      [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l3],     %%xmm0\n\t"
+		    "movdqu %[inbuf3], %%xmm4\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm4\n\t"
+		    :
+		    : [l3] "m" (*l),
+		      [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+		    : "memory" );
+
+      do_aesni_dec_vec4 (ctx);
+
+      asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm1\n\t"
+		    "movdqu %%xmm1,    %[outbuf0]\n\t"
+		    "movdqu %[outbuf1],%%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm2\n\t"
+		    "movdqu %%xmm2,    %[outbuf1]\n\t"
+		    "movdqu %[outbuf2],%%xmm0\n\t"
+		    "pxor   %%xmm0,    %%xmm3\n\t"
+		    "movdqu %%xmm3,    %[outbuf2]\n\t"
+		    "pxor   %%xmm5,    %%xmm4\n\t"
+		    "movdqu %%xmm4,    %[outbuf3]\n\t"
+		    "pxor   %%xmm1,    %%xmm6\n\t"
+		    "pxor   %%xmm2,    %%xmm6\n\t"
+		    "pxor   %%xmm3,    %%xmm6\n\t"
+		    "pxor   %%xmm4,    %%xmm6\n\t"
+		    : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
+		      [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
+		      [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
+		      [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+		    :
+		    : "memory" );
+
+      outbuf += 4*BLOCKSIZE;
+      inbuf  += 4*BLOCKSIZE;
     }
 
   for ( ;nblocks; nblocks-- )
     {
-      l[0] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
@@ -1688,7 +1701,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
                     "pxor   %%xmm1,   %%xmm5\n\t"
                     "pxor   %%xmm5,   %%xmm0\n\t"
                     :
-                    : [l] "m" (*l[0]),
+                    : [l] "m" (*l),
                       [inbuf] "m" (*inbuf)
                     : "memory" );
 
@@ -1739,7 +1752,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
   u64 n = c->u_mode.ocb.aad_nblocks;
-  const unsigned char *l[4] = {};
+  const unsigned char *l;
   aesni_prepare_2_6_variable;
 
   aesni_prepare ();
@@ -1753,90 +1766,91 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                   [ctr] "m" (*c->u_mode.ocb.aad_sum)
                 : "memory" );
 
-  if (nblocks > 3)
+  for ( ;nblocks && n % 4; nblocks-- )
+    {
+      l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+                c->u_mode.ocb.aad_sum);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[abuf],  %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [abuf] "m" (*abuf)
+                    : "memory" );
+
+      do_aesni_enc (ctx);
+
+      asm volatile ("pxor   %%xmm0,   %%xmm6\n\t"
+                    :
+                    :
+                    : "memory" );
+
+      abuf += BLOCKSIZE;
+    }
+
+  for ( ;nblocks > 3 ; nblocks -= 4 )
     {
-      if (n % 4 == 0)
-	{
-	  l[0] = c->u_mode.ocb.L[0];
-	  l[1] = c->u_mode.ocb.L[1];
-	  l[2] = c->u_mode.ocb.L[0];
-	}
-
-      for ( ;nblocks > 3 ; nblocks -= 4 )
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (n % 4 == 0)
-	    {
-	      n += 4;
-	      l[3] = get_l(c, l_tmp.x1, n, c->u_mode.ocb.aad_offset,
-			  c->u_mode.ocb.aad_sum);
-	    }
-	  else
-	    {
-	      l[0] = get_l(c, l_tmp.x1, n + 1, c->u_mode.ocb.aad_offset,
-			  c->u_mode.ocb.aad_sum);
-	      l[1] = get_l(c, l_tmp.x1, n + 2, c->u_mode.ocb.aad_offset,
-			  c->u_mode.ocb.aad_sum);
-	      l[2] = get_l(c, l_tmp.x1, n + 3, c->u_mode.ocb.aad_offset,
-			  c->u_mode.ocb.aad_sum);
-	      l[3] = get_l(c, l_tmp.x1, n + 4, c->u_mode.ocb.aad_offset,
-			  c->u_mode.ocb.aad_sum);
-	      n += 4;
-	    }
-
-	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-	  /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
-	  asm volatile ("movdqu %[l0],     %%xmm0\n\t"
-			"movdqu %[abuf0],  %%xmm1\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm1\n\t"
-			:
-			: [l0] "m" (*l[0]),
-			  [abuf0] "m" (*(abuf + 0 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l1],     %%xmm0\n\t"
-			"movdqu %[abuf1],  %%xmm2\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm2\n\t"
-			:
-			: [l1] "m" (*l[1]),
-			  [abuf1] "m" (*(abuf + 1 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l2],     %%xmm0\n\t"
-			"movdqu %[abuf2],  %%xmm3\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm3\n\t"
-			:
-			: [l2] "m" (*l[2]),
-			  [abuf2] "m" (*(abuf + 2 * BLOCKSIZE))
-			: "memory" );
-	  asm volatile ("movdqu %[l3],     %%xmm0\n\t"
-			"movdqu %[abuf3],  %%xmm4\n\t"
-			"pxor   %%xmm0,    %%xmm5\n\t"
-			"pxor   %%xmm5,    %%xmm4\n\t"
-			:
-			: [l3] "m" (*l[3]),
-			  [abuf3] "m" (*(abuf + 3 * BLOCKSIZE))
-			: "memory" );
-
-	  do_aesni_enc_vec4 (ctx);
-
-	  asm volatile ("pxor   %%xmm1,   %%xmm6\n\t"
-			"pxor   %%xmm2,   %%xmm6\n\t"
-			"pxor   %%xmm3,   %%xmm6\n\t"
-			"pxor   %%xmm4,   %%xmm6\n\t"
-			:
-			:
-			: "memory" );
-
-	  abuf += 4*BLOCKSIZE;
-	}
+      /* l_tmp will be used only every 65536-th block. */
+      n += 4;
+      l = get_l(c, l_tmp.x1, n, c->u_mode.ocb.aad_offset,
+		c->u_mode.ocb.aad_sum);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      asm volatile ("movdqu %[l0],     %%xmm0\n\t"
+		    "movdqu %[abuf0],  %%xmm1\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm1\n\t"
+		    :
+		    : [l0] "m" (*c->u_mode.ocb.L[0]),
+		      [abuf0] "m" (*(abuf + 0 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l1],     %%xmm0\n\t"
+		    "movdqu %[abuf1],  %%xmm2\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm2\n\t"
+		    :
+		    : [l1] "m" (*c->u_mode.ocb.L[1]),
+		      [abuf1] "m" (*(abuf + 1 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l2],     %%xmm0\n\t"
+		    "movdqu %[abuf2],  %%xmm3\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm3\n\t"
+		    :
+		    : [l2] "m" (*c->u_mode.ocb.L[0]),
+		      [abuf2] "m" (*(abuf + 2 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("movdqu %[l3],     %%xmm0\n\t"
+		    "movdqu %[abuf3],  %%xmm4\n\t"
+		    "pxor   %%xmm0,    %%xmm5\n\t"
+		    "pxor   %%xmm5,    %%xmm4\n\t"
+		    :
+		    : [l3] "m" (*l),
+		      [abuf3] "m" (*(abuf + 3 * BLOCKSIZE))
+		    : "memory" );
+
+      do_aesni_enc_vec4 (ctx);
+
+      asm volatile ("pxor   %%xmm1,   %%xmm6\n\t"
+		    "pxor   %%xmm2,   %%xmm6\n\t"
+		    "pxor   %%xmm3,   %%xmm6\n\t"
+		    "pxor   %%xmm4,   %%xmm6\n\t"
+		    :
+		    :
+		    : "memory" );
+
+      abuf += 4*BLOCKSIZE;
     }
 
   for ( ;nblocks; nblocks-- )
     {
-      l[0] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
-                   c->u_mode.ocb.aad_sum);
+      l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+                c->u_mode.ocb.aad_sum);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
@@ -1845,7 +1859,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                     "pxor   %%xmm1,   %%xmm5\n\t"
                     "pxor   %%xmm5,   %%xmm0\n\t"
                     :
-                    : [l] "m" (*l[0]),
+                    : [l] "m" (*l),
                       [abuf] "m" (*abuf)
                     : "memory" );
 
diff --git a/cipher/serpent.c b/cipher/serpent.c
index a47a1b7..fc3afa6 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -1250,56 +1250,45 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       int did_use_avx2 = 0;
       const void *Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      const void **l;
       int i;
 
-      if (blkn % 16 == 0)
+      if (nblocks >= 16)
 	{
 	  for (i = 0; i < 16; i += 8)
 	    {
-	      Ls[i + 0] = c->u_mode.ocb.L[0];
-	      Ls[i + 1] = c->u_mode.ocb.L[1];
-	      Ls[i + 2] = c->u_mode.ocb.L[0];
-	      Ls[i + 3] = c->u_mode.ocb.L[2];
-	      Ls[i + 4] = c->u_mode.ocb.L[0];
-	      Ls[i + 5] = c->u_mode.ocb.L[1];
-	      Ls[i + 6] = c->u_mode.ocb.L[0];
+	      Ls[(i + 0 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 1 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 2 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 3 + n) % 16] = c->u_mode.ocb.L[2];
+	      Ls[(i + 4 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 5 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 6 + n) % 16] = c->u_mode.ocb.L[0];
 	    }
 
-	  Ls[7] = c->u_mode.ocb.L[3];
-	}
+	  Ls[(7 + n) % 16] = c->u_mode.ocb.L[3];
+	  l = &Ls[(15 + n) % 16];
 
-      /* Process data in 16 block chunks. */
-      while (nblocks >= 16)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 16 == 0)
+	  /* Process data in 16 block chunks. */
+	  while (nblocks >= 16)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      Ls[15] = ocb_get_l(c, l_tmp, blkn);
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 16);
+
+	      if (encrypt)
+		_gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+					  c->u_ctr.ctr, Ls);
+	      else
+		_gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+					  c->u_ctr.ctr, Ls);
+
+	      nblocks -= 16;
+	      outbuf += 16 * sizeof(serpent_block_t);
+	      inbuf  += 16 * sizeof(serpent_block_t);
+	      did_use_avx2 = 1;
 	    }
-	  else
-	    {
-	      for (i = 0; i < 16; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
-
-	  if (encrypt)
-	    _gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
-				      c->u_ctr.ctr, Ls);
-	  else
-	    _gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
-				      c->u_ctr.ctr, Ls);
-
-	  nblocks -= 16;
-	  outbuf += 16 * sizeof(serpent_block_t);
-	  inbuf  += 16 * sizeof(serpent_block_t);
-	  did_use_avx2 = 1;
 	}
 
       if (did_use_avx2)
@@ -1317,51 +1306,39 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   {
     int did_use_sse2 = 0;
     const void *Ls[8];
-    int i;
+    unsigned int n = 8 - (blkn % 8);
+    const void **l;
 
-    if (blkn % 8 == 0)
+    if (nblocks >= 8)
       {
-	Ls[0] = c->u_mode.ocb.L[0];
-	Ls[1] = c->u_mode.ocb.L[1];
-	Ls[2] = c->u_mode.ocb.L[0];
-	Ls[3] = c->u_mode.ocb.L[2];
-	Ls[4] = c->u_mode.ocb.L[0];
-	Ls[5] = c->u_mode.ocb.L[1];
-	Ls[6] = c->u_mode.ocb.L[0];
-      }
-
-    /* Process data in 8 block chunks. */
-    while (nblocks >= 8)
-      {
-	/* l_tmp will be used only every 65536-th block. */
-	if (blkn % 8 == 0)
+	Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+	Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+	Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+	Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+	Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+	Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+	Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+	l = &Ls[(7 + n) % 8];
+
+	/* Process data in 8 block chunks. */
+	while (nblocks >= 8)
 	  {
+	    /* l_tmp will be used only every 65536-th block. */
 	    blkn += 8;
-	    Ls[7] = ocb_get_l(c, l_tmp, blkn);
-	  }
-	else
-	  {
-	    for (i = 0; i < 8; i += 4)
-	      {
-		Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		blkn += 4;
-	      }
+	    *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
+
+	    if (encrypt)
+	      _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+					  c->u_ctr.ctr, Ls);
+	    else
+	      _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+					  c->u_ctr.ctr, Ls);
+
+	    nblocks -= 8;
+	    outbuf += 8 * sizeof(serpent_block_t);
+	    inbuf  += 8 * sizeof(serpent_block_t);
+	    did_use_sse2 = 1;
 	  }
-
-	if (encrypt)
-	  _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
-				      c->u_ctr.ctr, Ls);
-	else
-	  _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
-				      c->u_ctr.ctr, Ls);
-
-	nblocks -= 8;
-	outbuf += 8 * sizeof(serpent_block_t);
-	inbuf  += 8 * sizeof(serpent_block_t);
-	did_use_sse2 = 1;
       }
 
     if (did_use_sse2)
@@ -1380,51 +1357,39 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       int did_use_neon = 0;
       const void *Ls[8];
-      int i;
+      unsigned int n = 8 - (blkn % 8);
+      const void **l;
 
-      if (blkn % 8 == 0)
+      if (nblocks >= 8)
 	{
-	  Ls[0] = c->u_mode.ocb.L[0];
-	  Ls[1] = c->u_mode.ocb.L[1];
-	  Ls[2] = c->u_mode.ocb.L[0];
-	  Ls[3] = c->u_mode.ocb.L[2];
-	  Ls[4] = c->u_mode.ocb.L[0];
-	  Ls[5] = c->u_mode.ocb.L[1];
-	  Ls[6] = c->u_mode.ocb.L[0];
-	}
-
-      /* Process data in 8 block chunks. */
-      while (nblocks >= 8)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 8 == 0)
+	  Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+	  Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+	  Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+	  Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+	  Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+	  Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+	  Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+	  l = &Ls[(7 + n) % 8];
+
+	  /* Process data in 8 block chunks. */
+	  while (nblocks >= 8)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 8;
-	      Ls[7] = ocb_get_l(c, l_tmp, blkn);
-	    }
-	  else
-	    {
-	      for (i = 0; i < 8; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
+
+	      if (encrypt)
+		_gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+					  c->u_ctr.ctr, Ls);
+	      else
+		_gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+					  c->u_ctr.ctr, Ls);
+
+	      nblocks -= 8;
+	      outbuf += 8 * sizeof(serpent_block_t);
+	      inbuf  += 8 * sizeof(serpent_block_t);
+	      did_use_neon = 1;
 	    }
-
-	  if (encrypt)
-	    _gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
-				       c->u_ctr.ctr, Ls);
-	  else
-	    _gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
-				       c->u_ctr.ctr, Ls);
-
-	  nblocks -= 8;
-	  outbuf += 8 * sizeof(serpent_block_t);
-	  inbuf  += 8 * sizeof(serpent_block_t);
-	  did_use_neon = 1;
 	}
 
       if (did_use_neon)
@@ -1471,51 +1436,40 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       int did_use_avx2 = 0;
       const void *Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      const void **l;
       int i;
 
-      if (blkn % 16 == 0)
+      if (nblocks >= 16)
 	{
 	  for (i = 0; i < 16; i += 8)
 	    {
-	      Ls[i + 0] = c->u_mode.ocb.L[0];
-	      Ls[i + 1] = c->u_mode.ocb.L[1];
-	      Ls[i + 2] = c->u_mode.ocb.L[0];
-	      Ls[i + 3] = c->u_mode.ocb.L[2];
-	      Ls[i + 4] = c->u_mode.ocb.L[0];
-	      Ls[i + 5] = c->u_mode.ocb.L[1];
-	      Ls[i + 6] = c->u_mode.ocb.L[0];
+	      Ls[(i + 0 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 1 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 2 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 3 + n) % 16] = c->u_mode.ocb.L[2];
+	      Ls[(i + 4 + n) % 16] = c->u_mode.ocb.L[0];
+	      Ls[(i + 5 + n) % 16] = c->u_mode.ocb.L[1];
+	      Ls[(i + 6 + n) % 16] = c->u_mode.ocb.L[0];
 	    }
 
-	  Ls[7] = c->u_mode.ocb.L[3];
-	}
+	  Ls[(7 + n) % 16] = c->u_mode.ocb.L[3];
+	  l = &Ls[(15 + n) % 16];
 
-      /* Process data in 16 block chunks. */
-      while (nblocks >= 16)
-	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 16 == 0)
+	  /* Process data in 16 block chunks. */
+	  while (nblocks >= 16)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      Ls[15] = ocb_get_l(c, l_tmp, blkn);
-	    }
-	  else
-	    {
-	      for (i = 0; i < 16; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 16);
 
-	  _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
-				      c->u_mode.ocb.aad_sum, Ls);
+	      _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+					  c->u_mode.ocb.aad_sum, Ls);
 
-	  nblocks -= 16;
-	  abuf += 16 * sizeof(serpent_block_t);
-	  did_use_avx2 = 1;
+	      nblocks -= 16;
+	      abuf += 16 * sizeof(serpent_block_t);
+	      did_use_avx2 = 1;
+	    }
 	}
 
       if (did_use_avx2)
@@ -1533,46 +1487,34 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
   {
     int did_use_sse2 = 0;
     const void *Ls[8];
-    int i;
+    unsigned int n = 8 - (blkn % 8);
+    const void **l;
 
-    if (blkn % 8 == 0)
+    if (nblocks >= 8)
       {
-	Ls[0] = c->u_mode.ocb.L[0];
-	Ls[1] = c->u_mode.ocb.L[1];
-	Ls[2] = c->u_mode.ocb.L[0];
-	Ls[3] = c->u_mode.ocb.L[2];
-	Ls[4] = c->u_mode.ocb.L[0];
-	Ls[5] = c->u_mode.ocb.L[1];
-	Ls[6] = c->u_mode.ocb.L[0];
-      }
-
-    /* Process data in 8 block chunks. */
-    while (nblocks >= 8)
-      {
-	/* l_tmp will be used only every 65536-th block. */
-	if (blkn % 8 == 0)
+	Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+	Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+	Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+	Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+	Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+	Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+	Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+	l = &Ls[(7 + n) % 8];
+
+	/* Process data in 8 block chunks. */
+	while (nblocks >= 8)
 	  {
+	    /* l_tmp will be used only every 65536-th block. */
 	    blkn += 8;
-	    Ls[7] = ocb_get_l(c, l_tmp, blkn);
-	  }
-	else
-	  {
-	    for (i = 0; i < 8; i += 4)
-	      {
-		Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		blkn += 4;
-	      }
-	  }
+	    *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
 
-	_gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
-				    c->u_mode.ocb.aad_sum, Ls);
+	    _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+					c->u_mode.ocb.aad_sum, Ls);
 
-	nblocks -= 8;
-	abuf += 8 * sizeof(serpent_block_t);
-	did_use_sse2 = 1;
+	    nblocks -= 8;
+	    abuf += 8 * sizeof(serpent_block_t);
+	    did_use_sse2 = 1;
+	  }
       }
 
     if (did_use_sse2)
@@ -1591,46 +1533,34 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       int did_use_neon = 0;
       const void *Ls[8];
-      int i;
-
-      if (blkn % 8 == 0)
-	{
-	  Ls[0] = c->u_mode.ocb.L[0];
-	  Ls[1] = c->u_mode.ocb.L[1];
-	  Ls[2] = c->u_mode.ocb.L[0];
-	  Ls[3] = c->u_mode.ocb.L[2];
-	  Ls[4] = c->u_mode.ocb.L[0];
-	  Ls[5] = c->u_mode.ocb.L[1];
-	  Ls[6] = c->u_mode.ocb.L[0];
-	}
+      unsigned int n = 8 - (blkn % 8);
+      const void **l;
 
-      /* Process data in 8 block chunks. */
-      while (nblocks >= 8)
+      if (nblocks >= 8)
 	{
-	  /* l_tmp will be used only every 65536-th block. */
-	  if (blkn % 8 == 0)
+	  Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+	  Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+	  Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+	  Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+	  Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+	  Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+	  Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+	  l = &Ls[(7 + n) % 8];
+
+	  /* Process data in 8 block chunks. */
+	  while (nblocks >= 8)
 	    {
+	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 8;
-	      Ls[7] = ocb_get_l(c, l_tmp, blkn);
-	    }
-	  else
-	    {
-	      for (i = 0; i < 8; i += 4)
-		{
-		  Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
-		  Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
-		  Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
-		  Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
-		  blkn += 4;
-		}
-	    }
+	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
 
-	  _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
-				      c->u_mode.ocb.aad_sum, Ls);
+	      _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+					  c->u_mode.ocb.aad_sum, Ls);
 
-	  nblocks -= 8;
-	  abuf += 8 * sizeof(serpent_block_t);
-	  did_use_neon = 1;
+	      nblocks -= 8;
+	      abuf += 8 * sizeof(serpent_block_t);
+	      did_use_neon = 1;
+	    }
 	}
 
       if (did_use_neon)
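
The serpent.c hunks above replace the aligned-only Ls table with a rotated
one: with n = 16 - (blkn % 16), slot Ls[k] holds the L value for the k-th
block of each chunk (absolute block index blkn + 1 + k) whatever the
alignment of blkn, and the single slot serving block indexes divisible by
16 is the one refreshed through the pointer l on every iteration.  Since
blkn then advances in steps of 16, the rotation stays valid for every
chunk; the SSE2 and NEON paths do the same with chunks of 8.  A standalone
sketch that checks this indexing, with ntz values standing in for the L[]
pointers (ntz and main are illustrative helpers, not code from the patch):

#include <stdio.h>

/* Number of trailing zero bits, for j >= 1.  */
static unsigned int
ntz (unsigned int j)
{
  unsigned int n = 0;
  while ((j & 1) == 0)
    {
      j >>= 1;
      n++;
    }
  return n;
}

int
main (void)
{
  unsigned int blkn = 5;  /* any starting block count, aligned or not */
  unsigned int n = 16 - (blkn % 16);
  unsigned int Ls[16];
  unsigned int i, k;

  /* Fill the table exactly as the patch does, storing ntz values
     instead of the L[] pointers.  */
  for (i = 0; i < 16; i += 8)
    {
      Ls[(i + 0 + n) % 16] = 0;
      Ls[(i + 1 + n) % 16] = 1;
      Ls[(i + 2 + n) % 16] = 0;
      Ls[(i + 3 + n) % 16] = 2;
      Ls[(i + 4 + n) % 16] = 0;
      Ls[(i + 5 + n) % 16] = 1;
      Ls[(i + 6 + n) % 16] = 0;
    }
  Ls[(7 + n) % 16] = 3;
  /* Slot (15 + n) % 16 is the variable one written through *l.  */

  /* Check: slot k serves absolute block index blkn + 1 + k.  */
  for (k = 0; k < 16; k++)
    {
      unsigned int j = blkn + 1 + k;
      if (j % 16 != 0)  /* skip the variable slot */
        printf ("slot %2u: Ls = %u, ntz(%u) = %u\n", k, Ls[k], j, ntz (j));
    }
  return 0;
}

For any blkn the two columns printed for the fifteen fixed slots agree,
which is exactly the invariant the rewritten loops rely on.
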
diff --git a/tests/basic.c b/tests/basic.c
index c1aa76a..4ea91a9 100644
--- a/tests/basic.c
+++ b/tests/basic.c
@@ -3153,7 +3153,8 @@ do_check_ocb_cipher (int inplace)
 
 
 static void
-check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
+check_ocb_cipher_largebuf_split (int algo, int keylen, const char *tagexpect,
+				 unsigned int splitpos)
 {
   static const unsigned char key[32] =
         "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
@@ -3219,7 +3220,14 @@ check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
       goto out_free;
     }
 
-  err = gcry_cipher_authenticate (hde, inbuf, buflen);
+  if (splitpos)
+    {
+      err = gcry_cipher_authenticate (hde, inbuf, splitpos);
+    }
+  if (!err)
+    {
+      err = gcry_cipher_authenticate (hde, inbuf + splitpos, buflen - splitpos);
+    }
   if (err)
     {
       fail ("cipher-ocb, gcry_cipher_authenticate failed (large, algo %d): %s\n",
@@ -3229,10 +3237,18 @@ check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
       goto out_free;
     }
 
-  err = gcry_cipher_final (hde);
+  if (splitpos)
+    {
+      err = gcry_cipher_encrypt (hde, outbuf, splitpos, inbuf, splitpos);
+    }
   if (!err)
     {
-      err = gcry_cipher_encrypt (hde, outbuf, buflen, inbuf, buflen);
+      err = gcry_cipher_final (hde);
+      if (!err)
+	{
+	  err = gcry_cipher_encrypt (hde, outbuf + splitpos, buflen - splitpos,
+				    inbuf + splitpos, buflen - splitpos);
+	}
     }
   if (err)
     {
@@ -3267,10 +3283,18 @@ check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
     }
 
   /* Now for the decryption.  */
-  err = gcry_cipher_final (hdd);
+  if (splitpos)
+    {
+      err = gcry_cipher_decrypt (hdd, outbuf, splitpos, NULL, 0);
+    }
   if (!err)
     {
-      err = gcry_cipher_decrypt (hdd, outbuf, buflen, NULL, 0);
+      err = gcry_cipher_final (hdd);
+      if (!err)
+	{
+	  err = gcry_cipher_decrypt (hdd, outbuf + splitpos, buflen - splitpos,
+				     NULL, 0);
+	}
     }
   if (err)
     {
@@ -3319,6 +3343,18 @@ out_free:
 
 
 static void
+check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
+{
+  unsigned int split;
+
+  for (split = 0; split < 32 * 16; split = split * 2 + 16)
+    {
+      check_ocb_cipher_largebuf_split(algo, keylen, tagexpect, split);
+    }
+}
+
+
+static void
 check_ocb_cipher (void)
 {
   /* Check OCB cipher with separate destination and source buffers for
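
For the new large-buffer test, the split loop above produces the positions
0, 16, 48, 112, 240 and 496 (the loop stops once split reaches
32 * 16 = 512), so authentication and encryption are each exercised once
without a split and then at several unaligned offsets, covering both the
aligned and the mid-stream entry paths of the assembly code changed in
this commit.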

commit e11895da1f4af9782d89e92ba2e6b1a63235b54b
Author: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date:   Mon Aug 10 20:48:02 2015 +0300

    Add carryless 8-bit addition fast-path for AES-NI CTR mode
    
    * cipher/rijndael-aesni.c (do_aesni_ctr_4): Do addition using
    CTR in big-endian form, if least-significant byte does not overflow.
    --
    
    Patch improves AES-NI CTR speed by 20%.
    
    Benchmark on Intel Haswell (3.2 GHz):
    
    Before:
     AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
            CTR enc |     0.273 ns/B    3489.8 MiB/s     0.875 c/B
            CTR dec |     0.273 ns/B    3491.0 MiB/s     0.874 c/B
    
    After:
            CTR enc |     0.228 ns/B    4190.0 MiB/s     0.729 c/B
            CTR dec |     0.228 ns/B    4190.2 MiB/s     0.729 c/B
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
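
Restated in plain C: the counter block is kept in big-endian byte order,
and as long as its least-significant byte is at most 0xfb, adding 1..4 to
it cannot carry into the next byte, so a per-byte addition on the
big-endian form (which is what paddb does per lane) is already correct.
A minimal sketch of that condition, assuming a 16-byte counter;
ctr_add_small is an illustrative name, not a function from the patch:

#include <stdint.h>

static void
ctr_add_small (uint8_t ctr[16], uint8_t inc)  /* inc in 1..4 */
{
  if (ctr[15] <= 0xfb)
    {
      /* Fast path: the add cannot carry out of the last byte.  */
      ctr[15] += inc;
    }
  else
    {
      /* Slow path: ripple the carry through the big-endian counter.  */
      unsigned int carry = inc;
      int i;

      for (i = 15; i >= 0 && carry; i--)
        {
          carry += ctr[i];
          ctr[i] = carry & 0xff;
          carry >>= 8;
        }
    }
}

The assembly below performs the same test with "cmpb $0xfb, 15(%[ctr])"
and branches to the original byte-swapping add path whenever a carry
could occur.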

diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 882cc79..6678785 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -787,6 +787,13 @@ static void
 do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
                 unsigned char *ctr, unsigned char *b, const unsigned char *a)
 {
+  static const byte bige_addb_const[4][16] __attribute__ ((aligned (16))) =
+    {
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4 }
+    };
 #define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
 #define aesenc_xmm1_xmm2      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t"
 #define aesenc_xmm1_xmm3      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t"
@@ -807,7 +814,25 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
       xmm6  endian swapping mask
    */
 
-  asm volatile ("movdqa %%xmm5, %%xmm0\n\t"     /* xmm0, xmm2 := CTR (xmm5) */
+  asm volatile (/* detect if 8-bit carry handling is needed */
+                "cmpb   $0xfb, 15(%[ctr])\n\t"
+                "ja     .Ladd32bit%=\n\t"
+
+                "movdqa %%xmm5, %%xmm0\n\t"     /* xmm0 := CTR (xmm5) */
+                "movdqa %[addb_1], %%xmm2\n\t"  /* xmm2 := be(1) */
+                "movdqa %[addb_2], %%xmm3\n\t"  /* xmm3 := be(2) */
+                "movdqa %[addb_3], %%xmm4\n\t"  /* xmm4 := be(3) */
+                "movdqa %[addb_4], %%xmm5\n\t"  /* xmm5 := be(4) */
+                "paddb  %%xmm0, %%xmm2\n\t"     /* xmm2 := be(1) + CTR (xmm0) */
+                "paddb  %%xmm0, %%xmm3\n\t"     /* xmm3 := be(2) + CTR (xmm0) */
+                "paddb  %%xmm0, %%xmm4\n\t"     /* xmm4 := be(3) + CTR (xmm0) */
+                "paddb  %%xmm0, %%xmm5\n\t"     /* xmm5 := be(4) + CTR (xmm0) */
+                "movdqa (%[key]), %%xmm1\n\t"   /* xmm1 := key[0] */
+                "movl   %[rounds], %%esi\n\t"
+                "jmp    .Lstore_ctr%=\n\t"
+
+                ".Ladd32bit%=:\n\t"
+                "movdqa %%xmm5, %%xmm0\n\t"     /* xmm0, xmm2 := CTR (xmm5) */
                 "movdqa %%xmm0, %%xmm2\n\t"
                 "pcmpeqd %%xmm1, %%xmm1\n\t"
                 "psrldq $8, %%xmm1\n\t"         /* xmm1 = -1 */
@@ -852,6 +877,8 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
                 "pshufb %%xmm6, %%xmm3\n\t"     /* xmm3 := be(xmm3) */
                 "pshufb %%xmm6, %%xmm4\n\t"     /* xmm4 := be(xmm4) */
                 "pshufb %%xmm6, %%xmm5\n\t"     /* xmm5 := be(xmm5) */
+
+                ".Lstore_ctr%=:\n\t"
                 "movdqa %%xmm5, (%[ctr])\n\t"   /* Update CTR (mem).  */
 
                 "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]    */
@@ -956,7 +983,11 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
                   [src] "r" (a),
                   [dst] "r" (b),
                   [key] "r" (ctx->keyschenc),
-                  [rounds] "g" (ctx->rounds)
+                  [rounds] "g" (ctx->rounds),
+                  [addb_1] "m" (bige_addb_const[0][0]),
+                  [addb_2] "m" (bige_addb_const[1][0]),
+                  [addb_3] "m" (bige_addb_const[2][0]),
+                  [addb_4] "m" (bige_addb_const[3][0])
                 : "%esi", "cc", "memory");
 #undef aesenc_xmm1_xmm0
 #undef aesenc_xmm1_xmm2

-----------------------------------------------------------------------

Summary of changes:
 cipher/camellia-glue.c  | 254 +++++++++-----------
 cipher/rijndael-aesni.c | 597 ++++++++++++++++++++++++++----------------------
 cipher/serpent.c        | 370 ++++++++++++------------------
 tests/basic.c           |  48 +++-
 4 files changed, 619 insertions(+), 650 deletions(-)


hooks/post-receive
--

-- 
The GNU crypto library
http://git.gnupg.org


_______________________________________________
Gnupg-commits mailing list
Gnupg-commits@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits
by Jussi Kivilinna | 12 Aug 17:33 2015

[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-254-g48822ae

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  48822ae0b436bcea0fe92dbf0d88475ba3179320 (commit)
      from  24ebf53f1e8a8afa27dcd768339bda70a740bb03 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 48822ae0b436bcea0fe92dbf0d88475ba3179320
Author: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date:   Wed Aug 12 18:17:01 2015 +0300

    Keccak: Fix array indexes in θ step

    * cipher/keccak.c (keccak_f1600_state_permute): Fix indexes for D[5].
    --

    Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
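
For reference, the θ step computes, for x = 0..4,

  D[x] = C[(x + 4) mod 5] ^ rol64(C[(x + 1) mod 5], 1)

and then XORs D[x] into every lane of column x; the formula needs exactly
five D values, so the old code skipped D[3] and touched D[5], one past the
set the step defines.  A standalone sketch of the step against which the
fixed indexes can be checked (theta is an illustrative name; rol64 mirrors
the helper used in cipher/keccak.c, and the state is indexed so that the
second subscript selects the column, as in the hunks below):

#include <stdint.h>

/* Left-rotate a 64-bit lane by n bits, for 1 <= n <= 63.  */
static inline uint64_t
rol64 (uint64_t x, unsigned int n)
{
  return (x << n) | (x >> (64 - n));
}

static void
theta (uint64_t state[5][5])
{
  uint64_t C[5], D[5];
  int x, y;

  /* C[x] is the parity of column x.  */
  for (x = 0; x < 5; x++)
    C[x] = state[0][x] ^ state[1][x] ^ state[2][x]
           ^ state[3][x] ^ state[4][x];

  /* D[x] = C[x-1] ^ rol64(C[x+1], 1), indexes taken mod 5.  */
  for (x = 0; x < 5; x++)
    D[x] = C[(x + 4) % 5] ^ rol64 (C[(x + 1) % 5], 1);

  /* Add the θ effect to every column.  */
  for (x = 0; x < 5; x++)
    for (y = 0; y < 5; y++)
      state[y][x] ^= D[x];
}

Unrolling the D loop gives exactly the five assignments of the corrected
code below, from D[0] = C[4] ^ rol64(C[1], 1) through
D[4] = C[3] ^ rol64(C[0], 1).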

diff --git a/cipher/keccak.c b/cipher/keccak.c
index 4ca8dbd..4a9c1f2 100644
--- a/cipher/keccak.c
+++ b/cipher/keccak.c
@@ -100,8 +100,8 @@ static unsigned int keccak_f1600_state_permute(KECCAK_STATE *hd)
 	D[0] = C[4] ^ rol64(C[1], 1);
 	D[1] = C[0] ^ rol64(C[2], 1);
 	D[2] = C[1] ^ rol64(C[3], 1);
-	D[4] = C[2] ^ rol64(C[4], 1);
-	D[5] = C[3] ^ rol64(C[0], 1);
+	D[3] = C[2] ^ rol64(C[4], 1);
+	D[4] = C[3] ^ rol64(C[0], 1);

 	/* Add the θ effect to the whole column */
 	hd->state[0][0] ^= D[0];
@@ -125,18 +125,18 @@ static unsigned int keccak_f1600_state_permute(KECCAK_STATE *hd)
 	hd->state[4][2] ^= D[2];

 	/* Add the θ effect to the whole column */
-	hd->state[0][3] ^= D[4];
-	hd->state[1][3] ^= D[4];
-	hd->state[2][3] ^= D[4];
-	hd->state[3][3] ^= D[4];
-	hd->state[4][3] ^= D[4];
+	hd->state[0][3] ^= D[3];
+	hd->state[1][3] ^= D[3];
+	hd->state[2][3] ^= D[3];
+	hd->state[3][3] ^= D[3];
+	hd->state[4][3] ^= D[3];

 	/* Add the θ effect to the whole column */
-	hd->state[0][4] ^= D[5];
-	hd->state[1][4] ^= D[5];
-	hd->state[2][4] ^= D[5];
-	hd->state[3][4] ^= D[5];
-	hd->state[4][4] ^= D[5];
+	hd->state[0][4] ^= D[4];
+	hd->state[1][4] ^= D[4];
+	hd->state[2][4] ^= D[4];
+	hd->state[3][4] ^= D[4];
+	hd->state[4][4] ^= D[4];
       }

       {

-----------------------------------------------------------------------

Summary of changes:
 cipher/keccak.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

hooks/post-receive
--

-- 
The GNU crypto library
http://git.gnupg.org

_______________________________________________
Gnupg-commits mailing list
Gnupg-commits@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits
_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
Jussi Kivilinna | 12 Aug 17:20 2015

[PATCH] Keccak: Fix array indexes in θ step

* cipher/keccak.c (keccak_f1600_state_permute): Fix indexes for D[5].
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
 cipher/keccak.c |   24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/cipher/keccak.c b/cipher/keccak.c
index 4ca8dbd..4a9c1f2 100644
--- a/cipher/keccak.c
+++ b/cipher/keccak.c
@@ -100,8 +100,8 @@ static unsigned int keccak_f1600_state_permute(KECCAK_STATE *hd)
 	D[0] = C[4] ^ rol64(C[1], 1);
 	D[1] = C[0] ^ rol64(C[2], 1);
 	D[2] = C[1] ^ rol64(C[3], 1);
-	D[4] = C[2] ^ rol64(C[4], 1);
-	D[5] = C[3] ^ rol64(C[0], 1);
+	D[3] = C[2] ^ rol64(C[4], 1);
+	D[4] = C[3] ^ rol64(C[0], 1);

 	/* Add the θ effect to the whole column */
 	hd->state[0][0] ^= D[0];
@@ -125,18 +125,18 @@ static unsigned int keccak_f1600_state_permute(KECCAK_STATE *hd)
 	hd->state[4][2] ^= D[2];

 	/* Add the θ effect to the whole column */
-	hd->state[0][3] ^= D[4];
-	hd->state[1][3] ^= D[4];
-	hd->state[2][3] ^= D[4];
-	hd->state[3][3] ^= D[4];
-	hd->state[4][3] ^= D[4];
+	hd->state[0][3] ^= D[3];
+	hd->state[1][3] ^= D[3];
+	hd->state[2][3] ^= D[3];
+	hd->state[3][3] ^= D[3];
+	hd->state[4][3] ^= D[3];

 	/* Add the θ effect to the whole column */
-	hd->state[0][4] ^= D[5];
-	hd->state[1][4] ^= D[5];
-	hd->state[2][4] ^= D[5];
-	hd->state[3][4] ^= D[5];
-	hd->state[4][4] ^= D[5];
+	hd->state[0][4] ^= D[4];
+	hd->state[1][4] ^= D[4];
+	hd->state[2][4] ^= D[4];
+	hd->state[3][4] ^= D[4];
+	hd->state[4][4] ^= D[4];
       }

       {

_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
