Switch to SIMD-based base64 decoder

This commit is contained in:
Kovid Goyal 2026-04-21 09:52:39 +05:30
parent 1b741ca725
commit 93573aec6f
No known key found for this signature in database
GPG key ID: 06BC317B515ACE7C
4 changed files with 33 additions and 26 deletions

1
go.mod
View file

@ -10,6 +10,7 @@ require (
github.com/bmatcuk/doublestar/v4 v4.10.0
github.com/dlclark/regexp2 v1.11.5
github.com/ebitengine/purego v0.10.0
github.com/emmansun/base64 v0.9.0
github.com/google/go-cmp v0.7.0
github.com/google/uuid v1.6.0
github.com/hako/durafmt v0.0.0-20210608085754-5c1018a4e16b

2
go.sum
View file

@ -14,6 +14,8 @@ github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZ
github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/ebitengine/purego v0.10.0 h1:QIw4xfpWT6GWTzaW5XEKy3HXoqrJGx1ijYHzTF0/ISU=
github.com/ebitengine/purego v0.10.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
github.com/emmansun/base64 v0.9.0 h1:92dLrE7iro6g/yWuPsd7M9TzJpe9fEeqKH0H7MApDtE=
github.com/emmansun/base64 v0.9.0/go.mod h1:hp0DxCkKt7bF26HOh4BzhcObvqfH1BVy2vznoGThW6Q=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=

View file

@ -1,13 +1,16 @@
package streaming_base64
import (
"encoding/base64"
"fmt"
"iter"
"github.com/emmansun/base64"
)
// Reference fmt through the blank identifier so the import counts as used.
var _ = fmt.Print
// CorruptInputError is an alias for the imported base64 package's
// CorruptInputError, letting the rest of this package name the error type
// without a package qualifier.
type CorruptInputError = base64.CorruptInputError
type StreamingBase64Decoder struct {
leftover [4]byte
num_leftover int
@ -15,9 +18,9 @@ type StreamingBase64Decoder struct {
}
// wrap_error adjusts a CorruptInputError by adding chunkOffset to the offset
// it carries; any other error (including nil) is returned unchanged.
// chunkOffset is presumably the position of the decoded chunk within the
// overall stream — confirm against the callers.
// NOTE(review): this span is shown in diff form; the paired `if` and `return`
// lines below are the before/after versions of the same statement.
func wrap_error(err error, chunkOffset int64) error {
if e, ok := err.(base64.CorruptInputError); ok {
if e, ok := err.(CorruptInputError); ok {
// CorruptInputError is an int64 representing the relative byte offset
return base64.CorruptInputError(int64(e) + chunkOffset)
return CorruptInputError(int64(e) + chunkOffset)
}
return err
}
@ -106,7 +109,7 @@ func (s *StreamingBase64Decoder) Finish() ([]byte, error) {
case 0:
return nil, nil
case 1:
return nil, base64.CorruptInputError(s.total_read - 1)
return nil, CorruptInputError(s.total_read - 1)
case 2:
s.leftover[2] = '='
s.leftover[3] = '='

View file

@ -4,9 +4,10 @@ package streaming_base64
import (
"bytes"
"encoding/base64"
"fmt"
"testing"
"github.com/emmansun/base64"
)
var _ = fmt.Print
@ -84,16 +85,16 @@ func roundtripNoPadding(t *testing.T, plaintext []byte, chunkSize int) {
// leftover after decoding (but 2 base64 chars left when unpadded).
func TestRoundtripAllChunkSizes(t *testing.T) {
plaintexts := [][]byte{
{}, // 0 bytes → 0 encoded → num_leftover=0
[]byte("a"), // 1 byte → 4 encoded → no leftover (padded)
[]byte("ab"), // 2 bytes → 4 encoded → no leftover (padded)
[]byte("abc"), // 3 bytes → 4 encoded → no leftover
[]byte("abcd"), // 4 bytes → 8 encoded → no leftover
[]byte("abcde"), // 5 bytes → 8 encoded → no leftover (padded)
[]byte("abcdef"), // 6 bytes → 8 encoded → no leftover (padded)
[]byte("Hello, World!"), // 13 bytes → 20 encoded
{}, // 0 bytes → 0 encoded → num_leftover=0
[]byte("a"), // 1 byte → 4 encoded → no leftover (padded)
[]byte("ab"), // 2 bytes → 4 encoded → no leftover (padded)
[]byte("abc"), // 3 bytes → 4 encoded → no leftover
[]byte("abcd"), // 4 bytes → 8 encoded → no leftover
[]byte("abcde"), // 5 bytes → 8 encoded → no leftover (padded)
[]byte("abcdef"), // 6 bytes → 8 encoded → no leftover (padded)
[]byte("Hello, World!"), // 13 bytes → 20 encoded
[]byte("The quick brown fox jumps over the"), // 34 bytes → 48 encoded
bytes.Repeat([]byte{0x00, 0xff, 0x80}, 17), // binary data
bytes.Repeat([]byte{0x00, 0xff, 0x80}, 17), // binary data
}
for _, plain := range plaintexts {
for chunkSize := 1; chunkSize <= 7; chunkSize++ {
@ -109,14 +110,14 @@ func TestRoundtripAllChunkSizes(t *testing.T) {
// padding bytes for all relevant chunk sizes.
func TestRoundtripNoPaddingAllChunkSizes(t *testing.T) {
plaintexts := [][]byte{
[]byte("a"), // 1 byte → "YQ" (2 base64 chars, no pad)
[]byte("ab"), // 2 bytes → "YWI" (3 base64 chars, no pad)
[]byte("abc"), // 3 bytes → "YWJj" (4 chars, no leftover)
[]byte("abcd"), // 4 bytes → "YWJjZA" (6 chars)
[]byte("Hello, World!"), // mixed
bytes.Repeat([]byte{0xde}, 10), // binary, 1 mod 3 remainder
bytes.Repeat([]byte{0xbe}, 11), // binary, 2 mod 3 remainder
bytes.Repeat([]byte{0xef}, 12), // binary, 0 mod 3 remainder
[]byte("a"), // 1 byte → "YQ" (2 base64 chars, no pad)
[]byte("ab"), // 2 bytes → "YWI" (3 base64 chars, no pad)
[]byte("abc"), // 3 bytes → "YWJj" (4 chars, no leftover)
[]byte("abcd"), // 4 bytes → "YWJjZA" (6 chars)
[]byte("Hello, World!"), // mixed
bytes.Repeat([]byte{0xde}, 10), // binary, 1 mod 3 remainder
bytes.Repeat([]byte{0xbe}, 11), // binary, 2 mod 3 remainder
bytes.Repeat([]byte{0xef}, 12), // binary, 0 mod 3 remainder
}
for _, plain := range plaintexts {
for chunkSize := 1; chunkSize <= 7; chunkSize++ {
@ -199,7 +200,7 @@ func TestFinishNumLeftover(t *testing.T) {
t.Run("leftover=1", func(t *testing.T) {
// Feed 5 base64 chars: 4 will be consumed, 1 leftover.
encoded := []byte(base64.StdEncoding.EncodeToString([]byte("abc"))) // "YWJj" (4)
encoded = append(encoded, 'Y') // + 1 → total 5
encoded = append(encoded, 'Y') // + 1 → total 5
var d StreamingBase64Decoder
outBuf := make([]byte, 16)
for _, err := range d.Decode(encoded, outBuf) {
@ -268,9 +269,9 @@ func TestFinishNumLeftover(t *testing.T) {
// byte offset within the full stream.
func TestErrorOffsetInDecode(t *testing.T) {
tests := []struct {
name string
chunks []string // successive calls to Decode
wantOffset int64
name string
chunks []string // successive calls to Decode
wantOffset int64
}{
{
// Error in the very first block (no leftovers involved).