zoobzio January 5, 2025 Edit this page

Architecture

How atom works under the hood.

Overview

Atom uses a two-phase approach:

  1. Registration Phase: Build an execution plan via reflection (once per type)
  2. Execution Phase: Apply the cached plan to convert structs (many times)
┌─────────────────────────────────────────────────────────────────┐
│                      Registration Phase                          │
│                                                                   │
│  reflect.Type ──▶ buildFieldPlan() ──▶ []fieldPlan ──▶ registry │
│                                                                   │
└─────────────────────────────────────────────────────────────────┘
                              │
                              ▼ (cached)
┌─────────────────────────────────────────────────────────────────┐
│                       Execution Phase                            │
│                                                                   │
│  struct ──▶ atomize(plan) ──▶ Atom ──▶ deatomize(plan) ──▶ struct│
│                                                                   │
└─────────────────────────────────────────────────────────────────┘

Field Plans

A fieldPlan describes how to process a single struct field:

// fieldPlan is the precomputed per-field instruction, built once during
// registration and reused on every atomize/deatomize call.
type fieldPlan struct {
    name      string           // Field name (map key)
    index     []int            // Reflect index path
    table     Table            // Target table
    kind      fieldKind        // Scalar, pointer, slice, nested
    elemType  reflect.Type     // Element type for slices/pointers
    converter typeConverter    // Width conversion (int8→int64)
    nested    *reflectAtomizer // For nested structs
}

Field Kinds

// fieldKind classifies how a struct field is processed.
// (Declared here so the constant block below is valid Go; the original
// excerpt listed the constants with no type or init expression, which
// does not compile.)
type fieldKind uint8

const (
    kindScalar      fieldKind = iota // string, int64, bool, etc.
    kindPointer                      // *string, *int64, etc.
    kindSlice                        // []string, []int64, etc.
    kindNested                       // embedded struct
    kindNestedSlice                  // []Struct
    kindNestedPtr                    // *Struct
)

Plan Building

When Use[T]() is called:

// buildFieldPlan walks the fields of typ and produces one fieldPlan per
// exported field. Unexported fields are skipped because reflection cannot
// read them. Returns the first planning error encountered, if any.
func buildFieldPlan(typ reflect.Type) ([]fieldPlan, error) {
    var plans []fieldPlan
    n := typ.NumField()
    for i := 0; i < n; i++ {
        field := typ.Field(i)

        // Reflection cannot access unexported fields; leave them out.
        if !field.IsExported() {
            continue
        }

        plan, err := planField(field, []int{i})
        if err != nil {
            return nil, err
        }
        plans = append(plans, plan)
    }
    return plans, nil
}

Type Converters

Converters handle width normalization between Go types and Atom storage:

// typeConverter normalizes a Go numeric type to/from the widest storage
// width used by Atom (int64, uint64, float64). Only the pair of functions
// matching the type's family is populated; fromXxx validates that the
// wide value fits the original type before converting back.
type typeConverter struct {
    toInt64     func(reflect.Value) int64                  // widen signed value for storage
    fromInt64   func(int64) (reflect.Value, error)         // narrow back; errors on overflow
    toUint64    func(reflect.Value) uint64                 // widen unsigned value for storage
    fromUint64  func(uint64) (reflect.Value, error)        // narrow back; errors on overflow
    toFloat64   func(reflect.Value) float64                // widen float value for storage
    fromFloat64 func(float64) (reflect.Value, error)       // narrow back; errors on overflow
    origType    reflect.Type                               // the Go type being converted
}

Overflow Detection

The fromXxx functions validate that values fit in the target type:

// intConverter builds a typeConverter for the signed-integer type t.
// The bounds are chosen per kind so fromInt64 can reject values that
// would silently truncate when narrowed back to t.
// (Excerpt: only two kinds and the fromInt64 member are shown here.)
func intConverter(t reflect.Type) typeConverter {
    var minVal, maxVal int64
    switch t.Kind() {
    case reflect.Int8:
        minVal, maxVal = math.MinInt8, math.MaxInt8
    case reflect.Int16:
        minVal, maxVal = math.MinInt16, math.MaxInt16
    // ...
    }

    return typeConverter{
        fromInt64: func(i int64) (reflect.Value, error) {
            // Reject any stored value that does not fit in t.
            if i < minVal || i > maxVal {
                return reflect.Value{}, fmt.Errorf("overflow")
            }
            // Allocate a fresh addressable value of type t and set it.
            rv := reflect.New(t).Elem()
            rv.SetInt(i)
            return rv, nil
        },
    }
}

Registry

The registry caches atomizers by type:

// registry caches one built atomizer per Go type, so reflection-based
// plan construction runs at most once per type for the process lifetime.
// registryMu guards registry for concurrent Use[T]() calls.
var (
    registry   = make(map[reflect.Type]*reflectAtomizer)
    registryMu sync.RWMutex
)

Thread Safety

The registry uses a read-write mutex:

  • Read path: Check cache with RLock() (fast, concurrent)
  • Write path: Build and cache with Lock() (exclusive, rare)
// Use returns the (possibly cached) Atomizer for T, building and caching
// it on first use. Safe for concurrent callers: reads take RLock, the
// one-time build takes the exclusive Lock with a double-check.
// (Excerpt: the tail of the build path is elided.)
func Use[T any]() (*Atomizer[T], error) {
    typ := reflect.TypeFor[T]()

    // Fast path: check cache
    registryMu.RLock()
    if ra, ok := registry[typ]; ok {
        registryMu.RUnlock()
        return &Atomizer[T]{inner: ra}, nil
    }
    registryMu.RUnlock()

    // Slow path: build and cache
    registryMu.Lock()
    defer registryMu.Unlock()

    // Double-check after acquiring write lock
    // (another goroutine may have built the plan between RUnlock and Lock).
    if ra, ok := registry[typ]; ok {
        return &Atomizer[T]{inner: ra}, nil
    }

    // NOTE(review): `spec` is not declared in this excerpt — presumably
    // derived from T earlier in the full implementation.
    ra, err := buildReflectAtomizerWithSpec(typ, spec)
    // ...
}

Nested Type Handling

Nested structs are handled recursively:

type Address struct {
    Street string
    City   string
}

type User struct {
    Name    string
    Address Address // Nested struct
}

When atomizing:

// User atom
atom.Strings["Name"] = "Alice"
atom.Nested["Address"] = Atom{
    Strings: map[string]string{
        "Street": "123 Main St",
        "City":   "Springfield",
    },
}

Circular References

The registry uses a shell pattern to handle self-referential types:

type Node struct {
    Value    int
    Children []Node // Self-referential
}
// ensureRegistered returns the atomizer for typ, creating it if needed.
// It registers an empty "shell" atomizer BEFORE building the field plan,
// so a self-referential type (e.g. Node containing []Node) finds the
// shell in the registry instead of recursing forever.
// (Excerpt: the tail is elided; callers are presumed to hold registryMu.)
func ensureRegistered(typ reflect.Type) *reflectAtomizer {
    if ra, ok := registry[typ]; ok {
        return ra
    }

    // Register shell first (breaks circular dependency)
    ra := &reflectAtomizer{typ: typ}
    registry[typ] = ra

    // Now build plan (may recursively call ensureRegistered)
    // NOTE(review): err is unchecked in this excerpt — presumably handled
    // in the elided code below.
    plan, err := buildFieldPlan(typ)
    ra.plan = plan
    // ...
}

Atomize/Deatomize Flow

Atomize

// atomize applies every cached field plan to src, writing the extracted
// values into dst. src may be the struct value itself or a pointer to it.
func (ra *reflectAtomizer) atomize(src any, dst *Atom) {
    sv := reflect.ValueOf(src)
    // Unwrap a pointer so FieldByIndex operates on the struct itself.
    if sv.Kind() == reflect.Ptr {
        sv = sv.Elem()
    }

    // Index (not range-value) so fp aliases the cached plan entry
    // instead of copying it.
    for i := 0; i < len(ra.plan); i++ {
        fp := &ra.plan[i]
        atomizeField(fp, sv.FieldByIndex(fp.index), dst)
    }
}

Deatomize

// deatomize applies every cached field plan in reverse: values stored in
// src are written back into the struct dst points to. Stops at the first
// field error and returns it.
func (ra *reflectAtomizer) deatomize(src *Atom, dst any) error {
    dv := reflect.ValueOf(dst)
    // Unwrap the pointer so FieldByIndex addresses the struct fields.
    if dv.Kind() == reflect.Ptr {
        dv = dv.Elem()
    }

    // Index (not range-value) so fp aliases the cached plan entry
    // instead of copying it.
    for i := 0; i < len(ra.plan); i++ {
        fp := &ra.plan[i]
        if err := deatomizeField(fp, src, dv.FieldByIndex(fp.index)); err != nil {
            return err
        }
    }
    return nil
}

Memory Allocation

Atom pre-allocates maps based on field counts:

// allocateAtom builds an Atom for spec, pre-sizing each table map from
// tableSet so maps never grow during atomization, and skipping maps for
// tables the type never uses.
// (Fix: the excerpt declared a *Atom return type but never returned —
// the trailing `return atom` was missing.)
func allocateAtom(spec Spec, tableSet map[Table]int) *Atom {
    atom := &Atom{
        Spec:         spec,
        Nested:       make(map[string]Atom),
        NestedSlices: make(map[string][]Atom),
    }

    // Only allocate a table's map when at least one field targets it.
    if n := tableSet[TableStrings]; n > 0 {
        atom.Strings = make(map[string]string, n)
    }
    if n := tableSet[TableInts]; n > 0 {
        atom.Ints = make(map[string]int64, n)
    }
    // ... only allocate maps that will be used

    return atom
}

This avoids allocating empty maps for unused tables.

Interface Bypass

Types can implement Atomizable and Deatomizable to bypass reflection:

type Atomizable interface {
    Atomize(*Atom)
}

type Deatomizable interface {
    Deatomize(*Atom) error
}

When these interfaces are implemented:

// Atomize converts obj into a freshly allocated Atom. When T implements
// Atomizable, that implementation runs instead of the reflection plan.
func (a *Atomizer[T]) Atomize(obj *T) *Atom {
    atom := a.inner.newAtom()

    // Interface bypass: a hand-written (or generated) Atomize method
    // sidesteps reflection entirely.
    if a.inner.hasAtomizable {
        az, ok := any(obj).(Atomizable)
        if ok {
            az.Atomize(atom)
            return atom
        }
    }

    a.inner.atomize(obj, atom) // reflection fallback
    return atom
}

This enables code generation to eliminate reflection overhead entirely.

Next Steps