Schema Compiler

Code Generation

How the schema compiler generates serialization code for each target language.

The sia compile command transforms .sia schema files into serialization code. Each target language gets idiomatic output that uses that language's Sia library.

Compilation Pipeline

.sia source → Lexer (tokenize) → Parser (CST) → Visitor (IR) → Generator (target code)
  1. The lexer tokenizes the source into keywords, identifiers, literals, and operators
  2. The parser builds a concrete syntax tree (CST) from the token stream
  3. The visitor walks the CST and produces an intermediate representation (IR): an array of schema and plugin definitions
  4. The generator takes the IR and produces code for the target language

Target Language Detection

The compiler determines the target language in this priority order:

  1. Explicit flag: -e ts or --extension go
  2. Output file extension: -o schema.ts implies TypeScript
  3. Auto-detection from project files in the current directory (and up to 2 parent directories):
    • tsconfig.json found: TypeScript
    • go.mod found: Go
    • pyproject.toml found: Python
    • CMakeLists.txt found: C++

If nothing is detected, the compiler asks you to specify -e.

Usage

# Compile to TypeScript, write to file
sia compile schema.sia -o schema.ts

# Compile to Go, print to stdout
sia compile schema.sia -e go -s

# Compile to C++ (generates both .hpp and .cpp)
sia compile schema.sia -o schema.cpp

# Auto-detect language from project context
sia compile schema.sia -o schema

Input Schema

The examples below all use this schema:

schema Address {
  street?  string8 = "Default Street"
  city     string8
  zip      int32
}

schema Person {
  name     string8
  age?     int32
  email?   string8(encoding = "ascii")[]
  tags     string8[]
  avatar?  byteN(length = 32)
  photo    byte64
  address  Address
}

Generated Output

The TypeScript generator produces an interface, an encode function, and a decode function for each schema:

import { Sia } from "@timeleap/sia";

export interface Person {
  name: string;
  age?: number;
  email?: string[];
  tags: string[];
  avatar?: Uint8Array | Buffer;
  photo: Uint8Array | Buffer;
  address: Address;
}

export function encodePerson(sia: Sia, person: Person): Sia {
  sia.addString8(person.name);
  sia.addInt32(person.age ?? 0);
  sia.addArray8(person.email ?? [], (s: Sia, v) => s.addString8(v));
  sia.addArray8(person.tags, (s: Sia, v) => s.addString8(v));
  sia.addByteArrayN(person.avatar ?? new Uint8Array(0));
  sia.addByteArray64(person.photo);
  encodeAddress(sia, person.address);
  return sia;
}

export function decodePerson(sia: Sia): Person {
  return {
    name: sia.readString8(),
    age: sia.readInt32(),
    email: sia.readArray8((s: Sia) => s.readString8()),
    tags: sia.readArray8((s: Sia) => s.readString8()),
    avatar: sia.readByteArrayN(32),
    photo: sia.readByteArray64(),
    address: decodeAddress(sia),
  };
}

export interface Address {
  street?: string;
  city: string;
  zip: number;
}

export function encodeAddress(sia: Sia, address: Address): Sia {
  sia.addAscii(address.street ?? "");
  sia.addString8(address.city);
  sia.addInt32(address.zip);
  return sia;
}

export function decodeAddress(sia: Sia): Address {
  return {
    street: sia.readAscii(),
    city: sia.readString8(),
    zip: sia.readInt32(),
  };
}

Key patterns:

  • Optional fields use ?? with a zero-value fallback
  • Arrays generate addArray8 / readArray8 with inline callbacks
  • Custom types call the referenced schema's encode/decode functions
  • Fields with encoding = "ascii" use addAscii / readAscii
  • Output is formatted with Prettier

The Go generator produces structs with receiver methods:

package schema

import sia "github.com/TimeleapLabs/go-sia/v2/pkg"

type Person struct {
  Name    string          `json:"name"`
  Age     int32           `json:"age"`
  Email   []string        `json:"email"`
  Tags    []string        `json:"tags"`
  Avatar  []byte          `json:"avatar"`
  Photo   []byte          `json:"photo"`
  Address *Address        `json:"address"`
}

func (p *Person) Sia() sia.Sia {
  s := sia.New()
  s.AddString8(p.Name)
  s.AddInt32(p.Age)
  s.AddByteArrayN(p.Avatar)
  s.AddByteArray64(p.Photo)
  s.EmbedSia(p.Address.Sia().GetSia())
  // Arrays use NewSiaArray with EmbedSia...
  return s
}

func (p *Person) FromSia(s sia.Sia) *Person {
  p.Name = s.ReadString8()
  p.Age = s.ReadInt32()
  p.Avatar = s.ReadByteArrayN(32)
  p.Photo = s.ReadByteArray64()
  address := Address{}
  p.Address = address.FromSia(s)
  // Arrays use NewArray with ReadArray8...
  return p
}

func (p *Person) FromSiaBytes(bytes []byte) *Person {
  s := sia.NewFromBytes(bytes)
  return p.FromSia(s)
}

Key patterns:

  • Struct fields have json tags
  • Custom types use pointer fields (*Address) and EmbedSia for encoding
  • Arrays use NewSiaArray / NewArray helpers
  • Optional fields with defaults use inline Go IIFEs
  • A convenience FromSiaBytes method is generated for each schema

The Python generator produces classes with encode instance methods and decode class methods:

from sia import Sia
from typing import List

class Person():
    def __init__(self,
        name: str,
        tags: List[str],
        photo: bytes,
        address: "Address",
        age: int = None,
        email: List[str] = None,
        avatar: bytes = None,
    ):
        self.name = name
        self.age = age
        self.email = email
        self.tags = tags
        self.avatar = avatar
        self.photo = photo
        self.address = address

    def encode(self, sia: Sia) -> Sia:
        sia.add_string8(self.name)
        sia.add_int32(self.age)
        sia.add_array8(self.email, lambda sia, v: sia.add_string8(v))
        sia.add_array8(self.tags, lambda sia, v: sia.add_string8(v))
        sia.add_byte_array_n(self.avatar)
        sia.add_byte_array64(self.photo)
        self.address.encode(sia)
        return sia

    @classmethod
    def decode(cls, sia: Sia) -> "Person":
        return cls(
            name=sia.read_string8(),
            age=sia.read_int32(),
            email=sia.read_array8(lambda sia: sia.read_string8()),
            tags=sia.read_array8(lambda sia: sia.read_string8()),
            avatar=sia.read_byte_array_n(32),
            photo=sia.read_byte_array64(),
            address=Address.decode(sia),
        )

Key patterns:

  • Required fields come first in __init__, optional fields follow with = None defaults
  • encode is an instance method that returns the Sia instance
  • decode is a @classmethod that constructs a new instance
  • Arrays use lambda callbacks

The C++ generator produces two files: a header (.hpp) and a source (.cpp).

The header declares structs and function prototypes:

#pragma once

#include <sia/sia.hpp>
#include <vector>
#include <string>
#include <memory>

struct Address;
struct Person;

struct Person {
  std::string name;
  int32_t age;
  std::vector<std::string> email;
  std::vector<std::string> tags;
  std::vector<uint8_t> avatar;
  std::vector<uint8_t> photo;
  std::shared_ptr<Address> address;
};

struct Address {
  std::string street;
  std::string city;
  int32_t zip;
};

std::shared_ptr<sia::Sia> encodePerson(Person person);
Person decodePerson(std::shared_ptr<sia::Sia> sia);
std::shared_ptr<sia::Sia> encodeAddress(Address address);
Address decodeAddress(std::shared_ptr<sia::Sia> sia);

The source implements the encode/decode functions:

#include "schema.hpp"
#include <sia/array.hpp>

std::shared_ptr<sia::Sia> encodePerson(Person person) {
  auto s = sia::New();
  s->AddString8(person.name);
  s->AddInt32(person.age);
  s->AddByteArrayN(person.avatar);
  s->AddByteArray64(person.photo);
  s->EmbedSia(encodeAddress(*person.address));
  sia::AddArray8<std::string>(s, person.email,
    [](auto s, const std::string& v) { s->AddString8(v); });
  sia::AddArray8<std::string>(s, person.tags,
    [](auto s, const std::string& v) { s->AddString8(v); });
  return s;
}

Person decodePerson(std::shared_ptr<sia::Sia> sia) {
  Person person;
  person.name = sia->ReadString8();
  person.age = sia->ReadInt32();
  person.avatar = sia->ReadByteArrayN(32);
  person.photo = sia->ReadByteArray64();
  person.address = std::make_shared<Address>(decodeAddress(sia));
  person.email = sia::ReadArray8<std::string>(sia,
    [](auto s) -> std::string { return s->ReadString8(); });
  person.tags = sia::ReadArray8<std::string>(sia,
    [](auto s) -> std::string { return s->ReadString8(); });
  return person;
}

Key patterns:

  • Custom types use std::shared_ptr<T> fields
  • Forward declarations for all struct types
  • Array encoding/decoding via sia::AddArray8<T> / sia::ReadArray8<T> with C++ lambdas
  • Separate .hpp (declarations) and .cpp (implementations)

Type Mapping

Sia type      TypeScript            Go        Python     C++
string8       string                string    str        std::string
int32         number                int32     int        int32_t
uint64        number                uint64    int        uint64_t
bool          boolean               bool      bool       bool
byte8         Uint8Array | Buffer   []byte    bytes      std::vector<uint8_t>
byteN         Uint8Array | Buffer   []byte    bytes      std::vector<uint8_t>
Custom (Foo)  Foo                   *Foo      "Foo"      std::shared_ptr<Foo>
string8[]     string[]              []string  List[str]  std::vector<std::string>

Error Reporting

The compiler provides source-level error messages with context:

Parsing error in schema.sia at line 5:12

schema Person {
  name    string8
  age?    int3
           ↑

Expected token of type ...

Three lines of context are shown before the error location, with a caret pointing to the exact column.