osa1 github about atom

Containing contagious types with OCaml modules

March 10, 2026 - Tagged as: en, ocaml, plt.

In the previous post we looked at a way to extend product types with new fields and sum types with new constructors, using row types, in Fir.

A problem with the approach was that it required adding type parameters to the type being extended. In the cases where the extended type is a sum type and different constructors are extended with different fields, we may even need more than one type parameter. Those type parameters can then be propagated to the use sites, and their use sites, and their use sites…

I call these kinds of type parameters “contagious”, and it’s difficult to completely avoid them in Fir. In Fir, most function types are polymorphic in the exceptions they throw. This allows things like: calling a function that doesn’t throw in throwing contexts, or calling a function that throws Error1 and another that throws Error2 from the same function, and inferring the calling function’s exception type as [Error1, Error2, ..exn]. The way we achieve this polymorphism1 is by having a type parameter representing the exceptions the function can throw2.

So I thought, maybe instead of avoiding type parameters, we should think about how we might contain, or hide them, and I started to look at existing features in other languages.

In this post we’re going to look at how OCaml modules might be used for avoiding multiple type parameters (one for each extension). It turns out OCaml modules provide a solution that’s almost right.

(Full OCaml code is at the end of this post.)

The setup

We have lots of AST types for expressions, statements, declarations, … and we want to make them extensible with new fields and new constructors. Different AST types will be extended with different fields or constructors, and even in the same AST type (e.g. Expr in the original post) we may need different types of extensions for different constructors of the type.

To keep things simple, in this post we’ll only add new fields.

As the language, we’ll use the lambda calculus, with lets. Here’s how the AST could look like in OCaml:

type expr = Var of var | App of app | Abs of abs | Let of let_
and var = { name : string }
and app = { fn : expr; arg : expr }
and abs = { param : string; body : expr }
and let_ = { bound : string; rhs : expr; body : expr }

With extensions:

type ('v, 'a, 'b, 'l) expr =
  | Var of 'v var
  | App of ('v, 'a, 'b, 'l) app
  | Abs of ('v, 'a, 'b, 'l) abs
  | Let of ('v, 'a, 'b, 'l) let_

and 'v var = { name : string; var_ext : 'v }

and ('v, 'a, 'b, 'l) app = {
  fn : ('v, 'a, 'b, 'l) expr;
  arg : ('v, 'a, 'b, 'l) expr;
  app_ext : 'a;
}

and ('v, 'a, 'b, 'l) abs = {
  param : string;
  body : ('v, 'a, 'b, 'l) expr;
  abs_ext : 'b;
}

and ('v, 'a, 'b, 'l) let_ = {
  bound : string;
  rhs : ('v, 'a, 'b, 'l) expr;
  body : ('v, 'a, 'b, 'l) expr;
  let_ext : 'l;
}

This is obviously unusable and it won’t scale with more types and constructors.

With modules, we can have a module signature with the AST types and abstract extension types, and implement it with different concrete types for the extension types.

We first define a module signature with the AST extensions:

module type AST_EXTENSIONS = sig
  type var_ext
  type app_ext
  type abs_ext
  type let_ext

  val default_var_ext : var_ext
  val default_app_ext : app_ext
  val default_abs_ext : abs_ext
  val default_let_ext : let_ext
end

AST module signature then uses the extension types:

module type AST = sig
  include AST_EXTENSIONS

  type expr = Var of var | App of app | Abs of abs | Let of let_
  and var = { name : string; var_ext : var_ext }
  and app = { fn : expr; arg : expr; app_ext : app_ext }
  and abs = { param : string; body : expr; abs_ext : abs_ext }
  and let_ = { bound : string; rhs : expr; body : expr; let_ext : let_ext }
end

We then use a functor to create new AST modules, with a given extension module:

module MakeAst (Ext : AST_EXTENSIONS) :
  AST
    with type var_ext = Ext.var_ext
     and type app_ext = Ext.app_ext
     and type abs_ext = Ext.abs_ext
     and type let_ext = Ext.let_ext = struct
  type var_ext = Ext.var_ext
  type app_ext = Ext.app_ext
  type abs_ext = Ext.abs_ext
  type let_ext = Ext.let_ext

  let default_var_ext = Ext.default_var_ext
  let default_app_ext = Ext.default_app_ext
  let default_abs_ext = Ext.default_abs_ext
  let default_let_ext = Ext.default_let_ext

  type expr = Var of var | App of app | Abs of abs | Let of let_
  and var = { name : string; var_ext : Ext.var_ext }
  and app = { fn : expr; arg : expr; app_ext : app_ext }
  and abs = { param : string; body : expr; abs_ext : abs_ext }
  and let_ = { bound : string; rhs : expr; body : expr; let_ext : let_ext }
end

In the first post we had two examples: a formatter that doesn’t need any extensions, and a type checker that needs to annotate AST nodes with inferred types. Here are the formatter’s and type checker’s AST modules:

module FmtAst = MakeAst (struct
  type var_ext = unit
  type app_ext = unit
  type abs_ext = unit
  type let_ext = unit

  let default_var_ext = ()
  let default_app_ext = ()
  let default_abs_ext = ()
  let default_let_ext = ()
end)

(* The type-checking type does not matter, just as a placeholder. *)
type ty = TyVar of string | TyArrow of ty * ty

(* Type-checking AST extensions. *)
type tc_var_ext = { inferred_type : ty option }
type tc_app_ext = { result_type : ty option }
type tc_abs_ext = { param_type : ty option }
type tc_let_ext = { bound_type : ty option }

module TcAst = MakeAst (struct
  type var_ext = tc_var_ext
  type app_ext = tc_app_ext
  type abs_ext = tc_abs_ext
  type let_ext = tc_let_ext

  let default_var_ext = { inferred_type = None }
  let default_app_ext = { result_type = None }
  let default_abs_ext = { param_type = None }
  let default_let_ext = { bound_type = None }
end)

Now, the parser needs to be able to allocate different ASTs in different use sites, and so that’s where we need one type parameter (actually, a module parameter). As far as I understand, we can’t have functions parametric over modules, so we need a functor for generating a given module’s AST in the parser, using the default_..._ext functions in the AST module:

module Parse (A : AST) = struct
  (* Parsing entry point: tokenizes and parses. *)
  let parse (input : string) : A.expr =
    ...

  (* Parse a single expression from tokens. *)
  let rec parse_expr (toks : tokens) : A.expr * tokens =
    ...
end

Similarly, any other function that’s polymorphic over AST types needs to be a part of a functor that takes an AST module as argument. As another example, here’s a function that counts the number of AST nodes:

module CountNodes (A : AST) = struct
  let rec count (e : A.expr) : int =
    match e with
    | Var _ -> 1
    | App { fn; arg; _ } -> 1 + count fn + count arg
    | Abs { body; _ } -> 1 + count body
    | Let { rhs; body; _ } -> 1 + count rhs + count body
end

The final part of the ceremony is we apply these functors to get modules that we can then use to parse, format, and count nodes:

(* Parser module for the formatter. *)
module FmtParse = Parse (FmtAst)

(* Parser module for the type checker. *)
module TcParse = Parse (TcAst)

(* Node counter on the formatter's AST. *)
module CountFmt = CountNodes (FmtAst)

(* Node counter on the type checker's AST. *)
module CountTc = CountNodes (TcAst)

Type checker and formatter then refer to these modules:

let rec check_expr (e : TcAst.expr) : ty = ...
let rec format_expr (e : FmtAst.expr) : string = ...

The good

I can easily add per-AST functions, constants, or types and my parser or type checker code doesn’t become any worse. They always refer to the AST-specific things directly, and type signatures within the parser and type checker modules don’t get more complicated as we add more extensions.

The bad

The entire AST type definitions need to be duplicated in the AST signature and MakeAst functor. Just this alone renders this feature useless for our purposes, as in any real programming language there will be a lot of AST types, and each type will be quite large too (with many fields and constructors).

There’s also a smaller-scale duplication in these lines:

module MakeAst (Ext : AST_EXTENSIONS) :
  AST
    with type var_ext = Ext.var_ext
     and type app_ext = Ext.app_ext
     and type abs_ext = Ext.abs_ext
     and type let_ext = Ext.let_ext = struct
  type var_ext = Ext.var_ext
  type app_ext = Ext.app_ext
  type abs_ext = Ext.abs_ext
  type let_ext = Ext.let_ext
  ...
end

My understanding is that the types in the struct ... end part are abstract, i.e. not visible outside of the module (similar to existentials), and the : AST with type ... part specifies the returned module signature, i.e. the public interface. They need to be in sync, but they also need to be specified separately.

The only solution I can think of to these duplications is generating code, but if I’m OK with generating code, that opens up a lot of possibilities, and I don’t need functors anymore. I could even generate the full modules with all the AST types and everything else directly, without using functors.

So in short, OCaml modules helps quite a bit, but they’re held back by the issues with code duplication.


Full code (tested with OCaml 5.3.0)

(* Tested with OCaml 5.3.0. *)

module type AST_EXTENSIONS = sig
  type var_ext
  type app_ext
  type abs_ext
  type let_ext

  val default_var_ext : var_ext
  val default_app_ext : app_ext
  val default_abs_ext : abs_ext
  val default_let_ext : let_ext
end

module type AST = sig
  include AST_EXTENSIONS

  type expr = Var of var | App of app | Abs of abs | Let of let_
  and var = { name : string; var_ext : var_ext }
  and app = { fn : expr; arg : expr; app_ext : app_ext }
  and abs = { param : string; body : expr; abs_ext : abs_ext }
  and let_ = { bound : string; rhs : expr; body : expr; let_ext : let_ext }
end

module MakeAst (Ext : AST_EXTENSIONS) :
  AST
    with type var_ext = Ext.var_ext
     and type app_ext = Ext.app_ext
     and type abs_ext = Ext.abs_ext
     and type let_ext = Ext.let_ext = struct
  type var_ext = Ext.var_ext
  type app_ext = Ext.app_ext
  type abs_ext = Ext.abs_ext
  type let_ext = Ext.let_ext

  let default_var_ext = Ext.default_var_ext
  let default_app_ext = Ext.default_app_ext
  let default_abs_ext = Ext.default_abs_ext
  let default_let_ext = Ext.default_let_ext

  type expr = Var of var | App of app | Abs of abs | Let of let_
  and var = { name : string; var_ext : Ext.var_ext }
  and app = { fn : expr; arg : expr; app_ext : app_ext }
  and abs = { param : string; body : expr; abs_ext : abs_ext }
  and let_ = { bound : string; rhs : expr; body : expr; let_ext : let_ext }
end

(* --------------------------------------------------------
   A simple recursive-descent parser, generic over any AST.

   Grammar:
     expr   ::= 'let' IDENT '=' expr 'in' expr
              | '\' IDENT '.' expr
              | app
     app    ::= atom+
     atom   ::= IDENT | '(' expr ')'
   -------------------------------------------------------- *)
module Parse (A : AST) = struct
  type tokens = string list

  (* parse_expr: top-level, handles let/lambda/application.
     Lambda and let bodies extend as far right as possible
     (i.e. parse_expr), so nested constructs work without parens:
       let f = \x. \y. x in ...
       \x. \y. x y
     parse_app_args stops at 'in', ')', and non-atom tokens,
     so 'in' correctly terminates a let-RHS that is an application. *)
  let rec parse_expr (toks : tokens) : A.expr * tokens =
    match toks with
    | "let" :: name :: "=" :: rest -> (
        let rhs, rest = parse_expr rest in
        match rest with
        | "in" :: rest ->
            let body, rest = parse_expr rest in
            ( A.Let { bound = name; rhs; body; let_ext = A.default_let_ext },
              rest )
        | _ -> failwith "expected 'in'")
    | "\\" :: param :: "." :: rest ->
        let body, rest = parse_expr rest in
        (A.Abs { param; body; abs_ext = A.default_abs_ext }, rest)
    | _ -> parse_app toks

  and parse_app (toks : tokens) : A.expr * tokens =
    let head, rest = parse_atom toks in
    parse_app_args head rest

  and parse_app_args (fn : A.expr) (toks : tokens) : A.expr * tokens =
    match toks with
    | [] | ")" :: _ | "in" :: _ -> (fn, toks)
    | _ -> (
        match parse_atom_opt toks with
        | Some (arg, rest) ->
            let node = A.App { fn; arg; app_ext = A.default_app_ext } in
            parse_app_args node rest
        | None -> (fn, toks))

  and parse_atom (toks : tokens) : A.expr * tokens =
    match parse_atom_opt toks with
    | Some r -> r
    | None ->
        let tok = match toks with t :: _ -> t | [] -> "EOF" in
        failwith (Printf.sprintf "expected atom, got '%s'" tok)

  and parse_atom_opt (toks : tokens) : (A.expr * tokens) option =
    match toks with
    | "(" :: rest -> (
        let e, rest = parse_expr rest in
        match rest with
        | ")" :: rest -> Some (e, rest)
        | _ -> failwith "expected ')'")
    | tok :: rest
      when tok <> "let" && tok <> "\\" && tok <> "in" && tok <> "="
           && tok <> "." && tok <> "(" && tok <> ")" ->
        Some (A.Var { name = tok; var_ext = A.default_var_ext }, rest)
    | _ -> None

  let parse (input : string) : A.expr =
    (* Tokenize: split on whitespace, treat parens as separate tokens *)
    let buf = Buffer.create (String.length input) in
    String.iter
      (fun c ->
        match c with
        | '(' | ')' | '.' | '\\' ->
            Buffer.add_char buf ' ';
            Buffer.add_char buf c;
            Buffer.add_char buf ' '
        | _ -> Buffer.add_char buf c)
      input;
    let s = Buffer.contents buf in
    let toks = String.split_on_char ' ' s |> List.filter (fun s -> s <> "") in
    let expr, rest = parse_expr toks in
    if rest <> [] then
      failwith (Printf.sprintf "unexpected token '%s'" (List.hd rest));
    expr
end

(* --------------------------------------------------------
   Formatter — all extensions are unit.
   -------------------------------------------------------- *)
module FmtAst = MakeAst (struct
  type var_ext = unit
  type app_ext = unit
  type abs_ext = unit
  type let_ext = unit

  let default_var_ext = ()
  let default_app_ext = ()
  let default_abs_ext = ()
  let default_let_ext = ()
end)

module FmtParse = Parse (FmtAst)

let rec format_expr (e : FmtAst.expr) : string =
  match e with
  | Var { name; _ } -> name
  | App { fn; arg; _ } ->
      Printf.sprintf "(%s %s)" (format_expr fn) (format_arg arg)
  | Abs { param; body; _ } ->
      Printf.sprintf "(\\%s. %s)" param (format_expr body)
  | Let { bound; rhs; body; _ } ->
      Printf.sprintf "(let %s = %s in %s)" bound (format_expr rhs)
        (format_expr body)

and format_arg (e : FmtAst.expr) : string =
  match e with
  | Var { name; _ } -> name
  | _ -> Printf.sprintf "(%s)" (format_expr e)

(* --------------------------------------------------------
   Type checker — extensions carry inferred types.
   -------------------------------------------------------- *)
type ty = TyVar of string | TyArrow of ty * ty
type tc_var_ext = { inferred_type : ty option }
type tc_app_ext = { result_type : ty option }
type tc_abs_ext = { param_type : ty option }
type tc_let_ext = { bound_type : ty option }

module TcAst = MakeAst (struct
  type var_ext = tc_var_ext
  type app_ext = tc_app_ext
  type abs_ext = tc_abs_ext
  type let_ext = tc_let_ext

  let default_var_ext = { inferred_type = None }
  let default_app_ext = { result_type = None }
  let default_abs_ext = { param_type = None }
  let default_let_ext = { bound_type = None }
end)

module TcParse = Parse (TcAst)

let rec format_ty (t : ty) : string =
  match t with
  | TyVar s -> s
  | TyArrow ((TyArrow _ as a), b) ->
      Printf.sprintf "(%s) -> %s" (format_ty a) (format_ty b)
  | TyArrow (a, b) -> Printf.sprintf "%s -> %s" (format_ty a) (format_ty b)

(* Placeholder: just read off the extension annotation if present. *)
let rec check_expr (e : TcAst.expr) : ty =
  match e with
  | Var { var_ext = { inferred_type = Some t }; _ } -> t
  | Var { name; _ } -> TyVar name
  | App { app_ext = { result_type = Some t }; _ } -> t
  | App { fn; _ } -> (
      match check_expr fn with TyArrow (_, ret) -> ret | t -> t)
  | Abs { param; body; abs_ext = { param_type }; _ } ->
      let p = match param_type with Some t -> t | None -> TyVar param in
      TyArrow (p, check_expr body)
  | Let { body; _ } -> check_expr body

(* --------------------------------------------------------
   Generic node counter — works on any AST.
   -------------------------------------------------------- *)
module CountNodes (A : AST) = struct
  let rec count (e : A.expr) : int =
    match e with
    | Var _ -> 1
    | App { fn; arg; _ } -> 1 + count fn + count arg
    | Abs { body; _ } -> 1 + count body
    | Let { rhs; body; _ } -> 1 + count rhs + count body
end

module CountFmt = CountNodes (FmtAst)
module CountTc = CountNodes (TcAst)

(* --------------------------------------------------------
   Demo: parse the same source in both worlds.
   -------------------------------------------------------- *)
let source = {|let id = \x. x in id 42|}

let () =
  (* Formatter world — parse and pretty-print *)
  let prog = FmtParse.parse source in
  Printf.printf "formatted: %s\n" (format_expr prog);
  Printf.printf "node count: %d\n" (CountFmt.count prog);

  (* Type checker world — parse (extensions default to None),
     then check with the placeholder checker *)
  let tc_prog = TcParse.parse source in
  Printf.printf "inferred type: %s\n" (format_ty (check_expr tc_prog));
  Printf.printf "node count: %d\n" (CountTc.count tc_prog)

  1. Actually, any kind of polymorphism. Fir currently doesn’t have trait objects and the only way to have polymorphism is by using type parameters, potentially with qualifications.↩︎

  2. This is a little bit simplified, see this post for more details and examples.↩︎