From 2595511a38adb1c03c6b42ac20d15dc20842844e Mon Sep 17 00:00:00 2001 From: valentin gagarin Date: Wed, 13 Nov 2024 15:24:41 +0100 Subject: [PATCH] implement a slice of the DOM spec as modules this is roughly sufficient to recreate the website as it currently is - elements: - document - html - head - title - base - link (variants that must be unique nested under `head` directly) - canonical - meta (same as for link): - charset - viewport - author (can be multiple, but still unique in aggregate for a document) - description - global attributes: - class - hidden - id - lang - style - title - element-specific attributes: - href - target there's still a lot to do for a reasonably complete implementation, most importantly everything concerning - navigation - top-level flow content (`div`, `article`, headings, `p`, ...) - stylesheets there's also some infrastructure to be arranged for easy but somewhat safe switching between literal HTML and structured representations. --- website/presentation/dom.nix | 366 +++++++++++++++++++++++++++++++++++ 1 file changed, 366 insertions(+) create mode 100644 website/presentation/dom.nix diff --git a/website/presentation/dom.nix b/website/presentation/dom.nix new file mode 100644 index 00000000..c1b0e27c --- /dev/null +++ b/website/presentation/dom.nix @@ -0,0 +1,366 @@ +/** + A strongly typed module system implementation of the Document Object Model (DOM) + + Based on the WHATWG's HTML Living Standard https://html.spec.whatwg.org (CC-BY 4.0) + Inspired by https://github.com/knupfer/type-of-html by @knupfer (BSD-3-Clause) + Similar work from the OCaml ecosystem: https://github.com/ocsigen/tyxml +*/ +{ lib, ... }: +let + inherit (lib) mkOption types; + + # https://html.spec.whatwg.org/multipage/dom.html#content-models + # https://html.spec.whatwg.org/multipage/dom.html#kinds-of-content + content-categories = [ + "none" # https://html.spec.whatwg.org/multipage/dom.html#the-nothing-content-model + "text" # https://html.spec.whatwg.org/multipage/dom.html#text-content + "metadata" # https://html.spec.whatwg.org/multipage/dom.html#metadata-content + "flow" # https://html.spec.whatwg.org/multipage/dom.html#flow-content + "sectioning" # https://html.spec.whatwg.org/multipage/dom.html#sectioning-content + "heading" # https://html.spec.whatwg.org/multipage/dom.html#heading-content + "phrasing" # https://html.spec.whatwg.org/multipage/dom.html#phrasing-content + "embedded" # https://html.spec.whatwg.org/multipage/dom.html#embedded-content-2 + "interactive" # https://html.spec.whatwg.org/multipage/dom.html#interactive-content + "palpable" # https://html.spec.whatwg.org/multipage/dom.html#palpable-content + "scripting" # https://html.spec.whatwg.org/multipage/dom.html#script-supporting-elements + ]; + + # base type for all DOM elements + element = { name, config, ... }: { + # TODO: add fields for upstream documentation references + # TODO: programmatically generate documentation + options = with lib; { + attrs = mkAttrs { }; + categories = mkOption { + type = types.listOfUnique (types.enum content-categories); + }; + __toString = mkOption { + internal = true; + type = with types; functionTo str; + }; + }; + }; + + # options with types for all the defined DOM elements + element-types = lib.mapAttrs + (name: value: mkOption { type = types.submodule value; }) + elements; + + # attrset of categories, where values are module options with the type of the + # elements that belong to these categories + categories = with lib; + genAttrs + content-categories + (category: + (mapAttrs (_: e: mkOption { type = types.submodule e; }) + # HACK: don't evaluate the submodule types, just grab the config directly + (filterAttrs (_: e: elem category (e { name = "dummy"; }).config.categories) elements)) + ); + + global-attrs = lib.mapAttrs (name: value: mkOption value) { + class = { + type = with types; listOf nonEmptyStr; + default = [ ]; + }; + hidden = { + type = types.bool; + default = false; + }; + id = { + type = with types; nullOr nonEmptyStr; + default = null; + }; + lang = { + # TODO: https://www.rfc-editor.org/rfc/rfc5646.html + type = with types; nullOr str; + default = null; + }; + style = { + # TODO: CSS type ;..) + type = with types; nullOr str; + default = null; + }; + title = { + type = with types; nullOr lines; + default = null; + }; + # TODO: more global attributes + # https://html.spec.whatwg.org/#global-attributes + # https://html.spec.whatwg.org/#attr-aria-* + # https://html.spec.whatwg.org/multipage/microdata.html#encoding-microdata + }; + + attrs = lib.mapAttrs (name: value: mkOption value) { + href = { + # TODO: https://url.spec.whatwg.org/#valid-url-string + # ;..O + type = types.str; + }; + target = { + # https://html.spec.whatwg.org/multipage/document-sequences.html#valid-navigable-target-name-or-keyword + type = + let + is-valid-target = s: + let + inherit (lib) match; + has-lt = s: match ".*<.*" s != null; + has-tab-or-newline = s: match ".*[\t\n].*" s != null; + has-valid-start = s: match "^[^_].*$" s != null; + in + has-valid-start s && !(has-lt s && has-tab-or-newline s); + in + with types; either + (enum [ "_blank" "_self" "_parent" "_top" ]) + (types.addCheck str is-valid-target) + ; + }; + }; + + mkAttrs = attrs: with lib; + mkOption { + type = types.submodule { + options = global-attrs // attrs; + }; + default = { }; + }; + + print-attrs = with lib; attrs: + # TODO: figure out how let attributes know how to print themselves without polluting the interface + let + result = trim (join " " + (mapAttrsToList + # TODO: this needs to be smarter for boolean attributes + # where the value must be written out explicitly. + # probably the attribute itself should have its own `__toString`. + (name: value: + if isBool value then + if value then name else "" + # TODO: some attributes must be explicitly empty + else optionalString (toString value != "") ''${name}=${value}'' + ) + attrs) + ); + in + optionalString (stringLength result > 0) " " + result + ; + + print-element = name: attrs: content: + with lib; + lib.squash '' + <${name}${print-attrs attrs}> + ${lib.indent " " content} + + ''; + + elements = rec { + document = { ... }: { + imports = [ element ]; + options = { + inherit (element-types) html; + }; + + config.categories = [ ]; + config.__toString = self: '' + + ${self.html} + ''; + }; + + html = { name, ... }: { + imports = [ element ]; + options = { + inherit (element-types) head body; + }; + + config.categories = [ ]; + config.__toString = self: print-element name self.attrs '' + ${self.head} + ${self.body} + ''; + }; + + head = { name, ... }: { + imports = [ element ]; + options = with lib; { + # https://html.spec.whatwg.org/multipage/semantics.html#the-head-element:concept-element-content-model + # XXX: this doesn't implement the iframe srcdoc semantics + # as those have questionable value and would complicate things a bit. + # it should be possible though, by passing a flag via module arguments. + inherit (element-types) title; + base = mkOption { + type = with types; nullOr (submodule base); + default = null; + }; + # https://html.spec.whatwg.org/multipage/semantics.html#attr-meta-charset + meta.charset = mkOption { + # TODO: create programmatically from https://encoding.spec.whatwg.org/encodings.json + type = types.enum [ + "utf-8" + ]; + default = "utf-8"; + }; + # https://developer.mozilla.org/en-US/docs/Web/HTML/Viewport_meta_tag#viewport_width_and_screen_width + # this should not exist and no one should ever have to think about it + meta.viewport = mkOption { + type = types.submodule ({ ... }: { + # TODO: figure out how to render only non-default values + options = { + width = mkOption { + type = with types; either + (ints.between 1 10000) + (enum [ "device-width" ]); + default = "device-width"; # not default by standard + }; + height = mkOption { + type = with types; either + (ints.between 1 10000) + (enum [ "device-height" ]); + default = "device-height"; # not default by standard (but seems to work if you don't set it) + }; + initial-scale = mkOption { + type = types.numbers.between 0.1 10; + default = 1; + }; + minimum-scale = mkOption { + type = types.numbers.between 0.1 10; + # TODO: render only as many digits as needed + default = 0.1; + }; + maximum-scale = mkOption { + type = types.numbers.between 0.1 10; + default = 10; + }; + user-scalable = mkOption { + type = types.bool; + default = true; + }; + interactive-widget = mkOption { + type = types.enum [ + "resizes-visual" + "resizes-content" + "overlays-content" + ]; + default = "resizes-visual"; + }; + }; + }); + default = { }; + }; + + meta.authors = mkOption { + type = with types; listOf str; + default = [ ]; + }; + meta.description = mkOption { + type = with types; nullOr str; + default = null; + }; + link.canonical = mkOption { + type = with types; nullOr str; + default = null; + }; + + # TODO: figure out `meta` elements + # https://html.spec.whatwg.org/multipage/semantics.html#the-meta-element:concept-element-attributes + # https://html.spec.whatwg.org/multipage/semantics.html#other-metadata-names + }; + + config.categories = [ ]; + config.__toString = self: + with lib; + print-element name self.attrs '' + ${self.title} + ${with lib; optionalString (!isNull self.base) self.base} + + + ${/* https://html.spec.whatwg.org/multipage/semantics.html#attr-meta-http-equiv-x-ua-compatible */ + ""} + + + + ${join "\n" (map + (author: '''') + self.meta.authors) + } + ''; + }; + + title = { name, ... }: { + imports = [ element ]; + options.text = mkOption { + type = types.str; + }; + config.categories = [ "metadata" ]; + config.__toString = self: "<${name}${print-attrs self.attrs}>${self.text}"; + + }; + + base = { name, ... }: { + imports = [ element ]; + # TODO: "A base element must have either an href attribute, a target attribute, or both." + attrs = mkAttrs { inherit (attrs) href target; }; + config.categories = [ "metadata" ]; + }; + + link = { name, ... }: { + imports = [ element ]; + options = mkAttrs + { + # TODO: more attributes + # https://html.spec.whatwg.org/multipage/semantics.html#the-link-element:concept-element-attributes + inherit (attrs) href; + } // { + # XXX: there are variants of `rel` for `link`, `a`/`area`, and `form` + rel = mkOption { + # https://html.spec.whatwg.org/multipage/semantics.html#attr-link-rel + type = with types; listOfUnique str (enum + # TODO: work out link types in detail, there are lots of additional constraints + # https://html.spec.whatwg.org/multipage/links.html#linkTypes + [ + "alternate" + "dns-prefetch" + "expect" + "help" + "icon" + "license" + "manifest" + "modulepreload" + "next" + "pingback" + "preconnect" + "prefetch" + "preload" + "prev" + "privacy-policy" + "search" + "stylesheet" + "terms-of-service" + ] + ); + }; + }; + # TODO: figure out how to make body-ok `link` elements + # https://html.spec.whatwg.org/multipage/semantics.html#allowed-in-the-body + config.categories = [ "metadata" ]; + config.__toString = self: ""; + }; + + body = { name, ... }: { + imports = [ element ]; + options = { + content = mkOption { + type = with types; + # HACK: bail out for now + # TODO: find a reasonable cut-off for where to place raw content + either str (listOf (attrTag categories.flow)); + }; + }; + config.categories = [ ]; + config.__toString = self: with lib; + if isList self.content then join "\n" (toString self.content) else self.content; + }; + }; +in +elements