From 0f7433abdee60d56d033be2c3546b9832c78fecc Mon Sep 17 00:00:00 2001
From: Kiara Grouwstra
Date: Wed, 3 Sep 2025 15:14:40 +0200
Subject: [PATCH] data model: add TF test (#506)

add a data model deployment test by VM using opentofu. builds upon #505.

Reviewed-on: https://git.fediversity.eu/Fediversity/Fediversity/pulls/506
---
Review notes (this section is ignored by `git am`):

- The patch text was restored to one patch line per line. Indentation is
  reconstructed conventionally; blank lines follow the hunk line counts.
- main.tf: dropped the duplicated `host = var.host` entry from the
  provisioner `environment` map (file is now 51 lines; diffstat and hunk
  header adjusted accordingly).
- main.tf: the provisioner runs ../ssh-single-host/run.sh with `bash`
  instead of `sh`, since that script uses bash arrays (`"${sshOpts[@]}"`)
  and `[[ ... ]]`.
- main.tf: fixed the comment typo "a way to do prevent".
- setup.nix: the generated pins JSON is passed through `lib.escapeShellArg`
  instead of being wrapped in literal single quotes, so a quote inside the
  JSON cannot break the generated script.
- data-model-tf/nixosTest.nix: subtest labels say `tf:` instead of the
  copy-pasted `ssh:`.
- Blob ids on the `index` lines of the three edited new files were not
  recomputed; regenerate the patch after applying if they matter.

 .forgejo/workflows/ci.yaml                    | 12 ++++
 deployment/check/common/data-model.nix        | 34 +++++++++
 deployment/check/data-model-ssh/nixosTest.nix |  5 +-
 deployment/check/data-model-tf/constants.nix  | 11 +++
 deployment/check/data-model-tf/default.nix    | 21 ++++++
 deployment/check/data-model-tf/nixosTest.nix  | 63 ++++++++++++++++
 deployment/data-model.nix                     | 71 +++++++++++++++++++
 deployment/flake-part.nix                     |  5 ++
 deployment/run/ssh-single-host/run.sh         |  2 +-
 deployment/run/tf-single-host/main.tf         | 51 ++++++++++++++
 deployment/run/tf-single-host/run.sh          |  9 +++
 deployment/run/tf-single-host/setup.nix       | 16 +++++
 deployment/run/tf-single-host/tf-env.nix      | 31 ++++++++
 deployment/run/tf-single-host/tf.nix          | 11 +++
 deployment/run/tf-single-host/variables.tf    | 54 ++++++++++++++
 15 files changed, 391 insertions(+), 5 deletions(-)
 create mode 100644 deployment/check/data-model-tf/constants.nix
 create mode 100644 deployment/check/data-model-tf/default.nix
 create mode 100644 deployment/check/data-model-tf/nixosTest.nix
 create mode 100644 deployment/run/tf-single-host/main.tf
 create mode 100644 deployment/run/tf-single-host/run.sh
 create mode 100644 deployment/run/tf-single-host/setup.nix
 create mode 100644 deployment/run/tf-single-host/tf-env.nix
 create mode 100644 deployment/run/tf-single-host/tf.nix
 create mode 100644 deployment/run/tf-single-host/variables.tf

diff --git a/.forgejo/workflows/ci.yaml b/.forgejo/workflows/ci.yaml
index 9b3495c5..4c7effbc 100644
--- a/.forgejo/workflows/ci.yaml
+++ b/.forgejo/workflows/ci.yaml
@@ -64,6 +64,12 @@ jobs:
       - uses: actions/checkout@v4
       - run: nix build .#checks.x86_64-linux.deployment-panel -L
 
+  check-deployment-model:
+    runs-on: native
+    steps:
+      - uses: actions/checkout@v4
+      - run: nix build .#checks.x86_64-linux.deployment-model-ssh -L
+
   check-deployment-model-ssh:
     runs-on: native
     steps:
@@ -76,6 +82,12 @@ jobs:
       - uses: actions/checkout@v4
       - run: nix build .#checks.x86_64-linux.deployment-model-nixops4 -L
 
+  check-deployment-model-tf:
+    runs-on: native
+    steps:
+      - uses: actions/checkout@v4
+      - run: nix build .#checks.x86_64-linux.deployment-model-tf -L
+
   ## NOTE: NixOps4 does not provide a good “dry run” mode, so we instead check
   ## proxies for resources, namely whether their `.#vmOptions.` and
   ## `.#nixosConfigurations.` outputs evaluate and build correctly, and
diff --git a/deployment/check/common/data-model.nix b/deployment/check/common/data-model.nix
index 27dd8149..9c5e942e 100644
--- a/deployment/check/common/data-model.nix
+++ b/deployment/check/common/data-model.nix
@@ -183,6 +183,29 @@ let
                 };
               };
           };
+          single-nixos-vm-tf = environment: {
+            resources."operator-environment".login-shell.username = "operator";
+            implementation =
+              {
+                required-resources,
+                deployment-name,
+              }:
+              {
+                tf-host = {
+                  nixos-configuration = mkNixosConfiguration environment required-resources;
+                  system = targetSystem;
+                  ssh = {
+                    username = "root";
+                    host = nodeName;
+                    key-file = null;
+                    inherit sshOpts;
+                  };
+                  module = self;
+                  inherit args deployment-name;
+                  root-path = pathToRoot;
+                };
+              };
+          };
         };
       };
       options = {
@@ -215,6 +238,17 @@ let
               configuration = config."example-configuration";
             };
           };
+        "tf-deployment" =
+          let
+            env = config.environments."single-nixos-vm-tf";
+          in
+          mkOption {
+            type = env.resource-mapping.output-type;
+            default = env.deployment {
+              deployment-name = "tf-deployment";
+              configuration = config."example-configuration";
+            };
+          };
       };
     }
   );
diff --git a/deployment/check/data-model-ssh/nixosTest.nix b/deployment/check/data-model-ssh/nixosTest.nix
index 0e129493..b3c3aedc 100644
--- a/deployment/check/data-model-ssh/nixosTest.nix
+++ b/deployment/check/data-model-ssh/nixosTest.nix
@@ -10,10 +10,7 @@ let
     inherit pathToRoot pathFromRoot;
     nodeName = "ssh";
     targetSystem = system;
-    sshOpts = [
-      "ConnectTimeout=1"
-      "ServerAliveInterval=1"
-    ];
+    sshOpts = [ ];
   };
   deploy =
     (import ../common/data-model.nix {
diff --git a/deployment/check/data-model-tf/constants.nix b/deployment/check/data-model-tf/constants.nix
new file mode 100644
index 00000000..b7de5251
--- /dev/null
+++ b/deployment/check/data-model-tf/constants.nix
@@ -0,0 +1,11 @@
+{
+  targetMachines = [
+    "target"
+  ];
+  pathToRoot = builtins.path {
+    path = ../../..;
+    name = "root";
+  };
+  pathFromRoot = "/deployment/check/data-model-tf";
+  enableAcme = true;
+}
diff --git a/deployment/check/data-model-tf/default.nix b/deployment/check/data-model-tf/default.nix
new file mode 100644
index 00000000..1815f19a
--- /dev/null
+++ b/deployment/check/data-model-tf/default.nix
@@ -0,0 +1,21 @@
+{
+  runNixOSTest,
+  inputs,
+  sources,
+}:
+
+runNixOSTest {
+  imports = [
+    ../../data-model.nix
+    ../../function.nix
+    ../common/nixosTest.nix
+    ./nixosTest.nix
+  ];
+  _module.args = { inherit inputs sources; };
+  inherit (import ./constants.nix)
+    targetMachines
+    pathToRoot
+    pathFromRoot
+    enableAcme
+    ;
+}
diff --git a/deployment/check/data-model-tf/nixosTest.nix b/deployment/check/data-model-tf/nixosTest.nix
new file mode 100644
index 00000000..40467dba
--- /dev/null
+++ b/deployment/check/data-model-tf/nixosTest.nix
@@ -0,0 +1,63 @@
+{
+  lib,
+  pkgs,
+  ...
+}:
+let
+  inherit (import ./constants.nix) pathToRoot pathFromRoot;
+  inherit (pkgs) system;
+  deployment-config = {
+    inherit pathToRoot pathFromRoot;
+    nodeName = "target";
+    targetSystem = system;
+    sshOpts = [ ];
+  };
+  deploy =
+    (import ../common/data-model.nix {
+      inherit system;
+      config = deployment-config;
+      # opt not to pass `inputs`, as we could only pass serializable arguments through to its self-call
+    })."tf-deployment".tf-host.run;
+in
+{
+  _class = "nixosTest";
+  imports = [
+    ../common/data-model-options.nix
+  ];
+
+  name = "deployment-model";
+  sourceFileset = lib.fileset.unions [
+    ../../run/tf-single-host/run.sh
+  ];
+
+  nodes.deployer =
+    { pkgs, ... }:
+    {
+      environment.systemPackages = with pkgs; [
+        (pkgs.callPackage ../../run/tf-single-host/tf.nix { })
+        jq
+        deploy
+      ];
+
+      # needed only when building from deployer
+      system.extraDependenciesFromModule =
+        { pkgs, ... }:
+        {
+          environment.systemPackages = with pkgs; [
+            hello
+          ];
+        };
+    };
+
+  extraTestScript = ''
+    with subtest("tf: Check the status before deployment"):
+      target.fail("hello 1>&2")
+
+    with subtest("tf: Run the deployment"):
+      deployer.succeed("""
+        ${lib.getExe deploy}
+      """)
+      target.wait_for_unit("multi-user.target")
+      target.succeed("su - operator -c hello 1>&2")
+  '';
+}
diff --git a/deployment/data-model.nix b/deployment/data-model.nix
index 8a71efca..b30ea2ba 100644
--- a/deployment/data-model.nix
+++ b/deployment/data-model.nix
@@ -159,6 +159,76 @@ let
       description = "A NixOps4 NixOS deployment. For an example, see https://github.com/nixops4/nixops4-nixos/blob/main/example/deployment.nix.";
       type = nixops4Deployment;
     };
+    tf-host = mkOption {
+      description = "A Terraform deployment by SSH to update a single existing NixOS host.";
+      type = submodule (tf-host: {
+        options = {
+          system = mkOption {
+            description = "The architecture of the system to deploy to.";
+            type = types.str;
+          };
+          inherit nixos-configuration;
+          ssh = host-ssh;
+          module = mkOption {
+            description = "The module to call to obtain the NixOS configuration from.";
+            type = types.str;
+          };
+          args = mkOption {
+            description = "The arguments with which to call the module to obtain the NixOS configuration.";
+            type = types.attrs;
+          };
+          deployment-name = mkOption {
+            description = "The name of the deployment for which to obtain the NixOS configuration.";
+            type = types.str;
+          };
+          root-path = mkOption {
+            description = "The path to the root of the repository.";
+            type = types.path;
+          };
+          run = mkOption {
+            type = types.package;
+            # error: The option `tf-deployment.tf-host.run' is read-only, but it's set multiple times.
+            # readOnly = true;
+            default =
+              let
+                inherit (tf-host.config)
+                  system
+                  ssh
+                  module
+                  args
+                  deployment-name
+                  root-path
+                  ;
+                inherit (ssh)
+                  host
+                  username
+                  key-file
+                  sshOpts
+                  ;
+                environment = {
+                  key_file = key-file;
+                  deployment_name = deployment-name;
+                  root_path = root-path;
+                  ssh_opts = sshOpts;
+                  inherit
+                    system
+                    host
+                    username
+                    module
+                    args
+                    ;
+                  deployment_type = "tf-host";
+                };
+                tf-env = pkgs.callPackage ./run/tf-single-host/tf-env.nix { };
+              in
+              pkgs.writeShellScriptBin "deploy-ssh.sh" ''
+                env ${toString (lib.mapAttrsToList (k: v: "TF_VAR_${k}=\"${toBash v}\"") environment)} \
+                  tf_env=${tf-env} bash ./deployment/run/tf-single-host/run.sh
+              '';
+          };
+        };
+      });
+    };
   };
 in
 {
@@ -190,6 +260,7 @@ in
                 type = types.optionType;
               };
               # TODO(@fricklerhandwerk): we may want to make the function type explicit here: `application-resources -> resource-type`
+              # and then also rename this to be consistent with the application's resource mapping
               options.apply = mkOption {
                 description = "Apply the policy to a request";
                 type = functionTo policy.config.resource-type;
diff --git a/deployment/flake-part.nix b/deployment/flake-part.nix
index b2c9ff89..c11512e6 100644
--- a/deployment/flake-part.nix
+++ b/deployment/flake-part.nix
@@ -36,6 +36,11 @@
           inherit (pkgs.testers) runNixOSTest;
           inherit inputs sources;
         };
+
+        deployment-model-tf = import ./check/data-model-tf {
+          inherit (pkgs.testers) runNixOSTest;
+          inherit inputs sources;
+        };
       };
     };
 }
diff --git a/deployment/run/ssh-single-host/run.sh b/deployment/run/ssh-single-host/run.sh
index 058f8267..a0662b4c 100755
--- a/deployment/run/ssh-single-host/run.sh
+++ b/deployment/run/ssh-single-host/run.sh
@@ -39,7 +39,7 @@ NIX_SSHOPTS="${sshOpts[*]}" nix-copy-closure --to "$destination" "$outPath" --gz
 # shellcheck disable=SC2029
 ssh "${sshOpts[@]}" "$destination" "nix-env --profile /nix/var/nix/profiles/system --set $outPath"
 # shellcheck disable=SC2029
-output=$(ssh "${sshOpts[@]}" "$destination" "nohup $outPath/bin/switch-to-configuration switch &" 2>&1) || echo "status code: $?"
+output=$(ssh -o "ConnectTimeout=1" -o "ServerAliveInterval=1" "${sshOpts[@]}" "$destination" "nohup $outPath/bin/switch-to-configuration switch &" 2>&1) || echo "status code: $?"
 echo "output: $output"
 if [[ $output != *"Timeout, server $host not responding"* ]]; then
   echo "non-timeout error: $output"
diff --git a/deployment/run/tf-single-host/main.tf b/deployment/run/tf-single-host/main.tf
new file mode 100644
index 00000000..c05add7c
--- /dev/null
+++ b/deployment/run/tf-single-host/main.tf
@@ -0,0 +1,51 @@
+# hash of our code directory, used to trigger re-deploy
+# FIXME calculate separately to reduce false positives
+data "external" "hash" {
+  program = ["sh", "-c", "echo \"{\\\"hash\\\":\\\"$(nix-hash ../../..)\\\"}\""]
+}
+
+# TF resource to build and deploy NixOS instances.
+resource "terraform_data" "nixos" {
+
+  # trigger rebuild/deploy if (FIXME?) any potentially used config/code changed,
+  # preventing these (20+s, build being bottleneck) when nothing changed.
+  # terraform-nixos separates these to only deploy if instantiate changed,
+  # yet building even then - which may be not as bad using deploy on remote.
+  # having build/deploy one resource reflects wanting to prevent no-op rebuilds
+  # over preventing (with less false positives) no-op deployments,
+  # as i could not find a way to prevent no-op rebuilds without merging them:
+  # - generic resources cannot have outputs, while we want info from the instantiation (unless built on host?).
+  # - `data` always runs, which is slow for deploy and especially build.
+  triggers_replace = [
+    data.external.hash.result,
+    var.host,
+    var.module,
+    var.args,
+    var.root_path,
+    var.deployment_type,
+  ]
+
+  provisioner "local-exec" {
+    # directory to run the script from. we use the TF project root dir,
+    # here as a path relative from where TF is run from,
+    # matching calling modules' expectations on config_nix locations.
+    # note that absolute paths can cause false positives in triggers,
+    # so are generally discouraged in TF.
+    working_dir = path.root
+    environment = {
+      system          = var.system
+      username        = var.username
+      host            = var.host
+      module          = var.module
+      args            = var.args
+      key_file        = var.key_file
+      deployment_name = var.deployment_name
+      root_path       = var.root_path
+      ssh_opts        = var.ssh_opts
+      deployment_type = var.deployment_type
+    }
+    # TODO: refactor back to command="ignoreme" interpreter=concat([]) to protect sensitive data from error logs?
+    # TODO: build on target?
+    command = "bash ../ssh-single-host/run.sh"
+  }
+}
diff --git a/deployment/run/tf-single-host/run.sh b/deployment/run/tf-single-host/run.sh
new file mode 100644
index 00000000..203466b9
--- /dev/null
+++ b/deployment/run/tf-single-host/run.sh
@@ -0,0 +1,9 @@
+#! /usr/bin/env bash
+set -xeuo pipefail
+declare tf_env
+
+export TF_LOG=info
+
+cd "${tf_env}/deployment/run/tf-single-host"
+# parallelism=1: limit OOM risk
+tofu apply --auto-approve -lock=false -parallelism=1
diff --git a/deployment/run/tf-single-host/setup.nix b/deployment/run/tf-single-host/setup.nix
new file mode 100644
index 00000000..4166812e
--- /dev/null
+++ b/deployment/run/tf-single-host/setup.nix
@@ -0,0 +1,16 @@
+{
+  pkgs,
+  lib,
+  sources,
+}:
+pkgs.writeScriptBin "setup" ''
+  set -xe
+  # calculated pins
+  echo ${lib.escapeShellArg (lib.strings.toJSON sources)} > ./.npins.json
+  # generate TF lock for nix's TF providers
+  rm -rf .terraform/
+  rm -f .terraform.lock.hcl
+  # suppress warning on architecture-specific generated lock file:
+  # `Warning: Incomplete lock file information for providers`.
+  tofu init -input=false 1>/dev/null
+''
diff --git a/deployment/run/tf-single-host/tf-env.nix b/deployment/run/tf-single-host/tf-env.nix
new file mode 100644
index 00000000..99d9e6c9
--- /dev/null
+++ b/deployment/run/tf-single-host/tf-env.nix
@@ -0,0 +1,31 @@
+{
+  lib,
+  pkgs,
+  sources ? import ../../../npins,
+}:
+pkgs.stdenv.mkDerivation {
+  name = "tf-repo";
+  src =
+    with lib.fileset;
+    toSource {
+      root = ../../../.;
+      # don't copy ignored files
+      fileset = intersection (gitTracked ../../../.) ../../../.;
+    };
+  buildInputs = [
+    (pkgs.callPackage ./tf.nix { })
+    (pkgs.callPackage ./setup.nix { inherit sources; })
+  ];
+  buildPhase = ''
+    runHook preBuild
+    pushd deployment/run/tf-single-host
+    source setup
+    popd
+    runHook postBuild
+  '';
+  installPhase = ''
+    runHook preInstall
+    cp -r . $out
+    runHook postInstall
+  '';
+}
diff --git a/deployment/run/tf-single-host/tf.nix b/deployment/run/tf-single-host/tf.nix
new file mode 100644
index 00000000..8551cb82
--- /dev/null
+++ b/deployment/run/tf-single-host/tf.nix
@@ -0,0 +1,11 @@
+# FIXME: use overlays so this gets imported just once?
+{
+  pkgs,
+  ...
+}:
+let
+  tf = pkgs.opentofu;
+in
+tf.withPlugins (p: [
+  p.external
+])
diff --git a/deployment/run/tf-single-host/variables.tf b/deployment/run/tf-single-host/variables.tf
new file mode 100644
index 00000000..32948210
--- /dev/null
+++ b/deployment/run/tf-single-host/variables.tf
@@ -0,0 +1,54 @@
+variable "system" {
+  description = "The architecture of the system to deploy to."
+  type        = string
+  default     = "x86_64-linux"
+}
+
+variable "username" {
+  description = "the SSH user to use"
+  type        = string
+  default     = "root"
+}
+
+variable "host" {
+  description = "the host to access by SSH"
+  type        = string
+}
+
+variable "module" {
+  description = "The module to call to obtain the NixOS configuration from."
+  type        = string
+}
+
+variable "args" {
+  description = "The arguments with which to call the module to obtain the NixOS configuration."
+  type        = string
+  default     = "{}"
+}
+
+variable "key_file" {
+  description = "path to the user's SSH private key"
+  type        = string
+}
+
+variable "deployment_name" {
+  description = "The name of the deployment for which to obtain the NixOS configuration."
+  type        = string
+}
+
+variable "root_path" {
+  description = "The path to the root of the repository."
+  type        = string
+}
+
+variable "ssh_opts" {
+  description = "Extra SSH options (`-o`) to use."
+  type        = string
+  default     = "[]"
+}
+
+variable "deployment_type" {
+  description = "A `deployment-type` from the Fediversity data model, for grabbing the desired NixOS configuration."
+  type        = string
+  default     = "tf-host"
+}