# Listing metadata from the source viewer: 207 lines, 7.4 KiB, Nix.
# NixOS module: opt-in kernel and network hardening.
#
# Setting `custom.kernel.hardened.enable = true` installs two auditing tools,
# enables the audit daemon/subsystem, and applies the sysctl values, kernel
# command-line parameters and module blacklist defined below. With the option
# left at its default (disabled) this module contributes no configuration.
{
  lib,
  config,
  pkgs,
  ...
}:
let
  cfg = config.custom.kernel.hardened;
in
{
  options.custom.kernel.hardened = {
    enable = lib.mkEnableOption "hardened kernel options";
  };

  config = lib.mkIf cfg.enable {
    # Tools for checking how well the running system is hardened.
    environment.systemPackages = [
      pkgs.kernel-hardening-checker
      pkgs.lynis
    ];

    security = {
      auditd.enable = true;
      audit.enable = true;
      # Log every execve() made by 64-bit processes.
      audit.rules = [ "-a always,exit -F arch=b64 -S execve" ];
      protectKernelImage = true;
      # Left off on purpose: enabling it breaks iptables, wireguard, and virtd.
      lockKernelModules = false;

      # force-enable the Page Table Isolation (PTI) Linux kernel feature
      forcePageTableIsolation = true;

      # User namespaces are required for sandboxing.
      # This means you cannot set `"user.max_user_namespaces" = 0;` in sysctl.
      allowUserNamespaces = true;

      # Disable unprivileged user namespaces, unless containers are enabled.
      # NOTE(review): the explicit "kernel.unprivileged_userns_clone" = 1 in
      # boot.kernel.sysctl below appears to defeat this setting -- confirm
      # which of the two is intended.
      unprivilegedUsernsClone = config.virtualisation.containers.enable;
      allowSimultaneousMultithreading = true;
    };

    boot.kernel.sysctl = {
      # do not write core dumps for setuid / privileged processes
      "fs.suid_dumpable" = 0;
      # prevent pointer leaks
      "kernel.kptr_restrict" = 2;
      # restrict kernel log to CAP_SYSLOG capability
      "kernel.dmesg_restrict" = 1;
      # Note: certain container runtimes or browser sandboxes might rely on the following
      # restrict eBPF to the CAP_BPF capability
      "kernel.unprivileged_bpf_disabled" = 1;
      # should be enabled along with bpf above
      # "net.core.bpf_jit_harden" = 2;
      # restrict loading TTY line disciplines to the CAP_SYS_MODULE capability
      # (key was previously misspelled "ldisk_autoload" and therefore never applied)
      "dev.tty.ldisc_autoload" = 0;
      # prevent exploit of use-after-free flaws
      "vm.unprivileged_userfaultfd" = 0;
      # kexec is used to boot another kernel during runtime and can be abused
      "kernel.kexec_load_disabled" = 1;

      # Kernel self-protection
      # SysRq exposes a lot of potentially dangerous debugging functionality to unprivileged users
      # 4 makes it so a user can only use the secure attention key. A value of 0 would disable completely
      "kernel.sysrq" = 4;
      # Explicitly ALLOW unprivileged user namespaces (1 = enabled); Docker, NH,
      # and other apps may need this.
      # NOTE(review): this conflicts with the intent of
      # security.unprivilegedUsernsClone above -- confirm which one should win.
      "kernel.unprivileged_userns_clone" = 1;
      # restrict all usage of performance events to the CAP_PERFMON capability
      "kernel.perf_event_paranoid" = 3;

      # Network
      # protect against SYN flood attacks (denial of service attack)
      "net.ipv4.tcp_syncookies" = 1;
      # protection against TIME-WAIT assassination
      "net.ipv4.tcp_rfc1337" = 1;
      # Reverse path filtering causes the kernel to do source validation of
      # packets received (prevents IP spoofing)
      "net.ipv4.conf.default.rp_filter" = 1;
      "net.ipv4.conf.all.rp_filter" = 1;

      # Ignore ICMP redirects, including "secure" ones from known gateways
      # (prevents man-in-the-middle attacks)
      "net.ipv4.conf.all.accept_redirects" = 0;
      "net.ipv4.conf.default.accept_redirects" = 0;
      "net.ipv4.conf.all.secure_redirects" = 0;
      "net.ipv4.conf.default.secure_redirects" = 0;
      "net.ipv6.conf.all.accept_redirects" = 0;
      "net.ipv6.conf.default.accept_redirects" = 0;
      # Never send ICMP redirects; this host is not a router
      "net.ipv4.conf.all.send_redirects" = 0;
      "net.ipv4.conf.default.send_redirects" = 0;

      # ignore ICMP echo requests (ping), helps avoid Smurf attacks
      "net.ipv4.icmp_echo_ignore_all" = 1;

      # do not forward packets and refuse source-routed packets
      "net.ipv4.conf.all.forwarding" = 0;
      "net.ipv4.conf.default.accept_source_route" = 0;
      "net.ipv4.conf.all.accept_source_route" = 0;
      "net.ipv6.conf.all.accept_source_route" = 0;
      "net.ipv6.conf.default.accept_source_route" = 0;
      "net.ipv6.conf.all.forwarding" = 0;
      # ignore IPv6 router advertisements
      "net.ipv6.conf.all.accept_ra" = 0;
      "net.ipv6.conf.default.accept_ra" = 0;

      ## TCP hardening
      # Prevent bogus ICMP errors from filling up logs.
      "net.ipv4.icmp_ignore_bogus_error_responses" = 1;

      # Disable TCP SACK
      "net.ipv4.tcp_sack" = 0;
      "net.ipv4.tcp_dsack" = 0;
      # NOTE(review): tcp_fack is presumably a legacy no-op on current kernels -- confirm
      "net.ipv4.tcp_fack" = 0;

      # Userspace
      # restrict usage of ptrace
      "kernel.yama.ptrace_scope" = 2;

      # ASLR memory protection (64-bit systems)
      "vm.mmap_rnd_bits" = 32;
      "vm.mmap_rnd_compat_bits" = 16;

      # only permit symlinks to be followed when outside of a world-writable sticky directory
      "fs.protected_symlinks" = 1;
      "fs.protected_hardlinks" = 1;
      # Prevent creating files in potentially attacker-controlled environments
      "fs.protected_fifos" = 2;
      "fs.protected_regular" = 2;

      # Randomize memory
      "kernel.randomize_va_space" = 2;
      # Exec Shield (Stack protection)
      # NOTE(review): this key presumably does not exist on mainline kernels
      # (it came from a distribution patch set) -- confirm and drop if unused.
      "kernel.exec-shield" = 1;

      ## TCP optimization
      # TCP Fast Open is a TCP extension that reduces network latency by packing
      # data in the sender's initial TCP SYN. Setting 3 = enable TCP Fast Open for
      # both incoming and outgoing connections:
      "net.ipv4.tcp_fastopen" = 3;
      # Bufferbloat mitigations + slight improvement in throughput & latency
      "net.ipv4.tcp_congestion_control" = "bbr";
      "net.core.default_qdisc" = "cake";
    };

    boot.kernelParams = [
      # NOTE(review): presumably also added by security.audit.enable -- confirm
      "audit=1"
      # make it harder to influence slab cache layout
      "slab_nomerge"
      # enables zeroing of memory during allocation and free time
      # helps mitigate use-after-free vulnerabilities
      "init_on_alloc=1"
      "init_on_free=1"
      # randomizes page allocator freelist, improving security by
      # making page allocations less predictable
      # (parameter was previously misspelled "shuffel" and therefore ignored)
      "page_alloc.shuffle=1"
      # enables Kernel Page Table Isolation, which mitigates Meltdown and
      # prevents some KASLR bypasses
      # NOTE(review): presumably redundant with security.forcePageTableIsolation -- confirm
      "pti=on"
      # randomizes the kernel stack offset on each syscall
      # making attacks that rely on a deterministic stack layout difficult
      "randomize_kstack_offset=on"
      # disables vsyscalls, they've been replaced with vDSO
      "vsyscall=none"
      # disables debugfs, which exposes sensitive info about the kernel
      "debugfs=off"
      # certain exploits cause an "oops", this makes the kernel panic if an "oops" occurs
      "oops=panic"
      # only allows kernel modules that have been signed with a valid key to be loaded
      # making it harder to load malicious kernel modules
      # can make VirtualBox or Nvidia drivers unusable
      #"module.sig_enforce=1"
      # prevents user space privilege escalation into the kernel
      "lockdown=confidentiality"
      # "rd.udev.log_level=3"
      # "udev.log_priority=3"
    ];

    boot.blacklistedKernelModules = [
      # Obscure networking protocols
      "dccp" # Datagram Congestion Control Protocol
      "sctp" # Stream Control Transmission Protocol
      "rds" # Reliable Datagram Sockets
      "tipc" # Transparent Inter-Process Communication
      "n-hdlc" # High-level Data Link Control
      "ax25" # Amateur X.25
      "netrom" # NetRom
      "x25" # X.25
      "rose"
      "decnet"
      "econet"
      "af_802154" # IEEE 802.15.4
      "ipx" # Internetwork Packet Exchange
      "appletalk"
      "psnap" # Subnetwork Access Protocol
      "p8023" # Novell raw IEEE 802.3
      "p8022" # IEEE 802.2
      "can" # Controller Area Network
      "atm"
      # Various rare filesystems
      "cramfs"
      "freevxfs"
      "jffs2"
      "hfs"
      "hfsplus"
      # "udf"
      # "nfs" # Network File System
      # "nfsv3"
      # "nfsv4"
      "gfs2" # Global File System 2
      # vivid driver is only useful for testing purposes and has been the
      # cause of privilege escalation vulnerabilities
      "vivid"
    ];
  };
}