From 0a350777002ba638bcd44eb23db323b12f7c5d9e Mon Sep 17 00:00:00 2001 From: Franck Cuny Date: Fri, 6 Dec 2024 17:37:28 -0800 Subject: get rid of sections I use tags to organize things. --- config.toml | 3 +- content/1password-ssh-agent.md | 208 ++++++++ content/blog/1password-ssh-agent.md | 208 -------- content/blog/_index.md | 6 - content/blog/git-link-and-sourcegraph.md | 52 -- content/blog/google-doc-failure.md | 69 --- content/blog/leaving-twitter.md | 14 - content/blog/nix-raid-systemd-boot.md | 53 --- content/blog/no-ssh-to-prod.md | 29 -- content/blog/tailscale-docker-https.md | 127 ----- content/containerd-to-firecracker.md | 679 +++++++++++++++++++++++++++ content/cpu-power-management.md | 121 +++++ content/fogcutter.md | 63 +++ content/git-link-and-sourcegraph.md | 52 ++ content/google-doc-failure.md | 69 +++ content/leaving-twitter.md | 14 + content/making-sense-intel-amd-cpus.md | 236 ++++++++++ content/misc/fogcutter.md | 69 --- content/nix-raid-systemd-boot.md | 53 +++ content/no-ssh-to-prod.md | 29 ++ content/notes/_index.md | 6 - content/notes/containerd-to-firecracker.md | 679 --------------------------- content/notes/cpu-power-management.md | 121 ----- content/notes/making-sense-intel-amd-cpus.md | 236 ---------- content/notes/stuff-about-pcie.md | 266 ----------- content/notes/working-with-go.md | 285 ----------- content/notes/working-with-nix.md | 45 -- content/resume.md | 2 +- content/stuff-about-pcie.md | 266 +++++++++++ content/tailscale-docker-https.md | 127 +++++ content/working-with-go.md | 285 +++++++++++ content/working-with-nix.md | 45 ++ templates/bike.html | 10 + templates/blog.html | 19 - templates/feed.xml | 39 -- templates/orphan.html | 10 - templates/resume.html | 10 + templates/section.html | 16 - 38 files changed, 2269 insertions(+), 2352 deletions(-) create mode 100644 content/1password-ssh-agent.md delete mode 100644 content/blog/1password-ssh-agent.md delete mode 100644 content/blog/_index.md delete mode 100644 content/blog/git-link-and-sourcegraph.md delete mode 100644 content/blog/google-doc-failure.md delete mode 100644 content/blog/leaving-twitter.md delete mode 100644 content/blog/nix-raid-systemd-boot.md delete mode 100644 content/blog/no-ssh-to-prod.md delete mode 100644 content/blog/tailscale-docker-https.md create mode 100644 content/containerd-to-firecracker.md create mode 100644 content/cpu-power-management.md create mode 100644 content/fogcutter.md create mode 100644 content/git-link-and-sourcegraph.md create mode 100644 content/google-doc-failure.md create mode 100644 content/leaving-twitter.md create mode 100644 content/making-sense-intel-amd-cpus.md delete mode 100644 content/misc/fogcutter.md create mode 100644 content/nix-raid-systemd-boot.md create mode 100644 content/no-ssh-to-prod.md delete mode 100644 content/notes/_index.md delete mode 100644 content/notes/containerd-to-firecracker.md delete mode 100644 content/notes/cpu-power-management.md delete mode 100644 content/notes/making-sense-intel-amd-cpus.md delete mode 100644 content/notes/stuff-about-pcie.md delete mode 100644 content/notes/working-with-go.md delete mode 100644 content/notes/working-with-nix.md create mode 100644 content/stuff-about-pcie.md create mode 100644 content/tailscale-docker-https.md create mode 100644 content/working-with-go.md create mode 100644 content/working-with-nix.md create mode 100644 templates/bike.html delete mode 100644 templates/blog.html delete mode 100644 templates/feed.xml delete mode 100644 templates/orphan.html create mode 100644 
templates/resume.html delete mode 100644 templates/section.html diff --git a/config.toml b/config.toml index fd294a2..dc3d107 100644 --- a/config.toml +++ b/config.toml @@ -8,8 +8,7 @@ output_dir = "docs" default_language = "en" -generate_feeds = true -feed_filenames = ["feed.xml"] +generate_feeds = false taxonomies = [ {name = "tags", feed = false}, diff --git a/content/1password-ssh-agent.md b/content/1password-ssh-agent.md new file mode 100644 index 0000000..5d5d436 --- /dev/null +++ b/content/1password-ssh-agent.md @@ -0,0 +1,208 @@ ++++ +title = "1password's ssh agent and nix" +date = 2023-12-02 +[taxonomies] +tags = ["nix"] ++++ + +[A while ago](https://blog.1password.com/1password-ssh-agent/), 1password introduced an SSH agent, and I've been using it for a while now. The following describe how I've configured it with `nix`. All my ssh keys are in 1password, and it's the only ssh agent I'm using at this point. + +## Personal configuration + +I have a personal 1password account, and I've created a new SSH key in it that I use for both authenticating to github and to sign commits. I use [nix-darwin](http://daiderd.com/nix-darwin/) and [home-manager](https://github.com/nix-community/home-manager) to configure my personal machine. + +This is how I configure ssh: + +```nix +programs.ssh = { + enable = true; + forwardAgent = true; + serverAliveInterval = 60; + controlMaster = "auto"; + controlPersist = "30m"; + extraConfig = '' + IdentityAgent "~/Library/Group Containers/2BUA8C4S2C.com.1password/t/agent.sock" + ''; + matchBlocks = { + "github.com" = { + hostname = "github.com"; + user = "git"; + forwardAgent = false; + extraOptions = { preferredAuthentications = "publickey"; }; + }; + }; +}; +``` + +The configuration for git: + +```nix +{ lib, pkgs, config, ... }: +let + sshPub = builtins.fromTOML ( + builtins.readFile ../../configs/ssh-pubkeys.toml + ); +in +{ + home.file.".ssh/allowed_signers".text = lib.concatMapStrings (x: "franck@fcuny.net ${x}\n") (with sshPub; [ ykey-laptop ykey-backup op ]); + + programs.git = { + enable = true; + userName = "Franck Cuny"; + userEmail = "franck@fcuny.net"; + + signing = { + key = "key::${sshPub.op}"; + signByDefault = true; + }; + + extraConfig = { + gpg.format = "ssh"; + gpg.ssh.allowedSignersFile = "~/.ssh/allowed_signers"; + gpg.ssh.program = "/Applications/1Password.app/Contents/MacOS/op-ssh-sign"; + }; +} +``` + +In the repository with my nix configuration, I've a file `ssh-pubkeys.toml` that contains all the public ssh keys I keep track of (mine and a few other developers). Keys from that file are used to create the file `~/.ssh/allowed_signers` that is then used by `git` (for example `git log --show-signature`) when I want to ensure commits are signed with a valid key. + +`ssh-pubkeys.toml` looks like this: + +```toml +# yubikey key connected to the laptop +ykey-laptop="ssh-ed25519 ..." +# backup yubikey key +ykey-backup="ssh-ed25519 ..." +# 1password key +op="ssh-ed25519 ..." +``` + +And the following is for `zsh` so that I can use the agent for other commands that I run in the shell: + +```nix +programs.zsh.envExtra = '' + # use 1password ssh agent + # see https://developer.1password.com/docs/ssh/get-started#step-4-configure-your-ssh-or-git-client + export SSH_AUTH_SOCK=~/Library/Group\ Containers/2BUA8C4S2C.com.1password/t/agent.sock +''; +``` + +And that's it, this is enough to get use the agent for all my personal use cases. + +## Work configuration + +The work configuration is slightly different. 
Here I want to use both my work and personal keys so that I can clone some of my personal repositories on the work machine (for example my emacs configuration). We also use both github.com and a github enterprise instance, and I need to authenticate against both.
+
+I've imported my existing keys into 1password, and I keep the public keys on disk: `$HOME/.ssh/work_gh.pub` and `$HOME/.ssh/personal_gh.pub`. I've removed the private keys from the disk.
+
+This is the configuration I use for work:
+
+```nix
+programs.ssh = {
+  enable = true;
+  forwardAgent = true;
+  serverAliveInterval = 60;
+  controlMaster = "auto";
+  controlPersist = "30m";
+  extraConfig = ''
+    IdentityAgent "~/Library/Group Containers/2BUA8C4S2C.com.1password/t/agent.sock"
+  '';
+  matchBlocks = {
+    "personal" = {
+      hostname = "github.com";
+      user = "git";
+      forwardAgent = false;
+      identityFile = "~/.ssh/personal_gh.pub";
+      identitiesOnly = true;
+      extraOptions = { preferredAuthentications = "publickey"; };
+    };
+    "work" = {
+      hostname = "github.com";
+      user = "git";
+      forwardAgent = false;
+      identityFile = "~/.ssh/work_gh.pub";
+      identitiesOnly = true;
+      extraOptions = { preferredAuthentications = "publickey"; };
+    };
+    "github.enterprise" = {
+      hostname = "github.enterprise";
+      user = "git";
+      forwardAgent = false;
+      identityFile = "~/.ssh/work_gh.pub";
+      identitiesOnly = true;
+      extraOptions = { preferredAuthentications = "publickey"; };
+    };
+  };
+};
+```
+
+I also create a configuration file for the 1password agent, to make sure I can use the keys from all the accounts:
+
+```nix
+  # Generate ssh agent config for 1Password - I want both my personal and work keys
+  home.file.".config/1Password/ssh/agent.toml".text = ''
+    [[ssh-keys]]
+    account = "my.1password.com"
+    [[ssh-keys]]
+    account = "$work.1password.com"
+  '';
+```
+
+Then the git configuration:
+
+```nix
+{ config, lib, pkgs, ... }:
+let
+  sshPub = builtins.fromTOML (
+    builtins.readFile ../etc/ssh-pubkeys.toml
+  );
+in
+{
+  home.file.".ssh/allowed_signers".text = lib.concatMapStrings (x: "franck@fcuny.net ${x}\n") (with sshPub; [ work_laptop op ]);
+
+  programs.git = {
+    enable = true;
+
+    signing = {
+      key = "key::${sshPub.op}";
+      signByDefault = true;
+    };
+
+    extraConfig = {
+      gpg.format = "ssh";
+      gpg.ssh.allowedSignersFile = "~/.ssh/allowed_signers";
+      gpg.ssh.program = "/Applications/1Password.app/Contents/MacOS/op-ssh-sign";
+
+      url = {
+        "ssh://git@github.enterprise/" = {
+          insteadOf = "https://github.enterprise/";
+        };
+      };
+    };
+  };
+}
+```
+
+Now, when I clone a repository, instead of doing `git clone git@github.com:$WORK/repo` I do `git clone work:/$WORK/repo`.
+
+## Conclusion
+
+I've used a yubikey to sign my commits for a while, but I find the 1password ssh agent a bit more convenient. The initial setup for the yubikey was not as straightforward (granted, it's a one time thing per key).
+
+On my personal machine, my `$HOME/.ssh` looks as follows:
+
+```sh
+➜ ~ ls -l ~/.ssh
+total 16
+lrwxr-xr-x@ 1 fcuny staff 83 Nov 6 17:03 allowed_signers -> /nix/store/v9qhbr2vb7w6bd24ypbjjz59xis3g8y2-home-manager-files/.ssh/allowed_signers
+lrwxr-xr-x@ 1 fcuny staff 74 Nov 6 17:03 config -> /nix/store/v9qhbr2vb7w6bd24ypbjjz59xis3g8y2-home-manager-files/.ssh/config
+-rw-------@ 1 fcuny staff 828 Nov 13 17:53 known_hosts
+```
+
+When I create a new commit, 1password asks me to authorize git to use the agent and sign the commit. Same when I want to ssh to a host.
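+To check that the signing setup works end to end, the signature on a commit can be verified against the `allowed_signers` file with plain `git` commands (nothing here is specific to 1password):
+
+```sh
+# show the ssh signature attached to the latest commit
+git log --show-signature -1
+# or verify it explicitly; this relies on gpg.ssh.allowedSignersFile being set
+git verify-commit HEAD
+```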
+ +When I'm working on the macbook, I use touch ID to confirm, and when the laptop is connected to a dock, I need to type my 1password's password to unlock it and authorize the command. + +There's a cache in the agent so I'm not prompted too often. I find this convenient, I will never have to copy my ssh key when I get a new laptop, since it's already in 1password. + +The agent has worked flawlessly so far, and I'm happy with this setup. diff --git a/content/blog/1password-ssh-agent.md b/content/blog/1password-ssh-agent.md deleted file mode 100644 index 5d5d436..0000000 --- a/content/blog/1password-ssh-agent.md +++ /dev/null @@ -1,208 +0,0 @@ -+++ -title = "1password's ssh agent and nix" -date = 2023-12-02 -[taxonomies] -tags = ["nix"] -+++ - -[A while ago](https://blog.1password.com/1password-ssh-agent/), 1password introduced an SSH agent, and I've been using it for a while now. The following describe how I've configured it with `nix`. All my ssh keys are in 1password, and it's the only ssh agent I'm using at this point. - -## Personal configuration - -I have a personal 1password account, and I've created a new SSH key in it that I use for both authenticating to github and to sign commits. I use [nix-darwin](http://daiderd.com/nix-darwin/) and [home-manager](https://github.com/nix-community/home-manager) to configure my personal machine. - -This is how I configure ssh: - -```nix -programs.ssh = { - enable = true; - forwardAgent = true; - serverAliveInterval = 60; - controlMaster = "auto"; - controlPersist = "30m"; - extraConfig = '' - IdentityAgent "~/Library/Group Containers/2BUA8C4S2C.com.1password/t/agent.sock" - ''; - matchBlocks = { - "github.com" = { - hostname = "github.com"; - user = "git"; - forwardAgent = false; - extraOptions = { preferredAuthentications = "publickey"; }; - }; - }; -}; -``` - -The configuration for git: - -```nix -{ lib, pkgs, config, ... }: -let - sshPub = builtins.fromTOML ( - builtins.readFile ../../configs/ssh-pubkeys.toml - ); -in -{ - home.file.".ssh/allowed_signers".text = lib.concatMapStrings (x: "franck@fcuny.net ${x}\n") (with sshPub; [ ykey-laptop ykey-backup op ]); - - programs.git = { - enable = true; - userName = "Franck Cuny"; - userEmail = "franck@fcuny.net"; - - signing = { - key = "key::${sshPub.op}"; - signByDefault = true; - }; - - extraConfig = { - gpg.format = "ssh"; - gpg.ssh.allowedSignersFile = "~/.ssh/allowed_signers"; - gpg.ssh.program = "/Applications/1Password.app/Contents/MacOS/op-ssh-sign"; - }; -} -``` - -In the repository with my nix configuration, I've a file `ssh-pubkeys.toml` that contains all the public ssh keys I keep track of (mine and a few other developers). Keys from that file are used to create the file `~/.ssh/allowed_signers` that is then used by `git` (for example `git log --show-signature`) when I want to ensure commits are signed with a valid key. - -`ssh-pubkeys.toml` looks like this: - -```toml -# yubikey key connected to the laptop -ykey-laptop="ssh-ed25519 ..." -# backup yubikey key -ykey-backup="ssh-ed25519 ..." -# 1password key -op="ssh-ed25519 ..." -``` - -And the following is for `zsh` so that I can use the agent for other commands that I run in the shell: - -```nix -programs.zsh.envExtra = '' - # use 1password ssh agent - # see https://developer.1password.com/docs/ssh/get-started#step-4-configure-your-ssh-or-git-client - export SSH_AUTH_SOCK=~/Library/Group\ Containers/2BUA8C4S2C.com.1password/t/agent.sock -''; -``` - -And that's it, this is enough to get use the agent for all my personal use cases. 
- -## Work configuration - -The work configuration is slightly different. Here I want to use both my work and personal keys so that I can clone some of my personal repositories on the work machine (for example my emacs configuration). We also use both github.com and a github enterprise instance and I need to authenticate against both. - -I've imported my existing keys into 1password, and I keep the public keys on the disk: `$HOME/.ssh/work_gh.pub` and `$HOME/.ssh/personal_gh.pub`. I've removed the private keys from the disk. - -This is the configuration I use for work: - -```nix -programs.ssh = { - enable = true; - forwardAgent = true; - serverAliveInterval = 60; - controlMaster = "auto"; - controlPersist = "30m"; - extraConfig = '' - IdentityAgent "~/Library/Group Containers/2BUA8C4S2C.com.1password/t/agent.sock" - ''; - matchBlocks = { - "personal" = { - hostname = "github.com"; - user = "git"; - forwardAgent = false; - identifyFile = "~/.ssh/personal_gh.pub"; - identitiesOnly = true; - extraOptions = { preferredAuthentications = "publickey"; }; - }; - "work" = { - hostname = "github.com"; - user = "git"; - forwardAgent = false; - identifyFile = "~/.ssh/work_gh.pub"; - identitiesOnly = true; - extraOptions = { preferredAuthentications = "publickey"; }; - }; - "github.enterprise" = { - hostname = "github.enterprise"; - user = "git"; - forwardAgent = false; - identifyFile = "~/.ssh/work_gh.pub"; - identitiesOnly = true; - extraOptions = { preferredAuthentications = "publickey"; }; - }; - }; -}; -``` - -I also create a configuration file for the 1password agent, to make sure I can use the keys from all the accounts: - -```nix - # Generate ssh agent config for 1Password - I want both my personal and work keys - home.file.".config/1Password/ssh/agent.toml".text = '' - [[ssh-keys]] - account = "my.1password.com" - [[ssh-keys]] - account = "$work.1password.com" - ''; -``` - -Then the ssh configuration: - -```nix -{ config, lib, pkgs, ... }: -let - sshPub = builtins.fromTOML ( - builtins.readFile ../etc/ssh-pubkeys.toml - ); -in -{ - home.file.".ssh/allowed_signers".text = lib.concatMapStrings (x: "franck@fcuny.net ${x}\n") (with sshPub; [ work_laptop op ]); - - programs.git = { - enable = true; - - signing = { - key = "key::${sshPub.op}"; - signByDefault = true; - }; - - extraConfig = { - gpg.format = "ssh"; - gpg.ssh.allowedSignersFile = "~/.ssh/allowed_signers"; - gpg.ssh.program = "/Applications/1Password.app/Contents/MacOS/op-ssh-sign"; - - url = { - "ssh://git@github.enterprise/" = { - insteadOf = "https://github.enterprise/"; - }; - }; - }; - }; -} -``` - -Now, when I clone a repository, instead of doing `git clone git@github.com/$WORK/repo` I do `git clone work:/$WORK/repo`. - -## Conclusion - -I've used yubikey to sign my commits for a while, but I find the 1password ssh agent a bit more convenient. The initial setup for yubikey was not as straightforward (granted, it's a one time thing per key). - -On my personal machine, my `$HOME/.ssh` looks as follow: - -```sh -➜ ~ ls -l ~/.ssh ~ -total 16 -lrwxr-xr-x@ 1 fcuny staff 83 Nov 6 17:03 allowed_signers -> /nix/store/v9qhbr2vb7w6bd24ypbjjz59xis3g8y2-home-manager-files/.ssh/allowed_signers -lrwxr-xr-x@ 1 fcuny staff 74 Nov 6 17:03 config -> /nix/store/v9qhbr2vb7w6bd24ypbjjz59xis3g8y2-home-manager-files/.ssh/config --rw-------@ 1 fcuny staff 828 Nov 13 17:53 known_hosts -``` - -When I create a new commit, 1password ask me to authorize git to use the agent and sign the commit. Same when I want to ssh to a host. 
- -When I'm working on the macbook, I use touch ID to confirm, and when the laptop is connected to a dock, I need to type my 1password's password to unlock it and authorize the command. - -There's a cache in the agent so I'm not prompted too often. I find this convenient, I will never have to copy my ssh key when I get a new laptop, since it's already in 1password. - -The agent has worked flawlessly so far, and I'm happy with this setup. diff --git a/content/blog/_index.md b/content/blog/_index.md deleted file mode 100644 index d44a9f7..0000000 --- a/content/blog/_index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -title: Blog -sort_by: date -render: true -template: blog.html ---- diff --git a/content/blog/git-link-and-sourcegraph.md b/content/blog/git-link-and-sourcegraph.md deleted file mode 100644 index c86b465..0000000 --- a/content/blog/git-link-and-sourcegraph.md +++ /dev/null @@ -1,52 +0,0 @@ -+++ -title = "emacs' git-link and sourcegraph" -date = 2021-08-24 -[taxonomies] -tags = ["emacs"] -+++ - -I use [sourcegraph](https://sourcegraph.com/) for searching code, and I sometimes need to share a link to the source code I'm looking at in a buffer. For this, the package [`git-link`](https://github.com/sshaw/git-link) is great. - -To integrate sourcegraph and `git-link`, the [documentation](https://github.com/sshaw/git-link#sourcegraph) recommends adding a remote entry named `sourcegraph` in the repository, like this: - -```bash -git remote add sourcegraph https://sourcegraph.com/github.com/sshaw/copy-as-format -``` - -The next time you run `M-x git-link` in a buffer, it will use the URL associated with that remote. That's works great, except that now you need to add this for every repository. Instead, for my usage, I came up with the following solution: - -```lisp -(use-package git-link - :ensure t - :after magit - :bind (("C-c g l" . git-link) - ("C-c g a" . git-link-commit)) - :config - (defun fcuny/get-sg-remote-from-hostname (hostname) - (format "sourcegraph.<$domain>.<$tld>/%s" hostname)) - - (defun fcuny/git-link-work-sourcegraph (hostname dirname filename _branch commit start end) - ;;; For a given repository, build the proper link for sourcegraph. - ;;; Use the default branch of the repository instead of the - ;;; current one (we might be on a feature branch that is not - ;;; available on the remote). - (require 'magit-branch) - (let ((sg-base-url (fcuny/get-sg-remote-from-hostname hostname)) - (main-branch (magit-main-branch))) - (git-link-sourcegraph sg-base-url dirname filename main-branch commit start end))) - - (defun fcuny/git-link-commit-work-sourcegraph (hostname dirname commit) - (let ((sg-base-url (fcuny/get-sg-remote-from-hostname hostname))) - (git-link-commit-sourcegraph sg-base-url dirname commit))) - - (add-to-list 'git-link-remote-alist '("twitter" fcuny/git-link-work-sourcegraph)) - (add-to-list 'git-link-commit-remote-alist '("twitter" fcuny/git-link-commit-work-sourcegraph)) - - (setq git-link-open-in-browser 't)) -``` - -We use different domains to host various git repositories at work (e.g. `git.$work`, `gitfoo.$work`, etc). Each of them map to a different URI for sourcegraph (e.g. `sourcegraph.$work/gitfoo`). - -`git-link-commit-remote-alist` is an [association list](https://www.gnu.org/software/emacs/manual/html_node/elisp/Association-Lists.html) that takes a regular expression and a function. The custom function receives the hostname for the remote repository, which is then used to generate the URI for our sourcegraph instance. 
I then call `git-link-sourcegraph`, replacing the hostname with the URI for sourcegraph.
-
-Now I can run `M-x git-link` in any repository where the host of the origin git repository matches `twitter`, without having to set up the custom remote first.
diff --git a/content/blog/google-doc-failure.md b/content/blog/google-doc-failure.md
deleted file mode 100644
index b4a65b9..0000000
--- a/content/blog/google-doc-failure.md
+++ /dev/null
@@ -1,69 +0,0 @@
-+++
-title = "Google Doc Failures"
-date = 2021-04-11
-[taxonomies]
-tags = ["practices"]
-+++
-
-In most use cases, Google Doc is an effective tool to create "write once, read never" documents.
-
-## Convenience
-
-Google Doc (GDoc from now on) is the most common way of writing and sharing documents at my current job. It's very easy to start a new document, even more so since we can now point our browser to `docs.new` and start typing right away.
-
-Like most of my co-workers, I use it frequently during the day. Some of these documents are drafts of some communication that I want others to review before I share it with a broader audience; it can be a [Request For Comments](https://en.wikipedia.org/wiki/Request_for_Comments) for a project; meeting notes for others to read; information that I need to capture during an incident or a debugging session; interview notes; etc.
-
-I would not be surprised if the teams I work closely with generate 50 new documents each week.
-
-## ETOOMANYTABS
-
-I have a tendency to have hundreds of open tabs in my browser during the week. A majority of these tabs are GDocs, and I think this is one of the true failures of the product. Why do I have so many tabs? There are mainly two reasons.
-
-The first reason is a problem with Chrome's UX itself: it happily lets me open the same URL as many times as I want, in as many tabs, instead of sending me to the tab where the document is already loaded. It's not uncommon that I find the same document opened in 5 different tabs.
-
-The second reason, and it's the most important one, is that I know if I need to read or comment on a doc and I close the tab, I'll likely never find that document again, or will completely forget about it.
-
-## Discoverability
-
-In 'the old days', you'd start a new document in Word or LibreOffice, and as you hit "save" for the first time, you have two decisions to make: how am I going to name that file, and where am I going to save it on disk.
-
-With GDoc these questions don't have to be answered: you don't have to name the file, and it does not matter where it lives. I've likely hundreds of docs named 'untitled' in my "drive". I also don't have to think about where they will live, because they are saved automatically for me. I'm sure there are hundreds of studies showing that these two simple steps are actually complex for many users and create useless friction (in which folder do I store it; should I organize the documents by team, year, project; do I name it with the date and the current project; etc.).
-
-GDoc being a Google product, it seems pretty obvious that they would come up with a better solution: let's not organize these files in a strict hierarchy, and let's instead search for them.
-
-Unfortunately, GDoc's search is really poor (and I'm being kind). By default most of us start by looking for some words we know are in the doc, maybe even in the title. But when working on multiple projects related to the same technology, you suddenly get hundreds of documents matching your query. It's unclear how the returned set is ordered (by date?
by author? by some scoring that is invisible to me?).
-
-You can also search by owners, but here is another annoying bit: I think about owner as author, so I usually type `author:foo` before realizing it does not work. And that implies you already know who the owner of the document is. In the case of TDDs (Technical Design Documents), I might know which team is behind it, but rarely who the actual author is.
-
-I could search for the title, but I rarely remember or know the name of the document I'm looking for. I could also search by keywords, but when working on a project with tens of related documents, you have to open all the returned docs to see which one is the correct one.
-
-And what about new members joining the team? They don't know which docs exist, who wrote them, and how they are named. They end up searching and hoping that something good will be returned.
-
-## Workflows
-
-More and more we create workflows around these documents: some of the docs are TDDs that are going through reviews; others are decision documents that require input from multiple teams and are pending approval; others are road map documents that also go through some review process.
-
-As a result we create templates for all kinds of documents, with usually something like "draft → reviews → approved/rejected" at the top. We expect the owner of the doc to mark its status in bold, to help the reader understand what state the document is in. It's difficult to keep track of open actions and comments. Yes, there's a way to get a list of all of them, but it's not in an obvious place.
-
-As a result, some engineers in my team built an external dashboard with swim lanes that captures the state of a document. We add new documents with their URLs, add who the reviewers are, and we move the docs between the lanes. Now we have to operate a service and a database to keep track of the status of documents in GDoc.
-
-## Alternatives
-
-When it comes to technical documents, I find that [approach](https://caitiem.com/2020/03/29/design-docs-markdown-and-git/) much more interesting. Some open source projects have adopted a similar workflow ([Kubernetes](https://github.com/kubernetes/enhancements/tree/master/keps), [Go](https://github.com/golang/proposal)).
-
-A new document starts its life as a text file (using whatever markup language your team/company prefers). The document is submitted for review, and the people who need to be consulted are added as reviewers. They can now comment on the document, and the author can address the comments and mark them as resolved. It's clear what state the document is in: it's either in review, committed, or rejected. With this approach you also end up with a clear history: as time moves on, you can amend the document by submitting a change, and the change goes through the same process.
-
-Newcomers will find the document in the repository, and if they want to see the conversation they can open the review associated with the original change. They can also see how the document evolved over time. It's also easy to publish these documents on an internal website, using a static site generator for example.
-
-One thing that I think is critical is that all of this is done using the tools engineers already use for their day to day job: a text editor, a version control system, a code review tool.
-
-There are obviously challenges with this approach too:
-
-- **it's more heavy handed**: not everyone likes to write in a text editor using a markup language.
It can require some time to learn or get used to the syntax
-- **it's harder to integrate schemas / visuals**: but having them checked in to the repository also improves discoverability
-
-It's also true that not all documents suffer the same challenges for discoverability:
-
-- meeting notes are usually linked to meeting invites (however, if you were not part of the meeting, you end up with the same challenges discovering them)
-- drafts for communications are usually not relevant once the communication has been sent
-- interview notes are usually transferred to some tool for HR when the feedback is submitted
diff --git a/content/blog/leaving-twitter.md b/content/blog/leaving-twitter.md
deleted file mode 100644
index f7d98f5..0000000
--- a/content/blog/leaving-twitter.md
+++ /dev/null
@@ -1,14 +0,0 @@
-+++
-title = "Leaving Twitter"
-date = 2022-01-15
-[taxonomies]
-tags = ["work"]
-+++
-
-January 7th 2022 was my last day at Twitter, after more than 7 years at the company.
-
-The first few years I worked as an SRE in the core-storage team, with the PUB/SUB and key-value store teams.
-
-I spent the last four years working with the Compute team, both maintaining and operating our (very large) Aurora/Mesos clusters, and also working on the adoption of kubernetes, both for our data centers and for the cloud. Working with Compute was extremely fulfilling to me, as I worked closely with our hardware engineering and kernel/operating system teams.
-
-During these 7 years, I was constantly pushed by my coworkers to grow, to step up to new challenges, and I learned a tremendous amount about running large scale distributed systems. I'm extremely grateful for that experience; it was by far the most interesting and challenging job I've had so far.
diff --git a/content/blog/nix-raid-systemd-boot.md b/content/blog/nix-raid-systemd-boot.md
deleted file mode 100644
index de68695..0000000
--- a/content/blog/nix-raid-systemd-boot.md
+++ /dev/null
@@ -1,53 +0,0 @@
-+++
-title = "Workaround md raid boot issue in NixOS 22.11"
-date = 2023-01-10
-[taxonomies]
-tags = ["nix"]
-+++
-
-For about a year now I've been running [NixOS](https://nixos.org/ "NixOS") on my personal machines. Yesterday I decided to go ahead and upgrade my NAS from NixOS 22.05 to [22.11](https://nixos.org/blog/announcements.html#nixos-22.11). On that machine, all the disks are encrypted, and there are two RAID0 devices. To unlock the drives, I log into the [SSH daemon running in `initrd`](https://nixos.wiki/wiki/Remote_LUKS_Unlocking), where I can type my passphrase. This time however, instead of a prompt to unlock the disk, I see the following message:
-
-```
-waiting for device /dev/disk/by-uuid/66c58a92-45fe-4b03-9be0-214ff67c177c to appear...
-```
-
-followed by a timeout, and then I'm asked if I want to reboot the machine. I do reboot the machine, and the same thing happens.
-
-Now, and this is something really great about NixOS, I can boot into the previous generation (on 22.05), and this time I'm prompted for my password, the disks are unlocked, and I can log into my machine. This eliminates the possibility of a hardware failure! I also have a way to get a working machine to do more builds if needed. Knowing that I can easily switch from a broken generation to a working one gives me more confidence in making changes to my system.
-
-I then reboot again into the broken build, and drop into a `busybox` shell. I look to see what `blkid` reports, and I confirm that my disks are all present and they have a **UUID** set.
Next I check what's listed under `/dev/disk/by-uuid` and, surprise, the disks are not there. They are however under `/dev/disk`. Now, looking at `/nix/store` I only see a few things, and one of them is a script named `stage-1-init.sh`. I quickly read the script, checked what it does, and confirmed that it was blocking on the disks. I looked at what was reported by `udevadm info ` and could see that `DEVLINKS` was missing the path for `by-uuid`.
-
-My laptop has a similar setup, but without RAID devices. I had already updated to 22.11, and had rebooted the laptop without issues. To be sure, I ran another update and rebooted, and I was able to unlock the drive and log into the machine without a problem.
-
-From here I have enough information to start searching for an issue similar to this. I got pretty lucky, and the two issues I found were:
-
-- [Since systemd-251.3 mdadm doesn't start at boot time #196800](https://github.com/nixoS/nixpkgs/issues/196800)
-- [Won't boot when root on raid0 with boot.initrd.systemd=true #199551](https://github.com/nixoS/nixpkgs/issues/199551)
-
-The proposed solution was easy:
-
-```diff
-@@ -43,7 +43,7 @@
-   };
-
-   boot.initrd.luks.devices."raid-fast".device =
--    "/dev/disk/by-uuid/66c58a92-45fe-4b03-9be0-214ff67c177c";
-+    "/dev/disk/by-id/md-name-nixos:fast";
-
-   fileSystems."/data/slow" = {
-     device = "/dev/disk/by-uuid/0f16db51-0ee7-48d8-9e48-653b85ecbf0a";
-@@ -51,7 +51,7 @@
-   };
-
-   boot.initrd.luks.devices."raid-slow".device =
--    "/dev/disk/by-uuid/d8b21267-d457-4522-91d9-5481b44dd0a5";
-+    "/dev/disk/by-id/md-name-nixos:slow";
-```
-
-I rebuilt, rebooted, and success: I was able to get access to the machine.
-
-## Takeaways
-
-I now have a mitigation for the problem; however, I still don't have a root cause. Since it's only the `by-uuid` path that is missing, and this is managed by `udev`, I'm guessing that some rules for `udev` have changed, but so far I can't find anything about that.
-
-It's really great to be able to easily switch back to a previous generation of my system, so I can debug and experiment with different solutions. If this had happened with another distribution, getting out of this mess would have been more tedious.
diff --git a/content/blog/no-ssh-to-prod.md b/content/blog/no-ssh-to-prod.md
deleted file mode 100644
index 9c2d20a..0000000
--- a/content/blog/no-ssh-to-prod.md
+++ /dev/null
@@ -1,29 +0,0 @@
-+++
-title = "No SSH to production"
-date = 2022-11-28
-[taxonomies]
-tags = ["practices"]
-+++
-
-It's not uncommon to hear talk about preventing engineers from SSHing to production machines. While I think it's a noble goal, I think most organizations are not ready for it in the short or even medium term.
-
-Why do we usually need to get a shell on a machine? The most common reason is to investigate a system that is behaving in an unexpected way, and we need to collect information, maybe using `strace`, `tcpdump`, `perf` or one of the BCC tools. Another reason might be to validate that a change deployed to a single machine is applied correctly, before rolling it out to a large portion of the fleet.
-
-If you end up writing a postmortem after the investigation session, one of the reviewers might ask why we needed to get a shell on the machine in the first place. Usually it's because we lack the capabilities to collect that kind of information remotely.
Someone will write an action item to improve this, it will be labeled 'long-term-action-item', and it will disappear into the bottomless backlog of a random team (how many organizations have clear ownership for managing access to machines?).
-
-In most cases, I think we would be better off breaking the problem down into smaller chunks, and focusing on iterative improvements. "No one gets to SSH to machines in production" is a poorly framed problem.
-
-What I think is better is to ask the following questions:
-
-- who has access to the machines
-- who actually SSHes to the machines
-- why do they need to SSH to the machines
-- was the state of the machine altered after someone logged into the machine
-
-For the first question, I'd recommend that we don't create user accounts and don't distribute engineers' SSH public keys on the machines. I'd create an 'infra' user account, and use signed SSH certificates (for example with [vault](https://www.hashicorp.com/products/vault/ssh-with-vault)). Only engineers who _have_ to have access should be able to sign their SSH key. That way you've limited the risks to a few engineers, and you have an audit trail of who requested access. You can build reports from these audit logs, to see how frequently engineers request access. For the 'infra' user, I'd limit its privileges, and make sure it can only run commands required for debugging/troubleshooting.
-
-Using Linux's audit logs, you can also generate reports on which commands are run. You can learn why the engineers needed to get on the host, and it can be used by the SRE organization to build services and tools that will enable new capabilities (for example, a service to collect traces, or do network captures remotely).
-
-Using the same audit logs, look for commands that modify the filesystem (for example `apt`, `yum`, `mkdir`): if the hosts are stateless, send them through the provisioning pipeline.
-
-At that point you've hardened the system, and you get visibility into what engineers are doing on these machines. Having engineers able to get a shell on a production machine is a high risk: even if your disks are encrypted at rest, when the host is running an engineer can see data they are not supposed to look at. But I think knowing who/when/why is more important than completely blocking SSH access: there's always going to be that one incident where there's nothing you can do without a shell on that one host.
diff --git a/content/blog/tailscale-docker-https.md b/content/blog/tailscale-docker-https.md
deleted file mode 100644
index 1094ca6..0000000
--- a/content/blog/tailscale-docker-https.md
+++ /dev/null
@@ -1,127 +0,0 @@
-+++
-title = "Tailscale, Docker and HTTPS"
-date = "2021-12-29"
-[taxonomies]
-tags = ["containers"]
-+++
-
-I run a number of services in my home network. For the majority of these services, I don't want to make them available on the internet, I want to only be able to access them when I'm on my home network. However, sometimes I'm not at home and I still want to access them. So far I've been using plain [wireguard](https://www.wireguard.com/) to achieve this. While the initial configuration for wireguard is pretty simple, it becomes a bit more cumbersome as I add more hosts/containers. It's also not easy to share keys with other folks if I want to give access to some of the machines or services. For that reason I decided to take a look at [tailscale](https://tailscale.com/).
-
-There are already a lot of articles about tailscale and how to use and configure it.
Their [documentation](https://tailscale.com/kb/) is also pretty good, so I won't cover the initial setup.
-
-As stated above, I want to access some of my services that are running as docker containers from anywhere. For web services, I want to use them through HTTPS, with a valid certificate, and without having to remember which port the service is listening on. I also don't want to set up a PKI in my home lab for that (and I'm also not interested in configuring split DNS), and instead I prefer to use [let's encrypt](https://letsencrypt.org/) with a proper subdomain that is unique for each service.
-
-The [tailscale documentation](https://tailscale.com/kb/1054/dns/) has two suggestions for this:
-
-- use their magicDNS feature / split DNS
-- set up a subdomain on a public domain
-
-Since I already have a public domain that I use for my home network, I decided to go with the second option (I'm also uncertain how to achieve my goal using magicDNS without running tailscale inside the container).
-
-The public domain I'm using is managed through [Google Cloud Domain](https://cloud.google.com/dns/docs/tutorials/create-domain-tutorial). I create a new record for the services I want to run (for example, `dash` for my instance of grafana), using the IP address from the tailscale node the service runs on (e.g. 100.83.51.12).
-
-For routing the traffic I use [traefik](https://traefik.io/). The configuration for traefik looks like this:
-
-```yaml
-global:
-  sendAnonymousUsage: false
-providers:
-  docker:
-    exposedByDefault: false
-entryPoints:
-  http:
-    address: ":80"
-  https:
-    address: ":443"
-certificatesResolvers:
-  dash:
-    acme:
-      email: franck@fcuny.net
-      storage: acme.json
-      dnsChallenge:
-        provider: gcloud
-```
-
-The important bit here is the `certificatesResolvers` part. I'll be using the [dnsChallenge](https://doc.traefik.io/traefik/user-guides/docker-compose/acme-dns/) instead of the [httpChallenge](https://doc.traefik.io/traefik/user-guides/docker-compose/acme-http/) to obtain the certificate from let's encrypt. For this to work, I need to specify the `provider` to be [gcloud](https://go-acme.github.io/lego/dns/gcloud/). I'll also need a service account (see [this doc](https://cloud.google.com/docs/authentication/production#providing_credentials_to_your_application) to create it). I run `traefik` in a docker container, and the `systemd` unit file is below.
The required bits for using the `dnsChallenge` with `gcloud` are:
-
-- the environment variable `GCE_SERVICE_ACCOUNT_FILE`: it contains the credentials so that `traefik` can update the DNS record for the challenge
-- the environment variable `GCE_PROJECT`: the name of the GCP project
-- mounting the service account file inside the container (I store it on the host under `/data/containers/traefik/config/sa.json`)
-
-```ini
-[Unit]
-Description=traefik proxy
-Documentation=https://doc.traefik.io/traefik/
-After=docker.service
-Requires=docker.service
-
-[Service]
-Restart=on-failure
-ExecStartPre=-/usr/bin/docker kill traefik
-ExecStartPre=-/usr/bin/docker rm traefik
-ExecStartPre=/usr/bin/docker pull traefik:latest
-
-ExecStart=/usr/bin/docker run \
-    -p 80:80 \
-    -p 9080:8080 \
-    -p 443:443 \
-    --name=traefik \
-    -e GCE_SERVICE_ACCOUNT_FILE=/var/run/gcp-service-account.json \
-    -e GCE_PROJECT=gcp-super-project \
-    --volume=/data/containers/traefik/config/acme.json:/acme.json \
-    --volume=/data/containers/traefik/config/traefik.yml:/etc/traefik/traefik.yml:ro \
-    --volume=/data/containers/traefik/config/sa.json:/var/run/gcp-service-account.json \
-    --volume=/var/run/docker.sock:/var/run/docker.sock:ro \
-    traefik:latest
-ExecStop=/usr/bin/docker stop traefik
-
-[Install]
-WantedBy=multi-user.target
-```
-
-As an example, I run [grafana](https://grafana.com/) on my home network to view metrics from the various containers / hosts. Let's pretend I use `example.net` as my domain. I want to be able to access `grafana` via `https://dash.example.net`. Here's the `systemd` unit configuration I use for this:
-
-```ini
-[Unit]
-Description=Grafana in a docker container
-Documentation=https://grafana.com/docs/
-After=docker.service
-Requires=docker.service
-
-[Service]
-Restart=on-failure
-RuntimeDirectory=grafana
-ExecStartPre=-/usr/bin/docker kill grafana-server
-ExecStartPre=-/usr/bin/docker rm grafana-server
-ExecStartPre=-/usr/bin/docker pull grafana/grafana:latest
-
-ExecStart=/usr/bin/docker run \
-    -p 3000:3000 \
-    -e TZ='America/Los_Angeles' \
-    --name grafana-server \
-    -v /data/containers/grafana/etc/grafana:/etc/grafana \
-    -v /data/containers/grafana/var/lib/grafana:/var/lib/grafana \
-    -v /data/containers/grafana/var/log/grafana:/var/log/grafana \
-    --user=grafana \
-    --label traefik.enable=true \
-    --label traefik.http.middlewares.grafana-https-redirect.redirectscheme.scheme=https \
-    --label traefik.http.middlewares.grafana-https-redirect.redirectscheme.permanent=true \
-    --label traefik.http.routers.grafana-http.rule=Host(`dash.example.net`) \
-    --label traefik.http.routers.grafana-http.entrypoints=http \
-    --label traefik.http.routers.grafana-http.service=grafana-svc \
-    --label traefik.http.routers.grafana-http.middlewares=grafana-https-redirect \
-    --label traefik.http.routers.grafana-https.rule=Host(`dash.example.net`) \
-    --label traefik.http.routers.grafana-https.entrypoints=https \
-    --label traefik.http.routers.grafana-https.tls=true \
-    --label traefik.http.routers.grafana-https.tls.certresolver=dash \
-    --label traefik.http.routers.grafana-https.service=grafana-svc \
-    --label traefik.http.services.grafana-svc.loadbalancer.server.port=3000 \
-    grafana/grafana:latest
-
-ExecStop=/usr/bin/docker stop grafana-server
-
-[Install]
-WantedBy=multi-user.target
-```
-
-Now I can access my grafana instance via `https://dash.example.net` (and HTTP would redirect to HTTPS) while my tailscale interface is up on the machine I'm using (e.g. my desktop or my phone).
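A quick way to sanity check this kind of setup from another machine on the tailnet (assuming `dash.example.net` is the record created earlier, pointing at the tailscale IP of the docker host):

```sh
# the name should resolve to the tailnet address (e.g. 100.83.51.12)
dig +short dash.example.net
# the certificate served by traefik should come from let's encrypt
openssl s_client -connect dash.example.net:443 -servername dash.example.net </dev/null 2>/dev/null \
  | openssl x509 -noout -issuer -dates
```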
diff --git a/content/containerd-to-firecracker.md b/content/containerd-to-firecracker.md new file mode 100644 index 0000000..df26cba --- /dev/null +++ b/content/containerd-to-firecracker.md @@ -0,0 +1,679 @@ ++++ +title = "containerd to firecracker" +date = 2021-05-15 +[taxonomies] +tags = ["containers"] ++++ + +fly.io had an [interesting +article](https://fly.io/blog/docker-without-docker/) about how they use +docker images to create VMs for `firecracker`. + +They describe the process as follow: + +1. Pull a container from a registry +2. Create a loop device to store the container's filesystem on +3. Unpack the container into the mounted loop device +4. Create a second block device and inject init, kernel, configuration + and other stuff +5. Attach persistent volumes (if any) +6. Create a TAP device and configure it +7. Hand it off to Firecracker and boot that thing + +That's pretty detailed, and I'm curious how difficult it is to implement +this. I've been meaning to look into Firecracker for a while and into +containers'd API, so this is a perfect opportunity to get started. The +code is available [here](https://git.fcuny.net/containerd-to-vm/). + +## #1 Pull a container from a registry with `containerd` + +`containerd` has a pretty [detailed +documentation](https://pkg.go.dev/github.com/containerd/containerd). +From the main page we can see the following example to create a client. + +```go +import ( + "github.com/containerd/containerd" + "github.com/containerd/containerd/cio" +) + + +func main() { + client, err := containerd.New("/run/containerd/containerd.sock") + defer client.Close() +} +``` + +And pulling an image is also pretty straightforward: + +```go +image, err := client.Pull(context, "docker.io/library/redis:latest") +``` + +The `Pull` method returns an +[`Image`](https://pkg.go.dev/github.com/containerd/containerd@v1.4.4/images#Image) +and there's a few methods associated with it. + +As `containerd` has namespaces, it's possible to specify the namespace +we want to use when working with the API: + +```go +ctx := namespaces.WithNamespace(context.Background(), "c2vm") +image, err := client.Pull(ctx, "docker.io/library/redis:latest") +``` + +The image will now be stored in the `c2vm` namespace. We can verify this +with: + +```bash +; sudo ctr -n c2vm images ls -q +docker.io/library/redis:latest +``` + +## #2 Create a loop device to store the container's filesystem on + +This is going to be pretty straightforward. To create a loop device we +need to: + +1. pre-allocate space to a file +2. convert that file to some format +3. mount it to some destination + +There's two commons ways to pre-allocate space to a file: `dd` and +`fallocate` (there's likely way more ways to do this). I'll go with +`fallocate` for this example. + +First, to be safe, we create a temporary file, and use `renameio` to +handle the renaming (I recommend reading the doc of the module). 
+ +```go +f, err := renameio.TempFile("", rawFile) +if err != nil { + return err +} +defer f.Cleanup() +``` + +Now to do the pre-allocation (we're making an assumption here that 2GB +is enough, we can likely check what's the size of the container before +doing this): + +```go +command := exec.Command("fallocate", "-l", "2G", f.Name()) +if err := command.Run(); err != nil { + return fmt.Errorf("fallocate error: %s", err) +} +``` + +We can now convert that file to ext4: + +```go +command = exec.Command("mkfs.ext4", "-F", f.Name()) +if err := command.Run(); err != nil { + return fmt.Errorf("mkfs.ext4 error: %s", err) +} +``` + +Now we can rename safely the temporary file to the proper file we want: + +```go +f.CloseAtomicallyReplace() +``` + +And to mount that file + +```go +command = exec.Command("mount", "-o", "loop", rawFile, mntDir) +if err := command.Run(); err != nil { + return fmt.Errorf("mount error: %s", err) +} +``` + +## #3 Unpack the container into the mounted loop device + +Extracting the container using `containerd` is pretty simple. Here's the +function that I use: + +```go +func extract(ctx context.Context, client *containerd.Client, image containerd.Image, mntDir string) error { + manifest, err := images.Manifest(ctx, client.ContentStore(), image.Target(), platform) + if err != nil { + log.Fatalf("failed to get the manifest: %v\n", err) + } + + for _, desc := range manifest.Layers { + log.Printf("extracting layer %s\n", desc.Digest.String()) + layer, err := client.ContentStore().ReaderAt(ctx, desc) + if err != nil { + return err + } + if err := archive.Untar(content.NewReader(layer), mntDir, &archive.TarOptions{NoLchown: true}); err != nil { + return err + } + } + + return nil +} +``` + +Calling `images.Manifest` returns the +[manifest](https://github.com/opencontainers/image-spec/blob/master/manifest.md) +from the image. What we care here are the list of layers. Here I'm +making a number of assumptions regarding their type (we should be +checking the media type first). We read the layers and extract them to +the mounted path. + +## #4 Create a second block device and inject other stuff + +Here I'm going to deviate a bit. I will not create a second loop device, +and I will not inject a kernel. In their article, they provided a link +to a snapshot of their `init` process +(). In order to keep this +simple, our init is going to be a shell script composed of the content +of the entry point of the container. We're also going to add a few extra +files to container (`/etc/hosts` and `/etc/resolv.conf`). + +Finally, since we've pre-allocated 2GB for that container, and we likely +don't need that much, we're also going to resize the image. + +### Add init + +Let's refer to the [specification for the +config](https://github.com/opencontainers/image-spec/blob/master/config.md). +The elements that are of interest to me are: + +- `Env`, which is array of strings. They contain the environment + variables that likely we need to run the program +- `Cmd`, which is also an array of strings. If there's no entry point + provided, this is what is used. + +At this point, for this experiment, I'm going to ignore exposed ports, +working directory, and the user. + +First we need to read the config from the container. 
This is easily +done: + +```go +config, err := images.Config(ctx, client.ContentStore(), image.Target(), platform) +if err != nil { + return err +} +``` + +This needs to be read and decoded: + +```go +configBlob, err := content.ReadBlob(ctx, client.ContentStore(), config) +var imageSpec ocispec.Image +json.Unmarshal(configBlob, &imageSpec) +``` + +`init` is the first process started by Linux during boot. On a regular +Linux desktop you likely have a symbolic link from `/usr/bin/init` to +`/usr/lib/systemd/systemd`, since most distributions have switched to +`systemd`. For my use case however, I want to run a single process, and +I want it to be the one from the container. For this we can create a +simple shell script inside the container (the location does not matter +for now) with the environment variables and the command. + +Naively, this can be done like this: + +```go +initPath := filepath.Join(mntDir, "init.sh") +f, err := renameio.TempFile("", initPath) +if err != nil { + return err +} +defer f.Cleanup() + +writer := bufio.NewWriter(f) +fmt.Fprintf(writer, "#!/bin/sh\n") +for _, env := range initEnvs { + fmt.Fprintf(writer, "export %s\n", env) +} +fmt.Fprintf(writer, "%s\n", initCmd) +writer.Flush() + +f.CloseAtomicallyReplace() + +mode := int(0755) +os.Chmod(initPath, os.FileMode(mode)) +``` + +We're once again creating a temporary file with `renamio`, and we're +writing our shell scripts, one line at a time. We only need to make sure +this executable. + +### extra files + +Once we have our init file, I also want to add a few extra files: +`/etc/hosts` and `/etc/resolv.conf`. This files are not always present, +since they can be injected by other systems. I also want to make sure +that DNS resolutions are done using my own DNS server. + +### resize the image + +We've pre-allocated 2GB for the image, and it's likely we don't need as +much space. We can do this by running `e2fsck` and `resize2fs` once +we're done manipulating the image. + +Within a function, we can do the following: + +```go +command := exec.Command("/usr/bin/e2fsck", "-p", "-f", rawFile) +if err := command.Run(); err != nil { + return fmt.Errorf("e2fsck error: %s", err) +} + +command = exec.Command("resize2fs", "-M", rawFile) +if err := command.Run(); err != nil { + return fmt.Errorf("resize2fs error: %s", err) +} +``` + +I'm using `docker.io/library/redis:latest` for my test, and I end up +with the following size for the image: + +```bash +-rw------- 1 root root 216M Apr 22 14:50 /tmp/fcuny.img +``` + +### Kernel + +We're going to need a kernel to run that VM. In my case I've decided to +go with version 5.8, and build a custom kernel. If you are not familiar +with the process, the firecracker team has [documented how to do +this](https://github.com/firecracker-microvm/firecracker/blob/main/docs/rootfs-and-kernel-setup.md#creating-a-kernel-image). +In my case all I had to do was: + +```bash +git clone https://github.com/torvalds/linux.git linux.git +cd linux.git +git checkout v5.8 +curl -o .config -s https://github.com/firecracker-microvm/firecracker/blob/main/resources/microvm-kernel-x86_64.config +make menuconfig +make vmlinux -j8 +``` + +Note that they also have a pretty [good documentation for +production](https://github.com/firecracker-microvm/firecracker/blob/main/docs/prod-host-setup.md). + +## #5 Attach persistent volumes (if any) + +I'm going to skip that step for now. + +## #6 Create a TAP device and configure it + +We're going to need a network for that VM (otherwise it might be a bit +boring). 
There's a few solutions that we can take: + +1. create the TAP device +2. delegate all that work to a + [CNI](https://github.com/containernetworking/cni) + +I've decided to use the CNI approach [documented in the Go's +SDK](https://github.com/firecracker-microvm/firecracker-go-sdk#cni). For +this to work we need to install the `tc-redirect-tap` CNI plugin +(available at ). + +Based on that documentation, I'll start with the following configuration +in `etc/cni/conf.d/50-c2vm.conflist`: + +```json +{ + "name": "c2vm", + "cniVersion": "0.4.0", + "plugins": [ + { + "type": "bridge", + "bridge": "c2vm-br", + "isDefaultGateway": true, + "forceAddress": false, + "ipMasq": true, + "hairpinMode": true, + "mtu": 1500, + "ipam": { + "type": "host-local", + "subnet": "192.168.128.0/24", + "resolvConf": "/etc/resolv.conf" + } + }, + { + "type": "firewall" + }, + { + "type": "tc-redirect-tap" + } + ] +} +``` + +## #7 Hand it off to Firecracker and boot that thing + +Now that we have all the components, we need to boot that VM. Since I've +been working with Go so far, I'll also use the [Go +SDK](https://github.com/firecracker-microvm/firecracker-go-sdk) to +manage and start the VM. + +For this we need the firecracker binary, which we can [find on +GitHub](https://github.com/firecracker-microvm/firecracker/releases). + +The first thing is to configure the list of devices. In our case we will +have a single device, the boot drive that we've created in the previous +step. + +```go +devices := make([]models.Drive, 1) +devices[0] = models.Drive{ + DriveID: firecracker.String("1"), + PathOnHost: &rawImage, + IsRootDevice: firecracker.Bool(true), + IsReadOnly: firecracker.Bool(false), +} +``` + +The next step is to configure the VM: + +```go +fcCfg := firecracker.Config{ + LogLevel: "debug", + SocketPath: firecrackerSock, + KernelImagePath: linuxKernel, + KernelArgs: "console=ttyS0 reboot=k panic=1 acpi=off pci=off i8042.noaux i8042.nomux i8042.nopnp i8042.dumbkbd init=/init.sh random.trust_cpu=on", + Drives: devices, + MachineCfg: models.MachineConfiguration{ + VcpuCount: firecracker.Int64(1), + CPUTemplate: models.CPUTemplate("C3"), + HtEnabled: firecracker.Bool(true), + MemSizeMib: firecracker.Int64(512), + }, + NetworkInterfaces: []firecracker.NetworkInterface{ + { + CNIConfiguration: &firecracker.CNIConfiguration{ + NetworkName: "c2vm", + IfName: "eth0", + }, + }, + }, +} +``` + +Finally we can create the command to start and run the VM: + +```go +command := firecracker.VMCommandBuilder{}. + WithBin(firecrackerBinary). + WithSocketPath(fcCfg.SocketPath). + WithStdin(os.Stdin). + WithStdout(os.Stdout). + WithStderr(os.Stderr). + Build(ctx) +machineOpts = append(machineOpts, firecracker.WithProcessRunner(command)) +m, err := firecracker.NewMachine(vmmCtx, fcCfg, machineOpts...) 
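+// Note: NewMachine only assembles the VMM handle from fcCfg and the options;
+// nothing is running yet. Start (below) is what spawns the firecracker
+// process through the command runner we just built and drives the API calls,
+// and Wait then blocks until the VM exits.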
if err != nil {
    panic(err)
}

if err := m.Start(vmmCtx); err != nil {
    panic(err)
}
defer m.StopVMM()

if err := m.Wait(vmmCtx); err != nil {
    panic(err)
}
```

The end result:

    ; sudo ./c2vm -container docker.io/library/redis:latest -firecracker-binary ./hack/firecracker/firecracker-v0.24.3-x86_64 -linux-kernel ./hack/linux/my-linux.bin -out /tmp/redis.img
+ 2021/05/15 14:12:59 pulled docker.io/library/redis:latest (38690247 bytes)
+ 2021/05/15 14:13:00 mounted /tmp/redis.img on /tmp/c2vm026771514
+ 2021/05/15 14:13:00 extracting layer sha256:69692152171afee1fd341febc390747cfca2ff302f2881d8b394e786af605696
+ 2021/05/15 14:13:00 extracting layer sha256:a4a46f2fd7e06fab84b4e78eb2d1b6d007351017f9b18dbeeef1a9e7cf194e00
+ 2021/05/15 14:13:00 extracting layer sha256:bcdf6fddc3bdaab696860eb0f4846895c53a3192c9d7bf8d2275770ea8073532
+ 2021/05/15 14:13:01 extracting layer sha256:b7e9b50900cc06838c44e0fc5cbebe5c0b3e7f70c02f32dd754e1aa6326ed566
+ 2021/05/15 14:13:01 extracting layer sha256:5f3030c50d85a9d2f70adb610b19b63290c6227c825639b227ddc586f86d1c76
+ 2021/05/15 14:13:01 extracting layer sha256:63dae8e0776cdbd63909fbd9c047c1615a01cb21b73efa87ae2feed680d3ffa1
+ 2021/05/15 14:13:01 init script created
+ 2021/05/15 14:13:01 umount /tmp/c2vm026771514
+ INFO[0003] Called startVMM(), setting up a VMM on firecracker.sock
+ INFO[0003] VMM logging disabled.
+ INFO[0003] VMM metrics disabled.
+ INFO[0003] refreshMachineConfiguration: [GET /machine-config][200] getMachineConfigurationOK &{CPUTemplate:C3 HtEnabled:0xc0004e6753 MemSizeMib:0xc0004e6748 VcpuCount:0xc0004e6740}
+ INFO[0003] PutGuestBootSource: [PUT /boot-source][204] putGuestBootSourceNoContent
+ INFO[0003] Attaching drive /tmp/redis.img, slot 1, root true.
+ INFO[0003] Attached drive /tmp/redis.img: [PUT /drives/{drive_id}][204] putGuestDriveByIdNoContent
+ INFO[0003] Attaching NIC tap0 (hwaddr 9e:72:c7:04:6b:80) at index 1
+ INFO[0003] startInstance successful: [PUT /actions][204] createSyncActionNoContent
+ [ 0.000000] Linux version 5.8.0 (fcuny@nas) (gcc (Debian 8.3.0-6) 8.3.0, GNU ld (GNU Binutils for Debian) 2.31.1) #1 SMP Mon Apr 12 20:07:40 PDT 2021
+ [ 0.000000] Command line: i8042.dumbkbd ip=192.168.128.9::192.168.128.1:255.255.255.0:::off::: console=ttyS0 reboot=k panic=1 acpi=off pci=off i8042.noaux i8042.nomux i8042.nopnp init=/init.sh random.trust_cpu=on root=/dev/vda rw virtio_mmio.device=4K@0xd0000000:5 virtio_mmio.device=4K@0xd0001000:6
+ [ 0.000000] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point registers'
+ [ 0.000000] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers'
+ [ 0.000000] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers'
+ [ 0.000000] x86/fpu: xstate_offset[2]: 576, xstate_sizes[2]: 256
+ [ 0.000000] x86/fpu: Enabled xstate features 0x7, context size is 832 bytes, using 'standard' format.
+ [ 0.000000] BIOS-provided physical RAM map:
+ [ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable
+ [ 0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000001fffffff] usable
+ [ 0.000000] NX (Execute Disable) protection: active
+ [ 0.000000] DMI not present or invalid.
+ [ 0.000000] Hypervisor detected: KVM + [ 0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00 + [ 0.000000] kvm-clock: cpu 0, msr 2401001, primary cpu clock + [ 0.000000] kvm-clock: using sched offset of 11918596 cycles + [ 0.000005] clocksource: kvm-clock: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns + [ 0.000011] tsc: Detected 1190.400 MHz processor + [ 0.000108] last_pfn = 0x20000 max_arch_pfn = 0x400000000 + [ 0.000151] Disabled + [ 0.000156] x86/PAT: MTRRs disabled, skipping PAT initialization too. + [ 0.000166] CPU MTRRs all blank - virtualized system. + [ 0.000170] x86/PAT: Configuration [0-7]: WB WT UC- UC WB WT UC- UC + [ 0.000201] found SMP MP-table at [mem 0x0009fc00-0x0009fc0f] + [ 0.000257] check: Scanning 1 areas for low memory corruption + [ 0.000364] No NUMA configuration found + [ 0.000365] Faking a node at [mem 0x0000000000000000-0x000000001fffffff] + [ 0.000370] NODE_DATA(0) allocated [mem 0x1ffde000-0x1fffffff] + [ 0.000490] Zone ranges: + [ 0.000493] DMA [mem 0x0000000000001000-0x0000000000ffffff] + [ 0.000494] DMA32 [mem 0x0000000001000000-0x000000001fffffff] + [ 0.000495] Normal empty + [ 0.000497] Movable zone start for each node + [ 0.000500] Early memory node ranges + [ 0.000501] node 0: [mem 0x0000000000001000-0x000000000009efff] + [ 0.000502] node 0: [mem 0x0000000000100000-0x000000001fffffff] + [ 0.000510] Zeroed struct page in unavailable ranges: 98 pages + [ 0.000511] Initmem setup node 0 [mem 0x0000000000001000-0x000000001fffffff] + [ 0.004990] Intel MultiProcessor Specification v1.4 + [ 0.004995] MPTABLE: OEM ID: FC + [ 0.004995] MPTABLE: Product ID: 000000000000 + [ 0.004996] MPTABLE: APIC at: 0xFEE00000 + [ 0.005007] Processor #0 (Bootup-CPU) + [ 0.005039] IOAPIC[0]: apic_id 2, version 17, address 0xfec00000, GSI 0-23 + [ 0.005041] Processors: 1 + [ 0.005042] TSC deadline timer available + [ 0.005044] smpboot: Allowing 1 CPUs, 0 hotplug CPUs + [ 0.005060] KVM setup pv remote TLB flush + [ 0.005072] KVM setup pv sched yield + [ 0.005078] PM: hibernation: Registered nosave memory: [mem 0x00000000-0x00000fff] + [ 0.005079] PM: hibernation: Registered nosave memory: [mem 0x0009f000-0x000fffff] + [ 0.005081] [mem 0x20000000-0xffffffff] available for PCI devices + [ 0.005082] Booting paravirtualized kernel on KVM + [ 0.005084] clocksource: refined-jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645519600211568 ns + [ 0.005087] setup_percpu: NR_CPUS:128 nr_cpumask_bits:128 nr_cpu_ids:1 nr_node_ids:1 + [ 0.006381] percpu: Embedded 44 pages/cpu s143360 r8192 d28672 u2097152 + [ 0.006404] KVM setup async PF for cpu 0 + [ 0.006410] kvm-stealtime: cpu 0, msr 1f422080 + [ 0.006420] Built 1 zonelists, mobility grouping on. 
Total pages: 128905 + [ 0.006420] Policy zone: DMA32 + [ 0.006422] Kernel command line: i8042.dumbkbd ip=192.168.128.9::192.168.128.1:255.255.255.0:::off::: console=ttyS0 reboot=k panic=1 acpi=off pci=off i8042.noaux i8042.nomux i8042.nopnp init=/init.sh random.trust_cpu=on root=/dev/vda rw virtio_mmio.device=4K@0xd0000000:5 virtio_mmio.device=4K@0xd0001000:6 + [ 0.006858] Dentry cache hash table entries: 65536 (order: 7, 524288 bytes, linear) + [ 0.007003] Inode-cache hash table entries: 32768 (order: 6, 262144 bytes, linear) + [ 0.007047] mem auto-init: stack:off, heap alloc:off, heap free:off + [ 0.007947] Memory: 491940K/523896K available (10243K kernel code, 629K rwdata, 1860K rodata, 1408K init, 6048K bss, 31956K reserved, 0K cma-reserved) + [ 0.007980] random: get_random_u64 called from __kmem_cache_create+0x3d/0x540 with crng_init=0 + [ 0.008053] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 + [ 0.008146] rcu: Hierarchical RCU implementation. + [ 0.008147] rcu: RCU restricting CPUs from NR_CPUS=128 to nr_cpu_ids=1. + [ 0.008151] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies. + [ 0.008152] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1 + [ 0.008170] NR_IRQS: 4352, nr_irqs: 48, preallocated irqs: 16 + [ 0.008373] random: crng done (trusting CPU's manufacturer) + [ 0.008430] Console: colour dummy device 80x25 + [ 0.052276] printk: console [ttyS0] enabled + [ 0.052685] APIC: Switch to symmetric I/O mode setup + [ 0.053288] x2apic enabled + [ 0.053705] Switched APIC routing to physical x2apic. + [ 0.054213] KVM setup pv IPIs + [ 0.055559] clocksource: tsc-early: mask: 0xffffffffffffffff max_cycles: 0x1128af0325d, max_idle_ns: 440795261011 ns + [ 0.056516] Calibrating delay loop (skipped) preset value.. 2380.80 BogoMIPS (lpj=4761600) + [ 0.057259] pid_max: default: 32768 minimum: 301 + [ 0.057726] LSM: Security Framework initializing + [ 0.058176] SELinux: Initializing. + [ 0.058556] Mount-cache hash table entries: 1024 (order: 1, 8192 bytes, linear) + [ 0.059221] Mountpoint-cache hash table entries: 1024 (order: 1, 8192 bytes, linear) + [ 0.060382] x86/cpu: User Mode Instruction Prevention (UMIP) activated + [ 0.060510] Last level iTLB entries: 4KB 0, 2MB 0, 4MB 0 + [ 0.060510] Last level dTLB entries: 4KB 0, 2MB 0, 4MB 0, 1GB 0 + [ 0.060510] Spectre V1 : Mitigation: usercopy/swapgs barriers and __user pointer sanitization + [ 0.060510] Spectre V2 : Mitigation: Enhanced IBRS + [ 0.060510] Spectre V2 : Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch + [ 0.060510] Spectre V2 : mitigation: Enabling conditional Indirect Branch Prediction Barrier + [ 0.060510] Speculative Store Bypass: Mitigation: Speculative Store Bypass disabled via prctl and seccomp + [ 0.060510] Freeing SMP alternatives memory: 32K + [ 0.060510] smpboot: CPU0: Intel(R) Xeon(R) Processor @ 1.20GHz (family: 0x6, model: 0x3e, stepping: 0x4) + [ 0.060510] Performance Events: unsupported p6 CPU model 62 no PMU driver, software events only. + [ 0.060510] rcu: Hierarchical SRCU implementation. + [ 0.060510] smp: Bringing up secondary CPUs ... 
+ [ 0.060510] smp: Brought up 1 node, 1 CPU + [ 0.060510] smpboot: Max logical packages: 1 + [ 0.060523] smpboot: Total of 1 processors activated (2380.80 BogoMIPS) + [ 0.061338] devtmpfs: initialized + [ 0.061710] x86/mm: Memory block size: 128MB + [ 0.062341] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns + [ 0.063245] futex hash table entries: 256 (order: 2, 16384 bytes, linear) + [ 0.063946] thermal_sys: Registered thermal governor 'fair_share' + [ 0.063946] thermal_sys: Registered thermal governor 'step_wise' + [ 0.064522] thermal_sys: Registered thermal governor 'user_space' + [ 0.065313] NET: Registered protocol family 16 + [ 0.066398] DMA: preallocated 128 KiB GFP_KERNEL pool for atomic allocations + [ 0.067057] DMA: preallocated 128 KiB GFP_KERNEL|GFP_DMA pool for atomic allocations + [ 0.067778] DMA: preallocated 128 KiB GFP_KERNEL|GFP_DMA32 pool for atomic allocations + [ 0.068506] audit: initializing netlink subsys (disabled) + [ 0.068708] cpuidle: using governor ladder + [ 0.069097] cpuidle: using governor menu + [ 0.070636] audit: type=2000 audit(1621113181.800:1): state=initialized audit_enabled=0 res=1 + [ 0.076346] HugeTLB registered 2.00 MiB page size, pre-allocated 0 pages + [ 0.077007] ACPI: Interpreter disabled. + [ 0.077445] SCSI subsystem initialized + [ 0.077812] pps_core: LinuxPPS API ver. 1 registered + [ 0.078277] pps_core: Software ver. 5.3.6 - Copyright 2005-2007 Rodolfo Giometti + [ 0.079206] PTP clock support registered + [ 0.079741] NetLabel: Initializing + [ 0.080111] NetLabel: domain hash size = 128 + [ 0.080529] NetLabel: protocols = UNLABELED CIPSOv4 CALIPSO + [ 0.081113] NetLabel: unlabeled traffic allowed by default + [ 0.082072] clocksource: Switched to clocksource kvm-clock + [ 0.082715] VFS: Disk quotas dquot_6.6.0 + [ 0.083123] VFS: Dquot-cache hash table entries: 512 (order 0, 4096 bytes) + [ 0.083855] pnp: PnP ACPI: disabled + [ 0.084510] NET: Registered protocol family 2 + [ 0.084718] tcp_listen_portaddr_hash hash table entries: 256 (order: 0, 4096 bytes, linear) + [ 0.085602] TCP established hash table entries: 4096 (order: 3, 32768 bytes, linear) + [ 0.086365] TCP bind hash table entries: 4096 (order: 4, 65536 bytes, linear) + [ 0.087025] TCP: Hash tables configured (established 4096 bind 4096) + [ 0.087749] UDP hash table entries: 256 (order: 1, 8192 bytes, linear) + [ 0.088481] UDP-Lite hash table entries: 256 (order: 1, 8192 bytes, linear) + [ 0.089261] NET: Registered protocol family 1 + [ 0.090395] virtio-mmio: Registering device virtio-mmio.0 at 0xd0000000-0xd0000fff, IRQ 5. + [ 0.091388] virtio-mmio: Registering device virtio-mmio.1 at 0xd0001000-0xd0001fff, IRQ 6. 
+ [ 0.092222] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x1128af0325d, max_idle_ns: 440795261011 ns + [ 0.093322] clocksource: Switched to clocksource tsc + [ 0.093824] platform rtc_cmos: registered platform RTC device (no PNP device found) + [ 0.094618] check: Scanning for low memory corruption every 60 seconds + [ 0.095394] Initialise system trusted keyrings + [ 0.095836] Key type blacklist registered + [ 0.096427] workingset: timestamp_bits=36 max_order=17 bucket_order=0 + [ 0.097849] squashfs: version 4.0 (2009/01/31) Phillip Lougher + [ 0.107488] Key type asymmetric registered + [ 0.107905] Asymmetric key parser 'x509' registered + [ 0.108409] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 252) + [ 0.109435] Serial: 8250/16550 driver, 1 ports, IRQ sharing disabled + [ 0.110116] serial8250: ttyS0 at I/O 0x3f8 (irq = 4, base_baud = 115200) is a 16550A + [ 0.111877] loop: module loaded + [ 0.112426] virtio_blk virtio0: [vda] 441152 512-byte logical blocks (226 MB/215 MiB) + [ 0.113229] vda: detected capacity change from 0 to 225869824 + [ 0.114143] Loading iSCSI transport class v2.0-870. + [ 0.114753] iscsi: registered transport (tcp) + [ 0.115162] tun: Universal TUN/TAP device driver, 1.6 + [ 0.115955] i8042: PNP detection disabled + [ 0.116498] serio: i8042 KBD port at 0x60,0x64 irq 1 + [ 0.117089] input: AT Raw Set 2 keyboard as /devices/platform/i8042/serio0/input/input0 + [ 0.117932] intel_pstate: CPU model not supported + [ 0.118448] hid: raw HID events driver (C) Jiri Kosina + [ 0.119090] Initializing XFRM netlink socket + [ 0.119555] NET: Registered protocol family 10 + [ 0.120285] Segment Routing with IPv6 + [ 0.120812] NET: Registered protocol family 17 + [ 0.121350] Bridge firewalling registered + [ 0.122026] NET: Registered protocol family 40 + [ 0.122515] IPI shorthand broadcast: enabled + [ 0.122961] sched_clock: Marking stable (72512224, 48198862)->(137683636, -16972550) + [ 0.123796] registered taskstats version 1 + [ 0.124203] Loading compiled-in X.509 certificates + [ 0.125355] Loaded X.509 cert 'Build time autogenerated kernel key: 6203e6adc37b712d3b220a26b38f3d31311d5966' + [ 0.126355] Key type ._fscrypt registered + [ 0.126736] Key type .fscrypt registered + [ 0.127109] Key type fscrypt-provisioning registered + [ 0.127657] Key type encrypted registered + [ 0.144629] IP-Config: Complete: + [ 0.144968] device=eth0, hwaddr=9e:72:c7:04:6b:80, ipaddr=192.168.128.9, mask=255.255.255.0, gw=192.168.128.1 + [ 0.146044] host=192.168.128.9, domain=, nis-domain=(none) + [ 0.146604] bootserver=255.255.255.255, rootserver=255.255.255.255, rootpath= + [ 0.148347] EXT4-fs (vda): mounted filesystem with ordered data mode. Opts: (null) + [ 0.149098] VFS: Mounted root (ext4 filesystem) on device 254:0. + [ 0.149761] devtmpfs: mounted + [ 0.150340] Freeing unused decrypted memory: 2040K + [ 0.151148] Freeing unused kernel image (initmem) memory: 1408K + [ 0.156621] Write protecting the kernel read-only data: 14336k + [ 0.158657] Freeing unused kernel image (text/rodata gap) memory: 2044K + [ 0.159490] Freeing unused kernel image (rodata/data gap) memory: 188K + [ 0.160150] Run /init.sh as init process + 462:C 15 May 2021 21:13:01.903 # oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo + 462:C 15 May 2021 21:13:01.904 # Redis version=6.2.3, bits=64, commit=00000000, modified=0, pid=462, just started + 462:C 15 May 2021 21:13:01.905 # Warning: no config file specified, using the default config. 
In order to specify a config file use redis-server /path/to/redis.conf
+ 462:M 15 May 2021 21:13:01.907 * Increased maximum number of open files to 10032 (it was originally set to 1024).
+ 462:M 15 May 2021 21:13:01.909 * monotonic clock: POSIX clock_gettime
+                 _._
+            _.-``__ ''-._
+       _.-``    `.  `_.  ''-._           Redis 6.2.3 (00000000/0) 64 bit
+   .-`` .-```.  ```\/    _.,_ ''-._
+  (    '      ,       .-`  | `,    )     Running in standalone mode
+  |`-._`-...-` __...-.``-._|'` _.-'|     Port: 6379
+  |    `-._   `._    /     _.-'    |     PID: 462
+   `-._    `-._  `-./  _.-'    _.-'
+  |`-._`-._    `-.__.-'    _.-'_.-'|
+  |    `-._`-._        _.-'_.-'    |           https://redis.io
+   `-._    `-._`-.__.-'_.-'    _.-'
+  |`-._`-._    `-.__.-'    _.-'_.-'|
+  |    `-._`-._        _.-'_.-'    |
+   `-._    `-._`-.__.-'_.-'    _.-'
+       `-._    `-.__.-'    _.-'
+           `-._        _.-'
+               `-.__.-'
+
+ 462:M 15 May 2021 21:13:01.922 # Server initialized
+ 462:M 15 May 2021 21:13:01.923 * Ready to accept connections

We can do a quick test with the following:

```bash
; sudo docker run -it --rm redis redis-cli -h 192.168.128.9
192.168.128.9:6379> get foo
(nil)
192.168.128.9:6379> set foo 1
OK
192.168.128.9:6379> get foo
"1"
192.168.128.9:6379>
```
diff --git a/content/cpu-power-management.md b/content/cpu-power-management.md
new file mode 100644
index 0000000..922f081
--- /dev/null
+++ b/content/cpu-power-management.md
@@ -0,0 +1,121 @@
+++
title = "CPU power management"
date = 2023-01-22
[taxonomies]
tags = ["hardware"]
+++

## Maximum power consumption of a processor

Our Intel CPU has a thermal design power (TDP) of 120W. The AMD CPU has a TDP of 200W.

The Intel CPU has 80 cores while the AMD one has 128 cores. For Intel, this gives us 1.5W per core; for AMD, 1.56W.

The TDP is the average power the processor can sustain indefinitely, and it is the power level the cooling solution needs to be designed for, for reliability. The TDP is measured under a worst case load, with all cores running at 1.8GHz (the base frequency).

## C-State vs. P-State

We have two ways to control the power consumption:

- disabling a subsystem
- decreasing the voltage

This is done using:

- _C-States_, to optimize power consumption
- _P-States_, to optimize voltage and CPU frequency

A _C-State_ means that one or more subsystems of the CPU are executing nothing: the subsystem is idle, powered down.

A _P-State_ means that the subsystem is running, but it does not require full performance, so the voltage and/or frequency at which it operates is decreased.

The states are numbered starting from 0. The higher the number, the more power is saved. `C0` means no power saving. `P0` means maximum performance (thus maximum frequency, voltage and power used).

### C-state

A timeline of power saving using C-states is as follows:

1. normal operation is at C0
2. the clock of an idle core is stopped (C1)
3. the local caches (L1/L2) of the core are flushed and the core is powered down (C3)
4. when all the cores are powered down, the shared cache of the package (L3/LLC) is flushed and the whole package/CPU can be powered down
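
On Linux, the C-states exposed by the kernel's `cpuidle` driver can be inspected from sysfs; a quick sketch (these are the standard sysfs paths):

```bash
# List the C-states exposed for CPU 0, with a short description of each.
for state in /sys/devices/system/cpu/cpu0/cpuidle/state*; do
    printf '%s: %s\n' "$(cat "$state/name")" "$(cat "$state/desc")"
done
```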

| state | description |
| ----- | ----------- |
| C0    | operating state |
| C1    | a state where the processor is not executing instructions, but can return to an executing state essentially instantaneously |
| C2    | a state where the processor maintains all software-visible state, but may take longer to wake up |
| C3    | a state where the processor does not need to keep its cache coherent, but maintains other state |

Running `cpuid` we can find all the supported C-states for a processor (Intel(R) Xeon(R) Gold 6122 CPU @ 1.80GHz):

```
 MONITOR/MWAIT (5):
    smallest monitor-line size (bytes) = 0x40 (64)
    largest monitor-line size (bytes) = 0x40 (64)
    enum of Monitor-MWAIT exts supported = true
    supports intrs as break-event for MWAIT = true
    number of C0 sub C-states using MWAIT = 0x0 (0)
    number of C1 sub C-states using MWAIT = 0x2 (2)
    number of C2 sub C-states using MWAIT = 0x0 (0)
    number of C3 sub C-states using MWAIT = 0x2 (2)
    number of C4 sub C-states using MWAIT = 0x0 (0)
    number of C5 sub C-states using MWAIT = 0x0 (0)
    number of C6 sub C-states using MWAIT = 0x0 (0)
    number of C7 sub C-states using MWAIT = 0x0 (0)
```

If I interpret this correctly:

- there is one `C0` state
- there are two sub C-states for `C1`
- there are two sub C-states for `C3`

### P-state

Being in a P-state means the CPU core is also in `C0`, since it has to be powered to execute code.

P-states allow changing the voltage and frequency of a CPU core to decrease its power consumption.

A P-state refers to a frequency-voltage pair. The highest operating point is the maximum state, `P0`.

| state | description |
| ----- | ------------------------------------------ |
| P0    | maximum power and frequency |
| P1    | less than P0, voltage and frequency scaled |
| P2    | less than P1, voltage and frequency scaled |
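
The current operating point of a core is visible through the `cpufreq` interface in sysfs (again, standard kernel paths):

```bash
# Current frequency (kHz), allowed range, and the governor driving
# P-state selection for CPU 0.
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
```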

## ACPI power state

The ACPI specification defines the following four global "Gx" states and six sleep "Sx" states:

| Gx   | name           | Sx   | description |
| ---- | -------------- | ---- | ----------- |
| `G0` | working        | `S0` | The computer is running and executing instructions |
| `G1` | sleeping       | `S1` | Processor caches are flushed and the CPU stops executing instructions |
| `G1` | sleeping       | `S2` | CPU powered off, dirty caches flushed to RAM |
| `G1` | sleeping       | `S3` | Suspend to RAM |
| `G1` | sleeping       | `S4` | Suspend to disk, all content of the main memory is flushed to non-volatile memory |
| `G2` | soft off       | `S5` | PSU still supplies power, a full reboot is required |
| `G3` | mechanical off | `S6` | The system is safe for disassembly |

When we are in any C-state, we are in `G0`.

## Speed Select Technology

[Speed Select Technology](https://en.wikichip.org/wiki/intel/speed_select_technology) (SST) is a set of power management controls that allows a system administrator to customize per-core performance. By configuring the performance of specific cores and affinitizing workloads to those cores, higher software performance can be achieved. SST supports multiple types of customization:

- Frequency Prioritization (SST-CP): allows specific cores to clock higher by reducing the frequency of cores running lower-priority software.
- Speed Select Base Freq (SST-BF): allows specific cores to run at a higher base frequency (P1) by reducing the base frequency (P1) of other cores.

## Turbo Boost

The TDP is the maximum power consumption the CPU can sustain. When the power consumption is low (e.g. many cores are in P1 or deeper states), the CPU frequency can be increased beyond the base frequency to take advantage of the headroom, since this condition does not increase the power consumption beyond the TDP.

Modern CPUs are heavily reliant on "Turbo" (Intel) or "Boost" (AMD) ([TBT](https://en.wikichip.org/wiki/intel/turbo_boost_technology) and [TBTM](https://en.wikichip.org/wiki/intel/turbo_boost_max_technology)).

In our case, the Intel 6122 is rated at 1.8GHz, a.k.a. the "stamp speed". If we want to run the CPU at a consistent frequency, we'd have to choose 1.8GHz or below, and we'd lose significant performance if we were to disable turbo/boost.

### Turbo boost max

During the manufacturing process, Intel is able to test each die and determine which cores possess the best overclocking capabilities. That information is then stored in the CPU, in order from best to worst.
diff --git a/content/fogcutter.md b/content/fogcutter.md
new file mode 100644
index 0000000..9ae6b98
--- /dev/null
+++ b/content/fogcutter.md
@@ -0,0 +1,63 @@
+++
title = "SOMA Fog Cutter"
date = 2024-09-22
template = "bike.html"
[taxonomies]
tags = ["bike"]
+++

A [SOMA](https://www.somafab.com/archives/product/fog-cutter-frame-set) [Fog Cutter](https://www.somafab.com/archives/product/fog-cutter-frame-set) road bike, built by [Blue Heron Bikes](https://www.blueheronbikesberkeley.com/bike-accessories) in Berkeley. The size of the frame is 58cm and the color is blue. It comes with a carbon fork.

## Part list

| part                 | model |
| -------------------- | ----- |
| Frame                | [SOMA Fog Cutter](https://www.somafab.com/archives/product/fog-cutter-frame-set) 58cm in blue |
| Fork                 | [Soma Fork Fog Cutter Carbon Cerulean Blue (Thru-Axle)](https://www.somafabshop.com/shop/231007-soma-fork-fog-cutter-carbon-cerulean-blue-thru-axle-5617?search=cerulean&product=product.template%285617%2C%29#attr=) |
| Headset              | White Industries |
| Front and rear wheel | [DT Swiss XR 331 29 20 mm DB VI](https://www.dtswiss.com/en/support/product-support?matnr=RTXR3329N28S011223) |
| Tire                 | Teravail Rampart 38 |
| Front hub            | [SP dynamo PL7](https://www.sp-dynamo.com/series7-pl7) |
| Rear hub             | [Shimano Tiagra RS470](https://bike.shimano.com/en-US/product/component/tiagra-4700/FH-RS470.html) |
| Rear derailleur      | [Shimano Ultegra RX 11 speed](https://bike.shimano.com/en-US/product/component/ultegra-rx/RD-RX800-GS.html) |
| Front derailleur     | [Shimano Metrea 2x11 speed](https://bike.shimano.com/en-US/product/component/metrea-u5000/FD-U5000-F.html) |
| Handlebar            | [Zipp Service Course 70 Ergo Drop Handlebar 42cm](https://www.sram.com/en/zipp/models/hb-dbsc-7e-b2) |
| Brifter              | [Shimano Dura Ace 9120](https://bike.shimano.com/en-US/product/component/duraace-r9100/ST-R9120-R.html) |
| Saddle               | [Brooks C15 black](https://www.brooksengland.com/en_us/c15.html) |
| Seat post            | [SIM Works Beatnik post (black)](https://www.sim.works/products/beatnik-post-1) |
| Front light          | [Busch & Müller Lumotec IQ-X Headlamp](https://www.bumm.de/en/products/dynamo-scheinwerfer/produkt/164rtsndi-01-schwarz-164rtsndi-silber%20.html) |
| Brake calipers       | [Shimano rs785](https://bike.shimano.com/en-EU/product/component/ultegra-6870-di2/BR-RS785.html) |
| Crank                | [White Industries Square Taper road cranks](https://www.whiteind.com/product/square-taper-road-cranks/) |
| Chain ring           | [White Industries 52/32](https://www.whiteind.com/product/vbc-chainring-sets/) |
| Pedal                | Shimano PD-R550 SPD-SL (black) - can change for SPD if preferred |
| Bar tape             | [Lizard Skins (brown)](https://www.lizardskins.com/cycling) |
diff --git a/content/git-link-and-sourcegraph.md b/content/git-link-and-sourcegraph.md
new file mode 100644
index 0000000..c86b465
--- /dev/null
+++ b/content/git-link-and-sourcegraph.md
@@ -0,0 +1,52 @@
+++
title = "emacs' git-link and sourcegraph"
date = 2021-08-24
[taxonomies]
tags = ["emacs"]
+++

I use [sourcegraph](https://sourcegraph.com/) for searching code, and I sometimes need to share a link to the source code I'm looking at in a buffer. For this, the package [`git-link`](https://github.com/sshaw/git-link) is great.

To integrate sourcegraph and `git-link`, the [documentation](https://github.com/sshaw/git-link#sourcegraph) recommends adding a remote entry named `sourcegraph` in the repository, like this:

```bash
git remote add sourcegraph https://sourcegraph.com/github.com/sshaw/copy-as-format
```

The next time you run `M-x git-link` in a buffer, it will use the URL associated with that remote. That works great, except that you now need to add this remote to every repository. Instead, for my usage, I came up with the following solution:

```lisp
(use-package git-link
  :ensure t
  :after magit
  :bind (("C-c g l" . git-link)
         ("C-c g a" . git-link-commit))
  :config
  (defun fcuny/get-sg-remote-from-hostname (hostname)
    (format "sourcegraph.<$domain>.<$tld>/%s" hostname))

  (defun fcuny/git-link-work-sourcegraph (hostname dirname filename _branch commit start end)
    ;; For a given repository, build the proper link for sourcegraph.
    ;; Use the default branch of the repository instead of the
    ;; current one (we might be on a feature branch that is not
    ;; available on the remote).
    (require 'magit-branch)
    (let ((sg-base-url (fcuny/get-sg-remote-from-hostname hostname))
          (main-branch (magit-main-branch)))
      (git-link-sourcegraph sg-base-url dirname filename main-branch commit start end)))

  (defun fcuny/git-link-commit-work-sourcegraph (hostname dirname commit)
    (let ((sg-base-url (fcuny/get-sg-remote-from-hostname hostname)))
      (git-link-commit-sourcegraph sg-base-url dirname commit)))

  (add-to-list 'git-link-remote-alist '("twitter" fcuny/git-link-work-sourcegraph))
  (add-to-list 'git-link-commit-remote-alist '("twitter" fcuny/git-link-commit-work-sourcegraph))

  (setq git-link-open-in-browser 't))
```

We use different domains to host various git repositories at work (e.g. `git.$work`, `gitfoo.$work`, etc). Each of them maps to a different URI for sourcegraph (e.g. `sourcegraph.$work/gitfoo`).

`git-link-commit-remote-alist` is an [association list](https://www.gnu.org/software/emacs/manual/html_node/elisp/Association-Lists.html) that takes a regular expression and a function. The custom function receives the hostname for the remote repository, which is then used to generate the URI for our sourcegraph instance. I then call `git-link-sourcegraph`, replacing the hostname with the URI for sourcegraph.

Now I can run `M-x git-link` in any repository where the host for the origin git repository matches `twitter`, without having to set up the custom remote first.
diff --git a/content/google-doc-failure.md b/content/google-doc-failure.md
new file mode 100644
index 0000000..b4a65b9
--- /dev/null
+++ b/content/google-doc-failure.md
@@ -0,0 +1,69 @@
+++
title = "Google Doc Failures"
date = 2021-04-11
[taxonomies]
tags = ["practices"]
+++

In most use cases, Google Doc is an effective tool to create "write once, read never" documents.

## Convenience

Google Doc (GDoc from now on) is the most common way of writing and sharing documents at my current job. It's very easy to start a new document, even more so now that we can open a new document straight from the browser and start typing right away.

Like most of my co-workers, I use it frequently during the day. Some of these documents are drafts of some communication that I want others to review before I share it with a broader audience; it can be a [Request For Comments](https://en.wikipedia.org/wiki/Request_for_Comments) for a project; meeting notes for others to read; information that I need to capture during an incident or a debugging session; interview notes; etc.

I would not be surprised if the teams I work closely with generate 50 new documents each week.

## ETOOMANYTABS

I have a tendency to keep hundreds of open tabs in my browser during the week. A majority of these tabs are GDocs, and I think this is one of the true failures of the product. Why do I have so many tabs? There are mainly two reasons.

The first reason is a problem with Chrome's UX itself: it happily lets me open the same URL as many times as I want, in as many tabs, instead of sending me to the already opened tab if the document is loaded.
It's not uncommon for me to find the same document opened in 5 different tabs.

The second reason, and it's the most important one: I know that if I need to read or comment on a doc and I close the tab, I'll likely never find that document again, or will completely forget about it.

## Discoverability

In 'the old days', you'd start a new document in Word or LibreOffice, and as you hit "save" for the first time, you had two decisions to make: how am I going to name that file, and where am I going to save it on disk.

With GDoc these questions don't have to be answered: you don't have to name the file, and it does not matter where it lives. I likely have hundreds of docs named 'untitled' in my "drive". I also don't have to think about where they will live, because they are saved automatically for me. I'm sure there are hundreds of studies showing that these two simple steps are actually complex for many users and create useless friction (in which folder do I store it; should I organize the documents by team, year, project; do I name it with the date and the current project; etc.).

GDoc being a Google product, it seems pretty obvious that they would come up with a better solution: let's not organize these files in a strict hierarchy, and let's instead search for them.

Unfortunately, GDoc's search is really poor (and I'm being kind). By default most of us start by looking for some words we know are in the doc, maybe even in the title. But when working on multiple projects that are related to the same technology, you suddenly get hundreds of documents matching your query. It's unclear how the returned set is ordered (by date? by author? by some scoring that is invisible to me?).

You can also search by owner, but here is another annoying bit: I think of the owner as the author, so I usually type `author:foo` before realizing it does not work. And that implies you already know who the owner of the document is. In the case of TDDs (Technical Design Documents), I might know which team is behind it, but rarely who the actual author is.

I could search for the title, but I rarely remember or know the name of the document I'm looking for. I could also look by keywords, but when working on a project with tens of related documents, you have to open all the returned docs to see which one is the correct one.

And then what about new members joining the team? They don't know which docs exist, who wrote them, or how they are named. They end up searching and hoping that something good will be returned.

## Workflows

More and more we create workflows around these documents: some of the docs are TDDs that are going through reviews; others are decision documents that require input from multiple teams and are pending approval; others are road map documents that also go through some review process.

As a result we create templates for all kinds of documents, usually with something like "draft → reviews → approved/rejected" at the top. We expect the owner of the doc to mark in bold the status of the doc, to help the reader understand in what state the document is. It's difficult to keep track of open actions and comments. Yes, there's a way to get a list of all of them, but it's not in an obvious place.

As a result, some engineers in my team built an external dashboard with swim lanes which captures the state of a document. We add new documents with their URLs, add who the reviewers are, and we move the doc between the lanes.
Now we have to operate a service and a database to keep track of the status of documents in GDoc.

## Alternatives

When it comes to technical documents, I find [this approach](https://caitiem.com/2020/03/29/design-docs-markdown-and-git/) much more interesting. Some open source projects have adopted a similar workflow ([Kubernetes](https://github.com/kubernetes/enhancements/tree/master/keps), [Go](https://github.com/golang/proposal)).

A new document starts its life as a text file (using whatever markup language your team/company prefers). The document is submitted for review, and the people who need to be consulted are added as reviewers. They can comment on the document, and the author can address the comments and mark them as resolved. It's clear in which state the document is: it's either in review, committed, or rejected. With this approach you also end up with a clear history: as time moves on you can amend the document by submitting a change, and the change goes through the same process.

Newcomers will find the document in the repository, and if they want to see the conversation they can open the review associated with the original change. They can also see how the document evolved over time. It's also easy to publish these documents on an internal website, using a static site generator for example.

One thing that I think is critical: all of this is done using the tools the engineers are already using for their day to day job: a text editor, a version control system, a code review tool.

There are obviously challenges with this approach too:

- **it's more heavy handed**: not everyone likes to write in a text editor using a markup language. It can require some time to learn or get used to the syntax
- **it's harder to integrate schemas / visuals**: but having them checked in to the repository also improves discoverability

It's also true that not all documents suffer the same discoverability challenges:

- meeting notes are usually linked to meeting invites (however if you were not part of the meeting, you end up with the same challenges discovering them)
- drafts for communications are usually not relevant once the communication has been sent
- interview notes are usually transferred to some HR tool when the feedback is submitted
diff --git a/content/leaving-twitter.md b/content/leaving-twitter.md
new file mode 100644
index 0000000..f7d98f5
--- /dev/null
+++ b/content/leaving-twitter.md
@@ -0,0 +1,14 @@
+++
title = "Leaving Twitter"
date = 2022-01-15
[taxonomies]
tags = ["work"]
+++

January 7th 2022 was my last day at Twitter, after more than 7 years at the company.

The first few years I worked as an SRE in the core-storage team, with the PUB/SUB and key-value store teams.

I spent the last four years working with the Compute team, maintaining and operating our (very large) Aurora/Mesos clusters, and working on the adoption of Kubernetes, both in our data centers and in the cloud. Working with Compute was extremely fulfilling to me, as I worked closely with our hardware engineering and kernel/operating system teams.

During these 7 years, I was constantly pushed by my coworkers to grow, to step up to new challenges, and I learned a tremendous amount about running large scale distributed systems. I'm extremely grateful for that experience; it was by far the most interesting and challenging job I've ever had so far.
diff --git a/content/making-sense-intel-amd-cpus.md b/content/making-sense-intel-amd-cpus.md
new file mode 100644
index 0000000..9d1ce84
--- /dev/null
+++ b/content/making-sense-intel-amd-cpus.md
@@ -0,0 +1,236 @@
+++
title = "Making sense of Intel and AMD CPUs naming"
date = 2021-12-29
[taxonomies]
tags = ["hardware"]
+++

## Intel

### Core

The line-up for the Core family is i3, i5, i7 and i9. As of January 2023, the current generation is [Raptor Lake](https://en.wikipedia.org/wiki/Raptor_Lake) (13th generation).

The brand modifiers are:

- **i3**: laptops/low-end desktop
- **i5**: mainstream users
- **i7**: high-end users
- **i9**: enthusiast users

How to read a SKU? Let's use the [i7-12700K](https://ark.intel.com/content/www/us/en/ark/products/134594/intel-core-i712700k-processor-25m-cache-up-to-5-00-ghz.html) processor:

- **i7**: high end users
- **12**: 12th generation
- **700**: SKU digits, usually assigned in the order the processors are developed
- **K**: unlocked

List of suffixes:

| suffix | meaning                                |
| ------ | -------------------------------------- |
| G..    | integrated graphics                    |
| E      | embedded                               |
| F      | requires discrete graphics card        |
| H      | high performance for mobile            |
| HK     | high performance for mobile / unlocked |
| K      | unlocked                               |
| S      | special edition                        |
| T      | power optimized lifestyle              |
| U      | mobile power efficient                 |
| Y      | mobile low power                       |
| X/XE   | unlocked, high end                     |

> **Unlocked**, what does that mean? A processor with the **K** suffix
> is made with an unlocked clock multiplier. When used with some
> specific chipsets, it's possible to overclock the processor.

#### Raptor Lake (13th generation)

Raptor Lake is a hybrid architecture, featuring both P-cores (performance cores) and E-cores (efficient cores), similar to Alder Lake. P-cores are based on the [Raptor Cove](https://en.wikipedia.org/wiki/Golden_Cove#Raptor_Cove) architecture, while the E-cores are based on the Gracemont architecture (same as for Alder Lake).

Available processors:

| model      | p-cores | e-cores | GHz (base) | GHz (boosted) | TDP      |
| ---------- | ------- | ------- | ---------- | ------------- | -------- |
| i9-13900KS | 8 (16)  | 16      | 3.2/2.4    | 6/4.3         | 150/253W |
| i9-13900K  | 8 (16)  | 16      | 3.0/2.0    | 5.8/4.3       | 125/253W |
| i9-13900KF | 8 (16)  | 16      | 3.0/2.0    | 5.8/4.3       | 125/253W |
| i9-13900   | 8 (16)  | 16      | 2.0/1.5    | 5.2/4.2       | 65/219W  |
| i9-13900F  | 8 (16)  | 16      | 2.0/1.5    | 5.2/4.2       | 65/219W  |
| i9-13900T  | 8 (16)  | 16      | 1.1/0.8    | 5.1/3.9       | 35/219W  |
| i7-13700K  | 8 (16)  | 8       | 3.4/2.5    | 5.4/4.2       | 125/253W |
| i7-13700KF | 8 (16)  | 8       | 3.4/2.5    | 5.4/4.2       | 125/253W |
| i7-13700   | 8 (16)  | 8       | 2.1/1.5    | 5.1/4.1       | 65/219W  |
| i7-13700F  | 8 (16)  | 8       | 2.1/1.5    | 5.1/4.1       | 65/219W  |
| i7-13700T  | 8 (16)  | 8       | 1.4/1.0    | 4.8/3.6       | 35/106W  |
| i5-13600K  | 6 (12)  | 8       | 3.5/2.6    | 5.1/3.9       | 125/181W |
| i5-13600KF | 6 (12)  | 8       | 3.5/2.6    | 5.1/3.9       | 125/181W |

For the Raptor Lake generation, as for the Alder Lake generation, the supported socket is the [LGA1700](https://en.wikipedia.org/wiki/LGA_1700).

List of Raptor Lake chipsets:

| feature                     | b760[^7] | h770[^8] | z790[^9] |
| --------------------------- | -------- | -------- | -------- |
| P and E cores over clocking | no       | no       | yes      |
| memory over clocking        | yes      | yes      | yes      |
| DMI 4 lanes                 | 4        | 8        | 8        |
| chipset PCIe 5.0 lanes      |          |          |          |
| chipset PCIe 4.0 lanes      |          |          |          |
| chipset PCIe 3.0 lanes      |          |          |          |
| SATA 3.0 ports              | up to 4  | up to 8  | up to 8  |

#### Alder Lake (12th generation)

Alder Lake is a hybrid architecture, featuring both P-cores (performance cores) and E-cores (efficient cores). P-cores are based on the [Golden Cove](https://en.wikipedia.org/wiki/Golden_Cove) architecture, while the E-cores are based on the Gracemont architecture.

This is a [good article](https://www.anandtech.com/show/16881/a-deep-dive-into-intels-alder-lake-microarchitectures/2) to read about this model. Inside the processor there's a microcontroller that monitors what each thread is doing. The OS scheduler can use this as a hint for which core a thread should be scheduled on (performance or efficiency).

As of December 2021 this is not yet properly supported by the Linux kernel.

Available processors:

| model      | p-cores | e-cores | GHz (base) | GHz (boosted) | TDP  |
| ---------- | ------- | ------- | ---------- | ------------- | ---- |
| i9-12900K  | 8 (16)  | 8       | 3.2/2.4    | 5.1/3.9       | 241W |
| i9-12900KF | 8 (16)  | 8       | 3.2/2.4    | 5.1/3.9       | 241W |
| i7-12700K  | 8 (16)  | 4       | 3.6/2.7    | 4.9/3.8       | 190W |
| i7-12700KF | 8 (16)  | 4       | 3.6/2.7    | 4.9/3.8       | 190W |
| i5-12600K  | 6 (12)  | 4       | 3.7/2.8    | 4.9/3.6       | 150W |
| i5-12600KF | 6 (12)  | 4       | 3.7/2.8    | 4.9/3.6       | 150W |

- support DDR4 and DDR5 (up to DDR5-4800)
- support PCIe 4.0 and 5.0 (16 PCIe 5.0 lanes and 4 PCIe 4.0 lanes)

For the Alder Lake generation, the supported socket is the [LGA1700](https://en.wikipedia.org/wiki/LGA_1700).

For now the only supported chipsets for Alder Lake are:

| feature                     | z690[^1] | h670[^2] | b660[^3] | h610[^4] | q670[^6] | w680[^5] |
| --------------------------- | -------- | -------- | -------- | -------- | -------- | -------- |
| P and E cores over clocking | yes      | no       | no       | no       | no       | yes      |
| memory over clocking        | yes      | yes      | yes      | no       | -        | yes      |
| DMI 4 lanes                 | 8        | 8        | 4        | 4        | 8        | 8        |
| chipset PCIe 4.0 lanes      | up to 12 | up to 12 | up to 6  | none     |          |          |
| chipset PCIe 3.0 lanes      | up to 16 | up to 12 | up to 8  | 8        |          |          |
| SATA 3.0 ports              | up to 8  | up to 8  | 4        | 4        | up to 8  | up to 8  |

### Xeon

Xeon is the brand of Intel processors designed for non-consumer servers and workstations. The most recent generations are:

| name            | availability |
| --------------- | ------------ |
| Skylake         | 2015         |
| Cascade lake    | 2019         |
| Cooper lake     | 2022         |
| Sapphire rapids | 2023         |

The following brand identifiers are used:

- platinum
- gold
- silver
- bronze
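
To map a machine back to the naming schemes described here, the model name reported by the kernel is enough:

```bash
# The model name string encodes the brand modifier, the generation,
# the SKU digits and the suffix (e.g. "i7-12700K").
lscpu | grep 'Model name'
# or, without lscpu:
grep -m1 'model name' /proc/cpuinfo
```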

## AMD

### Ryzen

There are multiple generations of this brand of processors. They are based on the zen microarchitecture. The current (as of January 2023) generation is Ryzen 7000.

The brand modifiers are:

- ryzen 3: entry level
- ryzen 5: mainstream
- ryzen 7: high end performance
- ryzen 9: enthusiast

List of suffixes:

| suffix | meaning                                                                           |
| ------ | --------------------------------------------------------------------------------- |
| X      | high performance                                                                  |
| G      | integrated graphics                                                               |
| T      | power optimized lifecycle                                                         |
| S      | low power desktop with integrated graphics                                        |
| H      | high performance mobile                                                           |
| U      | standard mobile                                                                   |
| M      | low power mobile                                                                  |
| 3D     | features [3D V-cache technology](https://www.amd.com/en/technologies/3d-v-cache)  |

### EPYC

EPYC is the AMD brand of processors for the server market, based on the zen architecture. They use the [SP3](https://en.wikipedia.org/wiki/Socket_SP3) socket. The EPYC processors are chipset-free.

### Threadripper

The Threadripper line is for high performance desktops. It uses the [TR4](https://en.wikipedia.org/wiki/Socket_TR4) socket. At the moment there's only one chipset that supports this processor, the [X399](https://en.wikipedia.org/wiki/List_of_AMD_chipsets#TR4_chipsets).

The Threadripper based on the zen3 architecture is not yet released, but it's expected to hit the market in the first half of Q1 2022.

### Sockets/Chipsets

The majority of these processors use the [AM4 socket](https://en.wikipedia.org/wiki/Socket_AM4). The Threadripper line uses different sockets.

There are multiple [chipsets](https://en.wikipedia.org/wiki/Socket_AM4#Chipsets) for the AM4 socket. The more advanced ones are the B550 and the X570.

The Threadripper processors use the TR4, sTRX4 and sWRX8 sockets.

### Zen 3

Zen 3 was released in November 2020.

| model         | cores   | GHz (base) | GHz (boosted) | PCIe lanes | TDP  |
| ------------- | ------- | ---------- | ------------- | ---------- | ---- |
| ryzen 5 5600X | 6 (12)  | 3.7        | 4.6           | 24         | 65W  |
| ryzen 7 5800  | 8 (16)  | 3.4        | 4.6           | 24         | 65W  |
| ryzen 7 5800X | 8 (16)  | 3.8        | 4.7           | 24         | 105W |
| ryzen 9 5900  | 12 (24) | 3.0        | 4.7           | 24         | 65W  |
| ryzen 9 5900X | 12 (24) | 3.7        | 4.8           | 24         | 105W |
| ryzen 9 5950X | 16 (32) | 3.4        | 4.9           | 24         | 105W |

- support PCIe 3.0 and PCIe 4.0 (except for the G series)
- only support DDR4 (up to DDR4-3200)

### Zen 4

Zen 4 was released in September 2022.

- only supports DDR5
- all desktop processors feature 28 (24 + 4) PCIe 5.0 lanes
- all desktop processors feature 2 x 4 lane PCIe interfaces (mostly for M.2 storage devices)

| model           | cores   | GHz (base) | GHz (boosted) | TDP  |
| --------------- | ------- | ---------- | ------------- | ---- |
| ryzen 5 7600X   | 6 (12)  | 4.7        | 5.3           | 105W |
| ryzen 5 7600    | 6 (12)  | 3.8        | 5.1           | 65W  |
| ryzen 7 7800X3D | 8 (16)  |            | 5.0           | 120W |
| ryzen 7 7700X   | 8 (16)  | 4.5        | 5.4           | 105W |
| ryzen 7 7700    | 8 (16)  | 3.8        | 5.3           | 65W  |
| ryzen 9 7900    | 12 (24) | 3.7        | 5.4           | 65W  |
| ryzen 9 7900X   | 12 (24) | 4.7        | 5.6           | 170W |
| ryzen 9 7900X3D | 12 (24) | 4.4        | 5.6           | 120W |
| ryzen 9 7950X   | 16 (32) | 4.5        | 5.7           | 170W |
| ryzen 9 7950X3D | 16 (32) | 4.2        | 5.7           | 120W |

[^1]: https://ark.intel.com/content/www/us/en/ark/products/218833/intel-z690-chipset.html
[^2]: https://www.intel.com/content/www/us/en/products/sku/218831/intel-h670-chipset/specifications.html
[^3]: https://ark.intel.com/content/www/us/en/ark/products/218832/intel-b660-chipset.html
[^4]: https://www.intel.com/content/www/us/en/products/sku/218829/intel-h610-chipset/specifications.html
[^5]: https://ark.intel.com/content/www/us/en/ark/products/218834/intel-w680-chipset.html
[^6]: https://ark.intel.com/content/www/us/en/ark/products/218827/intel-q670-chipset.html
[^7]: https://www.intel.com/content/www/us/en/products/sku/229719/intel-b760-chipset/specifications.html
[^8]: https://www.intel.com/content/www/us/en/products/sku/229720/intel-h770-chipset.html
[^9]: https://www.intel.com/content/www/us/en/products/sku/229721/intel-z790-chipset/specifications.html
diff --git a/content/misc/fogcutter.md b/content/misc/fogcutter.md
deleted file mode 100644
index 8bd39e6..0000000
--- a/content/misc/fogcutter.md
+++ /dev/null
@@ -1,69 +0,0 @@
-+++
-title = "SOMA Fog Cutter"
-date = 2024-09-22
-template = "orphan.html"
-[taxonomies]
-tags = ["bike"]
-+++
-
-A [SOMA](https://www.somafab.com/archives/product/fog-cutter-frame-set) [Fog Cutter](https://www.somafab.com/archives/product/fog-cutter-frame-set) road bike, build by [Blue Heron Bike](https://www.blueheronbikesberkeley.com/bike-accessories) in Berkeley. The size of the frame is 58cm and the color is blue. It comes with a carbon fork.
-
-**Asking price is $2600** or best offer. To pick up in Berkeley.
-
-the bike is in great condition.
-
-You can contact me by email.
- - - - - -## Part list - -| part | model | -| -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Frame | [SOMA Fog Cutter](https://www.somafab.com/archives/product/fog-cutter-frame-set) 58cm in blue | -| Fork | [Soma Fork Fog Cutter Carbon Cerulean Blue (Thru-Axle)](https://www.somafabshop.com/shop/231007-soma-fork-fog-cutter-carbon-cerulean-blue-thru-axle-5617?search=cerulean&product=product.template%285617%2C%29#attr=) | -| Headset | White Industries | -| Front and rear wheel | [DT Swiss XR 331 29 20 mm DB VI](https://www.dtswiss.com/en/support/product-support?matnr=RTXR3329N28S011223) | -| Tire | trevail rampart 38 | -| Front hub | [SP dynamo PL7](https://www.sp-dynamo.com/series7-pl7) | -| Rear hub | [Shimano Tiagra rs740](https://bike.shimano.com/en-US/product/component/tiagra-4700/FH-RS470.html) | -| Rear derailleur | [Shimano Ultegra RX 11 speed](https://bike.shimano.com/en-US/product/component/ultegra-rx/RD-RX800-GS.html) | -| Front derailleur | [Shimano Metrea 2x11 speed](https://bike.shimano.com/en-US/product/component/metrea-u5000/FD-U5000-F.html) | -| Handlebar | [Zipp Service Course 70 Ergo Drop Handlebar 42cm](https://www.sram.com/en/zipp/models/hb-dbsc-7e-b2) | -| Brifter | [Shimano Dura Ace 9120](https://bike.shimano.com/en-US/product/component/duraace-r9100/ST-R9120-R.html) | -| Saddle | [Brooks C15 black](https://www.brooksengland.com/en_us/c15.html) | -| Seat post | [SIM Works Beatnik post (black)](https://www.sim.works/products/beatnik-post-1) | -| Front light | [Busch & Müller Lumotec IQ-X Headlamp](https://www.bumm.de/en/products/dynamo-scheinwerfer/produkt/164rtsndi-01-schwarz-164rtsndi-silber%20.html) | -| Brake calipers | [Shimano rs785](https://bike.shimano.com/en-EU/product/component/ultegra-6870-di2/BR-RS785.html) | -| Crank | [White Industries Square Taper road cranks](https://www.whiteind.com/product/square-taper-road-cranks/) | -| Chain ring | [White Industries 52/32](https://www.whiteind.com/product/vbc-chainring-sets/) | -| Pedal | Shimano PD-R550 SPD-SL (black) - can change for SPD if preferred | -| Bar tape | [Lizzard Skin (brown)](https://www.lizardskins.com/cycling) | diff --git a/content/nix-raid-systemd-boot.md b/content/nix-raid-systemd-boot.md new file mode 100644 index 0000000..de68695 --- /dev/null +++ b/content/nix-raid-systemd-boot.md @@ -0,0 +1,53 @@ ++++ +title = "Workaround md raid boot issue in NixOS 22.11" +date = 2023-01-10 +[taxonomies] +tags = ["nix"] ++++ + +For about a year now I've been running [NixOS](https://nixos.org/ "NixOS") on my personal machines. Yesterday I decided to go ahead and upgrade my NAS from NixOS 22.05 to [22.11](https://nixos.org/blog/announcements.html#nixos-22.11). On that machine, all the disks are encrypted, and there are two RAID0 devices. To unlock the drives, I log into the [SSH daemon running in `initrd`](https://nixos.wiki/wiki/Remote_LUKS_Unlocking), where I can type my passphrase. This time however, instead of a prompt to unlock the disk, I see the following message: + +``` +waiting for device /dev/disk/by-uuid/66c58a92-45fe-4b03-9be0-214ff67c177c to appear... +``` + +followed by a timeout and then I'm asked if I want to reboot the machine. I do reboot the machine, and same thing happens. 

Now, and this is something really great about NixOS, I can boot into the previous generation (on 22.05), and this time I'm prompted for my passphrase, the disks are unlocked, and I can log into my machine. This eliminates the possibility of a hardware failure! I also have a way to get a working machine to do more builds if needed. Knowing that I can easily switch from a broken generation to a working one gives me more confidence in making changes to my system.

I then reboot again into the broken build, and drop into a `busybox` shell. I look at what `blkid` reports, and I confirm that my disks are all present and that they have a **UUID** set. Next I check what's listed under `/dev/disk/by-uuid` and, surprise, the disks are not there. They are however under `/dev/disk`. Now, looking at `/nix/store` I only see a few things, and one of them is a script named `stage-1-init.sh`. I quickly read the script, checked what it does, and confirmed that it was blocking on the disks. I looked at what was reported by `udevadm info` and I could see that `DEVLINKS` was missing the path for `by-uuid`.

My laptop has a similar setup, but without RAID devices. I had already updated it to 22.11, and had rebooted the laptop without issues. To be sure, I ran another update and rebooted, and I was able to unlock the drive and log into the machine without problem.

From here I have enough information to start searching for a similar issue. I got pretty lucky, and two issues I found were:

- [Since systemd-251.3 mdadm doesn't start at boot time #196800](https://github.com/nixoS/nixpkgs/issues/196800)
- [Won't boot when root on raid0 with boot.initrd.systemd=true #199551](https://github.com/nixoS/nixpkgs/issues/199551)

The proposed solution was easy:

```diff
@@ -43,7 +43,7 @@
   };

   boot.initrd.luks.devices."raid-fast".device =
-    "/dev/disk/by-uuid/66c58a92-45fe-4b03-9be0-214ff67c177c";
+    "/dev/disk/by-id/md-name-nixos:fast";

   fileSystems."/data/slow" = {
     device = "/dev/disk/by-uuid/0f16db51-0ee7-48d8-9e48-653b85ecbf0a";
@@ -51,7 +51,7 @@
   };

   boot.initrd.luks.devices."raid-slow".device =
-    "/dev/disk/by-uuid/d8b21267-d457-4522-91d9-5481b44dd0a5";
+    "/dev/disk/by-id/md-name-nixos:slow";
```

I rebuilt, rebooted, and success: I was able to get access to the machine.

## Takeaways

I now have a mitigation for the problem; however, I still don't have a root cause. Since it's only the `by-uuid` path that is missing, and this is managed by `udev`, I'm guessing that some rules for `udev` have changed, but so far I can't find anything about that.

It's really great to be able to easily switch back to a previous generation of my system, so I can debug and experiment with different solutions. If this had happened with another distribution, getting out of this mess would have been more tedious.
diff --git a/content/no-ssh-to-prod.md b/content/no-ssh-to-prod.md
new file mode 100644
index 0000000..9c2d20a
--- /dev/null
+++ b/content/no-ssh-to-prod.md
@@ -0,0 +1,29 @@
+++
title = "No SSH to production"
date = 2022-11-28
[taxonomies]
tags = ["practices"]
+++

It's not uncommon to hear talk about preventing engineers from SSHing to production machines. While I think it's a noble goal, I think most organizations are not ready for it in the short or even medium term.

Why do we usually need to get a shell on a machine?
The most common reason is to investigate a system that is behaving in an unexpected way, where we need to collect information, maybe using `strace`, `tcpdump`, `perf` or one of the BCC tools. Another reason might be to validate that a change deployed to a single machine is applied correctly, before rolling it out to a large portion of the fleet.

If you end up writing a postmortem after the investigation session, one of the reviewers might ask why we needed to get a shell on the machine in the first place. Usually it's because we're lacking the capabilities to collect that kind of information remotely. Someone will write an action item to improve this, it will be labeled 'long-term-action-item', and it will disappear in the bottomless backlog of a random team (how many organizations have clear ownership for managing access to machines?).

In most cases, I think we would be better off breaking the problem down into smaller chunks, and focusing on iterative improvements. "No one gets to SSH to machines in production" is a poorly framed problem.

What I think is better is to ask the following questions:

- who has access to the machines
- who actually SSHes to the machines
- why do they need to SSH to the machines
- was the state of the machine altered after someone logged into it

For the first question, I'd recommend that we don't create user accounts and don't distribute engineers' SSH public keys on the machines. I'd create an 'infra' user account, and use signed SSH certificates (for example with [vault](https://www.hashicorp.com/products/vault/ssh-with-vault)). Only engineers who _have_ to have access should be able to sign their SSH key. That way you've limited the risks to a few engineers, and you have an audit trail of who requested access. You can build reports from these audit logs to see how frequently engineers request access. For the 'infra' user, I'd limit its privileges, and make sure it can only run the commands required for debugging/troubleshooting.

Using Linux's audit logs, you can also generate reports on which commands are run. You can learn why the engineers needed to get on the host, and the SRE organization can use that to build services and tools that enable new capabilities (for example, a service to collect traces, or do network captures remotely).

Using the same audit logs, look for commands that modify the filesystem (for example `apt`, `yum`, `mkdir`): if the hosts are stateless, send them through the provisioning pipeline.

At that point you've hardened the system, and you get visibility into what engineers are doing on these machines. Engineers being able to get a shell on a production machine is a high risk: even if your disks are encrypted at rest, when the host is running an engineer can see data they are not supposed to look at, etc. But I think knowing who/when/why is more important than completely blocking SSH access: there's always going to be that one incident where there's nothing you can do without a shell on that one host.
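
As an illustration of the certificate approach, requesting a signed key from Vault's SSH secrets engine looks roughly like this (the `ssh-client-signer` mount point, the `infra` role, and the host name are assumptions for the example):

```bash
# Ask Vault to sign our public key; the result is a short-lived
# certificate that hosts configured to trust the CA will accept.
vault write -field=signed_key ssh-client-signer/sign/infra \
    public_key=@$HOME/.ssh/id_ed25519.pub > ~/.ssh/id_ed25519-cert.pub

# Log in as the shared 'infra' user, presenting the certificate.
ssh -i ~/.ssh/id_ed25519 infra@host.example.net
```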
diff --git a/content/notes/_index.md b/content/notes/_index.md
deleted file mode 100644
index 7e8d52b..0000000
--- a/content/notes/_index.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-title: Note
-sort_by: date
-render: true
-template: blog.html
----
diff --git a/content/notes/containerd-to-firecracker.md b/content/notes/containerd-to-firecracker.md
deleted file mode 100644
index df26cba..0000000
--- a/content/notes/containerd-to-firecracker.md
+++ /dev/null
@@ -1,679 +0,0 @@
-+++
-title = "containerd to firecracker"
-date = 2021-05-15
-[taxonomies]
-tags = ["containers"]
-+++
-
-fly.io had an [interesting
-article](https://fly.io/blog/docker-without-docker/) about how they use
-docker images to create VMs for `firecracker`.
-
-They describe the process as follows:
-
-1. Pull a container from a registry
-2. Create a loop device to store the container's filesystem on
-3. Unpack the container into the mounted loop device
-4. Create a second block device and inject init, kernel, configuration
-   and other stuff
-5. Attach persistent volumes (if any)
-6. Create a TAP device and configure it
-7. Hand it off to Firecracker and boot that thing
-
-That's pretty detailed, and I'm curious how difficult it is to
-implement. I've been meaning to look into Firecracker and into
-containerd's API for a while, so this is a perfect opportunity to get
-started. The code is available
-[here](https://git.fcuny.net/containerd-to-vm/).
-
-## #1 Pull a container from a registry with `containerd`
-
-`containerd` has pretty [detailed
-documentation](https://pkg.go.dev/github.com/containerd/containerd).
-From the main page we can see the following example to create a client.
-
-```go
-import (
-    "log"
-
-    "github.com/containerd/containerd"
-)
-
-func main() {
-    client, err := containerd.New("/run/containerd/containerd.sock")
-    if err != nil {
-        log.Fatal(err)
-    }
-    defer client.Close()
-}
-```
-
-And pulling an image is also pretty straightforward:
-
-```go
-image, err := client.Pull(context, "docker.io/library/redis:latest")
-```
-
-The `Pull` method returns an
-[`Image`](https://pkg.go.dev/github.com/containerd/containerd@v1.4.4/images#Image)
-and there are a few methods associated with it.
-
-Since `containerd` has namespaces, it's possible to specify the
-namespace we want to use when working with the API:
-
-```go
-ctx := namespaces.WithNamespace(context.Background(), "c2vm")
-image, err := client.Pull(ctx, "docker.io/library/redis:latest")
-```
-
-The image will now be stored in the `c2vm` namespace. We can verify this
-with:
-
-```bash
-; sudo ctr -n c2vm images ls -q
-docker.io/library/redis:latest
-```
-
-## #2 Create a loop device to store the container's filesystem on
-
-This is going to be pretty straightforward. To create a loop device we
-need to:
-
-1. pre-allocate space to a file
-2. create a filesystem in that file
-3. mount it at some destination
-
-There are two common ways to pre-allocate space to a file: `dd` and
-`fallocate` (there are likely more ways to do this). I'll go with
-`fallocate` for this example.
-
-First, to be safe, we create a temporary file, and use `renameio` to
-handle the renaming (I recommend reading the module's documentation).
-
-```go
-f, err := renameio.TempFile("", rawFile)
-if err != nil {
-    return err
-}
-defer f.Cleanup()
-```
-
-Now to do the pre-allocation (we're making an assumption here that 2GB
-is enough; we could check the size of the container before doing this):
-
-```go
-command := exec.Command("fallocate", "-l", "2G", f.Name())
-if err := command.Run(); err != nil {
-    return fmt.Errorf("fallocate error: %s", err)
-}
-```
-
-We can now create an ext4 filesystem in that file:
-
-```go
-command = exec.Command("mkfs.ext4", "-F", f.Name())
-if err := command.Run(); err != nil {
-    return fmt.Errorf("mkfs.ext4 error: %s", err)
-}
-```
-
-Now we can safely rename the temporary file to the file we actually
-want:
-
-```go
-f.CloseAtomicallyReplace()
-```
-
-And to mount that file:
-
-```go
-command = exec.Command("mount", "-o", "loop", rawFile, mntDir)
-if err := command.Run(); err != nil {
-    return fmt.Errorf("mount error: %s", err)
-}
-```
-
-## #3 Unpack the container into the mounted loop device
-
-Extracting the container using `containerd` is pretty simple. Here's the
-function that I use:
-
-```go
-func extract(ctx context.Context, client *containerd.Client, image containerd.Image, mntDir string) error {
-    manifest, err := images.Manifest(ctx, client.ContentStore(), image.Target(), platform)
-    if err != nil {
-        log.Fatalf("failed to get the manifest: %v\n", err)
-    }
-
-    for _, desc := range manifest.Layers {
-        log.Printf("extracting layer %s\n", desc.Digest.String())
-        layer, err := client.ContentStore().ReaderAt(ctx, desc)
-        if err != nil {
-            return err
-        }
-        if err := archive.Untar(content.NewReader(layer), mntDir, &archive.TarOptions{NoLchown: true}); err != nil {
-            return err
-        }
-    }
-
-    return nil
-}
-```
-
-Calling `images.Manifest` returns the
-[manifest](https://github.com/opencontainers/image-spec/blob/master/manifest.md)
-from the image. What we care about here is the list of layers. Here I'm
-making a number of assumptions regarding their type (we should be
-checking the media type first). We read the layers and extract them to
-the mounted path.
-
-## #4 Create a second block device and inject other stuff
-
-Here I'm going to deviate a bit. I will not create a second loop device,
-and I will not inject a kernel. In their article, they provided a link
-to a snapshot of their `init` process
-(). In order to keep this
-simple, our init is going to be a shell script composed of the content
-of the entry point of the container. We're also going to add a few extra
-files to the container (`/etc/hosts` and `/etc/resolv.conf`).
-
-Finally, since we've pre-allocated 2GB for that container, and we likely
-don't need that much, we're also going to resize the image.
-
-### Add init
-
-Let's refer to the [specification for the
-config](https://github.com/opencontainers/image-spec/blob/master/config.md).
-The elements that are of interest to me are:
-
-- `Env`, which is an array of strings. It contains the environment
-  variables we likely need to run the program
-- `Cmd`, which is also an array of strings. If there's no entry point
-  provided, this is what is used.
-
-At this point, for this experiment, I'm going to ignore exposed ports,
-the working directory, and the user.
-
-First we need to read the config from the container. This is easily
-done:
-
-```go
-config, err := images.Config(ctx, client.ContentStore(), image.Target(), platform)
-if err != nil {
-    return err
-}
-```
-
-This needs to be read and decoded:
-
-```go
-configBlob, err := content.ReadBlob(ctx, client.ContentStore(), config)
-if err != nil {
-    return err
-}
-var imageSpec ocispec.Image
-if err := json.Unmarshal(configBlob, &imageSpec); err != nil {
-    return err
-}
-```
-
-`init` is the first process started by Linux during boot. On a regular
-Linux desktop you likely have a symbolic link from `/usr/bin/init` to
-`/usr/lib/systemd/systemd`, since most distributions have switched to
-`systemd`. For my use case, however, I want to run a single process, and
-I want it to be the one from the container. For this we can create a
-simple shell script inside the container (the location does not matter
-for now) with the environment variables and the command.
-
-Naively, this can be done like this:
-
-```go
-initPath := filepath.Join(mntDir, "init.sh")
-f, err := renameio.TempFile("", initPath)
-if err != nil {
-    return err
-}
-defer f.Cleanup()
-
-writer := bufio.NewWriter(f)
-fmt.Fprintf(writer, "#!/bin/sh\n")
-for _, env := range initEnvs {
-    fmt.Fprintf(writer, "export %s\n", env)
-}
-fmt.Fprintf(writer, "%s\n", initCmd)
-writer.Flush()
-
-f.CloseAtomicallyReplace()
-
-mode := int(0755)
-os.Chmod(initPath, os.FileMode(mode))
-```
-
-We're once again creating a temporary file with `renameio`, and we're
-writing our shell script one line at a time. We only need to make sure
-it is executable.
-
-### extra files
-
-Once we have our init file, I also want to add a few extra files:
-`/etc/hosts` and `/etc/resolv.conf`. These files are not always present,
-since they can be injected by other systems. I also want to make sure
-that DNS resolutions are done using my own DNS server.
-
-### resize the image
-
-We've pre-allocated 2GB for the image, and it's likely we don't need as
-much space. We can reclaim it by running `e2fsck` and `resize2fs` once
-we're done manipulating the image.
-
-Within a function, we can do the following:
-
-```go
-command := exec.Command("/usr/bin/e2fsck", "-p", "-f", rawFile)
-if err := command.Run(); err != nil {
-    return fmt.Errorf("e2fsck error: %s", err)
-}
-
-command = exec.Command("resize2fs", "-M", rawFile)
-if err := command.Run(); err != nil {
-    return fmt.Errorf("resize2fs error: %s", err)
-}
-```
-
-I'm using `docker.io/library/redis:latest` for my test, and I end up
-with the following size for the image:
-
-```bash
--rw------- 1 root root 216M Apr 22 14:50 /tmp/fcuny.img
-```
-
-### Kernel
-
-We're going to need a kernel to run that VM. I've decided to go with
-version 5.8, and build a custom kernel. If you are not familiar with
-the process, the firecracker team has [documented how to do
-this](https://github.com/firecracker-microvm/firecracker/blob/main/docs/rootfs-and-kernel-setup.md#creating-a-kernel-image).
-In my case all I had to do was:
-
-```bash
-git clone https://github.com/torvalds/linux.git linux.git
-cd linux.git
-git checkout v5.8
-curl -o .config -s https://github.com/firecracker-microvm/firecracker/blob/main/resources/microvm-kernel-x86_64.config
-make menuconfig
-make vmlinux -j8
-```
-
-Note that they also have pretty good [documentation for
-production](https://github.com/firecracker-microvm/firecracker/blob/main/docs/prod-host-setup.md).
-
-## #5 Attach persistent volumes (if any)
-
-I'm going to skip that step for now.
-
-## #6 Create a TAP device and configure it
-
-We're going to need a network for that VM (otherwise it might be a bit
-boring).
There are a few approaches we can take:
-
-1. create the TAP device
-2. delegate all that work to a
-   [CNI](https://github.com/containernetworking/cni)
-
-I've decided to use the CNI approach [documented in the Go
-SDK](https://github.com/firecracker-microvm/firecracker-go-sdk#cni). For
-this to work we need to install the `tc-redirect-tap` CNI plugin
-(available at ).
-
-Based on that documentation, I'll start with the following configuration
-in `etc/cni/conf.d/50-c2vm.conflist`:
-
-```json
-{
-    "name": "c2vm",
-    "cniVersion": "0.4.0",
-    "plugins": [
-        {
-            "type": "bridge",
-            "bridge": "c2vm-br",
-            "isDefaultGateway": true,
-            "forceAddress": false,
-            "ipMasq": true,
-            "hairpinMode": true,
-            "mtu": 1500,
-            "ipam": {
-                "type": "host-local",
-                "subnet": "192.168.128.0/24",
-                "resolvConf": "/etc/resolv.conf"
-            }
-        },
-        {
-            "type": "firewall"
-        },
-        {
-            "type": "tc-redirect-tap"
-        }
-    ]
-}
-```
-
-## #7 Hand it off to Firecracker and boot that thing
-
-Now that we have all the components, we need to boot that VM. Since I've
-been working with Go so far, I'll also use the [Go
-SDK](https://github.com/firecracker-microvm/firecracker-go-sdk) to
-manage and start the VM.
-
-For this we need the firecracker binary, which we can [find on
-GitHub](https://github.com/firecracker-microvm/firecracker/releases).
-
-The first thing is to configure the list of devices. In our case we will
-have a single device, the boot drive that we've created in the previous
-step.
-
-```go
-devices := make([]models.Drive, 1)
-devices[0] = models.Drive{
-    DriveID:      firecracker.String("1"),
-    PathOnHost:   &rawImage,
-    IsRootDevice: firecracker.Bool(true),
-    IsReadOnly:   firecracker.Bool(false),
-}
-```
-
-The next step is to configure the VM:
-
-```go
-fcCfg := firecracker.Config{
-    LogLevel:        "debug",
-    SocketPath:      firecrackerSock,
-    KernelImagePath: linuxKernel,
-    KernelArgs:      "console=ttyS0 reboot=k panic=1 acpi=off pci=off i8042.noaux i8042.nomux i8042.nopnp i8042.dumbkbd init=/init.sh random.trust_cpu=on",
-    Drives:          devices,
-    MachineCfg: models.MachineConfiguration{
-        VcpuCount:   firecracker.Int64(1),
-        CPUTemplate: models.CPUTemplate("C3"),
-        HtEnabled:   firecracker.Bool(true),
-        MemSizeMib:  firecracker.Int64(512),
-    },
-    NetworkInterfaces: []firecracker.NetworkInterface{
-        {
-            CNIConfiguration: &firecracker.CNIConfiguration{
-                NetworkName: "c2vm",
-                IfName:      "eth0",
-            },
-        },
-    },
-}
-```
-
-Finally we can create the command to start and run the VM:
-
-```go
-command := firecracker.VMCommandBuilder{}.
-    WithBin(firecrackerBinary).
-    WithSocketPath(fcCfg.SocketPath).
-    WithStdin(os.Stdin).
-    WithStdout(os.Stdout).
-    WithStderr(os.Stderr).
-    Build(ctx)
-machineOpts = append(machineOpts, firecracker.WithProcessRunner(command))
-m, err := firecracker.NewMachine(vmmCtx, fcCfg, machineOpts...)
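-// NewMachine only assembles the VM configuration and its handlers;
-// nothing is running yet. Start launches the firecracker process and
-// boots the VM through its API socket, and Wait blocks until it exits.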
-if err != nil { - panic(err) -} - -if err := m.Start(vmmCtx); err != nil { - panic(err) -} -defer m.StopVMM() - -if err := m.Wait(vmmCtx); err != nil { - panic(err) -} -``` - -The end result: - - ; sudo ./c2vm -container docker.io/library/redis:latest -firecracker-binary ./hack/firecracker/firecracker-v0.24.3-x86_64 -linux-kernel ./hack/linux/my-linux.bin -out /tmp/redis.img - 2021/05/15 14:12:59 pulled docker.io/library/redis:latest (38690247 bytes) - 2021/05/15 14:13:00 mounted /tmp/redis.img on /tmp/c2vm026771514 - 2021/05/15 14:13:00 extracting layer sha256:69692152171afee1fd341febc390747cfca2ff302f2881d8b394e786af605696 - 2021/05/15 14:13:00 extracting layer sha256:a4a46f2fd7e06fab84b4e78eb2d1b6d007351017f9b18dbeeef1a9e7cf194e00 - 2021/05/15 14:13:00 extracting layer sha256:bcdf6fddc3bdaab696860eb0f4846895c53a3192c9d7bf8d2275770ea8073532 - 2021/05/15 14:13:01 extracting layer sha256:b7e9b50900cc06838c44e0fc5cbebe5c0b3e7f70c02f32dd754e1aa6326ed566 - 2021/05/15 14:13:01 extracting layer sha256:5f3030c50d85a9d2f70adb610b19b63290c6227c825639b227ddc586f86d1c76 - 2021/05/15 14:13:01 extracting layer sha256:63dae8e0776cdbd63909fbd9c047c1615a01cb21b73efa87ae2feed680d3ffa1 - 2021/05/15 14:13:01 init script created - 2021/05/15 14:13:01 umount /tmp/c2vm026771514 - INFO[0003] Called startVMM(), setting up a VMM on firecracker.sock - INFO[0003] VMM logging disabled. - INFO[0003] VMM metrics disabled. - INFO[0003] refreshMachineConfiguration: [GET /machine-config][200] getMachineConfigurationOK &{CPUTemplate:C3 HtEnabled:0xc0004e6753 MemSizeMib:0xc0004e6748 VcpuCount:0xc0004e6740} - INFO[0003] PutGuestBootSource: [PUT /boot-source][204] putGuestBootSourceNoContent - INFO[0003] Attaching drive /tmp/redis.img, slot 1, root true. - INFO[0003] Attached drive /tmp/redis.img: [PUT /drives/{drive_id}][204] putGuestDriveByIdNoContent - INFO[0003] Attaching NIC tap0 (hwaddr 9e:72:c7:04:6b:80) at index 1 - INFO[0003] startInstance successful: [PUT /actions][204] createSyncActionNoContent - [ 0.000000] Linux version 5.8.0 (fcuny@nas) (gcc (Debian 8.3.0-6) 8.3.0, GNU ld (GNU Binutils for Debian) 2.31.1) #1 SMP Mon Apr 12 20:07:40 PDT 2021 - [ 0.000000] Command line: i8042.dumbkbd ip=192.168.128.9::192.168.128.1:255.255.255.0:::off::: console=ttyS0 reboot=k panic=1 acpi=off pci=off i8042.noaux i8042.nomux i8042.nopnp init=/init.sh random.trust_cpu=on root=/dev/vda rw virtio_mmio.device=4K@0xd0000000:5 virtio_mmio.device=4K@0xd0001000:6 - [ 0.000000] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point registers' - [ 0.000000] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers' - [ 0.000000] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers' - [ 0.000000] x86/fpu: xstate_offset[2]: 576, xstate_sizes[2]: 256 - [ 0.000000] x86/fpu: Enabled xstate features 0x7, context size is 832 bytes, using 'standard' format. - [ 0.000000] BIOS-provided physical RAM map: - [ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable - [ 0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000001fffffff] usable - [ 0.000000] NX (Execute Disable) protection: active - [ 0.000000] DMI not present or invalid. 
- [ 0.000000] Hypervisor detected: KVM - [ 0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00 - [ 0.000000] kvm-clock: cpu 0, msr 2401001, primary cpu clock - [ 0.000000] kvm-clock: using sched offset of 11918596 cycles - [ 0.000005] clocksource: kvm-clock: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns - [ 0.000011] tsc: Detected 1190.400 MHz processor - [ 0.000108] last_pfn = 0x20000 max_arch_pfn = 0x400000000 - [ 0.000151] Disabled - [ 0.000156] x86/PAT: MTRRs disabled, skipping PAT initialization too. - [ 0.000166] CPU MTRRs all blank - virtualized system. - [ 0.000170] x86/PAT: Configuration [0-7]: WB WT UC- UC WB WT UC- UC - [ 0.000201] found SMP MP-table at [mem 0x0009fc00-0x0009fc0f] - [ 0.000257] check: Scanning 1 areas for low memory corruption - [ 0.000364] No NUMA configuration found - [ 0.000365] Faking a node at [mem 0x0000000000000000-0x000000001fffffff] - [ 0.000370] NODE_DATA(0) allocated [mem 0x1ffde000-0x1fffffff] - [ 0.000490] Zone ranges: - [ 0.000493] DMA [mem 0x0000000000001000-0x0000000000ffffff] - [ 0.000494] DMA32 [mem 0x0000000001000000-0x000000001fffffff] - [ 0.000495] Normal empty - [ 0.000497] Movable zone start for each node - [ 0.000500] Early memory node ranges - [ 0.000501] node 0: [mem 0x0000000000001000-0x000000000009efff] - [ 0.000502] node 0: [mem 0x0000000000100000-0x000000001fffffff] - [ 0.000510] Zeroed struct page in unavailable ranges: 98 pages - [ 0.000511] Initmem setup node 0 [mem 0x0000000000001000-0x000000001fffffff] - [ 0.004990] Intel MultiProcessor Specification v1.4 - [ 0.004995] MPTABLE: OEM ID: FC - [ 0.004995] MPTABLE: Product ID: 000000000000 - [ 0.004996] MPTABLE: APIC at: 0xFEE00000 - [ 0.005007] Processor #0 (Bootup-CPU) - [ 0.005039] IOAPIC[0]: apic_id 2, version 17, address 0xfec00000, GSI 0-23 - [ 0.005041] Processors: 1 - [ 0.005042] TSC deadline timer available - [ 0.005044] smpboot: Allowing 1 CPUs, 0 hotplug CPUs - [ 0.005060] KVM setup pv remote TLB flush - [ 0.005072] KVM setup pv sched yield - [ 0.005078] PM: hibernation: Registered nosave memory: [mem 0x00000000-0x00000fff] - [ 0.005079] PM: hibernation: Registered nosave memory: [mem 0x0009f000-0x000fffff] - [ 0.005081] [mem 0x20000000-0xffffffff] available for PCI devices - [ 0.005082] Booting paravirtualized kernel on KVM - [ 0.005084] clocksource: refined-jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645519600211568 ns - [ 0.005087] setup_percpu: NR_CPUS:128 nr_cpumask_bits:128 nr_cpu_ids:1 nr_node_ids:1 - [ 0.006381] percpu: Embedded 44 pages/cpu s143360 r8192 d28672 u2097152 - [ 0.006404] KVM setup async PF for cpu 0 - [ 0.006410] kvm-stealtime: cpu 0, msr 1f422080 - [ 0.006420] Built 1 zonelists, mobility grouping on. 
Total pages: 128905 - [ 0.006420] Policy zone: DMA32 - [ 0.006422] Kernel command line: i8042.dumbkbd ip=192.168.128.9::192.168.128.1:255.255.255.0:::off::: console=ttyS0 reboot=k panic=1 acpi=off pci=off i8042.noaux i8042.nomux i8042.nopnp init=/init.sh random.trust_cpu=on root=/dev/vda rw virtio_mmio.device=4K@0xd0000000:5 virtio_mmio.device=4K@0xd0001000:6 - [ 0.006858] Dentry cache hash table entries: 65536 (order: 7, 524288 bytes, linear) - [ 0.007003] Inode-cache hash table entries: 32768 (order: 6, 262144 bytes, linear) - [ 0.007047] mem auto-init: stack:off, heap alloc:off, heap free:off - [ 0.007947] Memory: 491940K/523896K available (10243K kernel code, 629K rwdata, 1860K rodata, 1408K init, 6048K bss, 31956K reserved, 0K cma-reserved) - [ 0.007980] random: get_random_u64 called from __kmem_cache_create+0x3d/0x540 with crng_init=0 - [ 0.008053] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 - [ 0.008146] rcu: Hierarchical RCU implementation. - [ 0.008147] rcu: RCU restricting CPUs from NR_CPUS=128 to nr_cpu_ids=1. - [ 0.008151] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies. - [ 0.008152] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1 - [ 0.008170] NR_IRQS: 4352, nr_irqs: 48, preallocated irqs: 16 - [ 0.008373] random: crng done (trusting CPU's manufacturer) - [ 0.008430] Console: colour dummy device 80x25 - [ 0.052276] printk: console [ttyS0] enabled - [ 0.052685] APIC: Switch to symmetric I/O mode setup - [ 0.053288] x2apic enabled - [ 0.053705] Switched APIC routing to physical x2apic. - [ 0.054213] KVM setup pv IPIs - [ 0.055559] clocksource: tsc-early: mask: 0xffffffffffffffff max_cycles: 0x1128af0325d, max_idle_ns: 440795261011 ns - [ 0.056516] Calibrating delay loop (skipped) preset value.. 2380.80 BogoMIPS (lpj=4761600) - [ 0.057259] pid_max: default: 32768 minimum: 301 - [ 0.057726] LSM: Security Framework initializing - [ 0.058176] SELinux: Initializing. - [ 0.058556] Mount-cache hash table entries: 1024 (order: 1, 8192 bytes, linear) - [ 0.059221] Mountpoint-cache hash table entries: 1024 (order: 1, 8192 bytes, linear) - [ 0.060382] x86/cpu: User Mode Instruction Prevention (UMIP) activated - [ 0.060510] Last level iTLB entries: 4KB 0, 2MB 0, 4MB 0 - [ 0.060510] Last level dTLB entries: 4KB 0, 2MB 0, 4MB 0, 1GB 0 - [ 0.060510] Spectre V1 : Mitigation: usercopy/swapgs barriers and __user pointer sanitization - [ 0.060510] Spectre V2 : Mitigation: Enhanced IBRS - [ 0.060510] Spectre V2 : Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch - [ 0.060510] Spectre V2 : mitigation: Enabling conditional Indirect Branch Prediction Barrier - [ 0.060510] Speculative Store Bypass: Mitigation: Speculative Store Bypass disabled via prctl and seccomp - [ 0.060510] Freeing SMP alternatives memory: 32K - [ 0.060510] smpboot: CPU0: Intel(R) Xeon(R) Processor @ 1.20GHz (family: 0x6, model: 0x3e, stepping: 0x4) - [ 0.060510] Performance Events: unsupported p6 CPU model 62 no PMU driver, software events only. - [ 0.060510] rcu: Hierarchical SRCU implementation. - [ 0.060510] smp: Bringing up secondary CPUs ... 
- [ 0.060510] smp: Brought up 1 node, 1 CPU - [ 0.060510] smpboot: Max logical packages: 1 - [ 0.060523] smpboot: Total of 1 processors activated (2380.80 BogoMIPS) - [ 0.061338] devtmpfs: initialized - [ 0.061710] x86/mm: Memory block size: 128MB - [ 0.062341] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns - [ 0.063245] futex hash table entries: 256 (order: 2, 16384 bytes, linear) - [ 0.063946] thermal_sys: Registered thermal governor 'fair_share' - [ 0.063946] thermal_sys: Registered thermal governor 'step_wise' - [ 0.064522] thermal_sys: Registered thermal governor 'user_space' - [ 0.065313] NET: Registered protocol family 16 - [ 0.066398] DMA: preallocated 128 KiB GFP_KERNEL pool for atomic allocations - [ 0.067057] DMA: preallocated 128 KiB GFP_KERNEL|GFP_DMA pool for atomic allocations - [ 0.067778] DMA: preallocated 128 KiB GFP_KERNEL|GFP_DMA32 pool for atomic allocations - [ 0.068506] audit: initializing netlink subsys (disabled) - [ 0.068708] cpuidle: using governor ladder - [ 0.069097] cpuidle: using governor menu - [ 0.070636] audit: type=2000 audit(1621113181.800:1): state=initialized audit_enabled=0 res=1 - [ 0.076346] HugeTLB registered 2.00 MiB page size, pre-allocated 0 pages - [ 0.077007] ACPI: Interpreter disabled. - [ 0.077445] SCSI subsystem initialized - [ 0.077812] pps_core: LinuxPPS API ver. 1 registered - [ 0.078277] pps_core: Software ver. 5.3.6 - Copyright 2005-2007 Rodolfo Giometti - [ 0.079206] PTP clock support registered - [ 0.079741] NetLabel: Initializing - [ 0.080111] NetLabel: domain hash size = 128 - [ 0.080529] NetLabel: protocols = UNLABELED CIPSOv4 CALIPSO - [ 0.081113] NetLabel: unlabeled traffic allowed by default - [ 0.082072] clocksource: Switched to clocksource kvm-clock - [ 0.082715] VFS: Disk quotas dquot_6.6.0 - [ 0.083123] VFS: Dquot-cache hash table entries: 512 (order 0, 4096 bytes) - [ 0.083855] pnp: PnP ACPI: disabled - [ 0.084510] NET: Registered protocol family 2 - [ 0.084718] tcp_listen_portaddr_hash hash table entries: 256 (order: 0, 4096 bytes, linear) - [ 0.085602] TCP established hash table entries: 4096 (order: 3, 32768 bytes, linear) - [ 0.086365] TCP bind hash table entries: 4096 (order: 4, 65536 bytes, linear) - [ 0.087025] TCP: Hash tables configured (established 4096 bind 4096) - [ 0.087749] UDP hash table entries: 256 (order: 1, 8192 bytes, linear) - [ 0.088481] UDP-Lite hash table entries: 256 (order: 1, 8192 bytes, linear) - [ 0.089261] NET: Registered protocol family 1 - [ 0.090395] virtio-mmio: Registering device virtio-mmio.0 at 0xd0000000-0xd0000fff, IRQ 5. - [ 0.091388] virtio-mmio: Registering device virtio-mmio.1 at 0xd0001000-0xd0001fff, IRQ 6. 
- [ 0.092222] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x1128af0325d, max_idle_ns: 440795261011 ns - [ 0.093322] clocksource: Switched to clocksource tsc - [ 0.093824] platform rtc_cmos: registered platform RTC device (no PNP device found) - [ 0.094618] check: Scanning for low memory corruption every 60 seconds - [ 0.095394] Initialise system trusted keyrings - [ 0.095836] Key type blacklist registered - [ 0.096427] workingset: timestamp_bits=36 max_order=17 bucket_order=0 - [ 0.097849] squashfs: version 4.0 (2009/01/31) Phillip Lougher - [ 0.107488] Key type asymmetric registered - [ 0.107905] Asymmetric key parser 'x509' registered - [ 0.108409] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 252) - [ 0.109435] Serial: 8250/16550 driver, 1 ports, IRQ sharing disabled - [ 0.110116] serial8250: ttyS0 at I/O 0x3f8 (irq = 4, base_baud = 115200) is a 16550A - [ 0.111877] loop: module loaded - [ 0.112426] virtio_blk virtio0: [vda] 441152 512-byte logical blocks (226 MB/215 MiB) - [ 0.113229] vda: detected capacity change from 0 to 225869824 - [ 0.114143] Loading iSCSI transport class v2.0-870. - [ 0.114753] iscsi: registered transport (tcp) - [ 0.115162] tun: Universal TUN/TAP device driver, 1.6 - [ 0.115955] i8042: PNP detection disabled - [ 0.116498] serio: i8042 KBD port at 0x60,0x64 irq 1 - [ 0.117089] input: AT Raw Set 2 keyboard as /devices/platform/i8042/serio0/input/input0 - [ 0.117932] intel_pstate: CPU model not supported - [ 0.118448] hid: raw HID events driver (C) Jiri Kosina - [ 0.119090] Initializing XFRM netlink socket - [ 0.119555] NET: Registered protocol family 10 - [ 0.120285] Segment Routing with IPv6 - [ 0.120812] NET: Registered protocol family 17 - [ 0.121350] Bridge firewalling registered - [ 0.122026] NET: Registered protocol family 40 - [ 0.122515] IPI shorthand broadcast: enabled - [ 0.122961] sched_clock: Marking stable (72512224, 48198862)->(137683636, -16972550) - [ 0.123796] registered taskstats version 1 - [ 0.124203] Loading compiled-in X.509 certificates - [ 0.125355] Loaded X.509 cert 'Build time autogenerated kernel key: 6203e6adc37b712d3b220a26b38f3d31311d5966' - [ 0.126355] Key type ._fscrypt registered - [ 0.126736] Key type .fscrypt registered - [ 0.127109] Key type fscrypt-provisioning registered - [ 0.127657] Key type encrypted registered - [ 0.144629] IP-Config: Complete: - [ 0.144968] device=eth0, hwaddr=9e:72:c7:04:6b:80, ipaddr=192.168.128.9, mask=255.255.255.0, gw=192.168.128.1 - [ 0.146044] host=192.168.128.9, domain=, nis-domain=(none) - [ 0.146604] bootserver=255.255.255.255, rootserver=255.255.255.255, rootpath= - [ 0.148347] EXT4-fs (vda): mounted filesystem with ordered data mode. Opts: (null) - [ 0.149098] VFS: Mounted root (ext4 filesystem) on device 254:0. - [ 0.149761] devtmpfs: mounted - [ 0.150340] Freeing unused decrypted memory: 2040K - [ 0.151148] Freeing unused kernel image (initmem) memory: 1408K - [ 0.156621] Write protecting the kernel read-only data: 14336k - [ 0.158657] Freeing unused kernel image (text/rodata gap) memory: 2044K - [ 0.159490] Freeing unused kernel image (rodata/data gap) memory: 188K - [ 0.160150] Run /init.sh as init process - 462:C 15 May 2021 21:13:01.903 # oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo - 462:C 15 May 2021 21:13:01.904 # Redis version=6.2.3, bits=64, commit=00000000, modified=0, pid=462, just started - 462:C 15 May 2021 21:13:01.905 # Warning: no config file specified, using the default config. 
In order to specify a config file use redis-server /path/to/redis.conf
-    462:M 15 May 2021 21:13:01.907 * Increased maximum number of open files to 10032 (it was originally set to 1024).
-    462:M 15 May 2021 21:13:01.909 * monotonic clock: POSIX clock_gettime
-                    _._
-               _.-``__ ''-._
-          _.-``    `.  `_.  ''-._           Redis 6.2.3 (00000000/0) 64 bit
-      .-`` .-```.  ```\/    _.,_ ''-._
-     (    '      ,       .-`  | `,    )     Running in standalone mode
-     |`-._`-...-` __...-.``-._|'` _.-'|     Port: 6379
-     |    `-._   `._    /     _.-'    |     PID: 462
-      `-._    `-._  `-./  _.-'    _.-'
-     |`-._`-._    `-.__.-'    _.-'_.-'|
-     |    `-._`-._        _.-'_.-'    |           https://redis.io
-      `-._    `-._`-.__.-'_.-'    _.-'
-     |`-._`-._    `-.__.-'    _.-'_.-'|
-     |    `-._`-._        _.-'_.-'    |
-      `-._    `-._`-.__.-'_.-'    _.-'
-          `-._    `-.__.-'    _.-'
-              `-._        _.-'
-                  `-.__.-'
-
-    462:M 15 May 2021 21:13:01.922 # Server initialized
-    462:M 15 May 2021 21:13:01.923 * Ready to accept connections
-
-We can do a quick test with the following:
-
-```bash
-; sudo docker run -it --rm redis redis-cli -h 192.168.128.9
-192.168.128.9:6379> get foo
-(nil)
-192.168.128.9:6379> set foo 1
-OK
-192.168.128.9:6379> get foo
-"1"
-192.168.128.9:6379>
-```
diff --git a/content/notes/cpu-power-management.md b/content/notes/cpu-power-management.md
deleted file mode 100644
index 922f081..0000000
--- a/content/notes/cpu-power-management.md
+++ /dev/null
@@ -1,121 +0,0 @@
-+++
-title = "CPU power management"
-date = 2023-01-22
-[taxonomies]
-tags = ["hardware"]
-+++
-
-## Maximum power consumption of a processor
-
-Our Intel CPU has a thermal design power (TDP) of 120W. The AMD CPU has a TDP of 200W.
-
-The Intel CPU has 80 cores while the AMD one has 128 cores. For Intel, this gives us 1.5W per core, while for AMD, 1.56W.
-
-The TDP is the average power the processor can sustain indefinitely, and it is the power the cooling solution needs to be designed for to be reliable. The TDP is measured under worst case load, with all cores running at 1.8GHz (the base frequency).
-
-## C-State vs. P-State
-
-We have two ways to control the power consumption:
-
-- disabling a subsystem
-- decreasing the voltage
-
-This is done using:
-
-- _C-States_, for optimizing the power consumption
-- _P-States_, for optimizing the voltage and CPU frequency
-
-A _C-State_ means that one or more subsystems of the CPU are executing nothing: they are idle, powered down.
-
-In a _P-State_, the subsystem is actually running, but it does not require full performance, so the voltage and/or frequency it operates at is decreased.
-
-The states are numbered starting from 0. The higher the number, the more power is saved. `C0` means no power saving. `P0` means maximum performance (thus maximum frequency, voltage and power used).
-
-### C-state
-
-A timeline of power saving using C-states is as follows:
-
-1. normal operation is at C0
-2. the clock of an idle core is stopped (C1)
-3. the local caches (L1/L2) of the core are flushed and the core is powered down (C3)
-4. when all the cores are powered down, the shared cache of the package (L3/LLC) is flushed and the whole package/CPU can be powered down
-
-| state | description |
-| ----- | ----------- |
-| C0    | operating state |
-| C1    | a state where the processor is not executing instructions, but can return to an executing state essentially instantaneously |
-| C2    | a state where the processor maintains all software-visible state, but may take longer to wake up |
-| C3    | a state where the processor does not need to keep its cache coherent, but maintains other state |
-
-Running `cpuid` we can find all the supported C-states for a processor (Intel(R) Xeon(R) Gold 6122 CPU @ 1.80GHz):
-
-```
-   MONITOR/MWAIT (5):
-      smallest monitor-line size (bytes)       = 0x40 (64)
-      largest monitor-line size (bytes)        = 0x40 (64)
-      enum of Monitor-MWAIT exts supported     = true
-      supports intrs as break-event for MWAIT  = true
-      number of C0 sub C-states using MWAIT    = 0x0 (0)
-      number of C1 sub C-states using MWAIT    = 0x2 (2)
-      number of C2 sub C-states using MWAIT    = 0x0 (0)
-      number of C3 sub C-states using MWAIT    = 0x2 (2)
-      number of C4 sub C-states using MWAIT    = 0x0 (0)
-      number of C5 sub C-states using MWAIT    = 0x0 (0)
-      number of C6 sub C-states using MWAIT    = 0x0 (0)
-      number of C7 sub C-states using MWAIT    = 0x0 (0)
-```
-
-If I interpret this correctly:
-
-- there's one `C0`
-- there are two sub C-states for `C1`
-- there are two sub C-states for `C3`
-
-### P-state
-
-Being in a P-state means the CPU core is also in `C0`, since it has to be powered to execute code.
-
-P-states allow changing the voltage and frequency of the CPU core to decrease its power consumption.
-
-A P-state refers to a frequency-voltage pair. The highest operating point is the maximum state, which is `P0`.
-
-| state | description |
-| ----- | ----------- |
-| P0    | maximum power and frequency |
-| P1    | less than P0, voltage and frequency scaled |
-| P2    | less than P1, voltage and frequency scaled |
-
-## ACPI power state
-
-The ACPI Specification defines the following four global "Gx" states and six sleep "Sx" states:
-
-| GX   | name           | Sx   | description |
-| ---- | -------------- | ---- | ----------- |
-| `G0` | working        | `S0` | The computer is running and executing instructions |
-| `G1` | sleeping       | `S1` | Processor caches are flushed and the CPU stops executing instructions |
-| `G1` | sleeping       | `S2` | CPU powered off, dirty caches flushed to RAM |
-| `G1` | sleeping       | `S3` | Suspend to RAM |
-| `G1` | sleeping       | `S4` | Suspend to disk, all content of the main memory is flushed to non volatile memory |
-| `G2` | soft off       | `S5` | The PSU still supplies power, a full reboot is required |
-| `G3` | mechanical off | `S6` | The system is safe for disassembly |
-
-When we are in any C-state, we are in `G0`.
-
-## Speed Select Technology
-
-[Speed Select Technology](https://en.wikichip.org/wiki/intel/speed_select_technology) is a set of power management controls that allows a system administrator to customize per-core performance. By configuring the performance of specific cores and affinitizing workloads to those cores, higher software performance can be achieved.
-SST supports multiple types of customization:
-
-- Frequency Prioritization (SST-CP) - allows specific cores to clock higher by reducing the frequency of cores running lower-priority software.
-- Speed Select Base Freq (SST-BF) - allows specific cores to run at a higher base frequency (P1) by reducing the base frequency (P1) of other cores.
-
-## Turbo Boost
-
-TDP is the maximum power consumption the CPU can sustain. When the power consumption is low (e.g. many cores are in P1+ states), the CPU frequency can be increased beyond the base frequency to take advantage of the headroom, since this condition does not increase the power consumption beyond TDP.
-
-Modern CPUs rely heavily on "Turbo" (Intel) or "boost" (AMD) ([TBT](https://en.wikichip.org/wiki/intel/turbo_boost_technology) and [TBTM](https://en.wikichip.org/wiki/intel/turbo_boost_max_technology)).
-
-In our case, the Intel 6122 is rated at 1.8GHz, a.k.a. the "stamp speed". If we want to run the CPU at a consistent frequency, we'd have to choose 1.8GHz or below, and we'd lose significant performance if we were to disable turbo/boost.
-
-### Turbo boost max
-
-During the manufacturing process, Intel is able to test each die and determine which cores possess the best overclocking capabilities. That information is then stored in the CPU, in order from best to worst.
diff --git a/content/notes/making-sense-intel-amd-cpus.md b/content/notes/making-sense-intel-amd-cpus.md
deleted file mode 100644
index 9d1ce84..0000000
--- a/content/notes/making-sense-intel-amd-cpus.md
+++ /dev/null
@@ -1,236 +0,0 @@
-+++
-title = "Making sense of Intel and AMD CPUs naming"
-date = 2021-12-29
-[taxonomies]
-tags = ["hardware"]
-+++
-
-## Intel
-
-### Core
-
-The line-up for the Core family is i3, i5, i7 and i9. As of January 2023, the current generation is [Raptor Lake](https://en.wikipedia.org/wiki/Raptor_Lake) (13th generation).
-
-The brand modifiers are:
-
-- **i3**: laptops/low-end desktop
-- **i5**: mainstream users
-- **i7**: high-end users
-- **i9**: enthusiast users
-
-How to read a SKU? Let's use the [i7-12700K](https://ark.intel.com/content/www/us/en/ark/products/134594/intel-core-i712700k-processor-25m-cache-up-to-5-00-ghz.html) processor:
-
-- **i7**: high end users
-- **12**: 12th generation
-- **700**: SKU digits, usually assigned in the order the processors are developed
-- **K**: unlocked
-
-List of suffixes:
-
-| suffix | meaning                                |
-| ------ | -------------------------------------- |
-| G..    | integrated graphics                    |
-| E      | embedded                               |
-| F      | requires a discrete graphics card      |
-| H      | high performance for mobile            |
-| HK     | high performance for mobile / unlocked |
-| K      | unlocked                               |
-| S      | special edition                        |
-| T      | power optimized lifestyle              |
-| U      | mobile power efficient                 |
-| Y      | mobile low power                       |
-| X/XE   | unlocked, high end                     |
-
-> **Unlocked** - what does that mean? A processor with the **K** suffix
-> is made with an unlocked clock multiplier. When used with a specific
-> chipset, it's possible to overclock the processor.
-
-#### Raptor Lake (13th generation)
-
-Raptor Lake is a hybrid architecture, featuring both P-cores (performance cores) and E-cores (efficient cores), similar to Alder Lake. P-cores are based on the [Raptor Cove](https://en.wikipedia.org/wiki/Golden_Cove#Raptor_Cove) architecture, while the E-cores are based on the [Gracemont]() architecture (same as for Alder Lake).
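-
-As an aside, the SKU scheme described above is mechanical enough to decode with a few lines of Go (a toy sketch; it only handles the two-digit generations in use since the 10th generation):
-
-```go
-package main
-
-import (
-    "fmt"
-    "regexp"
-)
-
-// skuRe captures the pieces described above: brand modifier,
-// generation, SKU digits, and an optional suffix.
-var skuRe = regexp.MustCompile(`^(i[3579])-(1[0-9])([0-9]{3})([A-Z]*)$`)
-
-func main() {
-    m := skuRe.FindStringSubmatch("i7-12700K")
-    if m == nil {
-        fmt.Println("not a recognized SKU")
-        return
-    }
-    // For "i7-12700K": brand i7, generation 12, SKU 700, suffix K.
-    fmt.Printf("brand: %s, generation: %s, SKU: %s, suffix: %q\n", m[1], m[2], m[3], m[4])
-}
-```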
-
-Available processors:
-
-| model      | p-cores | e-cores | GHz (base) | GHz (boosted) | TDP      |
-| ---------- | ------- | ------- | ---------- | ------------- | -------- |
-| i9-13900KS | 8 (16)  | 16      | 3.2/2.4    | 6/4.3         | 150/253W |
-| i9-13900K  | 8 (16)  | 16      | 3.0/2.0    | 5.8/4.3       | 125/253W |
-| i9-13900KF | 8 (16)  | 16      | 3.0/2.0    | 5.8/4.3       | 125/253W |
-| i9-13900   | 8 (16)  | 16      | 2.0/1.5    | 5.2/4.2       | 65/219W  |
-| i9-13900F  | 8 (16)  | 16      | 2.0/1.5    | 5.2/4.2       | 65/219W  |
-| i9-13900T  | 8 (16)  | 16      | 1.1/0.8    | 5.1/3.9       | 35/219W  |
-| i7-13700K  | 8 (16)  | 8       | 3.4/2.5    | 5.4/4.2       | 125/253W |
-| i7-13700KF | 8 (16)  | 8       | 3.4/2.5    | 5.4/4.2       | 125/253W |
-| i7-13700   | 8 (16)  | 8       | 2.1/1.5    | 5.1/4.1       | 65/219W  |
-| i7-13700F  | 8 (16)  | 8       | 2.1/1.5    | 5.1/4.1       | 65/219W  |
-| i7-13700T  | 8 (16)  | 8       | 1.4/1.0    | 4.8/3.6       | 35/106W  |
-| i5-13600K  | 6 (12)  | 8       | 3.5/2.6    | 5.1/3.9       | 125/181W |
-| i5-13600KF | 6 (12)  | 8       | 3.5/2.6    | 5.1/3.9       | 125/181W |
-
-For the Raptor Lake generation, as for the Alder Lake generation, the supported socket is the [LGA1700](https://en.wikipedia.org/wiki/LGA_1700).
-
-List of Raptor Lake chipsets:
-
-| feature                    | b760[^7] | h770[^8] | z790[^9] |
-| -------------------------- | -------- | -------- | -------- |
-| P and E cores overclocking | no       | no       | yes      |
-| memory overclocking        | yes      | yes      | yes      |
-| DMI 4 lanes                | 4        | 8        | 8        |
-| chipset PCIe 5.0 lanes     |          |          |          |
-| chipset PCIe 4.0 lanes     |          |          |          |
-| chipset PCIe 3.0 lanes     |          |          |          |
-| SATA 3.0 ports             | up to 4  | up to 8  | up to 8  |
-
-#### Alder Lake (12th generation)
-
-Alder Lake is a hybrid architecture, featuring both P-cores (performance cores) and E-cores (efficient cores). P-cores are based on the [Golden Cove](https://en.wikipedia.org/wiki/Golden_Cove) architecture, while the E-cores are based on the [Gracemont]() architecture.
-
-This is a [good article](https://www.anandtech.com/show/16881/a-deep-dive-into-intels-alder-lake-microarchitectures/2) to read about this model. Inside the processor there's a microcontroller that monitors what each thread is doing. This can be used by the OS scheduler as a hint for which kind of core a thread should be scheduled on (performance or efficiency).
-
-As of December 2021 this is not yet properly supported by the Linux kernel.
-
-Available processors:
-
-| model      | p-cores | e-cores | GHz (base) | GHz (boosted) | TDP  |
-| ---------- | ------- | ------- | ---------- | ------------- | ---- |
-| i9-12900K  | 8 (16)  | 8       | 3.2/2.4    | 5.1/3.9       | 241W |
-| i9-12900KF | 8 (16)  | 8       | 3.2/2.4    | 5.1/3.9       | 241W |
-| i7-12700K  | 8 (16)  | 4       | 3.6/2.7    | 4.9/3.8       | 190W |
-| i7-12700KF | 8 (16)  | 4       | 3.6/2.7    | 4.9/3.8       | 190W |
-| i5-12600K  | 6 (12)  | 4       | 3.7/2.8    | 4.9/3.6       | 150W |
-| i5-12600KF | 6 (12)  | 4       | 3.7/2.8    | 4.9/3.6       | 150W |
-
-- support DDR4 and DDR5 (up to DDR5-4800)
-- support PCIe 4.0 and 5.0 (16 PCIe 5.0 lanes and 4 PCIe 4.0 lanes)
-
-For the Alder Lake generation, the supported socket is the [LGA1700](https://en.wikipedia.org/wiki/LGA_1700).
-
-For now, the only supported chipsets for Alder Lake are:
-
-| feature                    | z690[^1] | h670[^2] | b660[^3] | h610[^4] | q670[^6] | w680[^5] |
-| -------------------------- | -------- | -------- | -------- | -------- | -------- | -------- |
-| P and E cores overclocking | yes      | no       | no       | no       | no       | yes      |
-| memory overclocking        | yes      | yes      | yes      | no       | -        | yes      |
-| DMI 4 lanes                | 8        | 8        | 4        | 4        | 8        | 8        |
-| chipset PCIe 4.0 lanes     | up to 12 | up to 12 | up to 6  | none     |          |          |
-| chipset PCIe 3.0 lanes     | up to 16 | up to 12 | up to 8  | 8        |          |          |
-| SATA 3.0 ports             | up to 8  | up to 8  | 4        | 4        | up to 8  | up to 8  |
-
-### Xeon
-
-Xeon is the brand of Intel processors designed for non-consumer servers and workstations. The most recent generations are:
-
-| name            | availability |
-| --------------- | ------------ |
-| Skylake         | 2015         |
-| Cascade lake    | 2019         |
-| Cooper lake     | 2022         |
-| Sapphire rapids | 2023         |
-
-The following brand identifiers are used:
-
-- platinum
-- gold
-- silver
-- bronze
-
-## AMD
-
-### Ryzen
-
-There are multiple generations of this brand of processors. They are based on the [zen micro architecture]().
-
-The current (as of January 2023) generation is Ryzen 7000.
-
-The brand modifiers are:
-
-- ryzen 3: entry level
-- ryzen 5: mainstream
-- ryzen 7: high end performance
-- ryzen 9: enthusiast
-
-List of suffixes:
-
-| suffix | meaning                                                                          |
-| ------ | -------------------------------------------------------------------------------- |
-| X      | high performance                                                                  |
-| G      | integrated graphics                                                               |
-| T      | power optimized lifecycle                                                         |
-| S      | low power desktop with integrated graphics                                        |
-| H      | high performance mobile                                                           |
-| U      | standard mobile                                                                   |
-| M      | low power mobile                                                                  |
-| 3D     | feature [3D V-cache technology](https://www.amd.com/en/technologies/3d-v-cache)  |
-
-### EPYC
-
-EPYC is the AMD brand of processors for the server market, based on the zen architecture. They use the [SP3](https://en.wikipedia.org/wiki/Socket_SP3) socket. The EPYC processors are chipset-free.
-
-### Threadripper
-
-The Threadripper is for high performance desktops. It uses the [TR4](https://en.wikipedia.org/wiki/Socket_TR4) socket. At the moment there's only one chipset that supports this processor, the [X399](https://en.wikipedia.org/wiki/List_of_AMD_chipsets#TR4_chipsets).
-
-The Threadripper based on the zen3 architecture is not yet released, but it's expected to hit the market in the first half of Q1 2022.
-
-### Sockets/Chipsets
-
-The majority of these processors use the [AM4 socket](https://en.wikipedia.org/wiki/Socket_AM4). The Threadripper line uses different sockets.
-
-There are multiple [chipsets](https://en.wikipedia.org/wiki/Socket_AM4#Chipsets) for the AM4 socket. The more advanced ones are the B550 and the X570.
-
-The Threadripper processors use the TR4, sTRX4 and sWRX8 sockets.
-
-### Zen 3
-
-Zen 3 was released in November 2020.
-
-| model         | cores   | GHz (base) | GHz (boosted) | PCIe lanes | TDP  |
-| ------------- | ------- | ---------- | ------------- | ---------- | ---- |
-| ryzen 5 5600x | 6 (12)  | 3.7        | 4.6           | 24         | 65W  |
-| ryzen 7 5800  | 8 (16)  | 3.4        | 4.6           | 24         | 65W  |
-| ryzen 7 5800x | 8 (16)  | 3.8        | 4.7           | 24         | 105W |
-| ryzen 9 5900  | 12 (24) | 3.0        | 4.7           | 24         | 65W  |
-| ryzen 9 5900x | 12 (24) | 3.7        | 4.8           | 24         | 105W |
-| ryzen 9 5950x | 16 (32) | 3.4        | 4.9           | 24         | 105W |
-
-- support PCIe 3.0 and PCIe 4.0 (except for the G series)
-- only support DDR4 (up to DDR4-3200)
-
-### Zen 4
-
-Zen 4 was released in September 2022.
-
-- only supports DDR5
-- all desktop processors feature 28 (24 + 4) PCIe 5.0 lanes
-- all desktop processors feature 2 x 4 lane PCIe interfaces (mostly for M.2 storage devices)
-
-| model           | cores   | GHz (base) | GHz (boosted) | TDP  |
-| --------------- | ------- | ---------- | ------------- | ---- |
-| ryzen 5 7600x   | 6 (12)  | 4.7        | 5.3           | 105W |
-| ryzen 5 7600    | 6 (12)  | 3.8        | 5.1           | 65W  |
-| ryzen 7 7800X3D | 8 (16)  |            | 5.0           | 120W |
-| ryzen 7 7700X   | 8 (16)  | 4.5        | 5.4           | 105W |
-| ryzen 7 7700    | 8 (16)  | 3.8        | 5.3           | 65W  |
-| ryzen 9 7900    | 12 (24) | 3.7        | 5.4           | 65W  |
-| ryzen 9 7900X   | 12 (24) | 4.7        | 5.6           | 170W |
-| ryzen 9 7900X3D | 12 (24) | 4.4        | 5.6           | 120W |
-| ryzen 9 7950X   | 16 (32) | 4.5        | 5.7           | 170W |
-| ryzen 9 7950X3D | 16 (32) | 4.2        | 5.7           | 120W |
-
-[^1]: https://ark.intel.com/content/www/us/en/ark/products/218833/intel-z690-chipset.html
-
-[^2]: https://www.intel.com/content/www/us/en/products/sku/218831/intel-h670-chipset/specifications.html
-
-[^3]: https://ark.intel.com/content/www/us/en/ark/products/218832/intel-b660-chipset.html
-
-[^4]: https://www.intel.com/content/www/us/en/products/sku/218829/intel-h610-chipset/specifications.html
-
-[^5]: https://ark.intel.com/content/www/us/en/ark/products/218834/intel-w680-chipset.html
-
-[^6]: https://ark.intel.com/content/www/us/en/ark/products/218827/intel-q670-chipset.html
-
-[^7]: https://www.intel.com/content/www/us/en/products/sku/229719/intel-b760-chipset/specifications.html
-
-[^8]: https://www.intel.com/content/www/us/en/products/sku/229720/intel-h770-chipset.html
-
-[^9]: https://www.intel.com/content/www/us/en/products/sku/229721/intel-z790-chipset/specifications.html
diff --git a/content/notes/stuff-about-pcie.md b/content/notes/stuff-about-pcie.md
deleted file mode 100644
index 311e55f..0000000
--- a/content/notes/stuff-about-pcie.md
+++ /dev/null
@@ -1,266 +0,0 @@
-+++
-title = "Stuff about PCIe"
-date = 2022-01-03
-[taxonomies]
-tags = ["hardware"]
-+++
-
-## Speed
-
-The most common versions are 3 and 4, while 5 is starting to be
-available with newer Intel processors.
-
-| ver | encoding  | transfer rate | x1         | x2          | x4         | x8         | x16         |
-| --- | --------- | ------------- | ---------- | ----------- | ---------- | ---------- | ----------- |
-| 1   | 8b/10b    | 2.5GT/s       | 250MB/s    | 500MB/s     | 1GB/s      | 2GB/s      | 4GB/s       |
-| 2   | 8b/10b    | 5.0GT/s       | 500MB/s    | 1GB/s       | 2GB/s      | 4GB/s      | 8GB/s       |
-| 3   | 128b/130b | 8.0GT/s       | 984.6 MB/s | 1.969 GB/s  | 3.94 GB/s  | 7.88 GB/s  | 15.75 GB/s  |
-| 4   | 128b/130b | 16.0GT/s      | 1969 MB/s  | 3.938 GB/s  | 7.88 GB/s  | 15.75 GB/s | 31.51 GB/s  |
-| 5   | 128b/130b | 32.0GT/s      | 3938 MB/s  | 7.877 GB/s  | 15.75 GB/s | 31.51 GB/s | 63.02 GB/s  |
-| 6   | 128b/130b | 64.0GT/s      | 7877 MB/s  | 15.754 GB/s | 31.51 GB/s | 63.02 GB/s | 126.03 GB/s |
-
-This is a
-[useful](https://community.mellanox.com/s/article/understanding-pcie-configuration-for-maximum-performance)
-link to understand the formula:
-
-    Maximum PCIe Bandwidth = SPEED * WIDTH * (1 - ENCODING) - 1Gb/s
-
-We remove 1Gb/s for protocol overhead and error corrections. The main
-difference between the generations, besides the supported speed, is the
-encoding overhead of each packet. For generations 1 and 2, each packet
-sent on the PCIe link has a 20% header overhead. This was improved in
-generation 3, where the overhead was reduced to 1.5% (2/130) - see
-[8b/10b encoding](https://en.wikipedia.org/wiki/8b/10b_encoding) and
-[128b/130b encoding](https://en.wikipedia.org/wiki/64b/66b_encoding).
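-
-The arithmetic is simple enough to sketch in a few lines of Go (the
-speeds and encoding overheads are the per-generation values from the
-table above):
-
-```go
-package main
-
-import "fmt"
-
-// Per-generation transfer rate (GT/s) and encoding overhead: 20% for
-// 8b/10b, 2/130 for 128b/130b.
-var gens = map[int]struct {
-    gts      float64
-    overhead float64
-}{
-    1: {2.5, 2.0 / 10.0},
-    2: {5.0, 2.0 / 10.0},
-    3: {8.0, 2.0 / 130.0},
-    4: {16.0, 2.0 / 130.0},
-    5: {32.0, 2.0 / 130.0},
-}
-
-// maxBandwidth returns the usable bandwidth in Gb/s:
-// SPEED * WIDTH * (1 - ENCODING) - 1Gb/s of protocol overhead.
-func maxBandwidth(gen, lanes int) float64 {
-    g := gens[gen]
-    return g.gts*float64(lanes)*(1-g.overhead) - 1
-}
-
-func main() {
-    gbps := maxBandwidth(3, 4)
-    fmt.Printf("PCIe gen3 x4: %.1f Gb/s (~%.0f MB/s)\n", gbps, gbps*1000/8)
-}
-```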
-
-If we apply the formula, for a PCIe version 3 device we can expect
-3.7GB/s of data transfer rate:
-
-    8GT/s * 4 lanes * (1 - 2/130) - 1G = 32G * 0.985 - 1G = ~30Gb/s -> 3750MB/s
-
-## Topology
-
-An easy way to see the PCIe topology is with `lspci`:
-
-    $ lspci -tv
-    -[0000:00]-+-00.0 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Root Complex
-               +-01.0 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-1fh) PCIe Dummy Host Bridge
-               +-01.1-[01]----00.0 OCZ Technology Group, Inc. RD400/400A SSD
-               +-01.3-[02-03]----00.0-[03]----00.0 ASPEED Technology, Inc. ASPEED Graphics Family
-               +-01.5-[04]--+-00.0 Intel Corporation I350 Gigabit Network Connection
-               |            +-00.1 Intel Corporation I350 Gigabit Network Connection
-               |            +-00.2 Intel Corporation I350 Gigabit Network Connection
-               |            \-00.3 Intel Corporation I350 Gigabit Network Connection
-               +-02.0 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-1fh) PCIe Dummy Host Bridge
-               +-03.0 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-1fh) PCIe Dummy Host Bridge
-               +-04.0 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-1fh) PCIe Dummy Host Bridge
-               +-07.0 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-1fh) PCIe Dummy Host Bridge
-               +-07.1-[05]--+-00.0 Advanced Micro Devices, Inc. [AMD] Zeppelin/Raven/Raven2 PCIe Dummy Function
-               |            +-00.2 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Platform Security Processor
-               |            \-00.3 Advanced Micro Devices, Inc. [AMD] Zeppelin USB 3.0 Host controller
-               +-08.0 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-1fh) PCIe Dummy Host Bridge
-               +-08.1-[06]--+-00.0 Advanced Micro Devices, Inc. [AMD] Zeppelin/Renoir PCIe Dummy Function
-               |            +-00.1 Advanced Micro Devices, Inc. [AMD] Zeppelin Cryptographic Coprocessor NTBCCP
-               |            +-00.2 Advanced Micro Devices, Inc. [AMD] FCH SATA Controller [AHCI mode]
-               |            \-00.3 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) HD Audio Controller
-               +-14.0 Advanced Micro Devices, Inc. [AMD] FCH SMBus Controller
-               +-14.3 Advanced Micro Devices, Inc. [AMD] FCH LPC Bridge
-               +-18.0 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 0
-               +-18.1 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 1
-               +-18.2 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 2
-               +-18.3 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 3
-               +-18.4 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 4
-               +-18.5 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 5
-               +-18.6 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 6
-               \-18.7 Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 7
-
-Now, how do we read this?
-
-```
-+-[10000:00]-+-02.0-[01]----00.0 Intel Corporation NVMe Datacenter SSD [3DNAND, Beta Rock Controller]
-|            \-03.0-[02]----00.0 Intel Corporation NVMe Datacenter SSD [3DNAND, Beta Rock Controller]
-```
-
-This is a lot of information, so how do we read it?
-
-- The first part in brackets (`[10000:00]`) is the domain and the bus.
-- The second part (`02.0`) is the device and function of the bridge on
-  that bus.
-- The number in brackets that follows (`[01]`) is the secondary bus
-  behind that bridge, and `00.0` is the device and function on that
-  bus.
-
-## View a single device
-
-```sh
-lspci -v -s 0000:01:00.0
-: 01:00.0 Non-Volatile memory controller: OCZ Technology Group, Inc. RD400/400A SSD (rev 01) (prog-if 02 [NVM Express])
-: Subsystem: OCZ Technology Group, Inc. RD400/400A SSD
-: Flags: bus master, fast devsel, latency 0, IRQ 41, NUMA node 0
-: Memory at ef800000 (64-bit, non-prefetchable) [size=16K]
-: Capabilities: <access denied>
-: Kernel driver in use: nvme
-: Kernel modules: nvme
-```
-
-## Reading `lspci` output
-
-    $ sudo lspci -vvv -s 0000:01:00.0
-    01:00.0 Non-Volatile memory controller: OCZ Technology Group, Inc. RD400/400A SSD (rev 01) (prog-if 02 [NVM Express])
-            Subsystem: OCZ Technology Group, Inc. RD400/400A SSD
-            Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
-            Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
-
-## Go Modules
-
-[Go modules](https://blog.golang.org/using-go-modules) have been added
-in 2019 with Go 1.11. A number of changes were introduced with [Go
-1.16](https://blog.golang.org/go116-module-changes). This document is a
-reference for me so that I can find answers to things I keep forgetting.
-
-### Creating a new module
-
-To create a new module, run `go mod init golang.fcuny.net/m`. This
-creates the `go.mod` file (the `go.sum` file will appear once
-dependencies are added).
-
-In the `go.mod` file you'll find:
-
-- the module import path (prefixed with `module`)
-- the list of dependencies (within `require`)
-- the version of Go to use for the module
-
-### Versioning
-
-To bump the version of a module:
-
-```bash
-$ git tag v1.2.3
-$ git push --tags
-```
-
-Then as a user:
-
-```bash
-$ go get -d golang.fcuny.net/m@v1.2.3
-```
-
-### Updating dependencies
-
-To add missing and remove unused dependencies, run `go mod tidy`. To
-update dependencies to newer versions, run `go get -u ./...`.
-
-### Editing a module
-
-If you need to modify a module, you can check out the module in your
-workspace (`git clone `).
-
-Edit the `go.mod` file to add
-
-```go
-replace => 
-```
-
-Then modify the code of the module, and the next time you compile the
-project, the cloned module will be used.
-
-This is particularly useful when trying to debug an issue with an
-external module.
-
-### Vendoring modules
-
-It's still possible to vendor modules by running `go mod vendor`. This
-can be useful in the case of a CI setup that does not have access to
-the internet.
-
-### Proxy
-
-As of version 1.13, the variable `GOPROXY` defaults to
-`https://proxy.golang.org,direct` (see
-[here](https://github.com/golang/go/blob/c95464f0ea3f87232b1f3937d1b37da6f335f336/src/cmd/go/internal/cfg/cfg.go#L269)).
-As a result, when running something like
-`go get golang.org/x/tools/gopls@latest`, the request goes through the
-proxy.
-
-There are a number of ways to control this behavior; they are documented
-[here](https://golang.org/ref/mod#private-modules).
-
-There are a few interesting things that can be done when using the
-proxy.
-Among them are a few special URLs (better documentation
-[here](https://golang.org/ref/mod#goproxy-protocol)):
-
-| path                  | description                                                                               |
-| --------------------- | ----------------------------------------------------------------------------------------- |
-| $mod/@v/list          | Returns the list of known versions - there's one version per line and it's in plain text |
-| $mod/@v/$version.info | Returns metadata about a version in JSON format                                           |
-| $mod/@v/$version.mod  | Returns the `go.mod` file for that version                                                |
-
-For example, looking at the most recent versions for `gopls`:
-
-```bash
-; curl -s -L https://proxy.golang.org/golang.org/x/tools/gopls/@v/list|sort -r|head
-v0.7.1-pre.2
-v0.7.1-pre.1
-v0.7.1
-v0.7.0-pre.3
-v0.7.0-pre.2
-v0.7.0-pre.1
-v0.7.0
-v0.6.9-pre.1
-v0.6.9
-v0.6.8-pre.1
-```
-
-And let's look at the content of the `go.mod` for the most recent
-version:
-
-```bash
-; curl -s -L https://proxy.golang.org/golang.org/x/tools/gopls/@v/v0.7.1-pre.2.mod
-module golang.org/x/tools/gopls
-
-go 1.17
-
-require (
-	github.com/BurntSushi/toml v0.3.1 // indirect
-	github.com/google/go-cmp v0.5.5
-	github.com/google/safehtml v0.0.2 // indirect
-	github.com/jba/templatecheck v0.6.0
-	github.com/sanity-io/litter v1.5.0
-	github.com/sergi/go-diff v1.1.0
-	golang.org/x/mod v0.4.2
-	golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect
-	golang.org/x/sys v0.0.0-20210510120138-977fb7262007
-	golang.org/x/text v0.3.6 // indirect
-	golang.org/x/tools v0.1.6-0.20210802203754-9b21a8868e16
-	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
-	honnef.co/go/tools v0.2.0
-	mvdan.cc/gofumpt v0.1.1
-	mvdan.cc/xurls/v2 v2.2.0
-)
-```
-
-## Tooling
-
-### LSP
-
-`gopls` is the official implementation of the Language Server Protocol
-for Go, maintained by the Go team. To install the latest version, run
-`go install golang.org/x/tools/gopls@latest`.
-
-### `staticcheck`
-
-[`staticcheck`](https://staticcheck.io/) is a great tool to run against
-your code to find issues. To install the latest version, run
-`go install honnef.co/go/tools/cmd/staticcheck@latest`.
-
-## Emacs integration
-
-### `go-mode`
-
-[This is the mode](https://github.com/dominikh/go-mode.el) to install to
-get syntax highlighting (mostly).
-
-### Integration with LSP
-
-Emacs has pretty good integration with LSP, and ["Eglot for better
-programming experience in
-Emacs"](https://whatacold.io/blog/2022-01-22-emacs-eglot-lsp/) is a good
-starting point.
-
-#### eglot
-
-[This is the main mode to install](https://github.com/joaotavora/eglot).
-
-The configuration is straightforward; this is what I use:
-
-```lisp
-;; for go's LSP I want to use staticcheck and placeholders for completion
-(customize-set-variable 'eglot-workspace-configuration
-                        '((:gopls .
-                                  ((staticcheck . t)
-                                   (matcher . "CaseSensitive")
-                                   (usePlaceholders . t)))))
-
-;; ensure we load eglot for some specific modes
-(dolist (hook '(go-mode-hook nix-mode-hook))
-  (add-hook hook 'eglot-ensure))
-```
-
-`eglot` integrates well with existing Emacs modes, mainly xref,
-flymake and eldoc.
-
-## Profiling
-
-### pprof
-
-[pprof](https://github.com/google/pprof) is a tool to visualize
-performance data. Let's start with the following test:
Let's start with the following test: - -```go -package main - -import ( - "strings" - "testing" -) - -func BenchmarkStringJoin(b *testing.B) { - input := []string{"a", "b"} - for i := 0; i <= b.N; i++ { - r := strings.Join(input, " ") - if r != "a b" { - b.Errorf("want a b got %s", r) - } - } -} -``` - -Let's run a benchmark with -`go test . -bench=. -cpuprofile cpu_profile.out`: - -```go -goos: linux -goarch: amd64 -pkg: golang.fcuny.net/m -cpu: Intel(R) Core(TM) i3-1005G1 CPU @ 1.20GHz -BenchmarkStringJoin-4 41833486 26.85 ns/op 3 B/op 1 allocs/op -PASS -ok golang.fcuny.net/m 1.327s -``` - -And let's take a look at the profile with -`go tool pprof cpu_profile.out` - -```bash -File: m.test -Type: cpu -Time: Aug 15, 2021 at 3:01pm (PDT) -Duration: 1.31s, Total samples = 1.17s (89.61%) -Entering interactive mode (type "help" for commands, "o" for options) -(pprof) top -Showing nodes accounting for 1100ms, 94.02% of 1170ms total -Showing top 10 nodes out of 41 - flat flat% sum% cum cum% - 240ms 20.51% 20.51% 240ms 20.51% runtime.memmove - 220ms 18.80% 39.32% 320ms 27.35% runtime.mallocgc - 130ms 11.11% 50.43% 450ms 38.46% runtime.makeslice - 110ms 9.40% 59.83% 1150ms 98.29% golang.fcuny.net/m.BenchmarkStringJoin - 110ms 9.40% 69.23% 580ms 49.57% strings.(*Builder).grow (inline) - 110ms 9.40% 78.63% 1040ms 88.89% strings.Join - 70ms 5.98% 84.62% 300ms 25.64% strings.(*Builder).WriteString - 50ms 4.27% 88.89% 630ms 53.85% strings.(*Builder).Grow (inline) - 40ms 3.42% 92.31% 40ms 3.42% runtime.nextFreeFast (inline) - 20ms 1.71% 94.02% 20ms 1.71% runtime.getMCache (inline) -``` - -We can get a breakdown of the data for our module: - -```bash -(pprof) list golang.fcuny.net -Total: 1.17s -ROUTINE ======================== golang.fcuny.net/m.BenchmarkStringJoin in /home/fcuny/workspace/gobench/app_test.go - 110ms 1.15s (flat, cum) 98.29% of Total - . . 5: "testing" - . . 6:) - . . 7: - . . 8:func BenchmarkStringJoin(b *testing.B) { - . . 9: b.ReportAllocs() - 10ms 10ms 10: input := []string{"a", "b"} - . . 11: for i := 0; i <= b.N; i++ { - 20ms 1.06s 12: r := strings.Join(input, " ") - 80ms 80ms 13: if r != "a b" { - . . 14: b.Errorf("want a b got %s", r) - . . 15: } - . . 16: } - . . 17:} -``` diff --git a/content/notes/working-with-nix.md b/content/notes/working-with-nix.md deleted file mode 100644 index 1269963..0000000 --- a/content/notes/working-with-nix.md +++ /dev/null @@ -1,45 +0,0 @@ -+++ -title = "working with nix" -date = 2022-05-10 -[taxonomies] -tags = ["nix"] -+++ - -## the `nix develop` command - -The `nix develop` command is for working on a repository. If our -repository contains a `Makefile`, it will be used by the various -sub-commands. - -`nix develop` supports multiple -[phases](https://nixos.org/manual/nixpkgs/stable/#sec-stdenv-phases) and -they map as follow: - -| phase | default to | command | note | -| -------------- | -------------- | ------------------------- | ---- | -| configurePhase | `./configure` | `nix develop --configure` | | -| buildPhase | `make` | `nix develop --build` | | -| checkPhase | `make check` | `nix develop --check` | | -| installPhase | `make install` | `nix develop --install` | | - -In the repository, running `nix develop --build` will build the binary -**using the Makefile**. This is different from running `nix build`. - -## the `nix build` and `nix run` commands - -### for Go - -For Go, there's the `buildGoModule`. 
Looking at the
-[source](https://github.com/NixOS/nixpkgs/blob/fb7287e6d2d2684520f756639846ee07f6287caa/pkgs/development/go-modules/generic/default.nix)
-we can see there's a definition of what will be done for each phases. As
-a result, we don't have to define them ourselves.
-
-If we run `nix build` in the repository, it will run the default [build
-phase](https://github.com/NixOS/nixpkgs/blob/fb7287e6d2d2684520f756639846ee07f6287caa/pkgs/development/go-modules/generic/default.nix#L171).
-
-## `buildInputs` or `nativeBuildInputs`
-
-- `nativeBuildInputs` is intended for architecture-dependent
-  build-time-only dependencies
-- `buildInputs` is intended for architecture-independent
-  build-time-only dependencies
diff --git a/content/resume.md b/content/resume.md
index a3922ec..cb095da 100644
--- a/content/resume.md
+++ b/content/resume.md
@@ -1,6 +1,6 @@
 +++
 title = "Resume"
-template = "orphan.html"
+template = "resume.html"
 date = 2024-08-10
 [taxonomies]
 tags = ["work"]
diff --git a/content/stuff-about-pcie.md b/content/stuff-about-pcie.md
new file mode 100644
index 0000000..311e55f
--- /dev/null
+++ b/content/stuff-about-pcie.md
@@ -0,0 +1,266 @@
++++
+title = "Stuff about PCIe"
+date = 2022-01-03
+[taxonomies]
+tags = ["hardware"]
++++
+
+## Speed
+
+The most common versions are 3 and 4, while 5 is starting to be
+available with newer Intel processors.
+
+| ver | encoding  | transfer rate | x1         | x2          | x4         | x8         | x16         |
+| --- | --------- | ------------- | ---------- | ----------- | ---------- | ---------- | ----------- |
+| 1   | 8b/10b    | 2.5GT/s       | 250MB/s    | 500MB/s     | 1GB/s      | 2GB/s      | 4GB/s       |
+| 2   | 8b/10b    | 5.0GT/s       | 500MB/s    | 1GB/s       | 2GB/s      | 4GB/s      | 8GB/s       |
+| 3   | 128b/130b | 8.0GT/s       | 984.6 MB/s | 1.969 GB/s  | 3.94 GB/s  | 7.88 GB/s  | 15.75 GB/s  |
+| 4   | 128b/130b | 16.0GT/s      | 1969 MB/s  | 3.938 GB/s  | 7.88 GB/s  | 15.75 GB/s | 31.51 GB/s  |
+| 5   | 128b/130b | 32.0GT/s      | 3938 MB/s  | 7.877 GB/s  | 15.75 GB/s | 31.51 GB/s | 63.02 GB/s  |
+| 6   | 128b/130b | 64.0GT/s      | 7877 MB/s  | 15.754 GB/s | 31.51 GB/s | 63.02 GB/s | 126.03 GB/s |
+
+This is a
+[useful](https://community.mellanox.com/s/article/understanding-pcie-configuration-for-maximum-performance)
+link to understand the formula:
+
+    Maximum PCIe Bandwidth = SPEED * WIDTH * (1 - ENCODING) - 1Gb/s
+
+We remove 1Gb/s for protocol overhead and error correction. The main
+difference between the generations, besides the supported speed, is
+the encoding overhead of each packet. For generations 1 and 2, each
+packet sent on the PCIe link carries a 20% encoding overhead. This was
+improved in generation 3, where the overhead was reduced to 1.5%
+(2/130) - see
+[8b/10b encoding](https://en.wikipedia.org/wiki/8b/10b_encoding) and
+[128b/130b encoding](https://en.wikipedia.org/wiki/64b/66b_encoding).
+
+If we apply the formula to a PCIe gen 3 device with 4 lanes, we can
+expect about 3.7GB/s of data transfer rate:
+
+    8GT/s * 4 lanes * (1 - 2/130) - 1G = 32G * 0.985 - 1G = ~30Gb/s -> 3750MB/s
+
+## Topology
+
+An easy way to see the PCIe topology is with `lspci`:
+
+    $ lspci -tv
+    -[0000:00]-+-00.0  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Root Complex
+               +-01.0  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-1fh) PCIe Dummy Host Bridge
+               +-01.1-[01]----00.0  OCZ Technology Group, Inc. RD400/400A SSD
+               +-01.3-[02-03]----00.0-[03]----00.0  ASPEED Technology, Inc. ASPEED Graphics Family
+               +-01.5-[04]--+-00.0  Intel Corporation I350 Gigabit Network Connection
+               |            +-00.1  Intel Corporation I350 Gigabit Network Connection
+               |            +-00.2  Intel Corporation I350 Gigabit Network Connection
+               |            \-00.3  Intel Corporation I350 Gigabit Network Connection
+               +-02.0  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-1fh) PCIe Dummy Host Bridge
+               +-03.0  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-1fh) PCIe Dummy Host Bridge
+               +-04.0  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-1fh) PCIe Dummy Host Bridge
+               +-07.0  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-1fh) PCIe Dummy Host Bridge
+               +-07.1-[05]--+-00.0  Advanced Micro Devices, Inc. [AMD] Zeppelin/Raven/Raven2 PCIe Dummy Function
+               |            +-00.2  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Platform Security Processor
+               |            \-00.3  Advanced Micro Devices, Inc. [AMD] Zeppelin USB 3.0 Host controller
+               +-08.0  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-1fh) PCIe Dummy Host Bridge
+               +-08.1-[06]--+-00.0  Advanced Micro Devices, Inc. [AMD] Zeppelin/Renoir PCIe Dummy Function
+               |            +-00.1  Advanced Micro Devices, Inc. [AMD] Zeppelin Cryptographic Coprocessor NTBCCP
+               |            +-00.2  Advanced Micro Devices, Inc. [AMD] FCH SATA Controller [AHCI mode]
+               |            \-00.3  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) HD Audio Controller
+               +-14.0  Advanced Micro Devices, Inc. [AMD] FCH SMBus Controller
+               +-14.3  Advanced Micro Devices, Inc. [AMD] FCH LPC Bridge
+               +-18.0  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 0
+               +-18.1  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 1
+               +-18.2  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 2
+               +-18.3  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 3
+               +-18.4  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 4
+               +-18.5  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 5
+               +-18.6  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 6
+               \-18.7  Advanced Micro Devices, Inc. [AMD] Family 17h (Models 00h-0fh) Data Fabric: Device 18h; Function 7
+
+Now, how do we read this?
+
+```
++-[10000:00]-+-02.0-[01]----00.0  Intel Corporation NVMe Datacenter SSD [3DNAND, Beta Rock Controller]
+|            \-03.0-[02]----00.0  Intel Corporation NVMe Datacenter SSD [3DNAND, Beta Rock Controller]
+```
+
+This is a lot of information. Let's break it down:
+
+- The first part in brackets (`[10000:00]`) is the domain and the bus.
+- The second part (`02.0`) is the device and function of the bridge on
+  that bus.
+- The number in brackets after the bridge (`[01]`) is the secondary bus
+  behind it, and the final `00.0` is the device and function on that
+  bus.
+
+## View a single device
+
+```sh
+lspci -v -s 0000:01:00.0
+: 01:00.0 Non-Volatile memory controller: OCZ Technology Group, Inc. RD400/400A SSD (rev 01) (prog-if 02 [NVM Express])
+: Subsystem: OCZ Technology Group, Inc. RD400/400A SSD
+: Flags: bus master, fast devsel, latency 0, IRQ 41, NUMA node 0
+: Memory at ef800000 (64-bit, non-prefetchable) [size=16K]
+: Capabilities: <access denied>
+: Kernel driver in use: nvme
+: Kernel modules: nvme
+```
+
+## Reading `lspci` output
+
+    $ sudo lspci -vvv -s 0000:01:00.0
+    01:00.0 Non-Volatile memory controller: OCZ Technology Group, Inc. RD400/400A SSD (rev 01) (prog-if 02 [NVM Express])
+    Subsystem: OCZ Technology Group, Inc. RD400/400A SSD
    Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+
    Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR-
+Here's the `systemd` unit configuration I use for this:
+
+```ini
+[Unit]
+Description=Grafana in a docker container
+Documentation=https://grafana.com/docs/
+After=docker.service
+Requires=docker.service
+
+[Service]
+Restart=on-failure
+RuntimeDirectory=grafana
+ExecStartPre=-/usr/bin/docker kill grafana-server
+ExecStartPre=-/usr/bin/docker rm grafana-server
+ExecStartPre=-/usr/bin/docker pull grafana/grafana:latest
+
+ExecStart=/usr/bin/docker run \
+  -p 3000:3000 \
+  -e TZ='America/Los_Angeles' \
+  --name grafana-server \
+  -v /data/containers/grafana/etc/grafana:/etc/grafana \
+  -v /data/containers/grafana/var/lib/grafana:/var/lib/grafana \
+  -v /data/containers/grafana/var/log/grafana:/var/log/grafana \
+  --user=grafana \
+  --label traefik.enable=true \
+  --label traefik.http.middlewares.grafana-https-redirect.redirectscheme.scheme=https \
+  --label traefik.http.middlewares.grafana-https-redirect.redirectscheme.permanent=true \
+  --label traefik.http.routers.grafana-http.rule=Host(`dash.example.net`) \
+  --label traefik.http.routers.grafana-http.entrypoints=http \
+  --label traefik.http.routers.grafana-http.service=grafana-svc \
+  --label traefik.http.routers.grafana-http.middlewares=grafana-https-redirect \
+  --label traefik.http.routers.grafana-https.rule=Host(`dash.example.net`) \
+  --label traefik.http.routers.grafana-https.entrypoints=https \
+  --label traefik.http.routers.grafana-https.tls=true \
+  --label traefik.http.routers.grafana-https.tls.certresolver=dash \
+  --label traefik.http.routers.grafana-https.service=grafana-svc \
+  --label traefik.http.services.grafana-svc.loadbalancer.server.port=3000 \
+  grafana/grafana:latest
+
+ExecStop=/usr/bin/docker stop grafana-server
+
+[Install]
+WantedBy=multi-user.target
+```
+
+Now I can access my Grafana instance over HTTPS (plain HTTP requests
+are redirected to HTTPS) as long as the tailscale interface is up on
+the machine I'm using (e.g. my desktop or my phone).
diff --git a/content/working-with-go.md b/content/working-with-go.md
new file mode 100644
index 0000000..2a5d7a6
--- /dev/null
+++ b/content/working-with-go.md
@@ -0,0 +1,285 @@
++++
+title = "Working with Go"
+date = 2021-08-05
+[taxonomies]
+tags = ["go"]
++++
+
+_This document assumes go version \>= 1.16_.
+
+## Go Modules
+
+[Go modules](https://blog.golang.org/using-go-modules) were introduced
+in 2018 with Go 1.11. A number of changes were introduced with [Go
+1.16](https://blog.golang.org/go116-module-changes). This document is a
+reference for me so that I can find answers to things I keep forgetting.
+
+### Creating a new module
+
+To create a new module, run `go mod init golang.fcuny.net/m`. This
+creates the `go.mod` file; a `go.sum` file will appear once you add
+dependencies.
+
+In the `go.mod` file you'll find:
+
+- the module import path (prefixed with `module`)
+- the list of dependencies (within `require`)
+- the version of go to use for the module
+
+### Versioning
+
+To bump the version of a module:
+
+```bash
+$ git tag v1.2.3
+$ git push --tags
+```
+
+Then as a user:
+
+```bash
+$ go get -d golang.fcuny.net/m@v1.2.3
+```
+
+### Updating dependencies
+
+To update the dependencies, run `go mod tidy`.
+
+### Editing a module
+
+If you need to modify a module, you can check out the module in your
+workspace (`git clone <url>`).
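+
+For example, to work on a local copy of a dependency (the module path
+and directory below are purely illustrative):
+
+```bash
+$ git clone https://github.com/example/somelib ../somelib
+```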
+
+Edit the `go.mod` file to add a `replace` directive:
+
+```go
+replace <module> => <local path>
+```
+
+Then modify the code of the module and the next time you compile the
+project, the cloned module will be used.
+
+This is particularly useful when trying to debug an issue with an
+external module.
+
+### Vendor-ing modules
+
+It's still possible to vendor modules by running `go mod vendor`. This
+can be useful in the case of a CI setup that does not have access to
+the internet.
+
+### Proxy
+
+As of version 1.13, the variable `GOPROXY` defaults to
+`https://proxy.golang.org,direct` (see
+[here](https://github.com/golang/go/blob/c95464f0ea3f87232b1f3937d1b37da6f335f336/src/cmd/go/internal/cfg/cfg.go#L269)).
+As a result, when running something like
+`go get golang.org/x/tools/gopls@latest`, the request goes through the
+proxy.
+
+There are a number of ways to control this behavior; they are
+documented [here](https://golang.org/ref/mod#private-modules).
+
+There are a few interesting things that can be done when using the
+proxy. It serves a few special URLs (better documented
+[here](https://golang.org/ref/mod#goproxy-protocol)):
+
+| path                  | description                                                                               |
+| --------------------- | ----------------------------------------------------------------------------------------- |
+| $mod/@v/list          | Returns the list of known versions - there's one version per line and it's in plain text  |
+| $mod/@v/$version.info | Returns metadata about a version in JSON format                                           |
+| $mod/@v/$version.mod  | Returns the `go.mod` file for that version                                                |
+
+For example, looking at the most recent versions for `gopls`:
+
+```bash
+; curl -s -L https://proxy.golang.org/golang.org/x/tools/gopls/@v/list|sort -r|head
+v0.7.1-pre.2
+v0.7.1-pre.1
+v0.7.1
+v0.7.0-pre.3
+v0.7.0-pre.2
+v0.7.0-pre.1
+v0.7.0
+v0.6.9-pre.1
+v0.6.9
+v0.6.8-pre.1
+```
+
+Let's check the details for the most recent version, using the
+`$version.info` endpoint, which returns a small JSON document with the
+version and the time it was published:
+
+```bash
+; curl -s -L https://proxy.golang.org/golang.org/x/tools/gopls/@v/v0.7.1-pre.2.info
+```
+
+And let's look at the content of the `go.mod` for that version too:
+
+```bash
+; curl -s -L https://proxy.golang.org/golang.org/x/tools/gopls/@v/v0.7.1-pre.2.mod
+module golang.org/x/tools/gopls
+
+go 1.17
+
+require (
+	github.com/BurntSushi/toml v0.3.1 // indirect
+	github.com/google/go-cmp v0.5.5
+	github.com/google/safehtml v0.0.2 // indirect
+	github.com/jba/templatecheck v0.6.0
+	github.com/sanity-io/litter v1.5.0
+	github.com/sergi/go-diff v1.1.0
+	golang.org/x/mod v0.4.2
+	golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect
+	golang.org/x/sys v0.0.0-20210510120138-977fb7262007
+	golang.org/x/text v0.3.6 // indirect
+	golang.org/x/tools v0.1.6-0.20210802203754-9b21a8868e16
+	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
+	honnef.co/go/tools v0.2.0
+	mvdan.cc/gofumpt v0.1.1
+	mvdan.cc/xurls/v2 v2.2.0
+)
+```
+
+## Tooling
+
+### LSP
+
+`gopls` is the reference implementation of the Language Server Protocol
+for Go, maintained by the Go team. To install the latest version, run
+`go install golang.org/x/tools/gopls@latest`.
+
+### `staticcheck`
+
+[`staticcheck`](https://staticcheck.io/) is a great tool to run against
+your code to find issues. To install the latest version, run
+`go install honnef.co/go/tools/cmd/staticcheck@latest`.
+
+## Emacs integration
+
+### `go-mode`
+
+[This is the mode](https://github.com/dominikh/go-mode.el) to install to
+get syntax highlighting (mostly).
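+
+A small addition I find useful (a sketch, assuming `go-mode` is
+installed and `gofmt` is in the `PATH`) is to format buffers on save:
+
+```lisp
+;; load go-mode and run gofmt on Go buffers before each save
+(require 'go-mode)
+(add-hook 'before-save-hook #'gofmt-before-save)
+```
+
+Pointing the `gofmt-command` variable at `goimports` also keeps the
+imports tidy on save.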
+
+### Integration with LSP
+
+Emacs has a pretty good integration with LSP, and ["Eglot for better
+programming experience in
+Emacs"](https://whatacold.io/blog/2022-01-22-emacs-eglot-lsp/) is a good
+starting point.
+
+#### eglot
+
+[This is the main mode to install](https://github.com/joaotavora/eglot).
+
+The configuration is straightforward; this is what I use:
+
+```lisp
+;; for go's LSP I want to use staticcheck and placeholders for completion
+(customize-set-variable 'eglot-workspace-configuration
+    '((:gopls .
+        ((staticcheck . t)
+         (matcher . "CaseSensitive")
+         (usePlaceholders . t)))))
+
+;; ensure we load eglot for some specific modes
+(dolist (hook '(go-mode-hook nix-mode-hook))
+  (add-hook hook 'eglot-ensure))
+```
+
+`eglot` integrates well with existing modes for Emacs, mainly `xref`,
+`flymake` and `eldoc`.
+
+## Profiling
+
+### pprof
+
+[pprof](https://github.com/google/pprof) is a tool to visualize
+performance data. Let's start with the following test:
+
+```go
+package main
+
+import (
+	"strings"
+	"testing"
+)
+
+func BenchmarkStringJoin(b *testing.B) {
+	b.ReportAllocs()
+	input := []string{"a", "b"}
+	for i := 0; i <= b.N; i++ {
+		r := strings.Join(input, " ")
+		if r != "a b" {
+			b.Errorf("want a b got %s", r)
+		}
+	}
+}
+```
+
+Let's run a benchmark with
+`go test . -bench=. -cpuprofile cpu_profile.out`:
+
+```bash
+goos: linux
+goarch: amd64
+pkg: golang.fcuny.net/m
+cpu: Intel(R) Core(TM) i3-1005G1 CPU @ 1.20GHz
+BenchmarkStringJoin-4   41833486   26.85 ns/op   3 B/op   1 allocs/op
+PASS
+ok	golang.fcuny.net/m	1.327s
+```
+
+And let's take a look at the profile with
+`go tool pprof cpu_profile.out`:
+
+```bash
+File: m.test
+Type: cpu
+Time: Aug 15, 2021 at 3:01pm (PDT)
+Duration: 1.31s, Total samples = 1.17s (89.61%)
+Entering interactive mode (type "help" for commands, "o" for options)
+(pprof) top
+Showing nodes accounting for 1100ms, 94.02% of 1170ms total
+Showing top 10 nodes out of 41
+      flat  flat%   sum%        cum   cum%
+     240ms 20.51% 20.51%      240ms 20.51%  runtime.memmove
+     220ms 18.80% 39.32%      320ms 27.35%  runtime.mallocgc
+     130ms 11.11% 50.43%      450ms 38.46%  runtime.makeslice
+     110ms  9.40% 59.83%     1150ms 98.29%  golang.fcuny.net/m.BenchmarkStringJoin
+     110ms  9.40% 69.23%      580ms 49.57%  strings.(*Builder).grow (inline)
+     110ms  9.40% 78.63%     1040ms 88.89%  strings.Join
+      70ms  5.98% 84.62%      300ms 25.64%  strings.(*Builder).WriteString
+      50ms  4.27% 88.89%      630ms 53.85%  strings.(*Builder).Grow (inline)
+      40ms  3.42% 92.31%       40ms  3.42%  runtime.nextFreeFast (inline)
+      20ms  1.71% 94.02%       20ms  1.71%  runtime.getMCache (inline)
+```
+
+We can get a breakdown of the data for our module:
+
+```bash
+(pprof) list golang.fcuny.net
+Total: 1.17s
+ROUTINE ======================== golang.fcuny.net/m.BenchmarkStringJoin in /home/fcuny/workspace/gobench/app_test.go
+     110ms      1.15s (flat, cum) 98.29% of Total
+         .          .      5:	"testing"
+         .          .      6:)
+         .          .      7:
+         .          .      8:func BenchmarkStringJoin(b *testing.B) {
+         .          .      9:	b.ReportAllocs()
+      10ms       10ms     10:	input := []string{"a", "b"}
+         .          .     11:	for i := 0; i <= b.N; i++ {
+      20ms      1.06s     12:		r := strings.Join(input, " ")
+      80ms       80ms     13:		if r != "a b" {
+         .          .     14:			b.Errorf("want a b got %s", r)
+         .          .     15:		}
+         .          .     16:	}
+         .          .     17:}
+```
diff --git a/content/working-with-nix.md b/content/working-with-nix.md
new file mode 100644
index 0000000..1269963
--- /dev/null
+++ b/content/working-with-nix.md
@@ -0,0 +1,45 @@
++++
+title = "working with nix"
+date = 2022-05-10
+[taxonomies]
+tags = ["nix"]
++++
+
+## the `nix develop` command
+
+The `nix develop` command is for working on a repository. If our
+repository contains a `Makefile`, it will be used by the various
+sub-commands.
+
+`nix develop` supports multiple
+[phases](https://nixos.org/manual/nixpkgs/stable/#sec-stdenv-phases) and
+they map as follows:
+
+| phase          | default to     | command                   | note |
+| -------------- | -------------- | ------------------------- | ---- |
+| configurePhase | `./configure`  | `nix develop --configure` |      |
+| buildPhase     | `make`         | `nix develop --build`     |      |
+| checkPhase     | `make check`   | `nix develop --check`     |      |
+| installPhase   | `make install` | `nix develop --install`   |      |
+
+In the repository, running `nix develop --build` will build the binary
+**using the Makefile**. This is different from running `nix build`.
+
+## the `nix build` and `nix run` commands
+
+### for Go
+
+For Go, there's the `buildGoModule` helper. Looking at the
+[source](https://github.com/NixOS/nixpkgs/blob/fb7287e6d2d2684520f756639846ee07f6287caa/pkgs/development/go-modules/generic/default.nix)
+we can see there's a definition of what will be done for each phase. As
+a result, we don't have to define them ourselves.
+
+If we run `nix build` in the repository, it will run the default [build
+phase](https://github.com/NixOS/nixpkgs/blob/fb7287e6d2d2684520f756639846ee07f6287caa/pkgs/development/go-modules/generic/default.nix#L171).
+
+## `buildInputs` or `nativeBuildInputs`
+
+- `nativeBuildInputs` is intended for dependencies that have to run on
+  the build machine during the build (compilers, code generators, ...)
+- `buildInputs` is intended for dependencies that the produced output
+  needs at run time (typically libraries)
diff --git a/templates/bike.html b/templates/bike.html
new file mode 100644
index 0000000..db8634a
--- /dev/null
+++ b/templates/bike.html
@@ -0,0 +1,10 @@
+{% extends "base.html" %}
+
+{% block title %}{{ page.title }} - {{ config.title }}{% endblock title %}
+
+{% block content -%}
+

{{- page.title -}}

+ +{{ page.content | safe -}} + +{%- endblock content -%} diff --git a/templates/blog.html b/templates/blog.html deleted file mode 100644 index cfdd8c2..0000000 --- a/templates/blog.html +++ /dev/null @@ -1,19 +0,0 @@ -{% extends "section.html" %} - -{%- block content -%} - -{%- for year, posts in section.pages | group_by(attribute="year") -%} -
-
    -{%- for post in posts %} -
  • - {{- post.title -}} - -
  • -{%- endfor %} -
- -
-{%- endfor %} - -{%- endblock content -%} diff --git a/templates/feed.xml b/templates/feed.xml deleted file mode 100644 index 2ac0851..0000000 --- a/templates/feed.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - {{ config.title }} - - {{ last_updated | date(format="%+") }} - {{ feed_url | safe }} - {%- set blog = get_section(path="blog/_index.md") -%} - {%- for page in blog.pages %} - - {{ page.title }} - {{ page.date | date(format="%+") }} - {{ page.updated | default(value=page.date) | date(format="%+") }} - {% for author in page.authors %} - - - {{ author }} - - - {% else %} - - - {%- if config.author -%} - {{ config.author }} - {%- else -%} - Unknown - {%- endif -%} - - - {% endfor %} - - {{ page.permalink | safe }} - {% if page.summary %} - {{ page.summary }} - {% else %} - {{ page.content }} - {% endif %} - - {%- endfor %} - diff --git a/templates/orphan.html b/templates/orphan.html deleted file mode 100644 index db8634a..0000000 --- a/templates/orphan.html +++ /dev/null @@ -1,10 +0,0 @@ -{% extends "base.html" %} - -{% block title %}{{ page.title }} - {{ config.title }}{% endblock title %} - -{% block content -%} -

{{- page.title -}}

- -{{ page.content | safe -}} - -{%- endblock content -%} diff --git a/templates/resume.html b/templates/resume.html new file mode 100644 index 0000000..db8634a --- /dev/null +++ b/templates/resume.html @@ -0,0 +1,10 @@ +{% extends "base.html" %} + +{% block title %}{{ page.title }} - {{ config.title }}{% endblock title %} + +{% block content -%} +

{{- page.title -}}

+ +{{ page.content | safe -}} + +{%- endblock content -%} diff --git a/templates/section.html b/templates/section.html deleted file mode 100644 index e6b6694..0000000 --- a/templates/section.html +++ /dev/null @@ -1,16 +0,0 @@ -{% extends "orphan.html" %} - -{% block title %}{{ section.title }} - {{ config.title }}{% endblock title %} - -{% block content %} -

{{ section.title }}

-
    -{% for post in section.pages %} -
  • - {{ post.title }} -
    - {{ post.date | date(format="%d %h %Y") }} -
  • -{% endfor %} -
-{% endblock content %} -- cgit v1.2.3