起因

选择 Traefik 主要是因为其强大的生态支持,例如 Kubernetes、Docker Swarm 等,能够自动发现容器并进行路由注册。而后我在本地就从原先的 Nginx 转向了 Traefik,起初我本地所有服务的证书都用 mkcert 来生成,后来发现每次部署一个项目都要先手动为项目生成一个 TLD 的证书,然后还要手动将证书添加到 Traefik 的配置文件中,这是一个单次工作量不大,但是数量多了以后极其浪费时间的过程:

下面就是我现有项目所用的证书文件:

tree
.
├── certs
│   ├── acme.json
│   ├── adguard.test.cer
│   ├── adguard.test.key
│   ├── aptabase.test.cer
│   ├── aptabase.test.key
│   ├── authorizer.test.cer
│   ├── authorizer.test.key
│   ├── bytebase.test.cer
│   ├── bytebase.test.key
│   ├── calcom.test.cer
│   ├── calcom.test.key
│   ├── cockroachdb.test.cer
│   ├── cockroachdb.test.key
│   ├── consul.test.cer
│   ├── consul.test.key
│   ├── dify.test.cer
│   ├── dify.test.key
│   ├── directus.test.cer
│   ├── directus.test.key
│   ├── emqx.test.cer
│   ├── emqx.test.key
│   ├── ente.test.cer
│   ├── ente.test.key
│   ├── expose.test.cer
│   ├── expose.test.key
│   ├── fleet.test.cer
│   ├── fleet.test.key
│   ├── formbricks.test.cer
│   ├── formbricks.test.key
│   ├── gitlab.test.cer
│   ├── gitlab.test.key
│   ├── hatchet.test.cer
│   ├── hatchet.test.key
│   ├── huly.test.cer
│   ├── huly.test.key
│   ├── ingress.test.cer
│   ├── ingress.test.key
│   ├── livekit.test.cer
│   ├── livekit.test.key
│   ├── logto.test.cer
│   ├── logto.test.key
│   ├── mailpit.test.cer
│   ├── mailpit.test.key
│   ├── matrix.test.cer
│   ├── matrix.test.key
│   ├── mattermost.test.cer
│   ├── mattermost.test.key
│   ├── mercure.test.cer
│   ├── mercure.test.key
│   ├── minio.test.cer
│   ├── minio.test.key
│   ├── outline.test.cer
│   ├── outline.test.key
│   ├── pages.test.cer
│   ├── pages.test.key
│   ├── rallly.test.cer
│   ├── rallly.test.key
│   ├── redpanda.test.cer
│   ├── redpanda.test.key
│   ├── registry.test.cer
│   ├── registry.test.key
│   ├── river.test.cer
│   ├── river.test.key
│   ├── rocket.test.cer
│   ├── rocket.test.key
│   ├── snipe-it.test.cer
│   ├── snipe-it.test.key
│   ├── sqlchat.test.cer
│   ├── sqlchat.test.key
│   ├── sshx.test.cer
│   ├── sshx.test.key
│   ├── svc.dev.cer
│   ├── svc.dev.key
│   ├── typebot.test.cer
│   ├── typebot.test.key
│   ├── unit.test.cer
│   ├── unit.test.key
│   ├── wallos.test.cer
│   ├── wallos.test.key
│   ├── warrant.test.cer
│   ├── warrant.test.key
│   ├── zitadel.test.cer
│   └── zitadel.test.key
├── config
│   └── tls.yaml
└── docker-compose.yaml

3 directories, 85 files

config/tls.yaml 配置:

# Traefik configuration loaded by the file provider from the mounted config
# directory: TLS options plus the hand-maintained list of per-project
# certificates generated with mkcert.
# NOTE(review): `global` and `serversTransport` look like static-configuration
# options — confirm Traefik honors them when they appear in this file.
global:
  checkNewVersion: true
  sendAnonymousUsage: true
serversTransport:
  # Disables certificate verification for connections Traefik makes to backends.
  insecureSkipVerify: true
tls:
  options:
    default:
      # Strict SNI checking; accepted protocol range is TLS 1.2 through TLS 1.3.
      sniStrict: true
      minVersion: VersionTLS12
      maxVersion: VersionTLS13
      # Allowed cipher suites (AES-GCM and ChaCha20-Poly1305 only).
      cipherSuites:
        - TLS_AES_128_GCM_SHA256
        - TLS_AES_256_GCM_SHA384
        - TLS_CHACHA20_POLY1305_SHA256
        - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384
        - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256
        - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256
  # One certFile/keyFile pair per project domain. The /certs paths are inside
  # the Traefik container, where ./certs is bind-mounted. Every new project
  # needs a new entry here — the manual step the article sets out to remove.
  certificates:
    - certFile: /certs/svc.dev.cer
      keyFile: /certs/svc.dev.key
    - certFile: /certs/unit.test.cer
      keyFile: /certs/unit.test.key
    - certFile: /certs/ente.test.cer
      keyFile: /certs/ente.test.key
    - certFile: /certs/emqx.test.cer
      keyFile: /certs/emqx.test.key
    - certFile: /certs/dify.test.cer
      keyFile: /certs/dify.test.key
    - certFile: /certs/huly.test.cer
      keyFile: /certs/huly.test.key
    - certFile: /certs/river.test.cer
      keyFile: /certs/river.test.key
    - certFile: /certs/fleet.test.cer
      keyFile: /certs/fleet.test.key
    - certFile: /certs/wallos.test.cer
      keyFile: /certs/wallos.test.key
    - certFile: /certs/calcom.test.cer
      keyFile: /certs/calcom.test.key
    - certFile: /certs/expose.test.cer
      keyFile: /certs/expose.test.key
    - certFile: /certs/sshx.test.cer
      keyFile: /certs/sshx.test.key
    - certFile: /certs/minio.test.cer
      keyFile: /certs/minio.test.key
    - certFile: /certs/logto.test.cer
      keyFile: /certs/logto.test.key
    - certFile: /certs/pages.test.cer
      keyFile: /certs/pages.test.key
    - certFile: /certs/gitlab.test.cer
      keyFile: /certs/gitlab.test.key
    - certFile: /certs/consul.test.cer
      keyFile: /certs/consul.test.key
    - certFile: /certs/livekit.test.cer
      keyFile: /certs/livekit.test.key
    - certFile: /certs/mailpit.test.cer
      keyFile: /certs/mailpit.test.key
    - certFile: /certs/rallly.test.cer
      keyFile: /certs/rallly.test.key
    - certFile: /certs/ingress.test.cer
      keyFile: /certs/ingress.test.key
    - certFile: /certs/rocket.test.cer
      keyFile: /certs/rocket.test.key
    - certFile: /certs/warrant.test.cer
      keyFile: /certs/warrant.test.key
    - certFile: /certs/matrix.test.cer
      keyFile: /certs/matrix.test.key
    - certFile: /certs/adguard.test.cer
      keyFile: /certs/adguard.test.key
    - certFile: /certs/typebot.test.cer
      keyFile: /certs/typebot.test.key
    - certFile: /certs/hatchet.test.cer
      keyFile: /certs/hatchet.test.key
    - certFile: /certs/registry.test.cer
      keyFile: /certs/registry.test.key
    - certFile: /certs/zitadel.test.cer
      keyFile: /certs/zitadel.test.key
    - certFile: /certs/mercure.test.cer
      keyFile: /certs/mercure.test.key
    - certFile: /certs/outline.test.cer
      keyFile: /certs/outline.test.key
    - certFile: /certs/sqlchat.test.cer
      keyFile: /certs/sqlchat.test.key
    - certFile: /certs/aptabase.test.cer
      keyFile: /certs/aptabase.test.key
    - certFile: /certs/snipe-it.test.cer
      keyFile: /certs/snipe-it.test.key
    - certFile: /certs/directus.test.cer
      keyFile: /certs/directus.test.key
    - certFile: /certs/bytebase.test.cer
      keyFile: /certs/bytebase.test.key
    - certFile: /certs/redpanda.test.cer
      keyFile: /certs/redpanda.test.key
    - certFile: /certs/formbricks.test.cer
      keyFile: /certs/formbricks.test.key
    - certFile: /certs/authorizer.test.cer
      keyFile: /certs/authorizer.test.key
    - certFile: /certs/mattermost.test.cer
      keyFile: /certs/mattermost.test.key
    - certFile: /certs/cockroachdb.test.cer
      keyFile: /certs/cockroachdb.test.key

不知不觉都攒这么多了,也许你会问为什么不用一个 FQDN?其实用一个 FQDN 也不是不行,只是域名会比较长一些,且有些项目会包含多个带有 HTTP 服务的容器,例如 MinIO 这个项目就需要多个子域名对应不同的需求:

  • api.minio.test: MinIO 的 API Endpoint
  • console.minio.test: MinIO 的管理后台
  • *.minio.test: 直接将 Subdomain 映射到 Bucket

如果不用 TLD 来区分不同服务,而是用相同 FQDN 的不同子域名区分的话,有两种方式:

  • 用多级子域名来区分,这种方式也会有同样的问题,就是需要四级域名的项目还是要手动生成证书,例如 FQDN 是 svc.dev,那么上面的 console.minio.test,就应该是 console.minio.svc.dev,这时候为 *.svc.dev 生成的通配证书就失效了,需要再手动生成 *.minio.svc.dev 的通配证书。
  • 用中横线区分同一个项目的不同服务,例如:console.minio.test 对应的就是 minio-console.svc.dev,这种方式成本最低,但是其域名过长,个人感觉体验不好

综上因素考虑,我还是选择了之前的方式,通过特定 TLD 的二级域名来区分项目,然后用三级域名区分项目中不同服务,这种方式对于访问者来说,体验是最好的,但是需要较复杂的网络配置。

  • 首先需要一个不和公网重复的 TLD,例如我这里选的就是 *.test
  • 需要本地运行一个 DNS,例如 dnsmasq
  • 系统需要支持为特定 TLD 或 FQDN 指定一个 NAMESERVER,在 macOS 中就是在 /etc/resolver 目录中定义
  • 需要熟悉网络底层的原理,例如 IP、DNS 等

例如我的电脑中就有如下配置:

tree /etc/resolver
/etc/resolver
├── infra
├── svc.dev
└── test

1 directory, 3 files

因为 svc.dev 的 TLD 是公网的可用域名,所以,我这里只用二级域名作为本地访问的域名,避免覆盖了大量互联网域名!

其中 /etc/resolver/svc.dev 的配置如下:

cat /etc/resolver/svc.dev
nameserver 127.0.0.1

当我需要解析 *.svc.dev 的域名时,系统就会将请求发送到指定的 DNS,也就是监听 127.0.0.1:53 的 dnsmasq。

然后还需要在 dnsmasq 中配置 *.svc.dev 的 A 记录:

# ACME DNS container: dns.svc.dev resolves to its fixed address on 10.8.10.0/24
address=/dns.svc.dev/10.8.10.253

# Smallstep CA (Step-CA) container: ca.svc.dev resolves to its fixed address
address=/ca.svc.dev/10.8.10.254

# Traefik ingress: the remaining *.svc.dev names resolve to Traefik's fixed address
address=/.svc.dev/10.8.10.252

# Forward DNS requests to the upstream resolver 223.5.5.5
# NOTE(review): the `/./` domain form is unusual — confirm it forwards all other queries as intended
server=/./223.5.5.5

# Listen address: 0.0.0.0 (all IPv4 addresses), not only 127.0.0.1
listen-address=0.0.0.0

因为我的所有容器内的服务,都运行在 10.8.10.0/24 这个网段,并且为 Step-CA、ACME DNS 和 Traefik 配置了固定 IP。

看到这里也许你会疑惑,为什么宿主机有了 dnsmasq 之后,还要在容器内运行一个 ACME DNS,这个 DNS 其实主要是为了处理 ACME 协议中的 dnsChallenge 业务。

碍于篇幅限制,ACME DNS 相关的内容,我会在另一期文章中分享!

自动 HTTPS

其实只需要 Traefik 和 Step-CA 这两个容器就可以实现针对 FQDN 的证书自动签发,主要用到的就是 httpChallenge 和 tlsChallenge,在 Traefik 的 docker-compose.yaml 中配置如下:

services:  # Traefik ingress that requests its certificates from the private Step-CA ACME server
  traefik:
    dns:  # resolve names through the host's dnsmasq (Step-CA must reach Traefik during the TLS challenge)
      - 10.0.6.8
    image: traefik:latest
    ports:
      - 0.0.0.0:80:80/tcp
      - 0.0.0.0:443:443/tcp
    labels:
      - traefik.tls.stores.default.defaultgeneratedcert.resolver=step-ca  # default-store certificate issued via step-ca
      - traefik.tls.stores.default.defaultgeneratedcert.domain.main=svc.dev
      - traefik.tls.stores.default.defaultgeneratedcert.domain.sans=*.svc.dev  # wildcard SAN: deliberate counter-example, needs dnsChallenge
      # Enable this container for the Docker provider on the traefik network.
      - traefik.enable=true
      - traefik.docker.network=traefik
      # Dashboard router (dashboard@internal) served at traefik.svc.dev.
      - traefik.http.routers.traefik-dashboard.tls=true
      - traefik.http.routers.traefik-dashboard.tls.certresolver=step-ca
      - traefik.http.routers.traefik-dashboard.entrypoints=http,https
      - traefik.http.routers.traefik-dashboard.rule=Host(`traefik.svc.dev`)
      - traefik.http.routers.traefik-dashboard.service=dashboard@internal
      # API router (api@internal) for the /api path prefix on the same host.
      - traefik.http.routers.traefik-dashboard-api.tls=true
      - traefik.http.routers.traefik-dashboard-api.tls.certresolver=step-ca
      - traefik.http.routers.traefik-dashboard-api.entrypoints=http,https
      - traefik.http.routers.traefik-dashboard-api.rule=Host(`traefik.svc.dev`) && PathPrefix(`/api`)
      - traefik.http.routers.traefik-dashboard-api.service=api@internal
    hostname: traefik
    networks:
      traefik:
        ipv4_address: 10.8.10.252  # fixed IP that the dnsmasq *.svc.dev record points to
    command:
      - --api=true
      - --api.dashboard=true

      - --log.level=ERROR

      - --accesslog=true
      - --accesslog.fields.defaultmode=keep
      - --accesslog.fields.names.RouterName=keep
      - --accesslog.fields.headers.defaultMode=keep
      - --accesslog.fields.headers.names.RouterName=keep
      - --accesslog.fields.headers.names.RequestHost=keep
      # File provider: dynamic configuration (e.g. tls.yaml) from the mounted ./config directory.
      - --providers.file=true
      - --providers.file.watch=true
      - --providers.file.directory=/etc/traefik/config
      - --providers.file.debugloggeneratedtemplate=true
      # Docker provider: discover containers through the mounted Docker socket.
      - --providers.docker=true
      - --providers.docker.watch=true
      - --providers.docker.network=traefik
      - --providers.docker.useBindPortIP=false
      - --providers.docker.endpoint=unix:///var/run/docker.sock
      # Skip certificate verification for connections to backends.
      - --serverstransport.insecureskipverify=true

      - --entrypoints.ssh.address=:22
      # Redirect all plain-HTTP traffic to the https entrypoint (permanent redirect).
      - --entrypoints.http.address=:80
      - --entrypoints.http.http.redirections.entryPoint.to=https
      - --entrypoints.http.http.redirections.entryPoint.scheme=https
      - --entryPoints.http.http.redirections.entrypoint.permanent=true
      # HTTPS entrypoint: default certificate resolver and certificate domains.
      - --entrypoints.https.address=:443
      - --entryPoints.https.http3.advertisedport=443
      - --entryPoints.https.http.tls.certResolver=step-ca
      - --entryPoints.https.http.tls.domains[0].main=svc.dev
      - --entryPoints.https.http.tls.domains[0].sans=*.svc.dev
      # ACME resolver backed by Step-CA; only the TLS challenge is enabled.
      - --certificatesresolvers.step-ca.acme.email=admin@svc.dev  # placeholder: the original address was lost to e-mail obfuscation
      - --certificatesresolvers.step-ca.acme.storage=/certs/acme.json
      - --certificatesresolvers.step-ca.acme.caserver=https://ca.svc.dev/acme/acme/directory
      - --certificatesresolvers.step-ca.acme.tlschallenge=true
      - --certificatesresolvers.step-ca.acme.dnschallenge=false
      - --certificatesresolvers.step-ca.acme.dnschallenge.provider=httpreq
      - --certificatesresolvers.step-ca.acme.httpchallenge=false
    volumes:
      - step-ca:/step-ca:ro  # Step-CA volume holding the root certificate referenced by LEGO_CA_CERTIFICATES
      - ./certs/:/certs/:rw
      - ./config/:/etc/traefik/config/:ro
      - /var/run/docker.sock:/var/run/docker.sock
    logging:
      driver: json-file
      options:
        max-size: 32m
    extra_hosts:
      - ca.svc.dev:10.8.10.254
      - dns.svc.dev:10.8.10.253
    environment:
      - TZ=Asia/Shanghai
      - LEGO_CA_CERTIFICATES=/step-ca/certs/root_ca.crt
    container_name: traefik

volumes:
  certs:
    name: certs
    external: true
  step-ca:
    name: step-ca
    external: true

networks:
  traefik:
    external: true

如果对 Step-CA 还不是很了解的,可以看我这篇文章《使用 Step-CA 搭建私有 ACME Server》

  • 3~4 行:使用宿主机的 dnsmasq 来解析服务,主要在 tlsChallenge 时 Step-CA 需要请求 HTTPS 服务
  • 10~12 行:设置默认的 TLS 证书相关配置
  • 18,24 行:为路由设置 TLS 证书解析器
  • 67~69 行:为 Entrypoint 设置解析器和域名
  • 74~77 行:因为我将所有 HTTP 流量都做了重定向到 HTTPS,所以只能开启 tlsChallenge(同时关闭 dnsChallenge 和 httpChallenge)
  • 79,92 行:挂载 Step-CA 容器产生的根证书,否则会出现 TLS 握手失败

只要 DNS 解析正常,Step-CA 启动后,就能够为 Traefik 签发证书!

上述配置中,我有意做了一个错误的示范,就是在 11~12 和 68~69 行中配置的通配证书域名,因为只有 dnsChallenge 才支持通配证书的签发!

tlsChallenge 和 httpChallenge 的大致流程如下图所示:

tlsChallenge

证书申请成功后,对应的 crt 和 key 会存放在 --certificatesresolvers.step-ca.acme.storage 配置的文件中:

{
  "step-ca": {
    "Account": {
      "Email": "admin@svc.dev",
      "Registration": {
        "body": {
          "status": "valid",
          "contact": [
            "mailto:admin@svc.dev"
          ],
          "orders": "https://ca.svc.dev/acme/acme/account/FFqNin75x7Kk5Y6R3FcUdsAGSgL486yr/orders"
        },
        "uri": "https://ca.svc.dev/acme/acme/account/FFqNin75x7Kk5Y6R3FcUdsAGSgL486yr"
      },
      "PrivateKey": "MIIJKL...........Pt1jfvTA=",
      "KeyType": "4096"
    },
    "Certificates": [
      {
        "domain": {
          "main": "traefik.svc.dev"
        },
        "certificate": "LS0tLS...........RFLS0tLS0K",
        "key": "LS0tLS...........VEUgT21XS0VZLS0tLS0K",
        "Store": "default"
      }
    ]
  }
}

总结

按照上面的步骤操作下来,就能够实现 Traefik 从内网私有 ACME Server 申请证书了,但是还存在一个问题,那就是只有 dnsChallenge 才支持通配证书的申请。

而这在项目中经常会用到,例如我在 MinIO 中就用子域名作为 Bucket 的映射!

要想实现通配证书的申请,必须要在内网集成支持 API 更新的 DNS,例如 ACME DNS,Traefik 的 dnsChallenge Provider 中也支持 acme-dns。

更多有关 ACME DNS 和通配证书的申请的内容,敬请期待下一期内容!

I hope this is helpful, Happy hacking…