fix(api): manual check persists last_check_status (was stale unknown)

Manual domain checks (Recheck button / diff page load) never wrote
domains.last_check_status - only the scheduler did, leaving a
newly-templated domain stuck at "unknown" until the next scheduled run.

Extract status derivation into internal/service (single source of truth):
StatusUnknown/InSync/Drift/Error constants and DeriveStatus(diff.Changeset).
The scheduler now aliases these constants instead of duplicating them.
handleCheck persists the derived status (or StatusError on failure) via
TenantStore.SetDomainStatus after every manual check - status/history only,
no notification, which remains the scheduler's job.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01BwxdSt4reTm7Dj1oxRvpP3
This commit is contained in:
2026-07-05 14:22:02 +07:00
parent cc5e562a67
commit 1b367c4bda
8 changed files with 175 additions and 6 deletions
+4
View File
@@ -50,6 +50,10 @@ type TenantStore interface {
DeleteDomain(ctx context.Context, id, projectID uuid.UUID) error
ImportDomains(ctx context.Context, projectID, accountID uuid.UUID, zones []provider.Zone) ([]store.Domain, error)
SetDomainTemplate(ctx context.Context, domainID, projectID uuid.UUID, templateID *uuid.UUID) (store.Domain, error)
// SetDomainStatus persists the outcome of a manual check (handleCheck) so
// the domain's badge reflects reality immediately, instead of staying
// "unknown" until the scheduler's next tick.
SetDomainStatus(ctx context.Context, domainID uuid.UUID, status string) error
}
// Cipher encrypts/decrypts provider account secrets. *crypto.Cipher satisfies it.
+87 -1
View File
@@ -4,6 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"net/http"
"net/http/httptest"
"strings"
@@ -20,9 +21,20 @@ type mockCheckApplier struct {
lastReq service.ApplyRequest
zoneRecords []model.Record
zoneErr error
// checkCS/checkErr, when set, override Check's default fixed changeset —
// used by handleCheck status-persistence tests (drift/in_sync/error).
checkCS *diff.Changeset
checkErr error
}
func (m *mockCheckApplier) Check(context.Context, uuid.UUID, uuid.UUID) (diff.Changeset, error) {
if m.checkErr != nil {
return diff.Changeset{}, m.checkErr
}
if m.checkCS != nil {
return *m.checkCS, nil
}
d := model.Record{Type: model.A, Name: "a.example.com.", TTL: 300, Values: []string{"1.1.1.1"}}
return diff.Changeset{Diffs: []diff.RecordDiff{{Kind: diff.Add, Type: d.Type, Name: d.Name, Desired: &d}}}, nil
}
@@ -44,7 +56,10 @@ func (m *mockCheckApplier) ZoneRecords(context.Context, uuid.UUID, uuid.UUID) ([
// middleware_test.go's own tests and the IDOR regression.
func newTestAPI() (*API, *mockCheckApplier) {
m := &mockCheckApplier{}
return &API{Svc: m, Auth: alwaysOwnedAuthStore(), Sessions: alwaysValidSessions(uuid.New())}, m
return &API{
Svc: m, Store: &mockTenantStore{},
Auth: alwaysOwnedAuthStore(), Sessions: alwaysValidSessions(uuid.New()),
}, m
}
func TestCheckEndpoint(t *testing.T) {
@@ -137,6 +152,77 @@ func TestApplyBadUUID(t *testing.T) {
}
}
// TestCheckEndpoint_PersistsDriftStatus covers the core bug fix: a manual
// check (GET .../check) whose changeset has an actionable prune must persist
// "drift" via Store.SetDomainStatus — previously only the scheduler wrote
// last_check_status, leaving a manually-checked domain stuck at "unknown".
func TestCheckEndpoint_PersistsDriftStatus(t *testing.T) {
a, m := newTestAPI()
ts := a.Store.(*mockTenantStore)
rec := model.Record{Type: model.A, Name: "x.example.com.", Values: []string{"1.1.1.1"}}
m.checkCS = &diff.Changeset{Diffs: []diff.RecordDiff{{Kind: diff.Delete, Type: rec.Type, Name: rec.Name, Actual: &rec}}}
router := NewRouter(a)
did := uuid.New()
req := requestWithSessionCookie(http.MethodGet,
"/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did.String()+"/check", nil)
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Fatalf("status %d body %s", w.Code, w.Body.String())
}
if len(ts.statusCalls) != 1 || ts.statusCalls[0].domainID != did || ts.statusCalls[0].status != service.StatusDrift {
t.Fatalf("expected SetDomainStatus(%s, drift), got %+v", did, ts.statusCalls)
}
}
// TestCheckEndpoint_PersistsInSyncStatus covers the no-drift case: a
// changeset with no actionable diffs persists "in_sync".
func TestCheckEndpoint_PersistsInSyncStatus(t *testing.T) {
a, m := newTestAPI()
ts := a.Store.(*mockTenantStore)
m.checkCS = &diff.Changeset{}
router := NewRouter(a)
did := uuid.New()
req := requestWithSessionCookie(http.MethodGet,
"/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did.String()+"/check", nil)
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Fatalf("status %d body %s", w.Code, w.Body.String())
}
if len(ts.statusCalls) != 1 || ts.statusCalls[0].status != service.StatusInSync {
t.Fatalf("expected SetDomainStatus(_, in_sync), got %+v", ts.statusCalls)
}
}
// TestCheckEndpoint_ErrorPersistsErrorStatus covers the failure path: when
// Svc.Check itself fails (provider/loader error), the handler must persist
// "error" before returning 500 — a write failure of the status itself must
// not mask the original 500.
func TestCheckEndpoint_ErrorPersistsErrorStatus(t *testing.T) {
a, m := newTestAPI()
ts := a.Store.(*mockTenantStore)
m.checkErr = errors.New("boom: provider unreachable")
router := NewRouter(a)
did := uuid.New()
req := requestWithSessionCookie(http.MethodGet,
"/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did.String()+"/check", nil)
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusInternalServerError {
t.Fatalf("expected 500, got %d body %s", w.Code, w.Body.String())
}
if len(ts.statusCalls) != 1 || ts.statusCalls[0].domainID != did || ts.statusCalls[0].status != service.StatusError {
t.Fatalf("expected SetDomainStatus(%s, error), got %+v", did, ts.statusCalls)
}
}
// TestChangesetResponseEmptyMarshalsToArrays guards the белый-экран bug: an
// empty changeset (zone matches its template exactly, e.g. right after a
// snapshot) must marshal updates/prunes/readOnly as [] not null — a nil slice
+10
View File
@@ -37,10 +37,20 @@ func (a *API) handleCheck(w http.ResponseWriter, r *http.Request) {
}
cs, err := a.Svc.Check(r.Context(), pid, did)
if err != nil {
// Persist the failure so the domain badge reflects it instead of stale
// "unknown"; the write error (if any) is logged, never masks the 500.
if serr := a.Store.SetDomainStatus(r.Context(), did, service.StatusError); serr != nil {
log.Printf("api: set domain status (error) failed: %v", serr)
}
log.Printf("api: check failed: %v", err)
writeErr(w, http.StatusInternalServerError, "internal error")
return
}
// Manual check persists status/history only — no notification. Notify
// remains the scheduler's responsibility (see internal/scheduler).
if serr := a.Store.SetDomainStatus(r.Context(), did, service.DeriveStatus(cs)); serr != nil {
log.Printf("api: set domain status failed: %v", serr)
}
writeJSON(w, http.StatusOK, toChangesetResponse(cs))
}
+1 -1
View File
@@ -239,7 +239,7 @@ func TestIDOR_CheckForeignProject_Returns404AndServiceNotCalled(t *testing.T) {
}
return store.Project{}, errors.New("not found")
}}
a := &API{Svc: svc, Auth: auth, Sessions: alwaysValidSessions(userA)}
a := &API{Svc: svc, Store: &mockTenantStore{}, Auth: auth, Sessions: alwaysValidSessions(userA)}
router := NewRouter(a)
did := uuid.New().String()
+18
View File
@@ -39,6 +39,14 @@ type mockTenantStore struct {
importCalled bool
setDomainTemplateErr error
// statusCalls records every SetDomainStatus(domainID, status) call, in
// order, so tests can assert what the handler persisted.
statusCalls []struct {
domainID uuid.UUID
status string
}
setDomainStatusErr error
}
func (m *mockTenantStore) CreateAccount(_ context.Context, projectID uuid.UUID, prov, secretEnc, comment string) (store.Account, error) {
@@ -126,6 +134,16 @@ func (m *mockTenantStore) SetDomainTemplate(_ context.Context, domainID, project
return d, nil
}
// SetDomainStatus records the call for assertion instead of actually mutating
// m.domains — handleCheck tests only need to verify what was written.
func (m *mockTenantStore) SetDomainStatus(_ context.Context, domainID uuid.UUID, status string) error {
m.statusCalls = append(m.statusCalls, struct {
domainID uuid.UUID
status string
}{domainID, status})
return m.setDomainStatusErr
}
func (m *mockTenantStore) ImportDomains(_ context.Context, projectID, accountID uuid.UUID, zones []provider.Zone) ([]store.Domain, error) {
m.importCalled = true
if m.importDomainsErr != nil {
+8 -4
View File
@@ -14,17 +14,21 @@ import (
"github.com/vasyakrg/dns-autoresolver/internal/diff"
"github.com/vasyakrg/dns-autoresolver/internal/metrics"
"github.com/vasyakrg/dns-autoresolver/internal/notify"
"github.com/vasyakrg/dns-autoresolver/internal/service"
"github.com/vasyakrg/dns-autoresolver/internal/store"
)
// Domain check statuses persisted via SchedStore.SetDomainStatus /
// surfaced via GetDomainStatus. "unknown" is the DB default for a domain
// that has never been checked (see migrations/0004_schedule_notify.sql).
// Aliased from internal/service — the single source of truth shared with the
// manual check handler (internal/api), so both paths persist the same
// values.
const (
StatusUnknown = "unknown"
StatusInSync = "in_sync"
StatusDrift = "drift"
StatusError = "error"
StatusUnknown = service.StatusUnknown
StatusInSync = service.StatusInSync
StatusDrift = service.StatusDrift
StatusError = service.StatusError
)
// SchedStore is the narrow store dependency the scheduler needs: due
+24
View File
@@ -0,0 +1,24 @@
package service
import "github.com/vasyakrg/dns-autoresolver/internal/diff"
// Domain check statuses persisted in domains.last_check_status. "unknown" is
// the DB default for a domain that has never been checked. Single source of
// truth — the scheduler aliases these, the API check handler writes them.
const (
StatusUnknown = "unknown"
StatusInSync = "in_sync"
StatusDrift = "drift"
StatusError = "error"
)
// DeriveStatus maps a successful check's changeset to a domain status:
// actionable drift (managed adds/updates/prunes) → "drift", otherwise
// "in_sync". A failed check (provider/loader error) is "error" and handled by
// the caller, which has the error in hand.
func DeriveStatus(cs diff.Changeset) string {
if len(cs.Actionable()) > 0 {
return StatusDrift
}
return StatusInSync
}
+23
View File
@@ -0,0 +1,23 @@
package service_test
import (
"testing"
"github.com/vasyakrg/dns-autoresolver/internal/diff"
"github.com/vasyakrg/dns-autoresolver/internal/model"
"github.com/vasyakrg/dns-autoresolver/internal/service"
)
func TestDeriveStatus(t *testing.T) {
// no actionable diffs → in_sync
if got := service.DeriveStatus(diff.Changeset{}); got != service.StatusInSync {
t.Fatalf("empty: %q", got)
}
// an actionable prune → drift
cs := diff.Changeset{Diffs: []diff.RecordDiff{
{Kind: diff.Delete, Type: model.A, Name: "x.example.com.", Actual: &model.Record{Type: model.A, Name: "x.example.com.", Values: []string{"1.1.1.1"}}},
}}
if got := service.DeriveStatus(cs); got != service.StatusDrift {
t.Fatalf("prune: %q", got)
}
}