fix(api): surface real provider error on apply/check instead of generic internal error

resolve (shared by Check/Apply) and Apply now wrap GetRecords/ApplyChanges
failures in service.ErrProviderUnavailable, matching ZoneRecords' existing
behavior. handleApply/handleCheck use errors.Is against it to return 502
with the real provider message (e.g. Selectel's 409 conflict body) instead
of masking every failure as a generic 500 "internal error"; non-provider
errors (decrypt/db/loader) are unaffected.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01BwxdSt4reTm7Dj1oxRvpP3
This commit is contained in:
2026-07-05 15:53:27 +07:00
parent 6f9958af60
commit 879e9e14b1
4 changed files with 190 additions and 6 deletions
+101
View File
@@ -5,6 +5,7 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"strings" "strings"
@@ -26,6 +27,10 @@ type mockCheckApplier struct {
// used by handleCheck status-persistence tests (drift/in_sync/error). // used by handleCheck status-persistence tests (drift/in_sync/error).
checkCS *diff.Changeset checkCS *diff.Changeset
checkErr error checkErr error
// applyErr, when set, makes Apply fail with this error — used by the
// provider-error-surfacing tests (502 with real message vs 500 generic).
applyErr error
} }
func (m *mockCheckApplier) Check(context.Context, uuid.UUID, uuid.UUID) (diff.Changeset, error) { func (m *mockCheckApplier) Check(context.Context, uuid.UUID, uuid.UUID) (diff.Changeset, error) {
@@ -40,6 +45,9 @@ func (m *mockCheckApplier) Check(context.Context, uuid.UUID, uuid.UUID) (diff.Ch
} }
func (m *mockCheckApplier) Apply(_ context.Context, _, _ uuid.UUID, req service.ApplyRequest) (diff.Changeset, error) { func (m *mockCheckApplier) Apply(_ context.Context, _, _ uuid.UUID, req service.ApplyRequest) (diff.Changeset, error) {
m.lastReq = req m.lastReq = req
if m.applyErr != nil {
return diff.Changeset{}, m.applyErr
}
return diff.Changeset{}, nil return diff.Changeset{}, nil
} }
func (m *mockCheckApplier) ZoneRecords(context.Context, uuid.UUID, uuid.UUID) ([]model.Record, error) { func (m *mockCheckApplier) ZoneRecords(context.Context, uuid.UUID, uuid.UUID) ([]model.Record, error) {
@@ -271,6 +279,99 @@ func TestCheckEndpoint_ErrorScopesStatusToCallerProject(t *testing.T) {
} }
} }
// TestApplyEndpoint_ProviderErrorSurfacesRealMessage covers the fix: when
// Svc.Apply fails with an error wrapping service.ErrProviderUnavailable (e.g.
// Selectel rejecting a change with a 409 conflict), the handler must respond
// 502 with the actual provider message in body.error — not a generic 500
// "internal error" that hides the real cause from the user.
func TestApplyEndpoint_ProviderErrorSurfacesRealMessage(t *testing.T) {
a, m := newTestAPI()
m.applyErr = fmt.Errorf("%w: %v", service.ErrProviderUnavailable,
errors.New("selectel POST /zones/x/rrset: 409: conflicting CNAME record exists"))
router := NewRouter(a)
did := uuid.New().String()
req := requestWithSessionCookie(http.MethodPost,
"/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did+"/apply",
strings.NewReader(`{}`))
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusBadGateway {
t.Fatalf("expected 502, got %d body %s", w.Code, w.Body.String())
}
var resp map[string]string
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatal(err)
}
if !strings.Contains(resp["error"], "409") || !strings.Contains(resp["error"], "conflicting CNAME") {
t.Fatalf("expected real provider message in body, got %q", resp["error"])
}
if strings.Contains(resp["error"], "internal error") {
t.Fatalf("provider error must not be masked as internal error, got %q", resp["error"])
}
}
// TestApplyEndpoint_NonProviderErrorStaysGeneric covers the flip side: an
// unwrapped/local error (decrypt, db, loader) from Svc.Apply must still fall
// back to a generic 500 "internal error" — only provider errors get their
// real message surfaced.
func TestApplyEndpoint_NonProviderErrorStaysGeneric(t *testing.T) {
a, m := newTestAPI()
m.applyErr = errors.New("decrypt: cipher: message authentication failed")
router := NewRouter(a)
did := uuid.New().String()
req := requestWithSessionCookie(http.MethodPost,
"/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did+"/apply",
strings.NewReader(`{}`))
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusInternalServerError {
t.Fatalf("expected 500, got %d body %s", w.Code, w.Body.String())
}
var resp map[string]string
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatal(err)
}
if resp["error"] != "internal error" {
t.Fatalf("expected generic internal error, got %q", resp["error"])
}
}
// TestCheckEndpoint_ProviderErrorSurfacesRealMessage mirrors the Apply case
// for /check: a provider-wrapped error must come back as 502 with the real
// provider message, while the existing status-persistence behavior (SetDomainStatus
// error before responding) is unaffected.
func TestCheckEndpoint_ProviderErrorSurfacesRealMessage(t *testing.T) {
a, m := newTestAPI()
ts := a.Store.(*mockTenantStore)
m.checkErr = fmt.Errorf("%w: %v", service.ErrProviderUnavailable,
errors.New("selectel GET /zones/x/rrset: 503: upstream unavailable"))
router := NewRouter(a)
did := uuid.New()
req := requestWithSessionCookie(http.MethodGet,
"/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did.String()+"/check", nil)
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusBadGateway {
t.Fatalf("expected 502, got %d body %s", w.Code, w.Body.String())
}
var resp map[string]string
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatal(err)
}
if !strings.Contains(resp["error"], "503") || !strings.Contains(resp["error"], "upstream unavailable") {
t.Fatalf("expected real provider message in body, got %q", resp["error"])
}
if len(ts.statusCalls) != 1 || ts.statusCalls[0].status != service.StatusError {
t.Fatalf("expected SetDomainStatus(_, _, error) to still run, got %+v", ts.statusCalls)
}
}
// TestChangesetResponseEmptyMarshalsToArrays guards the белый-экран bug: an // TestChangesetResponseEmptyMarshalsToArrays guards the белый-экран bug: an
// empty changeset (zone matches its template exactly, e.g. right after a // empty changeset (zone matches its template exactly, e.g. right after a
// snapshot) must marshal updates/prunes/readOnly as [] not null — a nil slice // snapshot) must marshal updates/prunes/readOnly as [] not null — a nil slice
+15 -2
View File
@@ -43,7 +43,14 @@ func (a *API) handleCheck(w http.ResponseWriter, r *http.Request) {
log.Printf("api: set domain status (error) failed: %v", serr) log.Printf("api: set domain status (error) failed: %v", serr)
} }
log.Printf("api: check failed: %v", err) log.Printf("api: check failed: %v", err)
writeErr(w, http.StatusInternalServerError, "internal error") // A provider failure (e.g. Selectel returning a 409 conflict) is safe
// and useful to show the user as-is; any other failure (decrypt/db/loader)
// stays a generic "internal error" to avoid leaking internals.
if errors.Is(err, service.ErrProviderUnavailable) {
writeErr(w, http.StatusBadGateway, service.ProviderMessage(err))
} else {
writeErr(w, http.StatusInternalServerError, "internal error")
}
return return
} }
// Manual check persists status/history only — no notification. Notify // Manual check persists status/history only — no notification. Notify
@@ -79,7 +86,13 @@ func (a *API) handleApply(w http.ResponseWriter, r *http.Request) {
}) })
if err != nil { if err != nil {
log.Printf("api: apply failed: %v", err) log.Printf("api: apply failed: %v", err)
writeErr(w, http.StatusInternalServerError, "internal error") // Same distinction as handleCheck: surface the real provider message,
// keep everything else generic.
if errors.Is(err, service.ErrProviderUnavailable) {
writeErr(w, http.StatusBadGateway, service.ProviderMessage(err))
} else {
writeErr(w, http.StatusInternalServerError, "internal error")
}
return return
} }
writeJSON(w, http.StatusOK, toChangesetResponse(cs)) writeJSON(w, http.StatusOK, toChangesetResponse(cs))
+17 -2
View File
@@ -4,6 +4,7 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"strings"
"github.com/google/uuid" "github.com/google/uuid"
@@ -23,6 +24,16 @@ import (
// pick 502 vs 404 without leaking provider error details as "not found". // pick 502 vs 404 without leaking provider error details as "not found".
var ErrProviderUnavailable = errors.New("service: provider unavailable") var ErrProviderUnavailable = errors.New("service: provider unavailable")
// ProviderMessage extracts the provider's own error text from an error
// wrapped with ErrProviderUnavailable, stripping the sentinel prefix so
// callers can surface it to the user as-is (e.g. Selectel's "409: conflicting
// CNAME record exists"). Only meant to be called on errors that
// errors.Is(err, ErrProviderUnavailable) — otherwise it just returns
// err.Error() unchanged.
func ProviderMessage(err error) string {
return strings.TrimPrefix(err.Error(), ErrProviderUnavailable.Error()+": ")
}
// DomainRef is the minimal data the service needs about a domain. // DomainRef is the minimal data the service needs about a domain.
type DomainRef struct { type DomainRef struct {
ZoneID string ZoneID string
@@ -84,7 +95,11 @@ func (s *DomainService) resolve(ctx context.Context, projectID, domainID uuid.UU
creds := provider.Credentials{Secret: string(secret)} creds := provider.Credentials{Secret: string(secret)}
actual, err := p.GetRecords(ctx, creds, ref.ZoneID) actual, err := p.GetRecords(ctx, creds, ref.ZoneID)
if err != nil { if err != nil {
return nil, provider.Credentials{}, ref, diff.Changeset{}, err // Only a failure of the provider call itself is "provider unavailable" —
// LoadDomain/ByName/Decrypt errors above are local resolution failures
// (e.g. domain not found, bad stored credentials) and must not be
// conflated with it.
return nil, provider.Credentials{}, ref, diff.Changeset{}, fmt.Errorf("%w: %v", ErrProviderUnavailable, err)
} }
cs := diff.Diff(tmpl.Materialize(ref.Template, ref.ZoneName), actual) cs := diff.Diff(tmpl.Materialize(ref.Template, ref.ZoneName), actual)
return p, creds, ref, cs, nil return p, creds, ref, cs, nil
@@ -155,7 +170,7 @@ func (s *DomainService) Apply(ctx context.Context, projectID, domainID uuid.UUID
applied := diff.Changeset{Diffs: toApply} applied := diff.Changeset{Diffs: toApply}
if len(toApply) > 0 { if len(toApply) > 0 {
if err := p.ApplyChanges(ctx, creds, ref.ZoneID, applied); err != nil { if err := p.ApplyChanges(ctx, creds, ref.ZoneID, applied); err != nil {
return diff.Changeset{}, err return diff.Changeset{}, fmt.Errorf("%w: %v", ErrProviderUnavailable, err)
} }
} }
return applied, nil return applied, nil
+57 -2
View File
@@ -2,6 +2,7 @@ package service
import ( import (
"context" "context"
"errors"
"testing" "testing"
"github.com/google/uuid" "github.com/google/uuid"
@@ -26,8 +27,10 @@ func testCipher(t *testing.T) *crypto.Cipher {
// fakeProvider records applied changesets and returns canned zone records. // fakeProvider records applied changesets and returns canned zone records.
type fakeProvider struct { type fakeProvider struct {
actual []model.Record actual []model.Record
applied diff.Changeset applied diff.Changeset
getErr error // when set, GetRecords fails with this error
applyErr error // when set, ApplyChanges fails with this error
} }
func (fakeProvider) Name() string { return "selectel" } func (fakeProvider) Name() string { return "selectel" }
@@ -35,9 +38,15 @@ func (fakeProvider) ListZones(context.Context, provider.Credentials) ([]provider
return nil, nil return nil, nil
} }
func (f *fakeProvider) GetRecords(context.Context, provider.Credentials, string) ([]model.Record, error) { func (f *fakeProvider) GetRecords(context.Context, provider.Credentials, string) ([]model.Record, error) {
if f.getErr != nil {
return nil, f.getErr
}
return f.actual, nil return f.actual, nil
} }
func (f *fakeProvider) ApplyChanges(_ context.Context, _ provider.Credentials, _ string, cs diff.Changeset) error { func (f *fakeProvider) ApplyChanges(_ context.Context, _ provider.Credentials, _ string, cs diff.Changeset) error {
if f.applyErr != nil {
return f.applyErr
}
f.applied = cs f.applied = cs
return nil return nil
} }
@@ -191,3 +200,49 @@ func TestApplySelectsByKeyAndOrdersPrunesBeforeUpdates(t *testing.T) {
t.Fatalf("expected update SECOND in applied order, got %+v", fp4.applied.Diffs) t.Fatalf("expected update SECOND in applied order, got %+v", fp4.applied.Diffs)
} }
} }
// TestApplyWrapsProviderError covers the fix: a failure from the provider's
// ApplyChanges call (e.g. Selectel rejecting a change with a 409 conflict)
// must be wrapped in ErrProviderUnavailable so the API layer can tell it
// apart from a local resolution failure and surface the real provider
// message instead of a generic "internal error".
func TestApplyWrapsProviderError(t *testing.T) {
actual := []model.Record{{Type: model.A, Name: "a.example.com.", TTL: 300, Values: []string{"9.9.9.9"}}}
tmpl := dto.TemplateDoc{Records: []dto.RecordDTO{
{Type: "A", Name: "a.example.com.", TTL: 300, Values: []string{"1.1.1.1"}},
}}
svc, fp := setup(t, actual, tmpl)
fp.applyErr = errors.New("selectel POST /zones/z1/rrset: 409: conflicting CNAME record exists")
_, err := svc.Apply(context.Background(), uuid.New(), uuid.New(), ApplyRequest{Updates: []string{"A a.example.com."}})
if err == nil {
t.Fatal("expected error, got nil")
}
if !errors.Is(err, ErrProviderUnavailable) {
t.Fatalf("expected error to wrap ErrProviderUnavailable, got %v", err)
}
msg := ProviderMessage(err)
if msg != "selectel POST /zones/z1/rrset: 409: conflicting CNAME record exists" {
t.Fatalf("expected clean provider message, got %q", msg)
}
}
// TestResolveWrapsProviderError covers the resolve helper shared by Check and
// Apply: a GetRecords failure from the provider must also be wrapped in
// ErrProviderUnavailable, mirroring ZoneRecords' existing behavior.
func TestResolveWrapsProviderError(t *testing.T) {
svc, fp := setup(t, nil, dto.TemplateDoc{})
fp.getErr = errors.New("selectel GET /zones/z1/rrset: 503: upstream unavailable")
_, err := svc.Check(context.Background(), uuid.New(), uuid.New())
if err == nil {
t.Fatal("expected error, got nil")
}
if !errors.Is(err, ErrProviderUnavailable) {
t.Fatalf("expected error to wrap ErrProviderUnavailable, got %v", err)
}
msg := ProviderMessage(err)
if msg != "selectel GET /zones/z1/rrset: 503: upstream unavailable" {
t.Fatalf("expected clean provider message, got %q", msg)
}
}