fix(api): surface real provider error on apply/check instead of generic internal error
resolve (shared by Check/Apply) and Apply now wrap GetRecords/ApplyChanges failures in service.ErrProviderUnavailable, matching ZoneRecords' existing behavior. handleApply/handleCheck use errors.Is against it to return 502 with the real provider message (e.g. Selectel's 409 conflict body) instead of masking every failure as a generic 500 "internal error"; non-provider errors (decrypt/db/loader) are unaffected. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_01BwxdSt4reTm7Dj1oxRvpP3
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -26,6 +27,10 @@ type mockCheckApplier struct {
|
|||||||
// used by handleCheck status-persistence tests (drift/in_sync/error).
|
// used by handleCheck status-persistence tests (drift/in_sync/error).
|
||||||
checkCS *diff.Changeset
|
checkCS *diff.Changeset
|
||||||
checkErr error
|
checkErr error
|
||||||
|
|
||||||
|
// applyErr, when set, makes Apply fail with this error — used by the
|
||||||
|
// provider-error-surfacing tests (502 with real message vs 500 generic).
|
||||||
|
applyErr error
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *mockCheckApplier) Check(context.Context, uuid.UUID, uuid.UUID) (diff.Changeset, error) {
|
func (m *mockCheckApplier) Check(context.Context, uuid.UUID, uuid.UUID) (diff.Changeset, error) {
|
||||||
@@ -40,6 +45,9 @@ func (m *mockCheckApplier) Check(context.Context, uuid.UUID, uuid.UUID) (diff.Ch
|
|||||||
}
|
}
|
||||||
func (m *mockCheckApplier) Apply(_ context.Context, _, _ uuid.UUID, req service.ApplyRequest) (diff.Changeset, error) {
|
func (m *mockCheckApplier) Apply(_ context.Context, _, _ uuid.UUID, req service.ApplyRequest) (diff.Changeset, error) {
|
||||||
m.lastReq = req
|
m.lastReq = req
|
||||||
|
if m.applyErr != nil {
|
||||||
|
return diff.Changeset{}, m.applyErr
|
||||||
|
}
|
||||||
return diff.Changeset{}, nil
|
return diff.Changeset{}, nil
|
||||||
}
|
}
|
||||||
func (m *mockCheckApplier) ZoneRecords(context.Context, uuid.UUID, uuid.UUID) ([]model.Record, error) {
|
func (m *mockCheckApplier) ZoneRecords(context.Context, uuid.UUID, uuid.UUID) ([]model.Record, error) {
|
||||||
@@ -271,6 +279,99 @@ func TestCheckEndpoint_ErrorScopesStatusToCallerProject(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestApplyEndpoint_ProviderErrorSurfacesRealMessage covers the fix: when
|
||||||
|
// Svc.Apply fails with an error wrapping service.ErrProviderUnavailable (e.g.
|
||||||
|
// Selectel rejecting a change with a 409 conflict), the handler must respond
|
||||||
|
// 502 with the actual provider message in body.error — not a generic 500
|
||||||
|
// "internal error" that hides the real cause from the user.
|
||||||
|
func TestApplyEndpoint_ProviderErrorSurfacesRealMessage(t *testing.T) {
|
||||||
|
a, m := newTestAPI()
|
||||||
|
m.applyErr = fmt.Errorf("%w: %v", service.ErrProviderUnavailable,
|
||||||
|
errors.New("selectel POST /zones/x/rrset: 409: conflicting CNAME record exists"))
|
||||||
|
router := NewRouter(a)
|
||||||
|
|
||||||
|
did := uuid.New().String()
|
||||||
|
req := requestWithSessionCookie(http.MethodPost,
|
||||||
|
"/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did+"/apply",
|
||||||
|
strings.NewReader(`{}`))
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
router.ServeHTTP(w, req)
|
||||||
|
|
||||||
|
if w.Code != http.StatusBadGateway {
|
||||||
|
t.Fatalf("expected 502, got %d body %s", w.Code, w.Body.String())
|
||||||
|
}
|
||||||
|
var resp map[string]string
|
||||||
|
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(resp["error"], "409") || !strings.Contains(resp["error"], "conflicting CNAME") {
|
||||||
|
t.Fatalf("expected real provider message in body, got %q", resp["error"])
|
||||||
|
}
|
||||||
|
if strings.Contains(resp["error"], "internal error") {
|
||||||
|
t.Fatalf("provider error must not be masked as internal error, got %q", resp["error"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestApplyEndpoint_NonProviderErrorStaysGeneric covers the flip side: an
|
||||||
|
// unwrapped/local error (decrypt, db, loader) from Svc.Apply must still fall
|
||||||
|
// back to a generic 500 "internal error" — only provider errors get their
|
||||||
|
// real message surfaced.
|
||||||
|
func TestApplyEndpoint_NonProviderErrorStaysGeneric(t *testing.T) {
|
||||||
|
a, m := newTestAPI()
|
||||||
|
m.applyErr = errors.New("decrypt: cipher: message authentication failed")
|
||||||
|
router := NewRouter(a)
|
||||||
|
|
||||||
|
did := uuid.New().String()
|
||||||
|
req := requestWithSessionCookie(http.MethodPost,
|
||||||
|
"/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did+"/apply",
|
||||||
|
strings.NewReader(`{}`))
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
router.ServeHTTP(w, req)
|
||||||
|
|
||||||
|
if w.Code != http.StatusInternalServerError {
|
||||||
|
t.Fatalf("expected 500, got %d body %s", w.Code, w.Body.String())
|
||||||
|
}
|
||||||
|
var resp map[string]string
|
||||||
|
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if resp["error"] != "internal error" {
|
||||||
|
t.Fatalf("expected generic internal error, got %q", resp["error"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCheckEndpoint_ProviderErrorSurfacesRealMessage mirrors the Apply case
|
||||||
|
// for /check: a provider-wrapped error must come back as 502 with the real
|
||||||
|
// provider message, while the existing status-persistence behavior (SetDomainStatus
|
||||||
|
// error before responding) is unaffected.
|
||||||
|
func TestCheckEndpoint_ProviderErrorSurfacesRealMessage(t *testing.T) {
|
||||||
|
a, m := newTestAPI()
|
||||||
|
ts := a.Store.(*mockTenantStore)
|
||||||
|
m.checkErr = fmt.Errorf("%w: %v", service.ErrProviderUnavailable,
|
||||||
|
errors.New("selectel GET /zones/x/rrset: 503: upstream unavailable"))
|
||||||
|
router := NewRouter(a)
|
||||||
|
|
||||||
|
did := uuid.New()
|
||||||
|
req := requestWithSessionCookie(http.MethodGet,
|
||||||
|
"/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did.String()+"/check", nil)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
router.ServeHTTP(w, req)
|
||||||
|
|
||||||
|
if w.Code != http.StatusBadGateway {
|
||||||
|
t.Fatalf("expected 502, got %d body %s", w.Code, w.Body.String())
|
||||||
|
}
|
||||||
|
var resp map[string]string
|
||||||
|
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(resp["error"], "503") || !strings.Contains(resp["error"], "upstream unavailable") {
|
||||||
|
t.Fatalf("expected real provider message in body, got %q", resp["error"])
|
||||||
|
}
|
||||||
|
if len(ts.statusCalls) != 1 || ts.statusCalls[0].status != service.StatusError {
|
||||||
|
t.Fatalf("expected SetDomainStatus(_, _, error) to still run, got %+v", ts.statusCalls)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TestChangesetResponseEmptyMarshalsToArrays guards the белый-экран bug: an
|
// TestChangesetResponseEmptyMarshalsToArrays guards the белый-экран bug: an
|
||||||
// empty changeset (zone matches its template exactly, e.g. right after a
|
// empty changeset (zone matches its template exactly, e.g. right after a
|
||||||
// snapshot) must marshal updates/prunes/readOnly as [] not null — a nil slice
|
// snapshot) must marshal updates/prunes/readOnly as [] not null — a nil slice
|
||||||
|
|||||||
@@ -43,7 +43,14 @@ func (a *API) handleCheck(w http.ResponseWriter, r *http.Request) {
|
|||||||
log.Printf("api: set domain status (error) failed: %v", serr)
|
log.Printf("api: set domain status (error) failed: %v", serr)
|
||||||
}
|
}
|
||||||
log.Printf("api: check failed: %v", err)
|
log.Printf("api: check failed: %v", err)
|
||||||
|
// A provider failure (e.g. Selectel returning a 409 conflict) is safe
|
||||||
|
// and useful to show the user as-is; any other failure (decrypt/db/loader)
|
||||||
|
// stays a generic "internal error" to avoid leaking internals.
|
||||||
|
if errors.Is(err, service.ErrProviderUnavailable) {
|
||||||
|
writeErr(w, http.StatusBadGateway, service.ProviderMessage(err))
|
||||||
|
} else {
|
||||||
writeErr(w, http.StatusInternalServerError, "internal error")
|
writeErr(w, http.StatusInternalServerError, "internal error")
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// Manual check persists status/history only — no notification. Notify
|
// Manual check persists status/history only — no notification. Notify
|
||||||
@@ -79,7 +86,13 @@ func (a *API) handleApply(w http.ResponseWriter, r *http.Request) {
|
|||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("api: apply failed: %v", err)
|
log.Printf("api: apply failed: %v", err)
|
||||||
|
// Same distinction as handleCheck: surface the real provider message,
|
||||||
|
// keep everything else generic.
|
||||||
|
if errors.Is(err, service.ErrProviderUnavailable) {
|
||||||
|
writeErr(w, http.StatusBadGateway, service.ProviderMessage(err))
|
||||||
|
} else {
|
||||||
writeErr(w, http.StatusInternalServerError, "internal error")
|
writeErr(w, http.StatusInternalServerError, "internal error")
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
writeJSON(w, http.StatusOK, toChangesetResponse(cs))
|
writeJSON(w, http.StatusOK, toChangesetResponse(cs))
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
|
|
||||||
@@ -23,6 +24,16 @@ import (
|
|||||||
// pick 502 vs 404 without leaking provider error details as "not found".
|
// pick 502 vs 404 without leaking provider error details as "not found".
|
||||||
var ErrProviderUnavailable = errors.New("service: provider unavailable")
|
var ErrProviderUnavailable = errors.New("service: provider unavailable")
|
||||||
|
|
||||||
|
// ProviderMessage extracts the provider's own error text from an error
|
||||||
|
// wrapped with ErrProviderUnavailable, stripping the sentinel prefix so
|
||||||
|
// callers can surface it to the user as-is (e.g. Selectel's "409: conflicting
|
||||||
|
// CNAME record exists"). Only meant to be called on errors that
|
||||||
|
// errors.Is(err, ErrProviderUnavailable) — otherwise it just returns
|
||||||
|
// err.Error() unchanged.
|
||||||
|
func ProviderMessage(err error) string {
|
||||||
|
return strings.TrimPrefix(err.Error(), ErrProviderUnavailable.Error()+": ")
|
||||||
|
}
|
||||||
|
|
||||||
// DomainRef is the minimal data the service needs about a domain.
|
// DomainRef is the minimal data the service needs about a domain.
|
||||||
type DomainRef struct {
|
type DomainRef struct {
|
||||||
ZoneID string
|
ZoneID string
|
||||||
@@ -84,7 +95,11 @@ func (s *DomainService) resolve(ctx context.Context, projectID, domainID uuid.UU
|
|||||||
creds := provider.Credentials{Secret: string(secret)}
|
creds := provider.Credentials{Secret: string(secret)}
|
||||||
actual, err := p.GetRecords(ctx, creds, ref.ZoneID)
|
actual, err := p.GetRecords(ctx, creds, ref.ZoneID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, provider.Credentials{}, ref, diff.Changeset{}, err
|
// Only a failure of the provider call itself is "provider unavailable" —
|
||||||
|
// LoadDomain/ByName/Decrypt errors above are local resolution failures
|
||||||
|
// (e.g. domain not found, bad stored credentials) and must not be
|
||||||
|
// conflated with it.
|
||||||
|
return nil, provider.Credentials{}, ref, diff.Changeset{}, fmt.Errorf("%w: %v", ErrProviderUnavailable, err)
|
||||||
}
|
}
|
||||||
cs := diff.Diff(tmpl.Materialize(ref.Template, ref.ZoneName), actual)
|
cs := diff.Diff(tmpl.Materialize(ref.Template, ref.ZoneName), actual)
|
||||||
return p, creds, ref, cs, nil
|
return p, creds, ref, cs, nil
|
||||||
@@ -155,7 +170,7 @@ func (s *DomainService) Apply(ctx context.Context, projectID, domainID uuid.UUID
|
|||||||
applied := diff.Changeset{Diffs: toApply}
|
applied := diff.Changeset{Diffs: toApply}
|
||||||
if len(toApply) > 0 {
|
if len(toApply) > 0 {
|
||||||
if err := p.ApplyChanges(ctx, creds, ref.ZoneID, applied); err != nil {
|
if err := p.ApplyChanges(ctx, creds, ref.ZoneID, applied); err != nil {
|
||||||
return diff.Changeset{}, err
|
return diff.Changeset{}, fmt.Errorf("%w: %v", ErrProviderUnavailable, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return applied, nil
|
return applied, nil
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package service
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
@@ -28,6 +29,8 @@ func testCipher(t *testing.T) *crypto.Cipher {
|
|||||||
type fakeProvider struct {
|
type fakeProvider struct {
|
||||||
actual []model.Record
|
actual []model.Record
|
||||||
applied diff.Changeset
|
applied diff.Changeset
|
||||||
|
getErr error // when set, GetRecords fails with this error
|
||||||
|
applyErr error // when set, ApplyChanges fails with this error
|
||||||
}
|
}
|
||||||
|
|
||||||
func (fakeProvider) Name() string { return "selectel" }
|
func (fakeProvider) Name() string { return "selectel" }
|
||||||
@@ -35,9 +38,15 @@ func (fakeProvider) ListZones(context.Context, provider.Credentials) ([]provider
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
func (f *fakeProvider) GetRecords(context.Context, provider.Credentials, string) ([]model.Record, error) {
|
func (f *fakeProvider) GetRecords(context.Context, provider.Credentials, string) ([]model.Record, error) {
|
||||||
|
if f.getErr != nil {
|
||||||
|
return nil, f.getErr
|
||||||
|
}
|
||||||
return f.actual, nil
|
return f.actual, nil
|
||||||
}
|
}
|
||||||
func (f *fakeProvider) ApplyChanges(_ context.Context, _ provider.Credentials, _ string, cs diff.Changeset) error {
|
func (f *fakeProvider) ApplyChanges(_ context.Context, _ provider.Credentials, _ string, cs diff.Changeset) error {
|
||||||
|
if f.applyErr != nil {
|
||||||
|
return f.applyErr
|
||||||
|
}
|
||||||
f.applied = cs
|
f.applied = cs
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -191,3 +200,49 @@ func TestApplySelectsByKeyAndOrdersPrunesBeforeUpdates(t *testing.T) {
|
|||||||
t.Fatalf("expected update SECOND in applied order, got %+v", fp4.applied.Diffs)
|
t.Fatalf("expected update SECOND in applied order, got %+v", fp4.applied.Diffs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestApplyWrapsProviderError covers the fix: a failure from the provider's
|
||||||
|
// ApplyChanges call (e.g. Selectel rejecting a change with a 409 conflict)
|
||||||
|
// must be wrapped in ErrProviderUnavailable so the API layer can tell it
|
||||||
|
// apart from a local resolution failure and surface the real provider
|
||||||
|
// message instead of a generic "internal error".
|
||||||
|
func TestApplyWrapsProviderError(t *testing.T) {
|
||||||
|
actual := []model.Record{{Type: model.A, Name: "a.example.com.", TTL: 300, Values: []string{"9.9.9.9"}}}
|
||||||
|
tmpl := dto.TemplateDoc{Records: []dto.RecordDTO{
|
||||||
|
{Type: "A", Name: "a.example.com.", TTL: 300, Values: []string{"1.1.1.1"}},
|
||||||
|
}}
|
||||||
|
svc, fp := setup(t, actual, tmpl)
|
||||||
|
fp.applyErr = errors.New("selectel POST /zones/z1/rrset: 409: conflicting CNAME record exists")
|
||||||
|
|
||||||
|
_, err := svc.Apply(context.Background(), uuid.New(), uuid.New(), ApplyRequest{Updates: []string{"A a.example.com."}})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error, got nil")
|
||||||
|
}
|
||||||
|
if !errors.Is(err, ErrProviderUnavailable) {
|
||||||
|
t.Fatalf("expected error to wrap ErrProviderUnavailable, got %v", err)
|
||||||
|
}
|
||||||
|
msg := ProviderMessage(err)
|
||||||
|
if msg != "selectel POST /zones/z1/rrset: 409: conflicting CNAME record exists" {
|
||||||
|
t.Fatalf("expected clean provider message, got %q", msg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestResolveWrapsProviderError covers the resolve helper shared by Check and
|
||||||
|
// Apply: a GetRecords failure from the provider must also be wrapped in
|
||||||
|
// ErrProviderUnavailable, mirroring ZoneRecords' existing behavior.
|
||||||
|
func TestResolveWrapsProviderError(t *testing.T) {
|
||||||
|
svc, fp := setup(t, nil, dto.TemplateDoc{})
|
||||||
|
fp.getErr = errors.New("selectel GET /zones/z1/rrset: 503: upstream unavailable")
|
||||||
|
|
||||||
|
_, err := svc.Check(context.Background(), uuid.New(), uuid.New())
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error, got nil")
|
||||||
|
}
|
||||||
|
if !errors.Is(err, ErrProviderUnavailable) {
|
||||||
|
t.Fatalf("expected error to wrap ErrProviderUnavailable, got %v", err)
|
||||||
|
}
|
||||||
|
msg := ProviderMessage(err)
|
||||||
|
if msg != "selectel GET /zones/z1/rrset: 503: upstream unavailable" {
|
||||||
|
t.Fatalf("expected clean provider message, got %q", msg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user