fix(api): surface real provider error on apply/check instead of generic internal error

resolve (shared by Check/Apply) and Apply now wrap GetRecords/ApplyChanges
failures in service.ErrProviderUnavailable, matching ZoneRecords' existing
behavior. handleApply/handleCheck use errors.Is against it to return 502
with the real provider message (e.g. Selectel's 409 conflict body) instead
of masking every failure as a generic 500 "internal error"; non-provider
errors (decrypt/db/loader) are unaffected.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01BwxdSt4reTm7Dj1oxRvpP3
This commit is contained in:
2026-07-05 15:53:27 +07:00
parent 6f9958af60
commit 879e9e14b1
4 changed files with 190 additions and 6 deletions
+17 -2
View File
@@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"strings"
"github.com/google/uuid"
@@ -23,6 +24,16 @@ import (
// pick 502 vs 404 without leaking provider error details as "not found".
var ErrProviderUnavailable = errors.New("service: provider unavailable")

// ProviderMessage returns the provider's own error text from an error wrapped
// with ErrProviderUnavailable. It strips the leading sentinel text and its
// ": " separator so callers can surface the message to the user as-is (e.g.
// Selectel's "409: conflicting CNAME record exists").
//
// It is intended for errors that satisfy errors.Is(err, ErrProviderUnavailable).
// Any other non-nil error comes back as err.Error() unchanged, and a nil error
// yields "" instead of panicking on the nil interface.
func ProviderMessage(err error) string {
	if err == nil {
		return ""
	}
	return strings.TrimPrefix(err.Error(), ErrProviderUnavailable.Error()+": ")
}
// DomainRef is the minimal data the service needs about a domain.
type DomainRef struct {
ZoneID string
@@ -84,7 +95,11 @@ func (s *DomainService) resolve(ctx context.Context, projectID, domainID uuid.UU
creds := provider.Credentials{Secret: string(secret)}
actual, err := p.GetRecords(ctx, creds, ref.ZoneID)
if err != nil {
return nil, provider.Credentials{}, ref, diff.Changeset{}, err
// Only a failure of the provider call itself is "provider unavailable" —
// LoadDomain/ByName/Decrypt errors above are local resolution failures
// (e.g. domain not found, bad stored credentials) and must not be
// conflated with it.
return nil, provider.Credentials{}, ref, diff.Changeset{}, fmt.Errorf("%w: %v", ErrProviderUnavailable, err)
}
cs := diff.Diff(tmpl.Materialize(ref.Template, ref.ZoneName), actual)
return p, creds, ref, cs, nil
@@ -155,7 +170,7 @@ func (s *DomainService) Apply(ctx context.Context, projectID, domainID uuid.UUID
applied := diff.Changeset{Diffs: toApply}
if len(toApply) > 0 {
if err := p.ApplyChanges(ctx, creds, ref.ZoneID, applied); err != nil {
return diff.Changeset{}, err
return diff.Changeset{}, fmt.Errorf("%w: %v", ErrProviderUnavailable, err)
}
}
return applied, nil
+57 -2
View File
@@ -2,6 +2,7 @@ package service
import (
"context"
"errors"
"testing"
"github.com/google/uuid"
@@ -26,8 +27,10 @@ func testCipher(t *testing.T) *crypto.Cipher {
// fakeProvider is a test double for a provider: it returns canned zone
// records, records applied changesets, and can be configured to fail either
// call with a chosen error.
type fakeProvider struct {
	actual   []model.Record // records handed back by GetRecords
	applied  diff.Changeset // changeset stored by a successful ApplyChanges
	getErr   error          // when set, GetRecords fails with this error
	applyErr error          // when set, ApplyChanges fails with this error
}
// Name reports the fixed provider identifier used by this fake.
func (fakeProvider) Name() string {
	const name = "selectel"
	return name
}
@@ -35,9 +38,15 @@ func (fakeProvider) ListZones(context.Context, provider.Credentials) ([]provider
return nil, nil
}
// GetRecords returns the canned records in actual, unless getErr is set, in
// which case it returns nil records together with that error.
func (f *fakeProvider) GetRecords(_ context.Context, _ provider.Credentials, _ string) ([]model.Record, error) {
	if f.getErr == nil {
		return f.actual, nil
	}
	return nil, f.getErr
}
// ApplyChanges stores cs in applied and reports success, unless applyErr is
// set, in which case it returns that error and leaves applied untouched.
func (f *fakeProvider) ApplyChanges(_ context.Context, _ provider.Credentials, _ string, cs diff.Changeset) error {
	if f.applyErr == nil {
		f.applied = cs
		return nil
	}
	return f.applyErr
}
@@ -191,3 +200,49 @@ func TestApplySelectsByKeyAndOrdersPrunesBeforeUpdates(t *testing.T) {
t.Fatalf("expected update SECOND in applied order, got %+v", fp4.applied.Diffs)
}
}
// TestApplyWrapsProviderError checks that an error returned by the provider's
// ApplyChanges call (e.g. Selectel rejecting a change with a 409 conflict)
// comes back from Apply wrapped in ErrProviderUnavailable, and that
// ProviderMessage recovers the provider's original text from it. This is what
// lets the API layer surface the real provider message instead of a generic
// "internal error".
func TestApplyWrapsProviderError(t *testing.T) {
	const providerText = "selectel POST /zones/z1/rrset: 409: conflicting CNAME record exists"

	// The zone record and the template disagree on the A record's value.
	zone := []model.Record{{Type: model.A, Name: "a.example.com.", TTL: 300, Values: []string{"9.9.9.9"}}}
	doc := dto.TemplateDoc{Records: []dto.RecordDTO{
		{Type: "A", Name: "a.example.com.", TTL: 300, Values: []string{"1.1.1.1"}},
	}}
	svc, fp := setup(t, zone, doc)
	fp.applyErr = errors.New(providerText)

	req := ApplyRequest{Updates: []string{"A a.example.com."}}
	_, err := svc.Apply(context.Background(), uuid.New(), uuid.New(), req)
	switch {
	case err == nil:
		t.Fatal("expected error, got nil")
	case !errors.Is(err, ErrProviderUnavailable):
		t.Fatalf("expected error to wrap ErrProviderUnavailable, got %v", err)
	}

	if got := ProviderMessage(err); got != providerText {
		t.Fatalf("expected clean provider message, got %q", got)
	}
}
// TestResolveWrapsProviderError exercises the GetRecords failure path through
// Check: the error returned by the provider must come back wrapped in
// ErrProviderUnavailable, with the provider's original text recoverable via
// ProviderMessage.
func TestResolveWrapsProviderError(t *testing.T) {
	const providerText = "selectel GET /zones/z1/rrset: 503: upstream unavailable"

	svc, fp := setup(t, nil, dto.TemplateDoc{})
	fp.getErr = errors.New(providerText)

	_, err := svc.Check(context.Background(), uuid.New(), uuid.New())
	switch {
	case err == nil:
		t.Fatal("expected error, got nil")
	case !errors.Is(err, ErrProviderUnavailable):
		t.Fatalf("expected error to wrap ErrProviderUnavailable, got %v", err)
	}

	if got := ProviderMessage(err); got != providerText {
		t.Fatalf("expected clean provider message, got %q", got)
	}
}