Merge fix/surface-provider-error: реальная ошибка провайдера вместо internal error
apply/check при провайдерской ошибке возвращают реальный ответ Selectel (502) вместо generic internal error; внутренние ошибки остаются generic 500. Секрет не течёт (токен не в path, auth-сообщения статические). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_01BwxdSt4reTm7Dj1oxRvpP3
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
@@ -26,6 +27,10 @@ type mockCheckApplier struct {
|
||||
// used by handleCheck status-persistence tests (drift/in_sync/error).
|
||||
checkCS *diff.Changeset
|
||||
checkErr error
|
||||
|
||||
// applyErr, when set, makes Apply fail with this error — used by the
|
||||
// provider-error-surfacing tests (502 with real message vs 500 generic).
|
||||
applyErr error
|
||||
}
|
||||
|
||||
func (m *mockCheckApplier) Check(context.Context, uuid.UUID, uuid.UUID) (diff.Changeset, error) {
|
||||
@@ -40,6 +45,9 @@ func (m *mockCheckApplier) Check(context.Context, uuid.UUID, uuid.UUID) (diff.Ch
|
||||
}
|
||||
func (m *mockCheckApplier) Apply(_ context.Context, _, _ uuid.UUID, req service.ApplyRequest) (diff.Changeset, error) {
|
||||
m.lastReq = req
|
||||
if m.applyErr != nil {
|
||||
return diff.Changeset{}, m.applyErr
|
||||
}
|
||||
return diff.Changeset{}, nil
|
||||
}
|
||||
func (m *mockCheckApplier) ZoneRecords(context.Context, uuid.UUID, uuid.UUID) ([]model.Record, error) {
|
||||
@@ -271,6 +279,99 @@ func TestCheckEndpoint_ErrorScopesStatusToCallerProject(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyEndpoint_ProviderErrorSurfacesRealMessage covers the fix: when
|
||||
// Svc.Apply fails with an error wrapping service.ErrProviderUnavailable (e.g.
|
||||
// Selectel rejecting a change with a 409 conflict), the handler must respond
|
||||
// 502 with the actual provider message in body.error — not a generic 500
|
||||
// "internal error" that hides the real cause from the user.
|
||||
func TestApplyEndpoint_ProviderErrorSurfacesRealMessage(t *testing.T) {
|
||||
a, m := newTestAPI()
|
||||
m.applyErr = fmt.Errorf("%w: %v", service.ErrProviderUnavailable,
|
||||
errors.New("selectel POST /zones/x/rrset: 409: conflicting CNAME record exists"))
|
||||
router := NewRouter(a)
|
||||
|
||||
did := uuid.New().String()
|
||||
req := requestWithSessionCookie(http.MethodPost,
|
||||
"/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did+"/apply",
|
||||
strings.NewReader(`{}`))
|
||||
w := httptest.NewRecorder()
|
||||
router.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusBadGateway {
|
||||
t.Fatalf("expected 502, got %d body %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]string
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !strings.Contains(resp["error"], "409") || !strings.Contains(resp["error"], "conflicting CNAME") {
|
||||
t.Fatalf("expected real provider message in body, got %q", resp["error"])
|
||||
}
|
||||
if strings.Contains(resp["error"], "internal error") {
|
||||
t.Fatalf("provider error must not be masked as internal error, got %q", resp["error"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyEndpoint_NonProviderErrorStaysGeneric covers the flip side: an
|
||||
// unwrapped/local error (decrypt, db, loader) from Svc.Apply must still fall
|
||||
// back to a generic 500 "internal error" — only provider errors get their
|
||||
// real message surfaced.
|
||||
func TestApplyEndpoint_NonProviderErrorStaysGeneric(t *testing.T) {
|
||||
a, m := newTestAPI()
|
||||
m.applyErr = errors.New("decrypt: cipher: message authentication failed")
|
||||
router := NewRouter(a)
|
||||
|
||||
did := uuid.New().String()
|
||||
req := requestWithSessionCookie(http.MethodPost,
|
||||
"/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did+"/apply",
|
||||
strings.NewReader(`{}`))
|
||||
w := httptest.NewRecorder()
|
||||
router.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Fatalf("expected 500, got %d body %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]string
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if resp["error"] != "internal error" {
|
||||
t.Fatalf("expected generic internal error, got %q", resp["error"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestCheckEndpoint_ProviderErrorSurfacesRealMessage mirrors the Apply case
|
||||
// for /check: a provider-wrapped error must come back as 502 with the real
|
||||
// provider message, while the existing status-persistence behavior (SetDomainStatus
|
||||
// error before responding) is unaffected.
|
||||
func TestCheckEndpoint_ProviderErrorSurfacesRealMessage(t *testing.T) {
|
||||
a, m := newTestAPI()
|
||||
ts := a.Store.(*mockTenantStore)
|
||||
m.checkErr = fmt.Errorf("%w: %v", service.ErrProviderUnavailable,
|
||||
errors.New("selectel GET /zones/x/rrset: 503: upstream unavailable"))
|
||||
router := NewRouter(a)
|
||||
|
||||
did := uuid.New()
|
||||
req := requestWithSessionCookie(http.MethodGet,
|
||||
"/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did.String()+"/check", nil)
|
||||
w := httptest.NewRecorder()
|
||||
router.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusBadGateway {
|
||||
t.Fatalf("expected 502, got %d body %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]string
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !strings.Contains(resp["error"], "503") || !strings.Contains(resp["error"], "upstream unavailable") {
|
||||
t.Fatalf("expected real provider message in body, got %q", resp["error"])
|
||||
}
|
||||
if len(ts.statusCalls) != 1 || ts.statusCalls[0].status != service.StatusError {
|
||||
t.Fatalf("expected SetDomainStatus(_, _, error) to still run, got %+v", ts.statusCalls)
|
||||
}
|
||||
}
|
||||
|
||||
// TestChangesetResponseEmptyMarshalsToArrays guards the белый-экран bug: an
|
||||
// empty changeset (zone matches its template exactly, e.g. right after a
|
||||
// snapshot) must marshal updates/prunes/readOnly as [] not null — a nil slice
|
||||
|
||||
@@ -43,7 +43,14 @@ func (a *API) handleCheck(w http.ResponseWriter, r *http.Request) {
|
||||
log.Printf("api: set domain status (error) failed: %v", serr)
|
||||
}
|
||||
log.Printf("api: check failed: %v", err)
|
||||
// A provider failure (e.g. Selectel returning a 409 conflict) is safe
|
||||
// and useful to show the user as-is; any other failure (decrypt/db/loader)
|
||||
// stays a generic "internal error" to avoid leaking internals.
|
||||
if errors.Is(err, service.ErrProviderUnavailable) {
|
||||
writeErr(w, http.StatusBadGateway, service.ProviderMessage(err))
|
||||
} else {
|
||||
writeErr(w, http.StatusInternalServerError, "internal error")
|
||||
}
|
||||
return
|
||||
}
|
||||
// Manual check persists status/history only — no notification. Notify
|
||||
@@ -79,7 +86,13 @@ func (a *API) handleApply(w http.ResponseWriter, r *http.Request) {
|
||||
})
|
||||
if err != nil {
|
||||
log.Printf("api: apply failed: %v", err)
|
||||
// Same distinction as handleCheck: surface the real provider message,
|
||||
// keep everything else generic.
|
||||
if errors.Is(err, service.ErrProviderUnavailable) {
|
||||
writeErr(w, http.StatusBadGateway, service.ProviderMessage(err))
|
||||
} else {
|
||||
writeErr(w, http.StatusInternalServerError, "internal error")
|
||||
}
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, toChangesetResponse(cs))
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
@@ -23,6 +24,16 @@ import (
|
||||
// pick 502 vs 404 without leaking provider error details as "not found".
|
||||
var ErrProviderUnavailable = errors.New("service: provider unavailable")
|
||||
|
||||
// ProviderMessage extracts the provider's own error text from an error
|
||||
// wrapped with ErrProviderUnavailable, stripping the sentinel prefix so
|
||||
// callers can surface it to the user as-is (e.g. Selectel's "409: conflicting
|
||||
// CNAME record exists"). Only meant to be called on errors that
|
||||
// errors.Is(err, ErrProviderUnavailable) — otherwise it just returns
|
||||
// err.Error() unchanged.
|
||||
func ProviderMessage(err error) string {
|
||||
return strings.TrimPrefix(err.Error(), ErrProviderUnavailable.Error()+": ")
|
||||
}
|
||||
|
||||
// DomainRef is the minimal data the service needs about a domain.
|
||||
type DomainRef struct {
|
||||
ZoneID string
|
||||
@@ -84,7 +95,11 @@ func (s *DomainService) resolve(ctx context.Context, projectID, domainID uuid.UU
|
||||
creds := provider.Credentials{Secret: string(secret)}
|
||||
actual, err := p.GetRecords(ctx, creds, ref.ZoneID)
|
||||
if err != nil {
|
||||
return nil, provider.Credentials{}, ref, diff.Changeset{}, err
|
||||
// Only a failure of the provider call itself is "provider unavailable" —
|
||||
// LoadDomain/ByName/Decrypt errors above are local resolution failures
|
||||
// (e.g. domain not found, bad stored credentials) and must not be
|
||||
// conflated with it.
|
||||
return nil, provider.Credentials{}, ref, diff.Changeset{}, fmt.Errorf("%w: %v", ErrProviderUnavailable, err)
|
||||
}
|
||||
cs := diff.Diff(tmpl.Materialize(ref.Template, ref.ZoneName), actual)
|
||||
return p, creds, ref, cs, nil
|
||||
@@ -155,7 +170,7 @@ func (s *DomainService) Apply(ctx context.Context, projectID, domainID uuid.UUID
|
||||
applied := diff.Changeset{Diffs: toApply}
|
||||
if len(toApply) > 0 {
|
||||
if err := p.ApplyChanges(ctx, creds, ref.ZoneID, applied); err != nil {
|
||||
return diff.Changeset{}, err
|
||||
return diff.Changeset{}, fmt.Errorf("%w: %v", ErrProviderUnavailable, err)
|
||||
}
|
||||
}
|
||||
return applied, nil
|
||||
|
||||
@@ -2,6 +2,7 @@ package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"github.com/google/uuid"
|
||||
@@ -28,6 +29,8 @@ func testCipher(t *testing.T) *crypto.Cipher {
|
||||
type fakeProvider struct {
|
||||
actual []model.Record
|
||||
applied diff.Changeset
|
||||
getErr error // when set, GetRecords fails with this error
|
||||
applyErr error // when set, ApplyChanges fails with this error
|
||||
}
|
||||
|
||||
func (fakeProvider) Name() string { return "selectel" }
|
||||
@@ -35,9 +38,15 @@ func (fakeProvider) ListZones(context.Context, provider.Credentials) ([]provider
|
||||
return nil, nil
|
||||
}
|
||||
func (f *fakeProvider) GetRecords(context.Context, provider.Credentials, string) ([]model.Record, error) {
|
||||
if f.getErr != nil {
|
||||
return nil, f.getErr
|
||||
}
|
||||
return f.actual, nil
|
||||
}
|
||||
func (f *fakeProvider) ApplyChanges(_ context.Context, _ provider.Credentials, _ string, cs diff.Changeset) error {
|
||||
if f.applyErr != nil {
|
||||
return f.applyErr
|
||||
}
|
||||
f.applied = cs
|
||||
return nil
|
||||
}
|
||||
@@ -191,3 +200,49 @@ func TestApplySelectsByKeyAndOrdersPrunesBeforeUpdates(t *testing.T) {
|
||||
t.Fatalf("expected update SECOND in applied order, got %+v", fp4.applied.Diffs)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyWrapsProviderError covers the fix: a failure from the provider's
|
||||
// ApplyChanges call (e.g. Selectel rejecting a change with a 409 conflict)
|
||||
// must be wrapped in ErrProviderUnavailable so the API layer can tell it
|
||||
// apart from a local resolution failure and surface the real provider
|
||||
// message instead of a generic "internal error".
|
||||
func TestApplyWrapsProviderError(t *testing.T) {
|
||||
actual := []model.Record{{Type: model.A, Name: "a.example.com.", TTL: 300, Values: []string{"9.9.9.9"}}}
|
||||
tmpl := dto.TemplateDoc{Records: []dto.RecordDTO{
|
||||
{Type: "A", Name: "a.example.com.", TTL: 300, Values: []string{"1.1.1.1"}},
|
||||
}}
|
||||
svc, fp := setup(t, actual, tmpl)
|
||||
fp.applyErr = errors.New("selectel POST /zones/z1/rrset: 409: conflicting CNAME record exists")
|
||||
|
||||
_, err := svc.Apply(context.Background(), uuid.New(), uuid.New(), ApplyRequest{Updates: []string{"A a.example.com."}})
|
||||
if err == nil {
|
||||
t.Fatal("expected error, got nil")
|
||||
}
|
||||
if !errors.Is(err, ErrProviderUnavailable) {
|
||||
t.Fatalf("expected error to wrap ErrProviderUnavailable, got %v", err)
|
||||
}
|
||||
msg := ProviderMessage(err)
|
||||
if msg != "selectel POST /zones/z1/rrset: 409: conflicting CNAME record exists" {
|
||||
t.Fatalf("expected clean provider message, got %q", msg)
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveWrapsProviderError covers the resolve helper shared by Check and
|
||||
// Apply: a GetRecords failure from the provider must also be wrapped in
|
||||
// ErrProviderUnavailable, mirroring ZoneRecords' existing behavior.
|
||||
func TestResolveWrapsProviderError(t *testing.T) {
|
||||
svc, fp := setup(t, nil, dto.TemplateDoc{})
|
||||
fp.getErr = errors.New("selectel GET /zones/z1/rrset: 503: upstream unavailable")
|
||||
|
||||
_, err := svc.Check(context.Background(), uuid.New(), uuid.New())
|
||||
if err == nil {
|
||||
t.Fatal("expected error, got nil")
|
||||
}
|
||||
if !errors.Is(err, ErrProviderUnavailable) {
|
||||
t.Fatalf("expected error to wrap ErrProviderUnavailable, got %v", err)
|
||||
}
|
||||
msg := ProviderMessage(err)
|
||||
if msg != "selectel GET /zones/z1/rrset: 503: upstream unavailable" {
|
||||
t.Fatalf("expected clean provider message, got %q", msg)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user