fix(api): surface real provider error on apply/check instead of generic internal error

resolve (shared by Check/Apply) and Apply now wrap GetRecords/ApplyChanges
failures in service.ErrProviderUnavailable, matching ZoneRecords' existing
behavior. handleApply/handleCheck use errors.Is against it to return 502
with the real provider message (e.g. Selectel's 409 conflict body) instead
of masking every failure as a generic 500 "internal error"; non-provider
errors (decrypt/db/loader) are unaffected.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01BwxdSt4reTm7Dj1oxRvpP3
This commit is contained in:
2026-07-05 15:53:27 +07:00
parent 6f9958af60
commit 879e9e14b1
4 changed files with 190 additions and 6 deletions
+101
View File
@@ -5,6 +5,7 @@ import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/http/httptest"
"strings"
@@ -26,6 +27,10 @@ type mockCheckApplier struct {
// used by handleCheck status-persistence tests (drift/in_sync/error).
checkCS *diff.Changeset
checkErr error
// applyErr, when set, makes Apply fail with this error — used by the
// provider-error-surfacing tests (502 with real message vs 500 generic).
applyErr error
}
func (m *mockCheckApplier) Check(context.Context, uuid.UUID, uuid.UUID) (diff.Changeset, error) {
@@ -40,6 +45,9 @@ func (m *mockCheckApplier) Check(context.Context, uuid.UUID, uuid.UUID) (diff.Ch
}
// Apply stores the incoming request in lastReq so tests can inspect it, then
// returns an empty changeset together with applyErr (nil unless a test has
// configured a failure).
func (m *mockCheckApplier) Apply(_ context.Context, _, _ uuid.UUID, req service.ApplyRequest) (diff.Changeset, error) {
	m.lastReq = req
	// applyErr is a plain error value, so handing it back unconditionally
	// yields a nil error whenever no failure was configured.
	return diff.Changeset{}, m.applyErr
}
func (m *mockCheckApplier) ZoneRecords(context.Context, uuid.UUID, uuid.UUID) ([]model.Record, error) {
@@ -271,6 +279,99 @@ func TestCheckEndpoint_ErrorScopesStatusToCallerProject(t *testing.T) {
}
}
// TestApplyEndpoint_ProviderErrorSurfacesRealMessage checks the /apply error
// path for provider failures: when the mocked Apply returns an error wrapping
// service.ErrProviderUnavailable, the response must be 502 and body.error must
// carry the provider's own text (here a 409 conflict) rather than the generic
// "internal error" string.
func TestApplyEndpoint_ProviderErrorSurfacesRealMessage(t *testing.T) {
	testAPI, applier := newTestAPI()
	providerErr := errors.New("selectel POST /zones/x/rrset: 409: conflicting CNAME record exists")
	applier.applyErr = fmt.Errorf("%w: %v", service.ErrProviderUnavailable, providerErr)
	router := NewRouter(testAPI)
	domainID := uuid.New().String()
	target := "/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/" + domainID + "/apply"
	req := requestWithSessionCookie(http.MethodPost, target, strings.NewReader(`{}`))
	rec := httptest.NewRecorder()
	router.ServeHTTP(rec, req)
	if got := rec.Code; got != http.StatusBadGateway {
		t.Fatalf("expected 502, got %d body %s", got, rec.Body.String())
	}
	var body map[string]string
	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
		t.Fatal(err)
	}
	msg := body["error"]
	// Both fragments of the provider's message must survive into the response.
	for _, want := range []string{"409", "conflicting CNAME"} {
		if !strings.Contains(msg, want) {
			t.Fatalf("expected real provider message in body, got %q", msg)
		}
	}
	if strings.Contains(msg, "internal error") {
		t.Fatalf("provider error must not be masked as internal error, got %q", msg)
	}
}
// TestApplyEndpoint_NonProviderErrorStaysGeneric is the counterpart to the
// provider-error test: an Apply error that does not wrap
// service.ErrProviderUnavailable (here a decrypt failure) must still produce
// a 500 whose body.error is exactly "internal error", so the underlying
// message is not exposed to the caller.
func TestApplyEndpoint_NonProviderErrorStaysGeneric(t *testing.T) {
	testAPI, applier := newTestAPI()
	applier.applyErr = errors.New("decrypt: cipher: message authentication failed")
	router := NewRouter(testAPI)
	domainID := uuid.New().String()
	target := "/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/" + domainID + "/apply"
	req := requestWithSessionCookie(http.MethodPost, target, strings.NewReader(`{}`))
	rec := httptest.NewRecorder()
	router.ServeHTTP(rec, req)
	if got := rec.Code; got != http.StatusInternalServerError {
		t.Fatalf("expected 500, got %d body %s", got, rec.Body.String())
	}
	var body map[string]string
	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
		t.Fatal(err)
	}
	if msg := body["error"]; msg != "internal error" {
		t.Fatalf("expected generic internal error, got %q", msg)
	}
}
// TestCheckEndpoint_ProviderErrorSurfacesRealMessage is the /check analogue of
// the Apply provider-error test: a Check error wrapping
// service.ErrProviderUnavailable must yield a 502 whose body.error contains
// the provider's own message. It also asserts that exactly one status call
// with service.StatusError was recorded on the mock store, i.e. the error
// status is still persisted on this path.
func TestCheckEndpoint_ProviderErrorSurfacesRealMessage(t *testing.T) {
	testAPI, checker := newTestAPI()
	store := testAPI.Store.(*mockTenantStore)
	providerErr := errors.New("selectel GET /zones/x/rrset: 503: upstream unavailable")
	checker.checkErr = fmt.Errorf("%w: %v", service.ErrProviderUnavailable, providerErr)
	router := NewRouter(testAPI)
	domainID := uuid.New()
	target := "/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/" + domainID.String() + "/check"
	req := requestWithSessionCookie(http.MethodGet, target, nil)
	rec := httptest.NewRecorder()
	router.ServeHTTP(rec, req)
	if got := rec.Code; got != http.StatusBadGateway {
		t.Fatalf("expected 502, got %d body %s", got, rec.Body.String())
	}
	var body map[string]string
	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
		t.Fatal(err)
	}
	msg := body["error"]
	// Both fragments of the provider's message must survive into the response.
	for _, want := range []string{"503", "upstream unavailable"} {
		if !strings.Contains(msg, want) {
			t.Fatalf("expected real provider message in body, got %q", msg)
		}
	}
	// The length check short-circuits before indexing, so an empty slice is safe.
	statusPersisted := len(store.statusCalls) == 1 && store.statusCalls[0].status == service.StatusError
	if !statusPersisted {
		t.Fatalf("expected SetDomainStatus(_, _, error) to still run, got %+v", store.statusCalls)
	}
}
// TestChangesetResponseEmptyMarshalsToArrays guards the white-screen bug: an
// empty changeset (zone matches its template exactly, e.g. right after a
// snapshot) must marshal updates/prunes/readOnly as [] not null — a nil slice
+15 -2
View File
@@ -43,7 +43,14 @@ func (a *API) handleCheck(w http.ResponseWriter, r *http.Request) {
log.Printf("api: set domain status (error) failed: %v", serr)
}
log.Printf("api: check failed: %v", err)
writeErr(w, http.StatusInternalServerError, "internal error")
// A provider failure (e.g. Selectel returning a 409 conflict) is safe
// and useful to show the user as-is; any other failure (decrypt/db/loader)
// stays a generic "internal error" to avoid leaking internals.
if errors.Is(err, service.ErrProviderUnavailable) {
writeErr(w, http.StatusBadGateway, service.ProviderMessage(err))
} else {
writeErr(w, http.StatusInternalServerError, "internal error")
}
return
}
// Manual check persists status/history only — no notification. Notify
@@ -79,7 +86,13 @@ func (a *API) handleApply(w http.ResponseWriter, r *http.Request) {
})
if err != nil {
log.Printf("api: apply failed: %v", err)
writeErr(w, http.StatusInternalServerError, "internal error")
// Same distinction as handleCheck: surface the real provider message,
// keep everything else generic.
if errors.Is(err, service.ErrProviderUnavailable) {
writeErr(w, http.StatusBadGateway, service.ProviderMessage(err))
} else {
writeErr(w, http.StatusInternalServerError, "internal error")
}
return
}
writeJSON(w, http.StatusOK, toChangesetResponse(cs))