diff --git a/internal/api/api_test.go b/internal/api/api_test.go index 89b3e27..94e1c99 100644 --- a/internal/api/api_test.go +++ b/internal/api/api_test.go @@ -5,6 +5,7 @@ import ( "context" "encoding/json" "errors" + "fmt" "net/http" "net/http/httptest" "strings" @@ -26,6 +27,10 @@ type mockCheckApplier struct { // used by handleCheck status-persistence tests (drift/in_sync/error). checkCS *diff.Changeset checkErr error + + // applyErr, when set, makes Apply fail with this error — used by the + // provider-error-surfacing tests (502 with real message vs 500 generic). + applyErr error } func (m *mockCheckApplier) Check(context.Context, uuid.UUID, uuid.UUID) (diff.Changeset, error) { @@ -40,6 +45,9 @@ func (m *mockCheckApplier) Check(context.Context, uuid.UUID, uuid.UUID) (diff.Ch } func (m *mockCheckApplier) Apply(_ context.Context, _, _ uuid.UUID, req service.ApplyRequest) (diff.Changeset, error) { m.lastReq = req + if m.applyErr != nil { + return diff.Changeset{}, m.applyErr + } return diff.Changeset{}, nil } func (m *mockCheckApplier) ZoneRecords(context.Context, uuid.UUID, uuid.UUID) ([]model.Record, error) { @@ -271,6 +279,99 @@ func TestCheckEndpoint_ErrorScopesStatusToCallerProject(t *testing.T) { } } +// TestApplyEndpoint_ProviderErrorSurfacesRealMessage covers the fix: when +// Svc.Apply fails with an error wrapping service.ErrProviderUnavailable (e.g. +// Selectel rejecting a change with a 409 conflict), the handler must respond +// 502 with the actual provider message in body.error — not a generic 500 +// "internal error" that hides the real cause from the user. +func TestApplyEndpoint_ProviderErrorSurfacesRealMessage(t *testing.T) { + a, m := newTestAPI() + m.applyErr = fmt.Errorf("%w: %v", service.ErrProviderUnavailable, + errors.New("selectel POST /zones/x/rrset: 409: conflicting CNAME record exists")) + router := NewRouter(a) + + did := uuid.New().String() + req := requestWithSessionCookie(http.MethodPost, + "/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did+"/apply", + strings.NewReader(`{}`)) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusBadGateway { + t.Fatalf("expected 502, got %d body %s", w.Code, w.Body.String()) + } + var resp map[string]string + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatal(err) + } + if !strings.Contains(resp["error"], "409") || !strings.Contains(resp["error"], "conflicting CNAME") { + t.Fatalf("expected real provider message in body, got %q", resp["error"]) + } + if strings.Contains(resp["error"], "internal error") { + t.Fatalf("provider error must not be masked as internal error, got %q", resp["error"]) + } +} + +// TestApplyEndpoint_NonProviderErrorStaysGeneric covers the flip side: an +// unwrapped/local error (decrypt, db, loader) from Svc.Apply must still fall +// back to a generic 500 "internal error" — only provider errors get their +// real message surfaced. +func TestApplyEndpoint_NonProviderErrorStaysGeneric(t *testing.T) { + a, m := newTestAPI() + m.applyErr = errors.New("decrypt: cipher: message authentication failed") + router := NewRouter(a) + + did := uuid.New().String() + req := requestWithSessionCookie(http.MethodPost, + "/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did+"/apply", + strings.NewReader(`{}`)) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusInternalServerError { + t.Fatalf("expected 500, got %d body %s", w.Code, w.Body.String()) + } + var resp map[string]string + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatal(err) + } + if resp["error"] != "internal error" { + t.Fatalf("expected generic internal error, got %q", resp["error"]) + } +} + +// TestCheckEndpoint_ProviderErrorSurfacesRealMessage mirrors the Apply case +// for /check: a provider-wrapped error must come back as 502 with the real +// provider message, while the existing status-persistence behavior (SetDomainStatus +// error before responding) is unaffected. +func TestCheckEndpoint_ProviderErrorSurfacesRealMessage(t *testing.T) { + a, m := newTestAPI() + ts := a.Store.(*mockTenantStore) + m.checkErr = fmt.Errorf("%w: %v", service.ErrProviderUnavailable, + errors.New("selectel GET /zones/x/rrset: 503: upstream unavailable")) + router := NewRouter(a) + + did := uuid.New() + req := requestWithSessionCookie(http.MethodGet, + "/api/v1/projects/00000000-0000-0000-0000-000000000002/domains/"+did.String()+"/check", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusBadGateway { + t.Fatalf("expected 502, got %d body %s", w.Code, w.Body.String()) + } + var resp map[string]string + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatal(err) + } + if !strings.Contains(resp["error"], "503") || !strings.Contains(resp["error"], "upstream unavailable") { + t.Fatalf("expected real provider message in body, got %q", resp["error"]) + } + if len(ts.statusCalls) != 1 || ts.statusCalls[0].status != service.StatusError { + t.Fatalf("expected SetDomainStatus(_, _, error) to still run, got %+v", ts.statusCalls) + } +} + // TestChangesetResponseEmptyMarshalsToArrays guards the белый-экран bug: an // empty changeset (zone matches its template exactly, e.g. right after a // snapshot) must marshal updates/prunes/readOnly as [] not null — a nil slice diff --git a/internal/api/handlers.go b/internal/api/handlers.go index a8491de..cc10af0 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -43,7 +43,14 @@ func (a *API) handleCheck(w http.ResponseWriter, r *http.Request) { log.Printf("api: set domain status (error) failed: %v", serr) } log.Printf("api: check failed: %v", err) - writeErr(w, http.StatusInternalServerError, "internal error") + // A provider failure (e.g. Selectel returning a 409 conflict) is safe + // and useful to show the user as-is; any other failure (decrypt/db/loader) + // stays a generic "internal error" to avoid leaking internals. + if errors.Is(err, service.ErrProviderUnavailable) { + writeErr(w, http.StatusBadGateway, service.ProviderMessage(err)) + } else { + writeErr(w, http.StatusInternalServerError, "internal error") + } return } // Manual check persists status/history only — no notification. Notify @@ -79,7 +86,13 @@ func (a *API) handleApply(w http.ResponseWriter, r *http.Request) { }) if err != nil { log.Printf("api: apply failed: %v", err) - writeErr(w, http.StatusInternalServerError, "internal error") + // Same distinction as handleCheck: surface the real provider message, + // keep everything else generic. + if errors.Is(err, service.ErrProviderUnavailable) { + writeErr(w, http.StatusBadGateway, service.ProviderMessage(err)) + } else { + writeErr(w, http.StatusInternalServerError, "internal error") + } return } writeJSON(w, http.StatusOK, toChangesetResponse(cs)) diff --git a/internal/service/service.go b/internal/service/service.go index 80d8ca8..46646b1 100644 --- a/internal/service/service.go +++ b/internal/service/service.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "strings" "github.com/google/uuid" @@ -23,6 +24,16 @@ import ( // pick 502 vs 404 without leaking provider error details as "not found". var ErrProviderUnavailable = errors.New("service: provider unavailable") +// ProviderMessage extracts the provider's own error text from an error +// wrapped with ErrProviderUnavailable, stripping the sentinel prefix so +// callers can surface it to the user as-is (e.g. Selectel's "409: conflicting +// CNAME record exists"). Only meant to be called on errors that +// errors.Is(err, ErrProviderUnavailable) — otherwise it just returns +// err.Error() unchanged. +func ProviderMessage(err error) string { + return strings.TrimPrefix(err.Error(), ErrProviderUnavailable.Error()+": ") +} + // DomainRef is the minimal data the service needs about a domain. type DomainRef struct { ZoneID string @@ -84,7 +95,11 @@ func (s *DomainService) resolve(ctx context.Context, projectID, domainID uuid.UU creds := provider.Credentials{Secret: string(secret)} actual, err := p.GetRecords(ctx, creds, ref.ZoneID) if err != nil { - return nil, provider.Credentials{}, ref, diff.Changeset{}, err + // Only a failure of the provider call itself is "provider unavailable" — + // LoadDomain/ByName/Decrypt errors above are local resolution failures + // (e.g. domain not found, bad stored credentials) and must not be + // conflated with it. + return nil, provider.Credentials{}, ref, diff.Changeset{}, fmt.Errorf("%w: %v", ErrProviderUnavailable, err) } cs := diff.Diff(tmpl.Materialize(ref.Template, ref.ZoneName), actual) return p, creds, ref, cs, nil @@ -155,7 +170,7 @@ func (s *DomainService) Apply(ctx context.Context, projectID, domainID uuid.UUID applied := diff.Changeset{Diffs: toApply} if len(toApply) > 0 { if err := p.ApplyChanges(ctx, creds, ref.ZoneID, applied); err != nil { - return diff.Changeset{}, err + return diff.Changeset{}, fmt.Errorf("%w: %v", ErrProviderUnavailable, err) } } return applied, nil diff --git a/internal/service/service_test.go b/internal/service/service_test.go index 69f080a..8fb8c8b 100644 --- a/internal/service/service_test.go +++ b/internal/service/service_test.go @@ -2,6 +2,7 @@ package service import ( "context" + "errors" "testing" "github.com/google/uuid" @@ -26,8 +27,10 @@ func testCipher(t *testing.T) *crypto.Cipher { // fakeProvider records applied changesets and returns canned zone records. type fakeProvider struct { - actual []model.Record - applied diff.Changeset + actual []model.Record + applied diff.Changeset + getErr error // when set, GetRecords fails with this error + applyErr error // when set, ApplyChanges fails with this error } func (fakeProvider) Name() string { return "selectel" } @@ -35,9 +38,15 @@ func (fakeProvider) ListZones(context.Context, provider.Credentials) ([]provider return nil, nil } func (f *fakeProvider) GetRecords(context.Context, provider.Credentials, string) ([]model.Record, error) { + if f.getErr != nil { + return nil, f.getErr + } return f.actual, nil } func (f *fakeProvider) ApplyChanges(_ context.Context, _ provider.Credentials, _ string, cs diff.Changeset) error { + if f.applyErr != nil { + return f.applyErr + } f.applied = cs return nil } @@ -191,3 +200,49 @@ func TestApplySelectsByKeyAndOrdersPrunesBeforeUpdates(t *testing.T) { t.Fatalf("expected update SECOND in applied order, got %+v", fp4.applied.Diffs) } } + +// TestApplyWrapsProviderError covers the fix: a failure from the provider's +// ApplyChanges call (e.g. Selectel rejecting a change with a 409 conflict) +// must be wrapped in ErrProviderUnavailable so the API layer can tell it +// apart from a local resolution failure and surface the real provider +// message instead of a generic "internal error". +func TestApplyWrapsProviderError(t *testing.T) { + actual := []model.Record{{Type: model.A, Name: "a.example.com.", TTL: 300, Values: []string{"9.9.9.9"}}} + tmpl := dto.TemplateDoc{Records: []dto.RecordDTO{ + {Type: "A", Name: "a.example.com.", TTL: 300, Values: []string{"1.1.1.1"}}, + }} + svc, fp := setup(t, actual, tmpl) + fp.applyErr = errors.New("selectel POST /zones/z1/rrset: 409: conflicting CNAME record exists") + + _, err := svc.Apply(context.Background(), uuid.New(), uuid.New(), ApplyRequest{Updates: []string{"A a.example.com."}}) + if err == nil { + t.Fatal("expected error, got nil") + } + if !errors.Is(err, ErrProviderUnavailable) { + t.Fatalf("expected error to wrap ErrProviderUnavailable, got %v", err) + } + msg := ProviderMessage(err) + if msg != "selectel POST /zones/z1/rrset: 409: conflicting CNAME record exists" { + t.Fatalf("expected clean provider message, got %q", msg) + } +} + +// TestResolveWrapsProviderError covers the resolve helper shared by Check and +// Apply: a GetRecords failure from the provider must also be wrapped in +// ErrProviderUnavailable, mirroring ZoneRecords' existing behavior. +func TestResolveWrapsProviderError(t *testing.T) { + svc, fp := setup(t, nil, dto.TemplateDoc{}) + fp.getErr = errors.New("selectel GET /zones/z1/rrset: 503: upstream unavailable") + + _, err := svc.Check(context.Background(), uuid.New(), uuid.New()) + if err == nil { + t.Fatal("expected error, got nil") + } + if !errors.Is(err, ErrProviderUnavailable) { + t.Fatalf("expected error to wrap ErrProviderUnavailable, got %v", err) + } + msg := ProviderMessage(err) + if msg != "selectel GET /zones/z1/rrset: 503: upstream unavailable" { + t.Fatalf("expected clean provider message, got %q", msg) + } +}