fix(orchestrator): prevent concurrent double-run duplicating messages; reflect errors in status
This commit is contained in:
@@ -78,6 +78,14 @@ bash scripts/e2e.sh
|
|||||||
# or: make e2e
|
# or: make e2e
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Known limitations
|
||||||
|
|
||||||
|
Deduplication key is `UNIQUE(account_id, message_key)` **without folder**. If
|
||||||
|
the same message appears in multiple source folders (e.g. Gmail `INBOX` +
|
||||||
|
`[Gmail]/All Mail` + labels-as-folders), it is copied only into whichever
|
||||||
|
destination folder is processed first; folder placement for such duplicated
|
||||||
|
messages is not guaranteed. This is intentional per the design spec.
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
- `cmd/server` — entrypoint: runs DB migrations, then serves HTTP.
|
- `cmd/server` — entrypoint: runs DB migrations, then serves HTTP.
|
||||||
|
|||||||
@@ -74,6 +74,10 @@ func (s *Server) handleRun(w http.ResponseWriter, r *http.Request) {
|
|||||||
http.Error(w, "accounts must pass connection tests first", http.StatusConflict)
|
http.Error(w, "accounts must pass connection tests first", http.StatusConflict)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if errors.Is(err, orchestrator.ErrAlreadyRunning) {
|
||||||
|
http.Error(w, "task is already running", http.StatusConflict)
|
||||||
|
return
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var ErrNotTested = errors.New("accounts not fully tested")
|
var ErrNotTested = errors.New("accounts not fully tested")
|
||||||
|
var ErrAlreadyRunning = errors.New("task already running")
|
||||||
|
|
||||||
type Orchestrator struct {
|
type Orchestrator struct {
|
||||||
store *store.Store
|
store *store.Store
|
||||||
@@ -99,15 +100,23 @@ func (o *Orchestrator) Run(ctx context.Context, taskID int64) (int64, error) {
|
|||||||
if !gateOK(accs) {
|
if !gateOK(accs) {
|
||||||
return 0, ErrNotTested
|
return 0, ErrNotTested
|
||||||
}
|
}
|
||||||
|
acquired, err := o.store.TryMarkTaskRunning(ctx, taskID)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if !acquired {
|
||||||
|
return 0, ErrAlreadyRunning
|
||||||
|
}
|
||||||
srcEP, dstEP, err := o.endpoints(ctx, task)
|
srcEP, dstEP, err := o.endpoints(ctx, task)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
_ = o.store.SetTaskStatus(ctx, taskID, "error")
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
runID, err := o.store.CreateRun(ctx, taskID)
|
runID, err := o.store.CreateRun(ctx, taskID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
_ = o.store.SetTaskStatus(ctx, taskID, "error")
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
_ = o.store.SetTaskStatus(ctx, taskID, "running")
|
|
||||||
o.hub.Publish(wshub.Event{Type: "run_started", TaskID: taskID, Data: map[string]any{"run_id": runID}})
|
o.hub.Publish(wshub.Event{Type: "run_started", TaskID: taskID, Data: map[string]any{"run_id": runID}})
|
||||||
|
|
||||||
go o.runAll(context.WithoutCancel(ctx), task, runID, accs, srcEP, dstEP)
|
go o.runAll(context.WithoutCancel(ctx), task, runID, accs, srcEP, dstEP)
|
||||||
@@ -138,8 +147,12 @@ func (o *Orchestrator) runAll(ctx context.Context, task store.Task, runID int64,
|
|||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|
||||||
_ = o.store.FinishRun(ctx, runID, "done", totCopied, totSkipped, totErr)
|
status := "done"
|
||||||
_ = o.store.SetTaskStatus(ctx, task.ID, "done")
|
if totErr > 0 {
|
||||||
|
status = "done_with_errors"
|
||||||
|
}
|
||||||
|
_ = o.store.FinishRun(ctx, runID, status, totCopied, totSkipped, totErr)
|
||||||
|
_ = o.store.SetTaskStatus(ctx, task.ID, status)
|
||||||
o.hub.Publish(wshub.Event{Type: "run_done", TaskID: task.ID,
|
o.hub.Publish(wshub.Event{Type: "run_done", TaskID: task.ID,
|
||||||
Data: map[string]any{"run_id": runID, "copied": totCopied, "skipped": totSkipped, "errors": totErr}})
|
Data: map[string]any{"run_id": runID, "copied": totCopied, "skipped": totSkipped, "errors": totErr}})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -55,3 +55,14 @@ func (s *Store) SetTaskStatus(ctx context.Context, id int64, status string) erro
|
|||||||
_, err := s.Pool.Exec(ctx, `UPDATE tasks SET status=$2 WHERE id=$1`, id, status)
|
_, err := s.Pool.Exec(ctx, `UPDATE tasks SET status=$2 WHERE id=$1`, id, status)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TryMarkTaskRunning atomically sets status='running' only if the task is not already running.
|
||||||
|
// Returns true if this call acquired the run (status was not 'running' before), false otherwise.
|
||||||
|
func (s *Store) TryMarkTaskRunning(ctx context.Context, id int64) (bool, error) {
|
||||||
|
ct, err := s.Pool.Exec(ctx,
|
||||||
|
`UPDATE tasks SET status='running' WHERE id=$1 AND status<>'running'`, id)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
return ct.RowsAffected() == 1, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,32 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestTryMarkTaskRunningIsExclusive(t *testing.T) {
|
||||||
|
s := testStore(t)
|
||||||
|
ctx := context.Background()
|
||||||
|
epSrc, _ := s.CreateEndpoint(ctx, Endpoint{RoleLabel: "src", Host: "a", Port: 993, TLSMode: "ssl"})
|
||||||
|
epDst, _ := s.CreateEndpoint(ctx, Endpoint{RoleLabel: "dst", Host: "b", Port: 993, TLSMode: "ssl"})
|
||||||
|
taskID, _ := s.CreateTask(ctx, Task{Name: "t", SrcEndpointID: epSrc, DstEndpointID: epDst})
|
||||||
|
|
||||||
|
first, err := s.TryMarkTaskRunning(ctx, taskID)
|
||||||
|
if err != nil || !first {
|
||||||
|
t.Fatalf("first acquire must succeed: ok=%v err=%v", first, err)
|
||||||
|
}
|
||||||
|
second, err := s.TryMarkTaskRunning(ctx, taskID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("err: %v", err)
|
||||||
|
}
|
||||||
|
if second {
|
||||||
|
t.Fatal("second acquire must fail while running")
|
||||||
|
}
|
||||||
|
// after completion, a re-run may acquire again
|
||||||
|
_ = s.SetTaskStatus(ctx, taskID, "done")
|
||||||
|
third, _ := s.TryMarkTaskRunning(ctx, taskID)
|
||||||
|
if !third {
|
||||||
|
t.Fatal("acquire after completion must succeed")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -152,7 +152,7 @@ export function TaskDetail({ id }: { id: number }) {
|
|||||||
<button className="btn" onClick={onTest} disabled={busy !== null || accounts.length === 0}>
|
<button className="btn" onClick={onTest} disabled={busy !== null || accounts.length === 0}>
|
||||||
{busy === 'test' ? 'Testing…' : 'Test connections'}
|
{busy === 'test' ? 'Testing…' : 'Test connections'}
|
||||||
</button>
|
</button>
|
||||||
<button className="btn btn-primary" onClick={onRun} disabled={busy !== null || !allTested}>
|
<button className="btn btn-primary" onClick={onRun} disabled={busy !== null || !allTested || task.status === 'running'}>
|
||||||
{busy === 'run' ? 'Starting…' : 'Run migration'}
|
{busy === 'run' ? 'Starting…' : 'Run migration'}
|
||||||
</button>
|
</button>
|
||||||
{!allTested && accounts.length > 0 && <span className="hint">run unlocks once every account tests OK on both sides</span>}
|
{!allTested && accounts.length > 0 && <span className="hint">run unlocks once every account tests OK on both sides</span>}
|
||||||
|
|||||||
Reference in New Issue
Block a user