// Bit width of the internal working registers and of the out port of
// gcd_slow_impl; the x and y input ports are declared one bit narrower.
`define N_BITS 16 // word length

// A realistic implementation of "GCD slow", literally using only 1 subtraction unit.
//
// Ports:
//   x, y  - operands, (N_BITS-1) bits wide. They are copied into the working
//           registers only on the first clock event (while _init is still 0);
//           later changes on these inputs are ignored.
//   out   - current value of the _x working register.
//   valid - high once the registers have been loaded and _x == _y; at that
//           point out holds gcd(x, y). Once raised it stays high, because the
//           step taken when _x == _y just swaps two equal values.
//
// Every clock event after the load performs one step of the subtractive
// Euclidean algorithm:
//   _x >  _y : _x <= _x - _y      (_y unchanged)
//   _x <= _y : _x and _y are swapped
//
// NOTE: both operands must be positive. If exactly one of them is zero the
// registers settle at (nonzero, 0), never become equal, and valid is never
// raised.
module gcd_slow_impl (x, y, out, valid);
    output [`N_BITS-1:0] out;
    output valid;

    input [`N_BITS-2:0] x, y;  // must be positive, so 1 bit less

    reg _init;                 // 0 until the operands have been loaded
    reg [`N_BITS-1:0] _x, _y;  // working copies of the operands

    // The single subtraction unit.
    wire [`N_BITS-1:0] xmy;  // x-y
    assign xmy = _x - _y;

    // _x and _y always have a zero MSB: they are loaded zero-extended and a
    // difference is only stored when _x > _y. The MSB of the N_BITS-wide
    // difference is therefore the sign of the true result _x - _y.
    wire xmy_neg;  // x-y < 0
    assign xmy_neg = xmy[`N_BITS-1];

    wire x_eq_y;  // x == y
    assign x_eq_y = _x == _y;

    wire x_gt_y;  // x > y
    assign x_gt_y = ~xmy_neg & ~x_eq_y;

    assign valid = _init & x_eq_y;
    assign out = _x;

    initial begin
        _init = 1'b0;
    end

    // NOTE(review): $global_clock is not plain Verilog-2005; presumably this
    // module targets a formal flow (e.g. Yosys) that supports it - confirm.
    always @($global_clock) begin
        if (_init) begin
            // One algorithm step: subtract when _x > _y, otherwise swap.
            _x <= x_gt_y ? xmy : _y;
            _y <= x_gt_y ? _y : _x;
        end else begin
            // Load the operands, zero-extended to N_BITS. The padding bit is
            // explicitly sized: unsized constants are not allowed inside a
            // concatenation.
            _x <= {1'b0, x};
            _y <= {1'b0, y};
        end
        _init <= 1'b1;
    end
endmodule
