Verilog UART在工作之前的收敛时间很长。为什么?

发布于 2025-01-25 18:53:11 字数 8284 浏览 2 评论 0 原文

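我尝试把这篇很棒的文章里的快速 UART 实现拿来自己测试,但 UART RX 输出的却是随机数据,我甚至无法把它和串行链路上传输的内容对应起来。在我看来,进入 UART RX 的信号一切正常,只是输出完全不对:obyte 应该跟随 cnt,但实际并没有。为什么?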

这是否与 RX 如何“知道”起始位和停止位的位置有关?我一直不太理解,像这样的 UART 怎样才能避免因为把某个数据位误认成停止位或起始位而失去同步。事实上,RX 输出的值看起来确实以某种方式受到了计数器初始值的影响。

编辑:多亏了 toolic,现在我知道它在前 512 微秒内不工作,之后才能正确跟随——从计数器的值 0x81 开始。在此之前,obyte 似乎每个字节递增 8 而不是 1,回绕之后变为每个字节递增 4,再回绕一次之后才锁定。当我让 cnt 每个字节递增 2 而不是 1 时,obyte 先以 128 为步长计数,回绕,然后依次以 64、32、16、8 为步长计数,每次都回绕,之后才锁定。这是怎么回事?这会影响 UART 的可靠性吗?我之所以这样问,是因为如果我在每个 byteClk 时钟沿把 cnt 设为 $random%255,它就能正常工作。

这是testbench.v code(我的代码):

//Testbench: streams an incrementing byte counter through uarttx -> uartrx so
//that obyte can be compared against cnt in the waveform.
//
//Stimulus rules:
// * The serial line is left idle (high) for RX_ARM_DELAY before the first
//   byte. uartrx only accepts a falling edge as a start bit once its idle
//   flag is high, and that flag needs (by my count) about 60 clk_fast cycles
//   after power-up to rise. A frame that starts earlier is missed and the
//   receiver can then lock onto a 1->0 transition inside the data.
// * After that the frames are back to back (start + 8 data + 1 stop), so a
//   receiver that ever loses lock has no idle gap to resynchronise on.
// * load is driven by a single process. clkSlow going from x to 0 at time 0
//   is a negedge, so a separate "clear load on negedge clkSlow" block could
//   wipe out a load pulse raised at time 0 before the transmitter sees it.
module testbench();
    //Clock half periods in picoseconds
    //NOTE(review): assumes a 1ps time unit; no `timescale is visible here - confirm
    localparam HALF_CLK_PERIOD_FAST = 25000;
    localparam HALF_CLK_PERIOD_SLOW = 200000;
    localparam HALF_CLK_PERIOD_BYTES = 2000000;
    //Idle time on the line before the first byte: two byte periods
    localparam RX_ARM_DELAY = 4*HALF_CLK_PERIOD_BYTES;

    reg rst; //Step marker for the waveform only; neither DUT has a reset port
    reg clkFast; //Oversampling
    reg clkSlow; //Bit clock
    reg byteClk; //Byte clock
    reg load; //Load line for TX = every byteClk tick

    reg [7:0] cnt; //Test data to send. Is incremented every byteClk tick

    wire serial; //Serial link between TX and RX
    wire [7:0] obyte; //Output byte out of the RX
    wire ovalid; //Data valid strobe for obyte

    uarttx tx
    (
        .clk(clkSlow),
        .txbyte(cnt),
        .txload(load),
        .txd(serial),
        .txholding_reg_full()
    );

    uartrx rx
    (
        .clk_slow(clkSlow),
        .clk_fast(clkFast),
        .rxd(serial),
        .obyte(obyte),
        .ovalid(ovalid)
    );

    //Clock initialisation, waveform marker and end of simulation
    initial
    begin
        clkSlow = 1'b0;
        clkFast = 1'b0;
        byteClk = 1'b0;
        rst = 1'b0;
        #(HALF_CLK_PERIOD_FAST) rst = 1'b1;
        #512000000 $stop;
    end

    //Stimulus: the only process that writes cnt and load
    initial
    begin : stimulus
        cnt = 8'b0;
        load = 1'b0;
        //Keep the line idle while the receiver arms. The extra fast half
        //period keeps the load edge away from the clkSlow edges.
        #(RX_ARM_DELAY + HALF_CLK_PERIOD_FAST);
        //Start by loading the initial value of cnt
        load = 1'b1;
        //The TX samples load on the rising edge; release it on the next falling edge
        @(negedge clkSlow) load = 1'b0;
        forever
        begin
            //Every byteClk tick, the counter is incremented and load is pulled up
            @(posedge byteClk) cnt = cnt + 8'd1;
            #(HALF_CLK_PERIOD_FAST) load = 1'b1;
            @(negedge clkSlow) load = 1'b0;
        end
    end

    //Clocks generation
    always #HALF_CLK_PERIOD_SLOW clkSlow=~clkSlow;
    always #HALF_CLK_PERIOD_FAST clkFast=~clkFast;
    always #HALF_CLK_PERIOD_BYTES byteClk=~byteClk;
endmodule

uart_rx.v(DUT)(我认为这是那里最新的版本):

//UART receiver: samples rxd on clk_fast, looks for a falling edge while the
//line is considered idle, and passes one sampled bit per Dclk strobe to a
//serial-to-parallel converter clocked by clk_slow.
module uartrx
(
    input clk_slow,//baud rate
    input clk_fast,//8*baud rate
    input rxd,//signal in
    output [7:0] obyte,//valid when ovalid is high but will not change till ovalid strobes again
    output ovalid//high for 1 bit to mark a valid obyte
);

//number of rxd samples kept; serves as both synchronizer and delay line
parameter SHIFT_REG_LEN=5;

//rxd history on clk_fast: bit 0 is the newest sample, the top bit the oldest
reg [SHIFT_REG_LEN-1:0] shift_reg=0;
always@(posedge clk_fast)
    shift_reg<={shift_reg[SHIFT_REG_LEN-2:0],rxd};

wire idle;//from counter_timeout: line is idle, a start edge may be accepted
wire convert;//from counter_timeout: high from a start edge until the next Dclk strobe
wire Dclk;//from counter_strobe: data bit strobe

//falling edge = oldest sample high, next-oldest sample low
wire neg_edge=(shift_reg[SHIFT_REG_LEN-1] & ~shift_reg[SHIFT_REG_LEN-2]);
//a falling edge only restarts the counters while the line is idle
wire counter_start=(neg_edge&&idle);
//data is tapped from a newer sample than the edge detector uses, so the
//sampling point sits further into the symbol than the detected edge
wire bit_data=shift_reg[SHIFT_REG_LEN-4];

//both counters share counter_start so they restart on the same clk_fast edge
counter_strobe counter_strobe0
(
    .clk(clk_fast),
    .reset(counter_start),
    .strobe(Dclk)
);
counter_timeout counter_timeout0
(
    .clk(clk_fast),
    .enable(Dclk),
    .reset(counter_start),
    .idle(idle),
    .convert(convert)
);

//convert and bit_data are captured once per Dclk strobe. They are stored
//inverted and inverted again where they are used; the original author
//reports this form gave the best fmax and slack.
reg bit_data_n;
reg convert_n;
always @(posedge clk_fast)
begin
    if(Dclk)
    begin
        convert_n<=~convert;
        bit_data_n<=~bit_data;
    end
end

//convert to parallel; the converter drives the output ports directly
//NOTE(review): convert_n/bit_data_n change on clk_fast and are sampled on clk_slow
serial_to_parallel #(.N(8)) serial_to_parallel0
(
    .clk(clk_slow),
    .convert(~convert_n),
    .data_bit(~bit_data_n),
    .valid_strobe(ovalid),
    .data_out(obyte)
);

endmodule

//---------------- submodules begin -------------------

//Serial-to-parallel converter. Collects N bits; the first bit received ends
//up in bit 0 of the word.
//  clk          - one bit is taken per rising edge while a word is in progress
//  convert      - looked at only when no word is in progress; starts a new
//                 word, and data_bit on that same edge is the word's first bit
//  data_bit     - serial input
//  valid_strobe - high for one clk cycle after a word has been completed
//  data_out     - last completed word; written on the edge that raises valid_strobe
module serial_to_parallel
#(parameter N=8)
(
    input clk,
    input convert,
    input data_bit,
    output reg valid_strobe,
    output reg [N-1:0] data_out
);

reg [N-1:0] word;//word being assembled
reg [$clog2(N)-1:0] bits_left=0;//bits still to take after the first one
reg just_shifted=0;//set on every edge that takes one of the remaining bits
initial valid_strobe=0;

//the assembling word shifted right by one with the new bit entering at the top
wire [N-1:0] word_next={data_bit,word[N-1:1]};

always @(posedge clk)
begin
    //defaults; overridden below when needed
    valid_strobe<=0;
    just_shifted<=0;
    if(|bits_left)
    begin
        //word in progress: take another bit
        word<=word_next;
        bits_left<=bits_left-1;
        just_shifted<=1;
    end
    else if(just_shifted)
    begin
        //the previous edge took the last bit: publish the word
        valid_strobe<=1;
        data_out<=word;
    end
    else if(convert)
    begin
        //start of a new word: take the first bit, N-1 more to follow
        bits_left<=(N-1);
        word<=word_next;
    end
end
endmodule

//next 2 modules take carefull note of resets. The resets
//must cause the state of the two counters to happen at
//the same time.

//strobe every 8 clks
//Free-running N-bit counter that raises strobe for one clk cycle each time
//the counter is seen at zero (every 2**N clks, i.e. 8 with the default N=3).
//  clk    - clock
//  reset  - synchronous; forces strobe low and restarts the count at 1
//  strobe - registered, so it is high on the cycle after count was zero
module counter_strobe(input clk,reset,output reg strobe);
parameter N=3;
reg [N-1:0] count=0;
initial strobe=1;
always@(posedge clk)
begin
    //free-running behaviour
    count<=count+1'b1;
    strobe<=!(|count);
    //a reset on this edge takes priority over the two assignments above
    if(reset)
    begin
        count<=1;
        strobe<=0;
    end
end
endmodule

//timeout after 8 enabled clks plus a bit (IDLE_HOLD_DELAY). timeout causes idle to go high.
//convert goes high on reset and will go low on next enabled clk. when it goes high it's
//time to convert the current 8 bits from serial to parallel
//Counts enabled clks after a reset and, once 2**N of them have been seen,
//raises idle after a further delay through an IDLE_HOLD_DELAY-stage shift register.
//  clk     - clock
//  enable  - count one step on this edge (ignored on a reset edge)
//  reset   - synchronous; clears everything and sets convert
//  idle    - goes high once the timeout has passed through the delay line
//  convert - set by reset, cleared by the next enabled clk
module counter_timeout(input clk,enable,reset,output reg idle,output reg convert);
parameter N=3;
parameter IDLE_HOLD_DELAY=9;
reg invalid;//set when the enabled-clk count wraps; cleared only by reset
reg [N-1:0] count=0;
reg [IDLE_HOLD_DELAY-1:0] shift_reg=0;//delay line between invalid and idle
initial invalid=0;
initial idle=0;
always@(posedge clk)
begin
    if(reset)
    begin
        //restart: nothing timed out, delay line emptied, new frame flagged
        invalid<=0;
        idle<=0;
        count<=0;
        shift_reg<=0;
        convert<=1;
    end
    else
    begin
        //the delay line advances on every non-reset clk, enabled or not
        {idle,shift_reg}<={shift_reg,invalid};
        if(enable)
        begin
            count<=count+1'b1;
            //count is all ones: this is the 2**N-th enabled clk since reset
            if(&count)invalid<=1;
            convert<=0;
        end
    end
end
endmodule

uart_tx.v(同上,我认为这是那里最新的版本):

//UART transmitter with a one-byte holding register.
//Frame on txd: one low start bit, shift_reg[0] to shift_reg[7], then the line
//returns high. One bit per rising edge of clk.
//  clk                - baud rate clock
//  txbyte             - byte to send, captured on the edge that sees txload rise
//  txload             - a rising edge requests a send; ignored while the holding register is full
//  txd                - serial output, high when idle
//  txholding_reg_full - high while a byte waits behind the one being sent
module uarttx
(
    input clk,//baud rate
    input [7:0] txbyte,
    input txload,
    output reg txd,//signal out
    output reg txholding_reg_full
);
reg [7:0] holding_reg=~0;//byte waiting for the shift register
reg [7:0] shift_reg=~0;//byte currently being sent
reg [3:0] shift_reg_ptr=8;//next bit of shift_reg to send; 8 means none left
reg shift_reg_empty=1;
reg txload_last=0;//txload as seen on the previous clk
initial txholding_reg_full=0;
initial txd=1;

//+ve edge of load while the holding reg is empty
wire accept_byte=(txload&&(!txload_last))&&(!txholding_reg_full);
//nothing being sent and a byte is waiting
wire send_held_byte=shift_reg_empty&&txholding_reg_full;

always @(posedge clk)
begin
    //for keeping track of +ve edge of txload line
    txload_last<=txload;

    //send either the next bit of the shift reg or idle
    if(shift_reg_ptr<=7)
    begin
        txd<=shift_reg[shift_reg_ptr];
        shift_reg_ptr<=shift_reg_ptr+1;
    end
    else
    begin
        shift_reg_empty<=1;
        txd<=1;
    end

    //the assignments below come later in the block, so when they fire they
    //win over the ones above
    if(accept_byte)
    begin
        if(shift_reg_empty)
        begin
            //line is free: start the frame now
            shift_reg<=txbyte;
            shift_reg_ptr<=0;
            txd<=0;
            shift_reg_empty<=0;
        end
        else
        begin
            //a frame is in progress: park the byte
            holding_reg<=txbyte;
            txholding_reg_full<=1;
        end
    end
    else if(send_held_byte)
    begin
        //the shift reg has gone empty: start sending the parked byte
        shift_reg<=holding_reg;
        txholding_reg_full<=0;
        shift_reg_ptr<=0;
        txd<=0;
        shift_reg_empty<=0;
    end
end
endmodule

I've tried to pull the fast UART implementation found in this fantastic article and test it myself, but I get random data out of the UART RX that I can't even relate to what is transferred on the serial link. Everything going into the UART RX looks fine to me; the output is just totally wrong: obyte should follow cnt, and it doesn't. Why?
enter image description here

Does it have something to do with the RX "knowing" where the stop and start bits are? I've always struggled to understand how UARTs such as this one avoid getting out of sync by misinterpreting a data bit as a stop or a start bit. Indeed, it looks like the values given by the RX are affected by the initial value of the counter, somehow.

Edit: Thanks to toolic, now I know it does not work for 512 microseconds, and then follows properly - starting with value 0x81 of the counter. Before then, obyte is apparently incremented by 8 every byte instead of by 1, wraps around, is incremented by 4 every byte, and wraps around another time before locking. When I increment cnt by 2 every byte instead of 1, obyte counts by 128, wraps around, then by 64, wraps around, 32, wraps around, 16, wraps around, 8, wraps around, and only then locks in. What's happening? Is this concerning for the reliability of the UART? I'm asking this because if I set cnt to $random%255 every byteClk tick it works.
enter image description here

Here is the testbench.v code (my code):

//Testbench: streams an incrementing byte counter through uarttx -> uartrx so
//that obyte can be compared against cnt in the waveform.
//
//Stimulus rules:
// * The serial line is left idle (high) for RX_ARM_DELAY before the first
//   byte. uartrx only accepts a falling edge as a start bit once its idle
//   flag is high, and that flag needs (by my count) about 60 clk_fast cycles
//   after power-up to rise. A frame that starts earlier is missed and the
//   receiver can then lock onto a 1->0 transition inside the data.
// * After that the frames are back to back (start + 8 data + 1 stop), so a
//   receiver that ever loses lock has no idle gap to resynchronise on.
// * load is driven by a single process. clkSlow going from x to 0 at time 0
//   is a negedge, so a separate "clear load on negedge clkSlow" block could
//   wipe out a load pulse raised at time 0 before the transmitter sees it.
module testbench();
    //Clock half periods in picoseconds
    //NOTE(review): assumes a 1ps time unit; no `timescale is visible here - confirm
    localparam HALF_CLK_PERIOD_FAST = 25000;
    localparam HALF_CLK_PERIOD_SLOW = 200000;
    localparam HALF_CLK_PERIOD_BYTES = 2000000;
    //Idle time on the line before the first byte: two byte periods
    localparam RX_ARM_DELAY = 4*HALF_CLK_PERIOD_BYTES;

    reg rst; //Step marker for the waveform only; neither DUT has a reset port
    reg clkFast; //Oversampling
    reg clkSlow; //Bit clock
    reg byteClk; //Byte clock
    reg load; //Load line for TX = every byteClk tick

    reg [7:0] cnt; //Test data to send. Is incremented every byteClk tick

    wire serial; //Serial link between TX and RX
    wire [7:0] obyte; //Output byte out of the RX
    wire ovalid; //Data valid strobe for obyte

    uarttx tx
    (
        .clk(clkSlow),
        .txbyte(cnt),
        .txload(load),
        .txd(serial),
        .txholding_reg_full()
    );

    uartrx rx
    (
        .clk_slow(clkSlow),
        .clk_fast(clkFast),
        .rxd(serial),
        .obyte(obyte),
        .ovalid(ovalid)
    );

    //Clock initialisation, waveform marker and end of simulation
    initial
    begin
        clkSlow = 1'b0;
        clkFast = 1'b0;
        byteClk = 1'b0;
        rst = 1'b0;
        #(HALF_CLK_PERIOD_FAST) rst = 1'b1;
        #512000000 $stop;
    end

    //Stimulus: the only process that writes cnt and load
    initial
    begin : stimulus
        cnt = 8'b0;
        load = 1'b0;
        //Keep the line idle while the receiver arms. The extra fast half
        //period keeps the load edge away from the clkSlow edges.
        #(RX_ARM_DELAY + HALF_CLK_PERIOD_FAST);
        //Start by loading the initial value of cnt
        load = 1'b1;
        //The TX samples load on the rising edge; release it on the next falling edge
        @(negedge clkSlow) load = 1'b0;
        forever
        begin
            //Every byteClk tick, the counter is incremented and load is pulled up
            @(posedge byteClk) cnt = cnt + 8'd1;
            #(HALF_CLK_PERIOD_FAST) load = 1'b1;
            @(negedge clkSlow) load = 1'b0;
        end
    end

    //Clocks generation
    always #HALF_CLK_PERIOD_SLOW clkSlow=~clkSlow;
    always #HALF_CLK_PERIOD_FAST clkFast=~clkFast;
    always #HALF_CLK_PERIOD_BYTES byteClk=~byteClk;
endmodule

uart_rx.v (DUT) (I think this is the most evolved version on there):

//UART receiver: samples rxd on clk_fast, looks for a falling edge while the
//line is considered idle, and passes one sampled bit per Dclk strobe to a
//serial-to-parallel converter clocked by clk_slow.
module uartrx
(
    input clk_slow,//baud rate
    input clk_fast,//8*baud rate
    input rxd,//signal in
    output [7:0] obyte,//valid when ovalid is high but will not change till ovalid strobes again
    output ovalid//high for 1 bit to mark a valid obyte
);

//number of rxd samples kept; serves as both synchronizer and delay line
parameter SHIFT_REG_LEN=5;

//rxd history on clk_fast: bit 0 is the newest sample, the top bit the oldest
reg [SHIFT_REG_LEN-1:0] shift_reg=0;
always@(posedge clk_fast)
    shift_reg<={shift_reg[SHIFT_REG_LEN-2:0],rxd};

wire idle;//from counter_timeout: line is idle, a start edge may be accepted
wire convert;//from counter_timeout: high from a start edge until the next Dclk strobe
wire Dclk;//from counter_strobe: data bit strobe

//falling edge = oldest sample high, next-oldest sample low
wire neg_edge=(shift_reg[SHIFT_REG_LEN-1] & ~shift_reg[SHIFT_REG_LEN-2]);
//a falling edge only restarts the counters while the line is idle
wire counter_start=(neg_edge&&idle);
//data is tapped from a newer sample than the edge detector uses, so the
//sampling point sits further into the symbol than the detected edge
wire bit_data=shift_reg[SHIFT_REG_LEN-4];

//both counters share counter_start so they restart on the same clk_fast edge
counter_strobe counter_strobe0
(
    .clk(clk_fast),
    .reset(counter_start),
    .strobe(Dclk)
);
counter_timeout counter_timeout0
(
    .clk(clk_fast),
    .enable(Dclk),
    .reset(counter_start),
    .idle(idle),
    .convert(convert)
);

//convert and bit_data are captured once per Dclk strobe. They are stored
//inverted and inverted again where they are used; the original author
//reports this form gave the best fmax and slack.
reg bit_data_n;
reg convert_n;
always @(posedge clk_fast)
begin
    if(Dclk)
    begin
        convert_n<=~convert;
        bit_data_n<=~bit_data;
    end
end

//convert to parallel; the converter drives the output ports directly
//NOTE(review): convert_n/bit_data_n change on clk_fast and are sampled on clk_slow
serial_to_parallel #(.N(8)) serial_to_parallel0
(
    .clk(clk_slow),
    .convert(~convert_n),
    .data_bit(~bit_data_n),
    .valid_strobe(ovalid),
    .data_out(obyte)
);

endmodule

//---------------- submodules begin -------------------

//Serial-to-parallel converter. Collects N bits; the first bit received ends
//up in bit 0 of the word.
//  clk          - one bit is taken per rising edge while a word is in progress
//  convert      - looked at only when no word is in progress; starts a new
//                 word, and data_bit on that same edge is the word's first bit
//  data_bit     - serial input
//  valid_strobe - high for one clk cycle after a word has been completed
//  data_out     - last completed word; written on the edge that raises valid_strobe
module serial_to_parallel
#(parameter N=8)
(
    input clk,
    input convert,
    input data_bit,
    output reg valid_strobe,
    output reg [N-1:0] data_out
);

reg [N-1:0] word;//word being assembled
reg [$clog2(N)-1:0] bits_left=0;//bits still to take after the first one
reg just_shifted=0;//set on every edge that takes one of the remaining bits
initial valid_strobe=0;

//the assembling word shifted right by one with the new bit entering at the top
wire [N-1:0] word_next={data_bit,word[N-1:1]};

always @(posedge clk)
begin
    //defaults; overridden below when needed
    valid_strobe<=0;
    just_shifted<=0;
    if(|bits_left)
    begin
        //word in progress: take another bit
        word<=word_next;
        bits_left<=bits_left-1;
        just_shifted<=1;
    end
    else if(just_shifted)
    begin
        //the previous edge took the last bit: publish the word
        valid_strobe<=1;
        data_out<=word;
    end
    else if(convert)
    begin
        //start of a new word: take the first bit, N-1 more to follow
        bits_left<=(N-1);
        word<=word_next;
    end
end
endmodule

//next 2 modules take carefull note of resets. The resets
//must cause the state of the two counters to happen at
//the same time.

//strobe every 8 clks
//Free-running N-bit counter that raises strobe for one clk cycle each time
//the counter is seen at zero (every 2**N clks, i.e. 8 with the default N=3).
//  clk    - clock
//  reset  - synchronous; forces strobe low and restarts the count at 1
//  strobe - registered, so it is high on the cycle after count was zero
module counter_strobe(input clk,reset,output reg strobe);
parameter N=3;
reg [N-1:0] count=0;
initial strobe=1;
always@(posedge clk)
begin
    //free-running behaviour
    count<=count+1'b1;
    strobe<=!(|count);
    //a reset on this edge takes priority over the two assignments above
    if(reset)
    begin
        count<=1;
        strobe<=0;
    end
end
endmodule

//timeout after 8 enabled clks plus a bit (IDLE_HOLD_DELAY). timeout causes idle to go high.
//convert goes high on reset and will go low on next enabled clk. when it goes high it's
//time to convert the current 8 bits from serial to parallel
//Counts enabled clks after a reset and, once 2**N of them have been seen,
//raises idle after a further delay through an IDLE_HOLD_DELAY-stage shift register.
//  clk     - clock
//  enable  - count one step on this edge (ignored on a reset edge)
//  reset   - synchronous; clears everything and sets convert
//  idle    - goes high once the timeout has passed through the delay line
//  convert - set by reset, cleared by the next enabled clk
module counter_timeout(input clk,enable,reset,output reg idle,output reg convert);
parameter N=3;
parameter IDLE_HOLD_DELAY=9;
reg invalid;//set when the enabled-clk count wraps; cleared only by reset
reg [N-1:0] count=0;
reg [IDLE_HOLD_DELAY-1:0] shift_reg=0;//delay line between invalid and idle
initial invalid=0;
initial idle=0;
always@(posedge clk)
begin
    if(reset)
    begin
        //restart: nothing timed out, delay line emptied, new frame flagged
        invalid<=0;
        idle<=0;
        count<=0;
        shift_reg<=0;
        convert<=1;
    end
    else
    begin
        //the delay line advances on every non-reset clk, enabled or not
        {idle,shift_reg}<={shift_reg,invalid};
        if(enable)
        begin
            count<=count+1'b1;
            //count is all ones: this is the 2**N-th enabled clk since reset
            if(&count)invalid<=1;
            convert<=0;
        end
    end
end
endmodule

uart_tx.v (same, I think it's the most evolved version on there) :

//UART transmitter with a one-byte holding register.
//Frame on txd: one low start bit, shift_reg[0] to shift_reg[7], then the line
//returns high. One bit per rising edge of clk.
//  clk                - baud rate clock
//  txbyte             - byte to send, captured on the edge that sees txload rise
//  txload             - a rising edge requests a send; ignored while the holding register is full
//  txd                - serial output, high when idle
//  txholding_reg_full - high while a byte waits behind the one being sent
module uarttx
(
    input clk,//baud rate
    input [7:0] txbyte,
    input txload,
    output reg txd,//signal out
    output reg txholding_reg_full
);
reg [7:0] holding_reg=~0;//byte waiting for the shift register
reg [7:0] shift_reg=~0;//byte currently being sent
reg [3:0] shift_reg_ptr=8;//next bit of shift_reg to send; 8 means none left
reg shift_reg_empty=1;
reg txload_last=0;//txload as seen on the previous clk
initial txholding_reg_full=0;
initial txd=1;

//+ve edge of load while the holding reg is empty
wire accept_byte=(txload&&(!txload_last))&&(!txholding_reg_full);
//nothing being sent and a byte is waiting
wire send_held_byte=shift_reg_empty&&txholding_reg_full;

always @(posedge clk)
begin
    //for keeping track of +ve edge of txload line
    txload_last<=txload;

    //send either the next bit of the shift reg or idle
    if(shift_reg_ptr<=7)
    begin
        txd<=shift_reg[shift_reg_ptr];
        shift_reg_ptr<=shift_reg_ptr+1;
    end
    else
    begin
        shift_reg_empty<=1;
        txd<=1;
    end

    //the assignments below come later in the block, so when they fire they
    //win over the ones above
    if(accept_byte)
    begin
        if(shift_reg_empty)
        begin
            //line is free: start the frame now
            shift_reg<=txbyte;
            shift_reg_ptr<=0;
            txd<=0;
            shift_reg_empty<=0;
        end
        else
        begin
            //a frame is in progress: park the byte
            holding_reg<=txbyte;
            txholding_reg_full<=1;
        end
    end
    else if(send_held_byte)
    begin
        //the shift reg has gone empty: start sending the parked byte
        shift_reg<=holding_reg;
        txholding_reg_full<=0;
        shift_reg_ptr<=0;
        txd<=0;
        shift_reg_empty<=0;
    end
end
endmodule

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。
列表为空,暂无数据
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文