Commit 660ec751 authored by Tomasz Wlostowski's avatar Tomasz Wlostowski

100 MHz version, still room for improvement

parent 51d4addc
......@@ -18,4 +18,5 @@ files = [ "rv_cpu.v",
"rv_csr.v",
"rv_timer.v",
"rv_exceptions.v",
"urv_iram.v",
"../sim/rv_icache_model.sv"];
......@@ -50,7 +50,11 @@ endmodule
module rv_cpu
(
#(
parameter g_timer_frequency = 1000,
parameter g_clock_frequency = 100000000
)
(
input clk_i,
input rst_i,
......@@ -119,12 +123,16 @@ module rv_cpu
wire d_stall, d_kill;
wire [39:0] csr_time, csr_cycles;
wire [31:0] im_addr;
assign im_addr_o = im_addr;
rv_fetch fetch
(
.clk_i(clk_i),
.rst_i(rst_i),
.im_addr_o(im_addr_o),
.im_addr_o(im_addr),
.im_data_i(im_data_i),
.im_valid_i(im_valid_i),
......@@ -161,19 +169,22 @@ module rv_cpu
.TRIG3(TRIG3) );
-----/\----- EXCLUDED -----/\----- */
assign TRIG0 = f2d_pc;
assign TRIG1 = f2d_ir;
assign TRIG2[0] = rst_i;
assign TRIG2[1] = f2d_valid;
assign TRIG2[2] = f_kill;
assign TRIG2[3] = f_stall;
assign TRIG2[4] = w_stall_req;
assign TRIG2[5] = x_stall_req;
wire d_stall_req;
wire [31:0] d2x_alu_op1, d2x_alu_op2;
wire d2x_use_op1, d2x_use_op2;
wire d_x_rs1_bypass;
wire d_x_rs2_bypass;
wire d_w_rs1_bypass;
wire d_w_rs2_bypass;
assign TRIG0 = im_addr;
assign TRIG1 = im_data_i;
assign TRIG2[0] = im_valid_i;
rv_decode decode
(
......@@ -193,22 +204,27 @@ module rv_cpu
.rf_rs1_o(rf_rs1),
.rf_rs2_o(rf_rs2),
.d_x_rs1_bypass_i(d_x_rs1_bypass),
.d_x_rs2_bypass_i(d_x_rs2_bypass),
.d_w_rs1_bypass_i(d_w_rs1_bypass),
.d_w_rs2_bypass_i(d_w_rs2_bypass),
.x_load_hazard_o(d2x_load_hazard),
.x_valid_o(d2x_valid),
.x_pc_o(d2x_pc),
.x_rs1_o(d2x_rs1),
.x_rs2_o(d2x_rs2),
.x_imm_o(d2x_imm),
.x_rd_o(d2x_rd),
.x_shamt_o(d2x_shamt),
.x_fun_o(d2x_fun),
.x_opcode_o(d2x_opcode),
.x_shifter_sign_o(d2x_shifter_sign),
.x_imm_o(d2x_imm),
.x_is_signed_compare_o(d2x_is_signed_compare),
.x_is_signed_alu_op_o(d2x_is_signed_alu_op),
.x_is_add_o(d2x_is_add),
......@@ -220,15 +236,19 @@ module rv_cpu
.x_rd_source_o(d2x_rd_source),
.x_rd_write_o(d2x_rd_write),
.x_csr_sel_o ( d2x_csr_sel),
.x_csr_imm_o ( d2x_csr_imm),
.x_is_csr_o ( d2x_is_csr ),
.x_is_eret_o ( d2x_is_eret )
.x_csr_sel_o (d2x_csr_sel),
.x_csr_imm_o (d2x_csr_imm),
.x_is_csr_o (d2x_is_csr),
.x_is_eret_o (d2x_is_eret),
.x_alu_op1_o(d2x_alu_op1),
.x_alu_op2_o(d2x_alu_op2),
);
.x_use_op1_o(d2x_use_op1),
.x_use_op2_o(d2x_use_op2)
);
wire [4:0] x2w_rd;
wire [4:0] x2w_rd;
wire [31:0] x2w_rd_value;
wire [31:0] x2w_rd_shifter;
wire [31:0] x2w_rd_multiply;
......@@ -251,6 +271,13 @@ module rv_cpu
wire [31:0] rf_bypass_rd_value = x2w_rd_value;
wire rf_bypass_rd_write = rf_rd_write && !x2w_load; // multiply/shift too?
assign d_x_rs1_bypass = (d2x_rd == rf_rs1) && d2x_rd_write && d2x_valid;
assign d_x_rs2_bypass = (d2x_rd == rf_rs2) && d2x_rd_write && d2x_valid;
assign d_w_rs1_bypass = (x2w_rd == rf_rs1) && rf_rd_write && x2w_valid;
assign d_w_rs2_bypass = (x2w_rd == rf_rs2) && rf_rd_write && x2w_valid;
rv_regfile regfile
(
......@@ -277,7 +304,6 @@ module rv_cpu
);
rv_exec execute
......@@ -311,6 +337,13 @@ module rv_cpu
.d_is_load_i(d2x_is_load),
.d_is_store_i(d2x_is_store),
.d_is_undef_i(d2x_is_undef),
.d_alu_op1_i(d2x_alu_op1),
.d_alu_op2_i(d2x_alu_op2),
.d_use_op1_i(d2x_use_op1),
.d_use_op2_i(d2x_use_op2),
.d_rd_source_i(d2x_rd_source),
.d_rd_write_i(d2x_rd_write),
......@@ -355,7 +388,6 @@ module rv_cpu
wire [31:0] wb_trig2;
rv_writeback writeback
(
.clk_i(clk_i),
......@@ -406,7 +438,11 @@ module rv_cpu
assign TRIG2[16] = (stall_timeout == 63) ? 1'b1 : 1'b0;
rv_timer ctimer (
rv_timer
#(
.g_timer_frequency(g_timer_frequency),
.g_clock_frequency(g_clock_frequency)
) ctimer (
.clk_i(clk_i),
.rst_i(rst_i),
......
......@@ -96,4 +96,7 @@
`define EXCEPT_TIMER 9
`define EXCEPT_IRQ 10
`define OP_SEL_BYPASS_X 0
`define OP_SEL_BYPASS_W 1
`define OP_SEL_DIRECT 2
`define OP_SEL_IMM 3
`include "rv_defs.v"
`timescale 1ns/1ps
// rv_divide: multi-cycle 32-bit divide / remainder unit.
//
// Sequencing is driven by 'state':
//   0      idle; when start_divide is high, clears q and r and latches
//          is_rem plus the MSB of each operand (n_sign, d_sign)
//   1      n <= (n_sign ? 0 - d_rs1_i : 0 + d_rs1_i)
//   2      d <= (d_sign ? 0 - d_rs2_i : 0 + d_rs2_i)
//   3..34  32 restoring-division iterations, dividend bits taken MSB first;
//          each one shifts a new bit into q
//   35     x_rd_o <= ((n_sign ^ d_sign) ? 0 - q : 0 + q)   (quotient)
//   36     x_rd_o <= (n_sign ? 0 - r : 0 + r)               (remainder)
// 'done' is asserted in state 36 (quotient requested) or in state 37
// (remainder requested); the state register returns to 0 on the next edge.
//
// Ports:
//   clk_i, rst_i     clock and synchronous reset (reset only clears 'state')
//   x_stall_i        pipeline stall; blocks the start of a new operation
//   x_kill_i         pipeline kill; blocks the start of a new operation
//   x_stall_req_o    high while start_divide is high or 'done' is low
//   d_valid_i        decoded instruction is valid
//   d_is_divide_i    decoded instruction is a divide/remainder
//   d_rs1_i          dividend
//   d_rs2_i          divisor
//   d_fun_i          function code; compared against `FUNC_REM / `FUNC_REMU
//   x_rd_o           registered result
//
// NOTE(review): n_sign / d_sign are latched from bit 31 of the operands
//   regardless of d_fun_i, so unsigned operations appear to be handled as
//   signed ones - confirm against the decoder / rv_defs.v.
// NOTE(review): 'done' is low in state 0, so x_stall_req_o is high while the
//   unit is idle - presumably the instantiating module gates it; confirm.
module rv_divide
  (
   input             clk_i,
   input             rst_i,

   input             x_stall_i,
   input             x_kill_i,
   output            x_stall_req_o,

   input             d_valid_i,
   input             d_is_divide_i,

   input [31:0]      d_rs1_i,
   input [31:0]      d_rs2_i,
   input [2:0]       d_fun_i,

   output reg [31:0] x_rd_o
   );

   reg [31:0]  q, r, n, d;
   reg         n_sign, d_sign;
   reg [5:0]   state;
   wire [32:0] alu_result;
   reg [31:0]  alu_op1;
   reg [31:0]  alu_op2;
   reg         alu_sub;
   reg         is_rem;

   // Partial remainder shifted left by one with the next dividend bit
   // appended: n[31] in state 3 down to n[0] in state 34.
   wire [31:0] r_next = { r[30:0], n[31 - (state - 3)] };

   // ALU operand selection. Purely combinational, hence blocking assignments.
   always @*
     case (state) // synthesis full_case parallel_case
       0:       begin alu_op1 = 'hx;    alu_op2 = 'hx;     end
       1:       begin alu_op1 = 0;      alu_op2 = d_rs1_i; end
       2:       begin alu_op1 = 0;      alu_op2 = d_rs2_i; end
       35:      begin alu_op1 = 0;      alu_op2 = q;       end
       36:      begin alu_op1 = 0;      alu_op2 = r;       end
       default: begin alu_op1 = r_next; alu_op2 = d;       end
     endcase // case (state)

   // Shared 33-bit adder/subtractor; bit 32 is the carry/borrow.
   assign alu_result = alu_sub ? {1'b0, alu_op1} - {1'b0, alu_op2}
                               : {1'b0, alu_op1} + {1'b0, alu_op2};

   // No borrow out of the subtraction: alu_op1 >= alu_op2 (unsigned).
   wire alu_ge = ~alu_result[32];

   wire start_divide = !x_stall_i && !x_kill_i && d_valid_i && d_is_divide_i;
   wire done         = (is_rem ? state == 37 : state == 36);

   assign x_stall_req_o = (start_divide || !done);

   // Add/subtract selection for each state (combinational).
   always @*
     case (state) // synthesis full_case parallel_case
       1:       alu_sub = n_sign;           // negate dividend when its MSB was set
       2:       alu_sub = d_sign;           // negate divisor when its MSB was set
       35:      alu_sub = n_sign ^ d_sign;  // negate quotient when the signs differ
       36:      alu_sub = n_sign;           // remainder follows the dividend sign
       default: alu_sub = 1;                // trial subtraction r_next - d
     endcase // case (state)

   // State counter: idles at 0, then counts up until 'done'.
   always @(posedge clk_i)
     if (rst_i || done)
       state <= 0;
     else if (state != 0 || start_divide)
       state <= state + 1;

   // Datapath registers.
   always @(posedge clk_i)
     case (state) // synthesis full_case parallel_case
       0:
         if (start_divide)
           begin
              q      <= 0;
              r      <= 0;
              is_rem <= (d_fun_i == `FUNC_REM || d_fun_i == `FUNC_REMU);
              n_sign <= d_rs1_i[31];
              d_sign <= d_rs2_i[31];
           end

       1:
         n <= alu_result[31:0];

       2:
         d <= alu_result[31:0];

       35:
         x_rd_o <= alu_result[31:0]; // quotient

       36:
         x_rd_o <= alu_result[31:0]; // remainder

       // States 3..34: the 32 divider iterations. This arm is also taken in
       // state 37 and any other unlisted state, where the values written to
       // q and r are not read by the listed states before the next start
       // clears them.
       default:
         begin
            q <= { q[30:0], alu_ge };
            r <= alu_ge ? alu_result[31:0] : r_next;
         end
     endcase // case ( state )

endmodule // rv_divide
// rv_divide_nonrestoring: multi-cycle 32-bit divide / remainder unit using a
// non-restoring add/subtract recurrence on the register pair {a, q}.
//
// Sequencing is driven by 'state':
//   0      idle; when start_divide is high, clears a and latches is_rem plus
//          the MSB of each operand (n_sign, d_sign)
//   1      q <= (n_sign ? 0 - d_rs1_i : 0 + d_rs1_i)   (dividend lives in q)
//   2      d <= (d_sign ? 0 - d_rs2_i : 0 + d_rs2_i)
//   3..34  32 iterations: {a, q} shifts left by one, then d is subtracted
//          from a when a was non-negative and added otherwise; the new
//          quotient bit is the inverted sign of the result
//   35     correction step: a <= a + d when a is negative
//   36     x_rd_o <= ((n_sign ^ d_sign) ? 0 - q : 0 + q)   (quotient)
//   37     x_rd_o <= (n_sign ? 0 - a : 0 + a)               (remainder)
// 'done' is asserted in state 37 (quotient requested) or in state 38
// (remainder requested); the state register returns to 0 on the next edge.
//
// Ports: identical to rv_divide (clock/reset, pipeline stall/kill inputs,
// stall request output, decoded operands and function code, registered
// result x_rd_o).
//
// Fixes relative to the previous revision:
//   * the bit shifted into the partial remainder is q[31] (the next dividend
//     bit) instead of a constant 0, so the dividend actually takes part in
//     the computation;
//   * the add/subtract decision uses the sign of the partial remainder
//     before the shift (a[31]) rather than the bit that the shift is about
//     to discard;
//   * combinational blocks use blocking assignments and 33-bit ALU results
//     are truncated explicitly.
//
// NOTE(review): n_sign / d_sign are latched from bit 31 of the operands
//   regardless of d_fun_i, so unsigned operations appear to be handled as
//   signed ones - confirm against the decoder / rv_defs.v.
// NOTE(review): 'done' is low in state 0, so x_stall_req_o is high while the
//   unit is idle - presumably the instantiating module gates it; confirm.
module rv_divide_nonrestoring
  (
   input             clk_i,
   input             rst_i,

   input             x_stall_i,
   input             x_kill_i,
   output            x_stall_req_o,

   input             d_valid_i,
   input             d_is_divide_i,

   input [31:0]      d_rs1_i,
   input [31:0]      d_rs2_i,
   input [2:0]       d_fun_i,

   output reg [31:0] x_rd_o
   );

   reg [31:0]  a, d, q;
   reg         n_sign, d_sign;
   reg [5:0]   state;
   wire [32:0] alu_result;
   reg [31:0]  alu_op1;
   reg [31:0]  alu_op2;
   reg         alu_sub;
   reg         is_rem;

   // Left shift of the {a, q} pair as seen from a: the MSB of q (next
   // dividend bit) enters at the bottom of the partial remainder.
   wire [31:0] a_next = { a[30:0], q[31] };

   // ALU operand selection (combinational).
   always @*
     case (state) // synthesis full_case parallel_case
       0:       begin alu_op1 = 'hx;    alu_op2 = 'hx;     end
       1:       begin alu_op1 = 0;      alu_op2 = d_rs1_i; end
       2:       begin alu_op1 = 0;      alu_op2 = d_rs2_i; end
       35:      begin alu_op1 = a;      alu_op2 = d;       end
       36:      begin alu_op1 = 0;      alu_op2 = q;       end
       37:      begin alu_op1 = 0;      alu_op2 = a;       end
       default: begin alu_op1 = a_next; alu_op2 = d;       end
     endcase // case (state)

   // Shared 33-bit adder/subtractor.
   assign alu_result = alu_sub ? {1'b0, alu_op1} - {1'b0, alu_op2}
                               : {1'b0, alu_op1} + {1'b0, alu_op2};

   wire start_divide = !x_stall_i && !x_kill_i && d_valid_i && d_is_divide_i;
   wire done         = (is_rem ? state == 38 : state == 37);

   assign x_stall_req_o = (start_divide || !done);

   // Add/subtract selection for each state (combinational).
   always @*
     case (state) // synthesis full_case parallel_case
       1:       alu_sub = n_sign;           // negate dividend when its MSB was set
       2:       alu_sub = d_sign;           // negate divisor when its MSB was set
       35:      alu_sub = 0;                // correction always adds d
       36:      alu_sub = n_sign ^ d_sign;  // negate quotient when the signs differ
       37:      alu_sub = n_sign;           // remainder follows the dividend sign
       default: alu_sub = ~a[31];           // subtract while a >= 0, add while a < 0
     endcase // case (state)

   // State counter: idles at 0, then counts up until 'done'.
   always @(posedge clk_i)
     if (rst_i || done)
       state <= 0;
     else if (state != 0 || start_divide)
       state <= state + 1;

   // Datapath registers.
   always @(posedge clk_i)
     case (state) // synthesis full_case parallel_case
       0:
         if (start_divide)
           begin
              a      <= 0;
              is_rem <= (d_fun_i == `FUNC_REM || d_fun_i == `FUNC_REMU);
              n_sign <= d_rs1_i[31];
              d_sign <= d_rs2_i[31];
           end

       1:
         q <= alu_result[31:0];

       2:
         d <= alu_result[31:0];

       35: // correction step: a negative partial remainder gets d added back
         if (a[31])
           a <= alu_result[31:0];

       36:
         x_rd_o <= alu_result[31:0]; // quotient

       37:
         x_rd_o <= alu_result[31:0]; // remainder

       // States 3..34: the 32 divider iterations (this arm is also taken in
       // state 38 and any other unlisted state).
       default:
         begin
            a <= alu_result[31:0];
            q <= { q[30:0], ~alu_result[31] };
         end
     endcase // case ( state )

endmodule // rv_divide_nonrestoring
......@@ -62,6 +62,12 @@ module rv_exec
input d_is_store_i,
input d_is_divide_i,
input d_is_undef_i,
input [31:0] d_alu_op1_i,
input [31:0] d_alu_op2_i,
input d_use_op1_i,
input d_use_op2_i,
input [2:0] d_rd_source_i,
......@@ -231,25 +237,10 @@ module rv_exec
// decode ALU operands
always@*
begin
case (d_opcode_i)
`OPC_LUI: alu_op1 <= d_imm_i;
`OPC_AUIPC: alu_op1 <= d_imm_i;
`OPC_JAL: alu_op1 <= 4;
`OPC_JALR: alu_op1 <= 4;
default: alu_op1 <= rs1;
endcase // case (d_opcode_i)
case (d_opcode_i)
`OPC_LUI: alu_op2 <= 0;
`OPC_AUIPC: alu_op2 <= d_pc_i;
`OPC_JAL: alu_op2 <= d_pc_i;
`OPC_JALR: alu_op2 <= d_pc_i;
`OPC_OP_IMM: alu_op2 <= d_imm_i;
default: alu_op2 <= rs2;
endcase // case (d_opcode_i)
alu_op1 <= d_use_op1_i ? d_alu_op1_i : rs1;
alu_op2 <= d_use_op2_i ? d_alu_op2_i : rs2;
end
......@@ -274,13 +265,16 @@ module rv_exec
case (d_fun_i)
`FUNC_ADD:
alu_result <= alu_addsub_result[31:0];
`FUNC_XOR: alu_result <= alu_op1 ^ alu_op2;
`FUNC_OR: alu_result <= alu_op1 | alu_op2;
`FUNC_AND: alu_result <= alu_op1 & alu_op2;
`FUNC_SLT: alu_result <= alu_addsub_result[32]?1:0;
`FUNC_SLTU: alu_result <= alu_addsub_result[32]?1:0;
`FUNC_XOR:
alu_result <= alu_op1 ^ alu_op2;
`FUNC_OR:
alu_result <= alu_op1 | alu_op2;
`FUNC_AND:
alu_result <= alu_op1 & alu_op2;
`FUNC_SLT:
alu_result <= alu_addsub_result[32]?1:0;
`FUNC_SLTU:
alu_result <= alu_addsub_result[32]?1:0;
default: alu_result <= 32'hx;
endcase // case (d_fun_i)
end // always@ *
......@@ -409,8 +403,6 @@ module rv_exec
end
endcase // case (d_fun_i)
end
//branch decision
always@*
......@@ -433,63 +425,36 @@ module rv_exec
assign dm_data_s_o = dm_data_s;
assign dm_data_select_o = dm_select_s;
/* -----\/----- EXCLUDED -----\/-----
wire is_load = (d_opcode_i == `OPC_LOAD ? 1: 0) && d_valid_i && !x_kill_i;
wire is_store = (d_opcode_i == `OPC_STORE ? 1: 0) && d_valid_i && !x_kill_i;
-----/\----- EXCLUDED -----/\----- */
assign dm_load_o = d_is_load_i & d_valid_i & !x_kill_i & !x_stall_i & !exception;
assign dm_store_o = d_is_store_i & d_valid_i & !x_kill_i & !x_stall_i & !exception;
/* -----\/----- EXCLUDED -----\/-----
wire trig_ent = (d_pc_i == 'h264 && !x_kill_i);
wire trig_ret = (d_pc_i == 'h2bc && !x_kill_i);
wire trig_wr = (dm_addr == 'hf368 && is_store && !x_stall_i);
-----/\----- EXCLUDED -----/\----- */
always@(posedge clk_i)
if (rst_i) begin
f_branch_target_o <= 0;
// f_branch_target_o <= 0;
f_branch_take <= 0;
w_rd_write_o <= 0;
// w_rd_write_o <= 0;
// w_rd_o <= 0;
w_fun_o <= 0;
// w_fun_o <= 0;
w_load_o <= 0;
w_store_o <= 0;
w_dm_addr_o <= 0;
w_rd_source_o <= 0;
// w_dm_addr_o <= 0;
// w_rd_source_o <= 0;
w_valid_o <= 0;
end else if (!x_stall_i) begin
f_branch_target_o <= branch_target;
f_branch_take <= branch_take && !x_kill_i && d_valid_i;
w_rd_o <= d_rd_i;
w_rd_value_o <= rd_value;
w_rd_write_o <= d_rd_write_i && !x_kill_i && !exception;
w_rd_write_o <= d_rd_write_i && !x_kill_i && d_valid_i && !exception;
w_rd_source_o <= d_rd_source_i;
w_fun_o <= d_fun_i;
w_load_o <= d_is_load_i & d_valid_i && !x_kill_i && !exception;
w_store_o <= d_is_store_i & d_valid_i && !x_kill_i && !exception;
/* -----\/----- EXCLUDED -----\/-----
if ( (d_is_load_i || is_store) && !exception && unaligned_addr)
begin
$error("Unaligned address!");
$stop;
end
-----/\----- EXCLUDED -----/\----- */
w_load_o <= d_is_load_i && !x_kill_i && d_valid_i && !exception;
w_store_o <= d_is_store_i && !x_kill_i && d_valid_i && !exception;
w_dm_addr_o <= dm_addr;
w_valid_o <= d_valid_i && !x_kill_i && !exception;
end else begin // if (!x_stall_i)
w_valid_o <= 0;
w_valid_o <= !exception;
end // else: !if(rst_i)
assign f_branch_take_o = f_branch_take;
......
......@@ -2,6 +2,71 @@
`timescale 1ns/1ps
// rv_mult18x18: thin wrapper around a single Xilinx DSP48A1 primitive that
// exposes only its multiplier: operands go in on A/B, the 36-bit result is
// taken from port M.
//
// Ports:
//   clk_i    clock for the primitive
//   rst_i    drives RSTA, RSTB and RSTM
//   stall_i  when high, the M clock enable (CEM) is held low
//   x_i      operand wired to port A
//   y_i      operand wired to port B
//   q_o      port M of the primitive
//
// Of all the *REG attributes only MREG is set to 1; per the attribute names
// this registers the multiplier output only, so q_o is expected one enabled
// clock after the operands (see Xilinx UG389 - TODO confirm latency there).
module rv_mult18x18
  (
   input         clk_i,
   input         rst_i,
   input         stall_i,

   input [17:0]  x_i,
   input [17:0]  y_i,

   output [35:0] q_o
   );

   // Clock enable for the M register: active whenever the pipeline moves.
   wire m_clk_en = ~stall_i;

   DSP48A1 #(
       // register stage configuration
       .MREG        (1),
       .A0REG       (0),
       .A1REG       (0),
       .B0REG       (0),
       .B1REG       (0),
       .CREG        (0),
       .DREG        (0),
       .PREG        (0),
       .OPMODEREG   (0),
       .CARRYINREG  (0),
       .CARRYOUTREG (0),
       // miscellaneous attributes
       .CARRYINSEL  ("OPMODE5"),
       .RSTTYPE     ("SYNC")
   ) D1 (
       // clock and operation mode
       .CLK        (clk_i),
       .OPMODE     (8'd1),

       // data inputs
       .A          (x_i),
       .B          (y_i),
       .C          (48'h0),
       .D          (18'b0),
       .PCIN       (),
       .CARRYIN    (),

       // data outputs (only M is used)
       .M          (q_o),
       .P          (),
       .BCOUT      (),
       .PCOUT      (),
       .CARRYOUT   (),
       .CARRYOUTF  (),

       // clock enables
       .CEM        (m_clk_en),
       .CEP        (1'b1),
       .CEA        (1'b0),
       .CEB        (1'b0),
       .CEC        (1'b0),
       .CED        (1'b0),
       .CECARRYIN  (1'b0),
       .CEOPMODE   (1'b0),

       // resets
       .RSTA       (rst_i),
       .RSTB       (rst_i),
       .RSTM       (rst_i),
       .RSTC       (1'b0),
       .RSTD       (1'b0),
       .RSTP       (1'b0),
       .RSTCARRYIN (1'b0),
       .RSTOPMODE  (1'b0)
   );

endmodule // rv_mult18x18
module rv_multiply
(
input clk_i,
......@@ -15,14 +80,55 @@ module rv_multiply
output reg [31:0] w_rd_o
);
reg [31:0] yl_xl, yl_xh, yh_xl;
wire[17:0] xl = d_rs1_i[17:0];
wire[13:0] xh = d_rs1_i[31:18];
wire[17:0] yl = d_rs2_i[17:0];
wire[13:0] yh = d_rs2_i[31:18];
wire[17:0] xl_u = {1'b0, d_rs1_i[16:0] };
wire[17:0] yl_u = {1'b0, d_rs2_i[16:0] };
wire[17:0] xl_s = {d_rs1_i[16], d_rs1_i[16:0] };
wire[17:0] yl_s = {d_rs2_i[16], d_rs2_i[16:0] };
wire[17:0] xh = { {3{d_rs1_i[31]}}, d_rs1_i[31:17] };
wire[17:0] yh = { {3{d_rs2_i[31]}}, d_rs2_i[31:17] };
wire [35:0] yl_xl, yl_xh, yh_xl;
rv_mult18x18 U_mul0
(
.clk_i(clk_i),
.rst_i(rst_i),
.stall_i(x_stall_i),
.x_i(xl_u),
.y_i(yl_u),
.q_o(yl_xl)
);
rv_mult18x18 U_mul1
(
.clk_i(clk_i),
.rst_i(rst_i),
.stall_i(x_stall_i),
.x_i(xl_s),
.y_i(yh),
.q_o(yh_xl)
);
rv_mult18x18 U_mul2
(
.clk_i(clk_i),
.rst_i(rst_i),
.stall_i(x_stall_i),
.x_i(yl_s),
.y_i(xh),
.q_o(yl_xh)
);
/* -----\/----- EXCLUDED -----\/-----
always@(posedge clk_i)
......@@ -36,8 +142,9 @@ module rv_multiply
// stage0 <= $signed(d_rs1_i) * $signed(d_rs2_i);
*/
always@*
w_rd_o <= yl_xl + {yl_xh[13:0], 18'h0} + {yh_xl[13:0], 18'h0};
w_rd_o <= yl_xl + {yl_xh[14:0], 17'h0} + {yh_xl[14:0], 17'h0};
......
......@@ -42,9 +42,14 @@ module rv_decode
output x_valid_o,
output reg [31:0] x_pc_o,
input d_x_rs1_bypass_i,
input d_x_rs2_bypass_i,
input d_w_rs1_bypass_i,
input d_w_rs2_bypass_i,
output reg [4:0] rf_rs1_o,
output reg [4:0] rf_rs2_o,
output [4:0] rf_rs1_o,
output [4:0] rf_rs2_o,
output [4:0] x_rs1_o,
output [4:0] x_rs2_o,
......@@ -57,7 +62,6 @@ module rv_decode
output [4:0] x_opcode_o,
output reg x_shifter_sign_o,
output reg [31:0] x_imm_o,
output reg x_is_signed_compare_o,
output reg x_is_signed_alu_op_o,
output reg x_is_add_o,
......@@ -74,8 +78,19 @@ module rv_decode
output reg [4:0] x_csr_imm_o,
output reg x_is_csr_o,
output reg x_is_eret_o
);
output reg x_is_eret_o,
output reg [31:0] x_imm_o,
output reg [31:0] x_alu_op1_o,
output reg [31:0] x_alu_op2_o,
output reg x_use_op1_o,
output reg x_use_op2_o,
output reg [1:0] x_op1_sel_o,
output reg [1:0] x_op2_sel_o
);
wire [4:0] f_rs1 = f_ir_i[19:15];
......@@ -95,31 +110,11 @@ module rv_decode
assign x_rd_o = x_rd;
assign x_opcode_o = x_opcode;
always@*
if(d_stall_i)
begin
rf_rs1_o <= x_rs1;
rf_rs2_o <= x_rs2;
end else begin
rf_rs1_o <= f_rs1;
rf_rs2_o <= f_rs2;
end
reg[31:0] x_ir;
always@(posedge clk_i)
if(rst_i)
begin
x_pc_o <= 0;
x_valid <= 0;
end else if(!d_stall_i) begin
x_valid <= f_valid_i && !d_kill_i;
x_pc_o <= f_pc_i;
x_ir <= f_ir_i;
end
assign rf_rs1_o = f_rs1;
assign rf_rs2_o = f_rs2;
assign x_valid_o = x_valid;
reg [31:0] x_ir;
wire [4:0] d_opcode = f_ir_i[6:2];
......@@ -154,12 +149,18 @@ module rv_decode
load_hazard <= 0;
reg inserting_nop = 0;
reg load_hazard_d;
always@(posedge clk_i)
if(rst_i)
if(rst_i) begin
inserting_nop <= 0;
else if (!d_stall_i)
begin
load_hazard_d <= 0;
end else if (!d_stall_i)
begin
load_hazard_d <= load_hazard;
if (inserting_nop)
inserting_nop <= 0;
else
......@@ -168,30 +169,44 @@ module rv_decode
assign d_stall_req_o = load_hazard && !inserting_nop;
reg load_hazard_d;
wire [4:0] f_rd = f_ir_i[11:7];
assign x_valid_o = x_valid;
always@(posedge clk_i)
if(!d_stall_i)
if(rst_i || d_kill_i )
begin
x_pc_o <= 0;
x_valid <= 0;
end else if(!d_stall_i) begin
x_pc_o <= f_pc_i;
if (load_hazard && !inserting_nop)
x_valid <= 0;
else
x_valid <= f_valid_i;
x_ir <= f_ir_i;
x_rs1 <= f_rs1;
x_rs2 <= f_rs2;
x_rd <= (load_hazard && !inserting_nop) ? 0 : f_ir_i [11:7];
x_opcode <= (load_hazard && !inserting_nop) ? `OPC_OP : d_opcode;
load_hazard_d <= load_hazard;
x_rd <= f_rd;
x_opcode <= d_opcode;
x_shamt_o <= f_ir_i[24:20];
end
always@(posedge clk_i)
if(!d_stall_i)
if (load_hazard)
x_fun_o <= `FUNC_ADD;
else case (d_opcode)
`OPC_JAL, `OPC_JALR, `OPC_LUI, `OPC_AUIPC:
x_fun_o <= `FUNC_ADD;
default:
x_fun_o <= d_fun;
endcase // case (f_opcode)
case (d_opcode)
`OPC_JAL, `OPC_JALR, `OPC_LUI, `OPC_AUIPC:
x_fun_o <= `FUNC_ADD;
default:
x_fun_o <= d_fun;
endcase // case (f_opcode)
always@(posedge clk_i)
if(!d_stall_i)
......@@ -208,22 +223,76 @@ module rv_decode
reg [31:0] d_imm;
always@*
case(d_opcode)
`OPC_LUI, `OPC_AUIPC: d_imm <= d_imm_u;
`OPC_OP_IMM, `OPC_LOAD: d_imm <= d_imm_i;
`OPC_STORE: d_imm <= d_imm_s;
`OPC_JAL: d_imm <= d_imm_j;
`OPC_JALR: d_imm <= d_imm_i;
`OPC_BRANCH: d_imm <= d_imm_b;
default: d_imm <= 32'hx;
endcase // case (opcode)
always@(posedge clk_i)
begin
if(!d_stall_i)
case(d_opcode)
`OPC_LUI, `OPC_AUIPC: x_imm_o <= d_imm_u;
`OPC_OP_IMM, `OPC_LOAD: x_imm_o <= d_imm_i;
`OPC_STORE: x_imm_o <= d_imm_s;
`OPC_JAL: x_imm_o <= d_imm_j;
`OPC_JALR: x_imm_o <= d_imm_i;
`OPC_BRANCH: x_imm_o <= d_imm_b;
default: x_imm_o <= 32'hx;
endcase // case (opcode)
end // always@ (posedge clk_i)
if(!d_stall_i)
x_imm_o <= d_imm;
always@(posedge clk_i)
if(!d_stall_i)
begin
case (d_opcode)
`OPC_LUI, `OPC_AUIPC:
begin
x_alu_op1_o <= d_imm;
x_use_op1_o <= 1;
end
`OPC_JAL, `OPC_JALR:
begin
x_alu_op1_o <= 4;
x_use_op1_o <= 1;
end
default:
begin
x_alu_op1_o <= 32'hx;
x_use_op1_o <= 0;
end
endcase // case (d_opcode)
case (d_opcode)
`OPC_LUI:
begin
x_alu_op2_o <= 0;
x_use_op2_o <= 1;
end
`OPC_AUIPC, `OPC_JAL, `OPC_JALR:
begin
x_alu_op2_o <= f_pc_i;
x_use_op2_o <= 1;
end
`OPC_OP_IMM:
begin
x_alu_op2_o <= d_imm;
x_use_op2_o <= 1;
end
default:
begin
x_alu_op2_o <= 32'hx;
x_use_op2_o <= 0;
end
endcase // case (d_opcode_i)
end // if (!d_stall_i)
wire d_rd_nonzero = (f_rd != 0);
// misc decoding
always@(posedge clk_i)
if(!d_stall_i)
......@@ -263,9 +332,9 @@ module rv_decode
// rdest write value
case (d_opcode)
`OPC_OP_IMM, `OPC_OP, `OPC_JAL, `OPC_JALR, `OPC_LUI, `OPC_AUIPC:
x_rd_write <= 1;
x_rd_write <= d_rd_nonzero;
`OPC_SYSTEM:
x_rd_write <= (d_fun != 0); // CSR instructions write to RD
x_rd_write <= d_rd_nonzero && (d_fun != 0); // CSR instructions write to RD
default:
x_rd_write <= 0;
endcase // case (d_opcode)
......
......@@ -22,7 +22,7 @@
`timescale 1ns/1ps
module rv_regmem
module rv_regmem
(
input clk_i,
input rst_i,
......@@ -30,7 +30,7 @@ module rv_regmem
input en1_i,
input [4:0] a1_i,
output [31:0] q1_o,
output reg [31:0] q1_o,
input [4:0] a2_i,
input [31:0] d2_i,
......@@ -47,48 +47,24 @@ module rv_regmem
always@(posedge clk_i)
if(en1_i)
q1_int <= ram[a1_i];
q1_o <= ram[a1_i];
always@(posedge clk_i)
if(we2_i)
ram[a2_i] <= d2_i;
// bypass logic
always@(posedge clk_i)
if(rst_i)
bypass <= 0;
else
bypass <= we2_i && (a1_i == a2_i);
always@(posedge clk_i)
begin
if(we2_i)
bypass_r <= d2_i;
end
assign q1_o = bypass ? bypass_r : q1_int;
// synthesis translate_off
initial begin : ram_init
integer i;
for(i=0;i<32; i=i+1) begin
ram[i] = 0;
end
end
// synthesis translate_on
// synthesis translate_on
endmodule // rv_regmem
endmodule // rv_regmem2
module rv_regfile
(
......@@ -103,8 +79,8 @@ module rv_regfile
input [4:0] d_rs1_i,
input [4:0] d_rs2_i,
output [31:0] x_rs1_value_o,
output [31:0] x_rs2_value_o,
output reg [31:0] x_rs1_value_o,
output reg [31:0] x_rs2_value_o,
input [4:0] w_rd_i,
input [31:0] w_rd_value_i,
......@@ -144,14 +120,49 @@ module rv_regfile
.we2_i (write)
);
wire rs1_bypass = w_bypass_rd_write_i && (w_rd_i == d_rs1_i) && (w_rd_i != 0);
wire rs2_bypass = w_bypass_rd_write_i && (w_rd_i == d_rs2_i) && (w_rd_i != 0);
wire rs1_bypass_x = w_bypass_rd_write_i && (w_rd_i == d_rs1_i) && (w_rd_i != 0);
wire rs2_bypass_x = w_bypass_rd_write_i && (w_rd_i == d_rs2_i) && (w_rd_i != 0);
reg rs1_bypass_w, rs2_bypass_w;
always@(posedge clk_i)
if(rst_i)
begin
rs1_bypass_w <= 0;
rs2_bypass_w <= 0;
end else begin
rs1_bypass_w <= write && (rf_rs1_i == w_rd_i);
rs2_bypass_w <= write && (rf_rs2_i == w_rd_i);
end
reg [31:0] bypass_w;
always@(posedge clk_i)
if(write)
bypass_w <= w_rd_value_i;
always@*
begin
case ( {rs1_bypass_x, rs1_bypass_w } ) // synthesis parallel_case full_case
2'b10, 2'b11:
x_rs1_value_o <= w_bypass_rd_value_i;
2'b01:
x_rs1_value_o <= bypass_w;
default:
x_rs1_value_o <= rs1_regfile;
endcase // case ( {rs1_bypass_x, rs1_bypass_w } )
case ( {rs2_bypass_x, rs2_bypass_w } ) // synthesis parallel_case full_case
2'b10, 2'b11:
x_rs2_value_o <= w_bypass_rd_value_i;
2'b01:
x_rs2_value_o <= bypass_w;
default:
x_rs2_value_o <= rs2_regfile;
endcase // case ( {rs2_bypass_x, rs2_bypass_w } )
end // always@ *
assign x_rs1_value_o = rs1_bypass ? w_bypass_rd_value_i : rs1_regfile;
assign x_rs2_value_o = rs2_bypass ? w_bypass_rd_value_i : rs2_regfile;
endmodule // rv_regfile
......@@ -46,15 +46,19 @@ module rv_writeback
input [31:0] x_shifter_rd_value_i,
input [31:0] x_multiply_rd_value_i,
input [1:0] x_rd_source_i,
output reg [31:0] x_bypass_o,
input [31:0] dm_data_l_i,
input dm_load_done_i,
input dm_store_done_i,
output reg [31:0] rf_rd_value_o,
output [31:0] rf_rd_value_o,
output [4:0] rf_rd_o,
output reg rf_rd_write_o,
output rf_rd_write_o,
output [31:0] TRIG2
);
......@@ -104,26 +108,37 @@ module rv_writeback
endcase // case (d_fun_i)
end // always@ *
reg rf_rd_write;
reg [31:0] rf_rd_value;
always@*
if( x_load_i )
rf_rd_value_o <= load_value;
rf_rd_value <= load_value;
else if ( x_rd_source_i == `RD_SOURCE_SHIFTER )
rf_rd_value_o <= x_shifter_rd_value_i;
rf_rd_value <= x_shifter_rd_value_i;
else if ( x_rd_source_i == `RD_SOURCE_MULTIPLY )
rf_rd_value_o <= x_multiply_rd_value_i;
rf_rd_value <= x_multiply_rd_value_i;
else
rf_rd_value_o <= x_rd_value_i;
rf_rd_value <= x_rd_value_i;
always@*
if (w_stall_i)
rf_rd_write_o <= 0;
rf_rd_write <= 0;
else if (x_load_i && dm_load_done_i)
rf_rd_write_o <= 1;
rf_rd_write <= x_valid_i;
else
rf_rd_write_o <= x_rd_write_i & x_valid_i;
rf_rd_write <= x_rd_write_i & x_valid_i;
always@(posedge clk_i)
if( rf_rd_write )
x_bypass_o <= rf_rd_value;
assign rf_rd_o = (x_rd_i);
assign w_stall_req_o = (x_load_i && !dm_load_done_i) || (x_store_i && !dm_store_done_i);
assign rf_rd_write_o = rf_rd_write;
assign rf_rd_value_o = rf_rd_value;
assign rf_rd_o = x_rd_i;
assign w_stall_req_o = x_valid_i && ((x_load_i && !dm_load_done_i) || (x_store_i && !dm_store_done_i));
assign TRIG2[6] = x_load_i;
assign TRIG2[8] = dm_load_done_i;
......
`timescale 1ns/1ps
// urv_iram: dual-port, byte-writable 32-bit wide instruction/data RAM.
//
// Parameters:
//   g_size        memory size in bytes; only 65536 has a synthesis
//                 implementation below
//   g_init_file   simulation only: text file of "write <addr> <data>" lines
//                 (both fields hexadecimal) loaded at time 0; <addr> is used
//                 directly as a word index into the memory array
//   g_simulation  non-zero selects the behavioural model instead of the
//                 RAMB16_S1_S1 primitives
//
// Ports (port A / port B are symmetrical, both clocked by clk_i):
//   ena_i/enb_i    port enable
//   wea_i/web_i    write enable, qualified per byte by bwea_i/bweb_i
//   aa_i/ab_i      byte address; bits [1:0] are ignored (word access)
//   da_i/db_i      write data
//   qa_o/qb_o      read data, registered (valid the cycle after the access)
//
// NOTE(review): when g_simulation is 0 and g_size is not 65536 nothing is
//   instantiated and qa_o/qb_o are left undriven - confirm this is intended.
module urv_iram
  #(
    parameter g_size = 65536,
    parameter g_init_file = "",
    parameter g_simulation = 0
    )
   (
    input         clk_i,

    input         ena_i,
    input         wea_i,
    input [31:0]  aa_i,
    input [3:0]   bwea_i,
    input [31:0]  da_i,
    output [31:0] qa_o,

    input         enb_i,
    input         web_i,
    input [31:0]  ab_i,
    input [3:0]   bweb_i,
    input [31:0]  db_i,
    output [31:0] qb_o
    );

   // Number of 32-bit words in the memory. The behavioural model wraps word
   // addresses modulo this depth, mirroring the aa_i[15:2] / ab_i[15:2]
   // address slice used by the primitive-based implementation. (The previous
   // code wrapped modulo g_size, which let the index run past the end of
   // 'mem': reads returned X and writes were dropped.)
   localparam c_depth = g_size / 4;

   // synthesis translate_off
   reg [31:0] mem[0:g_size/4-1];
   reg [31:0] qa_int, qb_int;
   // synthesis translate_on

   // Instantiates one 1-bit wide dual-port block RAM:
   //   id        suffix of the instance name
   //   entity    primitive to instantiate
   //   range_a   address bit range taken from aa_i / ab_i
   //   range_d   data bit handled by this instance
   //   range_bw  index of the byte-write-enable bit covering that data bit
`define RAM_INST(id, entity, range_a, range_d, range_bw) \
entity RV_IRAM_BLK_``id \
( \
.CLKA(clk_i), \
.CLKB(clk_i), \
.ADDRA(aa_i[range_a]), \
.ADDRB(ab_i[range_a]), \
.DOA(qa_o[range_d]), \
.DOB(qb_o[range_d]), \
.DIA(da_i[range_d]), \
.DIB(db_i[range_d]), \
.SSRA(1'b0), \
.SSRB(1'b0), \
.ENA(ena_i), \
.ENB(enb_i), \
.WEA(wea_i & bwea_i[range_bw]), \
.WEB(web_i & bweb_i[range_bw]) \
);

   generate
      if (!g_simulation) begin
         if (g_size == 65536) begin
            // 32 bit-slices; data bit k is gated by byte enable k/8.
            `RAM_INST(64K_0, RAMB16_S1_S1, 15:2, 0, 0)
            `RAM_INST(64K_1, RAMB16_S1_S1, 15:2, 1, 0)
            `RAM_INST(64K_2, RAMB16_S1_S1, 15:2, 2, 0)
            `RAM_INST(64K_3, RAMB16_S1_S1, 15:2, 3, 0)
            `RAM_INST(64K_4, RAMB16_S1_S1, 15:2, 4, 0)
            `RAM_INST(64K_5, RAMB16_S1_S1, 15:2, 5, 0)
            `RAM_INST(64K_6, RAMB16_S1_S1, 15:2, 6, 0)
            `RAM_INST(64K_7, RAMB16_S1_S1, 15:2, 7, 0)
            `RAM_INST(64K_8, RAMB16_S1_S1, 15:2, 8, 1)
            `RAM_INST(64K_9, RAMB16_S1_S1, 15:2, 9, 1)
            `RAM_INST(64K_10, RAMB16_S1_S1, 15:2, 10, 1)
            `RAM_INST(64K_11, RAMB16_S1_S1, 15:2, 11, 1)
            `RAM_INST(64K_12, RAMB16_S1_S1, 15:2, 12, 1)
            `RAM_INST(64K_13, RAMB16_S1_S1, 15:2, 13, 1)
            `RAM_INST(64K_14, RAMB16_S1_S1, 15:2, 14, 1)
            `RAM_INST(64K_15, RAMB16_S1_S1, 15:2, 15, 1)
            `RAM_INST(64K_16, RAMB16_S1_S1, 15:2, 16, 2)
            `RAM_INST(64K_17, RAMB16_S1_S1, 15:2, 17, 2)
            `RAM_INST(64K_18, RAMB16_S1_S1, 15:2, 18, 2)
            `RAM_INST(64K_19, RAMB16_S1_S1, 15:2, 19, 2)
            `RAM_INST(64K_20, RAMB16_S1_S1, 15:2, 20, 2)
            `RAM_INST(64K_21, RAMB16_S1_S1, 15:2, 21, 2)
            `RAM_INST(64K_22, RAMB16_S1_S1, 15:2, 22, 2)
            `RAM_INST(64K_23, RAMB16_S1_S1, 15:2, 23, 2)
            `RAM_INST(64K_24, RAMB16_S1_S1, 15:2, 24, 3)
            `RAM_INST(64K_25, RAMB16_S1_S1, 15:2, 25, 3)
            `RAM_INST(64K_26, RAMB16_S1_S1, 15:2, 26, 3)
            `RAM_INST(64K_27, RAMB16_S1_S1, 15:2, 27, 3)
            `RAM_INST(64K_28, RAMB16_S1_S1, 15:2, 28, 3)
            `RAM_INST(64K_29, RAMB16_S1_S1, 15:2, 29, 3)
            `RAM_INST(64K_30, RAMB16_S1_S1, 15:2, 30, 3)
            `RAM_INST(64K_31, RAMB16_S1_S1, 15:2, 31, 3)
         end // if (g_size == 65536)
      end else begin // if (!g_simulation)
         // synthesis translate_off

         // Behavioural model. Reads use non-blocking assignments, so a read
         // and a write to the same word in one cycle return the old contents.
         always @(posedge clk_i)
           begin
              if (ena_i)
                begin
                   qa_int <= mem[(aa_i / 4) % c_depth];
                   if (wea_i && bwea_i[0])
                     mem[(aa_i / 4) % c_depth][7:0] <= da_i[7:0];
                   if (wea_i && bwea_i[1])
                     mem[(aa_i / 4) % c_depth][15:8] <= da_i[15:8];
                   if (wea_i && bwea_i[2])
                     mem[(aa_i / 4) % c_depth][23:16] <= da_i[23:16];
                   if (wea_i && bwea_i[3])
                     mem[(aa_i / 4) % c_depth][31:24] <= da_i[31:24];
                end

              if (enb_i)
                begin
                   qb_int <= mem[(ab_i / 4) % c_depth];
                   if (web_i && bweb_i[0])
                     mem[(ab_i / 4) % c_depth][7:0] <= db_i[7:0];
                   if (web_i && bweb_i[1])
                     mem[(ab_i / 4) % c_depth][15:8] <= db_i[15:8];
                   if (web_i && bweb_i[2])
                     mem[(ab_i / 4) % c_depth][23:16] <= db_i[23:16];
                   if (web_i && bweb_i[3])
                     mem[(ab_i / 4) % c_depth][31:24] <= db_i[31:24];
                end
           end // always@ (posedge clk_i)

         assign qa_o = qa_int;
         assign qb_o = qb_int;
         // synthesis translate_on
      end // else: !if(!g_simulation)
   endgenerate

   // synthesis translate_off
   integer f, addr, data, n_fields;
   reg [8*20-1:0] cmd;

   // Optional memory preload for simulation.
   initial begin
      if (g_simulation && g_init_file != "") begin : init_ram_contents
         f = $fopen(g_init_file, "r");
         if (f == 0)
           begin
              $error("can't open: %s", g_init_file);
              $stop;
           end

         while (!$feof(f))
           begin
              // Only act on fully parsed records; a short read (for example
              // the one that hits end-of-file) would otherwise replay the
              // previous record's fields.
              n_fields = $fscanf(f, "%s %08x %08x", cmd, addr, data);
              if (n_fields == 3 && cmd == "write")
                begin
                   mem[addr % c_depth] = data;
                end
           end

         $fclose(f);
      end // if (g_simulation && g_init_file != "")
   end
   // synthesis translate_on

endmodule // urv_iram
......@@ -28,6 +28,7 @@ entity xrv_core is
generic (
g_internal_ram_size : integer := 65536;
g_internal_ram_init_file : string := "";
g_simulation : boolean := false;
g_address_bits : integer := 32;
g_wishbone_start : unsigned(31 downto 0) := x"00020000"
);
......@@ -76,6 +77,30 @@ architecture wrapper of xrv_core is
);
end component;
component urv_iram
generic (
g_size : integer;
g_init_file : string;
g_simulation : boolean
);
port (
clk_i : in std_logic;
ena_i : in std_logic;
wea_i : in std_logic;
aa_i : in std_logic_vector(31 downto 0);
bwea_i : in std_logic_vector(3 downto 0);
da_i : in std_logic_vector(31 downto 0);
qa_o :out std_logic_vector(31 downto 0);
enb_i : in std_logic;
web_i : in std_logic;
ab_i : in std_logic_vector(31 downto 0);
bweb_i : in std_logic_vector(3 downto 0);
db_i : in std_logic_vector(31 downto 0);
qb_o :out std_logic_vector(31 downto 0)
);
end component;
signal cpu_rst, cpu_rst_d : std_logic;
signal im_addr : std_logic_vector(31 downto 0);
......@@ -211,7 +236,7 @@ begin
port map (
clk_i => clk_sys_i,
rst_i => cpu_rst,
irq_i => '0',
irq_i => '0',
im_addr_o => im_addr,
im_data_i => im_data,
im_valid_i => im_valid,
......@@ -227,27 +252,25 @@ begin
dm_data_write <= not dm_is_wishbone and dm_store;
U_iram : generic_dpram
U_iram : urv_iram
generic map (
g_data_width => 32,
g_size => g_internal_ram_size/ 4,
g_with_byte_enable => true,
g_dual_clock => false,
g_addr_conflict_resolution => "read_first",
g_init_file => g_internal_ram_init_file)
g_size => g_internal_ram_size,
g_init_file => g_internal_ram_init_file,
g_simulation => g_simulation)
port map (
rst_n_i => rst_n_i,
clka_i => clk_sys_i,
clk_i => clk_sys_i,
wea_i => '0', --ha_im_write,
aa_i => im_addr_muxed(c_mem_address_bits + 1 downto 2),
ena_i => '1',
wea_i => '0',
bwea_i => "0000",
aa_i => im_addr_muxed,
da_i => ha_im_wdata,
qa_o => im_data,
clkb_i => clk_sys_i,
enb_i => '1',
bweb_i => dm_data_select,
web_i => dm_data_write,
ab_i => dm_addr(c_mem_address_bits + 1 downto 2),
ab_i => dm_addr,
db_i => dm_data_s,
qb_o => dm_mem_rdata
);
......
......@@ -26,10 +26,10 @@
#include "riscv.h"
#define BASE_CLOCK 62500000 // Xtal frequency
#define BASE_CLOCK 100000000 // Xtal frequency
#define BASE_UART 0x20000
#define BASE_GPIO 0x21000
#define BASE_UART 0x80000000
#define BASE_GPIO 0x80001000
#define UART_BAUDRATE 115200
......
# save_tf: spill the trap frame into the memory area addressed by x2 (sp).
# Frame layout in 32-bit word slots, as written by this macro:
#   1, 3..31  general purpose registers x1 and x3..x31
#   2         value read from sscratch (restored into x2 by the trap exit code)
#   32        sstatus
#   33        sepc
#   35        scause
#   36        constant -1
# Slots 0 and 34 are not written. Clobbers t0 (x5), s0 (x8), t2 (x7) and
# t3 (x28) after their original values have been stored, and zeroes sscratch.
.macro save_tf
# save gprs
sw x1,1*4(x2)
sw x3,3*4(x2)
sw x4,4*4(x2)
sw x5,5*4(x2)
sw x6,6*4(x2)
sw x7,7*4(x2)
sw x8,8*4(x2)
sw x9,9*4(x2)
sw x10,10*4(x2)
sw x11,11*4(x2)
sw x12,12*4(x2)
sw x13,13*4(x2)
sw x14,14*4(x2)
sw x15,15*4(x2)
sw x16,16*4(x2)
sw x17,17*4(x2)
sw x18,18*4(x2)
sw x19,19*4(x2)
sw x20,20*4(x2)
sw x21,21*4(x2)
sw x22,22*4(x2)
sw x23,23*4(x2)
sw x24,24*4(x2)
sw x25,25*4(x2)
sw x26,26*4(x2)
sw x27,27*4(x2)
sw x28,28*4(x2)
sw x29,29*4(x2)
sw x30,30*4(x2)
sw x31,31*4(x2)
# read sscratch (writing zero back to it), sstatus, sepc and scause;
# badvaddr is not read here, so slot 34 stays untouched
csrrw t0,sscratch,x0
csrr s0,sstatus
csrr t2,sepc
csrr t3,scause
sw t0,2*4(x2)
sw s0,32*4(x2)
sw t2,33*4(x2)
sw t3,35*4(x2)
# slot 36 (faulting instruction) is always filled with -1; the instruction
# word itself is never fetched by this macro
li x5,-1
sw x5,36*4(x2)
# numeric local label with no reference inside this macro
1:
.endm
# Boot section: reset entry point followed directly by the trap handler.
.section .boot, "ax", @progbits
.global _start
# Reset entry: jump over the trap handler to the runtime startup code.
_start:
j _entry
# Trap/interrupt entry, located immediately after the jump above.
# NOTE(review): presumably this offset is the trap vector of the core - confirm.
# Swaps sp with sscratch. When the value obtained from sscratch is zero, the
# original sp (now held in sscratch) is read back and used as the stack.
_irq_entry:
csrrw sp, sscratch, sp
bnez sp, 1f
csrr sp, sscratch
# reserve a 320-byte frame and spill registers and CSRs into it
1:addi sp,sp,-320
save_tf
# a0 = pointer to the saved frame, passed as first argument to _handle_irq
move a0,sp
jal _handle_irq
# a0 = frame base used by the restore sequence below
mv a0,sp
# do not restore sstatus when the trap came from kernel (disabled code):
# andi s0,s0,SSTATUS_PS
# bnez s0,start_user
# pop the frame and leave the resulting sp in sscratch
addi sp,sp,320
csrw sscratch,sp
#.globl start_user
#start_user:
# csrc sstatus, SSTATUS_IE
# copy the SSTATUS_PS bit of the saved sstatus (slot 32) into sstatus and
# reload sepc from slot 33
li t0, SSTATUS_PS
lw t1, 32*4(a0)
lw t2, 33*4(a0)
csrc sstatus, t0
and t0, t0, t1
csrs sstatus, t0
csrw sepc, t2
# restore x registers; x10 (a0) holds the frame pointer and is skipped here
lw x1,1*4(a0)
lw x2,2*4(a0)
lw x3,3*4(a0)
lw x4,4*4(a0)
lw x5,5*4(a0)
lw x6,6*4(a0)
lw x7,7*4(a0)
lw x8,8*4(a0)
lw x9,9*4(a0)
lw x11,11*4(a0)
lw x12,12*4(a0)
lw x13,13*4(a0)
lw x14,14*4(a0)
lw x15,15*4(a0)
lw x16,16*4(a0)
lw x17,17*4(a0)
lw x18,18*4(a0)
lw x19,19*4(a0)
lw x20,20*4(a0)
lw x21,21*4(a0)
lw x22,22*4(a0)
lw x23,23*4(a0)
lw x24,24*4(a0)
lw x25,25*4(a0)
lw x26,26*4(a0)
lw x27,27*4(a0)
lw x28,28*4(a0)
lw x29,29*4(a0)
lw x30,30*4(a0)
lw x31,31*4(a0)
# restore a0 last, since it is the base register of all the loads above
lw x10,10*4(a0)
eret
# end of the trap handler: the eret above leaves it
# Runtime startup: set up gp and sp, zero the .bss range [_fbss, _end) and
# call main. Execution parks in an endless loop should main ever return.
_entry:
la gp, _gp # Initialize global pointer
la sp, _fstack
# clear the bss segment
la t0, _fbss
la t1, _end
# empty .bss (_fbss >= _end): skip the loop so nothing past _end is written
bgeu t0, t1, 2f
1:
#ifdef __riscv64
sd zero,0(t0)
addi t0, t0, 8
#else
sw zero,0(t0)
addi t0, t0, 4
#endif
bltu t0, t1, 1b
2:
call main
# main is not expected to return; park here instead of running into
# whatever code the linker placed after this section
3:
j 3b
OUTPUT_FORMAT("elf32-littleriscv")
ENTRY(_start)
SECTIONS
{
/*--------------------------------------------------------------------*/
/* Code and read-only segment */
/*--------------------------------------------------------------------*/
/* Beginning of code and text segment */
. = 0x00000000;
_ftext = .;
PROVIDE( eprol = . );
/* text: Program code section */
.text :
{
*(.boot)
*(.text)
*(.text.*)
*(.gnu.linkonce.t.*)
}
/* init: Code to execute before main (called by crt0.S) */
.init :
{
KEEP( *(.init) )
}
/* fini: Code to execute after main (called by crt0.S) */
.fini :
{
KEEP( *(.fini) )
}
/* rodata: Read-only data */
.rodata :
{
*(.rdata)
*(.rodata)
*(.rodata.*)
*(.gnu.linkonce.r.*)
}
/* End of code and read-only segment */
PROVIDE( etext = . );
_etext = .;
/*--------------------------------------------------------------------*/
/* Global constructor/destructor segment */
/*--------------------------------------------------------------------*/
/* The .ctors/.dtors sections are special sections which contain a
list of constructor/destructor function pointers. crtbegin.o
includes code in a .init section which goes through the .ctors list
and calls each constructor. crtend.o includes code in a .fini
section which goes through the .dtors list and calls each
destructor. crtbegin.o includes a special null pointer in its own
.ctors/.dtors sections which acts as a start indicator for those
lists. crtend.o also includes a special null pointer in its own
.ctors/.dtors sections which acts as an end indicator. The linker
commands below are set up so that crtbegin.o's .ctors/.dtors
sections are always first and crtend.o's .ctors/.dtors sections are
always last. This is the only way the list of functions will have
the begin and end indicators in the right place. */
/* ctors : Array of global constructor function pointers */
/*--------------------------------------------------------------------*/
/* Initialized data segment */
/*--------------------------------------------------------------------*/
/* Start of initialized data segment */
. = ALIGN(16);
_fdata = .;
/* data: Writable data */
.data :
{
*(.data)
*(.data.*)
*(.gnu.linkonce.d.*)
}
/* End of initialized data segment */
PROVIDE( edata = . );
_edata = .;
/* Have _gp point to middle of sdata/sbss to maximize displacement range */
. = ALIGN(16);
_gp = . + 0x800;
/* Writable small data segment */
.sdata :
{
*(.sdata)
*(.sdata.*)
*(.srodata.*)
*(.gnu.linkonce.s.*)
}
/*--------------------------------------------------------------------*/
/* Uninitialized data segment */
/*--------------------------------------------------------------------*/
/* Start of uninitialized data segment */
. = ALIGN(8);
_fbss = .;
/* Writable uninitialized small data segment */
.sbss :
{
*(.sbss)
*(.sbss.*)
*(.gnu.linkonce.sb.*)
}
/* bss: Uninitialized writeable data section */
. = .;
_bss_start = .;
.bss :
{
*(.bss)
*(.bss.*)
*(.gnu.linkonce.b.*)
*(COMMON)
}
/* End of uninitialized data segment (used by syscalls.c for heap) */
PROVIDE( end = . );
_end = ALIGN(8);
PROVIDE( _fstack = 0x7fffffc - 0x400 );
PROVIDE( _fexception_stack = 0x7fffffc );
}
/*
* This work is part of the White Rabbit project
*
* Copyright (C) 2011 CERN (www.cern.ch)
* Author: Tomasz Wlostowski <tomasz.wlostowski@cern.ch>
*
* Released according to the GNU GPL, version 2 or any later version.
*/
#include <stdio.h>
#include <stdlib.h>
/*
 * Dump a binary image as 32-bit words, one "@<byte address> <word>" line
 * per word on stdout.
 *
 * Usage: <prog> <file> <n_words> [<file_pos> <base>]
 *   file     - input binary; each group of 4 bytes is printed as one
 *              little-endian 32-bit word
 *   n_words  - number of lines to emit; 0 means "until end of file".
 *              When the file holds fewer words, the rest is zero-filled.
 *   file_pos - byte offset in the file to start reading from (default 0)
 *   base     - word index used for the first emitted address (default 0)
 * file_pos and base are only honoured when both are given.
 *
 * Returns 0 on success, -1 on bad usage or when the file cannot be opened
 * or positioned at file_pos.
 */
int main(int argc, char *argv[])
{
	FILE *f;
	int i = 0;
	int n;
	int base = 0;
	int file_pos = 0;

	if (argc < 3)
		return -1;

	f = fopen(argv[1], "rb");
	if (!f)
		return -1;

	n = atoi(argv[2]);
	if (argc >= 5) {
		file_pos = atoi(argv[3]);
		base = atoi(argv[4]);
	}

	/* A failed seek only matters when a non-zero offset was requested. */
	if (fseek(f, file_pos, SEEK_SET) != 0 && file_pos != 0) {
		fclose(f);
		return -1;
	}

	while (n == 0 || i < n) {
		/* Start from zeros so a short final read is zero-padded. */
		unsigned char x[4] = { 0, 0, 0, 0 };

		/*
		 * Stop on the read that returns nothing. Testing feof() before
		 * reading would emit one extra word made of stale bytes, because
		 * the end-of-file flag is only set by a read that hits the end.
		 */
		if (fread(x, 1, sizeof x, f) == 0)
			break;

		printf("@%x %02X%02X%02X%02X\n", (unsigned)(base + i) * 0x4, x[3], x[2], x[1],
		       x[0]);
		i++;
	}

	/* Zero-fill up to the requested word count. */
	for (; i < n; i++)
		printf("@%x %02X%02X%02X%02X\n", (unsigned)(base + i) * 0x4, 0, 0, 0, 0);

	fclose(f);
	return 0;
}
......@@ -66,6 +66,6 @@ print("Programming done!")
while True:
b=ser.read(1)
if(b):
sys.stdout.write(b)
sys.stderr.write(b)
else:
time.sleep(0.01)
......@@ -7,21 +7,30 @@ OBJDUMP = $(CROSS_COMPILE)objdump
OBJCOPY = $(CROSS_COMPILE)objcopy
SIZE = $(CROSS_COMPILE)size
CFLAGS = -g -m32 -msoft-float -march=RV32I -O2 -I. -I../common
OBJS = crt0.o irq.o boot.o ../common/uart.o
SYN_DIR=../../syn/spec
SYN_TOP_LEVEL=spec_top
CFLAGS = -g -O2 -m32 -msoft-float -march=RV32I -I. -I../common -ffunction-sections -fdata-sections
OBJS = crt0.o boot.o ../common/uart.o ../common/emulate.o ../common/irq.o
LDS = boot.ld
OUTPUT=uart-bootloader
$(OUTPUT): $(LDS) $(OBJS)
${CC} -g -m32 -msoft-float -march=RV32I -o $(OUTPUT).elf -nostartfiles $(OBJS) -lm -T $(LDS)
${CC} -g -m32 -Wl,--gc-sections -march=RV32I -o $(OUTPUT).elf -nostartfiles $(OBJS) -lm -T $(LDS)
${OBJCOPY} -O binary $(OUTPUT).elf $(OUTPUT).bin
${OBJDUMP} -D $(OUTPUT).elf > disasm.S
$(SIZE) $(OUTPUT).elf
../genraminit $(OUTPUT).bin 512 0 0 > uart-bootloader.ram
../genraminit $(OUTPUT).bin 512 0 0 > $(OUTPUT).ram
../genmeminit $(OUTPUT).bin 512 0 0 > $(OUTPUT).mem
# ../genraminit $(OUTPUT).bin 512 63488 15872 >> uart-bootloader.ram
clean:
rm -f $(OUTPUT).elf $(OUTPUT).bin $(OBJS)
bitstream: $(OUTPUT)
data2mem -o b $(SYN_TOP_LEVEL)_$(OUTPUT).bit -bt $(SYN_DIR)/$(SYN_TOP_LEVEL).bit -bm $(SYN_DIR)/$(SYN_TOP_LEVEL)_bd.bmm -bd $(OUTPUT).mem tag urv_iram
# data2mem -bx 1 -bt $(SYN_DIR)/$(SYN_TOP_LEVEL).bit -bm $(SYN_DIR)/$(SYN_TOP_LEVEL)_bd.bmm -bd $(OUTPUT).mem tag urv_iram
%.o: %.S
${CC} -c -m32 $^ -o $@
\ No newline at end of file
# Boot section: _start sits at offset 0 and _exception_handler is forced to
# offset 0x8 of this section by the .org below.
.section .boot, "ax", @progbits
.global _start
_start:
# jump past the exception stub to the startup code
j _entry
.org 0x8
# trap_entry is defined in another object file
.extern trap_entry
# NOTE(review): offset 0x8 is presumably the address the CPU vectors traps to; confirm against the RTL
_exception_handler:
j trap_entry
# _entry: runtime startup. Sets gp and sp, loads mscratch with the
# _fexception_stack address, zeroes [_fbss, _end) and calls main.
_entry:
la gp, _gp # Initialize global pointer
la sp, _fstack
# mscratch = _fexception_stack; the previous mscratch value lands in t0 and is dropped
la t0, _fexception_stack
csrrw t0, mscratch, t0
# clear the bss segment
la t0, _fbss
la t1, _end
# check before the first store so that an empty bss writes nothing at _end
bgeu t0, t1, 2f
1:
#ifdef __riscv64
sd zero,0(t0)
addi t0, t0, 8
#else
sw zero,0(t0)
addi t0, t0, 4
#endif
bltu t0, t1, 1b
2:
call main
# main has nowhere to return to: spin here instead of running into whatever follows
3:
j 3b
\ No newline at end of file
......@@ -190,7 +190,7 @@
<property xil_pn:name="Other Compxlib Command Line Options" xil_pn:value="" xil_pn:valueState="default"/>
<property xil_pn:name="Other Map Command Line Options" xil_pn:value="" xil_pn:valueState="default"/>
<property xil_pn:name="Other NETGEN Command Line Options" xil_pn:value="" xil_pn:valueState="default"/>
<property xil_pn:name="Other Ngdbuild Command Line Options" xil_pn:value="" xil_pn:valueState="default"/>
<property xil_pn:name="Other Ngdbuild Command Line Options" xil_pn:value="-bm spec_top.bmm" xil_pn:valueState="non-default"/>
<property xil_pn:name="Other Place &amp; Route Command Line Options" xil_pn:value="" xil_pn:valueState="default"/>
<property xil_pn:name="Other Simulator Commands Behavioral" xil_pn:value="" xil_pn:valueState="default"/>
<property xil_pn:name="Other Simulator Commands Post-Map" xil_pn:value="" xil_pn:valueState="default"/>
......@@ -202,7 +202,7 @@
<property xil_pn:name="Output File Name" xil_pn:value="spec_top" xil_pn:valueState="default"/>
<property xil_pn:name="Overwrite Compiled Libraries" xil_pn:value="false" xil_pn:valueState="default"/>
<property xil_pn:name="Pack I/O Registers into IOBs" xil_pn:value="Yes" xil_pn:valueState="non-default"/>
<property xil_pn:name="Pack I/O Registers/Latches into IOBs" xil_pn:value="Off" xil_pn:valueState="default"/>
<property xil_pn:name="Pack I/O Registers/Latches into IOBs" xil_pn:value="For Inputs and Outputs" xil_pn:valueState="non-default"/>
<property xil_pn:name="Package" xil_pn:value="fgg484" xil_pn:valueState="default"/>
<property xil_pn:name="Perform Advanced Analysis" xil_pn:value="false" xil_pn:valueState="default"/>
<property xil_pn:name="Perform Advanced Analysis Post Trace" xil_pn:value="false" xil_pn:valueState="default"/>
......@@ -285,6 +285,7 @@
<property xil_pn:name="Starting Placer Cost Table (1-100) Map spartan6" xil_pn:value="1" xil_pn:valueState="default"/>
<property xil_pn:name="Synthesis Tool" xil_pn:value="XST (VHDL/Verilog)" xil_pn:valueState="default"/>
<property xil_pn:name="Target Simulator" xil_pn:value="Please Specify" xil_pn:valueState="default"/>
<property xil_pn:name="Target UCF File Name" xil_pn:value="rv_cpu.ucf" xil_pn:valueState="non-default"/>
<property xil_pn:name="Timing Mode Map" xil_pn:value="Performance Evaluation" xil_pn:valueState="default"/>
<property xil_pn:name="Timing Mode Par" xil_pn:value="Performance Evaluation" xil_pn:valueState="default"/>
<property xil_pn:name="Top-Level Module Name in Output Netlist" xil_pn:value="" xil_pn:valueState="default"/>
......@@ -355,7 +356,7 @@
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/modules/wishbone/wb_uart/xwb_simple_uart.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="32"/>
<association xil_pn:name="Implementation" xil_pn:seqID="33"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/platform/xilinx/wb_xilinx_fpga_loader/xloader_registers_pkg.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
......@@ -427,13 +428,13 @@
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
</file>
<file xil_pn:name="../../top/spec/spec_top.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="35"/>
<association xil_pn:name="Implementation" xil_pn:seqID="36"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/modules/wishbone/wb_slave_adapter/wb_slave_adapter.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="23"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/modules/wishbone/wb_gpio_port/xwb_gpio_port.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="33"/>
<association xil_pn:name="Implementation" xil_pn:seqID="34"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/platform/xilinx/wb_xil_multiboot/multiboot_regs.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
......@@ -513,9 +514,6 @@
<file xil_pn:name="../../ip_cores/general-cores/modules/wishbone/wb_lm32/generated/lm32_allprofiles.v" xil_pn:type="FILE_VERILOG">
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
</file>
<file xil_pn:name="../../rtl/rv_regfile.v" xil_pn:type="FILE_VERILOG">
<association xil_pn:name="Implementation" xil_pn:seqID="15"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/modules/common/gc_fsm_watchdog.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
</file>
......@@ -583,7 +581,7 @@
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/modules/wishbone/wb_uart/wb_simple_uart.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="28"/>
<association xil_pn:name="Implementation" xil_pn:seqID="29"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/modules/common/gc_word_packer.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
......@@ -598,7 +596,7 @@
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/modules/wishbone/wb_gpio_port/wb_gpio_port.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="29"/>
<association xil_pn:name="Implementation" xil_pn:seqID="30"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/modules/wishbone/wb_spi/spi_clgen.v" xil_pn:type="FILE_VERILOG">
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
......@@ -616,7 +614,7 @@
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
</file>
<file xil_pn:name="../../rtl/rv_cpu.v" xil_pn:type="FILE_VERILOG">
<association xil_pn:name="Implementation" xil_pn:seqID="27"/>
<association xil_pn:name="Implementation" xil_pn:seqID="28"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/modules/wishbone/wb_onewire_master/sockit_owm.v" xil_pn:type="FILE_VERILOG">
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
......@@ -630,9 +628,6 @@
<file xil_pn:name="../../ip_cores/general-cores/modules/wishbone/wb_dma/xwb_streamer.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
</file>
<file xil_pn:name="../../rtl/rv_shifter.v" xil_pn:type="FILE_VERILOG">
<association xil_pn:name="Implementation" xil_pn:seqID="6"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/modules/common/gc_serial_dac.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
</file>
......@@ -664,13 +659,13 @@
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/modules/wishbone/wb_crossbar/xwb_crossbar.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="34"/>
<association xil_pn:name="Implementation" xil_pn:seqID="35"/>
</file>
<file xil_pn:name="../../top/spec/reset_gen.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="30"/>
<association xil_pn:name="Implementation" xil_pn:seqID="31"/>
</file>
<file xil_pn:name="../../rtl/xrv_core.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="31"/>
<association xil_pn:name="Implementation" xil_pn:seqID="32"/>
</file>
<file xil_pn:name="../../ip_cores/general-cores/modules/wishbone/wb_irq/wb_irq_slave.vhd" xil_pn:type="FILE_VHDL">
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
......@@ -744,6 +739,18 @@
<file xil_pn:name="../../ip_cores/chipscope/chipscope_ila.ngc" xil_pn:type="FILE_NGC">
<association xil_pn:name="Implementation" xil_pn:seqID="0"/>
</file>
<file xil_pn:name="../../rtl/urv_iram.v" xil_pn:type="FILE_VERILOG">
<association xil_pn:name="BehavioralSimulation" xil_pn:seqID="257"/>
<association xil_pn:name="Implementation" xil_pn:seqID="27"/>
</file>
<file xil_pn:name="../../rtl/rv_regfile.v" xil_pn:type="FILE_VERILOG">
<association xil_pn:name="BehavioralSimulation" xil_pn:seqID="259"/>
<association xil_pn:name="Implementation" xil_pn:seqID="15"/>
</file>
<file xil_pn:name="../../rtl/rv_shifter.v" xil_pn:type="FILE_VERILOG">
<association xil_pn:name="BehavioralSimulation" xil_pn:seqID="260"/>
<association xil_pn:name="Implementation" xil_pn:seqID="6"/>
</file>
</files>
<bindings>
......
ADDRESS_SPACE urv_iram RAMB16 [0x00000000:0x000FFFF]
BUS_BLOCK
U_CPU/U_iram/RV_IRAM_BLK_64K_31 [31];
U_CPU/U_iram/RV_IRAM_BLK_64K_30 [30];
U_CPU/U_iram/RV_IRAM_BLK_64K_29 [29];
U_CPU/U_iram/RV_IRAM_BLK_64K_28 [28];
U_CPU/U_iram/RV_IRAM_BLK_64K_27 [27];
U_CPU/U_iram/RV_IRAM_BLK_64K_26 [26];
U_CPU/U_iram/RV_IRAM_BLK_64K_25 [25];
U_CPU/U_iram/RV_IRAM_BLK_64K_24 [24];
U_CPU/U_iram/RV_IRAM_BLK_64K_23 [23];
U_CPU/U_iram/RV_IRAM_BLK_64K_22 [22];
U_CPU/U_iram/RV_IRAM_BLK_64K_21 [21];
U_CPU/U_iram/RV_IRAM_BLK_64K_20 [20];
U_CPU/U_iram/RV_IRAM_BLK_64K_19 [19];
U_CPU/U_iram/RV_IRAM_BLK_64K_18 [18];
U_CPU/U_iram/RV_IRAM_BLK_64K_17 [17];
U_CPU/U_iram/RV_IRAM_BLK_64K_16 [16];
U_CPU/U_iram/RV_IRAM_BLK_64K_15 [15];
U_CPU/U_iram/RV_IRAM_BLK_64K_14 [14];
U_CPU/U_iram/RV_IRAM_BLK_64K_13 [13];
U_CPU/U_iram/RV_IRAM_BLK_64K_12 [12];
U_CPU/U_iram/RV_IRAM_BLK_64K_11 [11];
U_CPU/U_iram/RV_IRAM_BLK_64K_10 [10];
U_CPU/U_iram/RV_IRAM_BLK_64K_9 [9];
U_CPU/U_iram/RV_IRAM_BLK_64K_8 [8];
U_CPU/U_iram/RV_IRAM_BLK_64K_7 [7];
U_CPU/U_iram/RV_IRAM_BLK_64K_6 [6];
U_CPU/U_iram/RV_IRAM_BLK_64K_5 [5];
U_CPU/U_iram/RV_IRAM_BLK_64K_4 [4];
U_CPU/U_iram/RV_IRAM_BLK_64K_3 [3];
U_CPU/U_iram/RV_IRAM_BLK_64K_2 [2];
U_CPU/U_iram/RV_IRAM_BLK_64K_1 [1];
U_CPU/U_iram/RV_IRAM_BLK_64K_0 [0];
END_BUS_BLOCK;
END_ADDRESS_SPACE;
......@@ -36,7 +36,8 @@ use unisim.vcomponents.all;
entity spec_top is
generic (
g_riscv_firmware : string := "uart-bootloader.ram";
g_riscv_mem_size : integer := 65536
g_riscv_mem_size : integer := 65536;
g_simulation : boolean := false
);
port (
button1_n_i: in std_logic := '1';
......@@ -67,6 +68,7 @@ architecture rtl of spec_top is
generic (
g_internal_ram_size : integer;
g_internal_ram_init_file : string;
g_simulation : boolean;
g_address_bits : integer;
g_wishbone_start : unsigned(31 downto 0));
port (
......@@ -95,13 +97,14 @@ architecture rtl of spec_top is
signal cnx_master_out : t_wishbone_master_out_array(c_cnx_master_ports-1 downto 0);
constant c_cfg_base_addr : t_wishbone_address_array(c_cnx_master_ports-1 downto 0) :=
(c_slave_gpio => x"00021000", -- GPIO
c_slave_uart => x"00020000"); -- UART
(c_slave_gpio => x"80001000", -- GPIO
c_slave_uart => x"80000000"); -- UART
constant c_cfg_base_mask : t_wishbone_address_array(c_cnx_master_ports-1 downto 0) :=
(c_slave_gpio => x"000ff000",
c_slave_uart => x"000ff000" );
(c_slave_gpio => x"8000f000",
c_slave_uart => x"8000f000" );
signal clk_125m_pllref : std_logic;
signal pllout_clk_fb_pllref, pllout_clk_sys, clk_sys, sys_locked, sys_locked_n : std_logic;
......@@ -130,7 +133,7 @@ begin -- rtl
DIVCLK_DIVIDE => 1,
CLKFBOUT_MULT => 8,
CLKFBOUT_PHASE => 0.000,
CLKOUT0_DIVIDE => 16, -- 62.5 MHz
CLKOUT0_DIVIDE => 10, -- 100 MHz
CLKOUT0_PHASE => 0.000,
CLKOUT0_DUTY_CYCLE => 0.500,
CLKOUT1_DIVIDE => 8, -- not used
......@@ -175,6 +178,7 @@ begin -- rtl
generic map (
g_internal_ram_size => g_riscv_mem_size,
g_internal_ram_init_file => g_riscv_firmware,
g_simulation => g_simulation,
g_address_bits => 32,
g_wishbone_start => x"00020000")
port map (
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment