[rds-devel] Re: RDS IB transport software flow control?

Richard Frank richard.frank at oracle.com
Mon Nov 5 07:05:34 PST 2007


Yes, it relies on IB RNR NAKs. The reasoning was simply to keep the RDS wire 
protocol to a minimum.

Or Gerlitz wrote:
> Zach, Rick,
>
> From the patch below I conclude that the RDS IB transport relies on IB 
> RNR NAKs. Is this correct? If so, why?
>
> Or.
>> From: Olaf Kirch <olaf.kirch at oracle.com>
>> Date: Thu, 1 Nov 2007 16:13:00 +0000 (+0100)
>> Subject: RDS: prime the RNR timeout
>> X-Git-Url: 
>> http://www.openfabrics.org/git/?p=%7Evlad%2Fofed_1_2%2F.git;a=commitdiff_plain;h=62b2ea93e20b130dd7cb081271d4f556dfccc25a 
>>
>>
>> RDS: prime the RNR timeout
>>
>> When the receiver has not posted any buffers to the receive queue,
>> the sender will receive an RNR NAK, and retry after a certain
>> timeout. The default value is 560 ms.
>>
>> This patch lowers the RNR timeout to 0.32 ms, which is still not
>> the absolute minimum, but close.
>>
>> Signed-off-by: Olaf Kirch <olaf.kirch at oracle.com>
>> ---
>>
>> diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
>> index ee63402..725d3e5 100644
>> --- a/net/rds/ib_cm.c
>> +++ b/net/rds/ib_cm.c
>> @@ -53,6 +53,30 @@ static void rds_ib_cq_event_handler(struct 
>> ib_event *event, void *data)
>>      rdsdebug("event %u data %p\n", event->event, data);
>>  }
>>  
>> +static void rds_ib_connect_complete(struct rds_connection *conn)
>> +{
>> +    struct rds_ib_connection *ic = conn->c_transport_data;
>> +    struct ib_qp_attr qp_attr;
>> +    int ret;
>> +
>> +    /* Tune the RNR timeout. We use a rather low timeout, but
>> +     * not the absolute minimum - this should be tunable.
>> +     *
>> +     * We already set the RNR retry count to 7 (which is the
>> +     * smallest infinite number :-) above
>> +     */
>> +    qp_attr.qp_state = IB_QPS_RTS;
>> +    qp_attr.min_rnr_timer = IB_RNR_TIMER_000_32;
>> +    ret = ib_modify_qp(ic->i_cm_id->qp, &qp_attr,
>> +                IB_QP_STATE | IB_QP_MIN_RNR_TIMER);
>> +    if (ret) {
>> +        printk(KERN_NOTICE "ib_modify_qp(IB_QP_MIN_RNR_TIMER, %u): 
>> err=%d\n",
>> +                qp_attr.min_rnr_timer, -ret);
>> +    }
>> +
>> +    rds_connect_complete(conn);
>> +}
>> +
>>  static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
>>  {
>>      struct rds_connection *conn = data;
>> @@ -62,7 +86,7 @@ static void rds_ib_qp_event_handler(struct ib_event 
>> *event, void *data)
>>  
>>      switch (event->event) {
>>          case IB_EVENT_COMM_EST:
>> -            rds_connect_complete(conn);
>> +            rds_ib_connect_complete(conn);
>>              break;
>>          default:
>>              printk(KERN_WARNING "RDS/ib: unhandled QP event %u "
>> @@ -98,10 +122,13 @@ static int rds_ib_setup_qp(struct rds_connection 
>> *conn)
>>          goto out;
>>      }
>>  
>> +    printk(KERN_DEBUG "Creating QP, recv=%u, send=%u\n",
>> +            ic->i_recv_ring.w_nr,
>> +            ic->i_send_ring.w_nr + 1);
>>      ic->i_send_cq = ib_create_cq(ic->i_cm_id->device,
>>                       rds_ib_send_cq_comp_handler,
>>                       rds_ib_cq_event_handler, conn,
>> -                     ic->i_send_ring.w_nr, 0);
>> +                     ic->i_send_ring.w_nr + 1, 0);
>>      if (IS_ERR(ic->i_send_cq)) {
>>          ret = PTR_ERR(ic->i_send_cq);
>>          ic->i_send_cq = NULL;
>> @@ -378,7 +405,7 @@ static int rds_ib_cm_event_handler(struct 
>> rdma_cm_id *cm_id,
>>              break;
>>  
>>          case RDMA_CM_EVENT_ESTABLISHED:
>> -            rds_connect_complete(conn);
>> +            rds_ib_connect_complete(conn);
>>              break;
>>  
>>          case RDMA_CM_EVENT_ADDR_ERROR:
>



More information about the rds-devel mailing list