Merge "Add missing man pages to doc build list"

[ric-plt/lib/rmr.git] / src / rmr / nng / src / sr_nng_static.c
diff --git a/src/rmr/nng/src/sr_nng_static.c b/src/rmr/nng/src/sr_nng_static.c

index 79e6793..bb45ac6 100644 (file)
--- a/src/rmr/nng/src/sr_nng_static.c
+++ b/src/rmr/nng/src/sr_nng_static.c
@@ -1,4 +1,4 @@
-// : vi ts=4 sw=4 noet :
+// vim: ts=4 sw=4 noet :
  /*
  ==================================================================================
         Copyright (c) 2019 Nokia
@@ -42,6 +42,10 @@
         into the message, and sets errno to something that might be useful.
         If we don't have a specific RMr state, then we return the default (e.g.
         receive failed).
+
+       The addition of the connection shut error code to the switch requires
+       that the NNG version at commit e618abf8f3db2a94269a (or after) be
+       used for compiling RMR. 
  */
  static inline int xlate_nng_state( int state, int def_state ) {
  
@@ -81,6 +85,7 @@ static inline int xlate_nng_state( int state, int def_state ) {
                         state = def_state;
                         break;
  
+               case NNG_ECONNSHUT:                                     // new error with nng commit e618abf8f3db2a94269a79c8901a51148d48fcc2 (Sept 2019)
                 case NNG_ECLOSED:
                         errno  = EBADFD;                                // file des not in a good state for the operation
                         state = def_state;
@@ -121,7 +126,7 @@ static rmr_mbuf_t* alloc_zcmsg( uta_ctx_t* ctx, rmr_mbuf_t* msg, int size, int s
         if( msg == NULL ) {
                 msg = (rmr_mbuf_t *) malloc( sizeof *msg );
                 if( msg == NULL ) {
-                       fprintf( stderr, "[CRI] rmr_alloc_zc: cannot get memory for message\n" );
+                       rmr_vlog( RMR_VL_CRIT, "rmr_alloc_zc: cannot get memory for message\n" );
                         exit( 1 );
                 }
         } else {
@@ -131,7 +136,7 @@ static rmr_mbuf_t* alloc_zcmsg( uta_ctx_t* ctx, rmr_mbuf_t* msg, int size, int s
         memset( msg, 0, sizeof( *msg ) );
  
         if( (state = nng_msg_alloc( (nng_msg **) &msg->tp_buf, mlen )) != 0 ) {
-               fprintf( stderr, "[CRI] rmr_alloc_zc: cannot get memory for zero copy buffer: %d\n", ENOMEM );
+               rmr_vlog( RMR_VL_CRIT, "rmr_alloc_zc: cannot get memory for zero copy buffer: %d\n", ENOMEM );
                 abort( );                                                                                       // toss out a core file for this
         }
  
@@ -156,7 +161,7 @@ static rmr_mbuf_t* alloc_zcmsg( uta_ctx_t* ctx, rmr_mbuf_t* msg, int size, int s
         strncpy( (char *) ((uta_mhdr_t *)msg->header)->src, ctx->my_name, RMR_MAX_SRC );
         strncpy( (char *) ((uta_mhdr_t *)msg->header)->srcip, ctx->my_ip, RMR_MAX_SRC );
  
-       if( DEBUG > 1 ) fprintf( stderr, "[DBUG] alloc_zcmsg mlen=%ld size=%d mpl=%d flags=%02x\n", (long) mlen, size, ctx->max_plen, msg->flags );
+       if( DEBUG > 1 ) rmr_vlog( RMR_VL_DEBUG, "alloc_zcmsg mlen=%ld size=%d mpl=%d flags=%02x\n", (long) mlen, size, ctx->max_plen, msg->flags );
  
         return msg;
  }
@@ -172,7 +177,7 @@ static rmr_mbuf_t* alloc_mbuf( uta_ctx_t* ctx, int state ) {
  
         msg = (rmr_mbuf_t *) malloc( sizeof *msg );
         if( msg == NULL ) {
-               fprintf( stderr, "[CRI] rmr_alloc_zc: cannot get memory for message\n" );
+               rmr_vlog( RMR_VL_CRIT, "rmr_alloc_zc: cannot get memory for message\n" );
                 exit( 1 );
         }
  
@@ -270,14 +275,14 @@ static inline rmr_mbuf_t* clone_msg( rmr_mbuf_t* old_msg  ) {
  
         nm = (rmr_mbuf_t *) malloc( sizeof *nm );
         if( nm == NULL ) {
-               fprintf( stderr, "[CRI] rmr_clone: cannot get memory for message buffer\n" );
+               rmr_vlog( RMR_VL_CRIT, "rmr_clone: cannot get memory for message buffer\n" );
                 exit( 1 );
         }
         memset( nm, 0, sizeof( *nm ) );
  
         mlen = old_msg->alloc_len;                                                                              // length allocated before
         if( (state = nng_msg_alloc( (nng_msg **) &nm->tp_buf, mlen )) != 0 ) {
-               fprintf( stderr, "[CRI] rmr_clone: cannot get memory for zero copy buffer: %d\n", ENOMEM );
+               rmr_vlog( RMR_VL_CRIT, "rmr_clone: cannot get memory for zero copy buffer: %d\n", ENOMEM );
                 exit( 1 );
         }
  
@@ -291,7 +296,7 @@ static inline rmr_mbuf_t* clone_msg( rmr_mbuf_t* old_msg  ) {
  
                 default:                                                                                        // current message always caught  here
                         hdr = nm->header;
-                       memcpy( hdr, old_msg->header, RMR_HDR_LEN( old_msg->header ) + RMR_TR_LEN( old_msg->header ) + RMR_D1_LEN( old_msg->header ) + RMR_D2_LEN( old_msg->header ));  // copy complete header, trace and other data
+                       memcpy( hdr, old_msg->header, RMR_HDR_LEN( old_msg->header ) ); // copy complete header, trace and other data
                         nm->payload = PAYLOAD_ADDR( hdr );                              // at user payload
                         break;
         }
@@ -301,8 +306,9 @@ static inline rmr_mbuf_t* clone_msg( rmr_mbuf_t* old_msg  ) {
         nm->sub_id = old_msg->sub_id;
         nm->len = old_msg->len;                                                                 // length of data in the payload
         nm->alloc_len = mlen;                                                                   // length of allocated payload
+       if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "clone values: mty=%d sid=%d len=%d alloc=%d\n", nm->mtype, nm->sub_id, nm->len, nm->alloc_len );
  
-       nm->xaction = hdr->xid;                                                                 // reference xaction
+       nm->xaction = &hdr->xid[0];                                                     // point at transaction id in header area
         nm->state = old_msg->state;                                                             // fill in caller's state (likely the state of the last operation)
         nm->flags = old_msg->flags | MFL_ZEROCOPY;                              // this is a zerocopy sendable message
         memcpy( nm->payload, old_msg->payload, old_msg->len );
@@ -326,7 +332,7 @@ static inline rmr_mbuf_t* realloc_msg( rmr_mbuf_t* old_msg, int tr_len  ) {
  
         nm = (rmr_mbuf_t *) malloc( sizeof *nm );
         if( nm == NULL ) {
-               fprintf( stderr, "[CRI] rmr_clone: cannot get memory for message buffer\n" );
+               rmr_vlog( RMR_VL_CRIT, "rmr_clone: cannot get memory for message buffer\n" );
                 exit( 1 );
         }
         memset( nm, 0, sizeof( *nm ) );
@@ -335,9 +341,9 @@ static inline rmr_mbuf_t* realloc_msg( rmr_mbuf_t* old_msg, int tr_len  ) {
         tr_old_len = RMR_TR_LEN( hdr );                         // bytes in old header for trace
  
         mlen = old_msg->alloc_len + (tr_len - tr_old_len);                                                      // new length with trace adjustment
-       if( DEBUG ) fprintf( stderr, "[DBUG] tr_realloc old size=%d new size=%d new tr_len=%d\n", (int) old_msg->alloc_len, (int) mlen, (int) tr_len );
+       if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "tr_realloc old size=%d new size=%d new tr_len=%d\n", (int) old_msg->alloc_len, (int) mlen, (int) tr_len );
         if( (state = nng_msg_alloc( (nng_msg **) &nm->tp_buf, mlen )) != 0 ) {
-               fprintf( stderr, "[CRI] rmr_clone: cannot get memory for zero copy buffer: %d\n", ENOMEM );
+               rmr_vlog( RMR_VL_CRIT, "rmr_clone: cannot get memory for zero copy buffer: %d\n", ENOMEM );
                 exit( 1 );
         }
  
@@ -371,7 +377,7 @@ static inline rmr_mbuf_t* realloc_msg( rmr_mbuf_t* old_msg, int tr_len  ) {
         nm->len = old_msg->len;                                                                 // length of data in the payload
         nm->alloc_len = mlen;                                                                   // length of allocated payload
  
-       nm->xaction = hdr->xid;                                                                 // reference xaction
+       nm->xaction = &hdr->xid[0];                                                     // point at transaction id in header area
         nm->state = old_msg->state;                                                             // fill in caller's state (likely the state of the last operation)
         nm->flags = old_msg->flags | MFL_ZEROCOPY;                              // this is a zerocopy sendable message
         memcpy( nm->payload, old_msg->payload, old_msg->len );
@@ -379,6 +385,113 @@ static inline rmr_mbuf_t* realloc_msg( rmr_mbuf_t* old_msg, int tr_len  ) {
         return nm;
  }
  
+/*
+       Realloc the message such that the payload is at least payload_len bytes.  If the current
+       payload size is large enough, no action is taken. If copy is false, the actual payload
+       bytes are NOT copied.  This allows a caller to realloc for a response message (to retain
+       the source information which would be lost on a simple alloc) which has no need for the
+       original message.
+
+       The old message buffer will reference the new underlying transport, and the original payload
+       will be lost unless clone is set to true. If clone is true, the old message buffer will continue
+       to reference the original payload, and a new message buffer will be allocated (even if the
+       payload size in the old message was larger than requested).
+
+       The return value is a pointer to the message with at least payload_len bytes allocated. It 
+       will be the same as the old_message if clone is false.
+
+       CAUTION:
+       If the message is not a message which was received, the mtype, sub-id, length values in the
+       RMR header in the allocated transport buffer will NOT be accurate and will cause the resulting
+       mbuffer information for mtype and subid to be reset even when copy is true. To avoid silently
+       resetting information in the mbuffer, this function will reset the mbuf values from the current
+       settings and NOT from the copied RMR header in transport buffer.
+*/
+static inline rmr_mbuf_t* realloc_payload( rmr_mbuf_t* old_msg, int payload_len, int copy, int clone ) {
+       rmr_mbuf_t* nm = NULL;  // new message buffer when cloning (otherwise aliases old_msg)
+       size_t  mlen;                   // total size (RMR header + payload) requested for the new transport buffer
+       int state;                              // result of the nng allocation
+       uta_mhdr_t* omhdr;              // old message header
+       int old_psize = 0;              // current size of message for payload
+       int     hdr_len = 0;            // length of RMR header in old msg
+       void*   old_tp_buf;             // pointer to the old tp buffer
+       int     free_tp = 1;            // free the transport buffer (old) when done (when not cloning)
+       int             old_mt;                 // msg type and sub-id from the message passed in
+       int             old_sid;
+       int             old_len;                // payload length in use in the message passed in
+
+       if( old_msg == NULL || payload_len <= 0 ) {
+               errno = EINVAL;
+               return NULL;
+       }
+
+       old_mt = old_msg->mtype;                        // captured now; ref_tpbuf() below may reset the mbuf fields
+       old_sid = old_msg->sub_id;
+       old_len = old_msg->len;
+       old_psize = old_msg->alloc_len - RMR_HDR_LEN( old_msg->header );                                // allocated transport size less the header and other data bits
+       if( !clone  && payload_len <= old_psize ) {                                                             // old message is large enough, nothing to do
+               if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "rmr_realloc_payload: old msg payload larger than requested: cur=%d need=%d\n", old_psize, payload_len );
+               return old_msg;
+       }
+
+       hdr_len = RMR_HDR_LEN( old_msg->header );
+       old_tp_buf = old_msg->tp_buf;                   // remember it; when not cloning nm->tp_buf is overwritten below
+
+       if( clone ) {
+               if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "rmr_realloc_payload: cloning message\n" );
+               free_tp = 0;                                            // old_msg keeps referencing its transport buffer
+
+               nm = (rmr_mbuf_t *) malloc( sizeof( *nm ) );
+               if( nm == NULL ) {
+                       rmr_vlog( RMR_VL_CRIT, "rmr_realloc_payload: cannot get memory for message buffer. bytes requested: %d\n", (int) sizeof(*nm) );
+                       return NULL;
+               }
+               memset( nm, 0, sizeof( *nm ) );
+       } else {
+               nm = old_msg;
+       }
+
+       omhdr = old_msg->header;
+       mlen = hdr_len + (payload_len > old_psize ? payload_len : old_psize);           // must have larger in case copy is true
+
+       if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "reallocate for payload increase. new message size: %d\n", (int) mlen );
+       if( (state = nng_msg_alloc( (nng_msg **) &nm->tp_buf, mlen )) != 0 ) {
+               rmr_vlog( RMR_VL_CRIT, "rmr_realloc_payload: cannot get memory for zero copy buffer. bytes requested: %d\n", (int) mlen );
+               if( clone ) free( nm );                         // the mbuf was allocated here and is not visible to the caller; do not leak it
+               errno = ENOMEM;                                         // set last so the logging/free calls cannot disturb what the caller sees
+               return NULL;
+       }
+
+       nm->header = nng_msg_body( nm->tp_buf );                                // set and copy the header from old message
+       SET_HDR_LEN( nm->header );
+
+       if( copy ) {                                                                                                                            // if we need to copy the old payload too
+               if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "rmr_realloc_payload: copy payload into new message: %d bytes\n", old_psize );
+               memcpy( nm->header, omhdr, sizeof( char ) * (old_psize + RMR_HDR_LEN( omhdr )) );
+       } else {                                                                                                                                        // just need to copy header
+               if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "rmr_realloc_payload: copy only header into new message: %d bytes\n", RMR_HDR_LEN( omhdr ) );
+               memcpy( nm->header, omhdr, sizeof( char ) * RMR_HDR_LEN( omhdr ) );
+       }
+
+       ref_tpbuf( nm, mlen );                  // set payload and other pointers in the message to the new tp buffer
+
+       if( !copy ) {
+               nm->mtype = -1;                                         // didn't copy payload, so mtype and sub-id are invalid
+               nm->sub_id = -1;
+               nm->len = 0;                                            // and len is 0
+       } else {
+               nm->len = old_len;                                      // we must force these to avoid losing info if msg wasn't a received message
+               nm->mtype = old_mt;
+               nm->sub_id = old_sid;
+       }
+
+       if( free_tp && old_tp_buf != NULL ) {
+               nng_msg_free( (nng_msg *) old_tp_buf );         // not cloned, so no references remain; the buffer is an nng message and must go back through nng, not free()
+       }
+
+       return nm;
+}
+
  /*
         This is the receive work horse used by the outer layer receive functions.
         It waits for a message to be received on our listen socket. If old msg
@@ -402,6 +515,9 @@ static inline rmr_mbuf_t* realloc_msg( rmr_mbuf_t* old_msg, int tr_len  ) {
         reuse.  They have their reasons I guess.  Thus, we will free
         the old transport buffer if user passes the message in; at least
         our mbuf will be reused.
+
+       When msg->state is not ok, this function must set tp_state in the message as some API
+       functions return the message directly and do not propagate errno into the message.
  */
  static rmr_mbuf_t* rcv_msg( uta_ctx_t* ctx, rmr_mbuf_t* old_msg ) {
         int state;
@@ -428,11 +544,14 @@ static rmr_mbuf_t* rcv_msg( uta_ctx_t* ctx, rmr_mbuf_t* old_msg ) {
  
         msg->state = nng_recvmsg( ctx->nn_sock, (nng_msg **) &msg->tp_buf, NO_FLAGS );                  // blocks hard until received
         if( (msg->state = xlate_nng_state( msg->state, RMR_ERR_RCVFAILED )) != RMR_OK ) {
+               msg->tp_state = errno;
                 return msg;
         }
  
+       msg->tp_state = 0;
         if( msg->tp_buf == NULL ) {             // if state is good this _should_ not be nil, but parninoia says check anyway
                 msg->state = RMR_ERR_EMPTY;
+               msg->tp_state = 0;
                 return msg;
         }
  
@@ -442,10 +561,11 @@ static rmr_mbuf_t* rcv_msg( uta_ctx_t* ctx, rmr_mbuf_t* old_msg ) {
                 hdr = (uta_mhdr_t *) msg->header;
                 msg->flags |= MFL_ADDSRC;                                       // turn on so if user app tries to send this buffer we reset src
  
-               if( DEBUG > 1 ) fprintf( stderr, "[DBUG] rcv_msg: got something: type=%d state=%d len=%d diff=%ld\n",
+               if( DEBUG > 1 ) rmr_vlog( RMR_VL_DEBUG, "rcv_msg: got something: type=%d state=%d len=%d diff=%ld\n",
                                 msg->mtype, msg->state, msg->len,  msg->payload - (unsigned char *) msg->header );
         } else {
                 msg->state = RMR_ERR_EMPTY;
+               msg->tp_state = 0;
                 msg->len = 0;
                 msg->alloc_len = rsize;
                 msg->payload = NULL;
@@ -495,7 +615,7 @@ static void* rcv_payload( uta_ctx_t* ctx, rmr_mbuf_t* old_msg ) {
         msg->payload = msg->header;                                     // payload is the whole thing; no header
         msg->xaction = NULL;
  
-       if( DEBUG > 1 ) fprintf( stderr, "[DBUG] rcv_payload: got something: type=%d state=%d len=%d\n", msg->mtype, msg->state, msg->len );
+       if( DEBUG > 1 ) rmr_vlog( RMR_VL_DEBUG, "rcv_payload: got something: type=%d state=%d len=%d\n", msg->mtype, msg->state, msg->len );
  
         return msg;
  }
@@ -509,13 +629,16 @@ static void* rcv_payload( uta_ctx_t* ctx, rmr_mbuf_t* old_msg ) {
  
         Called by rmr_send_msg() and rmr_rts_msg(), etc. and thus we assume that all pointer
         validation has been done prior.
+
+       When msg->state is not ok, this function must set tp_state in the message as some API
+       functions return the message directly and do not propagate errno into the message.
  */
  static rmr_mbuf_t* send_msg( uta_ctx_t* ctx, rmr_mbuf_t* msg, nng_socket nn_sock, int retries ) {
         int state;
         uta_mhdr_t*     hdr;
         int nng_flags = NNG_FLAG_NONBLOCK;              // if we need to set any nng flags (zc buffer) add it to this
         int spin_retries = 1000;                                // if eagain/timeout we'll spin, at max, this many times before giving up the CPU
-       int     tr_len;                                                         // trace len in sending message so we alloc new message with same trace size
+       int     tr_len;                                                         // trace len in sending message so we alloc new message with same trace sizes
  
         // future: ensure that application did not overrun the XID buffer; last byte must be 0
  
@@ -530,6 +653,11 @@ static rmr_mbuf_t* send_msg( uta_ctx_t* ctx, rmr_mbuf_t* msg, nng_socket nn_sock
                 strncpy( (char *) ((uta_mhdr_t *)msg->header)->srcip, ctx->my_ip, RMR_MAX_SRC );
         }
  
+       if( retries == 0 ) {
+               spin_retries = 100;
+               retries++;
+       }
+
         errno = 0;
         msg->state = RMR_OK;
         if( msg->flags & MFL_ZEROCOPY ) {                                                                       // faster sending with zcopy buffer
@@ -560,6 +688,7 @@ static rmr_mbuf_t* send_msg( uta_ctx_t* ctx, rmr_mbuf_t* msg, nng_socket nn_sock
                 // future: this should not happen as all buffers we deal with are zc buffers; might make sense to remove the test and else
                 msg->state = RMR_ERR_SENDFAILED;
                 errno = ENOTSUP;
+               msg->tp_state = errno;
                 return msg;
                 /*
                 NOT SUPPORTED
@@ -585,7 +714,7 @@ static rmr_mbuf_t* send_msg( uta_ctx_t* ctx, rmr_mbuf_t* msg, nng_socket nn_sock
                         msg->state = xlate_nng_state( msg->state, RMR_ERR_SENDFAILED );         // xlate to our state and set errno
                 }
  
-               if( DEBUG ) fprintf( stderr, "[DBUG] send failed: %d %s\n", (int) msg->state, strerror( msg->state ) );
+               if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "send failed: %d %s\n", (int) msg->state, strerror( msg->state ) );
         }
  
         return msg;
@@ -607,6 +736,10 @@ static rmr_mbuf_t* send_msg( uta_ctx_t* ctx, rmr_mbuf_t* msg, nng_socket nn_sock
         message type is used.  If the initial lookup, with a subid, fails, then a
         second lookup using just the mtype is tried.
  
+       When msg->state is not OK, this function must set tp_state in the message as
+       some API functions return the message directly and do not propagate errno into
+       the message.
+
         CAUTION: this is a non-blocking send.  If the message cannot be sent, then
                 it will return with an error and errno set to eagain. If the send is
                 a limited fanout, then the returned status is the status of the last
@@ -614,30 +747,33 @@ static rmr_mbuf_t* send_msg( uta_ctx_t* ctx, rmr_mbuf_t* msg, nng_socket nn_sock
  
  */
  static  rmr_mbuf_t* mtosend_msg( void* vctx, rmr_mbuf_t* msg, int max_to ) {
+       endpoint_t*     ep;                                     // end point that we're attempting to send to
+       rtable_ent_t*   rte;                    // the route table entry which matches the message key
         nng_socket      nn_sock;                        // endpoint socket for send
         uta_ctx_t*      ctx;
         int                     group;                          // selected group to get socket for
         int                     send_again;                     // true if the message must be sent again
         rmr_mbuf_t*     clone_m;                        // cloned message for an nth send
         int                     sock_ok;                        // got a valid socket from round robin select
-       uint64_t         key;                           // mtype or sub-id/mtype sym table key
-       int                     altk_ok = 0;            // set true if we can lookup on alternate key if mt/sid lookup fails
         char*           d1;
+       int                     ok_sends = 0;           // track number of ok sends
  
         if( (ctx = (uta_ctx_t *) vctx) == NULL || msg == NULL ) {               // bad stuff, bail fast
                 errno = EINVAL;                                                                                         // if msg is null, this is their clue
                 if( msg != NULL ) {
                         msg->state = RMR_ERR_BADARG;
                         errno = EINVAL;                                                                                 // must ensure it's not eagain
+                       msg->tp_state = errno;
                 }
                 return msg;
         }
  
         errno = 0;                                                                                                      // clear; nano might set, but ensure it's not left over if it doesn't
         if( msg->header == NULL ) {
-               fprintf( stderr, "rmr_send_msg: ERROR: message had no header\n" );
+               fprintf( stderr, "rmr_mtosend_msg: ERROR: message had no header\n" );
                 msg->state = RMR_ERR_NOHDR;
                 errno = EBADMSG;                                                                                        // must ensure it's not eagain
+               msg->tp_state = errno;
                 return msg;
         }
  
@@ -645,48 +781,97 @@ static  rmr_mbuf_t* mtosend_msg( void* vctx, rmr_mbuf_t* msg, int max_to ) {
                 max_to = ctx->send_retries;             // convert to retries
         }
  
+       if( (rte = uta_get_rte( ctx->rtable, msg->sub_id, msg->mtype, TRUE )) == NULL ) {               // find the entry which matches subid/type allow fallback to type only key
+               if( ctx->flags & CTXFL_WARN ) {
+                       rmr_vlog( RMR_VL_WARN, "no endpoint for mtype=%d sub_id=%d\n", msg->mtype, msg->sub_id );
+               }
+               msg->state = RMR_ERR_NOENDPT;
+               errno = ENXIO;                                                                          // must ensure it's not eagain
+               msg->tp_state = errno;
+               return msg;                                                                                     // caller can resend (maybe) or free
+       }
+
         send_again = 1;                                                                                 // force loop entry
         group = 0;                                                                                              // always start with group 0
-
-       key = build_rt_key( msg->sub_id, msg->mtype );                  // route table key to find the entry
-       if( msg->sub_id != UNSET_SUBID ) {
-               altk_ok = 1;                                                                            // if caller's sub-id doesn't hit with mtype, allow mtype only key for retry
-       }
         while( send_again ) {
-               sock_ok = uta_epsock_rr( ctx->rtable, key, group, &send_again, &nn_sock );              // round robin sel epoint; again set if mult groups
-               if( DEBUG ) fprintf( stderr, "[DBUG] send msg: type=%d again=%d group=%d len=%d sock_ok=%d ak_ok=%d\n",
-                               msg->mtype, send_again, group, msg->len, sock_ok, altk_ok );
-
-               if( ! sock_ok ) {
-                       if( altk_ok ) {                                                                                 // we can try with the alternate (no sub-id) key
-                               altk_ok = 0;
-                               key = build_rt_key( UNSET_SUBID, msg->mtype );          // build with just the mtype and try again
-                               send_again = 1;                                                                         // ensure we don't exit the while
-                               continue;
-                       }
-
-                       msg->state = RMR_ERR_NOENDPT;
-                       errno = ENXIO;                                                                                  // must ensure it's not eagain
-                       return msg;                                                                                             // caller can resend (maybe) or free
+               if( rte->nrrgroups > 0 ) {                                                      // this is a round robin entry
+                       sock_ok = uta_epsock_rr( rte, group, &send_again, &nn_sock, &ep );              // select endpt from rr group and set again if more groups
+               } else {
+                       sock_ok = epsock_meid( ctx->rtable, msg, &nn_sock, &ep );
+                       send_again = 0;
                 }
  
+               if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "mtosend_msg: flgs=0x%04x type=%d again=%d group=%d len=%d sock_ok=%d\n",
+                               msg->flags, msg->mtype, send_again, group, msg->len, sock_ok );
+
                 group++;
  
-               if( send_again ) {
-                       clone_m = clone_msg( msg );                                                             // must make a copy as once we send this message is not available
-                       if( DEBUG ) fprintf( stderr, "[DBUG] msg cloned: type=%d len=%d\n", msg->mtype, msg->len );
-                       msg->flags |= MFL_NOALLOC;                                                              // send should not allocate a new buffer
-                       msg = send_msg( ctx, msg, nn_sock, max_to );                    // do the hard work, msg should be nil on success
-                       /*
-                       if( msg ) {
-                               // error do we need to count successes/errors, how to report some success, esp if last fails?
+               if( sock_ok ) {                                                                                                 // with an rte we _should_ always have a socket, but don't bet on it
+                       if( send_again ) {
+                               clone_m = clone_msg( msg );                                                             // must make a copy as once we send this message is not available
+                               if( clone_m == NULL ) {
+                                       msg->state = RMR_ERR_SENDFAILED;
+                                       errno = ENOMEM;
+                                       msg->tp_state = errno;
+                                       if( ctx->flags & CTXFL_WARN ) {
+                                               rmr_vlog( RMR_VL_WARN, "unable to clone message for multiple rr-group send\n" );
+                                       }
+                                       return msg;
+                               }
+
+                               if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "msg cloned: type=%d len=%d\n", msg->mtype, msg->len );
+                               msg->flags |= MFL_NOALLOC;                                                              // keep send from allocating a new message; we have a clone to use
+                               msg = send_msg( ctx, msg, nn_sock, max_to );                    // do the hard work, msg should be nil on success
+       
+                               if( msg != NULL ) {                                                                             // returned message indicates send error of some sort
+                                       rmr_free_msg( msg );                                                            // must ditchone; pick msg so we don't have to unfiddle flags
+                                       msg = clone_m;
+                               } else {
+                                       ok_sends++;
+                                       msg = clone_m;                                                                          // clone will be the next to send
+                               }
+                       } else {
+                               msg = send_msg( ctx, msg, nn_sock, max_to );                    // send the last, and allocate a new buffer; drops the clone if it was
+                               if( DEBUG ) {
+                                       if( msg == NULL ) {
+                                               rmr_vlog( RMR_VL_DEBUG, "mtosend_msg:  send returned nil message!\n" );         
+                                       }
+                               }
                         }
-                       */
  
-                       msg = clone_m;                                                                                  // clone will be the next to send
+                       if( ep != NULL && msg != NULL ) {
+                               switch( msg->state ) {
+                                       case RMR_OK:
+                                               ep->scounts[EPSC_GOOD]++;
+                                               break;
+                               
+                                       case RMR_ERR_RETRY:
+                                               ep->scounts[EPSC_TRANS]++;
+                                               break;
+
+                                       default:
+                                               ep->scounts[EPSC_FAIL]++;
+                                               break;
+                               }
+                       }
                 } else {
-                       msg = send_msg( ctx, msg, nn_sock, max_to );                    // send the last, and allocate a new buffer; drops the clone if it was
+                       if( ctx->flags & CTXFL_WARN ) {
+                               rmr_vlog( RMR_VL_WARN, "invalid socket for rte, setting no endpoint err: mtype=%d sub_id=%d\n", msg->mtype, msg->sub_id );
+                       }
+                       msg->state = RMR_ERR_NOENDPT;
+                       errno = ENXIO;
+               }
+       }
+
+       if( msg ) {                                                     // call functions don't get a buffer back, so a nil check is required
+               msg->flags &= ~MFL_NOALLOC;             // must return with this flag off
+               if( ok_sends ) {                                // multiple rr-groups and one was successful; report ok
+                       msg->state = RMR_OK;
                 }
+       
+               if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "final send stats: ok=%d group=%d state=%d\n\n", ok_sends, group, msg->state );
+       
+               msg->tp_state = errno;
         }
  
         return msg;                                                                     // last message caries the status of last/only send attempt