-// : vi ts=4 sw=4 noet :
+// vim: ts=4 sw=4 noet :
/*
==================================================================================
Copyright (c) 2019 Nokia
into the message, and sets errno to something that might be useful.
If we don't have a specific RMr state, then we return the default (e.g.
receive failed).
+
+ The addition of the connection shut error code to the switch requires
+ that the NNG version at commit e618abf8f3db2a94269a (or after) be
+ used for compiling RMR.
*/
static inline int xlate_nng_state( int state, int def_state ) {
state = def_state;
break;
+ case NNG_ECONNSHUT: // new error with nng commit e618abf8f3db2a94269a79c8901a51148d48fcc2 (Sept 2019)
case NNG_ECLOSED:
errno = EBADFD; // file des not in a good state for the operation
state = def_state;
if( msg == NULL ) {
msg = (rmr_mbuf_t *) malloc( sizeof *msg );
if( msg == NULL ) {
- fprintf( stderr, "[CRI] rmr_alloc_zc: cannot get memory for message\n" );
+ rmr_vlog( RMR_VL_CRIT, "rmr_alloc_zc: cannot get memory for message\n" );
exit( 1 );
}
} else {
memset( msg, 0, sizeof( *msg ) );
if( (state = nng_msg_alloc( (nng_msg **) &msg->tp_buf, mlen )) != 0 ) {
- fprintf( stderr, "[CRI] rmr_alloc_zc: cannot get memory for zero copy buffer: %d\n", ENOMEM );
+ rmr_vlog( RMR_VL_CRIT, "rmr_alloc_zc: cannot get memory for zero copy buffer: %d\n", ENOMEM );
abort( ); // toss out a core file for this
}
strncpy( (char *) ((uta_mhdr_t *)msg->header)->src, ctx->my_name, RMR_MAX_SRC );
strncpy( (char *) ((uta_mhdr_t *)msg->header)->srcip, ctx->my_ip, RMR_MAX_SRC );
- if( DEBUG > 1 ) fprintf( stderr, "[DBUG] alloc_zcmsg mlen=%ld size=%d mpl=%d flags=%02x\n", (long) mlen, size, ctx->max_plen, msg->flags );
+ if( DEBUG > 1 ) rmr_vlog( RMR_VL_DEBUG, "alloc_zcmsg mlen=%ld size=%d mpl=%d flags=%02x\n", (long) mlen, size, ctx->max_plen, msg->flags );
return msg;
}
msg = (rmr_mbuf_t *) malloc( sizeof *msg );
if( msg == NULL ) {
- fprintf( stderr, "[CRI] rmr_alloc_zc: cannot get memory for message\n" );
+ rmr_vlog( RMR_VL_CRIT, "rmr_alloc_zc: cannot get memory for message\n" );
exit( 1 );
}
nm = (rmr_mbuf_t *) malloc( sizeof *nm );
if( nm == NULL ) {
- fprintf( stderr, "[CRI] rmr_clone: cannot get memory for message buffer\n" );
+ rmr_vlog( RMR_VL_CRIT, "rmr_clone: cannot get memory for message buffer\n" );
exit( 1 );
}
memset( nm, 0, sizeof( *nm ) );
mlen = old_msg->alloc_len; // length allocated before
if( (state = nng_msg_alloc( (nng_msg **) &nm->tp_buf, mlen )) != 0 ) {
- fprintf( stderr, "[CRI] rmr_clone: cannot get memory for zero copy buffer: %d\n", ENOMEM );
+ rmr_vlog( RMR_VL_CRIT, "rmr_clone: cannot get memory for zero copy buffer: %d\n", ENOMEM );
exit( 1 );
}
default: // current message always caught here
hdr = nm->header;
- memcpy( hdr, old_msg->header, RMR_HDR_LEN( old_msg->header ) + RMR_TR_LEN( old_msg->header ) + RMR_D1_LEN( old_msg->header ) + RMR_D2_LEN( old_msg->header )); // copy complete header, trace and other data
+ memcpy( hdr, old_msg->header, RMR_HDR_LEN( old_msg->header ) ); // copy complete header, trace and other data
nm->payload = PAYLOAD_ADDR( hdr ); // at user payload
break;
}
nm->sub_id = old_msg->sub_id;
nm->len = old_msg->len; // length of data in the payload
nm->alloc_len = mlen; // length of allocated payload
+ if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "clone values: mty=%d sid=%d len=%d alloc=%d\n", nm->mtype, nm->sub_id, nm->len, nm->alloc_len );
- nm->xaction = hdr->xid; // reference xaction
+ nm->xaction = &hdr->xid[0]; // point at transaction id in header area
nm->state = old_msg->state; // fill in caller's state (likely the state of the last operation)
nm->flags = old_msg->flags | MFL_ZEROCOPY; // this is a zerocopy sendable message
memcpy( nm->payload, old_msg->payload, old_msg->len );
nm = (rmr_mbuf_t *) malloc( sizeof *nm );
if( nm == NULL ) {
- fprintf( stderr, "[CRI] rmr_clone: cannot get memory for message buffer\n" );
+ rmr_vlog( RMR_VL_CRIT, "rmr_clone: cannot get memory for message buffer\n" );
exit( 1 );
}
memset( nm, 0, sizeof( *nm ) );
tr_old_len = RMR_TR_LEN( hdr ); // bytes in old header for trace
mlen = old_msg->alloc_len + (tr_len - tr_old_len); // new length with trace adjustment
- if( DEBUG ) fprintf( stderr, "[DBUG] tr_realloc old size=%d new size=%d new tr_len=%d\n", (int) old_msg->alloc_len, (int) mlen, (int) tr_len );
+ if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "tr_realloc old size=%d new size=%d new tr_len=%d\n", (int) old_msg->alloc_len, (int) mlen, (int) tr_len );
if( (state = nng_msg_alloc( (nng_msg **) &nm->tp_buf, mlen )) != 0 ) {
- fprintf( stderr, "[CRI] rmr_clone: cannot get memory for zero copy buffer: %d\n", ENOMEM );
+ rmr_vlog( RMR_VL_CRIT, "rmr_clone: cannot get memory for zero copy buffer: %d\n", ENOMEM );
exit( 1 );
}
nm->len = old_msg->len; // length of data in the payload
nm->alloc_len = mlen; // length of allocated payload
- nm->xaction = hdr->xid; // reference xaction
+ nm->xaction = &hdr->xid[0]; // point at transaction id in header area
nm->state = old_msg->state; // fill in caller's state (likely the state of the last operation)
nm->flags = old_msg->flags | MFL_ZEROCOPY; // this is a zerocopy sendable message
memcpy( nm->payload, old_msg->payload, old_msg->len );
return nm;
}
+/*
+ Realloc the message such that the payload is at least payload_len bytes. If the current
+ payload size is large enough, no action is taken. If copy is false, the actual payload
+ bytes are NOT copied. This allows a caller to realloc for a response message (to retain
+ the source information which would be lost on a simple alloc) which has no need for the
+ original message.
+
+ The old message buffer will reference the new underlying transport, and the original payload
+ will be lost unless clone is set to true. If clone is true, the old message buffer will continue
+ to reference the original payload, and a new message buffer will be allocated (even if the
+ payload size in the old message was larger than requested).
+
+ The return value is a pointer to the message with at least payload_len bytes allocated. It
+ will be the same as the old_message if clone is false.
+
+ CAUTION:
+ If the message is not a message which was received, the mtype, sub-id, length values in the
+ RMR header in the allocated transport buffer will NOT be accurate and will cause the resulting
+ mbuffer information for mtype and subid to be reset even when copy is true. To avoid silently
+ resetting information in the mbuffer, this funciton will reset the mbuf values from the current
+ settings and NOT from the copied RMR header in transport buffer.
+*/
+static inline rmr_mbuf_t* realloc_payload( rmr_mbuf_t* old_msg, int payload_len, int copy, int clone ) {
+ rmr_mbuf_t* nm = NULL; // new message buffer when cloning
+ size_t mlen;
+ int state;
+ uta_mhdr_t* omhdr; // old message header
+ uta_v1mhdr_t* v1hdr;
+ int tr_old_len; // tr size in new buffer
+ int old_psize = 0; // current size of message for payload
+ int hdr_len = 0; // length of RMR header in old msg
+ void* old_tp_buf; // pointer to the old tp buffer
+ int free_tp = 1; // free the transport buffer (old) when done (when not cloning)
+ int old_mt; // msg type and sub-id from the message passed in
+ int old_sid;
+ int old_len;
+
+ if( old_msg == NULL || payload_len <= 0 ) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ old_mt = old_msg->mtype;
+ old_sid = old_msg->sub_id;
+ old_len = old_msg->len;
+ old_psize = old_msg->alloc_len - RMR_HDR_LEN( old_msg->header ); // allocated transport size less the header and other data bits
+ if( !clone && payload_len <= old_psize ) { // old message is large enough, nothing to do
+ if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "rmr_realloc_payload: old msg payload larger than requested: cur=%d need=%d\n", old_psize, payload_len );
+ return old_msg;
+ }
+
+ hdr_len = RMR_HDR_LEN( old_msg->header );
+ old_tp_buf = old_msg->tp_buf;
+
+ if( clone ) {
+ if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "rmr_realloc_payload: cloning message\n" );
+ free_tp = 0;
+
+ nm = (rmr_mbuf_t *) malloc( sizeof( *nm ) );
+ if( nm == NULL ) {
+ rmr_vlog( RMR_VL_CRIT, "rmr_realloc_payload: cannot get memory for message buffer. bytes requested: %d\n", (int) sizeof(*nm) );
+ return NULL;
+ }
+ memset( nm, 0, sizeof( *nm ) );
+ } else {
+ nm = old_msg;
+ }
+
+ omhdr = old_msg->header;
+ mlen = hdr_len + (payload_len > old_psize ? payload_len : old_psize); // must have larger in case copy is true
+
+ if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "reallocate for payload increase. new message size: %d\n", (int) mlen );
+ if( (state = nng_msg_alloc( (nng_msg **) &nm->tp_buf, mlen )) != 0 ) {
+ rmr_vlog( RMR_VL_CRIT, "rmr_realloc_payload: cannot get memory for zero copy buffer. bytes requested: %d\n", (int) mlen );
+ return NULL;
+ }
+
+ nm->header = nng_msg_body( nm->tp_buf ); // set and copy the header from old message
+ SET_HDR_LEN( nm->header );
+
+ if( copy ) { // if we need to copy the old payload too
+ if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "rmr_realloc_payload: copy payload into new message: %d bytes\n", old_psize );
+ memcpy( nm->header, omhdr, sizeof( char ) * (old_psize + RMR_HDR_LEN( omhdr )) );
+ } else { // just need to copy header
+ if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "rmr_realloc_payload: copy only header into new message: %d bytes\n", RMR_HDR_LEN( nm->header ) );
+ memcpy( nm->header, omhdr, sizeof( char ) * RMR_HDR_LEN( omhdr ) );
+ }
+
+ ref_tpbuf( nm, mlen ); // set payload and other pointers in the message to the new tp buffer
+
+ if( !copy ) {
+ nm->mtype = -1; // didn't copy payload, so mtype and sub-id are invalid
+ nm->sub_id = -1;
+ nm->len = 0; // and len is 0
+ } else {
+ nm->len = old_len; // we must force these to avoid losing info if msg wasn't a received message
+ nm->mtype = old_mt;
+ nm->sub_id = old_sid;
+ }
+
+ if( free_tp ) {
+ free( old_tp_buf ); // we did not clone, so free b/c no references
+ }
+
+ return nm;
+}
+
/*
This is the receive work horse used by the outer layer receive functions.
It waits for a message to be received on our listen socket. If old msg
reuse. They have their reasons I guess. Thus, we will free
the old transport buffer if user passes the message in; at least
our mbuf will be reused.
+
+ When msg->state is not ok, this function must set tp_state in the message as some API
+ fucntions return the message directly and do not propigate errno into the message.
*/
static rmr_mbuf_t* rcv_msg( uta_ctx_t* ctx, rmr_mbuf_t* old_msg ) {
int state;
msg->state = nng_recvmsg( ctx->nn_sock, (nng_msg **) &msg->tp_buf, NO_FLAGS ); // blocks hard until received
if( (msg->state = xlate_nng_state( msg->state, RMR_ERR_RCVFAILED )) != RMR_OK ) {
+ msg->tp_state = errno;
return msg;
}
+ msg->tp_state = 0;
if( msg->tp_buf == NULL ) { // if state is good this _should_ not be nil, but parninoia says check anyway
msg->state = RMR_ERR_EMPTY;
+ msg->tp_state = 0;
return msg;
}
hdr = (uta_mhdr_t *) msg->header;
msg->flags |= MFL_ADDSRC; // turn on so if user app tries to send this buffer we reset src
- if( DEBUG > 1 ) fprintf( stderr, "[DBUG] rcv_msg: got something: type=%d state=%d len=%d diff=%ld\n",
+ if( DEBUG > 1 ) rmr_vlog( RMR_VL_DEBUG, "rcv_msg: got something: type=%d state=%d len=%d diff=%ld\n",
msg->mtype, msg->state, msg->len, msg->payload - (unsigned char *) msg->header );
} else {
msg->state = RMR_ERR_EMPTY;
+ msg->tp_state = 0;
msg->len = 0;
msg->alloc_len = rsize;
msg->payload = NULL;
msg->payload = msg->header; // payload is the whole thing; no header
msg->xaction = NULL;
- if( DEBUG > 1 ) fprintf( stderr, "[DBUG] rcv_payload: got something: type=%d state=%d len=%d\n", msg->mtype, msg->state, msg->len );
+ if( DEBUG > 1 ) rmr_vlog( RMR_VL_DEBUG, "rcv_payload: got something: type=%d state=%d len=%d\n", msg->mtype, msg->state, msg->len );
return msg;
}
Called by rmr_send_msg() and rmr_rts_msg(), etc. and thus we assume that all pointer
validation has been done prior.
+
+ When msg->state is not ok, this function must set tp_state in the message as some API
+ fucntions return the message directly and do not propigate errno into the message.
*/
static rmr_mbuf_t* send_msg( uta_ctx_t* ctx, rmr_mbuf_t* msg, nng_socket nn_sock, int retries ) {
int state;
uta_mhdr_t* hdr;
int nng_flags = NNG_FLAG_NONBLOCK; // if we need to set any nng flags (zc buffer) add it to this
int spin_retries = 1000; // if eagain/timeout we'll spin, at max, this many times before giving up the CPU
- int tr_len; // trace len in sending message so we alloc new message with same trace size
+ int tr_len; // trace len in sending message so we alloc new message with same trace sizes
// future: ensure that application did not overrun the XID buffer; last byte must be 0
strncpy( (char *) ((uta_mhdr_t *)msg->header)->srcip, ctx->my_ip, RMR_MAX_SRC );
}
+ if( retries == 0 ) {
+ spin_retries = 100;
+ retries++;
+ }
+
errno = 0;
msg->state = RMR_OK;
if( msg->flags & MFL_ZEROCOPY ) { // faster sending with zcopy buffer
// future: this should not happen as all buffers we deal with are zc buffers; might make sense to remove the test and else
msg->state = RMR_ERR_SENDFAILED;
errno = ENOTSUP;
+ msg->tp_state = errno;
return msg;
/*
NOT SUPPORTED
msg->state = xlate_nng_state( msg->state, RMR_ERR_SENDFAILED ); // xlate to our state and set errno
}
- if( DEBUG ) fprintf( stderr, "[DBUG] send failed: %d %s\n", (int) msg->state, strerror( msg->state ) );
+ if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "send failed: %d %s\n", (int) msg->state, strerror( msg->state ) );
}
return msg;
message type is used. If the initial lookup, with a subid, fails, then a
second lookup using just the mtype is tried.
+ When msg->state is not OK, this function must set tp_state in the message as
+ some API fucntions return the message directly and do not propigate errno into
+ the message.
+
CAUTION: this is a non-blocking send. If the message cannot be sent, then
it will return with an error and errno set to eagain. If the send is
a limited fanout, then the returned status is the status of the last
*/
static rmr_mbuf_t* mtosend_msg( void* vctx, rmr_mbuf_t* msg, int max_to ) {
+ endpoint_t* ep; // end point that we're attempting to send to
+ rtable_ent_t* rte; // the route table entry which matches the message key
nng_socket nn_sock; // endpoint socket for send
uta_ctx_t* ctx;
int group; // selected group to get socket for
int send_again; // true if the message must be sent again
rmr_mbuf_t* clone_m; // cloned message for an nth send
int sock_ok; // got a valid socket from round robin select
- uint64_t key; // mtype or sub-id/mtype sym table key
- int altk_ok = 0; // set true if we can lookup on alternate key if mt/sid lookup fails
char* d1;
+ int ok_sends = 0; // track number of ok sends
if( (ctx = (uta_ctx_t *) vctx) == NULL || msg == NULL ) { // bad stuff, bail fast
errno = EINVAL; // if msg is null, this is their clue
if( msg != NULL ) {
msg->state = RMR_ERR_BADARG;
errno = EINVAL; // must ensure it's not eagain
+ msg->tp_state = errno;
}
return msg;
}
errno = 0; // clear; nano might set, but ensure it's not left over if it doesn't
if( msg->header == NULL ) {
- fprintf( stderr, "rmr_send_msg: ERROR: message had no header\n" );
+ fprintf( stderr, "rmr_mtosend_msg: ERROR: message had no header\n" );
msg->state = RMR_ERR_NOHDR;
errno = EBADMSG; // must ensure it's not eagain
+ msg->tp_state = errno;
return msg;
}
max_to = ctx->send_retries; // convert to retries
}
+ if( (rte = uta_get_rte( ctx->rtable, msg->sub_id, msg->mtype, TRUE )) == NULL ) { // find the entry which matches subid/type allow fallback to type only key
+ if( ctx->flags & CTXFL_WARN ) {
+ rmr_vlog( RMR_VL_WARN, "no endpoint for mtype=%d sub_id=%d\n", msg->mtype, msg->sub_id );
+ }
+ msg->state = RMR_ERR_NOENDPT;
+ errno = ENXIO; // must ensure it's not eagain
+ msg->tp_state = errno;
+ return msg; // caller can resend (maybe) or free
+ }
+
send_again = 1; // force loop entry
group = 0; // always start with group 0
-
- key = build_rt_key( msg->sub_id, msg->mtype ); // route table key to find the entry
- if( msg->sub_id != UNSET_SUBID ) {
- altk_ok = 1; // if caller's sub-id doesn't hit with mtype, allow mtype only key for retry
- }
while( send_again ) {
- sock_ok = uta_epsock_rr( ctx->rtable, key, group, &send_again, &nn_sock ); // round robin sel epoint; again set if mult groups
- if( DEBUG ) fprintf( stderr, "[DBUG] send msg: type=%d again=%d group=%d len=%d sock_ok=%d ak_ok=%d\n",
- msg->mtype, send_again, group, msg->len, sock_ok, altk_ok );
-
- if( ! sock_ok ) {
- if( altk_ok ) { // we can try with the alternate (no sub-id) key
- altk_ok = 0;
- key = build_rt_key( UNSET_SUBID, msg->mtype ); // build with just the mtype and try again
- send_again = 1; // ensure we don't exit the while
- continue;
- }
-
- msg->state = RMR_ERR_NOENDPT;
- errno = ENXIO; // must ensure it's not eagain
- return msg; // caller can resend (maybe) or free
+ if( rte->nrrgroups > 0 ) { // this is a round robin entry
+ sock_ok = uta_epsock_rr( rte, group, &send_again, &nn_sock, &ep ); // select endpt from rr group and set again if more groups
+ } else {
+ sock_ok = epsock_meid( ctx->rtable, msg, &nn_sock, &ep );
+ send_again = 0;
}
+ if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "mtosend_msg: flgs=0x%04x type=%d again=%d group=%d len=%d sock_ok=%d\n",
+ msg->flags, msg->mtype, send_again, group, msg->len, sock_ok );
+
group++;
- if( send_again ) {
- clone_m = clone_msg( msg ); // must make a copy as once we send this message is not available
- if( DEBUG ) fprintf( stderr, "[DBUG] msg cloned: type=%d len=%d\n", msg->mtype, msg->len );
- msg->flags |= MFL_NOALLOC; // send should not allocate a new buffer
- msg = send_msg( ctx, msg, nn_sock, max_to ); // do the hard work, msg should be nil on success
- /*
- if( msg ) {
- // error do we need to count successes/errors, how to report some success, esp if last fails?
+ if( sock_ok ) { // with an rte we _should_ always have a socket, but don't bet on it
+ if( send_again ) {
+ clone_m = clone_msg( msg ); // must make a copy as once we send this message is not available
+ if( clone_m == NULL ) {
+ msg->state = RMR_ERR_SENDFAILED;
+ errno = ENOMEM;
+ msg->tp_state = errno;
+ if( ctx->flags & CTXFL_WARN ) {
+ rmr_vlog( RMR_VL_WARN, "unable to clone message for multiple rr-group send\n" );
+ }
+ return msg;
+ }
+
+ if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "msg cloned: type=%d len=%d\n", msg->mtype, msg->len );
+ msg->flags |= MFL_NOALLOC; // keep send from allocating a new message; we have a clone to use
+ msg = send_msg( ctx, msg, nn_sock, max_to ); // do the hard work, msg should be nil on success
+
+ if( msg != NULL ) { // returned message indicates send error of some sort
+ rmr_free_msg( msg ); // must ditchone; pick msg so we don't have to unfiddle flags
+ msg = clone_m;
+ } else {
+ ok_sends++;
+ msg = clone_m; // clone will be the next to send
+ }
+ } else {
+ msg = send_msg( ctx, msg, nn_sock, max_to ); // send the last, and allocate a new buffer; drops the clone if it was
+ if( DEBUG ) {
+ if( msg == NULL ) {
+ rmr_vlog( RMR_VL_DEBUG, "mtosend_msg: send returned nil message!\n" );
+ }
+ }
}
- */
- msg = clone_m; // clone will be the next to send
+ if( ep != NULL && msg != NULL ) {
+ switch( msg->state ) {
+ case RMR_OK:
+ ep->scounts[EPSC_GOOD]++;
+ break;
+
+ case RMR_ERR_RETRY:
+ ep->scounts[EPSC_TRANS]++;
+ break;
+
+ default:
+ ep->scounts[EPSC_FAIL]++;
+ break;
+ }
+ }
} else {
- msg = send_msg( ctx, msg, nn_sock, max_to ); // send the last, and allocate a new buffer; drops the clone if it was
+ if( ctx->flags & CTXFL_WARN ) {
+ rmr_vlog( RMR_VL_WARN, "invalid socket for rte, setting no endpoint err: mtype=%d sub_id=%d\n", msg->mtype, msg->sub_id );
+ }
+ msg->state = RMR_ERR_NOENDPT;
+ errno = ENXIO;
+ }
+ }
+
+ if( msg ) { // call functions don't get a buffer back, so a nil check is required
+ msg->flags &= ~MFL_NOALLOC; // must return with this flag off
+ if( ok_sends ) { // multiple rr-groups and one was successful; report ok
+ msg->state = RMR_OK;
}
+
+ if( DEBUG ) rmr_vlog( RMR_VL_DEBUG, "final send stats: ok=%d group=%d state=%d\n\n", ok_sends, group, msg->state );
+
+ msg->tp_state = errno;
}
return msg; // last message caries the status of last/only send attempt