src/rmr/si/src/rmr_si.c

   1 // vim: ts=4 sw=4 noet :
   2 /*
   3 ==================================================================================
   4         Copyright (c) 2019-2020 Nokia
   5         Copyright (c) 2018-2020 AT&T Intellectual Property.
   6
   7    Licensed under the Apache License, Version 2.0 (the "License");
   8    you may not use this file except in compliance with the License.
   9    You may obtain a copy of the License at
  10
  11            http://www.apache.org/licenses/LICENSE-2.0
  12
  13    Unless required by applicable law or agreed to in writing, software
  14    distributed under the License is distributed on an "AS IS" BASIS,
  15    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16    See the License for the specific language governing permissions and
  17    limitations under the License.
  18 ==================================================================================
  19 */
  20
  21 /*
  22         Mnemonic:       rmr_si.c
  23         Abstract:       This is the compile point for the si version of the rmr
  24                                 library (formerly known as uta, so internal function names
  25                                 are likely still uta_*)
  26
  27                                 With the exception of the symtab portion of the library,
  28                                 RMr is built with a single compile so as to "hide" the
  29                                 internal functions as statics.  Because they interdepend
  30                                 on each other, and CMake has issues with generating two
  31                                 different wormhole objects from a single source, we just
  32                                 pull it all together with a centralised compile using
  33                                 includes.
  34
  35                                 Future:  the API functions at this point can be separated
  36                                 into a common source module.
  37
  38         Author:         E. Scott Daniels
  39         Date:           1 February 2019
  40 */
  41
  42 #include <ctype.h>
  43 #include <stdio.h>
  44 #include <stdlib.h>
  45 #include <netdb.h>
  46 #include <errno.h>
  47 #include <string.h>
  48 #include <errno.h>
  49 #include <pthread.h>
  50 #include <unistd.h>
  51 #include <time.h>
  52 #include <arpa/inet.h>
  53 #include <semaphore.h>
  54 #include <pthread.h>
  55
  56 #include "si95/socket_if.h"
  57 #include "si95/siproto.h"
  58
  59
  60 #include "rmr.h"                                // things the users see
  61 #include "rmr_agnostic.h"               // agnostic things (must be included before private)
  62 #include "rmr_si_private.h"     // things that we need too
  63 #include "rmr_symtab.h"
  64
  65 #include "ring_static.c"                        // message ring support
  66 #include "rt_generic_static.c"          // route table things not transport specific
  67 #include "rtable_si_static.c"           // route table things -- transport specific
  68 #include "rtc_static.c"                         // route table collector
  69 #include "rtc_si_static.c"                      // our private test function
  70 #include "tools_static.c"
  71 #include "sr_si_static.c"                       // send/receive static functions
  72 #include "wormholes.c"                          // wormhole api externals and related static functions (must be LAST!)
  73 #include "mt_call_static.c"
  74 #include "mt_call_si_static.c"
  75
  76
  77 //------------------------------------------------------------------------------
  78
  79
  80 /*
  81         Clean up a context.
  82 */
  83 static void free_ctx( uta_ctx_t* ctx ) {
  84         if( ctx ) {
  85                 if( ctx->rtg_addr ) {
  86                         free( ctx->rtg_addr );
  87                 }
  88         }
  89 }
  90
  91 // --------------- public functions --------------------------------------------------------------------------
  92
  93 /*
  94         Returns the size of the payload (bytes) that the msg buffer references.
  95         Len in a message is the number of bytes which were received, or should
  96         be transmitted, however, it is possible that the mbuf was allocated
  97         with a larger payload space than the payload length indicates; this
  98         function returns the absolute maximum space that the user has available
  99         in the payload. On error (bad msg buffer) -1 is returned and errno should
 100         indicate the rason.
 101
 102         The allocated len stored in the msg is:
 103                 transport header length +
 104                 message header +
 105                 user requested payload
 106
 107         The msg header is a combination of the fixed RMR header and the variable
 108         trace data and d2 fields which may vary for each message.
 109 */
 110 extern int rmr_payload_size( rmr_mbuf_t* msg ) {
 111         if( msg == NULL || msg->header == NULL ) {
 112                 errno = EINVAL;
 113                 return -1;
 114         }
 115
 116         errno = 0;
 117         return msg->alloc_len - RMR_HDR_LEN( msg->header ) - TP_HDR_LEN;        // allocated transport size less the header and other data bits
 118 }
 119
 120 /*
 121         Allocates a send message as a zerocopy message allowing the underlying message protocol
 122         to send the buffer without copy.
 123 */
 124 extern rmr_mbuf_t* rmr_alloc_msg( void* vctx, int size ) {
 125         uta_ctx_t*      ctx;
 126         rmr_mbuf_t*     m;
 127
 128         if( (ctx = (uta_ctx_t *) vctx) == NULL ) {
 129                 return NULL;
 130         }
 131
 132         m = alloc_zcmsg( ctx, NULL, size, 0, DEF_TR_LEN );                              // alloc with default trace data
 133         return  m;
 134 }
 135
 136
 137 /*
 138         Allocates a send message as a zerocopy message allowing the underlying message protocol
 139         to send the buffer without copy. In addition, a trace data field of tr_size will be
 140         added and the supplied data coppied to the buffer before returning the message to
 141         the caller.
 142 */
 143 extern rmr_mbuf_t* rmr_tralloc_msg( void* vctx, int size, int tr_size, unsigned const char* data ) {
 144         uta_ctx_t*      ctx;
 145         rmr_mbuf_t*     m;
 146         int state;
 147
 148         if( (ctx = (uta_ctx_t *) vctx) == NULL ) {
 149                 return NULL;
 150         }
 151
 152         m = alloc_zcmsg( ctx, NULL, size, 0, tr_size );                         // alloc with specific tr size
 153         if( m != NULL ) {
 154                 state = rmr_set_trace( m, data, tr_size );                              // roll their data in
 155                 if( state != tr_size ) {
 156                         m->state = RMR_ERR_INITFAILED;
 157                 }
 158         }
 159
 160         return  m;
 161 }
 162
 163 /*
 164         This provides an external path to the realloc static function as it's called by an
 165         outward facing mbuf api function. Used to reallocate a message with a different
 166         trace data size.
 167 */
 168 extern rmr_mbuf_t* rmr_realloc_msg( rmr_mbuf_t* msg, int new_tr_size ) {
 169         return realloc_msg( msg, new_tr_size );
 170 }
 171
 172
 173 /*
 174         Return the message to the available pool, or free it outright.
 175 */
 176 extern void rmr_free_msg( rmr_mbuf_t* mbuf ) {
 177         //fprintf( stderr, "SKIPPING FREE: %p\n", mbuf );
 178         //return;
 179
 180         if( mbuf == NULL ) {
 181                 return;
 182         }
 183
 184         if( !mbuf->ring || ! uta_ring_insert( mbuf->ring, mbuf ) ) {                    // just queue, free if ring is full
 185                 if( mbuf->tp_buf ) {
 186                         free( mbuf->tp_buf );
 187                 }
 188                 free( mbuf );
 189         }
 190 }
 191
 192 /*
 193         This is a wrapper to the real timeout send. We must wrap it now to ensure that
 194         the call flag and call-id are reset
 195 */
 196 extern rmr_mbuf_t* rmr_mtosend_msg( void* vctx, rmr_mbuf_t* msg, int max_to ) {
 197         char* d1;                                                                                                                       // point at the call-id in the header
 198
 199         if( msg != NULL ) {
 200                 ((uta_mhdr_t *) msg->header)->flags &= ~HFL_CALL_MSG;                   // must ensure call flag is off
 201
 202                 d1 = DATA1_ADDR( msg->header );
 203                 d1[D1_CALLID_IDX] = NO_CALL_ID;                                                                         // must blot out so it doesn't queue on a chute at the other end
 204         }
 205
 206         return mtosend_msg( vctx, msg, max_to );
 207 }
 208
 209 /*
 210         Send with default max timeout as is set in the context.
 211         See rmr_mtosend_msg() for more details on the parameters.
 212         See rmr_stimeout() for info on setting the default timeout.
 213 */
 214 extern rmr_mbuf_t* rmr_send_msg( void* vctx, rmr_mbuf_t* msg ) {
 215         char* d1;                                                                                                               // point at the call-id in the header
 216
 217         if( msg != NULL ) {
 218                 ((uta_mhdr_t *) msg->header)->flags &= ~HFL_CALL_MSG;                   // must ensure call flag is off
 219
 220                 d1 = DATA1_ADDR( msg->header );
 221                 d1[D1_CALLID_IDX] = NO_CALL_ID;                                                                         // must blot out so it doesn't queue on a chute at the other end
 222         }
 223
 224         return rmr_mtosend_msg( vctx, msg,  -1 );                                                       // retries < 0  uses default from ctx
 225 }
 226
 227 /*
 228         Return to sender allows a message to be sent back to the endpoint where it originated.
 229
 230         In the SI world the file descriptor that was the source of the message is captured in
 231         the mbuffer and thus can be used to quickly find the target for an RTS call.
 232
 233         The source information in the message is used to select the socket on which to write
 234         the message rather than using the message type and round-robin selection. This
 235         should return a message buffer with the state of the send operation set. On success
 236         (state is RMR_OK, the caller may use the buffer for another receive operation), and on
 237         error it can be passed back to this function to retry the send if desired. On error,
 238         errno will liklely have the failure reason set by the nng send processing.
 239         The following are possible values for the state in the message buffer:
 240
 241         Message states returned:
 242                 RMR_ERR_BADARG - argument (context or msg) was nil or invalid
 243                 RMR_ERR_NOHDR  - message did not have a header
 244                 RMR_ERR_NOENDPT- an endpoint to send the message to could not be determined
 245                 RMR_ERR_SENDFAILED - send failed; errno has nano error code
 246                 RMR_ERR_RETRY   - the reqest failed but should be retried (EAGAIN)
 247
 248         A nil message as the return value is rare, and generally indicates some kind of horrible
 249         failure. The value of errno might give a clue as to what is wrong.
 250
 251         CAUTION:
 252                 Like send_msg(), this is non-blocking and will return the msg if there is an errror.
 253                 The caller must check for this and handle it properly.
 254 */
 255 extern rmr_mbuf_t*  rmr_rts_msg( void* vctx, rmr_mbuf_t* msg ) {
 256         int                     nn_sock;                        // endpoint socket for send
 257         uta_ctx_t*      ctx;
 258         int                     state;
 259         char*           hold_src;                       // we need the original source if send fails
 260         char*           hold_ip;                        // also must hold original ip
 261         int                     sock_ok = 0;            // true if we found a valid endpoint socket
 262         endpoint_t*     ep = NULL;                      // end point to track counts
 263
 264         if( (ctx = (uta_ctx_t *) vctx) == NULL || msg == NULL ) {               // bad stuff, bail fast
 265                 errno = EINVAL;                                                                                         // if msg is null, this is their clue
 266                 if( msg != NULL ) {
 267                         msg->state = RMR_ERR_BADARG;
 268                         msg->tp_state = errno;
 269                 }
 270                 return msg;
 271         }
 272
 273         errno = 0;                                                                                                              // at this point any bad state is in msg returned
 274         if( msg->header == NULL ) {
 275                 fprintf( stderr, "[ERR] rmr_send_msg: message had no header\n" );
 276                 msg->state = RMR_ERR_NOHDR;
 277                 msg->tp_state = errno;
 278                 return msg;
 279         }
 280
 281         ((uta_mhdr_t *) msg->header)->flags &= ~HFL_CALL_MSG;                   // must ensure call flag is off
 282
 283 /*
 284         sock_ok = uta_epsock_byname( ctx->rtable, (char *) ((uta_mhdr_t *)msg->header)->src, &nn_sock, &ep, ctx->si_ctx );                      // src is always used first for rts
 285         if( ! sock_ok ) {
 286 */
 287         if( (nn_sock = msg->rts_fd) < 0 ) {
 288                 if( HDR_VERSION( msg->header ) > 2 ) {                                                  // with ver2 the ip is there, try if src name not known
 289                         sock_ok = uta_epsock_byname( ctx->rtable, (char *) ((uta_mhdr_t *)msg->header)->srcip, &nn_sock, &ep, ctx->si_ctx );
 290                 }
 291                 if( ! sock_ok ) {
 292                         msg->state = RMR_ERR_NOENDPT;
 293                         return msg;                                                                                                                             // preallocated msg can be reused since not given back to nn
 294                 }
 295         }
 296
 297
 298         msg->state = RMR_OK;                                                                                                                            // ensure it is clear before send
 299         hold_src = strdup( (char *) ((uta_mhdr_t *)msg->header)->src );                                         // the dest where we're returning the message to
 300         hold_ip = strdup( (char *) ((uta_mhdr_t *)msg->header)->srcip );                                        // both the src host and src ip
 301         strncpy( (char *) ((uta_mhdr_t *)msg->header)->src, ctx->my_name, RMR_MAX_SRC );        // must overlay the source to be ours
 302         msg = send_msg( ctx, msg, nn_sock, -1 );
 303         if( msg ) {
 304                 if( ep != NULL ) {
 305                         switch( msg->state ) {
 306                                 case RMR_OK:
 307                                         ep->scounts[EPSC_GOOD]++;
 308                                         break;
 309
 310                                 case RMR_ERR_RETRY:
 311                                         ep->scounts[EPSC_TRANS]++;
 312                                         break;
 313
 314                                 default:
 315                                         // FIX ME uta_fd_failed( nn_sock );                     // we don't have an ep so this requires a look up/search to mark it failed
 316                                         ep->scounts[EPSC_FAIL]++;
 317                                         break;
 318                         }
 319                 }
 320                 strncpy( (char *) ((uta_mhdr_t *)msg->header)->src, hold_src, RMR_MAX_SRC );    // always return original source so rts can be called again
 321                 strncpy( (char *) ((uta_mhdr_t *)msg->header)->srcip, hold_ip, RMR_MAX_SRC );   // always return original source so rts can be called again
 322                 msg->flags |= MFL_ADDSRC;                                                                                                               // if msg given to send() it must add source
 323         }
 324
 325         free( hold_src );
 326         free( hold_ip );
 327         return msg;
 328 }
 329
 330 /*
 331         If multi-threading call is turned on, this invokes that mechanism with the special call
 332         id of 1 and a max wait of 1 second.  If multi threaded call is not on, then the original
 333         behavour (described below) is carried out.  This is safe to use when mt is enabled, but
 334         the user app is invoking rmr_call() from only one thread, and the caller doesn't need
 335         a flexible timeout.
 336
 337         On timeout this function will return a nil pointer. If the original message could not
 338         be sent without blocking, it will be returned with the RMR_ERR_RETRY set as the status.
 339
 340         Original behavour:
 341         Call sends the message based on message routing using the message type, and waits for a
 342         response message to arrive with the same transaction id that was in the outgoing message.
 343         If, while wiating for the expected response,  messages are received which do not have the
 344         desired transaction ID, they are queued. Calls to uta_rcv_msg() will dequeue them in the
 345         order that they were received.
 346
 347         Normally, a message struct pointer is returned and msg->state must be checked for RMR_OK
 348         to ensure that no error was encountered. If the state is UTA_BADARG, then the message
 349         may be resent (likely the context pointer was nil).  If the message is sent, but no
 350         response is received, a nil message is returned with errno set to indicate the likley
 351         issue:
 352                 ETIMEDOUT -- too many messages were queued before reciving the expected response
 353                 ENOBUFS -- the queued message ring is full, messages were dropped
 354                 EINVAL  -- A parameter was not valid
 355                 EAGAIN  -- the underlying message system wsa interrupted or the device was busy;
 356                                         user should call this function with the message again.
 357
 358 */
 359 extern rmr_mbuf_t* rmr_call( void* vctx, rmr_mbuf_t* msg ) {
 360         uta_ctx_t*              ctx;
 361
 362         if( (ctx = (uta_ctx_t *) vctx) == NULL || msg == NULL ) {               // bad stuff, bail fast
 363                 if( msg != NULL ) {
 364                         msg->state = RMR_ERR_BADARG;
 365                 }
 366                 return msg;
 367         }
 368
 369         return rmr_mt_call( vctx, msg, 1, 1000 );               // use the reserved call-id of 1 and wait up to 1 sec
 370 }
 371
 372 /*
 373         The outward facing receive function. When invoked it will pop the oldest message
 374         from the receive ring, if any are queued, and return it. If the ring is empty
 375         then the receive function is invoked to wait for the next message to arrive (blocking).
 376
 377         If old_msg is provided, it will be populated (avoiding lots of free/alloc cycles). If
 378         nil, a new one will be allocated. However, the caller should NOT expect to get the same
 379         struct back (if a queued message is returned the message struct will be different).
 380 */
 381 extern rmr_mbuf_t* rmr_rcv_msg( void* vctx, rmr_mbuf_t* old_msg ) {
 382         uta_ctx_t*      ctx;
 383         rmr_mbuf_t*     qm;                             // message that was queued on the ring
 384
 385         if( (ctx = (uta_ctx_t *) vctx) == NULL ) {
 386                 errno = EINVAL;
 387                 if( old_msg != NULL ) {
 388                         old_msg->state = RMR_ERR_BADARG;
 389                         old_msg->tp_state = errno;
 390                 }
 391                 return old_msg;
 392         }
 393         errno = 0;
 394
 395         return rmr_mt_rcv( ctx, old_msg, -1 );
 396 }
 397
 398 /*
 399         This allows a timeout based receive for applications unable to implement epoll_wait()
 400         (e.g. wrappers).
 401 */
 402 extern rmr_mbuf_t* rmr_torcv_msg( void* vctx, rmr_mbuf_t* old_msg, int ms_to ) {
 403         uta_ctx_t*      ctx;
 404
 405         if( (ctx = (uta_ctx_t *) vctx) == NULL ) {
 406                 errno = EINVAL;
 407                 if( old_msg != NULL ) {
 408                         old_msg->state = RMR_ERR_BADARG;
 409                         old_msg->tp_state = errno;
 410                 }
 411                 return old_msg;
 412         }
 413
 414         return rmr_mt_rcv( ctx, old_msg, ms_to );
 415 }
 416
 417 /*
 418         This blocks until the message with the 'expect' ID is received. Messages which are received
 419         before the expected message are queued onto the message ring.  The function will return
 420         a nil message and set errno to ETIMEDOUT if allow2queue messages are received before the
 421         expected message is received. If the queued message ring fills a nil pointer is returned
 422         and errno is set to ENOBUFS.
 423
 424         Generally this will be invoked only by the call() function as it waits for a response, but
 425         it is exposed to the user application as three is no reason not to.
 426 */
 427 extern rmr_mbuf_t* rmr_rcv_specific( void* vctx, rmr_mbuf_t* msg, char* expect, int allow2queue ) {
 428         uta_ctx_t*      ctx;
 429         int     queued = 0;                             // number we pushed into the ring
 430         int     exp_len = 0;                    // length of expected ID
 431
 432         if( (ctx = (uta_ctx_t *) vctx) == NULL ) {
 433                 errno = EINVAL;
 434                 if( msg != NULL ) {
 435                         msg->state = RMR_ERR_BADARG;
 436                         msg->tp_state = errno;
 437                 }
 438                 return msg;
 439         }
 440
 441         errno = 0;
 442
 443         if( expect == NULL || ! *expect ) {                             // nothing expected if nil or empty string, just receive
 444                 return rmr_rcv_msg( ctx, msg );
 445         }
 446
 447         exp_len = strlen( expect );
 448         if( exp_len > RMR_MAX_XID ) {
 449                 exp_len = RMR_MAX_XID;
 450         }
 451         if( DEBUG ) fprintf( stderr, "[DBUG] rcv_specific waiting for id=%s\n",  expect );
 452
 453         while( queued < allow2queue ) {
 454                 msg = rcv_msg( ctx, msg );                                      // hard wait for next
 455                 if( msg->state == RMR_OK ) {
 456                         if( memcmp( msg->xaction, expect, exp_len ) == 0 ) {                    // got it -- return it
 457                                 if( DEBUG ) fprintf( stderr, "[DBUG] rcv-specific matched (%s); %d messages were queued\n", msg->xaction, queued );
 458                                 return msg;
 459                         }
 460
 461                         if( ! uta_ring_insert( ctx->mring, msg ) ) {                                    // just queue, error if ring is full
 462                                 if( DEBUG > 1 ) fprintf( stderr, "[DBUG] rcv_specific ring is full\n" );
 463                                 errno = ENOBUFS;
 464                                 return NULL;
 465                         }
 466
 467                         if( DEBUG ) fprintf( stderr, "[DBUG] rcv_specific queued message type=%d\n", msg->mtype );
 468                         queued++;
 469                         msg = NULL;
 470                 }
 471         }
 472
 473         if( DEBUG ) fprintf( stderr, "[DBUG] rcv_specific timeout waiting for %s\n", expect );
 474         errno = ETIMEDOUT;
 475         return NULL;
 476 }
 477
 478 /*
 479         Set send timeout. The value time is assumed to be milliseconds.  The timeout is the
 480         _rough_ maximum amount of time that RMr will block on a send attempt when the underlying
 481         mechnism indicates eagain or etimeedout.  All other error conditions are reported
 482         without this delay. Setting a timeout of 0 causes no retries to be attempted in
 483         RMr code. Setting a timeout of 1 causes RMr to spin up to 1K retries before returning,
 484         but _without_ issuing a sleep.  If timeout is > 1, then RMr will issue a sleep (1us)
 485         after every 1K send attempts until the "time" value is reached. Retries are abandoned
 486         if NNG returns anything other than NNG_EAGAIN or NNG_ETIMEDOUT.
 487
 488         The default, if this function is not used, is 1; meaning that RMr will retry, but will
 489         not enter a sleep.  In all cases the caller should check the status in the message returned
 490         after a send call.
 491
 492         Returns -1 if the context was invalid; RMR_OK otherwise.
 493 */
 494 extern int rmr_set_stimeout( void* vctx, int time ) {
 495         uta_ctx_t*      ctx;
 496
 497         if( (ctx = (uta_ctx_t *) vctx) == NULL ) {
 498                 return -1;
 499         }
 500
 501         if( time < 0 ) {
 502                 time = 0;
 503         }
 504
 505         ctx->send_retries = time;
 506         return RMR_OK;
 507 }
 508
 /*
     Set receive timeout -- not supported by this (SI) implementation.

     Neither parameter is used.  A warning is written to stderr and 0 is
     always returned; nothing is changed in the context.
 */
 extern int rmr_set_rtimeout( void* vctx, int time ) {
     static const char* const warning = "[WRN] Current underlying transport mechanism (SI) does not support rcv timeout; not set\n";

     fputs( warning, stderr );
     return 0;
 }
 518
 519
 520 /*
 521         This is the actual init workhorse. The user visible function meerly ensures that the
 522         calling programme does NOT set any internal flags that are supported, and then
 523         invokes this.  Internal functions (the route table collector) which need additional
 524         open ports without starting additional route table collectors, will invoke this
 525         directly with the proper flag.
 526 */
 527 static void* init(  char* uproto_port, int max_msg_size, int flags ) {
 528         static  int announced = 0;
 529         uta_ctx_t*      ctx = NULL;
 530         char    bind_info[NNG_MAXADDRLEN];      // bind info
 531         char*   proto = "tcp";                          // pointer into the proto/port string user supplied
 532         char*   port;
 533         char*   interface = NULL;                       // interface to bind to (from RMR_BIND_IF, 0.0.0.0 if not defined)
 534         char*   proto_port;
 535         char    wbuf[1024];                                     // work buffer
 536         char*   tok;                                            // pointer at token in a buffer
 537         char*   tok2;
 538         int             state;
 539         int             i;
 540
 541         if( ! announced ) {
 542                 fprintf( stderr, "[INFO] ric message routing library on SI95 mv=%d flg=%02x (%s %s.%s.%s built: %s)\n",
 543                         RMR_MSG_VER, flags, QUOTE_DEF(GIT_ID), QUOTE_DEF(MAJOR_VER), QUOTE_DEF(MINOR_VER), QUOTE_DEF(PATCH_VER), __DATE__ );
 544                 announced = 1;
 545         }
 546
 547         errno = 0;
 548         if( uproto_port == NULL ) {
 549                 proto_port = strdup( DEF_COMM_PORT );
 550         } else {
 551                 proto_port = strdup( uproto_port );             // so we can modify it
 552         }
 553
 554         if( (ctx = (uta_ctx_t *) malloc( sizeof( uta_ctx_t ) )) == NULL ) {
 555                 errno = ENOMEM;
 556                 return NULL;
 557         }
 558         memset( ctx, 0, sizeof( uta_ctx_t ) );
 559
 560         if( DEBUG ) fprintf( stderr, "[DBUG] rmr_init: allocating 266 rivers\n" );
 561         ctx->nrivers = 256;                                                             // number of input flows we'll manage
 562         ctx->rivers = (river_t *) malloc( sizeof( river_t ) * ctx->nrivers );
 563         memset( ctx->rivers, 0, sizeof( river_t ) * ctx->nrivers );
 564         for( i = 0; i < ctx->nrivers; i++ ) {
 565                 ctx->rivers[i].state = RS_NEW;                          // force allocation of accumulator on first received packet
 566         }
 567
 568         ctx->send_retries = 1;                                                  // default is not to sleep at all; RMr will retry about 10K times before returning
 569         ctx->d1_len = 4;                                                                // data1 space in header -- 4 bytes for now
 570         ctx->max_ibm = max_msg_size;                                    // default to user supplied message size
 571
 572         ctx->mring = uta_mk_ring( 4096 );                               // message ring is always on for si
 573         init_mtcall( ctx );                                                             // set up call chutes
 574
 575         ctx->zcb_mring = uta_mk_ring( 128 );                    // zero copy buffer mbuf ring
 576
 577         ctx->max_plen = RMR_MAX_RCV_BYTES;                              // max user payload lengh
 578         if( max_msg_size > 0 ) {
 579                 ctx->max_plen = max_msg_size;
 580         }
 581
 582         // we're using a listener to get rtg updates, so we do NOT need this.
 583         //uta_lookup_rtg( ctx );                                                        // attempt to fill in rtg info; rtc will handle missing values/errors
 584
 585         ctx->si_ctx = SIinitialise( SI_OPT_FG );                // FIX ME: si needs to streamline and drop fork/bg stuff
 586         if( ctx->si_ctx == NULL ) {
 587                 fprintf( stderr, "[CRI] unable to initialise SI95 interface\n" );
 588                 free_ctx( ctx );
 589                 return NULL;
 590         }
 591
 592         if( (port = strchr( proto_port, ':' )) != NULL ) {
 593                 if( port == proto_port ) {              // ":1234" supplied; leave proto to default and point port correctly
 594                         port++;
 595                 } else {
 596                         *(port++) = 0;                  // term proto string and point at port string
 597                         proto = proto_port;             // user supplied proto so point at it rather than default
 598                 }
 599         } else {
 600                 port = proto_port;                      // assume something like "1234" was passed
 601         }
 602
 603         if( (tok = getenv( ENV_SRC_ID )) != NULL ) {                                                    // env var overrides what we dig from system
 604                 tok = strdup( tok );                                    // something we can destroy
 605                 if( *tok == '[' ) {                                             // we allow an ipv6 address here
 606                         tok2 = strchr( tok, ']' ) + 1;          // we will chop the port (...]:port) if given
 607                 } else {
 608                         tok2 = strchr( tok, ':' );                      // find :port if there so we can chop
 609                 }
 610                 if( tok2  && *tok2 ) {                                  // if it's not the end of string marker
 611                         *tok2 = 0;                                                      // make it so
 612                 }
 613
 614                 snprintf( wbuf, RMR_MAX_SRC, "%s", tok );
 615                 free( tok );
 616         } else {
 617                 if( (gethostname( wbuf, sizeof( wbuf ) )) != 0 ) {
 618                         fprintf( stderr, "[CRI] rmr_init: cannot determine localhost name: %s\n", strerror( errno ) );
 619                         return NULL;
 620                 }
 621                 if( (tok = strchr( wbuf, '.' )) != NULL ) {
 622                         *tok = 0;                                                                       // we don't keep domain portion
 623                 }
 624         }
 625
 626         ctx->my_name = (char *) malloc( sizeof( char ) * RMR_MAX_SRC );
 627         if( snprintf( ctx->my_name, RMR_MAX_SRC, "%s:%s", wbuf, port ) >= RMR_MAX_SRC ) {                       // our registered name is host:port
 628                 fprintf( stderr, "[CRI] rmr_init: hostname + port must be less than %d characters; %s:%s is not\n", RMR_MAX_SRC, wbuf, port );
 629                 return NULL;
 630         }
 631
 632         if( (tok = getenv( ENV_NAME_ONLY )) != NULL ) {
 633                 if( atoi( tok ) > 0 ) {
 634                         flags |= RMRFL_NAME_ONLY;                                       // don't allow IP addreess to go out in messages
 635                 }
 636         }
 637
 638         ctx->ip_list = mk_ip_list( port );                              // suss out all IP addresses we can find on the box, and bang on our port for RT comparisons
 639         if( flags & RMRFL_NAME_ONLY ) {
 640                 ctx->my_ip = strdup( ctx->my_name );                    // user application or env var has specified that IP address is NOT sent out, use name
 641         } else {
 642                 ctx->my_ip = get_default_ip( ctx->ip_list );    // and (guess) at what should be the default to put into messages as src
 643                 if( ctx->my_ip == NULL ) {
 644                         fprintf( stderr, "[WRN] rmr_init: default ip address could not be sussed out, using name\n" );
 645                         strcpy( ctx->my_ip, ctx->my_name );                     // if we cannot suss it out, use the name rather than a nil pointer
 646                 }
 647         }
 648         if( DEBUG ) fprintf( stderr, "[DBUG] default ip address: %s\n", ctx->my_ip );
 649
 650         if( (tok = getenv( ENV_WARNINGS )) != NULL ) {
 651                 if( *tok == '1' ) {
 652                         ctx->flags |= CTXFL_WARN;                                       // turn on some warnings (not all, just ones that shouldn't impact performance)
 653                 }
 654         }
 655
 656
 657         if( (interface = getenv( ENV_BIND_IF )) == NULL ) {
 658                 interface = "0.0.0.0";
 659         }
 660
 661         snprintf( bind_info, sizeof( bind_info ), "%s:%s", interface, port );           // FIXME -- si only supports 0.0.0.0 by default
 662         if( (state = SIlistener( ctx->si_ctx, TCP_DEVICE, bind_info )) < 0 ) {
 663                 fprintf( stderr, "[CRI] rmr_init: unable to start si listener for %s: %s\n", bind_info, strerror( errno ) );
 664                 free_ctx( ctx );
 665                 return NULL;
 666         }
 667
 668         if( !(flags & FL_NOTHREAD) ) {                                                                                          // skip if internal function that doesnt need an rtc
 669                 if( pthread_create( &ctx->rtc_th,  NULL, rtc_file, (void *) ctx ) ) {   // kick the rt collector thread
 670                         fprintf( stderr, "[WRN] rmr_init: unable to start route table collector thread: %s", strerror( errno ) );
 671                 }
 672         }
 673
 674         //fprintf( stderr, ">>>>> starting threaded receiver with ctx=%p si_ctx=%p\n", ctx, ctx->si_ctx );
 675         ctx->flags |= CFL_MTC_ENABLED;                                                                                          // for SI threaded receiver is the only way
 676         if( pthread_create( &ctx->mtc_th,  NULL, mt_receive, (void *) ctx ) ) {         // so kick it
 677                 fprintf( stderr, "[WRN] rmr_init: unable to start multi-threaded receiver: %s", strerror( errno ) );
 678         }
 679
 680         free( proto_port );
 681         return (void *) ctx;
 682 }
 683
 684 /*
 685         Initialise the message routing environment. Flags are one of the UTAFL_
 686         constants. Proto_port is a protocol:port string (e.g. tcp:1234). If default protocol
 687         (tcp) to be used, then :port is all that is needed.
 688
 689         At the moment it seems that TCP really is the only viable protocol, but
 690         we'll allow flexibility.
 691
 692         The return value is a void pointer which must be passed to most uta functions. On
 693         error, a nil pointer is returned and errno should be set.
 694
 695         Flags:
 696                 No user flags supported (needed) at the moment, but this provides for extension
 697                 without drastically changing anything. The user should invoke with RMRFL_NONE to
 698                 avoid any misbehavour as there are internal flags which are suported
 699 */
 700 extern void* rmr_init( char* uproto_port, int max_msg_size, int flags ) {
 701         return init( uproto_port, max_msg_size, flags & UFL_MASK  );            // ensure any internal flags are off
 702 }
 703
 704 /*
 705         This sets the default trace length which will be added to any message buffers
 706         allocated.  It can be set at any time, and if rmr_set_trace() is given a
 707         trace len that is different than the default allcoated in a message, the message
 708         will be resized.
 709
 710         Returns 0 on failure and 1 on success. If failure, then errno will be set.
 711 */
 712 extern int rmr_init_trace( void* vctx, int tr_len ) {
 713         uta_ctx_t* ctx;
 714
 715         errno = 0;
 716         if( (ctx = (uta_ctx_t *) vctx) == NULL ) {
 717                 errno = EINVAL;
 718                 return 0;
 719         }
 720
 721         ctx->trace_data_len = tr_len;
 722         return 1;
 723 }
 724
 725 /*
 726         Return true if routing table is initialised etc. and app can send/receive.
 727 */
 728 extern int rmr_ready( void* vctx ) {
 729         uta_ctx_t *ctx;
 730
 731         if( (ctx = (uta_ctx_t *) vctx) == NULL ) {
 732                 return FALSE;
 733         }
 734
 735         if( ctx->rtable != NULL ) {
 736                 return TRUE;
 737         }
 738
 739         return FALSE;
 740 }
 741
 742 /*
 743         This returns the message queue ring's filedescriptor which can be used for
 744         calls to epoll.  The user shouild NOT read, write, or close the fd.
 745
 746         Returns the file descriptor or -1 on error.
 747 */
 748 extern int rmr_get_rcvfd( void* vctx ) {
 749         uta_ctx_t* ctx;
 750         int state;
 751
 752         if( (ctx = (uta_ctx_t *) vctx) == NULL ) {
 753                 return -1;
 754         }
 755
 756 /*
 757         if( (state = nng_getopt_int( ctx->nn_sock, NNG_OPT_RECVFD, &fd )) != 0 ) {
 758                 fprintf( stderr, "[WRN] rmr cannot get recv fd: %s\n", nng_strerror( state ) );
 759                 return -1;
 760         }
 761 */
 762
 763         return uta_ring_getpfd( ctx->mring );
 764 }
 765
 766
 767 /*
 768         Clean up things.
 769
 770         There isn't an si_flush() per se, but we can pause, generate
 771         a context switch, which should allow the last sent buffer to
 772         flow. There isn't exactly an nng_term/close either, so there
 773         isn't much we can do.
 774 */
 775 extern void rmr_close( void* vctx ) {
 776         uta_ctx_t *ctx;
 777
 778         if( (ctx = (uta_ctx_t *) vctx) == NULL ) {
 779                 return;
 780         }
 781
 782         ctx->shutdown = 1;
 783
 784         SItp_stats( ctx->si_ctx );                      // dump some interesting stats
 785
 786         // FIX ME -- how to we turn off si; close all sessions etc?
 787         //SIclose( ctx->nn_sock );
 788
 789 }
 790
 791
 792 // ----- multi-threaded call/receive support -------------------------------------------------
 793
 794 /*
 795         Blocks on the receive ring chute semaphore and then reads from the ring
 796         when it is tickled.  If max_wait is -1 then the function blocks until
 797         a message is ready on the ring. Else max_wait is assumed to be the number
 798         of millaseconds to wait before returning a timeout message.
 799 */
 800 extern rmr_mbuf_t* rmr_mt_rcv( void* vctx, rmr_mbuf_t* mbuf, int max_wait ) {
 801         uta_ctx_t*      ctx;
 802         uta_mhdr_t*     hdr;                    // header in the transport buffer
 803         chute_t*        chute;
 804         struct timespec ts;                     // time info if we have a timeout
 805         long    new_ms;                         // adjusted mu-sec
 806         long    seconds = 0;            // max wait seconds
 807         long    nano_sec;                       // max wait xlated to nano seconds
 808         int             state;
 809         rmr_mbuf_t*     ombuf;                  // mbuf user passed; if we timeout we return state here
 810
 811         if( (ctx = (uta_ctx_t *) vctx) == NULL ) {
 812                 errno = EINVAL;
 813                 if( mbuf ) {
 814                         mbuf->state = RMR_ERR_BADARG;
 815                         mbuf->tp_state = errno;
 816                 }
 817                 return mbuf;
 818         }
 819
 820         ombuf = mbuf;           // if we timeout we must return original msg with status, so save it
 821
 822         chute = &ctx->chutes[0];                                        // chute 0 used only for its semaphore
 823
 824         if( max_wait == 0 ) {                                           // one shot poll; handle wihtout sem check as that is SLOW!
 825                 if( (mbuf = (rmr_mbuf_t *) uta_ring_extract( ctx->mring )) != NULL ) {                  // pop if queued
 826                         if( ombuf ) {
 827                                 rmr_free_msg( ombuf );                          // can't reuse, caller's must be trashed now
 828                         }
 829                 } else {
 830                         mbuf = ombuf;                                           // return original if it was given with timeout status
 831                         if( ombuf != NULL ) {
 832                                 mbuf->state = RMR_ERR_TIMEOUT;                  // preset if for failure
 833                                 mbuf->len = 0;
 834                         }
 835                 }
 836
 837                 return mbuf;
 838         }
 839
 840         if( ombuf ) {
 841                 ombuf->state = RMR_ERR_TIMEOUT;                 // preset if for failure
 842                 ombuf->len = 0;
 843         }
 844         if( max_wait > 0 ) {
 845                 clock_gettime( CLOCK_REALTIME, &ts );   // sem timeout based on clock, not a delta
 846
 847                 if( max_wait > 999 ) {
 848                         seconds = max_wait / 1000;
 849                         max_wait -= seconds * 1000;
 850                         ts.tv_sec += seconds;
 851                 }
 852                 if( max_wait > 0 ) {
 853                         nano_sec = max_wait * 1000000;
 854                         ts.tv_nsec += nano_sec;
 855                         if( ts.tv_nsec > 999999999 ) {
 856                                 ts.tv_nsec -= 999999999;
 857                                 ts.tv_sec++;
 858                         }
 859                 }
 860
 861                 seconds = 1;                                                                                                    // use as flag later to invoked timed wait
 862         }
 863
 864         errno = EINTR;
 865         state = -1;
 866         while( state < 0 && errno == EINTR ) {
 867                 if( seconds ) {
 868                         state = sem_timedwait( &chute->barrier, &ts );                          // wait for msg or timeout
 869                 } else {
 870                         state = sem_wait( &chute->barrier );
 871                 }
 872         }
 873
 874         if( state < 0 ) {
 875                 mbuf = ombuf;                           // return caller's buffer if they passed one in
 876         } else {
 877                 errno = 0;                                              // interrupted call state could be left; clear
 878                 if( DEBUG ) fprintf( stderr, "[DBUG] mt_rcv extracting from normal ring\n" );
 879                 if( (mbuf = (rmr_mbuf_t *) uta_ring_extract( ctx->mring )) != NULL ) {                  // pop if queued
 880                         mbuf->state = RMR_OK;
 881
 882                         if( ombuf ) {
 883                                 rmr_free_msg( ombuf );                                  // we cannot reuse as mbufs are queued on the ring
 884                         }
 885                 } else {
 886                         errno = ETIMEDOUT;
 887                         mbuf = ombuf;                           // no buffer, return user's if there
 888                 }
 889         }
 890
 891         if( mbuf ) {
 892                 mbuf->tp_state = errno;
 893         }
 894         return mbuf;
 895 }
 896
 897 /*
 898         Accept a message buffer and caller ID, send the message and then wait
 899         for the receiver to tickle the semaphore letting us know that a message
 900         has been received. The call_id is a value between 2 and 255, inclusive; if
 901         it's not in this range an error will be returned. Max wait is the amount
 902         of time in millaseconds that the call should block for. If 0 is given
 903         then no timeout is set.
 904
 905         If the mt_call feature has not been initialised, then the attempt to use this
 906         funciton will fail with RMR_ERR_NOTSUPP
 907
 908         If no matching message is received before the max_wait period expires, a
 909         nil pointer is returned, and errno is set to ETIMEOUT. If any other error
 910         occurs after the message has been sent, then a nil pointer is returned
 911         with errno set to some other value.
 912 */
 913 extern rmr_mbuf_t* rmr_mt_call( void* vctx, rmr_mbuf_t* mbuf, int call_id, int max_wait ) {
 914         rmr_mbuf_t* ombuf;                      // original mbuf passed in
 915         uta_ctx_t*      ctx;
 916         uta_mhdr_t*     hdr;                    // header in the transport buffer
 917         chute_t*        chute;
 918         unsigned char*  d1;                     // d1 data in header
 919         struct timespec ts;                     // time info if we have a timeout
 920         long    new_ms;                         // adjusted mu-sec
 921         long    seconds = 0;            // max wait seconds
 922         long    nano_sec;                       // max wait xlated to nano seconds
 923         int             state;
 924
 925         errno = EINVAL;
 926         if( (ctx = (uta_ctx_t *) vctx) == NULL || mbuf == NULL ) {
 927                 if( mbuf ) {
 928                         mbuf->tp_state = errno;
 929                         mbuf->state = RMR_ERR_BADARG;
 930                 }
 931                 return mbuf;
 932         }
 933
 934         if( ! (ctx->flags & CFL_MTC_ENABLED) ) {
 935                 mbuf->state = RMR_ERR_NOTSUPP;
 936                 mbuf->tp_state = errno;
 937                 return mbuf;
 938         }
 939
 940         if( call_id > MAX_CALL_ID || call_id < 2 ) {                                    // 0 and 1 are reserved; user app cannot supply them
 941                 mbuf->state = RMR_ERR_BADARG;
 942                 mbuf->tp_state = errno;
 943                 return mbuf;
 944         }
 945
 946         ombuf = mbuf;                                                                                                   // save to return timeout status with
 947
 948         chute = &ctx->chutes[call_id];
 949         if( chute->mbuf != NULL ) {                                                                             // probably a delayed message that wasn't dropped
 950                 rmr_free_msg( chute->mbuf );
 951                 chute->mbuf = NULL;
 952         }
 953
 954         hdr = (uta_mhdr_t *) mbuf->header;
 955         hdr->flags |= HFL_CALL_MSG;                                                                             // must signal this sent with a call
 956         memcpy( chute->expect, mbuf->xaction, RMR_MAX_XID );                    // xaction that we will wait for
 957         d1 = DATA1_ADDR( hdr );
 958         d1[D1_CALLID_IDX] = (unsigned char) call_id;                                    // set the caller ID for the response
 959         mbuf->flags |= MFL_NOALLOC;                                                                             // send message without allocating a new one (expect nil from mtosend
 960
 961         if( max_wait >= 0 ) {
 962                 clock_gettime( CLOCK_REALTIME, &ts );
 963
 964                 if( max_wait > 999 ) {
 965                         seconds = max_wait / 1000;
 966                         max_wait -= seconds * 1000;
 967                         ts.tv_sec += seconds;
 968                 }
 969                 if( max_wait > 0 ) {
 970                         nano_sec = max_wait * 1000000;
 971                         ts.tv_nsec += nano_sec;
 972                         if( ts.tv_nsec > 999999999 ) {
 973                                 ts.tv_nsec -= 999999999;
 974                                 ts.tv_sec++;
 975                         }
 976                 }
 977
 978                 seconds = 1;                                                                            // use as flag later to invoked timed wait
 979         }
 980
 981         mbuf = mtosend_msg( ctx, mbuf, 0 );                                             // use internal function so as not to strip call-id; should be nil on success!
 982         if( mbuf ) {
 983                 if( mbuf->state != RMR_OK ) {
 984                         mbuf->tp_state = errno;
 985                         return mbuf;                                                                    // timeout or unable to connect or no endpoint are most likely issues
 986                 }
 987         }
 988
 989         state = 0;
 990         errno = 0;
 991         while( chute->mbuf == NULL && ! errno ) {
 992                 if( seconds ) {
 993                         state = sem_timedwait( &chute->barrier, &ts );                          // wait for msg or timeout
 994                 } else {
 995                         state = sem_wait( &chute->barrier );
 996                 }
 997
 998                 if( state < 0 && errno == EINTR ) {                                                             // interrupted go back and wait; all other errors cause exit
 999                         errno = 0;
1000                 }
1001
1002                 if( chute->mbuf != NULL ) {                                                                             // offload receiver thread and check xaction buffer here
1003                         if( memcmp( chute->expect, chute->mbuf->xaction, RMR_MAX_XID ) != 0 ) {
1004                                 rmr_free_msg( chute->mbuf );
1005                                 chute->mbuf = NULL;
1006                                 errno = 0;
1007                         }
1008                 }
1009         }
1010
1011         if( state < 0 ) {
1012                 return NULL;                                    // leave errno as set by sem wait call
1013         }
1014
1015         mbuf = chute->mbuf;
1016         mbuf->state = RMR_OK;
1017         chute->mbuf = NULL;
1018
1019         return mbuf;
1020 }
1021
1022 /*
1023         Enable low latency things in the transport (when supported).
1024 */
1025 extern void rmr_set_low_latency( void* vctx ) {
1026         uta_ctx_t*      ctx;
1027
1028         if( (ctx = (uta_ctx_t *) vctx) != NULL ) {
1029                 if( ctx->si_ctx != NULL ) {
1030                         SIset_tflags( ctx->si_ctx, SI_TF_NODELAY );
1031                 }
1032         }
1033 }
1034
1035 /*
1036         Turn on fast acks.
1037 */
1038 extern void rmr_set_fack( void* vctx ) {
1039         uta_ctx_t*      ctx;
1040
1041         if( (ctx = (uta_ctx_t *) vctx) != NULL ) {
1042                 if( ctx->si_ctx != NULL ) {
1043                         SIset_tflags( ctx->si_ctx, SI_TF_FASTACK );
1044                 }
1045         }
1046 }
1047