// :vim ts=4 sw=4 noet: /* ================================================================================== Copyright (c) 2019 Nokia Copyright (c) 2018-2019 AT&T Intellectual Property. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================================================== */ /* Mnemonic: lcaller.c Abstract: This is a simple sender which will send a series of messages using rmr_call(). Similar to caller.c the major difference is that a timestamp is placed into the message and the receiver is expected to add a timestamp before executing an rts call. We can then compute the total round trip latency as well as the forward send latency. Overall, N threads are started each sending the desired number of messages and expecting an 'ack' for each. Each ack is examined to verify that the thread id placed into the message matches (meaning that the ack was delivered by RMr to the correct thread's chute. The message format is 'binary' defined by the lc_msg struct. Parms: argv[1] == number of msgs to send (10) argv[2] == delay (mu-seconds, 1000000 default) argv[3] == number of threads (3) argv[4] == listen port Sender will send for at most 20 seconds, so if nmsgs and delay extend beyond that period the total number of messages sent will be less than n. Date: 18 April 2019 Author: E. Scott Daniels */ #include #include #include #include #include #include #include #include #include #define TRACE_SIZE 40 // bytes in header to provide for trace junk #define SUCCESS (-1) /* Thread data */ typedef struct tdata { int id; // the id we'll pass to RMr mt-call function NOT the thread id int n2send; // number of messages to send int delay; // ms delay between messages void* mrc; // RMr context int state; int* in_bins; // latency count bins int* out_bins; int nbins; // number of bins allocated long long in_max; long long out_max; int out_oor; // out of range count int in_oor; int in_bcount; // total messages tracked in bins int out_bcount; // total messages tracked in bins } tdata_t; /* The message type placed into the payload. */ typedef struct lc_msg { struct timespec out_ts; // time just before call executed struct timespec turn_ts; // time at the receiver, on receipt struct timespec in_ts; // time received back by the caller int out_retries; // number of retries required to send int turn_retries; // number of retries required to send } lc_msg_t; // -------------------------------------------------------------------------------- static int sum( char* str ) { int sum = 0; int i = 0; while( *str ) { sum += *(str++) + i++; } return sum % 255; } static void print_stats( tdata_t* td, int out, int hist ) { int sum; // sum of latencies int csum = 0; // cutoff sum int i95 = 0; // bin for the 95th count int i99 = 0; // bin for the 99th count int mean = -1; int cutoff_95; // 95% of total messages int cutoff_99; // 99% of total messages int oor; int max; int j; if( out ) { cutoff_95 = .95 * (td->out_oor + td->out_bcount); cutoff_99 = .95 * (td->out_oor + td->out_bcount); oor = td->out_oor; max = td->out_max; } else { cutoff_95 = .95 * (td->in_oor + td->in_bcount); cutoff_99 = .95 * (td->in_oor + td->in_bcount); oor = td->in_oor; max = td->in_max; } sum = 0; for( j = 0; j < td->nbins; j++ ) { if( csum < cutoff_95 ) { i95++; } if( csum < cutoff_99 ) { i99++; } if( out ) { csum += td->out_bins[j]; sum += td->out_bins[j] * j; } else { csum += td->in_bins[j]; sum += td->in_bins[j] * j; } } if( out ) { if( td->out_bcount ) { mean = sum/(td->out_bcount); } } else { if( td->in_bcount ) { mean = sum/(td->in_bcount); } } if( hist ) { for( j = 0; j < td->nbins; j++ ) { fprintf( stderr, "%3d %d\n", j, out ? td->out_bins[j] : td->in_bins[j] ); } } fprintf( stderr, "%s: oor=%d max=%.2fms mean=%.2fms 95th=%.2fms 99th=%.2f\n", out ? "out" : " in", oor, (double)max/1000000.0, (double)mean/100.0, (double) i95/100.0, i99/100.0 ); } /* Given a message, compute the in/out and round trip latencies. */ static void compute_latency( tdata_t* td, lc_msg_t* lcm ) { long long out; long long turn; long long in; double rtl; // round trip latency double outl; // caller to receiver latency (out) double inl; // receiver to caller latency (in) int bin; if( lcm == NULL || td == NULL ) { return; } out = (lcm->out_ts.tv_sec * 1000000000) + lcm->out_ts.tv_nsec; in = (lcm->in_ts.tv_sec * 1000000000) + lcm->in_ts.tv_nsec; turn = (lcm->turn_ts.tv_sec * 1000000000) + lcm->turn_ts.tv_nsec; if( in - turn > td->in_max ) { td->in_max = in - turn; } if( turn - out > td->out_max ) { td->out_max = turn-out; } bin = (turn-out) / 10000; // 100ths of ms #ifdef PRINT outl = ((double) turn - out) / 1000000.0; // convert to ms inl = ((double) in - turn) / 1000000.0; rtl = ((double) in - out) / 1000000.0; fprintf( stderr, "outl = %5.3fms inl = %5.3fms rtl = %5.3fms bin=%d\n", outl, inl, rtl, bin ); #else bin = (turn - out) / 10000; // 100ths of ms if( bin < td->nbins ) { td->out_bins[bin]++; td->out_bcount++; } else { td->out_oor++; } bin = (in - turn) / 10000; // 100ths of ms if( bin < td->nbins ) { td->in_bins[bin]++; td->in_bcount++; } else { td->in_oor++; } #endif } /* Executed as a thread, this puppy will generate calls to ensure that we get the response back to the right thread, that we can handle threads, etc. */ static void* mk_calls( void* data ) { lc_msg_t* lcm; // pointer at the payload as a struct tdata_t* control; rmr_mbuf_t* sbuf; // send buffer int count = 0; int ack_count = 0; int rt_count = 0; // number of messages requiring a spin retry int ok_msg = 0; // received messages that were sent by us int bad_msg = 0; // received messages that were sent by a different thread int drops = 0; int fail_count = 0; // # of failure sends after first successful send int successful = 0; // set to true after we have a successful send char xbuf[1024]; // build transaction string here int xaction_id = 1; char* tok; int state = 0; if( (control = (tdata_t *) data) == NULL ) { fprintf( stderr, "thread data was nil; bailing out\n" ); } //fprintf( stderr, " thread started with parallel call id=%d sending=%d delay=%d\n", control->id, control->n2send, control->delay ); sbuf = rmr_alloc_msg( control->mrc, 512 ); // alloc first send buffer; subsequent buffers allcoated on send usleep( rand() % 777 ); // stagger starts a bit so that they all don't pile up on the first connections while( count < control->n2send ) { // we send n messages after the first message is successful lcm = (lc_msg_t *) sbuf->payload; snprintf( xbuf, 200, "%31d", xaction_id ); xaction_id += control->id; rmr_bytes2xact( sbuf, xbuf, 32 ); sbuf->mtype = 5; // all go with the same type sbuf->len = sizeof( *lcm ); sbuf->state = RMR_OK; lcm->out_retries = 0; lcm->turn_retries = 0; clock_gettime( CLOCK_REALTIME, &lcm->out_ts ); // mark time out sbuf = rmr_mt_call( control->mrc, sbuf, control->id, 1000 ); // send it (send returns an empty payload on success, or the original payload on fail/retry) if( sbuf && sbuf->state == RMR_ERR_RETRY ) { // number of times we had to spin to send rt_count++; } while( sbuf != NULL && sbuf->state == RMR_ERR_RETRY ) { // send blocked; keep trying lcm->out_retries++; sbuf = rmr_mt_call( control->mrc, sbuf, control->id, 100 ); // call and wait up to 100ms for a response } count++; if( sbuf != NULL ) { switch( sbuf->state ) { case RMR_OK: // we should have a buffer back from the sender here lcm = (lc_msg_t *) sbuf->payload; clock_gettime( CLOCK_REALTIME, &lcm->in_ts ); // mark time back successful = 1; compute_latency( control, lcm ); ack_count++; //fprintf( stderr, "%d have received %d\n", control->id, count ); break; default: fprintf( stderr, "unexpected error: tid=%d rmr-state=%d ernro=%d\n", control->id, sbuf->state, errno ); sbuf = rmr_alloc_msg( control->mrc, 512 ); // allocate a sendable buffer if( successful ) { fail_count++; // count failures after first successful message } else { // some error (not connected likely), don't count this sleep( 1 ); } break; } } else { //fprintf( stderr, " tid=%-2d call finished, no sbuf\n", control->id ); sbuf = rmr_alloc_msg( control->mrc, 512 ); // loop expects an subf drops++; } if( control->delay > 0 ) { usleep( control->delay ); } } control->state = SUCCESS; fprintf( stderr, " %d finished sent %d, received %d messages\n", control->id, count, ack_count ); return NULL; } int main( int argc, char** argv ) { void* mrc; // msg router context rmr_mbuf_t* rbuf = NULL; // received on 'normal' flow struct epoll_event events[1]; // list of events to give to epoll struct epoll_event epe; // event definition for event to listen to int ep_fd = -1; // epoll's file des (given to epoll_wait) int rcv_fd; // file des that NNG tickles -- give this to epoll to listen on int nready; // number of events ready for receive char* listen_port = "43086"; long timeout = 0; int delay = 100000; // usec between send attempts int nmsgs = 10; // number of messages to send int nthreads = 3; int cutoff; int sum; tdata_t* cvs; // vector of control blocks int i; int j; pthread_t* pt_info; // thread stuff int failures = 0; int pings = 0; // number of messages received on normal channel if( argc > 1 ) { nmsgs = atoi( argv[1] ); } if( argc > 2 ) { delay = atoi( argv[2] ); } if( argc > 3 ) { nthreads = atoi( argv[3] ); } if( argc > 4 ) { listen_port = argv[4]; } fprintf( stderr, " listen port: %s; sending %d messages; delay=%d\n", listen_port, nmsgs, delay ); if( (mrc = rmr_init( listen_port, 1400, RMRFL_MTCALL )) == NULL ) { // initialise with multi-threaded call enabled fprintf( stderr, " unable to initialise RMr\n" ); exit( 1 ); } //rmr_init_trace( mrc, TRACE_SIZE ); if( (rcv_fd = rmr_get_rcvfd( mrc )) >= 0 ) { // epoll only available from NNG -- skip receive later if not NNG if( rcv_fd < 0 ) { fprintf( stderr, " unable to set up polling fd\n" ); exit( 1 ); } if( (ep_fd = epoll_create1( 0 )) < 0 ) { fprintf( stderr, " [FAIL] unable to create epoll fd: %d\n", errno ); exit( 1 ); } epe.events = EPOLLIN; epe.data.fd = rcv_fd; if( epoll_ctl( ep_fd, EPOLL_CTL_ADD, rcv_fd, &epe ) != 0 ) { fprintf( stderr, " [FAIL] epoll_ctl status not 0 : %s\n", strerror( errno ) ); exit( 1 ); } } else { rmr_set_rtimeout( mrc, 0 ); // for nano we must set the receive timeout to 0; non-blocking receive } cvs = malloc( sizeof( tdata_t ) * nthreads ); pt_info = malloc( sizeof( pthread_t ) * nthreads ); if( cvs == NULL ) { fprintf( stderr, " unable to allocate control vector\n" ); exit( 1 ); } timeout = time( NULL ) + 20; // give rmr 20s to find the route table (shouldn't need that much) while( ! rmr_ready( mrc ) ) { // must have a route table before we can send; wait til RMr says it has one fprintf( stderr, " waiting for rmr to show ready\n" ); sleep( 1 ); if( time( NULL ) > timeout ) { fprintf( stderr, " giving up\n" ); exit( 1 ); } } fprintf( stderr, " rmr is ready; starting threads\n" ); for( i = 0; i < nthreads; i++ ) { cvs[i].mrc = mrc; cvs[i].id = i + 2; // we pass this as the call-id to rmr, so must be >1 cvs[i].delay = delay; cvs[i].n2send = nmsgs; cvs[i].state = 1; cvs[i].nbins = 100; cvs[i].out_bins = (int *) malloc( sizeof( int ) * cvs[i].nbins ); cvs[i].in_bins = (int *) malloc( sizeof( int ) * cvs[i].nbins ); memset( cvs[i].out_bins, 0, sizeof( int ) * cvs[i].nbins ); memset( cvs[i].in_bins, 0, sizeof( int ) * cvs[i].nbins ); pthread_create( &pt_info[i], NULL, mk_calls, &cvs[i] ); // kick a thread } timeout = time( NULL ) + 20; i = 0; while( nthreads > 0 ) { if( cvs[i].state < 1 ) { // states 0 or below indicate done. 0 == failure, -n == success //print_stats( &cvs[i], 1, i == 0 ); print_stats( &cvs[i], 1, 0 ); print_stats( &cvs[i], 0, 0 ); nthreads--; if( cvs[i].state == 0 ) { failures++; } i++; } else { // sleep( 1 ); rbuf = rmr_torcv_msg( mrc, rbuf, 1000 ); if( rbuf != NULL && rbuf->state != RMR_ERR_RETRY ) { pings++; rmr_free_msg( rbuf ); rbuf = NULL; } } if( time( NULL ) > timeout ) { failures += nthreads; fprintf( stderr, " timeout waiting for threads to finish; %d were not finished\n", nthreads ); break; } } fprintf( stderr, " [%s] failing threads=%d pings reeived=%d\n", failures == 0 ? "PASS" : "FAIL", failures, pings ); sleep( 2 ); rmr_close( mrc ); return failures > 0; }