set( major_version "1" ) # should be automatically populated from git tag later, but until CI process sets a tag we use this
set( minor_version "0" )
-set( patch_level "24" )
+set( patch_level "25" )
set( install_root "${CMAKE_INSTALL_PREFIX}" )
set( install_lib "lib" )
*.gcno
*.gcda
*.xml
-*test
+*.test
--- /dev/null
+
+In general, seeing a "PASS" from the sender(s) and receiver(s) for each execution
+is a good indication that all was successful. Receivers will fail if the
+simple checksum calculated for the payload and trace data doesn't match. Senders
+will fail if a returned message doesn't have its matching tag (meaning it was
+returned to the wrong sender). Both will report an error on a timeout, which
+happens when either no route information was received, or the receiver did not
+receive the expected number of messages.
+
+Receivers send an 'ack' for message type 5, so for some tests the number of ack
+messages sent will not be the same as the number of messages received. Senders
+loop through message types 0-9 inclusive, unless otherwise directed on the
+command line (e.g. the rts test sends nothing but message type 5 messages so that
+all messages are ack'd).
+
+Receivers will generate a final histogram of message types received. For example
+
+<RCVR> mtype histogram: 0 0 0 0 0 100000 0 0 0 0 0
+
+is generated for the rts test -- all messages are type 5 and thus all other message
+type bins should be 0.
+
+By default, senders send 10 messages at a rate of about 1/sec. Receivers give up
+after 20 seconds, so even though the rate and number of messages sent can be
+adjusted from the command line, if the combination is such that the total number
+of messages sent requires more than 20 seconds to send the tests will fail.
+
+Specific examples
+The output is chopped to the last few lines.
+
+Return to sender test with 20 senders sending 5K messages each:
+ ksh run_rts_test.ksh -s 20 -d 180 -n 5000
+
+
+ <SNDR> [PASS] sent=5000 rcvd=4999 rts-ok=4999 failures=0 retries=4
+ <RCVR> mtype histogram: 0 0 0 0 0 100000 0 0 0 0 0
+ <RCVR> [PASS] 100000 messages; good=100000 acked=99983 bad=0 bad-trace=0 bad-sub_id=0
+ <SNDR> [PASS] sent=5000 rcvd=5000 rts-ok=5000 failures=0 retries=4
+ <SNDR> [PASS] sent=5000 rcvd=4998 rts-ok=4998 failures=0 retries=2
+ <SNDR> [PASS] sent=5000 rcvd=4998 rts-ok=4998 failures=0 retries=2
+ <SNDR> [PASS] sent=5000 rcvd=5000 rts-ok=5000 failures=0 retries=4
+ <SNDR> [PASS] sent=5000 rcvd=4998 rts-ok=4998 failures=0 retries=2
+ <SNDR> [PASS] sent=5000 rcvd=5000 rts-ok=5000 failures=0 retries=4
+ <SNDR> [PASS] sent=5000 rcvd=5000 rts-ok=5000 failures=0 retries=2
+ <SNDR> [PASS] sent=5000 rcvd=5000 rts-ok=5000 failures=0 retries=4
+ <SNDR> [PASS] sent=5000 rcvd=4999 rts-ok=4999 failures=0 retries=2
+ <SNDR> [PASS] sent=5000 rcvd=4999 rts-ok=4999 failures=0 retries=4
+ <SNDR> [PASS] sent=5000 rcvd=4999 rts-ok=4999 failures=0 retries=5
+ <SNDR> [PASS] sent=5000 rcvd=4999 rts-ok=4999 failures=0 retries=1
+ <SNDR> [PASS] sent=5000 rcvd=5000 rts-ok=5000 failures=0 retries=4
+ <SNDR> [PASS] sent=5000 rcvd=4997 rts-ok=4997 failures=0 retries=2
+ <SNDR> [PASS] sent=5000 rcvd=4999 rts-ok=4999 failures=0 retries=2
+ <SNDR> [PASS] sent=5000 rcvd=5000 rts-ok=5000 failures=0 retries=2
+ <SNDR> [PASS] sent=5000 rcvd=5000 rts-ok=5000 failures=0 retries=3
+ <SNDR> [PASS] sent=5000 rcvd=5000 rts-ok=5000 failures=0 retries=1
+ <SNDR> [PASS] sent=5000 rcvd=4998 rts-ok=4998 failures=0 retries=2
+ [PASS] sender rc=0 receiver rc=0
+
+Important notes
+ + The receiver will only retry acks for a finite number of tries before
+ giving up, thus the total acks sent may still be less than messages
+ received. As a cross validation, the total acks sent by the receiver
+ should match the recvd count sum over all senders.
+
+ + The recvd and rts-ok counts for each sender should match. If they don't
+ the receiver should mark the overall state as a failure as this indicates
+ that a return to sender message was returned to the wrong place.
+
+
+
+Multiple Receiver test
+Test run with 10 receivers and sender sending 10K messages. The histograms
+and status messages were reorganised for easier reading here.
+
+ ksh run_multi_test.ksh -r 10 -d 180 -n 10000
+ <RCVR> mtype histogram: 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0
+ <RCVR> mtype histogram: 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0
+ <RCVR> mtype histogram: 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0
+ <RCVR> mtype histogram: 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0
+ <RCVR> mtype histogram: 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0
+ <RCVR> mtype histogram: 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0
+ <RCVR> mtype histogram: 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0
+ <RCVR> mtype histogram: 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0
+ <RCVR> mtype histogram: 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0
+ <RCVR> mtype histogram: 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 0
+
+ <RCVR> [PASS] 10000 messages; good=10000 acked=1000 bad=0 bad-trace=0 bad-sub_id=0
+ <SNDR> [PASS] sent=10000 rcvd=10000 rts-ok=10000 failures=0 retries=0
+ <RCVR> [PASS] 10000 messages; good=10000 acked=1000 bad=0 bad-trace=0 bad-sub_id=0
+ <RCVR> [PASS] 10000 messages; good=10000 acked=1000 bad=0 bad-trace=0 bad-sub_id=0
+ <RCVR> [PASS] 10000 messages; good=10000 acked=1000 bad=0 bad-trace=0 bad-sub_id=0
+ <RCVR> [PASS] 10000 messages; good=10000 acked=1000 bad=0 bad-trace=0 bad-sub_id=0
+ <RCVR> [PASS] 10000 messages; good=10000 acked=1000 bad=0 bad-trace=0 bad-sub_id=0
+ <RCVR> [PASS] 10000 messages; good=10000 acked=1000 bad=0 bad-trace=0 bad-sub_id=0
+ <RCVR> [PASS] 10000 messages; good=10000 acked=1000 bad=0 bad-trace=0 bad-sub_id=0
+ <RCVR> [PASS] 10000 messages; good=10000 acked=1000 bad=0 bad-trace=0 bad-sub_id=0
+ <RCVR> [PASS] 10000 messages; good=10000 acked=1000 bad=0 bad-trace=0 bad-sub_id=0
+ [PASS] sender rc=0 receiver rc=0
+
+Important notes:
+ + histograms should show messages for all types, except type 10 which are never sent.
+
+ + sender should receive only 1/10th of the number of messages sent back as acks;
+ modulo receiver giving up on an ack retry, so as before the sum of ack counts should
+ match the sender's received count.
+
+ + sender should fail if the received count does not match the rts-ok count indicating
+ that a return to sender was sent to the wrong spot (very unlikely here as there is
+ only one sender).
+
+
+
+Retries
+The retries counter for a sender is the number of times that a retry send loop had to be
+entered in order to successfully send a message. The sender will never give up on a send
+attempt, but retrying will affect latency of that message. A count of less than 10/10000
+messages is good, but it also depends on the rate that the sender is attempting. The
+higher the rate, the more likely the need to retry, and thus the higher this counter will
+be.
# Abstract: This is a simple script that will cause RMr to be rebuilt. It
# may be invoked by any of the run_* scripts in this directory.
#
+# NOTE:
+# The build path is echoed onto stdout so that the caller is able
+# to reference build items for compiling/linking. All other communication
+# should be directed to stderr.
+#
# Date: 24 April 2019
# Author: E. Scott Daniels
# ---------------------------------------------------------------------------------
-build_path=../../.build
+parent=${PWD%/*} # allow us to step up gracefully
+gparent=${parent%/*}
+build_path=${gparent}/.build # where we'll build
echo "$(date) build starts" >&2
(
set -e
mkdir -p $build_path
- cd ${build_path%/*} # cd barfs on ../../.build, so we do this
- cd ${build_path##*/}
+ cd $gparent
+ if [[ $1 != "nopull" ]] # pull by default, but for local dev testing this needs to be avoided
+ then
+ git pull # get the up to date code so if run from an old image it's a good test
+ fi
+ cd $build_path
cmake ..
make package
) >/tmp/PID$$.log
echo "$(date) build failed" >&2
exit 1
fi
+
echo "$(date) build completed" >&2
+echo "$build_path"
--- /dev/null
+
+# run all of the tests, building rmr before the first one.
+# Only the first test is given -B (rebuild); the tests which follow are started
+# without a build option. With set -e in force the script stops at the first
+# test script which exits with a non-zero status.
+set -e
+ksh run_app_test.ksh -B
+ksh run_multi_test.ksh
+ksh run_rr_test.ksh
+ksh run_rts_test.ksh -s 20
nano_receiver=0
wait=1
rebuild=0
+nopull="" # -b sets so that build does not pull
verbose=0
while [[ $1 == -* ]]
do
case $1 in
- -B) rebuild=1;;
+ -B) rebuild=1;; # build with pull first
+ -b) rebuild=1; nopull="nopull";; # buld without pull
-d) delay=$2; shift;;
-N) nano_sender=1
nano_receiver=1
if (( rebuild ))
then
- build_path=../../.build
set -e
- ksh ./rebuild.ksh
+ ksh ./rebuild.ksh $nopull | read build_path
set +e
else
build_path=${BUILD_PATH:-"../../.build"} # we prefer .build at the root level, but allow user option
nano_receiver=0
wait=1
rebuild=0
+nopull=""
verbose=0
nrcvrs=3 # this is sane, but -r allows it to be set up
do
case $1 in
-B) rebuild=1;;
+ -b) rebuild=1; nopull="nopull";; # enable build but without pull
-d) delay=$2; shift;;
-N) nano_sender=1
nano_receiver=1
if (( rebuild ))
then
- build_path=../../.build # if we rebuild we can insist that it is in .build :)
set -e
- ksh ./rebuild.ksh
+ ksh ./rebuild.ksh $nopull | read build_path
set +e
else
build_path=${BUILD_PATH:-"../../.build"} # we prefer .build at the root level, but allow user option
nano_receiver=0
wait=1
rebuild=0
+nopull=""
verbose=0
max_mtype=1 # causes all msgs to go with type 1; use -M to set up, but likely harder to validate
nrcvrs=3 # this is sane, but -r allows it to be set up
do
case $1 in
-B) rebuild=1;;
+ -b) rebuild=1; nopull="nopull";; # build without pulling
-d) delay=$2; shift;;
-m) max_mtype=$2; shift;;
-N) nano_sender=1
if (( rebuild ))
then
- build_path=../../.build
set -e
- ksh ./rebuild.ksh
+ ksh ./rebuild.ksh $nopull | read build_path
set +e
else
build_path=${BUILD_PATH:-"../../.build"} # we prefer .build at the root level, but allow user option
--- /dev/null
+#!/usr/bin/env ksh
+# :vi ts=4 sw=4 noet :
+#==================================================================================
+# Copyright (c) 2019 Nokia
+# Copyright (c) 2018-2019 AT&T Intellectual Property.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#==================================================================================
+#
+
+# ---------------------------------------------------------------------------------
+# Mnemonic: run_rts_test.ksh
+# Abstract: This is a simple script to set up and run the basic send/receive
+# processes for some library validation on top of nano/nng. This
+# particular test starts several senders and one receiver. All messages
+# go to the receiver and an ack is sent back to the sending process.
+# Each sender puts a tag into the message allowing it to verify that
+# all messages received were 'acks' to the ones it sent, and that no
+# rts messages were routed to the wrong sender.
+#
+# Example command line:
+#
+# Date: 15 May 2019
+# Author: E. Scott Daniels
+# ---------------------------------------------------------------------------------
+
+
+# The sender and receivers are run asynch. Their exit statuses are captured in a
+# file in order for the 'main' to pick them up easily.
+# $1 is the instance so we can keep logs separate.
+#
+function run_sender {
+	typeset instance=$1						# sender instance number supplied by the caller
+	typeset sender_bin=./sender
+
+	if (( nano_sender ))					# -N on the command line selects the nano flavour
+	then
+		sender_bin=./sender_nano
+	fi
+
+	export RMR_RTG_SVC=$(( 9991 + instance ))		# each instance gets its own value
+	port=$(( 43080 + instance ))					# and its own listen port
+
+	$sender_bin $nmsg $delay 5:6 $port		# 5:6 causes mtype 5 only which is what receiver acks
+	echo $? >/tmp/PID$$.${instance}.src		# must communicate state back via file b/c asynch
+}
+
+# $1 is the instance so we can keep logs separate
+function run_rcvr {
+	typeset port=4460								# must match the endpoint written to the route table
+	typeset rcvr_bin=./receiver
+
+	if (( nano_receiver ))							# -N on the command line selects the nano flavour
+	then
+		rcvr_bin=./receiver_nano
+	fi
+
+	export RMR_RTG_SVC=9990
+	$rcvr_bin $(( nmsg * nsenders )) $port			# one receiver expects the messages from every sender
+	echo $? >/tmp/PID$$.rrc							# exit status is passed back to main through this file
+}
+
+# Drop in a contrived route table. It should have only one entry which points
+# message type 5 to the receiver. The sender must NOT be defined for a valid
+# test (rts should not require the sender to be in the route table).
+#
+function set_rt {
+ # the table is written to rts.rt in the current directory; the single mse
+ # entry sends message type 5 to localhost:4460
+ cat <<endKat >rts.rt
+ newrt | start
+ mse |5 | -1 | localhost:4460
+ newrt | end
+endKat
+
+}
+
+# ---------------------------------------------------------
+
+# Defaults; each may be overridden by the command line option noted.
+nmsg=10						# total number of messages to be exchanged (-n value changes)
+delay=1000000				# microsec sleep between msg 1,000,000 == 1s (-d value changes)
+nano_sender=0				# start nano version if set (-N)
+nano_receiver=0
+wait=1
+rebuild=0					# -B or -b turns on
+nopull=""					# -b sets so that the build does not pull
+verbose=0					# -v turns on
+nsenders=3					# this is sane, but -s allows it to be set up
+
+# Consume leading options; parsing stops at the first argument which does not start with a dash.
+while [[ $1 == -* ]]
+do
+	case $1 in
+		-B)	rebuild=1;;								# build with pull first
+		-b)	rebuild=1; nopull="nopull";;			# build without pulling
+		-d)	delay=$2; shift;;
+		-N)	nano_sender=1
+			nano_receiver=1
+			;;
+		-n)	nmsg=$2; shift;;
+		-s)	nsenders=$2; shift;;
+		-v)	verbose=1;;
+
+		*)	echo "unrecognised option: $1"
+			echo "usage: $0 [-B|-b] [-d micro-sec-delay] [-N] [-n num-msgs] [-s nsenders] [-v]"
+			echo "  -B forces a rebuild which will use .build"
+			echo "  -b forces a rebuild, but without pulling the source first"
+			exit 1
+			;;
+	esac
+
+	shift
+done
+
+# Verbose mode: write level 2 into a control file and point RMR_VCTL_FILE at it.
+if (( verbose ))
+then
+	echo "2" >.verbose
+	export RMR_VCTL_FILE=".verbose"
+fi
+
+# Locate the build. rebuild.ksh writes the build path on stdout (all else goes to
+# stderr), so the path is read straight from the pipe. With set -e in force a
+# failed build stops the script here.
+if (( rebuild ))
+then
+	set -e
+	ksh ./rebuild.ksh $nopull | read build_path
+	set +e
+else
+	build_path=${BUILD_PATH:-"../../.build"}	# we prefer .build at the root level, but allow user option
+
+	if [[ ! -d $build_path ]]
+	then
+		echo "cannot find build in: $build_path"
+		echo "either create, and then build RMr, or set BUILD_PATH as an environment var before running this"
+		exit 1
+	fi
+fi
+
+# Libraries may land in lib64 or lib depending on the platform; use whichever exists.
+if [[ -d $build_path/lib64 ]]
+then
+	export LD_LIBRARY_PATH=$build_path:$build_path/lib64
+else
+	export LD_LIBRARY_PATH=$build_path:$build_path/lib
+fi
+export LIBRARY_PATH=$LD_LIBRARY_PATH
+export RMR_SEED_RT=./rts.rt
+
+set_rt						# create the route table
+
+# Work out which binaries this run will actually start (nano flavours when -N
+# was given) and attempt a make if any one of them is missing.
+need_sender=./sender
+need_rcvr=./receiver
+if (( nano_sender ))
+then
+	need_sender=./sender_nano
+fi
+if (( nano_receiver ))
+then
+	need_rcvr=./receiver_nano
+fi
+
+if [[ ! -f $need_sender || ! -f $need_rcvr ]]
+then
+	if ! make >/dev/null 2>&1
+	then
+		echo "[FAIL] cannot find sender and/or receiver binary, and cannot make it.... humm?"
+		exit 1
+	fi
+fi
+
+# Start the single receiver, then the senders; all run asynchronously and leave
+# their exit status in /tmp/PID<pid>.* files.
+run_rcvr &
+
+sleep 2									# let the receiver init so we don't shoot at an empty target
+for (( i=0; i < nsenders; i++ ))		# start the senders, each with an instance number
+do
+	run_sender $i &
+done
+
+wait									# block until the receiver and every sender has finished
+
+src=0									# sum of sender return codes
+for (( i=0; i < nsenders; i++ ))		# collect return codes
+do
+	x=1									# a sender which never reported is counted as a failure
+	if [[ -s /tmp/PID$$.${i}.src ]]
+	then
+		head -1 /tmp/PID$$.${i}.src | read x
+	fi
+	(( src += x ))
+done
+
+rrc=1									# same assumption for the receiver
+if [[ -s /tmp/PID$$.rrc ]]
+then
+	head -1 /tmp/PID$$.rrc | read rrc
+fi
+
+if (( !! (src + rrc) ))
+then
+	echo "[FAIL] sender rc=$src receiver rc=$rrc"
+else
+	echo "[PASS] sender rc=$src receiver rc=$rrc"
+	rm -f rts.rt						# route table created by set_rt; kept on failure
+fi
+
+rm -f /tmp/PID$$.*
+rm -f .verbose
+
+exit $(( !! (src + rrc) ))
+
return sum % 255;
}
+/*
+	See if my id string is in the buffer immediately after the first >.
+	The text following the > must match the id exactly, through to the end
+	of the buffer.
+
+	me	- nil terminated id string of this sender
+	buf	- nil terminated payload to check
+
+	Return 1 if so, 0 if not. A nil pointer for either argument, or a
+	buffer without a >, also yields 0.
+*/
+static int vet_received( char* me, char* buf ) {
+	char*	ch;
+
+	if( me == NULL || buf == NULL ) {			// nothing to compare; treat as not ours
+		return 0;
+	}
+
+	if( (ch = strchr( buf, '>' )) == NULL ) {	// no tag separator in the payload
+		return 0;
+	}
+
+	return strcmp( me, ch+1 ) == 0;
+}
+
int main( int argc, char** argv ) {
void* mrc; // msg router context
struct epoll_event events[1]; // list of events to give to epoll
int nready; // number of events ready for receive
rmr_mbuf_t* sbuf; // send buffer
rmr_mbuf_t* rbuf; // received buffer
+ char* ch;
int count = 0;
int rt_count = 0; // number of messages requiring a spin retry
int rcvd_count = 0;
+ int rts_ok = 0; // number received with our tag
int fail_count = 0; // # of failure sends after first successful send
char* listen_port = "43086";
int mtype = 0;
int stats_freq = 100;
int successful = 0; // set to true after we have a successful send
char wbuf[1024];
+ char me[128]; // who I am to vet rts was actually from me
char trace[1024];
long timeout = 0;
int delay = 100000; // usec between send attempts
int nmsgs = 10; // number of messages to send
int max_mt = 10; // reset point for message type
+ int start_mt = 0;
+ int pass = 1;
if( argc > 1 ) {
nmsgs = atoi( argv[1] );
delay = atoi( argv[2] );
}
if( argc > 3 ) {
- max_mt = atoi( argv[3] );
+ if( (ch = strchr( argv[3], ':' )) != NULL ) {
+ max_mt = atoi( ch+1 );
+ start_mt = atoi( argv[3] );
+ } else {
+ max_mt = atoi( argv[3] );
+ }
}
if( argc > 4 ) {
listen_port = argv[4];
}
+ mtype = start_mt;
+
fprintf( stderr, "<SNDR> listen port: %s; sending %d messages; delay=%d\n", listen_port, nmsgs, delay );
if( (mrc = rmr_init( listen_port, 1400, RMRFL_NONE )) == NULL ) {
rmr_set_rtimeout( mrc, 0 ); // for nano we must set the receive timeout to 0; non-blocking receive
}
- sbuf = rmr_alloc_msg( mrc, 512 ); // alloc first send buffer; subsequent buffers allcoated on send
- //sbuf = rmr_tralloc_msg( mrc, 512, 11, "xxxxxxxxxx" ); // alloc first send buffer; subsequent buffers allcoated on send
+ sbuf = rmr_alloc_msg( mrc, 1024 ); // alloc first send buffer; subsequent buffers allcoated on send
+ //sbuf = rmr_tralloc_msg( mrc, 1024, 11, "xxxxxxxxxx" ); // alloc first send buffer; subsequent buffers allcoated on send
rbuf = NULL; // don't need to alloc receive buffer
timeout = time( NULL ) + 20; // give rmr 20s to find the route table (shouldn't need that much)
timeout = time( NULL ) + 20;
+ gethostname( wbuf, sizeof( wbuf ) );
+ snprintf( me, sizeof( me ), "%s-%d", wbuf, getpid( ) );
+
while( count < nmsgs ) { // we send n messages after the first message is successful
snprintf( trace, 100, "%lld", (long long) time( NULL ) );
rmr_set_trace( sbuf, trace, strlen( trace ) + 1 );
- snprintf( wbuf, 200, "count=%d tr=%s %d stand up and cheer!", count, trace, rand() );
- snprintf( sbuf->payload, 300, "%d %d|%s", sum( wbuf ), sum( trace ), wbuf );
+ snprintf( wbuf, 512, "count=%d tr=%s %d stand up and cheer!>%s", count, trace, rand(), me );
+ snprintf( sbuf->payload, 1024, "%d %d|%s", sum( wbuf ), sum( trace ), wbuf );
sbuf->mtype = mtype; // fill in the message bits
if( mtype < 3 ) {
switch( sbuf->state ) {
case RMR_ERR_RETRY:
rt_count++;
- while( sbuf->state == RMR_ERR_RETRY ) { // soft failure (device busy?) retry
+ while( time( NULL ) < timeout && sbuf->state == RMR_ERR_RETRY ) { // soft failure (device busy?) retry
sbuf = rmr_send_msg( mrc, sbuf ); // retry send until it's good (simple test; real programmes should do better)
}
if( sbuf->state == RMR_OK ) {
count++;
mtype++;
if( mtype >= max_mt ) { // if large number of sends don't require infinite rt entries :)
- mtype = 0;
+ mtype = start_mt;
}
}
errno = 0;
rbuf = rmr_rcv_msg( mrc, rbuf );
if( rbuf ) {
+ rts_ok += vet_received( me, rbuf->payload );
rcvd_count++;
}
}
} else { // nano, we will only pick up one at a time.
if( (rbuf = rmr_rcv_msg( mrc, rbuf ) ) != NULL ) {
if( rbuf->state == RMR_OK ) {
+ rts_ok += vet_received( me, rbuf->payload );
rcvd_count++;
}
}
}
}
-
timeout = time( NULL ) + 2; // allow 2 seconds for the pipe to drain from the receiver
while( time( NULL ) < timeout );
if( rcv_fd >= 0 ) {
rbuf = rmr_rcv_msg( mrc, rbuf );
if( rbuf ) {
rcvd_count++;
+ rts_ok += vet_received( me, rbuf->payload );
timeout = time( NULL ) + 2;
}
}
if( (rbuf = rmr_torcv_msg( mrc, rbuf, 100 ) ) != NULL ) {
if( rbuf->state == RMR_OK ) {
rcvd_count++;
+ rts_ok += vet_received( me, rbuf->payload );
}
}
}
- fprintf( stderr, "<SNDR> [%s] sent=%d rcvd-acks=%d failures=%d retries=%d\n", count == nmsgs ? "PASS" : "FAIL", count, rcvd_count, fail_count, rt_count );
+ if( rcvd_count != rts_ok || count != nmsgs ) {
+ pass = 0;
+ }
+
+ fprintf( stderr, "<SNDR> [%s] sent=%d rcvd=%d rts-ok=%d failures=%d retries=%d\n",
+ pass ? "PASS" : "FAIL", count, rcvd_count, rts_ok, fail_count, rt_count );
rmr_close( mrc );
return !( count == nmsgs );