1 From 95c0ec5cb26efbe2c5dbf45df21518d8d1776be0 Mon Sep 17 00:00:00 2001
2 From: Don Penney <don.penney@windriver.com>
3 Date: Wed, 4 Jan 2017 12:15:53 -0500
4 Subject: [PATCH] TIS Patches
6 This patch rolls up the previous TIS patches, which include:
7 1. CGTS-4787 Set DRBD service ensure parameter
9 2. Updates to fix DRBD resync-rate and engineered parameters:
11 There are several DRBD performance related parameters that must be set to
12 get reasonable resync performance, otherwise default resync throughput
13 is limited to 40MB/s. Note that the user community has reported this limit
14 when using default settings, or after up-revving DRBD from 8.3, etc. E.g., they
15 realize they hit this limit despite having a 10G link or better and faster
18 The following parameters were added to puppet-drbd module for resource
19 file generation, in addition to: c-plan-ahead, c-fill-target, c-min-rate,
20 c-max-rate, currently engineered for dynamic resync-rates.
23 - 'resync-rate' (aka 'rate') was missed in the CentOS port from Kilo
24 - 'al-extents' set to 3389, a prime number. Increasing this improves
25 random write throughput. Could set a bit higher, but would need a study.
28 - 'max-buffers' engineered to scale with supported MBps; setting it too low
29 (e.g., the default setting) is a bottleneck on a 10G link. Set this to the
30 maximum settable value of 20000. Note this parameter may be settable to
31 larger values in more recent DRBD revisions. If we need to support faster
32 disks, we will likely need to increase this proportionately.
33 - 'max-epoch-size' also set to 20000. DRBD tuning recommendation page
34 sets this the same as max-buffers.
35 - 'unplug-watermark' set to 16 based on DRBD tuning recommendations page
36 - 'sndbuf-size' set to 0 to auto-tune; historically default was too small
37 - 'rcvbuf-size' set to 0 to auto-tune
39 manifests/init.pp | 11 ++--
40 manifests/resource.pp | 93 +++++++++++++++++++++++++---
41 manifests/resource/up.pp | 2 +-
42 manifests/service.pp | 2 +-
43 templates/header.res.erb | 53 ++++++++++++++--
44 templates/primary-resource.res.erb | 2 +-
45 templates/primary-stacked-resource.res.erb | 2 +-
46 templates/resource.res.erb | 2 +-
47 templates/secondary-resource.res.erb | 2 +-
48 templates/secondary-stacked-resource.res.erb | 2 +-
49 10 files changed, 148 insertions(+), 23 deletions(-)
51 diff --git a/manifests/init.pp b/manifests/init.pp
52 index 09f7d48..76ce9c9 100644
53 --- a/manifests/init.pp
54 +++ b/manifests/init.pp
58 $service_enable = true,
59 - $package_name = 'drbd8-utils',
60 + $service_ensure = 'running',
61 + $package_name = 'drbd-utils',
63 include ::drbd::service
65 @@ -22,7 +23,7 @@ class drbd(
73 require => Package['drbd'],
74 @@ -45,8 +46,10 @@ class drbd(
75 # only allow files managed by puppet in this directory.
81 + # Set purge to false so that it does not clear the dir
82 + # when the 2nd drbd resource is added.
86 require => Package['drbd'],
87 diff --git a/manifests/resource.pp b/manifests/resource.pp
88 index af2ff77..10edc1a 100644
89 --- a/manifests/resource.pp
90 +++ b/manifests/resource.pp
92 # [ha_primary] If the resource is being applied on the primary host.
93 # [initial_setup] If this run is associated with the initial setup. Allows a user
94 # to only perform dangerous setup on the initial run.
95 +# [link_util] replication link network utilization percent
96 +# [link_speed] replication link network speed mbps
97 +# [num_parallel] number of parallel drbd filesystems to sync
98 +# [rtt_ms] round-trip-time milliseconds (i.e., ping between replication nodes)
99 define drbd::resource (
102 @@ -39,7 +43,10 @@ define drbd::resource (
105 $verify_alg = 'crc32c',
107 + $link_util = false,
108 + $link_speed = false,
109 + $num_parallel = false,
111 $net_parameters = false,
114 @@ -47,6 +54,7 @@ define drbd::resource (
122 @@ -67,6 +75,75 @@ define drbd::resource (
126 + if $link_util and $link_speed and $num_parallel and $rtt_ms {
127 + # Engineer drbd variable sync rate parameters based on the following:
128 + # https://blogs.linbit.com/p/128/drbd-sync-rate-controller/
129 + # https://blogs.linbit.com/p/443/drbd-sync-rate-controller-2/
130 + # Methodology adapted to account for replication link speed and parallelism.
132 + # Since there is no aggregate bandwidth control, prorate the drbd
133 + # replication bandwidth based on parallelism.
134 + # Based on experimentation, it seems generally better to set num_parallel
135 + # to 1 and let DRBD auto-regulate its throughput. The end result is that
136 + # multiple competing filesystems (i.e., on same disk device) already have
137 + # their sync throughput reduced.
138 + $mbps = $link_speed / $num_parallel
140 + # bandwidth delay product
141 + $bdp_k = $mbps * $rtt_ms
143 + # engineer initial sync rate as percent of link bandwidth
144 + $rate_M = floor($link_util * $mbps / 8 / 100)
145 + $rate = "${rate_M}M"
147 + # engineer c_plan_ahead to default value (tenths)
148 + # Documentation indicates this value OK even for 200 ms RTT.
151 + # engineer c_fill_target as 1*BDP (tune within 1x to 3x BDP;
152 + # choose minimum value that saturates bandwidth)
153 + $fill_target_k = floor(1 * $bdp_k)
154 + $c_fill_target = "${fill_target_k}k"
156 + # engineer c_min_rate -- experimentally determined so DRBD is not
157 + # throttled to a crawl even when there is minimal application IO.
158 + # DRBD default is way too small.
159 + $min_rate_M = 15 + floor($link_util * $mbps / 8 / 100 / 25)
160 + $c_min_rate = "${min_rate_M}M"
162 + # engineer c_max_rate as percent of link bandwidth
163 + $max_rate_M = floor($link_util * $mbps / 8 / 100)
164 + $c_max_rate = "${max_rate_M}M"
166 + # various tuning settings to enable larger link bandwidth (eg, 10G)
167 + # max_buffers should scale with MBps; set to maximum settable
168 + $max_buffers = 20000
169 + $max_epoch_size = 20000
170 + $unplug_watermark = 16
171 + # sndbuf_size and rcvbuf_size should scale with mbps; set 0 to auto-tune
174 + # increase al_extents to improve random write throughput; set to prime number
177 + # disable variable sync rate
179 + $c_fill_target = false
180 + $c_min_rate = false
181 + $c_max_rate = false
183 + # engineer fixed sync rate at 40 percent of 1G
184 + $rate_M = floor(40 * 1000 / 8 / 100)
185 + $rate = "${rate_M}M"
187 + $max_buffers = false
188 + $max_epoch_size = false
189 + $unplug_watermark = false
190 + $sndbuf_size = false
191 + $rcvbuf_size = false
192 + $al_extents = false
195 concat { "/etc/drbd.d/${name}.res":
198 @@ -94,13 +171,13 @@ define drbd::resource (
200 # Export our fragment for the clustered node
201 if $ha_primary and $cluster {
202 - @@concat::fragment { "${name} ${cluster} primary resource":
203 + concat::fragment { "${name} ${cluster} primary resource":
204 target => "/etc/drbd.d/${name}.res",
205 content => template('drbd/resource.res.erb'),
209 - @@concat::fragment { "${name} ${cluster} secondary resource":
210 + concat::fragment { "${name} ${cluster} secondary resource":
211 target => "/etc/drbd.d/${name}.res",
212 content => template('drbd/resource.res.erb'),
214 @@ -137,11 +214,11 @@ define drbd::resource (
219 - # Import cluster nodes
220 - Concat::Fragment <<| title == "${name} ${cluster} primary resource" |>>
221 - Concat::Fragment <<| title == "${name} ${cluster} secondary resource" |>>
224 +# # Import cluster nodes
225 +# Concat::Fragment <<| title == "${name} ${cluster} primary resource" |>>
226 +# Concat::Fragment <<| title == "${name} ${cluster} secondary resource" |>>
229 # Due to a bug in puppet, defined() conditionals must be in a defined
230 # resource to be evaluated *after* the collector instead of before.
231 diff --git a/manifests/resource/up.pp b/manifests/resource/up.pp
232 index 7668792..b626f55 100644
233 --- a/manifests/resource/up.pp
234 +++ b/manifests/resource/up.pp
235 @@ -70,7 +70,7 @@ define drbd::resource::up (
236 # ensure that the device is mounted
243 options => 'defaults,noauto',
244 diff --git a/manifests/service.pp b/manifests/service.pp
245 index de56b34..f9b217a 100644
246 --- a/manifests/service.pp
247 +++ b/manifests/service.pp
249 class drbd::service {
252 + ensure => $drbd::service_ensure,
253 enable => $drbd::service_enable,
254 require => Package['drbd'],
255 restart => 'service drbd reload',
256 diff --git a/templates/header.res.erb b/templates/header.res.erb
257 index 2d785c4..a3256a3 100644
258 --- a/templates/header.res.erb
259 +++ b/templates/header.res.erb
260 @@ -5,7 +5,32 @@ resource <%= @name %> {
266 + resync-rate <%= @rate %>;
268 +<% if @c_plan_ahead -%>
269 + c-plan-ahead <%= @c_plan_ahead %>;
271 +<% if @c_fill_target -%>
272 + c-fill-target <%= @c_fill_target %>;
274 +<% if @c_min_rate -%>
275 + c-min-rate <%= @c_min_rate %>;
277 +<% if @c_max_rate -%>
278 + c-max-rate <%= @c_max_rate %>;
280 +<% if @al_extents -%>
281 + al-extents <%= @al_extents %>;
286 + after-sb-0pri discard-zero-changes;
287 + after-sb-1pri discard-secondary;
288 + after-sb-2pri disconnect;
292 shared-secret "<%= @secret %>";
293 @@ -16,12 +41,32 @@ resource <%= @name %> {
300 +<% if @max_buffers -%>
301 + max-buffers <%= @max_buffers %>;
303 +<% if @max_epoch_size -%>
304 + max-epoch-size <%= @max_epoch_size %>;
306 +<% if @unplug_watermark -%>
307 + unplug-watermark <%= @unplug_watermark %>;
309 +<% if @sndbuf_size -%>
310 + sndbuf-size <%= @sndbuf_size %>;
312 +<% if @rcvbuf_size -%>
313 + rcvbuf-size <%= @rcvbuf_size %>;
315 +<% if @verify_alg -%>
316 verify-alg <%= @verify_alg %>;
324 +<% @handlers.sort_by {|k, v| k}.each do |k, v| -%>
325 + <%= k %> "<%= v %>";
330 diff --git a/templates/primary-resource.res.erb b/templates/primary-resource.res.erb
331 index f8af77e..6032fd2 100644
332 --- a/templates/primary-resource.res.erb
333 +++ b/templates/primary-resource.res.erb
336 - address <%= @ip1 %>:<%= @port %>;
337 + address <%= IPAddr.new(@ip1).ipv6?() ? "ipv6 ["+@ip1+"]:"+@port : "ipv4 "+@ip1+":"+@port %>;
339 diff --git a/templates/primary-stacked-resource.res.erb b/templates/primary-stacked-resource.res.erb
340 index 7eb4dad..a22d8b3 100644
341 --- a/templates/primary-stacked-resource.res.erb
342 +++ b/templates/primary-stacked-resource.res.erb
344 stacked-on-top-of <%= @res1 %> {
345 - address <%= @ip1 %>:<%= @port %>;
346 + address <%= IPAddr.new(@ip1).ipv6?() ? "ipv6 ["+@ip1+"]:"+@port : "ipv4 "+@ip1+":"+@port %>;
348 diff --git a/templates/resource.res.erb b/templates/resource.res.erb
349 index 047877e..9dd4c4d 100644
350 --- a/templates/resource.res.erb
351 +++ b/templates/resource.res.erb
353 on <%= @hostname %> {
354 - address <%= @ipaddress %>:<%= @port %>;
355 + address <%= IPAddr.new(@ipaddress).ipv6?() ? "ipv6 ["+@ipaddress+"]:"+@port : "ipv4 "+@ipaddress+":"+@port %>;
357 diff --git a/templates/secondary-resource.res.erb b/templates/secondary-resource.res.erb
358 index 678640a..cf2fd96 100644
359 --- a/templates/secondary-resource.res.erb
360 +++ b/templates/secondary-resource.res.erb
363 - address <%= @ip2 %>:<%= @port %>;
364 + address <%= IPAddr.new(@ip2).ipv6?() ? "ipv6 ["+@ip2+"]:"+@port : "ipv4 "+@ip2+":"+@port %>;
366 diff --git a/templates/secondary-stacked-resource.res.erb b/templates/secondary-stacked-resource.res.erb
367 index 409a705..87d28f5 100644
368 --- a/templates/secondary-stacked-resource.res.erb
369 +++ b/templates/secondary-stacked-resource.res.erb
371 stacked-on-top-of <%= @res2 %> {
372 - address <%= @ip2 %>:<%= @port %>;
373 + address <%= IPAddr.new(@ip2).ipv6?() ? "ipv6 ["+@ip2+"]:"+@port : "ipv4 "+@ip2+":"+@port %>;