32. MHA-vip漂移实现应用透明¶

1. 前提¶

#1.GTID主从复制
192.168.178.151
192.168.178.161 #主库
192.168.178.162

#2.mha
192.168.178.162

#由于修改虚拟机内存（1G->2G），所有节点重启，gtid和mha都停止运行了
#重启后要重新关闭防火墙

2. 启动GTID¶

#1.启动gtid主从复制

systemctl start mysqld
netstat -tunlp |grep 330

#2.从库查看状态
mysql
show slave status\G

#151
mysql> show slave status\G
*************************** 1. row ***************************
               Slave_IO_State: Connecting to master
                  Master_Host: 192.168.178.161
                  Master_User: repl
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000001
          Read_Master_Log_Pos: 1045
               Relay_Log_File: 151-relay-bin.000002
                Relay_Log_Pos: 437
        Relay_Master_Log_File: mysql-bin.000001
             Slave_IO_Running: Connecting
            Slave_SQL_Running: Yes
              Replicate_Do_DB:
          Replicate_Ignore_DB:
           Replicate_Do_Table:
       Replicate_Ignore_Table:
      Replicate_Wild_Do_Table:
  Replicate_Wild_Ignore_Table:
                   Last_Errno: 0
                   Last_Error:
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 1045
              Relay_Log_Space: 796
              Until_Condition: None
               Until_Log_File:
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File:
           Master_SSL_CA_Path:
              Master_SSL_Cert:
            Master_SSL_Cipher:
               Master_SSL_Key:
        Seconds_Behind_Master: NULL
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 2003
                Last_IO_Error: error connecting to master 'repl@192.168.178.161:3306' - retry-time: 60  retries: 2
               Last_SQL_Errno: 0
               Last_SQL_Error:
  Replicate_Ignore_Server_Ids:
             Master_Server_Id: 0
                  Master_UUID: 559ab858-a7c0-11eb-8c51-000c29a0a205
             Master_Info_File: /application/mysql/data/master.info
                    SQL_Delay: 0
          SQL_Remaining_Delay: NULL
      Slave_SQL_Running_State: Slave has read all relay log; waiting for more updates
           Master_Retry_Count: 86400
                  Master_Bind:
      Last_IO_Error_Timestamp: 210430 11:37:13
     Last_SQL_Error_Timestamp:
               Master_SSL_Crl:
           Master_SSL_Crlpath:
           Retrieved_Gtid_Set:
            Executed_Gtid_Set: 741453b1-a83a-11eb-acdf-000c29482705:1-4
                Auto_Position: 1
         Replicate_Rewrite_DB:
                 Channel_Name:
           Master_TLS_Version:
1 row in set (0.00 sec)

mysql>

#io线程报错，连不上主库

#162
mysql> show slave status\G
*************************** 1. row ***************************
               Slave_IO_State: Connecting to master
                  Master_Host: 192.168.178.161
                  Master_User: repl
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000001
          Read_Master_Log_Pos: 1045
               Relay_Log_File: 162-relay-bin.000002
                Relay_Log_Pos: 437
        Relay_Master_Log_File: mysql-bin.000001
             Slave_IO_Running: Connecting
            Slave_SQL_Running: Yes
              Replicate_Do_DB:
          Replicate_Ignore_DB:
           Replicate_Do_Table:
       Replicate_Ignore_Table:
      Replicate_Wild_Do_Table:
  Replicate_Wild_Ignore_Table:
                   Last_Errno: 0
                   Last_Error:
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 1045
              Relay_Log_Space: 796
              Until_Condition: None
               Until_Log_File:
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File:
           Master_SSL_CA_Path:
              Master_SSL_Cert:
            Master_SSL_Cipher:
               Master_SSL_Key:
        Seconds_Behind_Master: NULL
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 2003
                Last_IO_Error: error connecting to master 'repl@192.168.178.161:3306' - retry-time: 60  retries: 2
               Last_SQL_Errno: 0
               Last_SQL_Error:
  Replicate_Ignore_Server_Ids:
             Master_Server_Id: 0
                  Master_UUID: 559ab858-a7c0-11eb-8c51-000c29a0a205
             Master_Info_File: /application/mysql/data/master.info
                    SQL_Delay: 0
          SQL_Remaining_Delay: NULL
      Slave_SQL_Running_State: Slave has read all relay log; waiting for more updates
           Master_Retry_Count: 86400
                  Master_Bind:
      Last_IO_Error_Timestamp: 210429 19:37:19
     Last_SQL_Error_Timestamp:
               Master_SSL_Crl:
           Master_SSL_Crlpath:
           Retrieved_Gtid_Set:
            Executed_Gtid_Set: 741453b1-a83a-11eb-acdf-000c29482705:1-4
                Auto_Position: 1
         Replicate_Rewrite_DB:
                 Channel_Name:
           Master_TLS_Version:
1 row in set (0.00 sec)

mysql>

#io线程故障，连不上主库

#排查故障------1.查看主库用户
#161
mysql> select user,host from mysql.user;
+---------------+---------------+
| user          | host          |
+---------------+---------------+
| mha           | 192.168.178.% |
| repl          | 192.168.178.% |
| mysql.session | localhost     |
| mysql.sys     | localhost     |
| root          | localhost     |
+---------------+---------------+
5 rows in set (0.00 sec)

mysql>
#2.查看主库状态
mysql> show master status;
+------------------+----------+--------------+------------------+------------------------------------------+
| File             | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set                        |
+------------------+----------+--------------+------------------+------------------------------------------+
| mysql-bin.000002 |      194 |              |                  | 741453b1-a83a-11eb-acdf-000c29482705:1-4 |
+------------------+----------+--------------+------------------+------------------------------------------+
1 row in set (0.00 sec)

mysql>


#3.所有节点关闭防火墙
iptables -F

#4.再次查看主从状态，成功！！！
show slave status\G

3. 开启mha¶

#1.mha manager节点开启mha
[root@162 ~]# nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover </dev/null >/var/log/mha/app1/manager.log 2>&1 &
[1] 8555
[root@162 ~]#

#2.查看mha状态
[root@162 ~]# masterha_check_status  --conf=/etc/mha/app1.cnf
app1 (pid:8555) is running(0:PING_OK), master:192.168.178.161
[root@162 ~]#

4. 上传vip迁移脚本¶

#脚本从模块六课件中获得，上传到mha manager节点
cd /usr/local/bin
ls /usr/local/bin

[root@162 ~]# cd /usr/local/bin
[root@162 bin]# ls /usr/local/bin
[root@162 bin]#
[root@162 bin]# ls
master_ip_failover

5. 主库手动生成VIP¶

#1.查看主库IP信息
[root@161 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host
       valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:a0:a2:05 brd ff:ff:ff:ff:ff:ff
    inet 192.168.178.161/24 brd 192.168.178.255 scope global noprefixroute ens33
       valid_lft forever preferred_lft forever
    inet6 fe80::59f5:ba12:6b95:3cc7/64 scope link noprefixroute
       valid_lft forever preferred_lft forever
[root@161 ~]#
[root@161 ~]#
[root@161 ~]#
[root@161 ~]# ifconfig
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.178.161  netmask 255.255.255.0  broadcast 192.168.178.255
        inet6 fe80::59f5:ba12:6b95:3cc7  prefixlen 64  scopeid 0x20<link>
        ether 00:0c:29:a0:a2:05  txqueuelen 1000  (Ethernet)
        RX packets 2382  bytes 179655 (175.4 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 3039  bytes 330731 (322.9 KiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        inet6 ::1  prefixlen 128  scopeid 0x10<host>
        loop  txqueuelen 1000  (Local Loopback)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

[root@161 ~]#

#2.手动生成VIP

#根据视频中eth0生成测试
ifconfig eth0:0 192.168.178.200/24

[root@161 ~]# ifconfig eth0:0 192.168.178.200/24
SIOCSIFADDR: No such device
eth0:0: ERROR while getting interface flags: No such device
SIOCSIFNETMASK: No such device
[root@161 ~]#

#报错，看样子需要根据自己电脑的网卡来建
ifconfig ens33:1 192.168.178.150/24

[root@161 ~]# ifconfig ens33:1 192.168.178.150/24
[root@161 ~]# ifconfig
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.178.161  netmask 255.255.255.0  broadcast 192.168.178.255
        inet6 fe80::59f5:ba12:6b95:3cc7  prefixlen 64  scopeid 0x20<link>
        ether 00:0c:29:a0:a2:05  txqueuelen 1000  (Ethernet)
        RX packets 3456  bytes 255373 (249.3 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 5624  bytes 620277 (605.7 KiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

ens33:1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.178.150  netmask 255.255.255.0  broadcast 192.168.178.255
        ether 00:0c:29:a0:a2:05  txqueuelen 1000  (Ethernet)

lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        inet6 ::1  prefixlen 128  scopeid 0x10<host>
        loop  txqueuelen 1000  (Local Loopback)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

[root@161 ~]#
[root@161 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host
       valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:a0:a2:05 brd ff:ff:ff:ff:ff:ff
    inet 192.168.178.161/24 brd 192.168.178.255 scope global noprefixroute ens33
       valid_lft forever preferred_lft forever
    inet 192.168.178.150/24 brd 192.168.178.255 scope global secondary ens33:1
       valid_lft forever preferred_lft forever
    inet6 fe80::59f5:ba12:6b95:3cc7/64 scope link noprefixroute
       valid_lft forever preferred_lft forever
[root@161 ~]#

#得到vip 
#ens33:1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
#inet 192.168.178.150  netmask 255.255.255.0  broadcast 192.168.178.255
#ether 00:0c:29:a0:a2:05  txqueuelen 1000  (Ethernet)

6. 根据生成的vip修改脚本¶

#1.查看脚本内容
[root@162 bin]# cat master_ip_failover
#!/usr/bin/env perl

use strict;
use warnings FATAL => 'all';

use Getopt::Long;

my (
    $command,          $ssh_user,        $orig_master_host, $orig_master_ip,
    $orig_master_port, $new_master_host, $new_master_ip,    $new_master_port
);

my $vip = '10.0.0.55/24';
my $key = '1';
my $ssh_start_vip = "/sbin/ifconfig eth1:$key $vip";
my $ssh_stop_vip = "/sbin/ifconfig eth1:$key down";

GetOptions(
    'command=s'          => \$command,
    'ssh_user=s'         => \$ssh_user,
    'orig_master_host=s' => \$orig_master_host,
    'orig_master_ip=s'   => \$orig_master_ip,
    'orig_master_port=i' => \$orig_master_port,
    'new_master_host=s'  => \$new_master_host,
    'new_master_ip=s'    => \$new_master_ip,
    'new_master_port=i'  => \$new_master_port,
);

exit &main();

sub main {

    print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";

    if ( $command eq "stop" || $command eq "stopssh" ) {

        my $exit_code = 1;
        eval {
            print "Disabling the VIP on old master: $orig_master_host \n";
            &stop_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn "Got Error: $@\n";
            exit $exit_code;
        }
        exit $exit_code;
    }
    elsif ( $command eq "start" ) {

        my $exit_code = 10;
        eval {
            print "Enabling the VIP - $vip on the new master - $new_master_host \n";
            &start_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn $@;
            exit $exit_code;
        }
        exit $exit_code;
    }
    elsif ( $command eq "status" ) {
        print "Checking the Status of the script.. OK \n";
        exit 0;
    }
    else {
        &usage();
        exit 1;
    }
}

sub start_vip() {
    `ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}
sub stop_vip() {
     return 0  unless  ($ssh_user);
    `ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}

sub usage {
    print
    "Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
}

#2.修改VIP地址
my $vip = '192.168.178.150/24';
my $key = '1';
my $ssh_start_vip = "/sbin/ifconfig ens33:$key $vip";   #根据自己电脑修改，否则会报错第10u
my $ssh_stop_vip = "/sbin/ifconfig ens33:$key down";

7. 给脚本赋予可执行权限¶

#162
chmod +x /usr/local/bin/master_ip_failover
ll /usr/local/bin/master_ip_failover

[root@162 bin]# chmod +x /usr/local/bin/master_ip_failover
[root@162 bin]# ll /usr/local/bin/master_ip_failover
-rwxr-xr-x 1 root root 2256 Apr 29 21:07 /usr/local/bin/master_ip_failover
[root@162 bin]#

8. 修改mha manager配置文件¶

#162
vim /etc/mha/app1.cnf
master_ip_failover_script=/usr/local/bin/master_ip_failover

[root@162 bin]# vim /etc/mha/app1.cnf
[server default]
manager_log=/var/log/mha/app1/manager
manager_workdir=/var/log/mha/app1
master_binlog_dir=/data/mysql                   #主库二进制日志目录
password=mha
ping_interval=2
repl_password=123
repl_user=repl
ssh_user=root
user=mha
master_ip_failover_script=/usr/local/bin/master_ip_failover

[server1]
hostname=192.168.178.151
port=3306

[server2]
hostname=192.168.178.161
port=3306

[server3]
hostname=192.168.178.162
port=3306

9. 重启mha¶

#162
#关闭mha
masterha_stop --conf=/etc/mha/app1.cnf
#启动mha
nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover </dev/null > /var/log/mha/app1/manager.log 2>&1 &

[root@162 bin]# masterha_stop --conf=/etc/mha/app1.cnf
Stopped app1 successfully.
[1]+  Exit 1                  nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /var/log/mha/app1/manager.log 2>&1  (wd: ~)
(wd now: /usr/local/bin)
[root@162 bin]# nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover </dev/null > /var/log/mha/app1/manager.log 2>&1 &
[1] 18081
[root@162 bin]#
[1]+  Exit 1                  nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /var/log/mha/app1/manager.log 2>&1
[root@162 bin]#

#查看mha状态
[root@162 bin]# masterha_check_status --conf=/etc/mha/app1.cnf
app1 is stopped(2:NOT_RUNNING).

#启动失败，查看日志信息

#启动失败，查看日志信息

[root@162 bin]# cat /var/log/mha/app1/manager
Wed Apr 28 22:38:42 2021 - [info] MHA::MasterMonitor version 0.58.
Wed Apr 28 22:38:43 2021 - [info] GTID failover mode = 1
Wed Apr 28 22:38:43 2021 - [info] Dead Servers:
Wed Apr 28 22:38:43 2021 - [info] Alive Servers:
Wed Apr 28 22:38:43 2021 - [info]   192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:38:43 2021 - [info]   192.168.178.161(192.168.178.161:3306)
Wed Apr 28 22:38:43 2021 - [info]   192.168.178.162(192.168.178.162:3306)
Wed Apr 28 22:38:43 2021 - [info] Alive Slaves:
Wed Apr 28 22:38:43 2021 - [info]   192.168.178.161(192.168.178.161:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Wed Apr 28 22:38:43 2021 - [info]     GTID ON
Wed Apr 28 22:38:43 2021 - [info]     Replicating from 192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:38:43 2021 - [info]   192.168.178.162(192.168.178.162:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Wed Apr 28 22:38:43 2021 - [info]     GTID ON
Wed Apr 28 22:38:43 2021 - [info]     Replicating from 192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:38:43 2021 - [info] Current Alive Master: 192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:38:43 2021 - [info] Checking slave configurations..
Wed Apr 28 22:38:43 2021 - [info]  read_only=1 is not set on slave 192.168.178.161(192.168.178.161:3306).
Wed Apr 28 22:38:43 2021 - [info]  read_only=1 is not set on slave 192.168.178.162(192.168.178.162:3306).
Wed Apr 28 22:38:43 2021 - [info] Checking replication filtering settings..
Wed Apr 28 22:38:43 2021 - [info]  binlog_do_db= , binlog_ignore_db=
Wed Apr 28 22:38:43 2021 - [info]  Replication filtering check ok.
Wed Apr 28 22:38:43 2021 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking.
Wed Apr 28 22:38:43 2021 - [info] Checking SSH publickey authentication settings on the current master..
Wed Apr 28 22:38:43 2021 - [info] HealthCheck: SSH to 192.168.178.151 is reachable.
Wed Apr 28 22:38:43 2021 - [info]
192.168.178.151(192.168.178.151:3306) (current master)
 +--192.168.178.161(192.168.178.161:3306)
 +--192.168.178.162(192.168.178.162:3306)

Wed Apr 28 22:38:43 2021 - [warning] master_ip_failover_script is not defined.
Wed Apr 28 22:38:43 2021 - [warning] shutdown_script is not defined.
Wed Apr 28 22:38:43 2021 - [info] Set master ping interval 2 seconds.
Wed Apr 28 22:38:43 2021 - [warning] secondary_check_script is not defined. It is highly recommended setting it to check master reachability from two or more routes.
Wed Apr 28 22:38:43 2021 - [info] Starting ping health check on 192.168.178.151(192.168.178.151:3306)..
Wed Apr 28 22:38:43 2021 - [info] Ping(SELECT) succeeded, waiting until MySQL doesn't respond..
Wed Apr 28 22:57:42 2021 - [warning] Got error on MySQL select ping: 2006 (MySQL server has gone away)
Wed Apr 28 22:57:42 2021 - [info] Executing SSH check script: exit 0
Wed Apr 28 22:57:42 2021 - [info] HealthCheck: SSH to 192.168.178.151 is reachable.
Wed Apr 28 22:57:44 2021 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '192.168.178.151' (111))
Wed Apr 28 22:57:44 2021 - [warning] Connection failed 2 time(s)..
Wed Apr 28 22:57:46 2021 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '192.168.178.151' (111))
Wed Apr 28 22:57:46 2021 - [warning] Connection failed 3 time(s)..
Wed Apr 28 22:57:48 2021 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '192.168.178.151' (111))
Wed Apr 28 22:57:48 2021 - [warning] Connection failed 4 time(s)..
Wed Apr 28 22:57:48 2021 - [warning] Master is not reachable from health checker!
Wed Apr 28 22:57:48 2021 - [warning] Master 192.168.178.151(192.168.178.151:3306) is not reachable!
Wed Apr 28 22:57:48 2021 - [warning] SSH is reachable.
Wed Apr 28 22:57:48 2021 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha_default.cnf and /etc/mha/app1.cnf again, and trying to connect to all servers to check server status..
Wed Apr 28 22:57:48 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Wed Apr 28 22:57:48 2021 - [info] Reading application default configuration from /etc/mha/app1.cnf..
Wed Apr 28 22:57:48 2021 - [info] Reading server configuration from /etc/mha/app1.cnf..
Wed Apr 28 22:57:49 2021 - [info] GTID failover mode = 1
Wed Apr 28 22:57:49 2021 - [info] Dead Servers:
Wed Apr 28 22:57:49 2021 - [info]   192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:57:49 2021 - [info] Alive Servers:
Wed Apr 28 22:57:49 2021 - [info]   192.168.178.161(192.168.178.161:3306)
Wed Apr 28 22:57:49 2021 - [info]   192.168.178.162(192.168.178.162:3306)
Wed Apr 28 22:57:49 2021 - [info] Alive Slaves:
Wed Apr 28 22:57:49 2021 - [info]   192.168.178.161(192.168.178.161:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Wed Apr 28 22:57:49 2021 - [info]     GTID ON
Wed Apr 28 22:57:49 2021 - [info]     Replicating from 192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:57:49 2021 - [info]   192.168.178.162(192.168.178.162:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Wed Apr 28 22:57:49 2021 - [info]     GTID ON
Wed Apr 28 22:57:49 2021 - [info]     Replicating from 192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:57:49 2021 - [info] Checking slave configurations..
Wed Apr 28 22:57:49 2021 - [info]  read_only=1 is not set on slave 192.168.178.161(192.168.178.161:3306).
Wed Apr 28 22:57:49 2021 - [info]  read_only=1 is not set on slave 192.168.178.162(192.168.178.162:3306).
Wed Apr 28 22:57:49 2021 - [info] Checking replication filtering settings..
Wed Apr 28 22:57:49 2021 - [info]  Replication filtering check ok.
Wed Apr 28 22:57:49 2021 - [info] Master is down!
Wed Apr 28 22:57:49 2021 - [info] Terminating monitoring script.
Wed Apr 28 22:57:49 2021 - [info] Got exit code 20 (Master dead).
Wed Apr 28 22:57:49 2021 - [info] MHA::MasterFailover version 0.58.
Wed Apr 28 22:57:49 2021 - [info] Starting master failover.
Wed Apr 28 22:57:49 2021 - [info]
Wed Apr 28 22:57:49 2021 - [info] * Phase 1: Configuration Check Phase..
Wed Apr 28 22:57:49 2021 - [info]
Wed Apr 28 22:57:50 2021 - [info] GTID failover mode = 1
Wed Apr 28 22:57:50 2021 - [info] Dead Servers:
Wed Apr 28 22:57:50 2021 - [info]   192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:57:50 2021 - [info] Checking master reachability via MySQL(double check)...
Wed Apr 28 22:57:50 2021 - [info]  ok.
Wed Apr 28 22:57:50 2021 - [info] Alive Servers:
Wed Apr 28 22:57:50 2021 - [info]   192.168.178.161(192.168.178.161:3306)
Wed Apr 28 22:57:50 2021 - [info]   192.168.178.162(192.168.178.162:3306)
Wed Apr 28 22:57:50 2021 - [info] Alive Slaves:
Wed Apr 28 22:57:50 2021 - [info]   192.168.178.161(192.168.178.161:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Wed Apr 28 22:57:50 2021 - [info]     GTID ON
Wed Apr 28 22:57:50 2021 - [info]     Replicating from 192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:57:50 2021 - [info]   192.168.178.162(192.168.178.162:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Wed Apr 28 22:57:50 2021 - [info]     GTID ON
Wed Apr 28 22:57:50 2021 - [info]     Replicating from 192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:57:50 2021 - [info] Starting GTID based failover.
Wed Apr 28 22:57:50 2021 - [info]
Wed Apr 28 22:57:50 2021 - [info] ** Phase 1: Configuration Check Phase completed.
Wed Apr 28 22:57:50 2021 - [info]
Wed Apr 28 22:57:50 2021 - [info] * Phase 2: Dead Master Shutdown Phase..
Wed Apr 28 22:57:50 2021 - [info]
Wed Apr 28 22:57:50 2021 - [info] Forcing shutdown so that applications never connect to the current master..
Wed Apr 28 22:57:50 2021 - [warning] master_ip_failover_script is not set. Skipping invalidating dead master IP address.
Wed Apr 28 22:57:50 2021 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master.
Wed Apr 28 22:57:51 2021 - [info] * Phase 2: Dead Master Shutdown Phase completed.
Wed Apr 28 22:57:51 2021 - [info]
Wed Apr 28 22:57:51 2021 - [info] * Phase 3: Master Recovery Phase..
Wed Apr 28 22:57:51 2021 - [info]
Wed Apr 28 22:57:51 2021 - [info] * Phase 3.1: Getting Latest Slaves Phase..
Wed Apr 28 22:57:51 2021 - [info]
Wed Apr 28 22:57:51 2021 - [info] The latest binary log file/position on all slaves is mysql-bin.000007:1045
Wed Apr 28 22:57:51 2021 - [info] Retrieved Gtid Set: 741453b1-a83a-11eb-acdf-000c29482705:1-4
Wed Apr 28 22:57:51 2021 - [info] Latest slaves (Slaves that received relay log files to the latest):
Wed Apr 28 22:57:51 2021 - [info]   192.168.178.161(192.168.178.161:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Wed Apr 28 22:57:51 2021 - [info]     GTID ON
Wed Apr 28 22:57:51 2021 - [info]     Replicating from 192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:57:51 2021 - [info]   192.168.178.162(192.168.178.162:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Wed Apr 28 22:57:51 2021 - [info]     GTID ON
Wed Apr 28 22:57:51 2021 - [info]     Replicating from 192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:57:51 2021 - [info] The oldest binary log file/position on all slaves is mysql-bin.000007:1045
Wed Apr 28 22:57:51 2021 - [info] Retrieved Gtid Set: 741453b1-a83a-11eb-acdf-000c29482705:1-4
Wed Apr 28 22:57:51 2021 - [info] Oldest slaves:
Wed Apr 28 22:57:51 2021 - [info]   192.168.178.161(192.168.178.161:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Wed Apr 28 22:57:51 2021 - [info]     GTID ON
Wed Apr 28 22:57:51 2021 - [info]     Replicating from 192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:57:51 2021 - [info]   192.168.178.162(192.168.178.162:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Wed Apr 28 22:57:51 2021 - [info]     GTID ON
Wed Apr 28 22:57:51 2021 - [info]     Replicating from 192.168.178.151(192.168.178.151:3306)
Wed Apr 28 22:57:51 2021 - [info]
Wed Apr 28 22:57:51 2021 - [info] * Phase 3.3: Determining New Master Phase..
Wed Apr 28 22:57:51 2021 - [info]
Wed Apr 28 22:57:51 2021 - [info] Searching new master from slaves..
Wed Apr 28 22:57:51 2021 - [info]  Candidate masters from the configuration file:
Wed Apr 28 22:57:51 2021 - [info]  Non-candidate masters:
Wed Apr 28 22:57:51 2021 - [info] New master is 192.168.178.161(192.168.178.161:3306)
Wed Apr 28 22:57:51 2021 - [info] Starting master failover..
Wed Apr 28 22:57:51 2021 - [info]
From:
192.168.178.151(192.168.178.151:3306) (current master)
 +--192.168.178.161(192.168.178.161:3306)
 +--192.168.178.162(192.168.178.162:3306)

To:
192.168.178.161(192.168.178.161:3306) (new master)
 +--192.168.178.162(192.168.178.162:3306)
Wed Apr 28 22:57:51 2021 - [info]
Wed Apr 28 22:57:51 2021 - [info] * Phase 3.3: New Master Recovery Phase..
Wed Apr 28 22:57:51 2021 - [info]
Wed Apr 28 22:57:51 2021 - [info]  Waiting all logs to be applied..
Wed Apr 28 22:57:51 2021 - [info]   done.
Wed Apr 28 22:57:51 2021 - [info] Getting new master's binlog name and position..
Wed Apr 28 22:57:51 2021 - [info]  mysql-bin.000001:1045
Wed Apr 28 22:57:51 2021 - [info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.178.161', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='repl', MASTER_PASSWORD='xxx';
Wed Apr 28 22:57:51 2021 - [info] Master Recovery succeeded. File:Pos:Exec_Gtid_Set: mysql-bin.000001, 1045, 741453b1-a83a-11eb-acdf-000c29482705:1-4
Wed Apr 28 22:57:51 2021 - [warning] master_ip_failover_script is not set. Skipping taking over new master IP address.
Wed Apr 28 22:57:51 2021 - [info] ** Finished master recovery successfully.
Wed Apr 28 22:57:51 2021 - [info] * Phase 3: Master Recovery Phase completed.
Wed Apr 28 22:57:51 2021 - [info]
Wed Apr 28 22:57:51 2021 - [info] * Phase 4: Slaves Recovery Phase..
Wed Apr 28 22:57:51 2021 - [info]
Wed Apr 28 22:57:51 2021 - [info]
Wed Apr 28 22:57:51 2021 - [info] * Phase 4.1: Starting Slaves in parallel..
Wed Apr 28 22:57:51 2021 - [info]
Wed Apr 28 22:57:51 2021 - [info] -- Slave recovery on host 192.168.178.162(192.168.178.162:3306) started, pid: 61068. Check tmp log /var/log/mha/app1/192.168.178.162_3306_20210428225749.log if it takes time..
Wed Apr 28 22:57:53 2021 - [info]
Wed Apr 28 22:57:53 2021 - [info] Log messages from 192.168.178.162 ...
Wed Apr 28 22:57:53 2021 - [info]
Wed Apr 28 22:57:51 2021 - [info]  Resetting slave 192.168.178.162(192.168.178.162:3306) and starting replication from the new master 192.168.178.161(192.168.178.161:3306)..
Wed Apr 28 22:57:51 2021 - [info]  Executed CHANGE MASTER.
Wed Apr 28 22:57:52 2021 - [info]  Slave started.
Wed Apr 28 22:57:52 2021 - [info]  gtid_wait(741453b1-a83a-11eb-acdf-000c29482705:1-4) completed on 192.168.178.162(192.168.178.162:3306). Executed 0 events.
Wed Apr 28 22:57:53 2021 - [info] End of log messages from 192.168.178.162.
Wed Apr 28 22:57:53 2021 - [info] -- Slave on host 192.168.178.162(192.168.178.162:3306) started.
Wed Apr 28 22:57:53 2021 - [info] All new slave servers recovered successfully.
Wed Apr 28 22:57:53 2021 - [info]
Wed Apr 28 22:57:53 2021 - [info] * Phase 5: New master cleanup phase..
Wed Apr 28 22:57:53 2021 - [info]
Wed Apr 28 22:57:53 2021 - [info] Resetting slave info on the new master..
Wed Apr 28 22:57:53 2021 - [info]  192.168.178.161: Resetting slave info succeeded.
Wed Apr 28 22:57:53 2021 - [info] Master failover to 192.168.178.161(192.168.178.161:3306) completed successfully.
Wed Apr 28 22:57:53 2021 - [info] Deleted server1 entry from /etc/mha/app1.cnf .
Wed Apr 28 22:57:53 2021 - [info]

----- Failover Report -----

app1: MySQL Master failover 192.168.178.151(192.168.178.151:3306) to 192.168.178.161(192.168.178.161:3306) succeeded

Master 192.168.178.151(192.168.178.151:3306) is down!

Check MHA Manager logs at 162:/var/log/mha/app1/manager for details.

Started automated(non-interactive) failover.
Selected 192.168.178.161(192.168.178.161:3306) as a new master.
192.168.178.161(192.168.178.161:3306): OK: Applying all logs succeeded.
192.168.178.162(192.168.178.162:3306): OK: Slave started, replicating from 192.168.178.161(192.168.178.161:3306)
192.168.178.161(192.168.178.161:3306): Resetting slave info succeeded.
Master failover to 192.168.178.161(192.168.178.161:3306) completed successfully.
Wed Apr 28 23:16:32 2021 - [info] MHA::MasterMonitor version 0.58.
Wed Apr 28 23:16:33 2021 - [info] GTID failover mode = 1
Wed Apr 28 23:16:33 2021 - [info] Dead Servers:
Wed Apr 28 23:16:33 2021 - [info] Alive Servers:
Wed Apr 28 23:16:33 2021 - [info]   192.168.178.151(192.168.178.151:3306)
Wed Apr 28 23:16:33 2021 - [info]   192.168.178.161(192.168.178.161:3306)
Wed Apr 28 23:16:33 2021 - [info]   192.168.178.162(192.168.178.162:3306)
Wed Apr 28 23:16:33 2021 - [info] Alive Slaves:
Wed Apr 28 23:16:33 2021 - [info]   192.168.178.151(192.168.178.151:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Wed Apr 28 23:16:33 2021 - [info]     GTID ON
Wed Apr 28 23:16:33 2021 - [info]     Replicating from 192.168.178.161(192.168.178.161:3306)
Wed Apr 28 23:16:33 2021 - [info]   192.168.178.162(192.168.178.162:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Wed Apr 28 23:16:33 2021 - [info]     GTID ON
Wed Apr 28 23:16:33 2021 - [info]     Replicating from 192.168.178.161(192.168.178.161:3306)
Wed Apr 28 23:16:33 2021 - [info] Current Alive Master: 192.168.178.161(192.168.178.161:3306)
Wed Apr 28 23:16:33 2021 - [info] Checking slave configurations..
Wed Apr 28 23:16:33 2021 - [info]  read_only=1 is not set on slave 192.168.178.151(192.168.178.151:3306).
Wed Apr 28 23:16:33 2021 - [info]  read_only=1 is not set on slave 192.168.178.162(192.168.178.162:3306).
Wed Apr 28 23:16:33 2021 - [info] Checking replication filtering settings..
Wed Apr 28 23:16:33 2021 - [info]  binlog_do_db= , binlog_ignore_db=
Wed Apr 28 23:16:33 2021 - [info]  Replication filtering check ok.
Wed Apr 28 23:16:33 2021 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking.
Wed Apr 28 23:16:33 2021 - [info] Checking SSH publickey authentication settings on the current master..
Wed Apr 28 23:16:34 2021 - [info] HealthCheck: SSH to 192.168.178.161 is reachable.
Wed Apr 28 23:16:34 2021 - [info]
192.168.178.161(192.168.178.161:3306) (current master)
 +--192.168.178.151(192.168.178.151:3306)
 +--192.168.178.162(192.168.178.162:3306)

Wed Apr 28 23:16:34 2021 - [warning] master_ip_failover_script is not defined.
Wed Apr 28 23:16:34 2021 - [warning] shutdown_script is not defined.
Wed Apr 28 23:16:34 2021 - [info] Set master ping interval 2 seconds.
Wed Apr 28 23:16:34 2021 - [warning] secondary_check_script is not defined. It is highly recommended setting it to check master reachability from two or more routes.
Wed Apr 28 23:16:34 2021 - [info] Starting ping health check on 192.168.178.161(192.168.178.161:3306)..
Wed Apr 28 23:16:34 2021 - [info] Ping(SELECT) succeeded, waiting until MySQL doesn't respond..
Thu Apr 29 11:23:38 2021 - [warning] Got timeout on MySQL Ping(SELECT) child process and killed it! at /usr/share/perl5/vendor_perl/MHA/HealthCheck.pm line 432.
Thu Apr 29 11:23:38 2021 - [info] Executing SSH check script: exit 0
Thu Apr 29 11:23:40 2021 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '192.168.178.161' (4))
Thu Apr 29 11:23:40 2021 - [warning] Connection failed 2 time(s)..
Thu Apr 29 11:23:40 2021 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '192.168.178.161' (113))
Thu Apr 29 11:23:40 2021 - [warning] Connection failed 3 time(s)..
Thu Apr 29 11:23:40 2021 - [warning] HealthCheck: SSH to 192.168.178.161 is NOT reachable.
Thu Apr 29 11:23:42 2021 - [info] Ping(SELECT) succeeded, waiting until MySQL doesn't respond..
Thu Apr 29 11:24:08 2021 - [warning] Got error on MySQL select ping: 2006 (MySQL server has gone away)
Thu Apr 29 11:24:08 2021 - [info] Executing SSH check script: exit 0
Thu Apr 29 11:24:08 2021 - [warning] HealthCheck: SSH to 192.168.178.161 is NOT reachable.
Thu Apr 29 11:24:10 2021 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '192.168.178.161' (111))
Thu Apr 29 11:24:10 2021 - [warning] Connection failed 2 time(s)..
Thu Apr 29 11:24:10 2021 - [info] Got terminate signal. Exit.
Thu Apr 29 19:46:29 2021 - [info] MHA::MasterMonitor version 0.58.
Thu Apr 29 19:46:30 2021 - [info] GTID failover mode = 1
Thu Apr 29 19:46:30 2021 - [info] Dead Servers:
Thu Apr 29 19:46:30 2021 - [info] Alive Servers:
Thu Apr 29 19:46:30 2021 - [info]   192.168.178.151(192.168.178.151:3306)
Thu Apr 29 19:46:30 2021 - [info]   192.168.178.161(192.168.178.161:3306)
Thu Apr 29 19:46:30 2021 - [info]   192.168.178.162(192.168.178.162:3306)
Thu Apr 29 19:46:30 2021 - [info] Alive Slaves:
Thu Apr 29 19:46:30 2021 - [info]   192.168.178.151(192.168.178.151:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Thu Apr 29 19:46:30 2021 - [info]     GTID ON
Thu Apr 29 19:46:30 2021 - [info]     Replicating from 192.168.178.161(192.168.178.161:3306)
Thu Apr 29 19:46:30 2021 - [info]   192.168.178.162(192.168.178.162:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Thu Apr 29 19:46:30 2021 - [info]     GTID ON
Thu Apr 29 19:46:30 2021 - [info]     Replicating from 192.168.178.161(192.168.178.161:3306)
Thu Apr 29 19:46:30 2021 - [info] Current Alive Master: 192.168.178.161(192.168.178.161:3306)
Thu Apr 29 19:46:30 2021 - [info] Checking slave configurations..
Thu Apr 29 19:46:30 2021 - [info]  read_only=1 is not set on slave 192.168.178.151(192.168.178.151:3306).
Thu Apr 29 19:46:30 2021 - [info] Checking replication filtering settings..
Thu Apr 29 19:46:30 2021 - [info]  binlog_do_db= , binlog_ignore_db=
Thu Apr 29 19:46:30 2021 - [info]  Replication filtering check ok.
Thu Apr 29 19:46:30 2021 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking.
Thu Apr 29 19:46:30 2021 - [info] Checking SSH publickey authentication settings on the current master..
Thu Apr 29 19:46:30 2021 - [info] HealthCheck: SSH to 192.168.178.161 is reachable.
Thu Apr 29 19:46:30 2021 - [info]
192.168.178.161(192.168.178.161:3306) (current master)
 +--192.168.178.151(192.168.178.151:3306)
 +--192.168.178.162(192.168.178.162:3306)

Thu Apr 29 19:46:30 2021 - [warning] master_ip_failover_script is not defined.
Thu Apr 29 19:46:30 2021 - [warning] shutdown_script is not defined.
Thu Apr 29 19:46:30 2021 - [info] Set master ping interval 2 seconds.
Thu Apr 29 19:46:30 2021 - [warning] secondary_check_script is not defined. It is highly recommended setting it to check master reachability from two or more routes.
Thu Apr 29 19:46:30 2021 - [info] Starting ping health check on 192.168.178.161(192.168.178.161:3306)..
Thu Apr 29 19:46:30 2021 - [info] Ping(SELECT) succeeded, waiting until MySQL doesn't respond..
Thu Apr 29 21:14:21 2021 - [info] Got terminate signal. Exit.
Thu Apr 29 21:14:29 2021 - [info] MHA::MasterMonitor version 0.58.
Thu Apr 29 21:14:30 2021 - [info] GTID failover mode = 1
Thu Apr 29 21:14:30 2021 - [info] Dead Servers:
Thu Apr 29 21:14:30 2021 - [info] Alive Servers:
Thu Apr 29 21:14:30 2021 - [info]   192.168.178.151(192.168.178.151:3306)
Thu Apr 29 21:14:30 2021 - [info]   192.168.178.161(192.168.178.161:3306)
Thu Apr 29 21:14:30 2021 - [info]   192.168.178.162(192.168.178.162:3306)
Thu Apr 29 21:14:30 2021 - [info] Alive Slaves:
Thu Apr 29 21:14:30 2021 - [info]   192.168.178.151(192.168.178.151:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Thu Apr 29 21:14:30 2021 - [info]     GTID ON
Thu Apr 29 21:14:30 2021 - [info]     Replicating from 192.168.178.161(192.168.178.161:3306)
Thu Apr 29 21:14:30 2021 - [info]   192.168.178.162(192.168.178.162:3306)  Version=5.7.20-log (oldest major version between slaves) log-bin:enabled
Thu Apr 29 21:14:30 2021 - [info]     GTID ON
Thu Apr 29 21:14:30 2021 - [info]     Replicating from 192.168.178.161(192.168.178.161:3306)
Thu Apr 29 21:14:30 2021 - [info] Current Alive Master: 192.168.178.161(192.168.178.161:3306)
Thu Apr 29 21:14:30 2021 - [info] Checking slave configurations..
Thu Apr 29 21:14:30 2021 - [info]  read_only=1 is not set on slave 192.168.178.151(192.168.178.151:3306).
Thu Apr 29 21:14:30 2021 - [info] Checking replication filtering settings..
Thu Apr 29 21:14:30 2021 - [info]  binlog_do_db= , binlog_ignore_db=
Thu Apr 29 21:14:30 2021 - [info]  Replication filtering check ok.
Thu Apr 29 21:14:30 2021 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking.
Thu Apr 29 21:14:30 2021 - [info] Checking SSH publickey authentication settings on the current master..
Thu Apr 29 21:14:30 2021 - [info] HealthCheck: SSH to 192.168.178.161 is reachable.
Thu Apr 29 21:14:30 2021 - [info]
192.168.178.161(192.168.178.161:3306) (current master)
 +--192.168.178.151(192.168.178.151:3306)
 +--192.168.178.162(192.168.178.162:3306)

Thu Apr 29 21:14:30 2021 - [info] Checking master_ip_failover_script status:
Thu Apr 29 21:14:30 2021 - [info]   /usr/local/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=192.168.178.161 --orig_master_ip=192.168.178.161 --orig_master_port=3306
: No such file or directory
Thu Apr 29 21:14:30 2021 - [error][/usr/share/perl5/vendor_perl/MHA/MasterMonitor.pm, ln229]  Failed to get master_ip_failover_script status with return code 127:0.
Thu Apr 29 21:14:30 2021 - [error][/usr/share/perl5/vendor_perl/MHA/MasterMonitor.pm, ln427] Error happened on checking configurations.  at /usr/bin/masterha_manager line 50.
Thu Apr 29 21:14:30 2021 - [error][/usr/share/perl5/vendor_perl/MHA/MasterMonitor.pm, ln525] Error happened on monitoring servers.
Thu Apr 29 21:14:30 2021 - [info] Got exit code 1 (Not master dead).


#从日志分析最后几行可以看出，应该是脚本文件的问题

#可能脚本文件中包含windows字符，转换一下
yum install -y dos2unix
dos2unix /usr/local/bin/master_ip_failover

[root@162 bin]# dos2unix /usr/local/bin/master_ip_failover
dos2unix: converting file /usr/local/bin/master_ip_failover to Unix format ...
[root@162 bin]#

#再次重启测试

[root@162 bin]# masterha_stop --conf=/etc/mha/app1.cnf
MHA Manager is not running on app1(2:NOT_RUNNING).
[root@162 bin]# #启动mha
[root@162 bin]# nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover </dev/null > /var/log/mha/app1/manager.log 2>&1 &
[1] 20597
[root@162 bin]#

#再次查看mha状态，启动成功！
[root@162 bin]# masterha_check_status --conf=/etc/mha/app1.cnf
app1 (pid:20597) is running(0:PING_OK), master:192.168.178.161
[root@162 bin]#

10. 模拟主库宕机，测试VIP漂移----失败¶

#161
#停数据库
pkill mysqld
netstat -tunlp|grep 330

#162
#查看主从状态
[root@162 bin]# mysql
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 13
Server version: 5.7.20-log Source distribution

Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql>
mysql>
mysql>
mysql> show slave status\G
*************************** 1. row ***************************
               Slave_IO_State: Waiting for master to send event
                  Master_Host: 192.168.178.151
                  Master_User: repl
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000009
          Read_Master_Log_Pos: 194
               Relay_Log_File: 162-relay-bin.000002
                Relay_Log_Pos: 367
        Relay_Master_Log_File: mysql-bin.000009
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
              Replicate_Do_DB:
          Replicate_Ignore_DB:
           Replicate_Do_Table:
       Replicate_Ignore_Table:
      Replicate_Wild_Do_Table:
  Replicate_Wild_Ignore_Table:
                   Last_Errno: 0
                   Last_Error:
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 194
              Relay_Log_Space: 572
              Until_Condition: None
               Until_Log_File:
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File:
           Master_SSL_CA_Path:
              Master_SSL_Cert:
            Master_SSL_Cipher:
               Master_SSL_Key:
        Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error:
               Last_SQL_Errno: 0
               Last_SQL_Error:
  Replicate_Ignore_Server_Ids:
             Master_Server_Id: 51
                  Master_UUID: 741453b1-a83a-11eb-acdf-000c29482705
             Master_Info_File: /application/mysql/data/master.info
                    SQL_Delay: 0
          SQL_Remaining_Delay: NULL
      Slave_SQL_Running_State: Slave has read all relay log; waiting for more updates
           Master_Retry_Count: 86400
                  Master_Bind:
      Last_IO_Error_Timestamp:
     Last_SQL_Error_Timestamp:
               Master_SSL_Crl:
           Master_SSL_Crlpath:
           Retrieved_Gtid_Set:
            Executed_Gtid_Set: 741453b1-a83a-11eb-acdf-000c29482705:1-4
                Auto_Position: 1
         Replicate_Rewrite_DB:
                 Channel_Name:
           Master_TLS_Version:
1 row in set (0.00 sec)

mysql>

#可以看到主库变为151

#在新主库上查看vip是否漂移过来
#151

[root@151 ~]# ifconfig
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.178.151  netmask 255.255.255.0  broadcast 192.168.178.255
        inet6 fe80::ba35:3885:8073:cd12  prefixlen 64  scopeid 0x20<link>
        ether 00:0c:29:48:27:05  txqueuelen 1000  (Ethernet)
        RX packets 5219  bytes 363206 (354.6 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 9471  bytes 1032159 (1007.9 KiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        inet6 ::1  prefixlen 128  scopeid 0x10<host>
        loop  txqueuelen 1000  (Local Loopback)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

[root@151 ~]#
[root@151 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host
       valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:48:27:05 brd ff:ff:ff:ff:ff:ff
    inet 192.168.178.151/24 brd 192.168.178.255 scope global noprefixroute ens33
       valid_lft forever preferred_lft forever
    inet6 fe80::ba35:3885:8073:cd12/64 scope link noprefixroute
       valid_lft forever preferred_lft forever
[root@151 ~]#


#貌似失败了。。。vip没有漂移过来。
#检查后，发现是配置文件的问题，修改配置文件，重新测试！！！

11. 重新构建主从¶

#1.启动161的数据库
systemctl start mysqld
netstat -tunlp |grep 330

#2.从mha日志文件找到构建主从复制语句
[root@162 bin]# cat /var/log/mha/app1/manager |grep CHANGE
Wed Apr 28 22:57:51 2021 - [info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.178.161', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='repl', MASTER_PASSWORD='xxx';
Wed Apr 28 22:57:51 2021 - [info]  Executed CHANGE MASTER.
Thu Apr 29 21:23:55 2021 - [info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.178.151', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='repl', MASTER_PASSWORD='xxx';
Thu Apr 29 21:23:55 2021 - [info]  Executed CHANGE MASTER.
[root@162 bin]#

#3.登录161数据库，加入主从复制
CHANGE MASTER TO MASTER_HOST='192.168.178.151', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='repl', MASTER_PASSWORD='123';
start slave;

#4.查看161主从状态
show slave status\G

mysql> show slave status\G
*************************** 1. row ***************************
               Slave_IO_State: Waiting for master to send event
                  Master_Host: 192.168.178.151
                  Master_User: repl
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000009
          Read_Master_Log_Pos: 194
               Relay_Log_File: 161-relay-bin.000002
                Relay_Log_Pos: 367
        Relay_Master_Log_File: mysql-bin.000009
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
#主从构建成功！

12. 重新构建mha¶

#1.162上查看mha manager配置文件
vim /etc/mha/app1.cnf
[server default]
manager_log=/var/log/mha/app1/manager
manager_workdir=/var/log/mha/app1
master_binlog_dir=/data/mysql                   #主库二进制日志目录
master_ip_failover_script=/usr/local/bin/master_ip_failover
password=mha
ping_interval=2
repl_password=123
repl_user=repl
ssh_user=root
user=mha

[server1]
hostname=192.168.178.151
port=3306

[server3]
hostname=192.168.178.162
port=3306

#2. 配置文件中加入161信息
[server2]
hostname=192.168.178.161
port=3306

#3.启动mha
nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover </dev/null >/var/log/mha/app1/manager.log 2>&1 &

#4.查看mha状态
masterha_check_status  --conf=/etc/mha/app1.cnf

[root@162 bin]# masterha_check_status  --conf=/etc/mha/app1.cnf
app1 (pid:24852) is running(0:PING_OK), master:192.168.178.151
[root@162 bin]#

13. 修改vip迁移脚本¶

#1.之前脚本中是eth1，而我的虚拟机中是ens33，替换所有的eth1----ens33

vim /usr/local/bin/master_ip_failover
my $vip = '192.168.178.150/24';
my $key = '1';
my $ssh_start_vip = "/sbin/ifconfig ens33:$key $vip";
my $ssh_stop_vip = "/sbin/ifconfig ens33:$key down";

14. 在新主库手动生成vip¶

#1.151手动生成vip 
ifconfig ens33:1 192.168.178.150/24

#2.查看vip
ifconfig

[root@151 ~]# ifconfig ens33:1 192.168.178.150/24
[root@151 ~]# ifconfig
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.178.151  netmask 255.255.255.0  broadcast 192.168.178.255
        inet6 fe80::ba35:3885:8073:cd12  prefixlen 64  scopeid 0x20<link>
        ether 00:0c:29:48:27:05  txqueuelen 1000  (Ethernet)
        RX packets 7849  bytes 544880 (532.1 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 13071  bytes 1431594 (1.3 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

ens33:1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.178.150  netmask 255.255.255.0  broadcast 192.168.178.255
        ether 00:0c:29:48:27:05  txqueuelen 1000  (Ethernet)

lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        inet6 ::1  prefixlen 128  scopeid 0x10<host>
        loop  txqueuelen 1000  (Local Loopback)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

15. 重启mha¶

#162
#1.重启mha
masterha_stop --conf=/etc/mha/app1.cnf
nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover </dev/null > /var/log/mha/app1/manager.log 2>&1 &

#查看mha状态
[root@162 bin]# masterha_check_status  --conf=/etc/mha/app1.cnf
app1 (pid:26963) is running(0:PING_OK), master:192.168.178.151
[root@162 bin]#

16. 再次模拟主库宕机，测试vip漂移¶

#151
#1.关闭数据库
pkill mysqld
netstat -tunlp|grep 330

#162
#2.查看新主库
show slave status\G

mysql> show slave status\G
*************************** 1. row ***************************
               Slave_IO_State: Waiting for master to send event
                  Master_Host: 192.168.178.161
                  Master_User: repl
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000003
          Read_Master_Log_Pos: 194
               Relay_Log_File: 162-relay-bin.000002
                Relay_Log_Pos: 367
        Relay_Master_Log_File: mysql-bin.000003
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
#新主库为： Master_Host: 192.168.178.161

#161
#3.查看vip
ifconfig

[root@161 bin]# ifconfig
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.178.161  netmask 255.255.255.0  broadcast 192.168.178.255
        inet6 fe80::59f5:ba12:6b95:3cc7  prefixlen 64  scopeid 0x20<link>
        ether 00:0c:29:a0:a2:05  txqueuelen 1000  (Ethernet)
        RX packets 21365  bytes 1524309 (1.4 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 45048  bytes 5105522 (4.8 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

ens33:1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.178.150  netmask 255.255.255.0  broadcast 192.168.178.255
        ether 00:0c:29:a0:a2:05  txqueuelen 1000  (Ethernet)

lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        inet6 ::1  prefixlen 128  scopeid 0x10<host>
        loop  txqueuelen 1000  (Local Loopback)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

[root@161 bin]#
#vip漂移成功！！！

最后更新: 2022-02-20 08:44:07