Heartbeat code for cluster environment

Working in a cluster environment, I often need to check whether some of the nodes of my cluster are dead or alive. To do so, I have a class called Heartbeat in my Python toolbox. This simple heartbeat class pings the cluster node and returns True or False depending on the health of the targeted node. The class implements a stripped-down version of ping: it sends an ICMP_ECHO_REQUEST packet and waits for the answer.

To use it, I call the constructor with the node name, or IP address, followed by the number of seconds between heartbeats. Then every time I need to check if the node is still alive, I call the method is_alive(), which returns a Boolean.

Here is an example of how to use it, followed by the code.

How to use it

>>> import heartbeat
>>> master = heartbeat.Heartbeat('172.16.2.1', 5)
>>> master.is_alive()
True
>>> master.is_alive()
False
>>> master.is_alive()
True
>>>

>>> import heartbeat
>>> master = heartbeat.Heartbeat('172.16.2.1', 5)
>>> master.is_alive()
True
>>> master.is_alive()
False
>>> master.is_alive()
True
>>>

The code

#
# (c) 2009 - Fred Cirera http://blogmag.net/blog/fred/
#

import array
import os
import socket
import time
import select
from struct import pack, unpack, calcsize

# Fields of the ICMP header built by _construct().
# Type 8 is the ICMP echo request message.
ICMP_TYPE = 8
ICMP_CODE = 0
# Placeholder packed into the header while the real checksum is computed.
ICMP_CHECKSUM = 0
ICMP_ID = 0
# Base sequence number; _construct() adds its argument to this value.
ICMP_SEQ_NR = 0

# Number of payload bytes in the echo packet (timestamp plus 'X' padding).
PACKET_SIZE = 56
# Default number of seconds between two real probes made by Heartbeat.
HEARTBEAT_PROBE_TIME = 20
# Default node probed by Heartbeat when no node is given.
HEARTBEAT_SERVER = 'homeserver01.us.archive.org'

# This version of ping is the stripped down version of the ping.py by
# Lars Strand <lars strand at gnist org> we only need to see if the
# network is up and running.

def _construct(id):
    """
    Build an ICMP echo packet with the checksum filled in.

    The packet is the 8-byte ICMP header (type, code, checksum, identifier,
    sequence number) followed by PACKET_SIZE bytes of data: the current
    time packed as a C double, padded with 'X' bytes.

    id -- value added to ICMP_SEQ_NR and stored in the sequence-number
          field (pingNode passes the process id).  The field is 16 bits
          wide, so only the low 16 bits of the value are kept.

    Returns the packet as a byte string.
    """
    # Mask to the width of the field: packing a value above 32767 (for
    # example a large pid) as a signed short raises struct.error.
    seq_nr = (ICMP_SEQ_NR + id) & 0xffff

    # Payload: timestamp followed by filler up to PACKET_SIZE bytes.  The
    # filler is a bytes literal so it can be appended to the packed data.
    padding = PACKET_SIZE - calcsize("d")
    data = pack("d", time.time()) + b'X' * padding

    # The checksum covers the whole packet with the checksum field set to
    # the ICMP_CHECKSUM placeholder; the header is then rebuilt with it.
    header = pack('bbHHH', ICMP_TYPE, ICMP_CODE, ICMP_CHECKSUM,
                  ICMP_ID, seq_nr)
    checksum = _in_cksum(header + data)
    header = pack('bbHHH', ICMP_TYPE, ICMP_CODE, checksum,
                  ICMP_ID, seq_nr)

    return header + data

def _in_cksum(packet):
    """THE RFC792 states: 'The 16 bit one's complement of
    the one's complement sum of all 16 bit words in the header.'

    Generates a checksum of a (ICMP) packet. Based on in_chksum found
    in ping.c on FreeBSD.
    """

    # add byte if not divisible by 2
    if len(packet) & 1:              
        packet = packet + '\0'

    # split into 16-bit word and insert into a binary array
    words = array.array('h', packet) 
    sum = 0

    # perform ones complement arithmetic on 16-bit words
    for word in words:
        sum += (word & 0xffff) 

    hi = sum >> 16 
    lo = sum & 0xffff 
    sum = hi + lo
    sum = sum + (sum >> 16)
    
    return (~sum) & 0xffff # return ones complement


def _is_echo_reply(pong, seq_nr):
    """
    Tell whether a received datagram is the echo reply we are waiting for.

    pong   -- bytes read from the raw socket (IP header + ICMP message).
    seq_nr -- expected 16-bit sequence number.
    """
    if not pong:
        return False

    # The low nibble of the first IP header byte is the header length in
    # 32-bit words; the ICMP header starts right after the IP header.
    ip_header_len = (bytearray(pong[:1])[0] & 0x0f) * 4
    icmp_header = pong[ip_header_len:ip_header_len + 8]
    if len(icmp_header) < 8:
        return False    # truncated packet, nothing to unpack

    pong_type, pong_code, pong_chksum, pong_id, pong_seq = \
        unpack("bbHHH", icmp_header)

    # ICMP type 0 is an echo reply.
    return pong_type == 0 and pong_seq == seq_nr


def pingNode(node, sock, timeout=1.0):
    """
    Pings a node and reports whether it answered.

    node    -- host name or IP address to ping.
    sock    -- raw ICMP socket used to send the request and read the reply.
    timeout -- maximum number of seconds to wait for the reply (None
               waits without limit).

    Returns True when an echo reply carrying our sequence number arrives
    in time, and False when the request cannot be sent or no matching
    reply arrives before the timeout.
    """
    pid = os.getpid()
    packet = _construct(pid)    # make a ping packet

    # send the ping
    try:
        sock.sendto(packet, (node, 1))
    except socket.error:
        return False

    # The sequence field holds only 16 bits of the pid.
    expected_seq = pid & 0xffff

    if timeout is None:
        deadline = None
    else:
        deadline = time.time() + timeout

    # A raw ICMP socket also receives unrelated ICMP traffic, so keep
    # reading until our reply shows up or the time budget is spent.
    while True:
        if deadline is None:
            wait = None
        else:
            wait = max(deadline - time.time(), 0)

        readable = select.select([sock], [], [], wait)[0]
        if not readable:
            return False    # timeout waiting for an answer

        # read data (we only need the headers)
        pong, address = sock.recvfrom(PACKET_SIZE + 48)
        if _is_echo_reply(pong, expected_seq):
            return True

        if deadline is not None and time.time() >= deadline:
            return False

class Heartbeat(object):
    """
    Liveness check for one node, with the result cached between probes.

    is_alive() pings the node at most once every probe_time seconds; calls
    made in between return the result of the last probe.
    """
    # Time of the last probe, in whole seconds since the epoch
    # (0 until the first probe is made).
    last_check = 0
    # Result of the last probe.
    status = False

    def __init__(self, node=HEARTBEAT_SERVER, probe_time=HEARTBEAT_PROBE_TIME):
        """
        node       -- host name or IP address of the node to watch.
        probe_time -- minimum number of seconds between two real probes.
        """
        self.node = node
        self.probe_time = probe_time

    def is_alive(self):
        """
        Return True when the node answered the last ping, False otherwise.

        A node whose name cannot be resolved is reported as dead.  Errors
        raised while creating the raw socket (for instance for lack of
        privileges) are not caught and propagate to the caller.
        """
        now = int(time.time())
        if self.last_check + self.probe_time > now:
            return self.status

        self.last_check = now
        self.status = False
        try:
            host = socket.gethostbyname(self.node)
        except socket.gaierror:
            return self.status

        sock = socket.socket(socket.AF_INET, socket.SOCK_RAW,
                             socket.getprotobyname("icmp"))
        try:
            # Ping the resolved address so the name is not resolved twice.
            self.status = pingNode(host, sock)
        finally:
            # Release the socket even when the ping raises.
            sock.close()

        return self.status

#
# (c) 2009 - Fred Cirera http://blogmag.net/blog/fred/
#

import array
import os
import socket
import time
import select
from struct import pack, unpack, calcsize

# Fields of the ICMP header built by _construct().
# Type 8 is the ICMP echo request message.
ICMP_TYPE = 8
ICMP_CODE = 0
# Placeholder packed into the header while the real checksum is computed.
ICMP_CHECKSUM = 0
ICMP_ID = 0
# Base sequence number; _construct() adds its argument to this value.
ICMP_SEQ_NR = 0

# Number of payload bytes in the echo packet (timestamp plus 'X' padding).
PACKET_SIZE = 56
# Default number of seconds between two real probes made by Heartbeat.
HEARTBEAT_PROBE_TIME = 20
# Default node probed by Heartbeat when no node is given.
HEARTBEAT_SERVER = 'homeserver01.us.archive.org'

# This version of ping is the stripped down version of the ping.py by
# Lars Strand <lars strand at gnist org> we only need to see if the
# network is up and running.

def _construct(id):
    """
    Build an ICMP echo packet with the checksum filled in.

    The packet is the 8-byte ICMP header (type, code, checksum, identifier,
    sequence number) followed by PACKET_SIZE bytes of data: the current
    time packed as a C double, padded with 'X' bytes.

    id -- value added to ICMP_SEQ_NR and stored in the sequence-number
          field (pingNode passes the process id).  The field is 16 bits
          wide, so only the low 16 bits of the value are kept.

    Returns the packet as a byte string.
    """
    # Mask to the width of the field: packing a value above 32767 (for
    # example a large pid) as a signed short raises struct.error.
    seq_nr = (ICMP_SEQ_NR + id) & 0xffff

    # Payload: timestamp followed by filler up to PACKET_SIZE bytes.  The
    # filler is a bytes literal so it can be appended to the packed data.
    padding = PACKET_SIZE - calcsize("d")
    data = pack("d", time.time()) + b'X' * padding

    # The checksum covers the whole packet with the checksum field set to
    # the ICMP_CHECKSUM placeholder; the header is then rebuilt with it.
    header = pack('bbHHH', ICMP_TYPE, ICMP_CODE, ICMP_CHECKSUM,
                  ICMP_ID, seq_nr)
    checksum = _in_cksum(header + data)
    header = pack('bbHHH', ICMP_TYPE, ICMP_CODE, checksum,
                  ICMP_ID, seq_nr)

    return header + data

def _in_cksum(packet):
"""THE RFC792 states: 'The 16 bit one's complement of
the one's complement sum of all 16 bit words in the header.'

Generates a checksum of a (ICMP) packet. Based on in_chksum found
in ping.c on FreeBSD.
"""

# add byte if not divisible by 2
if len(packet) & 1:
packet = packet + '\0'

# split into 16-bit word and insert into a binary array
words = array.array('h', packet)
sum = 0

# perform ones complement arithmetic on 16-bit words
for word in words:
sum += (word & 0xffff)

hi = sum >> 16
lo = sum & 0xffff
sum = hi + lo
sum = sum + (sum >> 16)

return (~sum) & 0xffff # return ones complement


def _is_echo_reply(pong, seq_nr):
    """
    Tell whether a received datagram is the echo reply we are waiting for.

    pong   -- bytes read from the raw socket (IP header + ICMP message).
    seq_nr -- expected 16-bit sequence number.
    """
    if not pong:
        return False

    # The low nibble of the first IP header byte is the header length in
    # 32-bit words; the ICMP header starts right after the IP header.
    ip_header_len = (bytearray(pong[:1])[0] & 0x0f) * 4
    icmp_header = pong[ip_header_len:ip_header_len + 8]
    if len(icmp_header) < 8:
        return False    # truncated packet, nothing to unpack

    pong_type, pong_code, pong_chksum, pong_id, pong_seq = \
        unpack("bbHHH", icmp_header)

    # ICMP type 0 is an echo reply.
    return pong_type == 0 and pong_seq == seq_nr


def pingNode(node, sock, timeout=1.0):
    """
    Pings a node and reports whether it answered.

    node    -- host name or IP address to ping.
    sock    -- raw ICMP socket used to send the request and read the reply.
    timeout -- maximum number of seconds to wait for the reply (None
               waits without limit).

    Returns True when an echo reply carrying our sequence number arrives
    in time, and False when the request cannot be sent or no matching
    reply arrives before the timeout.
    """
    pid = os.getpid()
    packet = _construct(pid)    # make a ping packet

    # send the ping
    try:
        sock.sendto(packet, (node, 1))
    except socket.error:
        return False

    # The sequence field holds only 16 bits of the pid.
    expected_seq = pid & 0xffff

    if timeout is None:
        deadline = None
    else:
        deadline = time.time() + timeout

    # A raw ICMP socket also receives unrelated ICMP traffic, so keep
    # reading until our reply shows up or the time budget is spent.
    while True:
        if deadline is None:
            wait = None
        else:
            wait = max(deadline - time.time(), 0)

        readable = select.select([sock], [], [], wait)[0]
        if not readable:
            return False    # timeout waiting for an answer

        # read data (we only need the headers)
        pong, address = sock.recvfrom(PACKET_SIZE + 48)
        if _is_echo_reply(pong, expected_seq):
            return True

        if deadline is not None and time.time() >= deadline:
            return False

class Heartbeat(object):
    """
    Liveness check for one node, with the result cached between probes.

    is_alive() pings the node at most once every probe_time seconds; calls
    made in between return the result of the last probe.
    """
    # Time of the last probe, in whole seconds since the epoch
    # (0 until the first probe is made).
    last_check = 0
    # Result of the last probe.
    status = False

    def __init__(self, node=HEARTBEAT_SERVER, probe_time=HEARTBEAT_PROBE_TIME):
        """
        node       -- host name or IP address of the node to watch.
        probe_time -- minimum number of seconds between two real probes.
        """
        self.node = node
        self.probe_time = probe_time

    def is_alive(self):
        """
        Return True when the node answered the last ping, False otherwise.

        A node whose name cannot be resolved is reported as dead.  Errors
        raised while creating the raw socket (for instance for lack of
        privileges) are not caught and propagate to the caller.
        """
        now = int(time.time())
        if self.last_check + self.probe_time > now:
            return self.status

        self.last_check = now
        self.status = False
        try:
            host = socket.gethostbyname(self.node)
        except socket.gaierror:
            return self.status

        sock = socket.socket(socket.AF_INET, socket.SOCK_RAW,
                             socket.getprotobyname("icmp"))
        try:
            # Ping the resolved address so the name is not resolved twice.
            self.status = pingNode(host, sock)
        finally:
            # Release the socket even when the ping raises.
            sock.close()

        return self.status
 

Comments

Posted by: film izle Dec 23, 2009 @ 17:06

thanks for codes its nice

Posted by: flowers to usa Dec 27, 2009 @ 03:26

this is so great! thanks for the code

Posted by: wholesale china Jan 14, 2010 @ 00:17

Here is the richest in the world and you can find your needs bring you harvest yes there is no other better place than here here you are with my natural beauty but also naturally pure green world our homes a share in the bar.God bless you! www.trademic.com

Posted by: film izle Jan 17, 2010 @ 04:39

Thank you for sharing your friends. Hope to see you another day.

Posted by: divx film izle Jan 17, 2010 @ 04:40

Thanks for your good website and for sharing your experiences.

Posted by: online film izle Feb 09, 2010 @ 16:57

thanks Fred

Posted by: şentürk inşaat Feb 09, 2010 @ 16:57

good document

Posted by: evening dresses Feb 19, 2010 @ 06:02

I am sure a lot of people will benefit from it. Thanks!

Posted by: Nike Air Max Feb 22, 2010 @ 23:01

Very creative, one of the nicer sites I have seen today. Keep up the great work.

Posted by: Nike Air Max Mar 02, 2010 @ 23:51

So beautiful sharing!Thank you very much.

Posted by: tag heuer watch Mar 04, 2010 @ 23:19

can show a man not only his wealthy but also his high taste.tag heuer watch As the society developed, many people are pay more attention to one's wearing than before, especially a watch. tag heuer watch If you are wearing a famous brand watch, people will look up you, also will love to do business with you. You will be a successful man with a such luxury fashion watch.

Leave a message

(Required)
(Required and not displayed)
(Optional)
obfuscated letters Enter the text shown in the image