In article <36186e0e.0204242345.7931b...@posting.google.com>, Einat Ariel wrote:
> Hi,
> I'm trying to write a unix program that knows when another process
> goes down,(normally or abnormally). I know the pid of the process that
> I want to monitor.
> I tried using named-pipes - creating the pipe in the monitored process
> using mkfifo, and then monitoring it using poll, but sometimes the
> poll returns unexpectedly with POLLHUP even if process is still active
> and the pipe hasn't been broken.
> If you know any other solutions for this problem, please answer.
> Thanks in advance,
> Einat Ariel
I did this once:
it has been monitoring processes for me on a Solaris box for 7 months now.
(You need to write your own daemon(..) if your standard library doesn't have
it.)
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <unistd.h>
/* Seconds between two process checks; set from the -i option in main(). */
static int interval;
/* Seconds the next sleep() in mainloop() should last: normally equal to
 * interval, shorter when the previous sleep was cut short. */
static int sleepinterval;
/* Path of the config file; a strdup()'d copy of the -c option argument. */
static char *cfgfilename;
/* strdup()'d copy of the -o option argument.
 * NOTE(review): nothing in the visible source reads this after main()
 * stores it - confirm whether it is still needed. */
static char *outfile;
/* Shell command run through popen() in mainloop(); its output is read one
 * whitespace-separated word at a time and each word is matched against the
 * configured process names. */
static char *pscmd = "ps --no-headers -e -o comm";
/*
 * One monitored process name, as read from the config file by parseconfig().
 * Entries form a singly linked list that starts at `head`.
 */
struct processentry {
/* Process name to match; heap copy made with strdup(), released in freelist(). */
char *name;
/* Highest accepted instance count; a negative value disables the upper bound. */
int max;
/* Lowest accepted instance count; a negative value disables the lower bound.
 * When min and max are both negative, check() still expects at least one
 * instance. */
int min;
/* Instances seen in the current ps run; bumped by addproc(), zeroed by clear(). */
int nrfound;
/* Next list entry, or NULL at the end of the list. */
struct processentry *next;
};
/*
 * Reload request flag: set to 1 by confighandler() and polled and cleared by
 * mainloop(). Because it is written from a signal handler it has to be
 * `volatile sig_atomic_t`; that is the only kind of object ISO C guarantees
 * can be shared safely between a handler and the interrupted code.
 */
static volatile sig_atomic_t rereadconfig = 0;

/* Forward declarations for helpers defined further down in this file. */
static void clear();
static void freelist();
static void check();

/* First entry of the monitored-process list; NULL while the list is empty. */
static struct processentry *head;

static void doexit();
static void debuglist();
void confighandler(int signo);
static void parseconfig();
/*
 * Record one sighting of a running command.
 *
 * cmd: command name as printed by the ps command. Every list entry whose
 * name is exactly equal to it has its nrfound counter incremented.
 */
static void addproc(char *cmd)
{
    struct processentry *entry;

    for (entry = head; entry != NULL; entry = entry->next) {
        if (strcmp(cmd, entry->name) == 0)
            entry->nrfound += 1;
    }
}
/*
 * Daemon main loop; never returns.
 *
 * Each pass sleeps for sleepinterval seconds, reloads the config file if a
 * reload was requested through rereadconfig, runs pscmd, counts every word
 * of its output with addproc(), reports limit violations with check() and
 * finally resets the counters with clear().
 */
static void mainloop()
{
    char cmd[128];
    FILE *pspipe = NULL;

    while (1) {
        /*
         * sleep() reports an interrupted nap through its return value (the
         * number of seconds still to go), not through errno. Testing errno
         * here could pick up a stale EINTR left behind by an earlier call.
         */
        unsigned int remaining = sleep((unsigned int) sleepinterval);
        if (remaining > 0) {
            sleepinterval = (int) remaining;    /* wait the rest of the interval */
            continue;
        }
        sleepinterval = interval;               /* full interval again next time */

        if (rereadconfig) {                     /* flag set by signal handler */
            /*
             * Clear the flag before parsing: a signal that arrives while the
             * file is being read then triggers another reload instead of
             * being wiped out by a late reset.
             */
            rereadconfig = (sig_atomic_t) 0;
            syslog(LOG_DEBUG, "Rereading configfile: %s",
                   cfgfilename);
            freelist();
            parseconfig();                      /* exits the process on failure */
        }

        pspipe = popen(pscmd, "r");
        if (pspipe == NULL) {
            syslog(LOG_ERR, "Unable to execute %s command: %s",
                   pscmd, strerror(errno));
            errno = 0;
            continue;
        }

        /* Width is sizeof cmd - 1 so the terminating NUL always fits. */
        while (fscanf(pspipe, "%127s", cmd) == 1) {
            addproc(cmd);
        }

        check();
        clear();

        if (pclose(pspipe) != 0) {
            syslog(LOG_ERR, "Executing %s failed.", pscmd);
        }
    }
}
/* Reset the per-run sighting counter of every list entry to zero. */
static void clear()
{
    struct processentry *entry;

    for (entry = head; entry != NULL; entry = entry->next)
        entry->nrfound = 0;
}
/*
 * Compare each entry's sighting count with its limits and log a warning to
 * syslog for every entry that is out of range.
 *
 * A negative min or max switches that bound off. With both bounds switched
 * off an entry is still reported when it was not seen at all.
 */
static void check()
{
    struct processentry *entry;

    for (entry = head; entry != NULL; entry = entry->next) {
        const int found = entry->nrfound;
        const int lo = entry->min;
        const int hi = entry->max;
        int too_few;
        int too_many;

        if (lo >= 0)
            too_few = found < lo;
        else
            too_few = (hi < 0) && (found <= 0);

        too_many = (hi >= 0) && (found > hi);

        if (!too_few && !too_many)
            continue;

        syslog(LOG_WARNING,
               "Found process %s %d times ,limit is [%4d-%4d]",
               entry->name, found, lo, hi);
    }
}
/*
 * Entry point: parse options, load the config file, detach as a daemon and
 * run the check loop.
 *
 * Options:
 *   -i seconds   interval between checks (required, must be above zero)
 *   -c file      config file (required)
 *   -o file      stored in outfile
 *
 * Returns 1 for bad or missing options, 2 when the config file cannot be
 * resolved, 5 when daemonizing fails. On success mainloop() never returns.
 */
int main(int argc, char *argv[])
{
    int ch;
    char *abscfg;

    while ((ch = getopt(argc, argv, "i:c:o:")) != -1) {
        switch (ch) {
        case 'c':
            free(cfgfilename);          /* option given twice: drop the old copy */
            cfgfilename = strdup(optarg);
            if (cfgfilename == NULL) {
                fprintf(stderr, "Out of memory\n");
                return 1;
            }
            break;
        case 'o':
            free(outfile);
            outfile = strdup(optarg);
            if (outfile == NULL) {
                fprintf(stderr, "Out of memory\n");
                return 1;
            }
            break;
        case 'i': {
            /* strtol instead of atoi: rejects trailing garbage and reports
             * out-of-range input instead of invoking undefined behaviour. */
            char *end;
            long value;

            errno = 0;
            value = strtol(optarg, &end, 10);
            if (end == optarg || *end != '\0' || errno == ERANGE
                || value > INT_MAX) {
                fprintf(stderr,
                        "Invalid interval '%s', must be an integer above zero.\n",
                        optarg);
                return 1;
            }
            if (value <= 0) {
                fprintf(stderr,
                        "Interval cannot be %d, must be above zero.\n",
                        (int) value);
                return 1;
            }
            interval = (int) value;
            break;
        }
        default:
            return 1;
        }
    }

    if (interval == 0) {                /* -i was never given */
        fprintf(stderr,
                "Interval cannot be %d, must be above zero.\n",
                interval);
        return 1;
    }
    sleepinterval = interval;

    if (cfgfilename == NULL) {
        fprintf(stderr, "Configfile not supplied\n");
        return 1;
    }

    /*
     * daemon(0, 0) below moves the working directory to "/", and the config
     * file is opened again on every SIGHUP. Turn the path into an absolute
     * one now so that a relative -c argument still resolves after detaching.
     * Note that realpath() also resolves symbolic links at this point.
     */
    abscfg = realpath(cfgfilename, NULL);
    if (abscfg == NULL) {
        fprintf(stderr, "Cannot resolve configfile %s: %s\n",
                cfgfilename, strerror(errno));
        return 2;
    }
    free(cfgfilename);
    cfgfilename = abscfg;

    openlog("chkproc", LOG_PID, LOG_DAEMON);
    parseconfig();
    debuglist();                        /* still attached: output reaches the terminal */

    if (daemon(0, 0) == -1) {
        perror("Could not daemonize");
        return 5;
    }

    atexit(doexit);
    signal(SIGHUP, confighandler);      /* SIGHUP requests a config reload */
    syslog(LOG_NOTICE,
           "chkproc starting, checking processes every %d seconds.",
           interval);
    mainloop();
    return 0;
}
/*
 * Release every entry of the monitored-process list, including the
 * duplicated name strings, and leave `head` as an empty list.
 */
static void freelist()
{
    struct processentry *node = head;

    head = NULL;
    while (node != NULL) {
        struct processentry *following = node->next;

        free(node->name);
        free(node);
        node = following;
    }
}
/* atexit() hook registered in main(): writes a shutdown notice to syslog. */
static void doexit()
{
    static const char farewell[] = "chkproc exiting";

    syslog(LOG_NOTICE, "%s", farewell);
}
/*
 * Read cfgfilename and build the monitored-process list starting at `head`.
 *
 * Every record is a process name optionally followed by a minimum and a
 * maximum count; a count that is left out defaults to -1. The caller is
 * expected to have emptied the list beforehand (see freelist()).
 *
 * Terminates the process with exit(2) when the file cannot be opened and
 * with exit(3) when memory runs out.
 */
static void parseconfig()
{
    FILE *cfile;
    char entry[128];
    /* Start at -1 so a first record that carries only a name does not read
     * uninitialized values. */
    int max = -1;
    int min = -1;
    struct processentry *tmp = NULL;    /* last entry appended so far */

    cfile = fopen(cfgfilename, "r");
    if (cfile == NULL) {
        syslog(LOG_ERR, "Opening configfile %s failed",
               cfgfilename);
        exit(2);
    }

    /* Width is sizeof entry - 1 so the terminating NUL always fits. */
    while (fscanf(cfile, "%127s %d %d", entry, &min, &max) >= 1) {
        struct processentry *mtmp;

        printf("[entry] %s %d %d\n", entry, min, max);

        mtmp = malloc(sizeof *mtmp);
        if (mtmp == NULL) {
            syslog(LOG_ERR,
                   "Out of memory when reading configfile %s",
                   cfgfilename);
            exit(3);
        }
        mtmp->name = strdup(entry);
        if (mtmp->name == NULL) {
            /* A NULL name would crash the strcmp() in addproc() later on. */
            syslog(LOG_ERR,
                   "Out of memory when reading configfile %s",
                   cfgfilename);
            exit(3);
        }
        mtmp->max = max;
        mtmp->min = min;
        mtmp->nrfound = 0;
        mtmp->next = NULL;

        if (tmp == NULL) {
            head = mtmp;
        } else {
            tmp->next = mtmp;
        }
        tmp = mtmp;

        /* Defaults for the next record in case it omits its counts. */
        min = -1;
        max = -1;
    }

    if (fclose(cfile) != 0) {
        syslog(LOG_DEBUG, "Could not close configfile: %s",
               strerror(errno));
        errno = 0;
    }
}
/* Print every list entry (name, limits and current count) to stdout. */
void debuglist()
{
    struct processentry *cur;

    for (cur = head; cur != NULL; cur = cur->next) {
        printf("process %s min %d max %d found %d\n", cur->name,
               cur->min, cur->max, cur->nrfound);
    }
}
/*
 * Signal handler: request a config reload by raising rereadconfig.
 *
 * signo: number of the signal being delivered.
 *
 * ISO C leaves it implementation-defined whether a handler installed with
 * signal() is reset to the default action when the signal is delivered, and
 * System V style implementations do reset it. Re-install the handler so
 * that a second signal does not terminate the daemon. errno is saved and
 * restored because the interrupted code may be about to inspect it.
 */
void confighandler(int signo)
{
    int saved_errno = errno;

    rereadconfig = (sig_atomic_t) 1;
    (void) signal(signo, confighandler);
    errno = saved_errno;
}
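It uses a config file like the one below: one process name per entry, optionally followed by the minimum and maximum number of instances (-1 or an omitted value means no limit on that side), e.g.: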
xscreensaver 1 1
sshd 1 1
mingetty
syslogd 1 1
portmap 1 1
xinetd 1 -1
klogd 1 1