Network Block Device @PACKAGE_VERSION@

nbd-server.c

Go to the documentation of this file.
00001 /*
00002  * Network Block Device - server
00003  *
00004  * Copyright 1996-1998 Pavel Machek, distribute under GPL
00005  *  <pavel@atrey.karlin.mff.cuni.cz>
00006  * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
00007  * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
00008  *
00009  * Version 1.0 - hopefully 64-bit-clean
00010  * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
00011  * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es>
00012  * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
00013  *      type, or don't have 64 bit file offsets by defining FS_32BIT
00014  *      in compile options for nbd-server *only*. This can be done
00015  *      with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
00016  *      original autoconf input file, or I would make it a configure
00017  *      option.) Ken Yap <ken@nlc.net.au>.
00018  * Version 1.6 - fix autodetection of block device size and really make 64 bit
00019  *      clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
00020  * Version 2.0 - Version synchronised with client
00021  * Version 2.1 - Reap zombie client processes when they exit. Removed
00022  *      (uncommented) the _IO magic, it's no longer necessary. Wouter
00023  *      Verhelst <wouter@debian.org>
00024  * Version 2.2 - Auto switch to read-only mode (useful for floppies).
00025  * Version 2.3 - Fixed code so that Large File Support works. This
00026  *      removes the FS_32BIT compile-time directive; define
00027  *      _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
00028  *      using FS_32BIT. This will allow you to use files >2GB instead of
00029  *      having to use the -m option. Wouter Verhelst <wouter@debian.org>
00030  * Version 2.4 - Added code to keep track of children, so that we can
00031  *      properly kill them from initscripts. Add a call to daemon(),
00032  *      so that processes don't think they have to wait for us, which is
00033  *      interesting for initscripts as well. Wouter Verhelst
00034  *      <wouter@debian.org>
00035  * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
00036  *      zero after fork()ing, resulting in nbd-server going berserk
00037  *      when it receives a signal with at least one child open. Wouter
00038  *      Verhelst <wouter@debian.org>
00039  * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
00040  *      rectified type of mainloop::size_host (sf.net bugs 814435 and
00041  *      817385); close the PID file after writing to it, so that the
00042  *      daemon can actually be found. Wouter Verhelst
00043  *      <wouter@debian.org>
00044  * 10/10/2003 - Size of the data "size_host" was wrong and so was not
00045  *      correctly put in network endianness. Many types were corrected
00046  *      (size_t and off_t instead of int).  <vspaceg@sourceforge.net>
00047  * Version 2.6 - Some code cleanup.
00048  * Version 2.7 - Better build system.
00049  * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a 
00050  *      lot more work, but this is a start. Wouter Verhelst
00051  *      <wouter@debian.org>
00052  * 16/03/2010 - Add IPv6 support.
00053  *      Kitt Tientanopajai <kitt@kitty.in.th>
00054  *      Neutron Soutmun <neo.neutron@gmail.com>
00055  *      Suriya Soutmun <darksolar@gmail.com>
00056  */
00057 
00058 /* Includes LFS defines, which defines behaviours of some of the following
00059  * headers, so must come before those */
00060 #include "lfs.h"
00061 
00062 #include <assert.h>
00063 #include <sys/types.h>
00064 #include <sys/socket.h>
00065 #include <sys/stat.h>
00066 #include <sys/select.h>
00067 #include <sys/wait.h>
00068 #ifdef HAVE_SYS_IOCTL_H
00069 #include <sys/ioctl.h>
00070 #endif
00071 #include <sys/param.h>
00072 #ifdef HAVE_SYS_MOUNT_H
00073 #include <sys/mount.h>
00074 #endif
00075 #include <signal.h>
00076 #include <errno.h>
00077 #include <netinet/tcp.h>
00078 #include <netinet/in.h>
00079 #include <netdb.h>
00080 #include <syslog.h>
00081 #include <unistd.h>
00082 #include <stdbool.h>
00083 #include <stdio.h>
00084 #include <stdlib.h>
00085 #include <string.h>
00086 #include <fcntl.h>
00087 #if HAVE_FALLOC_PH
00088 #include <linux/falloc.h>
00089 #endif
00090 #include <arpa/inet.h>
00091 #include <strings.h>
00092 #include <dirent.h>
00093 #include <unistd.h>
00094 #include <getopt.h>
00095 #include <pwd.h>
00096 #include <grp.h>
00097 #include <dirent.h>
00098 #include <ctype.h>
00099 
00100 #include <glib.h>
00101 
00102 /* used in cliserv.h, so must come first */
00103 #define MY_NAME "nbd_server"
00104 #include "cliserv.h"
00105 #include "nbd-debug.h"
00106 #include "netdb-compat.h"
00107 
00108 #ifdef WITH_SDP
00109 #include <sdp_inet.h>
00110 #endif
00111 
00112 /** Default position of the config file */
00113 #ifndef SYSCONFDIR
00114 #define SYSCONFDIR "/etc"
00115 #endif
00116 #define CFILE SYSCONFDIR "/nbd-server/config"
00117 
00118 /** Where our config file actually is; replaced by the -C command line option */
00119 gchar* config_file_pos;
00120 
00121 /** Global flags: a bitmask of the "Global flags" F_* values (e.g. F_OLDSTYLE) */
00122 int glob_flags=0;
00123 
00124 /** Whether we should avoid forking; set to 1 by the -d / --dont-fork option */
00125 int dontfork = 0;
00126 
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is every bit set except for the leftmost (sign) bit.
 *
 * The value is built in an unsigned type: shifting a 1 into the sign bit of
 * a signed type is undefined behaviour. The expansion is fully parenthesised
 * so that it can be used inside larger expressions. Assumes off_t is no
 * wider than unsigned long long.
 **/
#define OFFT_MAX ((off_t)((1ULL<<(sizeof(off_t)*8-1))-1))
#define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
#define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
00134 
00135 /** Per-export flags: */
00136 #define F_READONLY 1      /**< flag to tell us a file is readonly */
00137 #define F_MULTIFILE 2     /**< flag to tell us a file is exported using -m */
00138 #define F_COPYONWRITE 4   /**< flag to tell us a file is exported using
00139                             copyonwrite */
00140 #define F_AUTOREADONLY 8  /**< flag to tell us a file is set to autoreadonly */
00141 #define F_SPARSE 16       /**< flag to tell us copyonwrite should use a sparse file */
00142 #define F_SDP 32          /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
00143 #define F_SYNC 64         /**< Whether to fsync() after a write */
00144 #define F_FLUSH 128       /**< Whether server wants FLUSH to be sent by the client */
00145 #define F_FUA 256         /**< Whether server wants FUA to be sent by the client */
00146 #define F_ROTATIONAL 512  /**< Whether server wants the client to implement the elevator algorithm */
00147 #define F_TEMPORARY 1024  /**< Whether the backing file is temporary and should be created then unlinked */
00148 #define F_TRIM 2048       /**< Whether server wants TRIM (discard) to be sent by the client */
00149 #define F_FIXED 4096      /**< Client supports fixed new-style protocol (and can thus send us extra options) */
00150 
00151 /** Global flags: */
00152 #define F_OLDSTYLE 1      /**< Allow oldstyle (port-based) exports */
00153 #define F_LIST 2          /**< Allow clients to list the exports on a server */
00154 GHashTable *children; /**< NOTE(review): presumably tracks child processes; confirm against the code that fills it */
00155 char pidfname[256]; /**< name of our PID file */
00156 char pidftemplate[256]; /**< template to be used for the filename of the PID file; set by the -p option */
00157 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
00158 
00159 #define NEG_INIT        (1 << 0)
00160 #define NEG_OLD         (1 << 1)
00161 #define NEG_MODERN      (1 << 2)
00162 
00163 #include <nbdsrv.h>
00164 
00165 static volatile sig_atomic_t is_sighup_caught; /**< Flag set by SIGHUP
00166                                                     handler to mark a
00167                                                     reconfiguration
00168                                                     request */
00169 
00170 GArray* modernsocks;      /**< Sockets for the modern handler. Not used
00171                                if a client was only specified on the
00172                                command line; only the port is used if
00173                                oldstyle is set to false (and then the
00174                                command-line client isn't used).
00175                                This may be more than one socket on
00176                                systems that don't support serving IPv4
00177                                and IPv6 from the same socket (like,
00178                                e.g., FreeBSD) */
00179 
00180 bool logged_oversized=false;  /**< whether we already logged an oversized request */
00181 
00182 /**
00183  * Variables associated with one open file
00184  **/
00185 typedef struct {
00186         int fhandle;      /**< file descriptor */
00187         off_t startoff;   /**< starting offset of this file; NOTE(review): presumably its position within the export - confirm */
00188 } FILE_INFO;
00189 
00190 /**
00191  * Type of configuration file values
00192  **/
00193 typedef enum {
00194         PARAM_INT,              /**< This parameter is an integer (stored through a gint pointer) */
00195         PARAM_INT64,            /**< This parameter is a 64-bit integer (read with g_key_file_get_int64()) */
00196         PARAM_STRING,           /**< This parameter is a string */
00197         PARAM_BOOL,             /**< This parameter is a boolean */
00198 } PARAM_TYPE;
00199 
00200 /**
00201  * Description of one configuration file value: its name, whether it is
00202  * required, its type, and where the parsed value is stored
00203  **/
00203 typedef struct {
00204         gchar *paramname;       /**< Name of the parameter, as it appears in
00205                                   the config file */
00206         gboolean required;      /**< Whether this is a required (as opposed to
00207                                   optional) parameter */
00208         PARAM_TYPE ptype;       /**< Type of the parameter. */
00209         gpointer target;        /**< Pointer to where the data of this
00210                                   parameter should be written; must not be
00211                                   NULL. If ptype is PARAM_BOOL, the data is
00212                                   or'ed rather than overwritten. */
00213         gint flagval;           /**< Flag mask for this parameter in case ptype
00214                                   is PARAM_BOOL. */
00215 } PARAM;
00216 
00217 /**
00218  * Configuration file values of the "generic" section
00219  **/
00220 struct generic_conf {
00221         gchar *user;            /**< user we run the server as    */
00222         gchar *group;           /**< group we run the server as   */
00223         gchar *modernaddr;      /**< address of the modern socket */
00224         gchar *modernport;      /**< port of the modern socket    */
00225         gint flags;             /**< global flags (F_OLDSTYLE, F_LIST) */
00226 };
00227 
00228 /**
00229  * Translate a command name into human readable form
00230  *
00231  * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
00232  * @return pointer to the command name
00233  **/
00234 static inline const char * getcommandname(uint64_t command) {
00235         switch (command) {
00236         case NBD_CMD_READ:
00237                 return "NBD_CMD_READ";
00238         case NBD_CMD_WRITE:
00239                 return "NBD_CMD_WRITE";
00240         case NBD_CMD_DISC:
00241                 return "NBD_CMD_DISC";
00242         case NBD_CMD_FLUSH:
00243                 return "NBD_CMD_FLUSH";
00244         case NBD_CMD_TRIM:
00245                 return "NBD_CMD_TRIM";
00246         default:
00247                 return "UNKNOWN";
00248         }
00249 }
00250 
/**
 * Read exactly len bytes from a file descriptor into a buffer.
 *
 * Short reads are continued until the whole request has been satisfied.
 * A read that was interrupted by a signal (EINTR) or that would block
 * (EAGAIN) is retried. End-of-file before len bytes have arrived, and any
 * other read error, are reported through err().
 *
 * @param f a file descriptor
 * @param buf a buffer with room for at least len bytes
 * @param len the number of bytes to be read
 **/
static inline void readit(int f, void *buf, size_t len) {
	/* Arithmetic on void* is a compiler extension; walk a char* instead. */
	char *pos = buf;

	while (len > 0) {
		ssize_t res;

		DEBUG("*");
		res = read(f, pos, len);
		if (res > 0) {
			len -= (size_t)res;
			pos += res;
			continue;
		}
		if (res == 0) {
			/* read() does not set errno at end-of-file, so looking
			 * at errno here would act on a stale value: an old
			 * EAGAIN would make this loop spin forever. */
			err("Read failed: End of file");
			return;
		}
		if (errno != EAGAIN && errno != EINTR) {
			err("Read failed: %m");
			/* err() is expected not to return; the return below
			 * only guards against looping on a dead descriptor
			 * if it ever does. */
			return;
		}
	}
}
00272 
/**
 * Read and throw away data from a file descriptor.
 *
 * The data is pulled through buf in pieces of at most bufsiz bytes, each
 * piece overwriting the previous one.
 *
 * @param f a file descriptor
 * @param buf a scratch buffer
 * @param len the number of bytes to consume
 * @param bufsiz the size of the buffer
 **/
static inline void consume(int f, void * buf, size_t len, size_t bufsiz) {
	size_t remaining;

	for (remaining = len; remaining > 0; ) {
		size_t chunk = remaining;

		if (chunk > bufsiz) {
			chunk = bufsiz;
		}
		readit(f, buf, chunk);
		remaining -= chunk;
	}
}
00289 
/**
 * Write exactly len bytes from a buffer to a file descriptor.
 *
 * Short writes are continued until everything has been sent. A write that
 * was interrupted by a signal (EINTR) or that would block (EAGAIN) is
 * retried, matching what readit() does for reads. Any other failure is
 * reported through err().
 *
 * @param f a file descriptor
 * @param buf a buffer containing data
 * @param len the number of bytes to be written
 **/
static inline void writeit(int f, void *buf, size_t len) {
	/* Arithmetic on void* is a compiler extension; walk a char* instead. */
	const char *pos = buf;

	while (len > 0) {
		ssize_t res;

		DEBUG("+");
		res = write(f, pos, len);
		if (res > 0) {
			len -= (size_t)res;
			pos += res;
			continue;
		}
		if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
			continue;
		}
		err("Send failed: %m");
		/* err() is expected not to return. Should it ever do so, stop
		 * here: adjusting len and the position by a non-positive
		 * result would corrupt both. */
		return;
	}
}
00307 
00308 /**
00309  * Print out a message about how to use nbd-server. Split out to a separate
00310  * function so that we can call it from multiple places
00311  */
00312 void usage() {
00313         printf("This is nbd-server version " VERSION "\n");
00314         printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
00315                "\t-r|--read-only\t\tread only\n"
00316                "\t-m|--multi-file\t\tmultiple file\n"
00317                "\t-c|--copy-on-write\tcopy on write\n"
00318                "\t-C|--config-file\tspecify an alternate configuration file\n"
00319                "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
00320                "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
00321                "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
00322                "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
00323                "\tif port is set to 0, stdin is used (for running from inetd).\n"
00324                "\tif file_to_export contains '%%s', it is substituted with the IP\n"
00325                "\t\taddress of the machine trying to connect\n" 
00326                "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
00327         printf("Using configuration file %s\n", CFILE);
00328 }
00329 
00330 /* Dumps a config file section of the given SERVER*, and exits. */
00331 void dump_section(SERVER* serve, gchar* section_header) {
00332         printf("[%s]\n", section_header);
00333         printf("\texportname = %s\n", serve->exportname);
00334         printf("\tlistenaddr = %s\n", serve->listenaddr);
00335         printf("\tport = %d\n", serve->port);
00336         if(serve->flags & F_READONLY) {
00337                 printf("\treadonly = true\n");
00338         }
00339         if(serve->flags & F_MULTIFILE) {
00340                 printf("\tmultifile = true\n");
00341         }
00342         if(serve->flags & F_COPYONWRITE) {
00343                 printf("\tcopyonwrite = true\n");
00344         }
00345         if(serve->expected_size) {
00346                 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
00347         }
00348         if(serve->authname) {
00349                 printf("\tauthfile = %s\n", serve->authname);
00350         }
00351         exit(EXIT_SUCCESS);
00352 }
00353 
00354 /**
00355  * Parse the command line.
00356  *
00357  * @param argc the argc argument to main()
00358  * @param argv the argv argument to main()
00359  **/
00360 SERVER* cmdline(int argc, char *argv[]) {
00361         int i=0;
00362         int nonspecial=0;
00363         int c;
00364         struct option long_options[] = {
00365                 {"read-only", no_argument, NULL, 'r'},
00366                 {"multi-file", no_argument, NULL, 'm'},
00367                 {"copy-on-write", no_argument, NULL, 'c'},
00368                 {"dont-fork", no_argument, NULL, 'd'},
00369                 {"authorize-file", required_argument, NULL, 'l'},
00370                 {"config-file", required_argument, NULL, 'C'},
00371                 {"pid-file", required_argument, NULL, 'p'},
00372                 {"output-config", required_argument, NULL, 'o'},
00373                 {"max-connection", required_argument, NULL, 'M'},
00374                 {0,0,0,0}
00375         };
00376         SERVER *serve;
00377         off_t es;
00378         size_t last;
00379         char suffix;
00380         gboolean do_output=FALSE;
00381         gchar* section_header="";
00382         gchar** addr_port;
00383 
00384         if(argc==1) {
00385                 return NULL;
00386         }
00387         serve=g_new0(SERVER, 1);
00388         serve->authname = g_strdup(default_authname);
00389         serve->virtstyle=VIRT_IPLIT;
00390         while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
00391                 switch (c) {
00392                 case 1:
00393                         /* non-option argument */
00394                         switch(nonspecial++) {
00395                         case 0:
00396                                 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
00397                                         addr_port=g_strsplit(optarg, ":", 2);
00398 
00399                                         /* Check for "@" - maybe user using this separator
00400                                                  for IPv4 address */
00401                                         if(!addr_port[1]) {
00402                                                 g_strfreev(addr_port);
00403                                                 addr_port=g_strsplit(optarg, "@", 2);
00404                                         }
00405                                 } else {
00406                                         addr_port=g_strsplit(optarg, "@", 2);
00407                                 }
00408 
00409                                 if(addr_port[1]) {
00410                                         serve->port=strtol(addr_port[1], NULL, 0);
00411                                         serve->listenaddr=g_strdup(addr_port[0]);
00412                                 } else {
00413                                         serve->listenaddr=NULL;
00414                                         serve->port=strtol(addr_port[0], NULL, 0);
00415                                 }
00416                                 g_strfreev(addr_port);
00417                                 break;
00418                         case 1:
00419                                 serve->exportname = g_strdup(optarg);
00420                                 if(serve->exportname[0] != '/') {
00421                                         fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
00422                                         exit(EXIT_FAILURE);
00423                                 }
00424                                 break;
00425                         case 2:
00426                                 last=strlen(optarg)-1;
00427                                 suffix=optarg[last];
00428                                 if (suffix == 'k' || suffix == 'K' ||
00429                                     suffix == 'm' || suffix == 'M')
00430                                         optarg[last] = '\0';
00431                                 es = (off_t)atoll(optarg);
00432                                 switch (suffix) {
00433                                         case 'm':
00434                                         case 'M':  es <<= 10;
00435                                         case 'k':
00436                                         case 'K':  es <<= 10;
00437                                         default :  break;
00438                                 }
00439                                 serve->expected_size = es;
00440                                 break;
00441                         }
00442                         break;
00443                 case 'r':
00444                         serve->flags |= F_READONLY;
00445                         break;
00446                 case 'm':
00447                         serve->flags |= F_MULTIFILE;
00448                         break;
00449                 case 'o':
00450                         do_output = TRUE;
00451                         section_header = g_strdup(optarg);
00452                         break;
00453                 case 'p':
00454                         strncpy(pidftemplate, optarg, 256);
00455                         break;
00456                 case 'c': 
00457                         serve->flags |=F_COPYONWRITE;
00458                         break;
00459                 case 'd': 
00460                         dontfork = 1;
00461                         break;
00462                 case 'C':
00463                         g_free(config_file_pos);
00464                         config_file_pos=g_strdup(optarg);
00465                         break;
00466                 case 'l':
00467                         g_free(serve->authname);
00468                         serve->authname=g_strdup(optarg);
00469                         break;
00470                 case 'M':
00471                         serve->max_connections = strtol(optarg, NULL, 0);
00472                         break;
00473                 default:
00474                         usage();
00475                         exit(EXIT_FAILURE);
00476                         break;
00477                 }
00478         }
00479         /* What's left: the port to export, the name of the to be exported
00480          * file, and, optionally, the size of the file, in that order. */
00481         if(nonspecial<2) {
00482                 g_free(serve);
00483                 serve=NULL;
00484         } else {
00485                 glob_flags |= F_OLDSTYLE;
00486         }
00487         if(do_output) {
00488                 if(!serve) {
00489                         g_critical("Need a complete configuration on the command line to output a config file section!");
00490                         exit(EXIT_FAILURE);
00491                 }
00492                 dump_section(serve, section_header);
00493         }
00494         return serve;
00495 }
00496 
00497 /* forward definition of parse_cfile */
00498 GArray* parse_cfile(gchar* f, struct generic_conf *genconf, GError** e);
00499 
00500 /**
00501  * Parse config file snippets in a directory. Uses readdir() and friends
00502  * to find files and open them, then passes them on to parse_cfile
00503  * with have_global set false
00504  **/
00505 GArray* do_cfile_dir(gchar* dir, GError** e) {
00506         DIR* dirh = opendir(dir);
00507         struct dirent* de;
00508         gchar* fname;
00509         GArray* retval = NULL;
00510         GArray* tmp;
00511         struct stat stbuf;
00512 
00513         if(!dirh) {
00514                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_DIR_UNKNOWN, "Invalid directory specified: %s", strerror(errno));
00515                 return NULL;
00516         }
00517         errno=0;
00518         while((de = readdir(dirh))) {
00519                 int saved_errno=errno;
00520                 fname = g_build_filename(dir, de->d_name, NULL);
00521                 switch(de->d_type) {
00522                         case DT_UNKNOWN:
00523                                 /* Filesystem doesn't return type of
00524                                  * file through readdir. Run stat() on
00525                                  * the file instead */
00526                                 if(stat(fname, &stbuf)) {
00527                                         perror("stat");
00528                                         goto err_out;
00529                                 }
00530                                 if (!S_ISREG(stbuf.st_mode)) {
00531                                         goto next;
00532                                 }
00533                         case DT_REG:
00534                                 /* Skip unless the name ends with '.conf' */
00535                                 if(strcmp((de->d_name + strlen(de->d_name) - 5), ".conf")) {
00536                                         goto next;
00537                                 }
00538                                 tmp = parse_cfile(fname, NULL, e);
00539                                 errno=saved_errno;
00540                                 if(*e) {
00541                                         goto err_out;
00542                                 }
00543                                 if(!retval)
00544                                         retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
00545                                 retval = g_array_append_vals(retval, tmp->data, tmp->len);
00546                                 g_array_free(tmp, TRUE);
00547                         default:
00548                                 break;
00549                 }
00550         next:
00551                 g_free(fname);
00552         }
00553         if(errno) {
00554                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_READDIR_ERR, "Error trying to read directory: %s", strerror(errno));
00555         err_out:
00556                 if(retval)
00557                         g_array_free(retval, TRUE);
00558                 if(dirh)
00559                         closedir(dirh);
00560                 return NULL;
00561         }
00562         return retval;
00563 }
00564 
00565 /**
00566  * Parse the config file.
00567  *
00568  * @param f the name of the config file
00569  *
00570  * @param genconf a pointer to generic configuration which will get
00571  *        updated with parsed values. If NULL, then parsed generic
00572  *        configuration values are safely and silently discarded.
00573  *
00574  * @param e a GError. Error code can be any of the following:
00575  *        NBDS_ERR_CFILE_NOTFOUND, NBDS_ERR_CFILE_MISSING_GENERIC,
00576  *        NBDS_ERR_CFILE_VALUE_INVALID, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
00577  *        or NBDS_ERR_CFILE_NO_EXPORTS. @see NBDS_ERRS.
00578  *
00579  * @return a GArray of SERVER structures. If the config file is empty or does
00580  *      not exist, returns an empty GArray; if the config file contains an
00581  *      error, returns NULL, and e is set appropriately
00582  **/
00583 GArray* parse_cfile(gchar* f, struct generic_conf *const genconf, GError** e) {
00584         const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
00585         const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
00586         gchar* cfdir = NULL;
00587         SERVER s;
00588         gchar *virtstyle=NULL;
00589         PARAM lp[] = {
00590                 { "exportname", TRUE,   PARAM_STRING,   &(s.exportname),        0 },
00591                 { "port",       TRUE,   PARAM_INT,      &(s.port),              0 },
00592                 { "authfile",   FALSE,  PARAM_STRING,   &(s.authname),          0 },
00593                 { "filesize",   FALSE,  PARAM_OFFT,     &(s.expected_size),     0 },
00594                 { "virtstyle",  FALSE,  PARAM_STRING,   &(virtstyle),           0 },
00595                 { "prerun",     FALSE,  PARAM_STRING,   &(s.prerun),            0 },
00596                 { "postrun",    FALSE,  PARAM_STRING,   &(s.postrun),           0 },
00597                 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog),   0 },
00598                 { "readonly",   FALSE,  PARAM_BOOL,     &(s.flags),             F_READONLY },
00599                 { "multifile",  FALSE,  PARAM_BOOL,     &(s.flags),             F_MULTIFILE },
00600                 { "copyonwrite", FALSE, PARAM_BOOL,     &(s.flags),             F_COPYONWRITE },
00601                 { "sparse_cow", FALSE,  PARAM_BOOL,     &(s.flags),             F_SPARSE },
00602                 { "sdp",        FALSE,  PARAM_BOOL,     &(s.flags),             F_SDP },
00603                 { "sync",       FALSE,  PARAM_BOOL,     &(s.flags),             F_SYNC },
00604                 { "flush",      FALSE,  PARAM_BOOL,     &(s.flags),             F_FLUSH },
00605                 { "fua",        FALSE,  PARAM_BOOL,     &(s.flags),             F_FUA },
00606                 { "rotational", FALSE,  PARAM_BOOL,     &(s.flags),             F_ROTATIONAL },
00607                 { "temporary",  FALSE,  PARAM_BOOL,     &(s.flags),             F_TEMPORARY },
00608                 { "trim",       FALSE,  PARAM_BOOL,     &(s.flags),             F_TRIM },
00609                 { "listenaddr", FALSE,  PARAM_STRING,   &(s.listenaddr),        0 },
00610                 { "maxconnections", FALSE, PARAM_INT,   &(s.max_connections),   0 },
00611         };
00612         const int lp_size=sizeof(lp)/sizeof(PARAM);
00613         struct generic_conf genconftmp;
00614         PARAM gp[] = {
00615                 { "user",       FALSE, PARAM_STRING,    &(genconftmp.user),       0 },
00616                 { "group",      FALSE, PARAM_STRING,    &(genconftmp.group),      0 },
00617                 { "oldstyle",   FALSE, PARAM_BOOL,      &(genconftmp.flags),      F_OLDSTYLE },
00618                 { "listenaddr", FALSE, PARAM_STRING,    &(genconftmp.modernaddr), 0 },
00619                 { "port",       FALSE, PARAM_STRING,    &(genconftmp.modernport), 0 },
00620                 { "includedir", FALSE, PARAM_STRING,    &cfdir,                   0 },
00621                 { "allowlist",  FALSE, PARAM_BOOL,      &(genconftmp.flags),      F_LIST },
00622         };
00623         PARAM* p=gp;
00624         int p_size=sizeof(gp)/sizeof(PARAM);
00625         GKeyFile *cfile;
00626         GError *err = NULL;
00627         const char *err_msg=NULL;
00628         GArray *retval=NULL;
00629         gchar **groups;
00630         gboolean bval;
00631         gint ival;
00632         gint64 i64val;
00633         gchar* sval;
00634         gchar* startgroup;
00635         gint i;
00636         gint j;
00637 
00638         memset(&genconftmp, 0, sizeof(struct generic_conf));
00639 
00640         if (genconf) {
00641                 /* Use the passed configuration values as defaults. The
00642                  * parsing algorithm below updates all parameter targets
00643                  * found from configuration files. */
00644                 memcpy(&genconftmp, genconf, sizeof(struct generic_conf));
00645         }
00646 
00647         cfile = g_key_file_new();
00648         retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
00649         if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
00650                         G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
00651                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NOTFOUND, "Could not open config file %s: %s",
00652                                 f, err->message);
00653                 g_key_file_free(cfile);
00654                 return retval;
00655         }
00656         startgroup = g_key_file_get_start_group(cfile);
00657         if((!startgroup || strcmp(startgroup, "generic")) && genconf) {
00658                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
00659                 g_key_file_free(cfile);
00660                 return NULL;
00661         }
00662         groups = g_key_file_get_groups(cfile, NULL);
00663         for(i=0;groups[i];i++) {
00664                 memset(&s, '\0', sizeof(SERVER));
00665 
00666                 /* After the [generic] group or when we're parsing an include
00667                  * directory, start parsing exports */
00668                 if(i==1 || !genconf) {
00669                         p=lp;
00670                         p_size=lp_size;
00671                         if(!(glob_flags & F_OLDSTYLE)) {
00672                                 lp[1].required = FALSE;
00673                         }
00674                 } 
00675                 for(j=0;j<p_size;j++) {
00676                         assert(p[j].target != NULL);
00677                         assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL||p[j].ptype==PARAM_INT64);
00678                         switch(p[j].ptype) {
00679                                 case PARAM_INT:
00680                                         ival = g_key_file_get_integer(cfile,
00681                                                                 groups[i],
00682                                                                 p[j].paramname,
00683                                                                 &err);
00684                                         if(!err) {
00685                                                 *((gint*)p[j].target) = ival;
00686                                         }
00687                                         break;
00688                                 case PARAM_INT64:
00689                                         i64val = g_key_file_get_int64(cfile,
00690                                                                 groups[i],
00691                                                                 p[j].paramname,
00692                                                                 &err);
00693                                         if(!err) {
00694                                                 *((gint64*)p[j].target) = i64val;
00695                                         }
00696                                         break;
00697                                 case PARAM_STRING:
00698                                         sval = g_key_file_get_string(cfile,
00699                                                                 groups[i],
00700                                                                 p[j].paramname,
00701                                                                 &err);
00702                                         if(!err) {
00703                                                 *((gchar**)p[j].target) = sval;
00704                                         }
00705                                         break;
00706                                 case PARAM_BOOL:
00707                                         bval = g_key_file_get_boolean(cfile,
00708                                                         groups[i],
00709                                                         p[j].paramname, &err);
00710                                         if(!err) {
00711                                                 if(bval) {
00712                                                         *((gint*)p[j].target) |= p[j].flagval;
00713                                                 } else {
00714                                                         *((gint*)p[j].target) &= ~(p[j].flagval);
00715                                                 }
00716                                         }
00717                                         break;
00718                         }
00719                         if(err) {
00720                                 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
00721                                         if(!p[j].required) {
00722                                                 /* Ignore not-found error for optional values */
00723                                                 g_clear_error(&err);
00724                                                 continue;
00725                                         } else {
00726                                                 err_msg = MISSING_REQUIRED_ERROR;
00727                                         }
00728                                 } else {
00729                                         err_msg = DEFAULT_ERROR;
00730                                 }
00731                                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
00732                                 g_array_free(retval, TRUE);
00733                                 g_error_free(err);
00734                                 g_key_file_free(cfile);
00735                                 return NULL;
00736                         }
00737                 }
00738                 if(virtstyle) {
00739                         if(!strncmp(virtstyle, "none", 4)) {
00740                                 s.virtstyle=VIRT_NONE;
00741                         } else if(!strncmp(virtstyle, "ipliteral", 9)) {
00742                                 s.virtstyle=VIRT_IPLIT;
00743                         } else if(!strncmp(virtstyle, "iphash", 6)) {
00744                                 s.virtstyle=VIRT_IPHASH;
00745                         } else if(!strncmp(virtstyle, "cidrhash", 8)) {
00746                                 s.virtstyle=VIRT_CIDR;
00747                                 if(strlen(virtstyle)<10) {
00748                                         g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
00749                                         g_array_free(retval, TRUE);
00750                                         g_key_file_free(cfile);
00751                                         return NULL;
00752                                 }
00753                                 s.cidrlen=strtol(virtstyle+8, NULL, 0);
00754                         } else {
00755                                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
00756                                 g_array_free(retval, TRUE);
00757                                 g_key_file_free(cfile);
00758                                 return NULL;
00759                         }
00760                 } else {
00761                         s.virtstyle=VIRT_IPLIT;
00762                 }
00763                 if(s.port && !(glob_flags & F_OLDSTYLE)) {
00764                         g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
00765                         g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
00766                 }
00767                 /* Don't need to free this, it's not our string */
00768                 virtstyle=NULL;
00769                 /* Don't append values for the [generic] group */
00770                 if(i>0 || !genconf) {
00771                         s.socket_family = AF_UNSPEC;
00772                         s.servename = groups[i];
00773 
00774                         append_serve(&s, retval);
00775                 }
00776 #ifndef WITH_SDP
00777                 if(s.flags & F_SDP) {
00778                         g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
00779                         g_array_free(retval, TRUE);
00780                         g_key_file_free(cfile);
00781                         return NULL;
00782                 }
00783 #endif
00784         }
00785         g_key_file_free(cfile);
00786         if(cfdir) {
00787                 GArray* extra = do_cfile_dir(cfdir, e);
00788                 if(extra) {
00789                         retval = g_array_append_vals(retval, extra->data, extra->len);
00790                         i+=extra->len;
00791                         g_array_free(extra, TRUE);
00792                 } else {
00793                         if(*e) {
00794                                 g_array_free(retval, TRUE);
00795                                 return NULL;
00796                         }
00797                 }
00798         }
00799         if(i==1 && genconf) {
00800                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NO_EXPORTS, "The config file does not specify any exports");
00801         }
00802 
00803         if (genconf) {
00804                 /* Return the updated generic configuration through the
00805                  * pointer parameter. */
00806                 memcpy(genconf, &genconftmp, sizeof(struct generic_conf));
00807         }
00808 
00809         return retval;
00810 }
00811 
00812 /**
00813  * Signal handler for SIGCHLD
00814  * @param s the signal we're handling (must be SIGCHLD, or something
00815  * is severely wrong)
00816  **/
00817 void sigchld_handler(int s) {
00818         int status;
00819         int* i;
00820         pid_t pid;
00821 
00822         while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
00823                 if(WIFEXITED(status)) {
00824                         msg(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
00825                 }
00826                 i=g_hash_table_lookup(children, &pid);
00827                 if(!i) {
00828                         msg(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
00829                 } else {
00830                         DEBUG("Removing %d from the list of children", pid);
00831                         g_hash_table_remove(children, &pid);
00832                 }
00833         }
00834 }
00835 
00836 /**
00837  * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
00838  *
00839  * @param key the key
00840  * @param value the value corresponding to the above key
00841  * @param user_data a pointer which we always set to 1, so that we know what
00842  * will happen next.
00843  **/
00844 void killchild(gpointer key, gpointer value, gpointer user_data) {
00845         pid_t *pid=value;
00846 
00847         kill(*pid, SIGTERM);
00848 }
00849 
00850 /**
00851  * Handle SIGTERM and dispatch it to our children
00852  * @param s the signal we're handling (must be SIGTERM, or something
00853  * is severely wrong).
00854  **/
00855 void sigterm_handler(int s) {
00856         g_hash_table_foreach(children, killchild, NULL);
00857         unlink(pidfname);
00858 
00859         exit(EXIT_SUCCESS);
00860 }
00861 
00862 /**
00863  * Handle SIGHUP by setting atomically a flag which will be evaluated in
00864  * the main loop of the root server process. This allows us to separate
00865  * the signal catching from th actual task triggered by SIGHUP and hence
00866  * processing in the interrupt context is kept as minimial as possible.
00867  *
00868  * @param s the signal we're handling (must be SIGHUP, or something
00869  * is severely wrong).
00870  **/
00871 static void sighup_handler(const int s G_GNUC_UNUSED) {
00872         is_sighup_caught = 1;
00873 }
00874 
00875 /**
00876  * Get the file handle and offset, given an export offset.
00877  *
00878  * @param export An array of export files
00879  * @param a The offset to get corresponding file/offset for
00880  * @param fhandle [out] File descriptor
00881  * @param foffset [out] Offset into fhandle
00882  * @param maxbytes [out] Tells how many bytes can be read/written
00883  * from fhandle starting at foffset (0 if there is no limit)
00884  * @return 0 on success, -1 on failure
00885  **/
00886 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
00887         /* Negative offset not allowed */
00888         if(a < 0)
00889                 return -1;
00890 
00891         /* Binary search for last file with starting offset <= a */
00892         FILE_INFO fi;
00893         int start = 0;
00894         int end = export->len - 1;
00895         while( start <= end ) {
00896                 int mid = (start + end) / 2;
00897                 fi = g_array_index(export, FILE_INFO, mid);
00898                 if( fi.startoff < a ) {
00899                         start = mid + 1;
00900                 } else if( fi.startoff > a ) {
00901                         end = mid - 1;
00902                 } else {
00903                         start = end = mid;
00904                         break;
00905                 }
00906         }
00907 
00908         /* end should never go negative, since first startoff is 0 and a >= 0 */
00909         assert(end >= 0);
00910 
00911         fi = g_array_index(export, FILE_INFO, end);
00912         *fhandle = fi.fhandle;
00913         *foffset = a - fi.startoff;
00914         *maxbytes = 0;
00915         if( end+1 < export->len ) {
00916                 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
00917                 *maxbytes = fi_next.startoff - a;
00918         }
00919 
00920         return 0;
00921 }
00922 
/**
 * Position a file descriptor at an absolute offset, reporting failure
 * through err().
 *
 * @param handle a filedescriptor
 * @param a absolute position (from the start of the file) to seek to
 * @todo get rid of this.
 **/
void myseek(int handle,off_t a) {
	const off_t landed = lseek(handle, a, SEEK_SET);

	if (landed < 0) {
		err("Can not seek locally!\n");
	}
}
00934 
00935 /**
00936  * Write an amount of bytes at a given offset to the right file. This
00937  * abstracts the write-side of the multiple file option.
00938  *
00939  * @param a The offset where the write should start
00940  * @param buf The buffer to write from
00941  * @param len The length of buf
00942  * @param client The client we're serving for
00943  * @param fua Flag to indicate 'Force Unit Access'
00944  * @return The number of bytes actually written, or -1 in case of an error
00945  **/
00946 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
00947         int fhandle;
00948         off_t foffset;
00949         size_t maxbytes;
00950         ssize_t retval;
00951 
00952         if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
00953                 return -1;
00954         if(maxbytes && len > maxbytes)
00955                 len = maxbytes;
00956 
00957         DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
00958 
00959         myseek(fhandle, foffset);
00960         retval = write(fhandle, buf, len);
00961         if(client->server->flags & F_SYNC) {
00962                 fsync(fhandle);
00963         } else if (fua) {
00964 
00965           /* This is where we would do the following
00966            *   #ifdef USE_SYNC_FILE_RANGE
00967            * However, we don't, for the reasons set out below
00968            * by Christoph Hellwig <hch@infradead.org>
00969            *
00970            * [BEGINS] 
00971            * fdatasync is equivalent to fsync except that it does not flush
00972            * non-essential metadata (basically just timestamps in practice), but it
00973            * does flush metadata requried to find the data again, e.g. allocation
00974            * information and extent maps.  sync_file_range does nothing but flush
00975            * out pagecache content - it means you basically won't get your data
00976            * back in case of a crash if you either:
00977            * 
00978            *  a) have a volatile write cache in your disk (e.g. any normal SATA disk)
00979            *  b) are using a sparse file on a filesystem
00980            *  c) are using a fallocate-preallocated file on a filesystem
00981            *  d) use any file on a COW filesystem like btrfs
00982            * 
00983            * e.g. it only does anything useful for you if you do not have a volatile
00984            * write cache, and either use a raw block device node, or just overwrite
00985            * an already fully allocated (and not preallocated) file on a non-COW
00986            * filesystem.
00987            * [ENDS]
00988            *
00989            * What we should do is open a second FD with O_DSYNC set, then write to
00990            * that when appropriate. However, with a Linux client, every REQ_FUA
00991            * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
00992            * problems.
00993            *
00994            */
00995 #if 0
00996                 sync_file_range(fhandle, foffset, len,
00997                                 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
00998                                 SYNC_FILE_RANGE_WAIT_AFTER);
00999 #else
01000                 fdatasync(fhandle);
01001 #endif
01002         }
01003         return retval;
01004 }
01005 
01006 /**
01007  * Call rawexpwrite repeatedly until all data has been written.
01008  *
01009  * @param a The offset where the write should start
01010  * @param buf The buffer to write from
01011  * @param len The length of buf
01012  * @param client The client we're serving for
01013  * @param fua Flag to indicate 'Force Unit Access'
01014  * @return 0 on success, nonzero on failure
01015  **/
01016 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
01017         ssize_t ret=0;
01018 
01019         while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
01020                 a += ret;
01021                 buf += ret;
01022                 len -= ret;
01023         }
01024         return (ret < 0 || len != 0);
01025 }
01026 
01027 /**
01028  * Read an amount of bytes at a given offset from the right file. This
01029  * abstracts the read-side of the multiple files option.
01030  *
01031  * @param a The offset where the read should start
01032  * @param buf A buffer to read into
01033  * @param len The size of buf
01034  * @param client The client we're serving for
01035  * @return The number of bytes actually read, or -1 in case of an
01036  * error.
01037  **/
01038 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
01039         int fhandle;
01040         off_t foffset;
01041         size_t maxbytes;
01042 
01043         if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
01044                 return -1;
01045         if(maxbytes && len > maxbytes)
01046                 len = maxbytes;
01047 
01048         DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
01049 
01050         myseek(fhandle, foffset);
01051         return read(fhandle, buf, len);
01052 }
01053 
01054 /**
01055  * Call rawexpread repeatedly until all data has been read.
01056  * @return 0 on success, nonzero on failure
01057  **/
01058 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
01059         ssize_t ret=0;
01060 
01061         while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
01062                 a += ret;
01063                 buf += ret;
01064                 len -= ret;
01065         }
01066         return (ret < 0 || len != 0);
01067 }
01068 
01069 /**
01070  * Read an amount of bytes at a given offset from the right file. This
01071  * abstracts the read-side of the copyonwrite stuff, and calls
01072  * rawexpread() with the right parameters to do the actual work.
01073  * @param a The offset where the read should start
01074  * @param buf A buffer to read into
01075  * @param len The size of buf
01076  * @param client The client we're going to read for
01077  * @return 0 on success, nonzero on failure
01078  **/
01079 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
01080         off_t rdlen, offset;
01081         off_t mapcnt, mapl, maph, pagestart;
01082 
01083         if (!(client->server->flags & F_COPYONWRITE))
01084                 return(rawexpread_fully(a, buf, len, client));
01085         DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
01086 
01087         mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
01088 
01089         for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
01090                 pagestart=mapcnt*DIFFPAGESIZE;
01091                 offset=a-pagestart;
01092                 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
01093                         len : (size_t)DIFFPAGESIZE-offset;
01094                 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
01095                         DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
01096                                (unsigned long)(client->difmap[mapcnt]));
01097                         myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
01098                         if (read(client->difffile, buf, rdlen) != rdlen) return -1;
01099                 } else { /* the block is not there */
01100                         DEBUG("Page %llu is not here, we read the original one\n",
01101                                (unsigned long long)mapcnt);
01102                         if(rawexpread_fully(a, buf, rdlen, client)) return -1;
01103                 }
01104                 len-=rdlen; a+=rdlen; buf+=rdlen;
01105         }
01106         return 0;
01107 }
01108 
01109 /**
01110  * Write an amount of bytes at a given offset to the right file. This
01111  * abstracts the write-side of the copyonwrite option, and calls
01112  * rawexpwrite() with the right parameters to do the actual work.
01113  *
01114  * @param a The offset where the write should start
01115  * @param buf The buffer to write from
01116  * @param len The length of buf
01117  * @param client The client we're going to write for.
01118  * @param fua Flag to indicate 'Force Unit Access'
01119  * @return 0 on success, nonzero on failure
01120  **/
01121 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
01122         char pagebuf[DIFFPAGESIZE];
01123         off_t mapcnt,mapl,maph;
01124         off_t wrlen,rdlen; 
01125         off_t pagestart;
01126         off_t offset;
01127 
01128         if (!(client->server->flags & F_COPYONWRITE))
01129                 return(rawexpwrite_fully(a, buf, len, client, fua)); 
01130         DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
01131 
01132         mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
01133 
01134         for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
01135                 pagestart=mapcnt*DIFFPAGESIZE ;
01136                 offset=a-pagestart ;
01137                 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
01138                         len : (size_t)DIFFPAGESIZE-offset;
01139 
01140                 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
01141                         DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
01142                                (unsigned long)(client->difmap[mapcnt])) ;
01143                         myseek(client->difffile,
01144                                         client->difmap[mapcnt]*DIFFPAGESIZE+offset);
01145                         if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
01146                 } else { /* the block is not there */
01147                         myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
01148                         client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
01149                         DEBUG("Page %llu is not here, we put it at %lu\n",
01150                                (unsigned long long)mapcnt,
01151                                (unsigned long)(client->difmap[mapcnt]));
01152                         rdlen=DIFFPAGESIZE ;
01153                         if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
01154                                 return -1;
01155                         memcpy(pagebuf+offset,buf,wrlen) ;
01156                         if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
01157                                         DIFFPAGESIZE)
01158                                 return -1;
01159                 }                                                   
01160                 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
01161         }
01162         if (client->server->flags & F_SYNC) {
01163                 fsync(client->difffile);
01164         } else if (fua) {
01165                 /* open question: would it be cheaper to do multiple sync_file_ranges?
01166                    as we iterate through the above?
01167                  */
01168                 fdatasync(client->difffile);
01169         }
01170         return 0;
01171 }
01172 
01173 /**
01174  * Flush data to a client
01175  *
01176  * @param client The client we're going to write for.
01177  * @return 0 on success, nonzero on failure
01178  **/
01179 int expflush(CLIENT *client) {
01180         gint i;
01181 
01182         if (client->server->flags & F_COPYONWRITE) {
01183                 return fsync(client->difffile);
01184         }
01185         
01186         for (i = 0; i < client->export->len; i++) {
01187                 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
01188                 if (fsync(fi.fhandle) < 0)
01189                         return -1;
01190         }
01191         
01192         return 0;
01193 }
01194 
01195 /*
01196  * If the current system supports it, call fallocate() on the backend
01197  * file to resparsify stuff that isn't needed anymore (see NBD_CMD_TRIM)
01198  */
01199 int exptrim(struct nbd_request* req, CLIENT* client) {
01200 #if HAVE_FALLOC_PH
01201         FILE_INFO prev = g_array_index(client->export, FILE_INFO, 0);
01202         FILE_INFO cur = prev;
01203         int i = 1;
01204         /* We're running on a system that supports the
01205          * FALLOC_FL_PUNCH_HOLE option to re-sparsify a file */
01206         do {
01207                 if(i<client->export->len) {
01208                         cur = g_array_index(client->export, FILE_INFO, i);
01209                 }
01210                 if(prev.startoff <= req->from) {
01211                         off_t curoff = req->from - prev.startoff;
01212                         off_t curlen = cur.startoff - prev.startoff - curoff;
01213                         fallocate(prev.fhandle, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, curoff, curlen);
01214                 }
01215                 prev = cur;
01216         } while(i < client->export->len && cur.startoff < (req->from + req->len));
01217         DEBUG("Performed TRIM request from %llu to %llu", (unsigned long long) req->from, (unsigned long long) req->len);
01218 #else
01219         DEBUG("Ignoring TRIM request (not supported on current platform");
01220 #endif
01221         return 0;
01222 }
01223 
/**
 * Send one option reply to the client in a single writev() call: a
 * 64-bit magic, the option being answered, the reply type, the payload
 * length (all in network byte order) and finally the payload itself.
 *
 * @param opt The option this reply belongs to
 * @param net The socket to write to
 * @param reply_type The reply type code
 * @param datasize Number of payload bytes in data
 * @param data The payload (may be NULL when datasize is 0)
 **/
static void send_reply(uint32_t opt, int net, uint32_t reply_type, size_t datasize, void* data) {
	uint64_t wire_magic = htonll(0x3e889045565a9LL);
	uint32_t wire_opt = htonl(opt);
	uint32_t wire_type = htonl(reply_type);
	uint32_t wire_len = htonl(datasize);
	struct iovec parts[5];
	size_t expected = 0;
	ssize_t sent;
	int n;

	parts[0].iov_base = &wire_magic;
	parts[0].iov_len = sizeof(wire_magic);
	parts[1].iov_base = &wire_opt;
	parts[1].iov_len = sizeof(wire_opt);
	parts[2].iov_base = &wire_type;
	parts[2].iov_len = sizeof(wire_type);
	parts[3].iov_base = &wire_len;
	parts[3].iov_len = sizeof(wire_len);
	parts[4].iov_base = data;
	parts[4].iov_len = datasize;

	/* A complete reply is the sum of all five pieces */
	for(n = 0; n < 5; n++) {
		expected += parts[n].iov_len;
	}

	sent = writev(net, parts, 5);
	if(sent != expected) {
		perror("E: couldn't write enough data:");
	}
}
01242 
01243 static CLIENT* handle_export_name(uint32_t opt, int net, GArray* servers, uint32_t cflags) {
01244         uint32_t namelen;
01245         char* name;
01246         int i;
01247 
01248         if (read(net, &namelen, sizeof(namelen)) < 0) {
01249                 err("Negotiation failed/7: %m");
01250                 return NULL;
01251         }
01252         namelen = ntohl(namelen);
01253         name = malloc(namelen+1);
01254         name[namelen]=0;
01255         if (read(net, name, namelen) < 0) {
01256                 err("Negotiation failed/8: %m");
01257                 free(name);
01258                 return NULL;
01259         }
01260         for(i=0; i<servers->len; i++) {
01261                 SERVER* serve = &(g_array_index(servers, SERVER, i));
01262                 if(!strcmp(serve->servename, name)) {
01263                         CLIENT* client = g_new0(CLIENT, 1);
01264                         client->server = serve;
01265                         client->exportsize = OFFT_MAX;
01266                         client->net = net;
01267                         client->modern = TRUE;
01268                         client->transactionlogfd = -1;
01269                         client->clientfeats = cflags;
01270                         free(name);
01271                         return client;
01272                 }
01273         }
01274         err("Negotiation failed/8a: Requested export not found");
01275         free(name);
01276         return NULL;
01277 }
01278 
01279 static void handle_list(uint32_t opt, int net, GArray* servers, uint32_t cflags) {
01280         uint32_t len;
01281         int i;
01282         char buf[1024];
01283         char *ptr = buf + sizeof(len);
01284 
01285         if (read(net, &len, sizeof(len)) < 0)
01286                 err("Negotiation failed/8: %m");
01287         len = ntohl(len);
01288         if(len) {
01289                 send_reply(opt, net, NBD_REP_ERR_INVALID, 0, NULL);
01290         }
01291         if(!(glob_flags & F_LIST)) {
01292                 send_reply(opt, net, NBD_REP_ERR_POLICY, 0, NULL);
01293                 err_nonfatal("Client tried disallowed list option");
01294                 return;
01295         }
01296         for(i=0; i<servers->len; i++) {
01297                 SERVER* serve = &(g_array_index(servers, SERVER, i));
01298                 len = htonl(strlen(serve->servename));
01299                 memcpy(buf, &len, sizeof(len));
01300                 strcpy(ptr, serve->servename);
01301                 send_reply(opt, net, NBD_REP_SERVER, strlen(serve->servename)+sizeof(len), buf);
01302         }
01303         send_reply(opt, net, NBD_REP_ACK, 0, NULL);
01304 }
01305 
01306 /**
01307  * Do the initial negotiation.
01308  *
01309  * @param client The client we're negotiating with.
01310  **/
01311 CLIENT* negotiate(int net, CLIENT *client, GArray* servers, int phase) {
01312         char zeros[128];
01313         uint64_t size_host;
01314         uint32_t flags = NBD_FLAG_HAS_FLAGS;
01315         uint16_t smallflags = 0;
01316         uint64_t magic;
01317 
01318         memset(zeros, '\0', sizeof(zeros));
01319         assert(((phase & NEG_INIT) && (phase & NEG_MODERN)) || client);
01320         if(phase & NEG_MODERN) {
01321                 smallflags |= NBD_FLAG_FIXED_NEWSTYLE;
01322         }
01323         if(phase & NEG_INIT) {
01324                 /* common */
01325                 if (write(net, INIT_PASSWD, 8) < 0) {
01326                         err_nonfatal("Negotiation failed/1: %m");
01327                         if(client)
01328                                 exit(EXIT_FAILURE);
01329                 }
01330                 if(phase & NEG_MODERN) {
01331                         /* modern */
01332                         magic = htonll(opts_magic);
01333                 } else {
01334                         /* oldstyle */
01335                         magic = htonll(cliserv_magic);
01336                 }
01337                 if (write(net, &magic, sizeof(magic)) < 0) {
01338                         err_nonfatal("Negotiation failed/2: %m");
01339                         if(phase & NEG_OLD)
01340                                 exit(EXIT_FAILURE);
01341                 }
01342         }
01343         if ((phase & NEG_MODERN) && (phase & NEG_INIT)) {
01344                 /* modern */
01345                 uint32_t cflags;
01346                 uint32_t opt;
01347 
01348                 if(!servers)
01349                         err("programmer error");
01350                 smallflags = htons(smallflags);
01351                 if (write(net, &smallflags, sizeof(uint16_t)) < 0)
01352                         err_nonfatal("Negotiation failed/3: %m");
01353                 if (read(net, &cflags, sizeof(cflags)) < 0)
01354                         err_nonfatal("Negotiation failed/4: %m");
01355                 cflags = htonl(cflags);
01356                 do {
01357                         if (read(net, &magic, sizeof(magic)) < 0)
01358                                 err_nonfatal("Negotiation failed/5: %m");
01359                         magic = ntohll(magic);
01360                         if(magic != opts_magic) {
01361                                 err_nonfatal("Negotiation failed/5a: magic mismatch");
01362                                 return NULL;
01363                         }
01364                         if (read(net, &opt, sizeof(opt)) < 0)
01365                                 err_nonfatal("Negotiation failed/6: %m");
01366                         opt = ntohl(opt);
01367                         switch(opt) {
01368                         case NBD_OPT_EXPORT_NAME:
01369                                 // NBD_OPT_EXPORT_NAME must be the last
01370                                 // selected option, so return from here
01371                                 // if that is chosen.
01372                                 return handle_export_name(opt, net, servers, cflags);
01373                                 break;
01374                         case NBD_OPT_LIST:
01375                                 handle_list(opt, net, servers, cflags);
01376                                 break;
01377                         case NBD_OPT_ABORT:
01378                                 // handled below
01379                                 break;
01380                         default:
01381                                 send_reply(opt, net, NBD_REP_ERR_UNSUP, 0, NULL);
01382                                 break;
01383                         }
01384                 } while((opt != NBD_OPT_EXPORT_NAME) && (opt != NBD_OPT_ABORT));
01385                 if(opt == NBD_OPT_ABORT) {
01386                         err_nonfatal("Session terminated by client");
01387                         return NULL;
01388                 }
01389         }
01390         /* common */
01391         size_host = htonll((u64)(client->exportsize));
01392         if (write(net, &size_host, 8) < 0)
01393                 err("Negotiation failed/9: %m");
01394         if (client->server->flags & F_READONLY)
01395                 flags |= NBD_FLAG_READ_ONLY;
01396         if (client->server->flags & F_FLUSH)
01397                 flags |= NBD_FLAG_SEND_FLUSH;
01398         if (client->server->flags & F_FUA)
01399                 flags |= NBD_FLAG_SEND_FUA;
01400         if (client->server->flags & F_ROTATIONAL)
01401                 flags |= NBD_FLAG_ROTATIONAL;
01402         if (client->server->flags & F_TRIM)
01403                 flags |= NBD_FLAG_SEND_TRIM;
01404         if (phase & NEG_OLD) {
01405                 /* oldstyle */
01406                 flags = htonl(flags);
01407                 if (write(client->net, &flags, 4) < 0)
01408                         err("Negotiation failed/10: %m");
01409         } else {
01410                 /* modern */
01411                 smallflags = (uint16_t)(flags & ~((uint16_t)0));
01412                 smallflags = htons(smallflags);
01413                 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
01414                         err("Negotiation failed/11: %m");
01415                 }
01416         }
01417         /* common */
01418         if (write(client->net, zeros, 124) < 0)
01419                 err("Negotiation failed/12: %m");
01420         return NULL;
01421 }
01422 
/**
 * Sending macro: write `reply' to `net' and, if a transaction log is
 * open, write the same bytes to the log as well. Expects a variable
 * called `client' to be in scope where the macro is used.
 **/
#define SEND(net,reply) { \
        writeit( net, &reply, sizeof( reply )); \
        if (client->transactionlogfd != -1) \
                writeit(client->transactionlogfd, &reply, sizeof(reply)); \
}
/**
 * Error macro: send `reply' with its error field set to `errcode'
 * (converted with htonl), then reset the error field to 0 so the same
 * reply structure can be reused.
 **/
#define ERROR(client,reply,errcode) { \
        reply.error = htonl(errcode); \
        SEND(client->net,reply); \
        reply.error = 0; \
}
01429 /**
01430  * Serve a file to a single client.
01431  *
01432  * @todo This beast needs to be split up in many tiny little manageable
01433  * pieces. Preferably with a chainsaw.
01434  *
01435  * @param client The client we're going to serve to.
01436  * @return when the client disconnects
01437  **/
01438 int mainloop(CLIENT *client) {
01439         struct nbd_request request;
01440         struct nbd_reply reply;
01441         gboolean go_on=TRUE;
01442 #ifdef DODBG
01443         int i = 0;
01444 #endif
01445         negotiate(client->net, client, NULL, client->modern ? NEG_MODERN : (NEG_OLD | NEG_INIT));
01446         DEBUG("Entering request loop!\n");
01447         reply.magic = htonl(NBD_REPLY_MAGIC);
01448         reply.error = 0;
01449         while (go_on) {
01450                 char buf[BUFSIZE];
01451                 char* p;
01452                 size_t len;
01453                 size_t currlen;
01454                 size_t writelen;
01455                 uint16_t command;
01456 #ifdef DODBG
01457                 i++;
01458                 printf("%d: ", i);
01459 #endif
01460                 readit(client->net, &request, sizeof(request));
01461                 if (client->transactionlogfd != -1)
01462                         writeit(client->transactionlogfd, &request, sizeof(request));
01463 
01464                 request.from = ntohll(request.from);
01465                 request.type = ntohl(request.type);
01466                 command = request.type & NBD_CMD_MASK_COMMAND;
01467                 len = ntohl(request.len);
01468 
01469                 DEBUG("%s from %llu (%llu) len %u, ", getcommandname(command),
01470                                 (unsigned long long)request.from,
01471                                 (unsigned long long)request.from / 512, len);
01472 
01473                 if (request.magic != htonl(NBD_REQUEST_MAGIC))
01474                         err("Not enough magic.");
01475 
01476                 memcpy(reply.handle, request.handle, sizeof(reply.handle));
01477 
01478                 if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
01479                         if (request.from + len < request.from) { // 64 bit overflow!!
01480                                 DEBUG("[Number too large!]");
01481                                 ERROR(client, reply, EINVAL);
01482                                 continue;
01483                         }
01484 
01485                         if (((off_t)request.from + len) > client->exportsize) {
01486                                 DEBUG("[RANGE!]");
01487                                 ERROR(client, reply, EINVAL);
01488                                 continue;
01489                         }
01490 
01491                         currlen = len;
01492                         if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
01493                                 currlen = BUFSIZE - sizeof(struct nbd_reply);
01494                                 if(!logged_oversized) {
01495                                         msg(LOG_DEBUG, "oversized request (this is not a problem)");
01496                                         logged_oversized = true;
01497                                 }
01498                         }
01499                 }
01500 
01501                 switch (command) {
01502 
01503                 case NBD_CMD_DISC:
01504                         msg(LOG_INFO, "Disconnect request received.");
01505                         if (client->server->flags & F_COPYONWRITE) { 
01506                                 if (client->difmap) g_free(client->difmap) ;
01507                                 close(client->difffile);
01508                                 unlink(client->difffilename);
01509                                 free(client->difffilename);
01510                         }
01511                         go_on=FALSE;
01512                         continue;
01513 
01514                 case NBD_CMD_WRITE:
01515                         DEBUG("wr: net->buf, ");
01516                         while(len > 0) {
01517                                 readit(client->net, buf, currlen);
01518                                 DEBUG("buf->exp, ");
01519                                 if ((client->server->flags & F_READONLY) ||
01520                                     (client->server->flags & F_AUTOREADONLY)) {
01521                                         DEBUG("[WRITE to READONLY!]");
01522                                         ERROR(client, reply, EPERM);
01523                                         consume(client->net, buf, len-currlen, BUFSIZE);
01524                                         continue;
01525                                 }
01526                                 if (expwrite(request.from, buf, currlen, client,
01527                                              request.type & NBD_CMD_FLAG_FUA)) {
01528                                         DEBUG("Write failed: %m" );
01529                                         ERROR(client, reply, errno);
01530                                         consume(client->net, buf, len-currlen, BUFSIZE);
01531                                         continue;
01532                                 }
01533                                 len -= currlen;
01534                                 request.from += currlen;
01535                                 currlen = (len < BUFSIZE) ? len : BUFSIZE;
01536                         }
01537                         SEND(client->net, reply);
01538                         DEBUG("OK!\n");
01539                         continue;
01540 
01541                 case NBD_CMD_FLUSH:
01542                         DEBUG("fl: ");
01543                         if (expflush(client)) {
01544                                 DEBUG("Flush failed: %m");
01545                                 ERROR(client, reply, errno);
01546                                 continue;
01547                         }
01548                         SEND(client->net, reply);
01549                         DEBUG("OK!\n");
01550                         continue;
01551 
01552                 case NBD_CMD_READ:
01553                         DEBUG("exp->buf, ");
01554                         if (client->transactionlogfd != -1)
01555                                 writeit(client->transactionlogfd, &reply, sizeof(reply));
01556                         writeit(client->net, &reply, sizeof(reply));
01557                         p = buf;
01558                         writelen = currlen;
01559                         while(len > 0) {
01560                                 if (expread(request.from, p, currlen, client)) {
01561                                         DEBUG("Read failed: %m");
01562                                         ERROR(client, reply, errno);
01563                                         continue;
01564                                 }
01565                                 
01566                                 DEBUG("buf->net, ");
01567                                 writeit(client->net, buf, writelen);
01568                                 len -= currlen;
01569                                 request.from += currlen;
01570                                 currlen = (len < BUFSIZE) ? len : BUFSIZE;
01571                                 p = buf;
01572                                 writelen = currlen;
01573                         }
01574                         DEBUG("OK!\n");
01575                         continue;
01576 
01577                 case NBD_CMD_TRIM:
01578                         /* The kernel module sets discard_zeroes_data == 0,
01579                          * so it is okay to do nothing.  */
01580                         if (exptrim(&request, client)) {
01581                                 DEBUG("Trim failed: %m");
01582                                 ERROR(client, reply, errno);
01583                                 continue;
01584                         }
01585                         SEND(client->net, reply);
01586                         continue;
01587 
01588                 default:
01589                         DEBUG ("Ignoring unknown command\n");
01590                         continue;
01591                 }
01592         }
01593         return 0;
01594 }
01595 
01596 /**
01597  * Set up client export array, which is an array of FILE_INFO.
01598  * Also, split a single exportfile into multiple ones, if that was asked.
01599  * @param client information on the client which we want to setup export for
01600  **/
01601 void setupexport(CLIENT* client) {
01602         int i;
01603         off_t laststartoff = 0, lastsize = 0;
01604         int multifile = (client->server->flags & F_MULTIFILE);
01605         int temporary = (client->server->flags & F_TEMPORARY) && !multifile;
01606         int cancreate = (client->server->expected_size) && !multifile;
01607 
01608         client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
01609 
01610         /* If multi-file, open as many files as we can.
01611          * If not, open exactly one file.
01612          * Calculate file sizes as we go to get total size. */
01613         for(i=0; ; i++) {
01614                 FILE_INFO fi;
01615                 gchar *tmpname;
01616                 gchar* error_string;
01617 
01618                 if (i)
01619                   cancreate = 0;
01620                 /* if expected_size is specified, and this is the first file, we can create the file */
01621                 mode_t mode = (client->server->flags & F_READONLY) ?
01622                   O_RDONLY : (O_RDWR | (cancreate?O_CREAT:0));
01623 
01624                 if (temporary) {
01625                         tmpname=g_strdup_printf("%s.%d-XXXXXX", client->exportname, i);
01626                         DEBUG( "Opening %s\n", tmpname );
01627                         fi.fhandle = mkstemp(tmpname);
01628                 } else {
01629                         if(multifile) {
01630                                 tmpname=g_strdup_printf("%s.%d", client->exportname, i);
01631                         } else {
01632                                 tmpname=g_strdup(client->exportname);
01633                         }
01634                         DEBUG( "Opening %s\n", tmpname );
01635                         fi.fhandle = open(tmpname, mode, 0x600);
01636                         if(fi.fhandle == -1 && mode == O_RDWR) {
01637                                 /* Try again because maybe media was read-only */
01638                                 fi.fhandle = open(tmpname, O_RDONLY);
01639                                 if(fi.fhandle != -1) {
01640                                         /* Opening the base file in copyonwrite mode is
01641                                          * okay */
01642                                         if(!(client->server->flags & F_COPYONWRITE)) {
01643                                                 client->server->flags |= F_AUTOREADONLY;
01644                                                 client->server->flags |= F_READONLY;
01645                                         }
01646                                 }
01647                         }
01648                 }
01649                 if(fi.fhandle == -1) {
01650                         if(multifile && i>0)
01651                                 break;
01652                         error_string=g_strdup_printf(
01653                                 "Could not open exported file %s: %%m",
01654                                 tmpname);
01655                         err(error_string);
01656                 }
01657 
01658                 if (temporary)
01659                         unlink(tmpname); /* File will stick around whilst FD open */
01660 
01661                 fi.startoff = laststartoff + lastsize;
01662                 g_array_append_val(client->export, fi);
01663                 g_free(tmpname);
01664 
01665                 /* Starting offset and size of this file will be used to
01666                  * calculate starting offset of next file */
01667                 laststartoff = fi.startoff;
01668                 lastsize = size_autodetect(fi.fhandle);
01669 
01670                 /* If we created the file, it will be length zero */
01671                 if (!lastsize && cancreate) {
01672                         assert(!multifile);
01673                         if(ftruncate (fi.fhandle, client->server->expected_size)<0) {
01674                                 err("Could not expand file: %m");
01675                         }
01676                         lastsize = client->server->expected_size;
01677                         break; /* don't look for any more files */
01678                 }
01679 
01680                 if(!multifile || temporary)
01681                         break;
01682         }
01683 
01684         /* Set export size to total calculated size */
01685         client->exportsize = laststartoff + lastsize;
01686 
01687         /* Export size may be overridden */
01688         if(client->server->expected_size) {
01689                 /* desired size must be <= total calculated size */
01690                 if(client->server->expected_size > client->exportsize) {
01691                         err("Size of exported file is too big\n");
01692                 }
01693 
01694                 client->exportsize = client->server->expected_size;
01695         }
01696 
01697         msg(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
01698         if(multifile) {
01699                 msg(LOG_INFO, "Total number of files: %d", i);
01700         }
01701 }
01702 
01703 int copyonwrite_prepare(CLIENT* client) {
01704         off_t i;
01705         if ((client->difffilename = malloc(1024))==NULL)
01706                 err("Failed to allocate string for diff file name");
01707         snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
01708                 (int)getpid()) ;
01709         client->difffilename[1023]='\0';
01710         msg(LOG_INFO, "About to create map and diff file %s", client->difffilename) ;
01711         client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
01712         if (client->difffile<0) err("Could not create diff file (%m)") ;
01713         if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
01714                 err("Could not allocate memory") ;
01715         for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
01716 
01717         return 0;
01718 }
01719 
01720 /**
01721  * Run a command. This is used for the ``prerun'' and ``postrun'' config file
01722  * options
01723  *
01724  * @param command the command to be ran. Read from the config file
01725  * @param file the file name we're about to export
01726  **/
01727 int do_run(gchar* command, gchar* file) {
01728         gchar* cmd;
01729         int retval=0;
01730 
01731         if(command && *command) {
01732                 cmd = g_strdup_printf(command, file);
01733                 retval=system(cmd);
01734                 g_free(cmd);
01735         }
01736         return retval;
01737 }
01738 
01739 /**
01740  * Serve a connection. 
01741  *
01742  * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
01743  * follow the road map.
01744  *
01745  * @param client a connected client
01746  **/
01747 void serveconnection(CLIENT *client) {
01748         if (client->server->transactionlog && (client->transactionlogfd == -1))
01749         {
01750                 if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
01751                                                            O_WRONLY | O_CREAT,
01752                                                            S_IRUSR | S_IWUSR)))
01753                         g_warning("Could not open transaction log %s",
01754                                   client->server->transactionlog);
01755         }
01756 
01757         if(do_run(client->server->prerun, client->exportname)) {
01758                 exit(EXIT_FAILURE);
01759         }
01760         setupexport(client);
01761 
01762         if (client->server->flags & F_COPYONWRITE) {
01763                 copyonwrite_prepare(client);
01764         }
01765 
01766         setmysockopt(client->net);
01767 
01768         mainloop(client);
01769         do_run(client->server->postrun, client->exportname);
01770 
01771         if (-1 != client->transactionlogfd)
01772         {
01773                 close(client->transactionlogfd);
01774                 client->transactionlogfd = -1;
01775         }
01776 }
01777 
01778 /**
01779  * Find the name of the file we have to serve. This will use g_strdup_printf
01780  * to put the IP address of the client inside a filename containing
01781  * "%s" (in the form as specified by the "virtstyle" option). That name
01782  * is then written to client->exportname.
01783  *
01784  * @param net A socket connected to an nbd client
01785  * @param client information about the client. The IP address in human-readable
01786  * format will be written to a new char* buffer, the address of which will be
01787  * stored in client->clientname.
01788  * @return: 0 - OK, -1 - failed.
01789  **/
01790 int set_peername(int net, CLIENT *client) {
01791         struct sockaddr_storage netaddr;
01792         struct sockaddr_in  *netaddr4 = NULL;
01793         struct sockaddr_in6 *netaddr6 = NULL;
01794         socklen_t addrinlen = sizeof( struct sockaddr_storage );
01795         struct addrinfo hints;
01796         struct addrinfo *ai = NULL;
01797         char peername[NI_MAXHOST];
01798         char netname[NI_MAXHOST];
01799         char *tmp = NULL;
01800         int i;
01801         int e;
01802         int shift;
01803 
01804         if (getpeername(net, (struct sockaddr *) &(client->clientaddr), &addrinlen) < 0) {
01805                 msg(LOG_INFO, "getpeername failed: %m");
01806                 return -1;
01807         }
01808 
01809         if((e = getnameinfo((struct sockaddr *)&(client->clientaddr), addrinlen,
01810                         peername, sizeof (peername), NULL, 0, NI_NUMERICHOST))) {
01811                 msg(LOG_INFO, "getnameinfo failed: %s", gai_strerror(e));
01812                 return -1;
01813         }
01814 
01815         memset(&hints, '\0', sizeof (hints));
01816         hints.ai_flags = AI_ADDRCONFIG;
01817         e = getaddrinfo(peername, NULL, &hints, &ai);
01818 
01819         if(e != 0) {
01820                 msg(LOG_INFO, "getaddrinfo failed: %s", gai_strerror(e));
01821                 freeaddrinfo(ai);
01822                 return -1;
01823         }
01824 
01825         switch(client->server->virtstyle) {
01826                 case VIRT_NONE:
01827                         msg(LOG_DEBUG, "virtualization is off");
01828                         client->exportname=g_strdup(client->server->exportname);
01829                         break;
01830                 case VIRT_IPHASH:
01831                         msg(LOG_DEBUG, "virtstyle iphash");
01832                         for(i=0;i<strlen(peername);i++) {
01833                                 if(peername[i]=='.') {
01834                                         peername[i]='/';
01835                                 }
01836                         }
01837                 case VIRT_IPLIT:
01838                         msg(LOG_DEBUG, "virststyle ipliteral");
01839                         client->exportname=g_strdup_printf(client->server->exportname, peername);
01840                         break;
01841                 case VIRT_CIDR:
01842                         msg(LOG_DEBUG, "virtstyle cidr %d", client->server->cidrlen);
01843                         memcpy(&netaddr, &(client->clientaddr), addrinlen);
01844                         int addrbits;
01845                         assert((ai->ai_family == AF_INET) || (ai->ai_family == AF_INET6));
01846                         if(ai->ai_family == AF_INET) {
01847                                 addrbits = 32;
01848                         } else if(ai->ai_family == AF_INET6) {
01849                                 addrbits = 128;
01850                         }
01851                         uint8_t* addrptr = ((struct sockaddr*)&netaddr)->sa_data;
01852                         for(int i = 0; i < addrbits; i+=8) {
01853                                 int masklen = client->server->cidrlen - i;
01854                                 masklen = masklen > 0 ? masklen : 0;
01855                                 uint8_t mask = getmaskbyte(masklen);
01856                                 *addrptr &= mask;
01857                                 addrptr++;
01858                         }
01859                         getnameinfo((struct sockaddr *) &netaddr, addrinlen,
01860                                                         netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
01861                         tmp=g_strdup_printf("%s/%s", netname, peername);
01862 
01863                         if(tmp != NULL)
01864                           client->exportname=g_strdup_printf(client->server->exportname, tmp);
01865 
01866                         break;
01867         }
01868 
01869         freeaddrinfo(ai);
01870         msg(LOG_INFO, "connect from %s, assigned file is %s",
01871             peername, client->exportname);
01872         client->clientname=g_strdup(peername);
01873         return 0;
01874 }
01875 
01876 /**
01877  * Destroy a pid_t*
01878  * @param data a pointer to pid_t which should be freed
01879  **/
01880 void destroy_pid_t(gpointer data) {
01881         g_free(data);
01882 }
01883 
01884 static pid_t
01885 spawn_child()
01886 {
01887         pid_t pid;
01888         sigset_t newset;
01889         sigset_t oldset;
01890 
01891         sigemptyset(&newset);
01892         sigaddset(&newset, SIGCHLD);
01893         sigaddset(&newset, SIGTERM);
01894         sigprocmask(SIG_BLOCK, &newset, &oldset);
01895         pid = fork();
01896         if (pid < 0) {
01897                 msg(LOG_ERR, "Could not fork (%s)", strerror(errno));
01898                 goto out;
01899         }
01900         if (pid > 0) { /* Parent */
01901                 pid_t *pidp;
01902 
01903                 pidp = g_malloc(sizeof(pid_t));
01904                 *pidp = pid;
01905                 g_hash_table_insert(children, pidp, pidp);
01906                 goto out;
01907         }
01908         /* Child */
01909         signal(SIGCHLD, SIG_DFL);
01910         signal(SIGTERM, SIG_DFL);
01911         signal(SIGHUP, SIG_DFL);
01912 out:
01913         sigprocmask(SIG_SETMASK, &oldset, NULL);
01914         return pid;
01915 }
01916 
/**
 * Accept one pending connection on a listening socket.
 *
 * A failure is reported through err_nonfatal(); the negative result of
 * accept() is still returned to the caller.
 *
 * @param sock the listening socket
 * @return the descriptor of the accepted connection, or a negative
 * value when accept() failed
 **/
static int
socket_accept(const int sock)
{
        struct sockaddr_storage peer;
        socklen_t peerlen = sizeof(peer);
        const int conn = accept(sock, (struct sockaddr *) &peer, &peerlen);

        if (conn >= 0)
                return conn;

        err_nonfatal("Failed to accept socket connection: %m");
        return conn;
}
01931 
01932 static void
01933 handle_modern_connection(GArray *const servers, const int sock)
01934 {
01935         int net;
01936         pid_t pid;
01937         CLIENT *client = NULL;
01938         int sock_flags_old;
01939         int sock_flags_new;
01940 
01941         net = socket_accept(sock);
01942         if (net < 0)
01943                 return;
01944 
01945         if (!dontfork) {
01946                 pid = spawn_child();
01947                 if (pid) {
01948                         if (pid > 0)
01949                                 msg(LOG_INFO, "Spawned a child process");
01950                         if (pid < 0)
01951                                 msg(LOG_ERR, "Failed to spawn a child process");
01952                         close(net);
01953                         return;
01954                 }
01955                 /* Child just continues. */
01956         }
01957 
01958         client = negotiate(net, NULL, servers, NEG_INIT | NEG_MODERN);
01959         if (!client) {
01960                 msg(LOG_ERR, "Modern initial negotiation failed");
01961                 goto handler_err;
01962         }
01963 
01964         if (client->server->max_connections > 0 &&
01965            g_hash_table_size(children) >= client->server->max_connections) {
01966                 msg(LOG_ERR, "Max connections (%d) reached",
01967                     client->server->max_connections);
01968                 goto handler_err;
01969         }
01970 
01971         sock_flags_old = fcntl(net, F_GETFL, 0);
01972         if (sock_flags_old == -1) {
01973                 msg(LOG_ERR, "Failed to get socket flags");
01974                 goto handler_err;
01975         }
01976 
01977         sock_flags_new = sock_flags_old & ~O_NONBLOCK;
01978         if (sock_flags_new != sock_flags_old &&
01979             fcntl(net, F_SETFL, sock_flags_new) == -1) {
01980                 msg(LOG_ERR, "Failed to set socket to blocking mode");
01981                 goto handler_err;
01982         }
01983 
01984         if (set_peername(net, client)) {
01985                 msg(LOG_ERR, "Failed to set peername");
01986                 goto handler_err;
01987         }
01988 
01989         if (!authorized_client(client)) {
01990                 msg(LOG_INFO, "Client '%s' is not authorized to access",
01991                     client->clientname);
01992                 goto handler_err;
01993         }
01994 
01995         if (!dontfork) {
01996                 int i;
01997 
01998                 /* Free all root server resources here, because we are
01999                  * currently in the child process serving one specific
02000                  * connection. These are not simply needed anymore. */
02001                 g_hash_table_destroy(children);
02002                 children = NULL;
02003                 for (i = 0; i < modernsocks->len; i++) {
02004                         close(g_array_index(modernsocks, int, i));
02005                 }
02006                 g_array_free(modernsocks, TRUE);
02007 
02008                 /* Now that we are in the child process after a
02009                  * succesful negotiation, we do not need the list of
02010                  * servers anymore, get rid of it.*/
02011 
02012                 for (i = 0; i < servers->len; i++) {
02013                         const SERVER *const server = &g_array_index(servers, SERVER, i);
02014                         close(server->socket);
02015                 }
02016 
02017                 /* FALSE does not free the
02018                    actual data. This is required,
02019                    because the client has a
02020                    direct reference into that
02021                    data, and otherwise we get a
02022                    segfault... */
02023                 g_array_free(servers, FALSE);
02024         }
02025 
02026         msg(LOG_INFO, "Starting to serve");
02027         serveconnection(client);
02028         exit(EXIT_SUCCESS);
02029 
02030 handler_err:
02031         g_free(client);
02032         close(net);
02033 
02034         if (!dontfork) {
02035                 exit(EXIT_FAILURE);
02036         }
02037 }
02038 
02039 static void
02040 handle_oldstyle_connection(GArray *const servers, SERVER *const serve)
02041 {
02042         int net;
02043         CLIENT *client = NULL;
02044         int sock_flags_old;
02045         int sock_flags_new;
02046 
02047         net = socket_accept(serve->socket);
02048         if (net < 0)
02049                 return;
02050 
02051         if(serve->max_connections > 0 &&
02052            g_hash_table_size(children) >= serve->max_connections) {
02053                 msg(LOG_INFO, "Max connections reached");
02054                 goto handle_connection_out;
02055         }
02056         if((sock_flags_old = fcntl(net, F_GETFL, 0)) == -1) {
02057                 err("fcntl F_GETFL");
02058         }
02059         sock_flags_new = sock_flags_old & ~O_NONBLOCK;
02060         if (sock_flags_new != sock_flags_old &&
02061             fcntl(net, F_SETFL, sock_flags_new) == -1) {
02062                 err("fcntl F_SETFL ~O_NONBLOCK");
02063         }
02064 
02065         client = g_new0(CLIENT, 1);
02066         client->server=serve;
02067         client->exportsize=OFFT_MAX;
02068         client->net=net;
02069         client->transactionlogfd = -1;
02070 
02071         if (set_peername(net, client)) {
02072                 goto handle_connection_out;
02073         }
02074         if (!authorized_client(client)) {
02075                 msg(LOG_INFO, "Unauthorized client");
02076                 goto handle_connection_out;
02077         }
02078         msg(LOG_INFO, "Authorized client");
02079 
02080         if (!dontfork) {
02081                 pid_t pid;
02082                 int i;
02083                 sigset_t newset;
02084                 sigset_t oldset;
02085 
02086                 sigemptyset(&newset);
02087                 sigaddset(&newset, SIGCHLD);
02088                 sigaddset(&newset, SIGTERM);
02089                 sigprocmask(SIG_BLOCK, &newset, &oldset);
02090                 if ((pid = fork()) < 0) {
02091                         msg(LOG_INFO, "Could not fork (%s)", strerror(errno));
02092                         sigprocmask(SIG_SETMASK, &oldset, NULL);
02093                         goto handle_connection_out;
02094                 }
02095                 if (pid > 0) { /* parent */
02096                         pid_t *pidp;
02097 
02098                         pidp = g_malloc(sizeof(pid_t));
02099                         *pidp = pid;
02100                         g_hash_table_insert(children, pidp, pidp);
02101                         sigprocmask(SIG_SETMASK, &oldset, NULL);
02102                         goto handle_connection_out;
02103                 }
02104                 /* child */
02105                 signal(SIGCHLD, SIG_DFL);
02106                 signal(SIGTERM, SIG_DFL);
02107                 signal(SIGHUP, SIG_DFL);
02108                 sigprocmask(SIG_SETMASK, &oldset, NULL);
02109 
02110                 g_hash_table_destroy(children);
02111                 children = NULL;
02112                 for(i=0;i<servers->len;i++) {
02113                         close(g_array_index(servers, SERVER, i).socket);
02114                 }
02115                 /* FALSE does not free the
02116                    actual data. This is required,
02117                    because the client has a
02118                    direct reference into that
02119                    data, and otherwise we get a
02120                    segfault... */
02121                 g_array_free(servers, FALSE);
02122                 for(i=0;i<modernsocks->len;i++) {
02123                         close(g_array_index(modernsocks, int, i));
02124                 }
02125                 g_array_free(modernsocks, TRUE);
02126         }
02127 
02128         msg(LOG_INFO, "Starting to serve");
02129         serveconnection(client);
02130         exit(EXIT_SUCCESS);
02131 
02132 handle_connection_out:
02133         g_free(client);
02134         close(net);
02135 }
02136 
02137 /**
02138  * Return the index of the server whose servename matches the given
02139  * name.
02140  *
02141  * @param servename a string to match
02142  * @param servers an array of servers
02143  * @return the first index of the server whose servename matches the
02144  *         given name or -1 if one cannot be found
02145  **/
02146 static int get_index_by_servename(const gchar *const servename,
02147                                   const GArray *const servers) {
02148         int i;
02149 
02150         for (i = 0; i < servers->len; ++i) {
02151                 const SERVER server = g_array_index(servers, SERVER, i);
02152 
02153                 if (strcmp(servename, server.servename) == 0)
02154                         return i;
02155         }
02156 
02157         return -1;
02158 }
02159 
02160 int setup_serve(SERVER *const serve, GError **const gerror);
02161 
02162 /**
02163  * Parse configuration files and add servers to the array if they don't
02164  * already exist there. The existence is tested by comparing
02165  * servenames. A server is appended to the array only if its servename
02166  * is unique among all other servers.
02167  *
02168  * @param servers an array of servers
02169  * @return the number of new servers appended to the array, or -1 in
02170  *         case of an error
02171  **/
02172 static int append_new_servers(GArray *const servers, GError **const gerror) {
02173         int i;
02174         GArray *new_servers;
02175         const int old_len = servers->len;
02176         int retval = -1;
02177         struct generic_conf genconf;
02178 
02179         new_servers = parse_cfile(config_file_pos, &genconf, gerror);
02180         if (!new_servers)
02181                 goto out;
02182 
02183         for (i = 0; i < new_servers->len; ++i) {
02184                 SERVER new_server = g_array_index(new_servers, SERVER, i);
02185 
02186                 if (new_server.servename
02187                     && -1 == get_index_by_servename(new_server.servename,
02188                                                     servers)) {
02189                         if (setup_serve(&new_server, gerror) == -1)
02190                                 goto out;
02191                         if (append_serve(&new_server, servers) == -1)
02192                                 goto out;
02193                 }
02194         }
02195 
02196         retval = servers->len - old_len;
02197 out:
02198         g_array_free(new_servers, TRUE);
02199 
02200         return retval;
02201 }
02202 
02203 /**
02204  * Loop through the available servers, and serve them. Never returns.
02205  **/
02206 void serveloop(GArray* servers) {
02207         int i;
02208         int max;
02209         fd_set mset;
02210         fd_set rset;
02211 
02212         /* 
02213          * Set up the master fd_set. The set of descriptors we need
02214          * to select() for never changes anyway and it buys us a *lot*
02215          * of time to only build this once. However, if we ever choose
02216          * to not fork() for clients anymore, we may have to revisit
02217          * this.
02218          */
02219         max=0;
02220         FD_ZERO(&mset);
02221         for(i=0;i<servers->len;i++) {
02222                 int sock;
02223                 if((sock=(g_array_index(servers, SERVER, i)).socket) >= 0) {
02224                         FD_SET(sock, &mset);
02225                         max=sock>max?sock:max;
02226                 }
02227         }
02228         for(i=0;i<modernsocks->len;i++) {
02229                 int sock = g_array_index(modernsocks, int, i);
02230                 FD_SET(sock, &mset);
02231                 max=sock>max?sock:max;
02232         }
02233         for(;;) {
02234                 /* SIGHUP causes the root server process to reconfigure
02235                  * itself and add new export servers for each newly
02236                  * found export configuration group, i.e. spawn new
02237                  * server processes for each previously non-existent
02238                  * export. This does not alter old runtime configuration
02239                  * but just appends new exports. */
02240                 if (is_sighup_caught) {
02241                         int n;
02242                         GError *gerror = NULL;
02243 
02244                         msg(LOG_INFO, "reconfiguration request received");
02245                         is_sighup_caught = 0; /* Reset to allow catching
02246                                                * it again. */
02247 
02248                         n = append_new_servers(servers, &gerror);
02249                         if (n == -1)
02250                                 msg(LOG_ERR, "failed to append new servers: %s",
02251                                     gerror->message);
02252 
02253                         for (i = servers->len - n; i < servers->len; ++i) {
02254                                 const SERVER server = g_array_index(servers,
02255                                                                     SERVER, i);
02256 
02257                                 if (server.socket >= 0) {
02258                                         FD_SET(server.socket, &mset);
02259                                         max = server.socket > max ? server.socket : max;
02260                                 }
02261 
02262                                 msg(LOG_INFO, "reconfigured new server: %s",
02263                                     server.servename);
02264                         }
02265                 }
02266 
02267                 memcpy(&rset, &mset, sizeof(fd_set));
02268                 if(select(max+1, &rset, NULL, NULL, NULL)>0) {
02269 
02270                         DEBUG("accept, ");
02271                         for(i=0; i < modernsocks->len; i++) {
02272                                 int sock = g_array_index(modernsocks, int, i);
02273                                 if(!FD_ISSET(sock, &rset)) {
02274                                         continue;
02275                                 }
02276 
02277                                 handle_modern_connection(servers, sock);
02278                         }
02279                         for(i=0; i < servers->len; i++) {
02280                                 SERVER *serve;
02281 
02282                                 serve=&(g_array_index(servers, SERVER, i));
02283                                 if(serve->socket < 0) {
02284                                         continue;
02285                                 }
02286                                 if(FD_ISSET(serve->socket, &rset)) {
02287                                         handle_oldstyle_connection(servers, serve);
02288                                 }
02289                         }
02290                 }
02291         }
02292 }
02293 void serveloop(GArray* servers) G_GNUC_NORETURN;
02294 
02295 /**
02296  * Set server socket options.
02297  *
02298  * @param socket a socket descriptor of the server
02299  *
02300  * @param gerror a pointer to an error object pointer used for reporting
02301  *        errors. On error, if gerror is not NULL, *gerror is set and -1
02302  *        is returned.
02303  *
02304  * @return 0 on success, -1 on error
02305  **/
02306 int dosockopts(const int socket, GError **const gerror) {
02307 #ifndef sun
02308         int yes=1;
02309 #else
02310         char yes='1';
02311 #endif /* sun */
02312         struct linger l;
02313 
02314         /* lose the pesky "Address already in use" error message */
02315         if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
02316                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_REUSEADDR,
02317                             "failed to set socket option SO_REUSEADDR: %s",
02318                             strerror(errno));
02319                 return -1;
02320         }
02321         l.l_onoff = 1;
02322         l.l_linger = 10;
02323         if (setsockopt(socket,SOL_SOCKET,SO_LINGER,&l,sizeof(l)) == -1) {
02324                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_LINGER,
02325                             "failed to set socket option SO_LINGER: %s",
02326                             strerror(errno));
02327                 return -1;
02328         }
02329         if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
02330                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_KEEPALIVE,
02331                             "failed to set socket option SO_KEEPALIVE: %s",
02332                             strerror(errno));
02333                 return -1;
02334         }
02335 
02336         return 0;
02337 }
02338 
02339 /**
02340  * Connect a server's socket.
02341  *
02342  * @param serve the server we want to connect.
02343  **/
02344 int setup_serve(SERVER *const serve, GError **const gerror) {
02345         struct addrinfo hints;
02346         struct addrinfo *ai = NULL;
02347         gchar *port = NULL;
02348         int e;
02349         int retval = -1;
02350 
02351         /* Without this, it's possible that socket == 0, even if it's
02352          * not initialized at all. And that would be wrong because 0 is
02353          * totally legal value for properly initialized descriptor. This
02354          * line is required to ensure that unused/uninitialized
02355          * descriptors are marked as such (new style configuration
02356          * case). Currently, servers are being initialized in multiple
02357          * places, and some of the them do the socket initialization
02358          * incorrectly. This is the only point common to all code paths,
02359          * and therefore setting -1 is put here. However, the whole
02360          * server initialization procedure should be extracted to its
02361          * own function and all code paths wanting to mess with servers
02362          * should initialize servers with that function.
02363          * 
02364          * TODO: fix server initialization */
02365         serve->socket = -1;
02366 
02367         if(!(glob_flags & F_OLDSTYLE)) {
02368                 return serve->servename ? 1 : 0;
02369         }
02370         memset(&hints,'\0',sizeof(hints));
02371         hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
02372         hints.ai_socktype = SOCK_STREAM;
02373         hints.ai_family = serve->socket_family;
02374 
02375         port = g_strdup_printf("%d", serve->port);
02376         if (!port) {
02377                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SYS,
02378                             "failed to open an export socket: "
02379                             "failed to convert a port number to a string: %s",
02380                             strerror(errno));
02381                 goto out;
02382         }
02383 
02384         e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
02385 
02386         g_free(port);
02387 
02388         if(e != 0) {
02389                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI,
02390                             "failed to open an export socket: "
02391                             "failed to get address info: %s",
02392                             gai_strerror(e));
02393                 goto out;
02394         }
02395 
02396         if(serve->socket_family == AF_UNSPEC)
02397                 serve->socket_family = ai->ai_family;
02398 
02399 #ifdef WITH_SDP
02400         if ((serve->flags) && F_SDP) {
02401                 if (ai->ai_family == AF_INET)
02402                         ai->ai_family = AF_INET_SDP;
02403                 else (ai->ai_family == AF_INET6)
02404                         ai->ai_family = AF_INET6_SDP;
02405         }
02406 #endif
02407         if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0) {
02408                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
02409                             "failed to open an export socket: "
02410                             "failed to create a socket: %s",
02411                             strerror(errno));
02412                 goto out;
02413         }
02414 
02415         if (dosockopts(serve->socket, gerror) == -1) {
02416                 g_prefix_error(gerror, "failed to open an export socket: ");
02417                 goto out;
02418         }
02419 
02420         DEBUG("Waiting for connections... bind, ");
02421         e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
02422         if (e != 0 && errno != EADDRINUSE) {
02423                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
02424                             "failed to open an export socket: "
02425                             "failed to bind an address to a socket: %s",
02426                             strerror(errno));
02427                 goto out;
02428         }
02429         DEBUG("listen, ");
02430         if (listen(serve->socket, 1) < 0) {
02431                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
02432                             "failed to open an export socket: "
02433                             "failed to start listening on a socket: %s",
02434                             strerror(errno));
02435                 goto out;
02436         }
02437 
02438         retval = serve->servename ? 1 : 0;
02439 out:
02440 
02441         if (retval == -1 && serve->socket >= 0) {
02442                 close(serve->socket);
02443                 serve->socket = -1;
02444         }
02445         freeaddrinfo (ai);
02446 
02447         return retval;
02448 }
02449 
02450 int open_modern(const gchar *const addr, const gchar *const port,
02451                 GError **const gerror) {
02452         struct addrinfo hints;
02453         struct addrinfo* ai = NULL;
02454         struct addrinfo* ai_bak;
02455         struct sock_flags;
02456         int e;
02457         int retval = -1;
02458         int i=0;
02459         int sock = -1;
02460 
02461         memset(&hints, '\0', sizeof(hints));
02462         hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
02463         hints.ai_socktype = SOCK_STREAM;
02464         hints.ai_family = AF_UNSPEC;
02465         hints.ai_protocol = IPPROTO_TCP;
02466         e = getaddrinfo(addr, port ? port : NBD_DEFAULT_PORT, &hints, &ai);
02467         ai_bak = ai;
02468         if(e != 0) {
02469                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI,
02470                             "failed to open a modern socket: "
02471                             "failed to get address info: %s",
02472                             gai_strerror(e));
02473                 goto out;
02474         }
02475 
02476         while(ai != NULL) {
02477                 sock = -1;
02478 
02479                 if((sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
02480                         g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
02481                                     "failed to open a modern socket: "
02482                                     "failed to create a socket: %s",
02483                                     strerror(errno));
02484                         goto out;
02485                 }
02486 
02487                 if (dosockopts(sock, gerror) == -1) {
02488                         g_prefix_error(gerror, "failed to open a modern socket: ");
02489                         goto out;
02490                 }
02491 
02492                 if(bind(sock, ai->ai_addr, ai->ai_addrlen)) {
02493                         /* This is so wrong. 
02494                          * 
02495                          * Linux will return multiple entries for the
02496                          * same system when we ask it for something
02497                          * AF_UNSPEC, even though the first entry will
02498                          * listen to both protocols. Other systems will
02499                          * return multiple entries too, but we actually
02500                          * do need to open both. Sigh.
02501                          *
02502                          * Handle it by ignoring EADDRINUSE if we've
02503                          * already got at least one socket open
02504                          */
02505                         if(errno == EADDRINUSE && modernsocks->len > 0) {
02506                                 goto next;
02507                         }
02508                         g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
02509                                     "failed to open a modern socket: "
02510                                     "failed to bind an address to a socket: %s",
02511                                     strerror(errno));
02512                         goto out;
02513                 }
02514 
02515                 if(listen(sock, 10) <0) {
02516                         g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
02517                                     "failed to open a modern socket: "
02518                                     "failed to start listening on a socket: %s",
02519                                     strerror(errno));
02520                         goto out;
02521                 }
02522                 g_array_append_val(modernsocks, sock);
02523         next:
02524                 ai = ai->ai_next;
02525         }
02526 
02527         retval = 0;
02528 out:
02529 
02530         if (retval == -1 && sock >= 0) {
02531                 close(sock);
02532         }
02533         if(ai_bak)
02534                 freeaddrinfo(ai_bak);
02535 
02536         return retval;
02537 }
02538 
02539 /**
02540  * Connect our servers.
02541  **/
02542 void setup_servers(GArray *const servers, const gchar *const modernaddr,
02543                    const gchar *const modernport) {
02544         int i;
02545         struct sigaction sa;
02546         int want_modern=0;
02547 
02548         for(i=0;i<servers->len;i++) {
02549                 GError *gerror = NULL;
02550                 SERVER *server = &g_array_index(servers, SERVER, i);
02551                 int ret;
02552 
02553                 ret = setup_serve(server, &gerror);
02554                 if (ret == -1) {
02555                         msg(LOG_ERR, "failed to setup servers: %s",
02556                             gerror->message);
02557                         g_clear_error(&gerror);
02558                         exit(EXIT_FAILURE);
02559                 }
02560                 want_modern |= ret;
02561         }
02562         if(want_modern) {
02563                 GError *gerror = NULL;
02564                 if (open_modern(modernaddr, modernport, &gerror) == -1) {
02565                         msg(LOG_ERR, "failed to setup servers: %s",
02566                             gerror->message);
02567                         g_clear_error(&gerror);
02568                         exit(EXIT_FAILURE);
02569                 }
02570         }
02571         children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
02572 
02573         sa.sa_handler = sigchld_handler;
02574         sigemptyset(&sa.sa_mask);
02575         sigaddset(&sa.sa_mask, SIGTERM);
02576         sa.sa_flags = SA_RESTART;
02577         if(sigaction(SIGCHLD, &sa, NULL) == -1)
02578                 err("sigaction: %m");
02579 
02580         sa.sa_handler = sigterm_handler;
02581         sigemptyset(&sa.sa_mask);
02582         sigaddset(&sa.sa_mask, SIGCHLD);
02583         sa.sa_flags = SA_RESTART;
02584         if(sigaction(SIGTERM, &sa, NULL) == -1)
02585                 err("sigaction: %m");
02586 
02587         sa.sa_handler = sighup_handler;
02588         sigemptyset(&sa.sa_mask);
02589         sa.sa_flags = SA_RESTART;
02590         if(sigaction(SIGHUP, &sa, NULL) == -1)
02591                 err("sigaction: %m");
02592 }
02593 
02594 /**
02595  * Go daemon (unless we specified at compile time that we didn't want this)
02596  * @param serve the first server of our configuration. If its port is zero,
02597  *      then do not daemonize, because we're doing inetd then. This parameter
02598  *      is only used to create a PID file of the form
02599  *      /var/run/nbd-server.&lt;port&gt;.pid; it's not modified in any way.
02600  **/
02601 #if !defined(NODAEMON)
02602 void daemonize(SERVER* serve) {
02603         FILE*pidf;
02604 
02605         if(serve && !(serve->port)) {
02606                 return;
02607         }
02608         if(daemon(0,0)<0) {
02609                 err("daemon");
02610         }
02611         if(!*pidftemplate) {
02612                 if(serve) {
02613                         strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
02614                 } else {
02615                         strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
02616                 }
02617         }
02618         snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
02619         pidf=fopen(pidfname, "w");
02620         if(pidf) {
02621                 fprintf(pidf,"%d\n", (int)getpid());
02622                 fclose(pidf);
02623         } else {
02624                 perror("fopen");
02625                 fprintf(stderr, "Not fatal; continuing");
02626         }
02627 }
02628 #else
02629 #define daemonize(serve)
02630 #endif /* !defined(NODAEMON) */
02631 
02632 /*
02633  * Everything beyond this point (in the file) is run in non-daemon mode.
02634  * The stuff above daemonize() isn't.
02635  */
02636 
02637 /**
02638  * Set up user-ID and/or group-ID
02639  **/
02640 void dousers(const gchar *const username, const gchar *const groupname) {
02641         struct passwd *pw;
02642         struct group *gr;
02643         gchar* str;
02644         if (groupname) {
02645                 gr = getgrnam(groupname);
02646                 if(!gr) {
02647                         str = g_strdup_printf("Invalid group name: %s", groupname);
02648                         err(str);
02649                 }
02650                 if(setgid(gr->gr_gid)<0) {
02651                         err("Could not set GID: %m"); 
02652                 }
02653         }
02654         if (username) {
02655                 pw = getpwnam(username);
02656                 if(!pw) {
02657                         str = g_strdup_printf("Invalid user name: %s", username);
02658                         err(str);
02659                 }
02660                 if(setuid(pw->pw_uid)<0) {
02661                         err("Could not set UID: %m");
02662                 }
02663         }
02664 }
02665 
02666 #ifndef ISSERVER
02667 void glib_message_syslog_redirect(const gchar *log_domain,
02668                                   GLogLevelFlags log_level,
02669                                   const gchar *message,
02670                                   gpointer user_data)
02671 {
02672     int level=LOG_DEBUG;
02673     
02674     switch( log_level )
02675     {
02676       case G_LOG_FLAG_FATAL:
02677       case G_LOG_LEVEL_CRITICAL:
02678       case G_LOG_LEVEL_ERROR:    
02679         level=LOG_ERR; 
02680         break;
02681       case G_LOG_LEVEL_WARNING:
02682         level=LOG_WARNING;
02683         break;
02684       case G_LOG_LEVEL_MESSAGE:
02685       case G_LOG_LEVEL_INFO:
02686         level=LOG_INFO;
02687         break;
02688       case G_LOG_LEVEL_DEBUG:
02689         level=LOG_DEBUG;
02690         break;
02691       default:
02692         level=LOG_ERR;
02693     }
02694     syslog(level, "%s", message);
02695 }
02696 #endif
02697 
02698 /**
02699  * Main entry point...
02700  **/
02701 int main(int argc, char *argv[]) {
02702         SERVER *serve;
02703         GArray *servers;
02704         GError *err=NULL;
02705         struct generic_conf genconf;
02706 
02707         memset(&genconf, 0, sizeof(struct generic_conf));
02708 
02709         if (sizeof( struct nbd_request )!=28) {
02710                 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
02711                 exit(EXIT_FAILURE) ;
02712         }
02713 
02714         memset(pidftemplate, '\0', 256);
02715 
02716         modernsocks = g_array_new(FALSE, FALSE, sizeof(int));
02717 
02718         logging();
02719         config_file_pos = g_strdup(CFILE);
02720         serve=cmdline(argc, argv);
02721 
02722         servers = parse_cfile(config_file_pos, &genconf, &err);
02723         
02724         /* Update global variables with parsed values. This will be
02725          * removed once we get rid of global configuration variables. */
02726         glob_flags   |= genconf.flags;
02727 
02728         if(serve) {
02729                 serve->socket_family = AF_UNSPEC;
02730 
02731                 append_serve(serve, servers);
02732      
02733                 if (!(serve->port)) {
02734                         CLIENT *client;
02735 #ifndef ISSERVER
02736                         /* You really should define ISSERVER if you're going to use
02737                          * inetd mode, but if you don't, closing stdout and stderr
02738                          * (which inetd had connected to the client socket) will let it
02739                          * work. */
02740                         close(1);
02741                         close(2);
02742                         open("/dev/null", O_WRONLY);
02743                         open("/dev/null", O_WRONLY);
02744                         g_log_set_default_handler( glib_message_syslog_redirect, NULL );
02745 #endif
02746                         client=g_malloc(sizeof(CLIENT));
02747                         client->server=serve;
02748                         client->net=-1;
02749                         client->exportsize=OFFT_MAX;
02750                         if (set_peername(0, client))
02751                                 exit(EXIT_FAILURE);
02752                         serveconnection(client);
02753                         return 0;
02754                 }
02755         }
02756     
02757         if(!servers || !servers->len) {
02758                 if(err && !(err->domain == NBDS_ERR
02759                             && err->code == NBDS_ERR_CFILE_NOTFOUND)) {
02760                         g_warning("Could not parse config file: %s", 
02761                                         err ? err->message : "Unknown error");
02762                 }
02763         }
02764         if(serve) {
02765                 g_warning("Specifying an export on the command line is deprecated.");
02766                 g_warning("Please use a configuration file instead.");
02767         }
02768 
02769         if((!serve) && (!servers||!servers->len)) {
02770                 if(err)
02771                         g_message("No configured exports; quitting.");
02772                 exit(EXIT_FAILURE);
02773         }
02774         if (!dontfork)
02775                 daemonize(serve);
02776         setup_servers(servers, genconf.modernaddr, genconf.modernport);
02777         dousers(genconf.user, genconf.group);
02778 
02779         serveloop(servers);
02780 }