Network Block Device @PACKAGE_VERSION@

nbd-server.c

Go to the documentation of this file.
00001 /*
00002  * Network Block Device - server
00003  *
00004  * Copyright 1996-1998 Pavel Machek, distribute under GPL
00005  *  <pavel@atrey.karlin.mff.cuni.cz>
00006  * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
00007  * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
00008  *
00009  * Version 1.0 - hopefully 64-bit-clean
00010  * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
00011  * Version 1.2 - autodetect size of block devices, thanks to Peter T. Breuer <ptb@it.uc3m.es>
00012  * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
00013  *      type, or don't have 64 bit file offsets by defining FS_32BIT
00014  *      in compile options for nbd-server *only*. This can be done
00015  *      with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
00016  *      original autoconf input file, or I would make it a configure
00017  *      option.) Ken Yap <ken@nlc.net.au>.
00018  * Version 1.6 - fix autodetection of block device size and really make 64 bit
00019  *      clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
00020  * Version 2.0 - Version synchronised with client
00021  * Version 2.1 - Reap zombie client processes when they exit. Removed
00022  *      (uncommented) the _IO magic, it's no longer necessary. Wouter
00023  *      Verhelst <wouter@debian.org>
00024  * Version 2.2 - Auto switch to read-only mode (useful for floppies).
00025  * Version 2.3 - Fixed code so that Large File Support works. This
00026  *      removes the FS_32BIT compile-time directive; define
00027  *      _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
00028  *      using FS_32BIT. This will allow you to use files >2GB instead of
00029  *      having to use the -m option. Wouter Verhelst <wouter@debian.org>
00030  * Version 2.4 - Added code to keep track of children, so that we can
00031  *      properly kill them from initscripts. Add a call to daemon(),
00032  *      so that processes don't think they have to wait for us, which is
00033  *      interesting for initscripts as well. Wouter Verhelst
00034  *      <wouter@debian.org>
00035  * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
00036  *      zero after fork()ing, resulting in nbd-server going berserk
00037  *      when it receives a signal with at least one child open. Wouter
00038  *      Verhelst <wouter@debian.org>
00039  * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
00040  *      rectified type of mainloop::size_host (sf.net bugs 814435 and
00041  *      817385); close the PID file after writing to it, so that the
00042  *      daemon can actually be found. Wouter Verhelst
00043  *      <wouter@debian.org>
00044  * 10/10/2003 - Size of the data "size_host" was wrong and so was not
00045  *      correctly put in network endianness. Many types were corrected
00046  *      (size_t and off_t instead of int).  <vspaceg@sourceforge.net>
00047  * Version 2.6 - Some code cleanup.
00048  * Version 2.7 - Better build system.
00049  * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a 
00050  *      lot more work, but this is a start. Wouter Verhelst
00051  *      <wouter@debian.org>
00052  * 16/03/2010 - Add IPv6 support.
00053  *      Kitt Tientanopajai <kitt@kitty.in.th>
00054  *      Neutron Soutmun <neo.neutron@gmail.com>
00055  *      Suriya Soutmun <darksolar@gmail.com>
00056  */
00057 
00058 /* Includes LFS defines, which defines behaviours of some of the following
00059  * headers, so must come before those */
00060 #include "lfs.h"
00061 #define _XOPEN_SOURCE 500 /* to get pread/pwrite */
00062 #define _BSD_SOURCE /* to get DT_* macros */
00063 #define _DARWIN_C_SOURCE /* to get DT_* macros on OS X */
00064 
00065 #include <assert.h>
00066 #include <sys/types.h>
00067 #include <sys/socket.h>
00068 #include <sys/stat.h>
00069 #include <sys/select.h>
00070 #include <sys/wait.h>
00071 #include <sys/un.h>
00072 #ifdef HAVE_SYS_IOCTL_H
00073 #include <sys/ioctl.h>
00074 #endif
00075 #include <sys/param.h>
00076 #include <signal.h>
00077 #include <errno.h>
00078 #include <libgen.h>
00079 #include <netinet/tcp.h>
00080 #include <netinet/in.h>
00081 #include <netdb.h>
00082 #include <syslog.h>
00083 #include <unistd.h>
00084 #include <stdbool.h>
00085 #include <stdio.h>
00086 #include <stdlib.h>
00087 #include <string.h>
00088 #include <fcntl.h>
00089 #if HAVE_FALLOC_PH
00090 #include <linux/falloc.h>
00091 #endif
00092 #include <arpa/inet.h>
00093 #include <strings.h>
00094 #include <dirent.h>
00095 #ifdef HAVE_SYS_DIR_H
00096 #include <sys/dir.h>
00097 #endif
00098 #ifdef HAVE_SYS_DIRENT_H
00099 #include <sys/dirent.h>
00100 #endif
00101 #include <unistd.h>
00102 #include <getopt.h>
00103 #include <pwd.h>
00104 #include <grp.h>
00105 #include <dirent.h>
00106 #include <ctype.h>
00107 #include <inttypes.h>
00108 
00109 #include <glib.h>
00110 
00111 /* used in cliserv.h, so must come first */
00112 #define MY_NAME "nbd_server"
00113 #include "cliserv.h"
00114 #include "nbd-debug.h"
00115 #include "netdb-compat.h"
00116 #include "backend.h"
00117 #include "treefiles.h"
00118 
00119 #ifdef WITH_SDP
00120 #include <sdp_inet.h>
00121 #endif
00122 
00123 #if HAVE_FSCTL_SET_ZERO_DATA
00124 #include <io.h>
00125 /* don't include <windows.h> to avoid redefining eg the ERROR macro */
00126 #define NOMINMAX 1
00127 #include <windef.h>
00128 #include <winbase.h>
00129 #include <winioctl.h>
00130 #endif
00131 
00132 /** Default position of the config file */
00133 #ifndef SYSCONFDIR
00134 #define SYSCONFDIR "/etc"
00135 #endif
00136 #define CFILE SYSCONFDIR "/nbd-server/config"
00137 
00138 /** Where our config file actually is */
00139 gchar* config_file_pos;
00140 
00141 /** global flags */
00142 int glob_flags=0;
00143 
00144 /* Whether we should avoid forking */
00145 int dontfork = 0;
00146 
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is the value with every bit set except the leftmost
 * (sign) bit.
 *
 * The value is built in uintmax_t and converted afterwards, because
 * shifting a one into the sign bit of a signed type is undefined
 * behaviour. The expansion is fully parenthesised so that the macro can be
 * used inside any expression.
 **/
#define OFFT_MAX ((off_t)((((uintmax_t)1) << (sizeof(off_t)*8-1)) - 1))
00152 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
00153 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
00154 
00155 /** Global flags: */
00156 #define F_OLDSTYLE 1      /**< Allow oldstyle (port-based) exports */
00157 #define F_LIST 2          /**< Allow clients to list the exports on a server */
00158 #define F_NO_ZEROES 4     /**< Do not send zeros to client */
00159 GHashTable *children;
00160 char pidfname[256]; /**< name of our PID file */
00161 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
00162 
00163 #define NEG_INIT        (1 << 0)
00164 #define NEG_OLD         (1 << 1)
00165 #define NEG_MODERN      (1 << 2)
00166 
00167 #include <nbdsrv.h>
00168 
00169 /* Our thread pool */
00170 GThreadPool *tpool;
00171 
00172 /* A work package for the thread pool functions: bundles a request with
       * the client it relates to and an optional data buffer. */
00173 struct work_package {
00174         CLIENT* client; /**< presumably the client the request arrived from -- TODO confirm against the code that fills this in */
00175         struct nbd_request* req; /**< the NBD request carried by this package */
00176         void* data; /**< for read requests */
00177 };
00178 
00179 static volatile sig_atomic_t is_sigchld_caught; /**< Flag set by
00180                                                      SIGCHLD handler
00181                                                      to mark a child
00182                                                      exit */
00183 
00184 static volatile sig_atomic_t is_sigterm_caught; /**< Flag set by
00185                                                      SIGTERM handler
00186                                                      to mark an exit
00187                                                      request */
00188 
00189 static volatile sig_atomic_t is_sighup_caught; /**< Flag set by SIGHUP
00190                                                     handler to mark a
00191                                                     reconfiguration
00192                                                     request */
00193 
00194 GArray* modernsocks;      /**< Sockets for the modern handler. Not used
00195                                if a client was only specified on the
00196                                command line; only port used if
00197                                oldstyle is set to false (and then the
00198                                command-line client isn't used, gna gna).
00199                                This may be more than one socket on
00200                                systems that don't support serving IPv4
00201                                and IPv6 from the same socket (like,
00202                                e.g., FreeBSD) */
00203 
00204 bool logged_oversized=false;  /**< whether we logged oversized requests already */
00205 
00206 /**
00207  * Type of configuration file values. Selects how a PARAM entry's value is
       * read from the config file and stored in its target.
00208  **/
00209 typedef enum {
00210         PARAM_INT,              /**< This parameter is an integer (stored as gint) */
00211         PARAM_INT64,            /**< This parameter is a 64-bit integer */
00212         PARAM_STRING,           /**< This parameter is a string */
00213         PARAM_BOOL,             /**< This parameter is a boolean */
00214 } PARAM_TYPE;
00215 
00216 /**
00217  * Description of a single configuration file value: the key name, whether
       * it must be present, its type, and where the parsed value is stored.
00218  **/
00219 typedef struct {
00220         gchar *paramname;       /**< Name of the parameter, as it appears in
00221                                   the config file */
00222         gboolean required;      /**< Whether this is a required (as opposed to
00223                                   optional) parameter */
00224         PARAM_TYPE ptype;       /**< Type of the parameter. */
00225         gpointer target;        /**< Pointer to where the data of this
00226                                   parameter should be written. If ptype is
00227                                   PARAM_BOOL, the data is or'ed rather than
00228                                   overwritten. */
00229         gint flagval;           /**< Flag mask for this parameter in case ptype
00230                                   is PARAM_BOOL. */
00231 } PARAM;
00232 
00233 /**
00234  * Configuration file values of the "generic" section
00235  **/
00236 struct generic_conf {
00237         gchar *user;            /**< user we run the server as    */
00238         gchar *group;           /**< group we run the server as   */
00239         gchar *modernaddr;      /**< address of the modern socket */
00240         gchar *modernport;      /**< port of the modern socket    */
00241         gchar *unixsock;        /**< file name of the unix domain socket */
00242         gint flags;             /**< global flags                 */
00243         gint threads;           /**< maximum number of parallel threads we want to run */
00244 };
00245 
00246 /**
00247  * Translate a command name into human readable form
00248  *
00249  * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
00250  * @return pointer to the command name
00251  **/
00252 static inline const char * getcommandname(uint64_t command) {
00253         switch (command) {
00254         case NBD_CMD_READ:
00255                 return "NBD_CMD_READ";
00256         case NBD_CMD_WRITE:
00257                 return "NBD_CMD_WRITE";
00258         case NBD_CMD_DISC:
00259                 return "NBD_CMD_DISC";
00260         case NBD_CMD_FLUSH:
00261                 return "NBD_CMD_FLUSH";
00262         case NBD_CMD_TRIM:
00263                 return "NBD_CMD_TRIM";
00264         default:
00265                 return "UNKNOWN";
00266         }
00267 }
00268 
/**
 * Read and throw away data from a file descriptor.
 *
 * The data is read through readit() in pieces of at most bufsiz bytes, each
 * piece overwriting the start of buf.
 *
 * @param f a file descriptor
 * @param buf a scratch buffer the unwanted data is read into
 * @param len the number of bytes to consume
 * @param bufsiz the size of the buffer
 **/
static inline void consume(int f, void * buf, size_t len, size_t bufsiz) {
	size_t remaining = len;

	while (remaining != 0) {
		size_t chunk = remaining;

		if (chunk > bufsiz) {
			chunk = bufsiz;
		}
		readit(f, buf, chunk);
		remaining -= chunk;
	}
}
00285 
/**
 * Write data from a buffer into a filedescriptor
 *
 * Keeps calling write() until all len bytes have been written; a short
 * write simply continues with the remainder. If write() returns zero or a
 * negative value, the failure is reported through err().
 *
 * @param f a file descriptor
 * @param buf a buffer containing data
 * @param len the number of bytes to be written
 **/
static inline void writeit(int f, void *buf, size_t len) {
	/* Walk the buffer through a char pointer: arithmetic on void* is a
	 * GNU extension, not ISO C. */
	const char *cursor = buf;
	ssize_t res;
	while (len > 0) {
		DEBUG("+");
		if ((res = write(f, cursor, len)) <= 0)
			err("Send failed: %m");
		len -= res;
		cursor += res;
	}
}
00303 
00304 /**
00305  * Print out a message about how to use nbd-server. Split out to a separate
00306  * function so that we can call it from multiple places
00307  */
00308 void usage() {
00309         printf("This is nbd-server version " VERSION "\n");
00310         printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections] [-V]\n"
00311                "\t-r|--read-only\t\tread only\n"
00312                "\t-m|--multi-file\t\tmultiple file\n"
00313                "\t-c|--copy-on-write\tcopy on write\n"
00314                "\t-C|--config-file\tspecify an alternate configuration file\n"
00315                "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
00316                "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
00317                "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
00318                "\t-M|--max-connections\tspecify the maximum number of opened connections\n"
00319                "\t-V|--version\toutput the version and exit\n\n"
00320                "\tif port is set to 0, stdin is used (for running from inetd).\n"
00321                "\tif file_to_export contains '%%s', it is substituted with the IP\n"
00322                "\t\taddress of the machine trying to connect\n" 
00323                "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
00324         printf("Using configuration file %s\n", CFILE);
00325 }
00326 
00327 /* Dumps a config file section of the given SERVER*, and exits. */
00328 void dump_section(SERVER* serve, gchar* section_header) {
00329         printf("[%s]\n", section_header);
00330         printf("\texportname = %s\n", serve->exportname);
00331         printf("\tlistenaddr = %s\n", serve->listenaddr);
00332         if(serve->flags & F_READONLY) {
00333                 printf("\treadonly = true\n");
00334         }
00335         if(serve->flags & F_MULTIFILE) {
00336                 printf("\tmultifile = true\n");
00337         }
00338         if(serve->flags & F_TREEFILES) {
00339                 printf("\ttreefiles = true\n");
00340         }
00341         if(serve->flags & F_COPYONWRITE) {
00342                 printf("\tcopyonwrite = true\n");
00343         }
00344         if(serve->expected_size) {
00345                 printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
00346         }
00347         if(serve->authname) {
00348                 printf("\tauthfile = %s\n", serve->authname);
00349         }
00350         exit(EXIT_SUCCESS);
00351 }
00352 
00353 /**
00354  * Parse the command line.
00355  *
00356  * @param argc the argc argument to main()
00357  * @param argv the argv argument to main()
00358  **/
00359 SERVER* cmdline(int argc, char *argv[], struct generic_conf *genconf) {
00360         int i=0;
00361         int nonspecial=0;
00362         int c;
00363         struct option long_options[] = {
00364                 {"read-only", no_argument, NULL, 'r'},
00365                 {"multi-file", no_argument, NULL, 'm'},
00366                 {"copy-on-write", no_argument, NULL, 'c'},
00367                 {"dont-fork", no_argument, NULL, 'd'},
00368                 {"authorize-file", required_argument, NULL, 'l'},
00369                 {"config-file", required_argument, NULL, 'C'},
00370                 {"pid-file", required_argument, NULL, 'p'},
00371                 {"output-config", required_argument, NULL, 'o'},
00372                 {"max-connection", required_argument, NULL, 'M'},
00373                 {"version", no_argument, NULL, 'V'},
00374                 {0,0,0,0}
00375         };
00376         SERVER *serve;
00377         off_t es;
00378         size_t last;
00379         char suffix;
00380         gboolean do_output=FALSE;
00381         gchar* section_header="";
00382         gchar** addr_port;
00383 
00384         if(argc==1) {
00385                 return NULL;
00386         }
00387         serve=g_new0(SERVER, 1);
00388         serve->authname = g_strdup(default_authname);
00389         serve->virtstyle=VIRT_IPLIT;
00390         while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:V", long_options, &i))>=0) {
00391                 switch (c) {
00392                 case 1:
00393                         /* non-option argument */
00394                         switch(nonspecial++) {
00395                         case 0:
00396                                 if(strchr(optarg, ':') == strrchr(optarg, ':')) {
00397                                         addr_port=g_strsplit(optarg, ":", 2);
00398 
00399                                         /* Check for "@" - maybe user using this separator
00400                                                  for IPv4 address */
00401                                         if(!addr_port[1]) {
00402                                                 g_strfreev(addr_port);
00403                                                 addr_port=g_strsplit(optarg, "@", 2);
00404                                         }
00405                                 } else {
00406                                         addr_port=g_strsplit(optarg, "@", 2);
00407                                 }
00408 
00409                                 if(addr_port[1]) {
00410                                         genconf->modernport=g_strdup(addr_port[1]);
00411                                         genconf->modernaddr=g_strdup(addr_port[0]);
00412                                 } else {
00413                                         g_free(genconf->modernaddr);
00414                                         genconf->modernaddr=NULL;
00415                                         genconf->modernport=g_strdup(addr_port[0]);
00416                                 }
00417                                 g_strfreev(addr_port);
00418                                 break;
00419                         case 1:
00420                                 serve->exportname = g_strdup(optarg);
00421                                 if(serve->exportname[0] != '/') {
00422                                         fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
00423                                         exit(EXIT_FAILURE);
00424                                 }
00425                                 break;
00426                         case 2:
00427                                 last=strlen(optarg)-1;
00428                                 suffix=optarg[last];
00429                                 if (suffix == 'k' || suffix == 'K' ||
00430                                     suffix == 'm' || suffix == 'M')
00431                                         optarg[last] = '\0';
00432                                 es = (off_t)atoll(optarg);
00433                                 switch (suffix) {
00434                                         case 'm':
00435                                         case 'M':  es <<= 10;
00436                                         case 'k':
00437                                         case 'K':  es <<= 10;
00438                                         default :  break;
00439                                 }
00440                                 serve->expected_size = es;
00441                                 break;
00442                         }
00443                         break;
00444                 case 'r':
00445                         serve->flags |= F_READONLY;
00446                         break;
00447                 case 'm':
00448                         serve->flags |= F_MULTIFILE;
00449                         break;
00450                 case 'o':
00451                         do_output = TRUE;
00452                         section_header = g_strdup(optarg);
00453                         break;
00454                 case 'p':
00455                         strncpy(pidfname, optarg, 256);
00456                         pidfname[255]='\0';
00457                         break;
00458                 case 'c': 
00459                         serve->flags |=F_COPYONWRITE;
00460                         break;
00461                 case 'd': 
00462                         dontfork = 1;
00463                         break;
00464                 case 'C':
00465                         g_free(config_file_pos);
00466                         config_file_pos=g_strdup(optarg);
00467                         break;
00468                 case 'l':
00469                         g_free(serve->authname);
00470                         serve->authname=g_strdup(optarg);
00471                         break;
00472                 case 'M':
00473                         serve->max_connections = strtol(optarg, NULL, 0);
00474                         break;
00475                 case 'V':
00476                         printf("This is nbd-server version " VERSION "\n");
00477                         exit(EXIT_SUCCESS);
00478                         break;
00479                 default:
00480                         usage();
00481                         exit(EXIT_FAILURE);
00482                         break;
00483                 }
00484         }
00485         /* What's left: the port to export, the name of the to be exported
00486          * file, and, optionally, the size of the file, in that order. */
00487         if(nonspecial<2) {
00488                 g_free(serve);
00489                 serve=NULL;
00490         } else {
00491                 serve->servename = "";
00492         }
00493         if(do_output) {
00494                 if(!serve) {
00495                         g_critical("Need a complete configuration on the command line to output a config file section!");
00496                         exit(EXIT_FAILURE);
00497                 }
00498                 dump_section(serve, section_header);
00499         }
00500         return serve;
00501 }
00502 
00503 /* forward definition of parse_cfile */
00504 GArray* parse_cfile(gchar* f, struct generic_conf *genconf, bool expect_generic, GError** e);
00505 
00506 #ifdef HAVE_STRUCT_DIRENT_D_TYPE
00507 #define NBD_D_TYPE de->d_type
00508 #else
00509 #define NBD_D_TYPE 0
00510 #define DT_UNKNOWN 0
00511 #define DT_REG 1
00512 #endif
00513 
00514 /**
00515  * Parse config file snippets in a directory. Uses readdir() and friends
00516  * to find files and open them, then passes them on to parse_cfile
00517  * with have_global set false
00518  **/
00519 GArray* do_cfile_dir(gchar* dir, struct generic_conf *const genconf, GError** e) {
00520         DIR* dirh = opendir(dir);
00521         struct dirent* de;
00522         gchar* fname;
00523         GArray* retval = NULL;
00524         GArray* tmp;
00525         struct stat stbuf;
00526 
00527         if(!dirh) {
00528                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_DIR_UNKNOWN, "Invalid directory specified: %s", strerror(errno));
00529                 return NULL;
00530         }
00531         errno=0;
00532         while((de = readdir(dirh))) {
00533                 int saved_errno=errno;
00534                 fname = g_build_filename(dir, de->d_name, NULL);
00535                 switch(NBD_D_TYPE) {
00536                         case DT_UNKNOWN:
00537                                 /* Filesystem doesn't return type of
00538                                  * file through readdir. Run stat() on
00539                                  * the file instead */
00540                                 if(stat(fname, &stbuf)) {
00541                                         perror("stat");
00542                                         goto err_out;
00543                                 }
00544                                 if (!S_ISREG(stbuf.st_mode)) {
00545                                         goto next;
00546                                 }
00547                         case DT_REG:
00548                                 /* Skip unless the name ends with '.conf' */
00549                                 if(strcmp((de->d_name + strlen(de->d_name) - 5), ".conf")) {
00550                                         goto next;
00551                                 }
00552                                 tmp = parse_cfile(fname, genconf, false, e);
00553                                 errno=saved_errno;
00554                                 if(*e) {
00555                                         goto err_out;
00556                                 }
00557                                 if(!retval)
00558                                         retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
00559                                 retval = g_array_append_vals(retval, tmp->data, tmp->len);
00560                                 g_array_free(tmp, TRUE);
00561                         default:
00562                                 break;
00563                 }
00564         next:
00565                 g_free(fname);
00566         }
00567         if(errno) {
00568                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_READDIR_ERR, "Error trying to read directory: %s", strerror(errno));
00569         err_out:
00570                 if(retval)
00571                         g_array_free(retval, TRUE);
00572                 retval = NULL;
00573         }
00574         if(dirh)
00575                 closedir(dirh);
00576         return retval;
00577 }
00578 
00579 /**
00580  * Parse the config file.
00581  *
00582  * @param f the name of the config file
00583  *
00584  * @param genconf a pointer to generic configuration which will get
00585  *        updated with parsed values. If NULL, then parsed generic
00586  *        configuration values are safely and silently discarded.
00587  *
00588  * @param e a GError. Error code can be any of the following:
00589  *        NBDS_ERR_CFILE_NOTFOUND, NBDS_ERR_CFILE_MISSING_GENERIC,
00590  *        NBDS_ERR_CFILE_VALUE_INVALID, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
00591  *        or NBDS_ERR_CFILE_NO_EXPORTS. @see NBDS_ERRS.
00592  *
00593  * @param expect_generic if true, we expect a configuration file that
00594  *        contains a [generic] section. If false, we don't.
00595  *
00596  * @return a GArray of SERVER* pointers. If the config file is empty or does not
00597  *      exist, returns an empty GArray; if the config file contains an
00598  *      error, returns NULL, and e is set appropriately
00599  **/
00600 GArray* parse_cfile(gchar* f, struct generic_conf *const genconf, bool expect_generic, GError** e) {
00601         const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
00602         const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
00603         gchar* cfdir = NULL;
00604         SERVER s;
00605         gchar *virtstyle=NULL;
00606         PARAM lp[] = {
00607                 { "exportname", TRUE,   PARAM_STRING,   &(s.exportname),        0 },
00608                 { "authfile",   FALSE,  PARAM_STRING,   &(s.authname),          0 },
00609                 { "filesize",   FALSE,  PARAM_OFFT,     &(s.expected_size),     0 },
00610                 { "virtstyle",  FALSE,  PARAM_STRING,   &(virtstyle),           0 },
00611                 { "prerun",     FALSE,  PARAM_STRING,   &(s.prerun),            0 },
00612                 { "postrun",    FALSE,  PARAM_STRING,   &(s.postrun),           0 },
00613                 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog),   0 },
00614                 { "cowdir",     FALSE,  PARAM_STRING,   &(s.cowdir),            0 },
00615                 { "readonly",   FALSE,  PARAM_BOOL,     &(s.flags),             F_READONLY },
00616                 { "multifile",  FALSE,  PARAM_BOOL,     &(s.flags),             F_MULTIFILE },
00617                 { "treefiles",  FALSE,  PARAM_BOOL,     &(s.flags),             F_TREEFILES },
00618                 { "copyonwrite", FALSE, PARAM_BOOL,     &(s.flags),             F_COPYONWRITE },
00619                 { "sparse_cow", FALSE,  PARAM_BOOL,     &(s.flags),             F_SPARSE },
00620                 { "sdp",        FALSE,  PARAM_BOOL,     &(s.flags),             F_SDP },
00621                 { "sync",       FALSE,  PARAM_BOOL,     &(s.flags),             F_SYNC },
00622                 { "flush",      FALSE,  PARAM_BOOL,     &(s.flags),             F_FLUSH },
00623                 { "fua",        FALSE,  PARAM_BOOL,     &(s.flags),             F_FUA },
00624                 { "rotational", FALSE,  PARAM_BOOL,     &(s.flags),             F_ROTATIONAL },
00625                 { "temporary",  FALSE,  PARAM_BOOL,     &(s.flags),             F_TEMPORARY },
00626                 { "trim",       FALSE,  PARAM_BOOL,     &(s.flags),             F_TRIM },
00627                 { "listenaddr", FALSE,  PARAM_STRING,   &(s.listenaddr),        0 },
00628                 { "maxconnections", FALSE, PARAM_INT,   &(s.max_connections),   0 },
00629         };
00630         const int lp_size=sizeof(lp)/sizeof(PARAM);
00631         struct generic_conf genconftmp;
00632         PARAM gp[] = {
00633                 { "user",       FALSE, PARAM_STRING,    &(genconftmp.user),       0 },
00634                 { "group",      FALSE, PARAM_STRING,    &(genconftmp.group),      0 },
00635                 { "oldstyle",   FALSE, PARAM_BOOL,      &(genconftmp.flags),      F_OLDSTYLE }, // only left here so we can issue an appropriate error message when the option is used
00636                 { "listenaddr", FALSE, PARAM_STRING,    &(genconftmp.modernaddr), 0 },
00637                 { "port",       FALSE, PARAM_STRING,    &(genconftmp.modernport), 0 },
00638                 { "includedir", FALSE, PARAM_STRING,    &cfdir,                   0 },
00639                 { "allowlist",  FALSE, PARAM_BOOL,      &(genconftmp.flags),      F_LIST },
00640                 { "unixsock",   FALSE, PARAM_STRING,    &(genconftmp.unixsock),   0 },
00641                 { "max_threads", FALSE, PARAM_INT,      &(genconftmp.threads),    0 },
00642         };
00643         PARAM* p=gp;
00644         int p_size=sizeof(gp)/sizeof(PARAM);
00645         GKeyFile *cfile;
00646         GError *err = NULL;
00647         const char *err_msg=NULL;
00648         GArray *retval=NULL;
00649         gchar **groups;
00650         gboolean bval;
00651         gint ival;
00652         gint64 i64val;
00653         gchar* sval;
00654         gchar* startgroup;
00655         gint i;
00656         gint j;
00657 
00658         memset(&genconftmp, 0, sizeof(struct generic_conf));
00659 
00660         if (genconf) {
00661                 /* Use the passed configuration values as defaults. The
00662                  * parsing algorithm below updates all parameter targets
00663                  * found from configuration files. */
00664                 memcpy(&genconftmp, genconf, sizeof(struct generic_conf));
00665         }
00666 
00667         cfile = g_key_file_new();
00668         retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
00669         if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
00670                         G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
00671                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NOTFOUND, "Could not open config file %s: %s",
00672                                 f, err->message);
00673                 g_key_file_free(cfile);
00674                 return retval;
00675         }
00676         startgroup = g_key_file_get_start_group(cfile);
00677         if((!startgroup || strcmp(startgroup, "generic")) && expect_generic) {
00678                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
00679                 g_key_file_free(cfile);
00680                 return NULL;
00681         }
00682         groups = g_key_file_get_groups(cfile, NULL);
00683         for(i=0;groups[i];i++) {
00684                 memset(&s, '\0', sizeof(SERVER));
00685 
00686                 /* After the [generic] group or when we're parsing an include
00687                  * directory, start parsing exports */
00688                 if(i==1 || !expect_generic) {
00689                         p=lp;
00690                         p_size=lp_size;
00691                 } 
00692                 for(j=0;j<p_size;j++) {
00693                         assert(p[j].target != NULL);
00694                         assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL||p[j].ptype==PARAM_INT64);
00695                         switch(p[j].ptype) {
00696                                 case PARAM_INT:
00697                                         ival = g_key_file_get_integer(cfile,
00698                                                                 groups[i],
00699                                                                 p[j].paramname,
00700                                                                 &err);
00701                                         if(!err) {
00702                                                 *((gint*)p[j].target) = ival;
00703                                         }
00704                                         break;
00705                                 case PARAM_INT64:
00706                                         i64val = g_key_file_get_int64(cfile,
00707                                                                 groups[i],
00708                                                                 p[j].paramname,
00709                                                                 &err);
00710                                         if(!err) {
00711                                                 *((gint64*)p[j].target) = i64val;
00712                                         }
00713                                         break;
00714                                 case PARAM_STRING:
00715                                         sval = g_key_file_get_string(cfile,
00716                                                                 groups[i],
00717                                                                 p[j].paramname,
00718                                                                 &err);
00719                                         if(!err) {
00720                                                 *((gchar**)p[j].target) = sval;
00721                                         }
00722                                         break;
00723                                 case PARAM_BOOL:
00724                                         bval = g_key_file_get_boolean(cfile,
00725                                                         groups[i],
00726                                                         p[j].paramname, &err);
00727                                         if(!err) {
00728                                                 if(bval) {
00729                                                         *((gint*)p[j].target) |= p[j].flagval;
00730                                                 } else {
00731                                                         *((gint*)p[j].target) &= ~(p[j].flagval);
00732                                                 }
00733                                         }
00734                                         break;
00735                         }
00736                         if(err) {
00737                                 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
00738                                         if(!p[j].required) {
00739                                                 /* Ignore not-found error for optional values */
00740                                                 g_clear_error(&err);
00741                                                 continue;
00742                                         } else {
00743                                                 err_msg = MISSING_REQUIRED_ERROR;
00744                                         }
00745                                 } else {
00746                                         err_msg = DEFAULT_ERROR;
00747                                 }
00748                                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
00749                                 g_array_free(retval, TRUE);
00750                                 g_error_free(err);
00751                                 g_key_file_free(cfile);
00752                                 return NULL;
00753                         }
00754                 }
00755                 if(virtstyle) {
00756                         if(!strncmp(virtstyle, "none", 4)) {
00757                                 s.virtstyle=VIRT_NONE;
00758                         } else if(!strncmp(virtstyle, "ipliteral", 9)) {
00759                                 s.virtstyle=VIRT_IPLIT;
00760                         } else if(!strncmp(virtstyle, "iphash", 6)) {
00761                                 s.virtstyle=VIRT_IPHASH;
00762                         } else if(!strncmp(virtstyle, "cidrhash", 8)) {
00763                                 s.virtstyle=VIRT_CIDR;
00764                                 if(strlen(virtstyle)<10) {
00765                                         g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
00766                                         g_array_free(retval, TRUE);
00767                                         g_key_file_free(cfile);
00768                                         return NULL;
00769                                 }
00770                                 s.cidrlen=strtol(virtstyle+8, NULL, 0);
00771                         } else {
00772                                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
00773                                 g_array_free(retval, TRUE);
00774                                 g_key_file_free(cfile);
00775                                 return NULL;
00776                         }
00777                 } else {
00778                         s.virtstyle=VIRT_IPLIT;
00779                 }
00780                 if(genconftmp.flags & F_OLDSTYLE) {
00781                         g_message("Since 3.10, the oldstyle protocol is no longer supported. Please migrate to the newstyle protocol.");
00782                         g_message("Exiting.");
00783                         return NULL;
00784                 }
00785                 /* Don't need to free this, it's not our string */
00786                 virtstyle=NULL;
00787                 /* Don't append values for the [generic] group */
00788                 if(i>0 || !expect_generic) {
00789                         s.servename = groups[i];
00790 
00791                         g_array_append_val(retval, s);
00792                 }
00793 #ifndef WITH_SDP
00794                 if(s.flags & F_SDP) {
00795                         g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
00796                         g_array_free(retval, TRUE);
00797                         g_key_file_free(cfile);
00798                         return NULL;
00799                 }
00800 #endif
00801         }
00802         g_key_file_free(cfile);
00803         if(cfdir) {
00804                 GArray* extra = do_cfile_dir(cfdir, &genconftmp, e);
00805                 if(extra) {
00806                         retval = g_array_append_vals(retval, extra->data, extra->len);
00807                         i+=extra->len;
00808                         g_array_free(extra, TRUE);
00809                 } else {
00810                         if(*e) {
00811                                 g_array_free(retval, TRUE);
00812                                 return NULL;
00813                         }
00814                 }
00815         }
00816         if(i==1 && expect_generic) {
00817                 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NO_EXPORTS, "The config file does not specify any exports");
00818         }
00819 
00820         if (genconf) {
00821                 /* Return the updated generic configuration through the
00822                  * pointer parameter. */
00823                 memcpy(genconf, &genconftmp, sizeof(struct generic_conf));
00824         }
00825 
00826         return retval;
00827 }
00828 
00829 /**
00830  * Handle SIGCHLD by setting atomically a flag which will be evaluated in the
00831  * main loop of the root server process. This allows us to separate the signal
00832  * catching from th actual task triggered by SIGCHLD and hence processing in the
00833  * interrupt context is kept as minimial as possible.
00834  *
00835  * @param s the signal we're handling (must be SIGCHLD, or something
00836  * is severely wrong)
00837  **/
00838 static void sigchld_handler(const int s G_GNUC_UNUSED) {
00839         is_sigchld_caught = 1;
00840 }
00841 
00842 /**
00843  * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
00844  *
00845  * @param key the key
00846  * @param value the value corresponding to the above key
00847  * @param user_data a pointer which we always set to 1, so that we know what
00848  * will happen next.
00849  **/
00850 void killchild(gpointer key, gpointer value, gpointer user_data) {
00851         pid_t *pid=value;
00852 
00853         kill(*pid, SIGTERM);
00854 }
00855 
00856 /**
00857  * Handle SIGTERM by setting atomically a flag which will be evaluated in the
00858  * main loop of the root server process. This allows us to separate the signal
00859  * catching from th actual task triggered by SIGTERM and hence processing in the
00860  * interrupt context is kept as minimial as possible.
00861  *
00862  * @param s the signal we're handling (must be SIGTERM, or something
00863  * is severely wrong).
00864  **/
00865 static void sigterm_handler(const int s G_GNUC_UNUSED) {
00866         is_sigterm_caught = 1;
00867 }
00868 
00869 /**
00870  * Handle SIGHUP by setting atomically a flag which will be evaluated in
00871  * the main loop of the root server process. This allows us to separate
00872  * the signal catching from th actual task triggered by SIGHUP and hence
00873  * processing in the interrupt context is kept as minimial as possible.
00874  *
00875  * @param s the signal we're handling (must be SIGHUP, or something
00876  * is severely wrong).
00877  **/
00878 static void sighup_handler(const int s G_GNUC_UNUSED) {
00879         is_sighup_caught = 1;
00880 }
00881 
00882 /**
00883  * Get the file handle and offset, given an export offset.
00884  *
00885  * @param client The client we're serving for
00886  * @param a The offset to get corresponding file/offset for
00887  * @param fhandle [out] File descriptor
00888  * @param foffset [out] Offset into fhandle
00889  * @param maxbytes [out] Tells how many bytes can be read/written
00890  * from fhandle starting at foffset (0 if there is no limit)
00891  * @return 0 on success, -1 on failure
00892  **/
00893 int get_filepos(CLIENT *client, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
00894 
00895         GArray * const export = client->export;
00896 
00897         /* Negative offset not allowed */
00898         if(a < 0)
00899                 return -1;
00900 
00901         /* Open separate file for treefiles */
00902         if (client->server->flags & F_TREEFILES) {
00903                 *foffset = a % TREEPAGESIZE;
00904                 *maxbytes = (( 1 + (a/TREEPAGESIZE) ) * TREEPAGESIZE) - a; // start position of next block
00905                 *fhandle = open_treefile(client->exportname, ((client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR), client->exportsize,a, &client->lock);
00906                 return 0;
00907         }
00908 
00909         /* Binary search for last file with starting offset <= a */
00910         FILE_INFO fi;
00911         int start = 0;
00912         int end = export->len - 1;
00913         while( start <= end ) {
00914                 int mid = (start + end) / 2;
00915                 fi = g_array_index(export, FILE_INFO, mid);
00916                 if( fi.startoff < a ) {
00917                         start = mid + 1;
00918                 } else if( fi.startoff > a ) {
00919                         end = mid - 1;
00920                 } else {
00921                         start = end = mid;
00922                         break;
00923                 }
00924         }
00925 
00926         /* end should never go negative, since first startoff is 0 and a >= 0 */
00927         assert(end >= 0);
00928 
00929         fi = g_array_index(export, FILE_INFO, end);
00930         *fhandle = fi.fhandle;
00931         *foffset = a - fi.startoff;
00932         *maxbytes = 0;
00933         if( end+1 < export->len ) {
00934                 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
00935                 *maxbytes = fi_next.startoff - a;
00936         }
00937 
00938         return 0;
00939 }
00940 
00941 /**
00942  * Write an amount of bytes at a given offset to the right file. This
00943  * abstracts the write-side of the multiple file option.
00944  *
00945  * @param a The offset where the write should start
00946  * @param buf The buffer to write from
00947  * @param len The length of buf
00948  * @param client The client we're serving for
00949  * @param fua Flag to indicate 'Force Unit Access'
00950  * @return The number of bytes actually written, or -1 in case of an error
00951  **/
00952 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
00953         int fhandle;
00954         off_t foffset;
00955         size_t maxbytes;
00956         ssize_t retval;
00957 
00958         if(get_filepos(client, a, &fhandle, &foffset, &maxbytes))
00959                 return -1;
00960         if(maxbytes && len > maxbytes)
00961                 len = maxbytes;
00962 
00963         DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
00964 
00965         retval = pwrite(fhandle, buf, len, foffset);
00966         if(client->server->flags & F_SYNC) {
00967                 fsync(fhandle);
00968         } else if (fua) {
00969 
00970           /* This is where we would do the following
00971            *   #ifdef USE_SYNC_FILE_RANGE
00972            * However, we don't, for the reasons set out below
00973            * by Christoph Hellwig <hch@infradead.org>
00974            *
00975            * [BEGINS] 
00976            * fdatasync is equivalent to fsync except that it does not flush
00977            * non-essential metadata (basically just timestamps in practice), but it
00978            * does flush metadata requried to find the data again, e.g. allocation
00979            * information and extent maps.  sync_file_range does nothing but flush
00980            * out pagecache content - it means you basically won't get your data
00981            * back in case of a crash if you either:
00982            * 
00983            *  a) have a volatile write cache in your disk (e.g. any normal SATA disk)
00984            *  b) are using a sparse file on a filesystem
00985            *  c) are using a fallocate-preallocated file on a filesystem
00986            *  d) use any file on a COW filesystem like btrfs
00987            * 
00988            * e.g. it only does anything useful for you if you do not have a volatile
00989            * write cache, and either use a raw block device node, or just overwrite
00990            * an already fully allocated (and not preallocated) file on a non-COW
00991            * filesystem.
00992            * [ENDS]
00993            *
00994            * What we should do is open a second FD with O_DSYNC set, then write to
00995            * that when appropriate. However, with a Linux client, every REQ_FUA
00996            * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
00997            * problems.
00998            *
00999            */
01000 #if 0
01001                 sync_file_range(fhandle, foffset, len,
01002                                 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
01003                                 SYNC_FILE_RANGE_WAIT_AFTER);
01004 #else
01005                 fdatasync(fhandle);
01006 #endif
01007         }
01008         /* close file pointer in case of treefiles */
01009         if (client->server->flags & F_TREEFILES) {
01010                 close(fhandle);
01011         }
01012         return retval;
01013 }
01014 
01015 /**
01016  * Call rawexpwrite repeatedly until all data has been written.
01017  *
01018  * @param a The offset where the write should start
01019  * @param buf The buffer to write from
01020  * @param len The length of buf
01021  * @param client The client we're serving for
01022  * @param fua Flag to indicate 'Force Unit Access'
01023  * @return 0 on success, nonzero on failure
01024  **/
01025 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
01026         ssize_t ret=0;
01027 
01028         while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
01029                 a += ret;
01030                 buf += ret;
01031                 len -= ret;
01032         }
01033         return (ret < 0 || len != 0);
01034 }
01035 
01036 /**
01037  * Read an amount of bytes at a given offset from the right file. This
01038  * abstracts the read-side of the multiple files option.
01039  *
01040  * @param a The offset where the read should start
01041  * @param buf A buffer to read into
01042  * @param len The size of buf
01043  * @param client The client we're serving for
01044  * @return The number of bytes actually read, or -1 in case of an
01045  * error.
01046  **/
01047 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
01048         int fhandle;
01049         off_t foffset;
01050         size_t maxbytes;
01051         ssize_t retval;
01052 
01053         if(get_filepos(client, a, &fhandle, &foffset, &maxbytes))
01054                 return -1;
01055         if(maxbytes && len > maxbytes)
01056                 len = maxbytes;
01057 
01058         DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
01059 
01060         retval = pread(fhandle, buf, len, foffset);
01061         if (client->server->flags & F_TREEFILES) {
01062                 close(fhandle);
01063         }
01064         return retval;
01065 }
01066 
01067 /**
01068  * Call rawexpread repeatedly until all data has been read.
01069  * @return 0 on success, nonzero on failure
01070  **/
01071 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
01072         ssize_t ret=0;
01073 
01074         while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
01075                 a += ret;
01076                 buf += ret;
01077                 len -= ret;
01078         }
01079         return (ret < 0 || len != 0);
01080 }
01081 
01082 /**
01083  * Read an amount of bytes at a given offset from the right file. This
01084  * abstracts the read-side of the copyonwrite stuff, and calls
01085  * rawexpread() with the right parameters to do the actual work.
01086  * @param a The offset where the read should start
01087  * @param buf A buffer to read into
01088  * @param len The size of buf
01089  * @param client The client we're going to read for
01090  * @return 0 on success, nonzero on failure
01091  **/
01092 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
01093         off_t rdlen, offset;
01094         off_t mapcnt, mapl, maph, pagestart;
01095 
01096         if (!(client->server->flags & F_COPYONWRITE))
01097                 return(rawexpread_fully(a, buf, len, client));
01098         DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
01099 
01100         mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
01101 
01102         for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
01103                 pagestart=mapcnt*DIFFPAGESIZE;
01104                 offset=a-pagestart;
01105                 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
01106                         len : (size_t)DIFFPAGESIZE-offset;
01107                 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
01108                         DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
01109                                (unsigned long)(client->difmap[mapcnt]));
01110                         myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
01111                         if (read(client->difffile, buf, rdlen) != rdlen) return -1;
01112                 } else { /* the block is not there */
01113                         DEBUG("Page %llu is not here, we read the original one\n",
01114                                (unsigned long long)mapcnt);
01115                         if(rawexpread_fully(a, buf, rdlen, client)) return -1;
01116                 }
01117                 len-=rdlen; a+=rdlen; buf+=rdlen;
01118         }
01119         return 0;
01120 }
01121 
01122 /**
01123  * Write an amount of bytes at a given offset to the right file. This
01124  * abstracts the write-side of the copyonwrite option, and calls
01125  * rawexpwrite() with the right parameters to do the actual work.
01126  *
01127  * @param a The offset where the write should start
01128  * @param buf The buffer to write from
01129  * @param len The length of buf
01130  * @param client The client we're going to write for.
01131  * @param fua Flag to indicate 'Force Unit Access'
01132  * @return 0 on success, nonzero on failure
01133  **/
01134 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
01135         char pagebuf[DIFFPAGESIZE];
01136         off_t mapcnt,mapl,maph;
01137         off_t wrlen,rdlen; 
01138         off_t pagestart;
01139         off_t offset;
01140 
01141         if (!(client->server->flags & F_COPYONWRITE))
01142                 return(rawexpwrite_fully(a, buf, len, client, fua)); 
01143         DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
01144 
01145         mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
01146 
01147         for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
01148                 pagestart=mapcnt*DIFFPAGESIZE ;
01149                 offset=a-pagestart ;
01150                 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
01151                         len : (size_t)DIFFPAGESIZE-offset;
01152 
01153                 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
01154                         DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
01155                                (unsigned long)(client->difmap[mapcnt])) ;
01156                         myseek(client->difffile,
01157                                         client->difmap[mapcnt]*DIFFPAGESIZE+offset);
01158                         if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
01159                 } else { /* the block is not there */
01160                         myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
01161                         client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
01162                         DEBUG("Page %llu is not here, we put it at %lu\n",
01163                                (unsigned long long)mapcnt,
01164                                (unsigned long)(client->difmap[mapcnt]));
01165                         rdlen=DIFFPAGESIZE ;
01166                         if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
01167                                 return -1;
01168                         memcpy(pagebuf+offset,buf,wrlen) ;
01169                         if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
01170                                         DIFFPAGESIZE)
01171                                 return -1;
01172                 }                                                   
01173                 len-=wrlen ; a+=wrlen ; buf+=wrlen ;
01174         }
01175         if (client->server->flags & F_SYNC) {
01176                 fsync(client->difffile);
01177         } else if (fua) {
01178                 /* open question: would it be cheaper to do multiple sync_file_ranges?
01179                    as we iterate through the above?
01180                  */
01181                 fdatasync(client->difffile);
01182         }
01183         return 0;
01184 }
01185 
01186 /**
01187  * Flush data to a client
01188  *
01189  * @param client The client we're going to write for.
01190  * @return 0 on success, nonzero on failure
01191  **/
01192 int expflush(CLIENT *client) {
01193         gint i;
01194 
01195         if (client->server->flags & F_COPYONWRITE) {
01196                 return fsync(client->difffile);
01197         }
01198 
01199         if (client->server->flags & F_TREEFILES ) {
01200                 // all we can do is force sync the entire filesystem containing the tree
01201                 if (client->server->flags & F_READONLY)
01202                         return 0;
01203                 sync();
01204                 return 0;
01205         }
01206         
01207         for (i = 0; i < client->export->len; i++) {
01208                 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
01209                 if (fsync(fi.fhandle) < 0)
01210                         return -1;
01211         }
01212         
01213         return 0;
01214 }
01215 
/**
 * Ask the operating system to deallocate (zero) a byte range of a file,
 * using whichever mechanism was detected at build time. The result of the
 * underlying call is not checked; on platforms without a supported
 * mechanism this only emits a debug message.
 *
 * @param fd The file descriptor to punch a hole in
 * @param off Start of the range
 * @param len Length of the range
 **/
void punch_hole(int fd, off_t off, off_t len) {
        DEBUG("punching hole in fd=%d, starting from %llu, length %llu\n", fd, (unsigned long long)off, (unsigned long long)len);
#if HAVE_FALLOC_PH
        fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len);
#elif HAVE_FSCTL_SET_ZERO_DATA
        DWORD returned;
        FILE_ZERO_DATA_INFORMATION range;

        range.FileOffset.QuadPart = off;
        range.BeyondFinalZero.QuadPart = off + len;
        DeviceIoControl((HANDLE)_get_osfhandle(fd), FSCTL_SET_ZERO_DATA, &range, sizeof(range), NULL, 0, &returned, NULL);
#else
        DEBUG("punching holes not supported on this platform\n");
#endif
}
01231 
/**
 * Send one option reply to the client during negotiation.
 *
 * The reply consists of a 64-bit magic, the option being answered, the
 * reply type and the payload length (all in network byte order), followed
 * by the payload itself.
 *
 * writev() may accept only part of the data, so it is called repeatedly
 * until everything has been sent; a truncated reply would leave the peer
 * out of sync with the option stream. A write error is reported with
 * perror() and the rest of the reply is dropped.
 *
 * @param opt The option this reply belongs to (host byte order)
 * @param net The socket to write to
 * @param reply_type The reply type (host byte order)
 * @param datasize Number of payload bytes in data
 * @param data The payload; may be NULL when datasize is 0
 **/
static void send_reply(uint32_t opt, int net, uint32_t reply_type, size_t datasize, void* data) {
        uint64_t magic = htonll(0x3e889045565a9LL);
        uint32_t datsize = htonl(datasize);

        reply_type = htonl(reply_type);
        opt = htonl(opt);

        struct iovec v_data[] = {
                { &magic, sizeof(magic) },
                { &opt, sizeof(opt) },
                { &reply_type, sizeof(reply_type) },
                { &datsize, sizeof(datsize) },
                { data, datasize },
        };
        struct iovec *cur = v_data;
        int remaining = (int)(sizeof(v_data) / sizeof(v_data[0]));

        while(remaining > 0) {
                ssize_t sent = writev(net, cur, remaining);
                if(sent <= 0) {
                        perror("E: couldn't write enough data:");
                        return;
                }

                /* Drop every vector that went out completely (this also
                 * retires a trailing empty payload vector) ... */
                size_t done = (size_t)sent;
                while(remaining > 0 && done >= cur->iov_len) {
                        done -= cur->iov_len;
                        cur++;
                        remaining--;
                }
                /* ... and advance into the one that was cut short */
                if(remaining > 0) {
                        cur->iov_base = (char *)cur->iov_base + done;
                        cur->iov_len -= done;
                }
        }
}
01250 
01251 static CLIENT* handle_export_name(uint32_t opt, int net, GArray* servers, uint32_t cflags) {
01252         uint32_t namelen;
01253         char* name;
01254         int i;
01255 
01256         if (read(net, &namelen, sizeof(namelen)) < 0) {
01257                 err("Negotiation failed/7: %m");
01258                 return NULL;
01259         }
01260         namelen = ntohl(namelen);
01261         if(namelen > 0) {
01262                 name = malloc(namelen+1);
01263                 name[namelen]=0;
01264                 if (read(net, name, namelen) < 0) {
01265                         err("Negotiation failed/8: %m");
01266                         free(name);
01267                         return NULL;
01268                 }
01269         } else {
01270                 name = strdup("");
01271         }
01272         for(i=0; i<servers->len; i++) {
01273                 SERVER* serve = &(g_array_index(servers, SERVER, i));
01274                 if(!strcmp(serve->servename, name)) {
01275                         CLIENT* client = g_new0(CLIENT, 1);
01276                         client->server = serve;
01277                         client->exportsize = OFFT_MAX;
01278                         client->net = net;
01279                         client->modern = TRUE;
01280                         client->transactionlogfd = -1;
01281                         client->clientfeats = cflags;
01282                         pthread_mutex_init(&(client->lock), NULL);
01283                         free(name);
01284                         return client;
01285                 }
01286         }
01287         err("Negotiation failed/8a: Requested export not found");
01288         free(name);
01289         return NULL;
01290 }
01291 
01292 static void handle_list(uint32_t opt, int net, GArray* servers, uint32_t cflags) {
01293         uint32_t len;
01294         int i;
01295         char buf[1024];
01296         char *ptr = buf + sizeof(len);
01297 
01298         if (read(net, &len, sizeof(len)) < 0)
01299                 err("Negotiation failed/8: %m");
01300         len = ntohl(len);
01301         if(len) {
01302                 send_reply(opt, net, NBD_REP_ERR_INVALID, 0, NULL);
01303         }
01304         if(!(glob_flags & F_LIST)) {
01305                 send_reply(opt, net, NBD_REP_ERR_POLICY, 0, NULL);
01306                 err_nonfatal("Client tried disallowed list option");
01307                 return;
01308         }
01309         for(i=0; i<servers->len; i++) {
01310                 SERVER* serve = &(g_array_index(servers, SERVER, i));
01311                 len = htonl(strlen(serve->servename));
01312                 memcpy(buf, &len, sizeof(len));
01313                 strncpy(ptr, serve->servename, sizeof(buf) - sizeof(len));
01314                 send_reply(opt, net, NBD_REP_SERVER, strlen(serve->servename)+sizeof(len), buf);
01315         }
01316         send_reply(opt, net, NBD_REP_ACK, 0, NULL);
01317 }
01318 
01319 /**
01320  * Do the initial negotiation.
01321  *
01322  * @param client The client we're negotiating with.
01323  **/
01324 CLIENT* negotiate(int net, GArray* servers) {
01325         uint16_t smallflags = NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES;
01326         uint64_t magic;
01327         uint32_t cflags = 0;
01328         uint32_t opt;
01329 
01330         assert(servers != NULL);
01331         if (write(net, INIT_PASSWD, 8) < 0)
01332                 err_nonfatal("Negotiation failed/1: %m");
01333         magic = htonll(opts_magic);
01334         if (write(net, &magic, sizeof(magic)) < 0)
01335                 err_nonfatal("Negotiation failed/2: %m");
01336 
01337         smallflags = htons(smallflags);
01338         if (write(net, &smallflags, sizeof(uint16_t)) < 0)
01339                 err_nonfatal("Negotiation failed/3: %m");
01340         if (read(net, &cflags, sizeof(cflags)) < 0)
01341                 err_nonfatal("Negotiation failed/4: %m");
01342         cflags = htonl(cflags);
01343         if (cflags & NBD_FLAG_C_NO_ZEROES) {
01344                 glob_flags |= F_NO_ZEROES;
01345         }
01346         do {
01347                 if (read(net, &magic, sizeof(magic)) < 0)
01348                         err_nonfatal("Negotiation failed/5: %m");
01349                 magic = ntohll(magic);
01350                 if(magic != opts_magic) {
01351                         err_nonfatal("Negotiation failed/5a: magic mismatch");
01352                         return NULL;
01353                 }
01354                 if (read(net, &opt, sizeof(opt)) < 0)
01355                         err_nonfatal("Negotiation failed/6: %m");
01356                 opt = ntohl(opt);
01357                 switch(opt) {
01358                 case NBD_OPT_EXPORT_NAME:
01359                         // NBD_OPT_EXPORT_NAME must be the last
01360                         // selected option, so return from here
01361                         // if that is chosen.
01362                         return handle_export_name(opt, net, servers, cflags);
01363                         break;
01364                 case NBD_OPT_LIST:
01365                         handle_list(opt, net, servers, cflags);
01366                         break;
01367                 case NBD_OPT_ABORT:
01368                         // handled below
01369                         break;
01370                 default:
01371                         send_reply(opt, net, NBD_REP_ERR_UNSUP, 0, NULL);
01372                         break;
01373                 }
01374         } while((opt != NBD_OPT_EXPORT_NAME) && (opt != NBD_OPT_ABORT));
01375         if(opt == NBD_OPT_ABORT) {
01376                 err_nonfatal("Session terminated by client");
01377                 return NULL;
01378         }
01379         err_nonfatal("Weird things happened: reached end of negotiation without success");
01380         return NULL;
01381 }
01382 
01383 void send_export_info(CLIENT* client) {
01384         uint64_t size_host = htonll((u64)(client->exportsize));
01385         uint16_t flags = NBD_FLAG_HAS_FLAGS;
01386 
01387         if (write(client->net, &size_host, 8) < 0)
01388                 err("Negotiation failed/9: %m");
01389         if (client->server->flags & F_READONLY)
01390                 flags |= NBD_FLAG_READ_ONLY;
01391         if (client->server->flags & F_FLUSH)
01392                 flags |= NBD_FLAG_SEND_FLUSH;
01393         if (client->server->flags & F_FUA)
01394                 flags |= NBD_FLAG_SEND_FUA;
01395         if (client->server->flags & F_ROTATIONAL)
01396                 flags |= NBD_FLAG_ROTATIONAL;
01397         if (client->server->flags & F_TRIM)
01398                 flags |= NBD_FLAG_SEND_TRIM;
01399         flags = htons(flags);
01400         if (write(client->net, &flags, sizeof(flags)) < 0)
01401                 err("Negotiation failed/11: %m");
01402         if (!(glob_flags & F_NO_ZEROES)) {
01403                 char zeros[128];
01404                 memset(zeros, '\0', sizeof(zeros));
01405                 if (write(client->net, zeros, 124) < 0)
01406                         err("Negotiation failed/12: %m");
01407         }
01408 }
01409 
/**
 * Translate a local errno value into the error number put on the wire.
 *
 * EPERM, EIO, ENOMEM and EINVAL map to 1, 5, 12 and 22; EFBIG, ENOSPC and
 * (where defined) EDQUOT all map to 28; anything else is reported as 22.
 *
 * @param errcode a local errno value
 * @return the mapped error number, converted with htonl()
 **/
static int nbd_errno(int errcode) {
	uint32_t wire = 22; /* EINVAL: also the fallback for unknown codes */

	if (errcode == EPERM) {
		wire = 1;
	} else if (errcode == EIO) {
		wire = 5;
	} else if (errcode == ENOMEM) {
		wire = 12;
	} else if (errcode == EFBIG || errcode == ENOSPC
#ifdef EDQUOT
		   || errcode == EDQUOT
#endif
		  ) {
		wire = 28; /* ENOSPC */
	}
	return htonl(wire);
}
01430 
01431 static void package_dispose(struct work_package* package) {
01432         g_free(package->data);
01433         g_free(package->req);
01434         g_free(package);
01435 }
01436 
01437 struct work_package* package_create(CLIENT* client, struct nbd_request* req) {
01438         struct work_package* rv = calloc(sizeof (struct work_package), 1);
01439 
01440         rv->req = req;
01441         rv->client = client;
01442 
01443         if((req->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE)
01444                 rv->data = malloc(req->len);
01445 
01446         return rv;
01447 }
01448 
01449 static void setup_reply(struct nbd_reply* rep, struct nbd_request* req) {
01450         rep->magic = htonl(NBD_REPLY_MAGIC);
01451         rep->error = 0;
01452         memcpy(&(rep->handle), &(req->handle), sizeof(req->handle));
01453 }
01454 
01455 static void handle_read(CLIENT* client, struct nbd_request* req) {
01456         struct nbd_reply rep;
01457         void* buf = malloc(req->len);
01458         if(!buf) {
01459                 err("Could not allocate memory for request");
01460         }
01461         DEBUG("handling read request\n");
01462         setup_reply(&rep, req);
01463         if(expread(req->from, buf, req->len, client)) {
01464                 DEBUG("Read failed: %m");
01465                 rep.error = nbd_errno(errno);
01466         }
01467         pthread_mutex_lock(&(client->lock));
01468         writeit(client->net, &rep, sizeof rep);
01469         if(!rep.error) {
01470                 writeit(client->net, buf, req->len);
01471         }
01472         pthread_mutex_unlock(&(client->lock));
01473         free(buf);
01474 }
01475 
01476 static void handle_write(CLIENT* client, struct nbd_request* req, void* data) {
01477         struct nbd_reply rep;
01478         DEBUG("handling write request\n");
01479         setup_reply(&rep, req);
01480 
01481         if ((client->server->flags & F_READONLY) ||
01482             (client->server->flags & F_AUTOREADONLY)) {
01483                 DEBUG("[WRITE to READONLY!]");
01484                 rep.error = nbd_errno(EPERM);
01485         } else {
01486                 if(expwrite(req->from, data, req->len, client, (req->type &~NBD_CMD_MASK_COMMAND))) {
01487                         DEBUG("Write failed: %m");
01488                         rep.error = nbd_errno(errno);
01489                 }
01490         }
01491         pthread_mutex_lock(&(client->lock));
01492         writeit(client->net, &rep, sizeof rep);
01493         pthread_mutex_unlock(&(client->lock));
01494 }
01495 
01496 static void handle_flush(CLIENT* client, struct nbd_request* req) {
01497         struct nbd_reply rep;
01498         DEBUG("handling flush request\n");
01499         setup_reply(&rep, req);
01500         if(expflush(client)) {
01501                 DEBUG("Flush failed: %m");
01502                 rep.error = nbd_errno(errno);
01503         }
01504         pthread_mutex_lock(&(client->lock));
01505         writeit(client->net, &rep, sizeof rep);
01506         pthread_mutex_unlock(&(client->lock));
01507 }
01508 
01509 static void handle_trim(CLIENT* client, struct nbd_request* req) {
01510         struct nbd_reply rep;
01511         DEBUG("handling trim request\n");
01512         setup_reply(&rep, req);
01513         if(exptrim(req, client)) {
01514                 DEBUG("Trim failed: %m");
01515                 rep.error = nbd_errno(errno);
01516         }
01517         pthread_mutex_lock(&(client->lock));
01518         writeit(client->net, &rep, sizeof rep);
01519         pthread_mutex_unlock(&(client->lock));
01520 }
01521 
01522 static void handle_request(gpointer data, gpointer user_data) {
01523         struct work_package* package = (struct work_package*) data;
01524         uint32_t type = package->req->type & NBD_CMD_MASK_COMMAND;
01525         uint32_t flags = package->req->type & ~NBD_CMD_MASK_COMMAND;
01526         struct nbd_reply rep;
01527 
01528         if(flags & ~NBD_CMD_FLAG_FUA) {
01529                 msg(LOG_ERR, "E: received invalid flag %d on command %d, ignoring", flags, type);
01530                 goto error;
01531         }
01532 
01533         switch(type) {
01534                 case NBD_CMD_READ:
01535                         handle_read(package->client, package->req);
01536                         break;
01537                 case NBD_CMD_WRITE:
01538                         handle_write(package->client, package->req, package->data);
01539                         break;
01540                 case NBD_CMD_FLUSH:
01541                         handle_flush(package->client, package->req);
01542                         break;
01543                 case NBD_CMD_TRIM:
01544                         handle_trim(package->client, package->req);
01545                         break;
01546                 default:
01547                         msg(LOG_ERR, "E: received unknown command %d of type, ignoring", package->req->type);
01548                         goto error;
01549         }
01550         goto end;
01551 error:
01552         setup_reply(&rep, package->req);
01553         rep.error = nbd_errno(EINVAL);
01554         pthread_mutex_lock(&(package->client->lock));
01555         writeit(package->client->net, &rep, sizeof rep);
01556         pthread_mutex_unlock(&(package->client->lock));
01557 end:
01558         package_dispose(package);
01559 }
01560 
01561 static int mainloop_threaded(CLIENT* client) {
01562         struct nbd_request* req;
01563         struct work_package* pkg;
01564 
01565         send_export_info(client);
01566         DEBUG("Entering request loop\n");
01567         while(1) {
01568                 req = calloc(sizeof (struct nbd_request), 1);
01569 
01570                 readit(client->net, req, sizeof(struct nbd_request));
01571                 if(client->transactionlogfd != -1) {
01572                         writeit(client->transactionlogfd, req, sizeof(struct nbd_request));
01573                 }
01574 
01575                 req->from = ntohll(req->from);
01576                 req->type = ntohl(req->type);
01577                 req->len = ntohl(req->len);
01578 
01579                 if(req->magic != htonl(NBD_REQUEST_MAGIC))
01580                         err("Protocol error: not enough magic.");
01581 
01582                 pkg = package_create(client, req);
01583 
01584                 if((req->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
01585                         readit(client->net, pkg->data, req->len);
01586                 }
01587                 if(req->type == NBD_CMD_DISC) {
01588                         g_thread_pool_free(tpool, FALSE, TRUE);
01589                         return 0;
01590                 }
01591                 g_thread_pool_push(tpool, pkg, NULL);
01592         }
01593 }
01594 
/**
 * sending macro: write reply to net and, if a transaction log is open,
 * to the log as well.
 *
 * Besides its arguments this macro uses a variable named `client`, which
 * must be in scope wherever it is expanded. It is wrapped in
 * do { } while(0) so that it behaves as a single statement.
 */
#define SEND(net,reply) do { writeit( (net), &(reply), sizeof( reply )); \
	if (client->transactionlogfd != -1) \
		writeit(client->transactionlogfd, &(reply), sizeof(reply)); } while(0)
/**
 * error macro: send reply with its error field set from errcode (mapped
 * through nbd_errno()), then reset the error field to 0 for later use.
 */
#define ERROR(client,reply,errcode) do { (reply).error = nbd_errno(errcode); SEND((client)->net,reply); (reply).error = 0; } while(0)
01601 /**
01602  * Serve a file to a single client.
01603  *
01604  * @todo This beast needs to be split up in many tiny little manageable
01605  * pieces. Preferably with a chainsaw.
01606  *
01607  * @param client The client we're going to serve to.
01608  * @return when the client disconnects
01609  **/
01610 int mainloop(CLIENT *client) {
01611         struct nbd_request request;
01612         struct nbd_reply reply;
01613         gboolean go_on=TRUE;
01614 #ifdef DODBG
01615         int i = 0;
01616 #endif
01617         send_export_info(client);
01618         DEBUG("Entering request loop!\n");
01619         reply.magic = htonl(NBD_REPLY_MAGIC);
01620         reply.error = 0;
01621         while (go_on) {
01622                 char buf[BUFSIZE];
01623                 char* p;
01624                 size_t len;
01625                 size_t currlen;
01626                 size_t writelen;
01627                 uint16_t command;
01628 #ifdef DODBG
01629                 i++;
01630                 printf("%d: ", i);
01631 #endif
01632                 readit(client->net, &request, sizeof(request));
01633                 if (client->transactionlogfd != -1)
01634                         writeit(client->transactionlogfd, &request, sizeof(request));
01635 
01636                 request.from = ntohll(request.from);
01637                 request.type = ntohl(request.type);
01638                 command = request.type & NBD_CMD_MASK_COMMAND;
01639                 len = ntohl(request.len);
01640 
01641                 DEBUG("%s from %llu (%llu) len %u, ", getcommandname(command),
01642                                 (unsigned long long)request.from,
01643                                 (unsigned long long)request.from / 512, len);
01644 
01645                 if (request.magic != htonl(NBD_REQUEST_MAGIC))
01646                         err("Not enough magic.");
01647 
01648                 memcpy(reply.handle, request.handle, sizeof(reply.handle));
01649 
01650                 if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ) ||
01651                     (command==NBD_CMD_TRIM)) {
01652                         if (request.from + len < request.from) { // 64 bit overflow!!
01653                                 DEBUG("[Number too large!]");
01654                                 ERROR(client, reply, EINVAL);
01655                                 continue;
01656                         }
01657 
01658                         if (((off_t)request.from + len) > client->exportsize) {
01659                                 DEBUG("[RANGE!]");
01660                                 ERROR(client, reply, (command==NBD_CMD_WRITE) ? ENOSPC : EINVAL);
01661                                 continue;
01662                         }
01663 
01664                         currlen = len;
01665                         if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
01666                                 currlen = BUFSIZE - sizeof(struct nbd_reply);
01667                                 if(!logged_oversized) {
01668                                         msg(LOG_DEBUG, "oversized request (this is not a problem)");
01669                                         logged_oversized = true;
01670                                 }
01671                         }
01672                 }
01673 
01674                 switch (command) {
01675 
01676                 case NBD_CMD_DISC:
01677                         msg(LOG_INFO, "Disconnect request received.");
01678                         if (client->server->flags & F_COPYONWRITE) { 
01679                                 if (client->difmap) g_free(client->difmap) ;
01680                                 close(client->difffile);
01681                                 unlink(client->difffilename);
01682                                 free(client->difffilename);
01683                         }
01684                         go_on=FALSE;
01685                         continue;
01686 
01687                 case NBD_CMD_WRITE:
01688                         DEBUG("wr: net->buf, ");
01689                         while(len > 0) {
01690                                 readit(client->net, buf, currlen);
01691                                 DEBUG("buf->exp, ");
01692                                 if ((client->server->flags & F_READONLY) ||
01693                                     (client->server->flags & F_AUTOREADONLY)) {
01694                                         DEBUG("[WRITE to READONLY!]");
01695                                         ERROR(client, reply, EPERM);
01696                                         consume(client->net, buf, len-currlen, BUFSIZE);
01697                                         continue;
01698                                 }
01699                                 if (expwrite(request.from, buf, currlen, client,
01700                                              request.type & NBD_CMD_FLAG_FUA)) {
01701                                         DEBUG("Write failed: %m" );
01702                                         ERROR(client, reply, errno);
01703                                         consume(client->net, buf, len-currlen, BUFSIZE);
01704                                         continue;
01705                                 }
01706                                 len -= currlen;
01707                                 request.from += currlen;
01708                                 currlen = (len < BUFSIZE) ? len : BUFSIZE;
01709                         }
01710                         SEND(client->net, reply);
01711                         DEBUG("OK!\n");
01712                         continue;
01713 
01714                 case NBD_CMD_FLUSH:
01715                         DEBUG("fl: ");
01716                         if (expflush(client)) {
01717                                 DEBUG("Flush failed: %m");
01718                                 ERROR(client, reply, errno);
01719                                 continue;
01720                         }
01721                         SEND(client->net, reply);
01722                         DEBUG("OK!\n");
01723                         continue;
01724 
01725                 case NBD_CMD_READ:
01726                         DEBUG("exp->buf, ");
01727                         if (client->transactionlogfd != -1)
01728                                 writeit(client->transactionlogfd, &reply, sizeof(reply));
01729                         writeit(client->net, &reply, sizeof(reply));
01730                         p = buf;
01731                         writelen = currlen;
01732                         while(len > 0) {
01733                                 if (expread(request.from, p, currlen, client)) {
01734                                         DEBUG("Read failed: %m");
01735                                         ERROR(client, reply, errno);
01736                                         continue;
01737                                 }
01738                                 
01739                                 DEBUG("buf->net, ");
01740                                 writeit(client->net, buf, writelen);
01741                                 len -= currlen;
01742                                 request.from += currlen;
01743                                 currlen = (len < BUFSIZE) ? len : BUFSIZE;
01744                                 p = buf;
01745                                 writelen = currlen;
01746                         }
01747                         DEBUG("OK!\n");
01748                         continue;
01749 
01750                 case NBD_CMD_TRIM:
01751                         /* The kernel module sets discard_zeroes_data == 0,
01752                          * so it is okay to do nothing.  */
01753                         if ((client->server->flags & F_READONLY) ||
01754                             (client->server->flags & F_AUTOREADONLY)) {
01755                                 DEBUG("[TRIM to READONLY!]");
01756                                 ERROR(client, reply, EPERM);
01757                                 continue;
01758                         }
01759                         if (exptrim(&request, client)) {
01760                                 DEBUG("Trim failed: %m");
01761                                 ERROR(client, reply, errno);
01762                                 continue;
01763                         }
01764                         SEND(client->net, reply);
01765                         continue;
01766 
01767                 default:
01768                         DEBUG ("Ignoring unknown command\n");
01769                         continue;
01770                 }
01771         }
01772         return 0;
01773 }
01774 
01775 /**
01776  * Set up client export array, which is an array of FILE_INFO.
01777  * Also, split a single exportfile into multiple ones, if that was asked.
01778  * @param client information on the client which we want to setup export for
01779  **/
01780 void setupexport(CLIENT* client) {
01781         int i;
01782         off_t laststartoff = 0, lastsize = 0;
01783         int multifile = (client->server->flags & F_MULTIFILE);
01784         int treefile = (client->server->flags & F_TREEFILES);
01785         int temporary = (client->server->flags & F_TEMPORARY) && !multifile;
01786         int cancreate = (client->server->expected_size) && !multifile;
01787 
01788         if (treefile) {
01789                 client->export = NULL; // this could be thousands of files so we open handles on demand although its slower
01790                 client->exportsize = client->server->expected_size; // available space is not checked, as it could change during runtime anyway
01791         } else {
01792                 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
01793 
01794                 /* If multi-file, open as many files as we can.
01795                  * If not, open exactly one file.
01796                  * Calculate file sizes as we go to get total size. */
01797                 for(i=0; ; i++) {
01798                         FILE_INFO fi;
01799                         gchar *tmpname;
01800                         gchar* error_string;
01801 
01802                         if (i)
01803                                 cancreate = 0;
01804                         /* if expected_size is specified, and this is the first file, we can create the file */
01805                         mode_t mode = (client->server->flags & F_READONLY) ?
01806                           O_RDONLY : (O_RDWR | (cancreate?O_CREAT:0));
01807 
01808                         if (temporary) {
01809                                 tmpname=g_strdup_printf("%s.%d-XXXXXX", client->exportname, i);
01810                                 DEBUG( "Opening %s\n", tmpname );
01811                                 fi.fhandle = mkstemp(tmpname);
01812                         } else {
01813                                 if(multifile) {
01814                                         tmpname=g_strdup_printf("%s.%d", client->exportname, i);
01815                                 } else {
01816                                         tmpname=g_strdup(client->exportname);
01817                                 }
01818                                 DEBUG( "Opening %s\n", tmpname );
01819                                 fi.fhandle = open(tmpname, mode, 0600);
01820                                 if(fi.fhandle == -1 && mode == O_RDWR) {
01821                                         /* Try again because maybe media was read-only */
01822                                         fi.fhandle = open(tmpname, O_RDONLY);
01823                                         if(fi.fhandle != -1) {
01824                                                 /* Opening the base file in copyonwrite mode is
01825                                                  * okay */
01826                                                 if(!(client->server->flags & F_COPYONWRITE)) {
01827                                                         client->server->flags |= F_AUTOREADONLY;
01828                                                         client->server->flags |= F_READONLY;
01829                                                 }
01830                                         }
01831                                 }
01832                         }
01833                         if(fi.fhandle == -1) {
01834                                 if(multifile && i>0)
01835                                         break;
01836                                 error_string=g_strdup_printf(
01837                                         "Could not open exported file %s: %%m",
01838                                         tmpname);
01839                                 err(error_string);
01840                         }
01841 
01842                         if (temporary) {
01843                                 unlink(tmpname); /* File will stick around whilst FD open */
01844                         }
01845 
01846                         fi.startoff = laststartoff + lastsize;
01847                         g_array_append_val(client->export, fi);
01848                         g_free(tmpname);
01849 
01850                         /* Starting offset and size of this file will be used to
01851                          * calculate starting offset of next file */
01852                         laststartoff = fi.startoff;
01853                         lastsize = size_autodetect(fi.fhandle);
01854 
01855                         /* If we created the file, it will be length zero */
01856                         if (!lastsize && cancreate) {
01857                                 assert(!multifile);
01858                                 if(ftruncate (fi.fhandle, client->server->expected_size)<0) {
01859                                         err("Could not expand file: %m");
01860                                 }
01861                                 lastsize = client->server->expected_size;
01862                                 break; /* don't look for any more files */
01863                         }
01864 
01865                         if(!multifile || temporary)
01866                                 break;
01867                 }
01868 
01869                 /* Set export size to total calculated size */
01870                 client->exportsize = laststartoff + lastsize;
01871 
01872                 /* Export size may be overridden */
01873                 if(client->server->expected_size) {
01874                         /* desired size must be <= total calculated size */
01875                         if(client->server->expected_size > client->exportsize) {
01876                                 err("Size of exported file is too big\n");
01877                         }
01878 
01879                         client->exportsize = client->server->expected_size;
01880                 }
01881         }
01882 
01883         msg(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
01884         if(multifile) {
01885                 msg(LOG_INFO, "Total number of files: %d", i);
01886         }
01887         if(treefile) {
01888                 msg(LOG_INFO, "Total number of (potential) files: %" PRId64, (client->exportsize+TREEPAGESIZE-1)/TREEPAGESIZE);
01889         }
01890 }
01891 
01892 int copyonwrite_prepare(CLIENT* client) {
01893         off_t i;
01894         gchar* dir;
01895         gchar* export_base;
01896         if (client->server->cowdir != NULL) {
01897                 dir = g_strdup(client->server->cowdir);
01898         } else {
01899                 dir = g_strdup(dirname(client->exportname));
01900         }
01901         export_base = g_strdup(basename(client->exportname));
01902         client->difffilename = g_strdup_printf("%s/%s-%s-%d.diff",dir,export_base,client->clientname,
01903                 (int)getpid());
01904         g_free(dir);
01905         g_free(export_base);
01906         msg(LOG_INFO, "About to create map and diff file %s", client->difffilename) ;
01907         client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
01908         if (client->difffile<0) err("Could not create diff file (%m)") ;
01909         if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
01910                 err("Could not allocate memory") ;
01911         for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
01912 
01913         return 0;
01914 }
01915 
01916 /**
01917  * Run a command. This is used for the ``prerun'' and ``postrun'' config file
01918  * options
01919  *
01920  * @param command the command to be ran. Read from the config file
01921  * @param file the file name we're about to export
01922  **/
01923 int do_run(gchar* command, gchar* file) {
01924         gchar* cmd;
01925         int retval=0;
01926 
01927         if(command && *command) {
01928                 cmd = g_strdup_printf(command, file);
01929                 retval=system(cmd);
01930                 g_free(cmd);
01931         }
01932         return retval;
01933 }
01934 
01935 /**
01936  * Serve a connection. 
01937  *
01938  * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
01939  * follow the road map.
01940  *
01941  * @param client a connected client
01942  **/
01943 void serveconnection(CLIENT *client) {
01944         if (client->server->transactionlog && (client->transactionlogfd == -1))
01945         {
01946                 if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
01947                                                            O_WRONLY | O_CREAT,
01948                                                            S_IRUSR | S_IWUSR)))
01949                         g_warning("Could not open transaction log %s",
01950                                   client->server->transactionlog);
01951         }
01952 
01953         if(do_run(client->server->prerun, client->exportname)) {
01954                 exit(EXIT_FAILURE);
01955         }
01956         setupexport(client);
01957 
01958         if (client->server->flags & F_COPYONWRITE) {
01959                 copyonwrite_prepare(client);
01960         }
01961 
01962         setmysockopt(client->net);
01963 
01964         mainloop_threaded(client);
01965         do_run(client->server->postrun, client->exportname);
01966 
01967         if (-1 != client->transactionlogfd)
01968         {
01969                 close(client->transactionlogfd);
01970                 client->transactionlogfd = -1;
01971         }
01972 }
01973 
01974 /**
01975  * Find the name of the file we have to serve. This will use g_strdup_printf
01976  * to put the IP address of the client inside a filename containing
01977  * "%s" (in the form as specified by the "virtstyle" option). That name
01978  * is then written to client->exportname.
01979  *
01980  * @param net A socket connected to an nbd client
01981  * @param client information about the client. The IP address in human-readable
01982  * format will be written to a new char* buffer, the address of which will be
01983  * stored in client->clientname.
01984  * @return: 0 - OK, -1 - failed.
01985  **/
01986 int set_peername(int net, CLIENT *client) {
01987         struct sockaddr_storage netaddr;
01988         struct sockaddr* addr = (struct sockaddr*)&netaddr;
01989         socklen_t addrinlen = sizeof( struct sockaddr_storage );
01990         struct addrinfo hints;
01991         struct addrinfo *ai = NULL;
01992         char peername[NI_MAXHOST];
01993         char netname[NI_MAXHOST];
01994         char *tmp = NULL;
01995         int i;
01996         int e;
01997 
01998         if (getsockname(net, addr, &addrinlen) < 0) {
01999                 msg(LOG_INFO, "getsockname failed: %m");
02000                 return -1;
02001         }
02002 
02003         if(netaddr.ss_family == AF_UNIX) {
02004                 client->clientaddr.ss_family = AF_UNIX;
02005                 strcpy(peername, "unix");
02006         } else {
02007                 if (getpeername(net, (struct sockaddr *) &(client->clientaddr), &addrinlen) < 0) {
02008                         msg(LOG_INFO, "getpeername failed: %m");
02009                         return -1;
02010                 }
02011                 if((e = getnameinfo((struct sockaddr *)&(client->clientaddr), addrinlen,
02012                                 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST))) {
02013                         msg(LOG_INFO, "getnameinfo failed: %s", gai_strerror(e));
02014                         return -1;
02015                 }
02016 
02017                 memset(&hints, '\0', sizeof (hints));
02018                 hints.ai_flags = AI_ADDRCONFIG;
02019                 e = getaddrinfo(peername, NULL, &hints, &ai);
02020 
02021                 if(e != 0) {
02022                         msg(LOG_INFO, "getaddrinfo failed: %s", gai_strerror(e));
02023                         freeaddrinfo(ai);
02024                         return -1;
02025                 }
02026         }
02027 
02028         if(strncmp(peername, "::ffff:", 7) == 0) {
02029                 memmove(peername, peername+7, strlen(peername));
02030         }
02031 
02032         switch(client->server->virtstyle) {
02033                 case VIRT_NONE:
02034                         msg(LOG_DEBUG, "virtualization is off");
02035                         client->exportname=g_strdup(client->server->exportname);
02036                         break;
02037                 case VIRT_IPHASH:
02038                         msg(LOG_DEBUG, "virtstyle iphash");
02039                         for(i=0;i<strlen(peername);i++) {
02040                                 if(peername[i]=='.') {
02041                                         peername[i]='/';
02042                                 }
02043                         }
02044                 case VIRT_IPLIT:
02045                         msg(LOG_DEBUG, "virtstyle ipliteral");
02046                         client->exportname=g_strdup_printf(client->server->exportname, peername);
02047                         break;
02048                 case VIRT_CIDR:
02049                         msg(LOG_DEBUG, "virtstyle cidr %d", client->server->cidrlen);
02050                         memcpy(&netaddr, &(client->clientaddr), addrinlen);
02051                         int addrbits;
02052                         if(client->clientaddr.ss_family == AF_UNIX) {
02053                                 tmp = g_strdup(peername);
02054                         } else {
02055                                 assert((ai->ai_family == AF_INET) || (ai->ai_family == AF_INET6));
02056                                 if(ai->ai_family == AF_INET) {
02057                                         addrbits = 32;
02058                                 } else if(ai->ai_family == AF_INET6) {
02059                                         addrbits = 128;
02060                                 }
02061                                 uint8_t* addrptr = (uint8_t*)(((struct sockaddr*)&netaddr)->sa_data);
02062                                 for(int i = 0; i < addrbits; i+=8) {
02063                                         int masklen = client->server->cidrlen - i;
02064                                         masklen = masklen > 0 ? masklen : 0;
02065                                         uint8_t mask = getmaskbyte(masklen);
02066                                         *addrptr &= mask;
02067                                         addrptr++;
02068                                 }
02069                                 getnameinfo((struct sockaddr *) &netaddr, addrinlen,
02070                                                                 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
02071                                 tmp=g_strdup_printf("%s/%s", netname, peername);
02072                         }
02073 
02074                         if(tmp != NULL) {
02075                                 client->exportname=g_strdup_printf(client->server->exportname, tmp);
02076                                 g_free(tmp);
02077                         }
02078 
02079                         break;
02080         }
02081 
02082         freeaddrinfo(ai);
02083         msg(LOG_INFO, "connect from %s, assigned file is %s",
02084             peername, client->exportname);
02085         client->clientname=g_strdup(peername);
02086         return 0;
02087 }
02088 
02089 /**
02090  * Destroy a pid_t*
02091  * @param data a pointer to pid_t which should be freed
02092  **/
02093 void destroy_pid_t(gpointer data) {
02094         g_free(data);
02095 }
02096 
02097 static pid_t
02098 spawn_child()
02099 {
02100         pid_t pid;
02101         sigset_t newset;
02102         sigset_t oldset;
02103 
02104         sigemptyset(&newset);
02105         sigaddset(&newset, SIGCHLD);
02106         sigaddset(&newset, SIGTERM);
02107         sigprocmask(SIG_BLOCK, &newset, &oldset);
02108         pid = fork();
02109         if (pid < 0) {
02110                 msg(LOG_ERR, "Could not fork (%s)", strerror(errno));
02111                 goto out;
02112         }
02113         if (pid > 0) { /* Parent */
02114                 pid_t *pidp;
02115 
02116                 pidp = g_malloc(sizeof(pid_t));
02117                 *pidp = pid;
02118                 g_hash_table_insert(children, pidp, pidp);
02119                 goto out;
02120         }
02121         /* Child */
02122 
02123         /* Child's signal disposition is reset to default. */
02124         signal(SIGCHLD, SIG_DFL);
02125         signal(SIGTERM, SIG_DFL);
02126         signal(SIGHUP, SIG_DFL);
02127         sigemptyset(&oldset);
02128 out:
02129         sigprocmask(SIG_SETMASK, &oldset, NULL);
02130         return pid;
02131 }
02132 
/**
 * Accept one pending connection on a listening socket.
 *
 * @param sock the listening socket
 * @return the connected socket, or a negative value if accept() failed
 *         (the failure is reported through err_nonfatal())
 **/
static int
socket_accept(const int sock)
{
	struct sockaddr_storage peer;
	socklen_t peerlen = sizeof(peer);
	const int conn = accept(sock, (struct sockaddr *) &peer, &peerlen);

	if (conn >= 0)
		return conn;

	err_nonfatal("Failed to accept socket connection: %m");
	return conn;
}
02147 
02148 static void
02149 handle_modern_connection(GArray *const servers, const int sock)
02150 {
02151         int net;
02152         pid_t pid;
02153         CLIENT *client = NULL;
02154         int sock_flags_old;
02155         int sock_flags_new;
02156 
02157         net = socket_accept(sock);
02158         if (net < 0)
02159                 return;
02160 
02161         if (!dontfork) {
02162                 pid = spawn_child();
02163                 if (pid) {
02164                         if (pid > 0)
02165                                 msg(LOG_INFO, "Spawned a child process");
02166                         if (pid < 0)
02167                                 msg(LOG_ERR, "Failed to spawn a child process");
02168                         close(net);
02169                         return;
02170                 }
02171                 /* Child just continues. */
02172         }
02173 
02174         client = negotiate(net, servers);
02175         if (!client) {
02176                 msg(LOG_ERR, "Modern initial negotiation failed");
02177                 goto handler_err;
02178         }
02179 
02180         if (client->server->max_connections > 0 &&
02181            g_hash_table_size(children) >= client->server->max_connections) {
02182                 msg(LOG_ERR, "Max connections (%d) reached",
02183                     client->server->max_connections);
02184                 goto handler_err;
02185         }
02186 
02187         sock_flags_old = fcntl(net, F_GETFL, 0);
02188         if (sock_flags_old == -1) {
02189                 msg(LOG_ERR, "Failed to get socket flags");
02190                 goto handler_err;
02191         }
02192 
02193         sock_flags_new = sock_flags_old & ~O_NONBLOCK;
02194         if (sock_flags_new != sock_flags_old &&
02195             fcntl(net, F_SETFL, sock_flags_new) == -1) {
02196                 msg(LOG_ERR, "Failed to set socket to blocking mode");
02197                 goto handler_err;
02198         }
02199 
02200         if (set_peername(net, client)) {
02201                 msg(LOG_ERR, "Failed to set peername");
02202                 goto handler_err;
02203         }
02204 
02205         if (!authorized_client(client)) {
02206                 msg(LOG_INFO, "Client '%s' is not authorized to access",
02207                     client->clientname);
02208                 goto handler_err;
02209         }
02210 
02211         if (!dontfork) {
02212                 int i;
02213 
02214                 /* Free all root server resources here, because we are
02215                  * currently in the child process serving one specific
02216                  * connection. These are not simply needed anymore. */
02217                 g_hash_table_destroy(children);
02218                 children = NULL;
02219                 for (i = 0; i < modernsocks->len; i++) {
02220                         close(g_array_index(modernsocks, int, i));
02221                 }
02222                 g_array_free(modernsocks, TRUE);
02223 
02224                 /* Now that we are in the child process after a
02225                  * succesful negotiation, we do not need the list of
02226                  * servers anymore, get rid of it.*/
02227                 /* FALSE does not free the
02228                    actual data. This is required,
02229                    because the client has a
02230                    direct reference into that
02231                    data, and otherwise we get a
02232                    segfault... */
02233                 g_array_free(servers, FALSE);
02234         }
02235 
02236         msg(LOG_INFO, "Starting to serve");
02237         serveconnection(client);
02238         exit(EXIT_SUCCESS);
02239 
02240 handler_err:
02241         g_free(client);
02242         close(net);
02243 
02244         if (!dontfork) {
02245                 exit(EXIT_FAILURE);
02246         }
02247 }
02248 
02249 /**
02250  * Return the index of the server whose servename matches the given
02251  * name.
02252  *
02253  * @param servename a string to match
02254  * @param servers an array of servers
02255  * @return the first index of the server whose servename matches the
02256  *         given name or -1 if one cannot be found
02257  **/
02258 static int get_index_by_servename(const gchar *const servename,
02259                                   const GArray *const servers) {
02260         int i;
02261 
02262         for (i = 0; i < servers->len; ++i) {
02263                 const SERVER server = g_array_index(servers, SERVER, i);
02264 
02265                 if (strcmp(servename, server.servename) == 0)
02266                         return i;
02267         }
02268 
02269         return -1;
02270 }
02271 
02272 /**
02273  * Parse configuration files and add servers to the array if they don't
02274  * already exist there. The existence is tested by comparing
02275  * servenames. A server is appended to the array only if its servename
02276  * is unique among all other servers.
02277  *
02278  * @param servers an array of servers
02279  * @return the number of new servers appended to the array, or -1 in
02280  *         case of an error
02281  **/
02282 static int append_new_servers(GArray *const servers, GError **const gerror) {
02283         int i;
02284         GArray *new_servers;
02285         const int old_len = servers->len;
02286         int retval = -1;
02287         struct generic_conf genconf;
02288 
02289         new_servers = parse_cfile(config_file_pos, &genconf, true, gerror);
02290         g_thread_pool_set_max_threads(tpool, genconf.threads, NULL);
02291         if (!new_servers)
02292                 goto out;
02293 
02294         for (i = 0; i < new_servers->len; ++i) {
02295                 SERVER new_server = g_array_index(new_servers, SERVER, i);
02296 
02297                 if (new_server.servename
02298                     && -1 == get_index_by_servename(new_server.servename,
02299                                                     servers)) {
02300                         g_array_append_val(servers, new_server);
02301                 }
02302         }
02303 
02304         retval = servers->len - old_len;
02305 out:
02306         g_array_free(new_servers, TRUE);
02307 
02308         return retval;
02309 }
02310 
02311 /**
02312  * Loop through the available servers, and serve them. Never returns.
02313  **/
02314 void serveloop(GArray* servers) {
02315         int i;
02316         int max;
02317         fd_set mset;
02318         fd_set rset;
02319         sigset_t blocking_mask;
02320         sigset_t original_mask;
02321 
02322         /* 
02323          * Set up the master fd_set. The set of descriptors we need
02324          * to select() for never changes anyway and it buys us a *lot*
02325          * of time to only build this once. However, if we ever choose
02326          * to not fork() for clients anymore, we may have to revisit
02327          * this.
02328          */
02329         max=0;
02330         FD_ZERO(&mset);
02331         for(i=0;i<modernsocks->len;i++) {
02332                 int sock = g_array_index(modernsocks, int, i);
02333                 FD_SET(sock, &mset);
02334                 max=sock>max?sock:max;
02335         }
02336 
02337         /* Construct a signal mask which is used to make signal testing and
02338          * receiving an atomic operation to ensure no signal is received between
02339          * tests and blocking pselect(). */
02340         if (sigemptyset(&blocking_mask) == -1)
02341                 err("failed to initialize blocking_mask: %m");
02342 
02343         if (sigaddset(&blocking_mask, SIGCHLD) == -1)
02344                 err("failed to add SIGCHLD to blocking_mask: %m");
02345 
02346         if (sigaddset(&blocking_mask, SIGHUP) == -1)
02347                 err("failed to add SIGHUP to blocking_mask: %m");
02348 
02349         if (sigaddset(&blocking_mask, SIGTERM) == -1)
02350                 err("failed to add SIGTERM to blocking_mask: %m");
02351 
02352         if (sigprocmask(SIG_BLOCK, &blocking_mask, &original_mask) == -1)
02353             err("failed to block signals: %m");
02354 
02355         for(;;) {
02356                 if (is_sigterm_caught) {
02357                         is_sigterm_caught = 0;
02358 
02359                         g_hash_table_foreach(children, killchild, NULL);
02360                         unlink(pidfname);
02361 
02362                         exit(EXIT_SUCCESS);
02363                 }
02364 
02365                 if (is_sigchld_caught) {
02366                         int status;
02367                         int* i;
02368                         pid_t pid;
02369 
02370                         is_sigchld_caught = 0;
02371 
02372                         while ((pid=waitpid(-1, &status, WNOHANG)) > 0) {
02373                                 if (WIFEXITED(status)) {
02374                                         msg(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
02375                                 }
02376                                 i = g_hash_table_lookup(children, &pid);
02377                                 if (!i) {
02378                                         msg(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
02379                                 } else {
02380                                         DEBUG("Removing %d from the list of children", pid);
02381                                         g_hash_table_remove(children, &pid);
02382                                 }
02383                         }
02384                 }
02385 
02386                 /* SIGHUP causes the root server process to reconfigure
02387                  * itself and add new export servers for each newly
02388                  * found export configuration group, i.e. spawn new
02389                  * server processes for each previously non-existent
02390                  * export. This does not alter old runtime configuration
02391                  * but just appends new exports. */
02392                 if (is_sighup_caught) {
02393                         int n;
02394                         GError *gerror = NULL;
02395 
02396                         msg(LOG_INFO, "reconfiguration request received");
02397                         is_sighup_caught = 0; /* Reset to allow catching
02398                                                * it again. */
02399 
02400                         n = append_new_servers(servers, &gerror);
02401                         if (n == -1)
02402                                 msg(LOG_ERR, "failed to append new servers: %s",
02403                                     gerror->message);
02404 
02405                         for (i = servers->len - n; i < servers->len; ++i) {
02406                                 const SERVER server = g_array_index(servers,
02407                                                                     SERVER, i);
02408 
02409                                 msg(LOG_INFO, "reconfigured new server: %s",
02410                                     server.servename);
02411                         }
02412                 }
02413 
02414                 memcpy(&rset, &mset, sizeof(fd_set));
02415                 if (pselect(max + 1, &rset, NULL, NULL, NULL, &original_mask) > 0) {
02416                         DEBUG("accept, ");
02417                         for(i=0; i < modernsocks->len; i++) {
02418                                 int sock = g_array_index(modernsocks, int, i);
02419                                 if(!FD_ISSET(sock, &rset)) {
02420                                         continue;
02421                                 }
02422 
02423                                 handle_modern_connection(servers, sock);
02424                         }
02425                 }
02426         }
02427 }
02428 void serveloop(GArray* servers) G_GNUC_NORETURN;
02429 
02430 /**
02431  * Set server socket options.
02432  *
02433  * @param socket a socket descriptor of the server
02434  *
02435  * @param gerror a pointer to an error object pointer used for reporting
02436  *        errors. On error, if gerror is not NULL, *gerror is set and -1
02437  *        is returned.
02438  *
02439  * @return 0 on success, -1 on error
02440  **/
02441 int dosockopts(const int socket, GError **const gerror) {
02442 #ifndef sun
02443         int yes=1;
02444 #else
02445         char yes='1';
02446 #endif /* sun */
02447         struct linger l;
02448 
02449         /* lose the pesky "Address already in use" error message */
02450         if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
02451                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_REUSEADDR,
02452                             "failed to set socket option SO_REUSEADDR: %s",
02453                             strerror(errno));
02454                 return -1;
02455         }
02456         l.l_onoff = 1;
02457         l.l_linger = 10;
02458         if (setsockopt(socket,SOL_SOCKET,SO_LINGER,&l,sizeof(l)) == -1) {
02459                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_LINGER,
02460                             "failed to set socket option SO_LINGER: %s",
02461                             strerror(errno));
02462                 return -1;
02463         }
02464         if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
02465                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_KEEPALIVE,
02466                             "failed to set socket option SO_KEEPALIVE: %s",
02467                             strerror(errno));
02468                 return -1;
02469         }
02470 
02471         return 0;
02472 }
02473 
02474 int open_unix(const gchar *const sockname, GError **const gerror) {
02475         struct sockaddr_un sa;
02476         int sock=-1;
02477         int retval=-1;
02478 
02479         memset(&sa, 0, sizeof(struct sockaddr_un));
02480         sa.sun_family = AF_UNIX;
02481         strncpy(sa.sun_path, sockname, sizeof sa.sun_path);
02482         sock = socket(AF_UNIX, SOCK_STREAM, 0);
02483         if(sock < 0) {
02484                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
02485                                 "failed to open a unix socket: "
02486                                 "failed to create socket: %s",
02487                                 strerror(errno));
02488                 goto out;
02489         }
02490         if(bind(sock, (struct sockaddr*)&sa, sizeof(struct sockaddr_un))<0) {
02491                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
02492                                 "failed to open a unix socket: "
02493                                 "failed to bind to address %s: %s",
02494                                 sockname, strerror(errno));
02495                 goto out;
02496         }
02497         if(listen(sock, 10)<0) {
02498                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
02499                                 "failed to open a unix socket: "
02500                                 "failed to start listening: %s",
02501                                 strerror(errno));
02502                 goto out;
02503         }
02504         retval=0;
02505         g_array_append_val(modernsocks, sock);
02506 out:
02507         if(retval<0 && sock >= 0) {
02508                 close(sock);
02509         }
02510 
02511         return retval;
02512 }
02513 
02514 int open_modern(const gchar *const addr, const gchar *const port,
02515                 GError **const gerror) {
02516         struct addrinfo hints;
02517         struct addrinfo* ai = NULL;
02518         struct addrinfo* ai_bak = NULL;
02519         struct sock_flags;
02520         int e;
02521         int retval = -1;
02522         int sock = -1;
02523         gchar** addrs;
02524         gchar const* l_addr = addr;
02525 
02526         if(!addr || strlen(addr) == 0) {
02527                 l_addr = "::, 0.0.0.0";
02528         }
02529 
02530         addrs = g_strsplit_set(l_addr, ", \t", -1);
02531 
02532         for(int i=0; addrs[i]!=NULL; i++) {
02533                 if(addrs[i][0] == '\0') {
02534                         continue;
02535                 }
02536                 memset(&hints, '\0', sizeof(hints));
02537                 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
02538                 hints.ai_socktype = SOCK_STREAM;
02539                 hints.ai_family = AF_UNSPEC;
02540                 hints.ai_protocol = IPPROTO_TCP;
02541                 e = getaddrinfo(addrs[i], port ? port : NBD_DEFAULT_PORT, &hints, &ai);
02542                 ai_bak = ai;
02543                 if(e != 0 && addrs[i+1] == NULL && modernsocks->len == 0) {
02544                         g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI,
02545                                     "failed to open a modern socket: "
02546                                     "failed to get address info: %s",
02547                                     gai_strerror(e));
02548                         goto out;
02549                 }
02550 
02551                 while(ai != NULL) {
02552                         sock = -1;
02553 
02554                         if((sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
02555                                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
02556                                             "failed to open a modern socket: "
02557                                             "failed to create a socket: %s",
02558                                             strerror(errno));
02559                                 goto out;
02560                         }
02561 
02562                         if (dosockopts(sock, gerror) == -1) {
02563                                 g_prefix_error(gerror, "failed to open a modern socket: ");
02564                                 goto out;
02565                         }
02566 
02567                         if(bind(sock, ai->ai_addr, ai->ai_addrlen)) {
02568                                 /*
02569                                  * Some systems will return multiple entries for the
02570                                  * same address when we ask it for something
02571                                  * AF_UNSPEC, even though the first entry will
02572                                  * listen to both protocols. Other systems will
02573                                  * return multiple entries too, but we actually
02574                                  * do need to open both.
02575                                  *
02576                                  * Handle this by ignoring EADDRINUSE if we've
02577                                  * already got at least one socket open
02578                                  */
02579                                 if(errno == EADDRINUSE && modernsocks->len > 0) {
02580                                         goto next;
02581                                 }
02582                                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
02583                                             "failed to open a modern socket: "
02584                                             "failed to bind an address to a socket: %s",
02585                                             strerror(errno));
02586                                 goto out;
02587                         }
02588 
02589                         if(listen(sock, 10) <0) {
02590                                 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
02591                                             "failed to open a modern socket: "
02592                                             "failed to start listening on a socket: %s",
02593                                             strerror(errno));
02594                                 goto out;
02595                         }
02596                         g_array_append_val(modernsocks, sock);
02597                 next:
02598                         ai = ai->ai_next;
02599                 }
02600                 if(ai_bak) {
02601                         freeaddrinfo(ai_bak);
02602                         ai_bak=NULL;
02603                 }
02604         }
02605 
02606         retval = 0;
02607 out:
02608 
02609         if (retval == -1 && sock >= 0) {
02610                 close(sock);
02611         }
02612         if(ai_bak)
02613                 freeaddrinfo(ai_bak);
02614 
02615         return retval;
02616 }
02617 
02618 /**
02619  * Connect our servers.
02620  **/
02621 void setup_servers(GArray *const servers, const gchar *const modernaddr,
02622                    const gchar *const modernport, const gchar* unixsock) {
02623         struct sigaction sa;
02624 
02625         GError *gerror = NULL;
02626         if (open_modern(modernaddr, modernport, &gerror) == -1) {
02627                 msg(LOG_ERR, "failed to setup servers: %s",
02628                     gerror->message);
02629                 g_clear_error(&gerror);
02630                 exit(EXIT_FAILURE);
02631         }
02632         if(unixsock != NULL) {
02633                 GError* gerror = NULL;
02634                 if(open_unix(unixsock, &gerror) == -1) {
02635                         msg(LOG_ERR, "failed to setup servers: %s",
02636                                         gerror->message);
02637                         g_clear_error(&gerror);
02638                         exit(EXIT_FAILURE);
02639                 }
02640         }
02641         children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
02642 
02643         sa.sa_handler = sigchld_handler;
02644         sigemptyset(&sa.sa_mask);
02645         sigaddset(&sa.sa_mask, SIGTERM);
02646         sa.sa_flags = SA_RESTART;
02647         if(sigaction(SIGCHLD, &sa, NULL) == -1)
02648                 err("sigaction: %m");
02649 
02650         sa.sa_handler = sigterm_handler;
02651         sigemptyset(&sa.sa_mask);
02652         sigaddset(&sa.sa_mask, SIGCHLD);
02653         sa.sa_flags = SA_RESTART;
02654         if(sigaction(SIGTERM, &sa, NULL) == -1)
02655                 err("sigaction: %m");
02656 
02657         sa.sa_handler = sighup_handler;
02658         sigemptyset(&sa.sa_mask);
02659         sa.sa_flags = SA_RESTART;
02660         if(sigaction(SIGHUP, &sa, NULL) == -1)
02661                 err("sigaction: %m");
02662 }
02663 
02664 /**
02665  * Go daemon (unless we specified at compile time that we didn't want this)
02666  * @param serve the first server of our configuration. If its port is zero,
02667  *      then do not daemonize, because we're doing inetd then. This parameter
02668  *      is only used to create a PID file of the form
02669  *      /var/run/nbd-server.&lt;port&gt;.pid; it's not modified in any way.
02670  **/
02671 #if !defined(NODAEMON)
02672 void daemonize() {
02673         FILE*pidf;
02674 
02675         if(daemon(0,0)<0) {
02676                 err("daemon");
02677         }
02678         if(!*pidfname) {
02679                 strncpy(pidfname, "/var/run/nbd-server.pid", 255);
02680         }
02681         pidf=fopen(pidfname, "w");
02682         if(pidf) {
02683                 fprintf(pidf,"%d\n", (int)getpid());
02684                 fclose(pidf);
02685         } else {
02686                 perror("fopen");
02687                 fprintf(stderr, "Not fatal; continuing");
02688         }
02689 }
02690 #else
02691 #define daemonize(serve)
02692 #endif /* !defined(NODAEMON) */
02693 
02694 /*
02695  * Everything beyond this point (in the file) is run in non-daemon mode.
02696  * The stuff above daemonize() isn't.
02697  */
02698 
02699 /**
02700  * Set up user-ID and/or group-ID
02701  **/
02702 void dousers(const gchar *const username, const gchar *const groupname) {
02703         struct passwd *pw;
02704         struct group *gr;
02705         gchar* str;
02706         if (groupname) {
02707                 gr = getgrnam(groupname);
02708                 if(!gr) {
02709                         str = g_strdup_printf("Invalid group name: %s", groupname);
02710                         err(str);
02711                 }
02712                 if(setgid(gr->gr_gid)<0) {
02713                         err("Could not set GID: %m"); 
02714                 }
02715         }
02716         if (username) {
02717                 pw = getpwnam(username);
02718                 if(!pw) {
02719                         str = g_strdup_printf("Invalid user name: %s", username);
02720                         err(str);
02721                 }
02722                 if(setuid(pw->pw_uid)<0) {
02723                         err("Could not set UID: %m");
02724                 }
02725         }
02726 }
02727 
02728 #ifndef ISSERVER
02729 void glib_message_syslog_redirect(const gchar *log_domain,
02730                                   GLogLevelFlags log_level,
02731                                   const gchar *message,
02732                                   gpointer user_data)
02733 {
02734     int level=LOG_DEBUG;
02735     
02736     switch( log_level )
02737     {
02738       case G_LOG_FLAG_FATAL:
02739       case G_LOG_LEVEL_CRITICAL:
02740       case G_LOG_LEVEL_ERROR:    
02741         level=LOG_ERR; 
02742         break;
02743       case G_LOG_LEVEL_WARNING:
02744         level=LOG_WARNING;
02745         break;
02746       case G_LOG_LEVEL_MESSAGE:
02747       case G_LOG_LEVEL_INFO:
02748         level=LOG_INFO;
02749         break;
02750       case G_LOG_LEVEL_DEBUG:
02751         level=LOG_DEBUG;
02752         break;
02753       default:
02754         level=LOG_ERR;
02755     }
02756     syslog(level, "%s", message);
02757 }
02758 #endif
02759 
02760 /**
02761  * Main entry point...
02762  **/
02763 int main(int argc, char *argv[]) {
02764         SERVER *serve;
02765         GArray *servers;
02766         GError *gerr=NULL;
02767         struct generic_conf genconf;
02768 
02769         memset(&genconf, 0, sizeof(struct generic_conf));
02770 
02771         if (sizeof( struct nbd_request )!=28) {
02772                 fprintf(stderr,"Bad size of structure. Alignment problems?\n");
02773                 exit(EXIT_FAILURE) ;
02774         }
02775 
02776         modernsocks = g_array_new(FALSE, FALSE, sizeof(int));
02777 
02778         logging(MY_NAME);
02779         config_file_pos = g_strdup(CFILE);
02780         serve=cmdline(argc, argv, &genconf);
02781 
02782         genconf.threads = 4;
02783         servers = parse_cfile(config_file_pos, &genconf, true, &gerr);
02784         
02785         /* Update global variables with parsed values. This will be
02786          * removed once we get rid of global configuration variables. */
02787         glob_flags   |= genconf.flags;
02788 
02789         if(serve) {
02790                 g_array_append_val(servers, *serve);
02791 
02792                 if(strcmp(genconf.modernport, "0")==0) {
02793 #ifndef ISSERVER
02794                         err("inetd mode requires syslog");
02795 #endif
02796                         CLIENT* client = g_malloc(sizeof(CLIENT));
02797                         client->server = serve;
02798                         client->net = -1;
02799                         client->modern = TRUE;
02800                         client->exportsize = OFFT_MAX;
02801                         if(set_peername(0, client))
02802                                 exit(EXIT_FAILURE);
02803                         serveconnection(client);
02804                         return 0;
02805                 }
02806         }
02807     
02808         if(!servers || !servers->len) {
02809                 if(gerr && !(gerr->domain == NBDS_ERR
02810                             && gerr->code == NBDS_ERR_CFILE_NOTFOUND)) {
02811                         g_warning("Could not parse config file: %s", 
02812                                         gerr ? gerr->message : "Unknown error");
02813                 }
02814         }
02815         if(serve) {
02816                 g_warning("Specifying an export on the command line no longer uses the oldstyle protocol.");
02817         }
02818 
02819         if((!serve) && (!servers||!servers->len)) {
02820                 if(gerr)
02821                         g_message("No configured exports; quitting.");
02822                 exit(EXIT_FAILURE);
02823         }
02824         if (!dontfork)
02825                 daemonize();
02826 
02827         tpool = g_thread_pool_new(handle_request, NULL, genconf.threads, FALSE, NULL);
02828 
02829         setup_servers(servers, genconf.modernaddr, genconf.modernport,
02830                         genconf.unixsock);
02831         dousers(genconf.user, genconf.group);
02832 
02833         serveloop(servers);
02834 }