|
Network Block Device @PACKAGE_VERSION@
|
00001 /* 00002 * Network Block Device - server 00003 * 00004 * Copyright 1996-1998 Pavel Machek, distribute under GPL 00005 * <pavel@atrey.karlin.mff.cuni.cz> 00006 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org> 00007 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk> 00008 * 00009 * Version 1.0 - hopefully 64-bit-clean 00010 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au> 00011 * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es> 00012 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer 00013 * type, or don't have 64 bit file offsets by defining FS_32BIT 00014 * in compile options for nbd-server *only*. This can be done 00015 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the 00016 * original autoconf input file, or I would make it a configure 00017 * option.) Ken Yap <ken@nlc.net.au>. 00018 * Version 1.6 - fix autodetection of block device size and really make 64 bit 00019 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk> 00020 * Version 2.0 - Version synchronised with client 00021 * Version 2.1 - Reap zombie client processes when they exit. Removed 00022 * (uncommented) the _IO magic, it's no longer necessary. Wouter 00023 * Verhelst <wouter@debian.org> 00024 * Version 2.2 - Auto switch to read-only mode (usefull for floppies). 00025 * Version 2.3 - Fixed code so that Large File Support works. This 00026 * removes the FS_32BIT compile-time directive; define 00027 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be 00028 * using FS_32BIT. This will allow you to use files >2GB instead of 00029 * having to use the -m option. Wouter Verhelst <wouter@debian.org> 00030 * Version 2.4 - Added code to keep track of children, so that we can 00031 * properly kill them from initscripts. Add a call to daemon(), 00032 * so that processes don't think they have to wait for us, which is 00033 * interesting for initscripts as well. Wouter Verhelst 00034 * <wouter@debian.org> 00035 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to 00036 * zero after fork()ing, resulting in nbd-server going berserk 00037 * when it receives a signal with at least one child open. Wouter 00038 * Verhelst <wouter@debian.org> 00039 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235); 00040 * rectified type of mainloop::size_host (sf.net bugs 814435 and 00041 * 817385); close the PID file after writing to it, so that the 00042 * daemon can actually be found. Wouter Verhelst 00043 * <wouter@debian.org> 00044 * 10/10/2003 - Size of the data "size_host" was wrong and so was not 00045 * correctly put in network endianness. Many types were corrected 00046 * (size_t and off_t instead of int). <vspaceg@sourceforge.net> 00047 * Version 2.6 - Some code cleanup. 00048 * Version 2.7 - Better build system. 00049 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a 00050 * lot more work, but this is a start. Wouter Verhelst 00051 * <wouter@debian.org> 00052 * 16/03/2010 - Add IPv6 support. 00053 * Kitt Tientanopajai <kitt@kitty.in.th> 00054 * Neutron Soutmun <neo.neutron@gmail.com> 00055 * Suriya Soutmun <darksolar@gmail.com> 00056 */ 00057 00058 /* Includes LFS defines, which defines behaviours of some of the following 00059 * headers, so must come before those */ 00060 #include "lfs.h" 00061 00062 #include <assert.h> 00063 #include <sys/types.h> 00064 #include <sys/socket.h> 00065 #include <sys/stat.h> 00066 #include <sys/select.h> 00067 #include <sys/wait.h> 00068 #ifdef HAVE_SYS_IOCTL_H 00069 #include <sys/ioctl.h> 00070 #endif 00071 #include <sys/param.h> 00072 #ifdef HAVE_SYS_MOUNT_H 00073 #include <sys/mount.h> 00074 #endif 00075 #include <signal.h> 00076 #include <errno.h> 00077 #include <netinet/tcp.h> 00078 #include <netinet/in.h> 00079 #include <netdb.h> 00080 #include <syslog.h> 00081 #include <unistd.h> 00082 #include <stdbool.h> 00083 #include <stdio.h> 00084 #include <stdlib.h> 00085 #include <string.h> 00086 #include <fcntl.h> 00087 #if HAVE_FALLOC_PH 00088 #include <linux/falloc.h> 00089 #endif 00090 #include <arpa/inet.h> 00091 #include <strings.h> 00092 #include <dirent.h> 00093 #include <unistd.h> 00094 #include <getopt.h> 00095 #include <pwd.h> 00096 #include <grp.h> 00097 #include <dirent.h> 00098 #include <ctype.h> 00099 00100 #include <glib.h> 00101 00102 /* used in cliserv.h, so must come first */ 00103 #define MY_NAME "nbd_server" 00104 #include "cliserv.h" 00105 #include "nbd-debug.h" 00106 #include "netdb-compat.h" 00107 00108 #ifdef WITH_SDP 00109 #include <sdp_inet.h> 00110 #endif 00111 00112 /** Default position of the config file */ 00113 #ifndef SYSCONFDIR 00114 #define SYSCONFDIR "/etc" 00115 #endif 00116 #define CFILE SYSCONFDIR "/nbd-server/config" 00117 00118 /** Where our config file actually is */ 00119 gchar* config_file_pos; 00120 00121 /** global flags */ 00122 int glob_flags=0; 00123 00124 /* Whether we should avoid forking */ 00125 int dontfork = 0; 00126 00127 /** 00128 * The highest value a variable of type off_t can reach. This is a signed 00129 * integer, so set all bits except for the leftmost one. 00130 **/ 00131 #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1)) 00132 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */ 00133 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */ 00134 00135 /** Per-export flags: */ 00136 #define F_READONLY 1 /**< flag to tell us a file is readonly */ 00137 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */ 00138 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using 00139 copyonwrite */ 00140 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */ 00141 #define F_SPARSE 16 /**< flag to tell us copyronwrite should use a sparse file */ 00142 #define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */ 00143 #define F_SYNC 64 /**< Whether to fsync() after a write */ 00144 #define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */ 00145 #define F_FUA 256 /**< Whether server wants FUA to be sent by the client */ 00146 #define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */ 00147 #define F_TEMPORARY 1024 /**< Whether the backing file is temporary and should be created then unlinked */ 00148 #define F_TRIM 2048 /**< Whether server wants TRIM (discard) to be sent by the client */ 00149 #define F_FIXED 4096 /**< Client supports fixed new-style protocol (and can thus send us extra options */ 00150 00151 /** Global flags: */ 00152 #define F_OLDSTYLE 1 /**< Allow oldstyle (port-based) exports */ 00153 #define F_LIST 2 /**< Allow clients to list the exports on a server */ 00154 GHashTable *children; 00155 char pidfname[256]; /**< name of our PID file */ 00156 char pidftemplate[256]; /**< template to be used for the filename of the PID file */ 00157 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */ 00158 00159 #define NEG_INIT (1 << 0) 00160 #define NEG_OLD (1 << 1) 00161 #define NEG_MODERN (1 << 2) 00162 00163 #include <nbdsrv.h> 00164 00165 static volatile sig_atomic_t is_sighup_caught; /**< Flag set by SIGHUP 00166 handler to mark a 00167 reconfiguration 00168 request */ 00169 00170 GArray* modernsocks; /**< Sockets for the modern handler. Not used 00171 if a client was only specified on the 00172 command line; only port used if 00173 oldstyle is set to false (and then the 00174 command-line client isn't used, gna gna). 00175 This may be more than one socket on 00176 systems that don't support serving IPv4 00177 and IPv6 from the same socket (like, 00178 e.g., FreeBSD) */ 00179 00180 bool logged_oversized=false; /**< whether we logged oversized requests already */ 00181 00182 /** 00183 * Variables associated with an open file 00184 **/ 00185 typedef struct { 00186 int fhandle; /**< file descriptor */ 00187 off_t startoff; /**< starting offset of this file */ 00188 } FILE_INFO; 00189 00190 /** 00191 * Type of configuration file values 00192 **/ 00193 typedef enum { 00194 PARAM_INT, /**< This parameter is an integer */ 00195 PARAM_INT64, /**< This parameter is an integer */ 00196 PARAM_STRING, /**< This parameter is a string */ 00197 PARAM_BOOL, /**< This parameter is a boolean */ 00198 } PARAM_TYPE; 00199 00200 /** 00201 * Configuration file values 00202 **/ 00203 typedef struct { 00204 gchar *paramname; /**< Name of the parameter, as it appears in 00205 the config file */ 00206 gboolean required; /**< Whether this is a required (as opposed to 00207 optional) parameter */ 00208 PARAM_TYPE ptype; /**< Type of the parameter. */ 00209 gpointer target; /**< Pointer to where the data of this 00210 parameter should be written. If ptype is 00211 PARAM_BOOL, the data is or'ed rather than 00212 overwritten. */ 00213 gint flagval; /**< Flag mask for this parameter in case ptype 00214 is PARAM_BOOL. */ 00215 } PARAM; 00216 00217 /** 00218 * Configuration file values of the "generic" section 00219 **/ 00220 struct generic_conf { 00221 gchar *user; /**< user we run the server as */ 00222 gchar *group; /**< group we run running as */ 00223 gchar *modernaddr; /**< address of the modern socket */ 00224 gchar *modernport; /**< port of the modern socket */ 00225 gint flags; /**< global flags */ 00226 }; 00227 00228 /** 00229 * Translate a command name into human readable form 00230 * 00231 * @param command The command number (after applying NBD_CMD_MASK_COMMAND) 00232 * @return pointer to the command name 00233 **/ 00234 static inline const char * getcommandname(uint64_t command) { 00235 switch (command) { 00236 case NBD_CMD_READ: 00237 return "NBD_CMD_READ"; 00238 case NBD_CMD_WRITE: 00239 return "NBD_CMD_WRITE"; 00240 case NBD_CMD_DISC: 00241 return "NBD_CMD_DISC"; 00242 case NBD_CMD_FLUSH: 00243 return "NBD_CMD_FLUSH"; 00244 case NBD_CMD_TRIM: 00245 return "NBD_CMD_TRIM"; 00246 default: 00247 return "UNKNOWN"; 00248 } 00249 } 00250 00251 /** 00252 * Read data from a file descriptor into a buffer 00253 * 00254 * @param f a file descriptor 00255 * @param buf a buffer 00256 * @param len the number of bytes to be read 00257 **/ 00258 static inline void readit(int f, void *buf, size_t len) { 00259 ssize_t res; 00260 while (len > 0) { 00261 DEBUG("*"); 00262 if ((res = read(f, buf, len)) <= 0) { 00263 if(errno != EAGAIN) { 00264 err("Read failed: %m"); 00265 } 00266 } else { 00267 len -= res; 00268 buf += res; 00269 } 00270 } 00271 } 00272 00273 /** 00274 * Consume data from an FD that we don't want 00275 * 00276 * @param f a file descriptor 00277 * @param buf a buffer 00278 * @param len the number of bytes to consume 00279 * @param bufsiz the size of the buffer 00280 **/ 00281 static inline void consume(int f, void * buf, size_t len, size_t bufsiz) { 00282 size_t curlen; 00283 while (len>0) { 00284 curlen = (len>bufsiz)?bufsiz:len; 00285 readit(f, buf, curlen); 00286 len -= curlen; 00287 } 00288 } 00289 00290 /** 00291 * Write data from a buffer into a filedescriptor 00292 * 00293 * @param f a file descriptor 00294 * @param buf a buffer containing data 00295 * @param len the number of bytes to be written 00296 **/ 00297 static inline void writeit(int f, void *buf, size_t len) { 00298 ssize_t res; 00299 while (len > 0) { 00300 DEBUG("+"); 00301 if ((res = write(f, buf, len)) <= 0) 00302 err("Send failed: %m"); 00303 len -= res; 00304 buf += res; 00305 } 00306 } 00307 00308 /** 00309 * Print out a message about how to use nbd-server. Split out to a separate 00310 * function so that we can call it from multiple places 00311 */ 00312 void usage() { 00313 printf("This is nbd-server version " VERSION "\n"); 00314 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n" 00315 "\t-r|--read-only\t\tread only\n" 00316 "\t-m|--multi-file\t\tmultiple file\n" 00317 "\t-c|--copy-on-write\tcopy on write\n" 00318 "\t-C|--config-file\tspecify an alternate configuration file\n" 00319 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n" 00320 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n" 00321 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n" 00322 "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n" 00323 "\tif port is set to 0, stdin is used (for running from inetd).\n" 00324 "\tif file_to_export contains '%%s', it is substituted with the IP\n" 00325 "\t\taddress of the machine trying to connect\n" 00326 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n"); 00327 printf("Using configuration file %s\n", CFILE); 00328 } 00329 00330 /* Dumps a config file section of the given SERVER*, and exits. */ 00331 void dump_section(SERVER* serve, gchar* section_header) { 00332 printf("[%s]\n", section_header); 00333 printf("\texportname = %s\n", serve->exportname); 00334 printf("\tlistenaddr = %s\n", serve->listenaddr); 00335 printf("\tport = %d\n", serve->port); 00336 if(serve->flags & F_READONLY) { 00337 printf("\treadonly = true\n"); 00338 } 00339 if(serve->flags & F_MULTIFILE) { 00340 printf("\tmultifile = true\n"); 00341 } 00342 if(serve->flags & F_COPYONWRITE) { 00343 printf("\tcopyonwrite = true\n"); 00344 } 00345 if(serve->expected_size) { 00346 printf("\tfilesize = %lld\n", (long long int)serve->expected_size); 00347 } 00348 if(serve->authname) { 00349 printf("\tauthfile = %s\n", serve->authname); 00350 } 00351 exit(EXIT_SUCCESS); 00352 } 00353 00354 /** 00355 * Parse the command line. 00356 * 00357 * @param argc the argc argument to main() 00358 * @param argv the argv argument to main() 00359 **/ 00360 SERVER* cmdline(int argc, char *argv[]) { 00361 int i=0; 00362 int nonspecial=0; 00363 int c; 00364 struct option long_options[] = { 00365 {"read-only", no_argument, NULL, 'r'}, 00366 {"multi-file", no_argument, NULL, 'm'}, 00367 {"copy-on-write", no_argument, NULL, 'c'}, 00368 {"dont-fork", no_argument, NULL, 'd'}, 00369 {"authorize-file", required_argument, NULL, 'l'}, 00370 {"config-file", required_argument, NULL, 'C'}, 00371 {"pid-file", required_argument, NULL, 'p'}, 00372 {"output-config", required_argument, NULL, 'o'}, 00373 {"max-connection", required_argument, NULL, 'M'}, 00374 {0,0,0,0} 00375 }; 00376 SERVER *serve; 00377 off_t es; 00378 size_t last; 00379 char suffix; 00380 gboolean do_output=FALSE; 00381 gchar* section_header=""; 00382 gchar** addr_port; 00383 00384 if(argc==1) { 00385 return NULL; 00386 } 00387 serve=g_new0(SERVER, 1); 00388 serve->authname = g_strdup(default_authname); 00389 serve->virtstyle=VIRT_IPLIT; 00390 while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) { 00391 switch (c) { 00392 case 1: 00393 /* non-option argument */ 00394 switch(nonspecial++) { 00395 case 0: 00396 if(strchr(optarg, ':') == strrchr(optarg, ':')) { 00397 addr_port=g_strsplit(optarg, ":", 2); 00398 00399 /* Check for "@" - maybe user using this separator 00400 for IPv4 address */ 00401 if(!addr_port[1]) { 00402 g_strfreev(addr_port); 00403 addr_port=g_strsplit(optarg, "@", 2); 00404 } 00405 } else { 00406 addr_port=g_strsplit(optarg, "@", 2); 00407 } 00408 00409 if(addr_port[1]) { 00410 serve->port=strtol(addr_port[1], NULL, 0); 00411 serve->listenaddr=g_strdup(addr_port[0]); 00412 } else { 00413 serve->listenaddr=NULL; 00414 serve->port=strtol(addr_port[0], NULL, 0); 00415 } 00416 g_strfreev(addr_port); 00417 break; 00418 case 1: 00419 serve->exportname = g_strdup(optarg); 00420 if(serve->exportname[0] != '/') { 00421 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n"); 00422 exit(EXIT_FAILURE); 00423 } 00424 break; 00425 case 2: 00426 last=strlen(optarg)-1; 00427 suffix=optarg[last]; 00428 if (suffix == 'k' || suffix == 'K' || 00429 suffix == 'm' || suffix == 'M') 00430 optarg[last] = '\0'; 00431 es = (off_t)atoll(optarg); 00432 switch (suffix) { 00433 case 'm': 00434 case 'M': es <<= 10; 00435 case 'k': 00436 case 'K': es <<= 10; 00437 default : break; 00438 } 00439 serve->expected_size = es; 00440 break; 00441 } 00442 break; 00443 case 'r': 00444 serve->flags |= F_READONLY; 00445 break; 00446 case 'm': 00447 serve->flags |= F_MULTIFILE; 00448 break; 00449 case 'o': 00450 do_output = TRUE; 00451 section_header = g_strdup(optarg); 00452 break; 00453 case 'p': 00454 strncpy(pidftemplate, optarg, 256); 00455 break; 00456 case 'c': 00457 serve->flags |=F_COPYONWRITE; 00458 break; 00459 case 'd': 00460 dontfork = 1; 00461 break; 00462 case 'C': 00463 g_free(config_file_pos); 00464 config_file_pos=g_strdup(optarg); 00465 break; 00466 case 'l': 00467 g_free(serve->authname); 00468 serve->authname=g_strdup(optarg); 00469 break; 00470 case 'M': 00471 serve->max_connections = strtol(optarg, NULL, 0); 00472 break; 00473 default: 00474 usage(); 00475 exit(EXIT_FAILURE); 00476 break; 00477 } 00478 } 00479 /* What's left: the port to export, the name of the to be exported 00480 * file, and, optionally, the size of the file, in that order. */ 00481 if(nonspecial<2) { 00482 g_free(serve); 00483 serve=NULL; 00484 } else { 00485 glob_flags |= F_OLDSTYLE; 00486 } 00487 if(do_output) { 00488 if(!serve) { 00489 g_critical("Need a complete configuration on the command line to output a config file section!"); 00490 exit(EXIT_FAILURE); 00491 } 00492 dump_section(serve, section_header); 00493 } 00494 return serve; 00495 } 00496 00497 /* forward definition of parse_cfile */ 00498 GArray* parse_cfile(gchar* f, struct generic_conf *genconf, GError** e); 00499 00500 /** 00501 * Parse config file snippets in a directory. Uses readdir() and friends 00502 * to find files and open them, then passes them on to parse_cfile 00503 * with have_global set false 00504 **/ 00505 GArray* do_cfile_dir(gchar* dir, GError** e) { 00506 DIR* dirh = opendir(dir); 00507 struct dirent* de; 00508 gchar* fname; 00509 GArray* retval = NULL; 00510 GArray* tmp; 00511 struct stat stbuf; 00512 00513 if(!dirh) { 00514 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_DIR_UNKNOWN, "Invalid directory specified: %s", strerror(errno)); 00515 return NULL; 00516 } 00517 errno=0; 00518 while((de = readdir(dirh))) { 00519 int saved_errno=errno; 00520 fname = g_build_filename(dir, de->d_name, NULL); 00521 switch(de->d_type) { 00522 case DT_UNKNOWN: 00523 /* Filesystem doesn't return type of 00524 * file through readdir. Run stat() on 00525 * the file instead */ 00526 if(stat(fname, &stbuf)) { 00527 perror("stat"); 00528 goto err_out; 00529 } 00530 if (!S_ISREG(stbuf.st_mode)) { 00531 goto next; 00532 } 00533 case DT_REG: 00534 /* Skip unless the name ends with '.conf' */ 00535 if(strcmp((de->d_name + strlen(de->d_name) - 5), ".conf")) { 00536 goto next; 00537 } 00538 tmp = parse_cfile(fname, NULL, e); 00539 errno=saved_errno; 00540 if(*e) { 00541 goto err_out; 00542 } 00543 if(!retval) 00544 retval = g_array_new(FALSE, TRUE, sizeof(SERVER)); 00545 retval = g_array_append_vals(retval, tmp->data, tmp->len); 00546 g_array_free(tmp, TRUE); 00547 default: 00548 break; 00549 } 00550 next: 00551 g_free(fname); 00552 } 00553 if(errno) { 00554 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_READDIR_ERR, "Error trying to read directory: %s", strerror(errno)); 00555 err_out: 00556 if(retval) 00557 g_array_free(retval, TRUE); 00558 if(dirh) 00559 closedir(dirh); 00560 return NULL; 00561 } 00562 return retval; 00563 } 00564 00565 /** 00566 * Parse the config file. 00567 * 00568 * @param f the name of the config file 00569 * 00570 * @param genconf a pointer to generic configuration which will get 00571 * updated with parsed values. If NULL, then parsed generic 00572 * configuration values are safely and silently discarded. 00573 * 00574 * @param e a GError. Error code can be any of the following: 00575 * NBDS_ERR_CFILE_NOTFOUND, NBDS_ERR_CFILE_MISSING_GENERIC, 00576 * NBDS_ERR_CFILE_VALUE_INVALID, NBDS_ERR_CFILE_VALUE_UNSUPPORTED 00577 * or NBDS_ERR_CFILE_NO_EXPORTS. @see NBDS_ERRS. 00578 * 00579 * @return a Array of SERVER* pointers, If the config file is empty or does not 00580 * exist, returns an empty GHashTable; if the config file contains an 00581 * error, returns NULL, and e is set appropriately 00582 **/ 00583 GArray* parse_cfile(gchar* f, struct generic_conf *const genconf, GError** e) { 00584 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s"; 00585 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s"; 00586 gchar* cfdir = NULL; 00587 SERVER s; 00588 gchar *virtstyle=NULL; 00589 PARAM lp[] = { 00590 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 }, 00591 { "port", TRUE, PARAM_INT, &(s.port), 0 }, 00592 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 }, 00593 { "filesize", FALSE, PARAM_OFFT, &(s.expected_size), 0 }, 00594 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 }, 00595 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 }, 00596 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 }, 00597 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 }, 00598 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY }, 00599 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE }, 00600 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE }, 00601 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE }, 00602 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP }, 00603 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC }, 00604 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH }, 00605 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA }, 00606 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL }, 00607 { "temporary", FALSE, PARAM_BOOL, &(s.flags), F_TEMPORARY }, 00608 { "trim", FALSE, PARAM_BOOL, &(s.flags), F_TRIM }, 00609 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 }, 00610 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 }, 00611 }; 00612 const int lp_size=sizeof(lp)/sizeof(PARAM); 00613 struct generic_conf genconftmp; 00614 PARAM gp[] = { 00615 { "user", FALSE, PARAM_STRING, &(genconftmp.user), 0 }, 00616 { "group", FALSE, PARAM_STRING, &(genconftmp.group), 0 }, 00617 { "oldstyle", FALSE, PARAM_BOOL, &(genconftmp.flags), F_OLDSTYLE }, 00618 { "listenaddr", FALSE, PARAM_STRING, &(genconftmp.modernaddr), 0 }, 00619 { "port", FALSE, PARAM_STRING, &(genconftmp.modernport), 0 }, 00620 { "includedir", FALSE, PARAM_STRING, &cfdir, 0 }, 00621 { "allowlist", FALSE, PARAM_BOOL, &(genconftmp.flags), F_LIST }, 00622 }; 00623 PARAM* p=gp; 00624 int p_size=sizeof(gp)/sizeof(PARAM); 00625 GKeyFile *cfile; 00626 GError *err = NULL; 00627 const char *err_msg=NULL; 00628 GArray *retval=NULL; 00629 gchar **groups; 00630 gboolean bval; 00631 gint ival; 00632 gint64 i64val; 00633 gchar* sval; 00634 gchar* startgroup; 00635 gint i; 00636 gint j; 00637 00638 memset(&genconftmp, 0, sizeof(struct generic_conf)); 00639 00640 if (genconf) { 00641 /* Use the passed configuration values as defaults. The 00642 * parsing algorithm below updates all parameter targets 00643 * found from configuration files. */ 00644 memcpy(&genconftmp, genconf, sizeof(struct generic_conf)); 00645 } 00646 00647 cfile = g_key_file_new(); 00648 retval = g_array_new(FALSE, TRUE, sizeof(SERVER)); 00649 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS | 00650 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) { 00651 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NOTFOUND, "Could not open config file %s: %s", 00652 f, err->message); 00653 g_key_file_free(cfile); 00654 return retval; 00655 } 00656 startgroup = g_key_file_get_start_group(cfile); 00657 if((!startgroup || strcmp(startgroup, "generic")) && genconf) { 00658 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!"); 00659 g_key_file_free(cfile); 00660 return NULL; 00661 } 00662 groups = g_key_file_get_groups(cfile, NULL); 00663 for(i=0;groups[i];i++) { 00664 memset(&s, '\0', sizeof(SERVER)); 00665 00666 /* After the [generic] group or when we're parsing an include 00667 * directory, start parsing exports */ 00668 if(i==1 || !genconf) { 00669 p=lp; 00670 p_size=lp_size; 00671 if(!(glob_flags & F_OLDSTYLE)) { 00672 lp[1].required = FALSE; 00673 } 00674 } 00675 for(j=0;j<p_size;j++) { 00676 assert(p[j].target != NULL); 00677 assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL||p[j].ptype==PARAM_INT64); 00678 switch(p[j].ptype) { 00679 case PARAM_INT: 00680 ival = g_key_file_get_integer(cfile, 00681 groups[i], 00682 p[j].paramname, 00683 &err); 00684 if(!err) { 00685 *((gint*)p[j].target) = ival; 00686 } 00687 break; 00688 case PARAM_INT64: 00689 i64val = g_key_file_get_int64(cfile, 00690 groups[i], 00691 p[j].paramname, 00692 &err); 00693 if(!err) { 00694 *((gint64*)p[j].target) = i64val; 00695 } 00696 break; 00697 case PARAM_STRING: 00698 sval = g_key_file_get_string(cfile, 00699 groups[i], 00700 p[j].paramname, 00701 &err); 00702 if(!err) { 00703 *((gchar**)p[j].target) = sval; 00704 } 00705 break; 00706 case PARAM_BOOL: 00707 bval = g_key_file_get_boolean(cfile, 00708 groups[i], 00709 p[j].paramname, &err); 00710 if(!err) { 00711 if(bval) { 00712 *((gint*)p[j].target) |= p[j].flagval; 00713 } else { 00714 *((gint*)p[j].target) &= ~(p[j].flagval); 00715 } 00716 } 00717 break; 00718 } 00719 if(err) { 00720 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) { 00721 if(!p[j].required) { 00722 /* Ignore not-found error for optional values */ 00723 g_clear_error(&err); 00724 continue; 00725 } else { 00726 err_msg = MISSING_REQUIRED_ERROR; 00727 } 00728 } else { 00729 err_msg = DEFAULT_ERROR; 00730 } 00731 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message); 00732 g_array_free(retval, TRUE); 00733 g_error_free(err); 00734 g_key_file_free(cfile); 00735 return NULL; 00736 } 00737 } 00738 if(virtstyle) { 00739 if(!strncmp(virtstyle, "none", 4)) { 00740 s.virtstyle=VIRT_NONE; 00741 } else if(!strncmp(virtstyle, "ipliteral", 9)) { 00742 s.virtstyle=VIRT_IPLIT; 00743 } else if(!strncmp(virtstyle, "iphash", 6)) { 00744 s.virtstyle=VIRT_IPHASH; 00745 } else if(!strncmp(virtstyle, "cidrhash", 8)) { 00746 s.virtstyle=VIRT_CIDR; 00747 if(strlen(virtstyle)<10) { 00748 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]); 00749 g_array_free(retval, TRUE); 00750 g_key_file_free(cfile); 00751 return NULL; 00752 } 00753 s.cidrlen=strtol(virtstyle+8, NULL, 0); 00754 } else { 00755 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]); 00756 g_array_free(retval, TRUE); 00757 g_key_file_free(cfile); 00758 return NULL; 00759 } 00760 } else { 00761 s.virtstyle=VIRT_IPLIT; 00762 } 00763 if(s.port && !(glob_flags & F_OLDSTYLE)) { 00764 g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect."); 00765 g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info"); 00766 } 00767 /* Don't need to free this, it's not our string */ 00768 virtstyle=NULL; 00769 /* Don't append values for the [generic] group */ 00770 if(i>0 || !genconf) { 00771 s.socket_family = AF_UNSPEC; 00772 s.servename = groups[i]; 00773 00774 append_serve(&s, retval); 00775 } 00776 #ifndef WITH_SDP 00777 if(s.flags & F_SDP) { 00778 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]); 00779 g_array_free(retval, TRUE); 00780 g_key_file_free(cfile); 00781 return NULL; 00782 } 00783 #endif 00784 } 00785 g_key_file_free(cfile); 00786 if(cfdir) { 00787 GArray* extra = do_cfile_dir(cfdir, e); 00788 if(extra) { 00789 retval = g_array_append_vals(retval, extra->data, extra->len); 00790 i+=extra->len; 00791 g_array_free(extra, TRUE); 00792 } else { 00793 if(*e) { 00794 g_array_free(retval, TRUE); 00795 return NULL; 00796 } 00797 } 00798 } 00799 if(i==1 && genconf) { 00800 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NO_EXPORTS, "The config file does not specify any exports"); 00801 } 00802 00803 if (genconf) { 00804 /* Return the updated generic configuration through the 00805 * pointer parameter. */ 00806 memcpy(genconf, &genconftmp, sizeof(struct generic_conf)); 00807 } 00808 00809 return retval; 00810 } 00811 00812 /** 00813 * Signal handler for SIGCHLD 00814 * @param s the signal we're handling (must be SIGCHLD, or something 00815 * is severely wrong) 00816 **/ 00817 void sigchld_handler(int s) { 00818 int status; 00819 int* i; 00820 pid_t pid; 00821 00822 while((pid=waitpid(-1, &status, WNOHANG)) > 0) { 00823 if(WIFEXITED(status)) { 00824 msg(LOG_INFO, "Child exited with %d", WEXITSTATUS(status)); 00825 } 00826 i=g_hash_table_lookup(children, &pid); 00827 if(!i) { 00828 msg(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid); 00829 } else { 00830 DEBUG("Removing %d from the list of children", pid); 00831 g_hash_table_remove(children, &pid); 00832 } 00833 } 00834 } 00835 00836 /** 00837 * Kill a child. Called from sigterm_handler::g_hash_table_foreach. 00838 * 00839 * @param key the key 00840 * @param value the value corresponding to the above key 00841 * @param user_data a pointer which we always set to 1, so that we know what 00842 * will happen next. 00843 **/ 00844 void killchild(gpointer key, gpointer value, gpointer user_data) { 00845 pid_t *pid=value; 00846 00847 kill(*pid, SIGTERM); 00848 } 00849 00850 /** 00851 * Handle SIGTERM and dispatch it to our children 00852 * @param s the signal we're handling (must be SIGTERM, or something 00853 * is severely wrong). 00854 **/ 00855 void sigterm_handler(int s) { 00856 g_hash_table_foreach(children, killchild, NULL); 00857 unlink(pidfname); 00858 00859 exit(EXIT_SUCCESS); 00860 } 00861 00862 /** 00863 * Handle SIGHUP by setting atomically a flag which will be evaluated in 00864 * the main loop of the root server process. This allows us to separate 00865 * the signal catching from th actual task triggered by SIGHUP and hence 00866 * processing in the interrupt context is kept as minimial as possible. 00867 * 00868 * @param s the signal we're handling (must be SIGHUP, or something 00869 * is severely wrong). 00870 **/ 00871 static void sighup_handler(const int s G_GNUC_UNUSED) { 00872 is_sighup_caught = 1; 00873 } 00874 00875 /** 00876 * Get the file handle and offset, given an export offset. 00877 * 00878 * @param export An array of export files 00879 * @param a The offset to get corresponding file/offset for 00880 * @param fhandle [out] File descriptor 00881 * @param foffset [out] Offset into fhandle 00882 * @param maxbytes [out] Tells how many bytes can be read/written 00883 * from fhandle starting at foffset (0 if there is no limit) 00884 * @return 0 on success, -1 on failure 00885 **/ 00886 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) { 00887 /* Negative offset not allowed */ 00888 if(a < 0) 00889 return -1; 00890 00891 /* Binary search for last file with starting offset <= a */ 00892 FILE_INFO fi; 00893 int start = 0; 00894 int end = export->len - 1; 00895 while( start <= end ) { 00896 int mid = (start + end) / 2; 00897 fi = g_array_index(export, FILE_INFO, mid); 00898 if( fi.startoff < a ) { 00899 start = mid + 1; 00900 } else if( fi.startoff > a ) { 00901 end = mid - 1; 00902 } else { 00903 start = end = mid; 00904 break; 00905 } 00906 } 00907 00908 /* end should never go negative, since first startoff is 0 and a >= 0 */ 00909 assert(end >= 0); 00910 00911 fi = g_array_index(export, FILE_INFO, end); 00912 *fhandle = fi.fhandle; 00913 *foffset = a - fi.startoff; 00914 *maxbytes = 0; 00915 if( end+1 < export->len ) { 00916 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1); 00917 *maxbytes = fi_next.startoff - a; 00918 } 00919 00920 return 0; 00921 } 00922 00923 /** 00924 * seek to a position in a file, with error handling. 00925 * @param handle a filedescriptor 00926 * @param a position to seek to 00927 * @todo get rid of this. 00928 **/ 00929 void myseek(int handle,off_t a) { 00930 if (lseek(handle, a, SEEK_SET) < 0) { 00931 err("Can not seek locally!\n"); 00932 } 00933 } 00934 00935 /** 00936 * Write an amount of bytes at a given offset to the right file. This 00937 * abstracts the write-side of the multiple file option. 00938 * 00939 * @param a The offset where the write should start 00940 * @param buf The buffer to write from 00941 * @param len The length of buf 00942 * @param client The client we're serving for 00943 * @param fua Flag to indicate 'Force Unit Access' 00944 * @return The number of bytes actually written, or -1 in case of an error 00945 **/ 00946 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { 00947 int fhandle; 00948 off_t foffset; 00949 size_t maxbytes; 00950 ssize_t retval; 00951 00952 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes)) 00953 return -1; 00954 if(maxbytes && len > maxbytes) 00955 len = maxbytes; 00956 00957 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua); 00958 00959 myseek(fhandle, foffset); 00960 retval = write(fhandle, buf, len); 00961 if(client->server->flags & F_SYNC) { 00962 fsync(fhandle); 00963 } else if (fua) { 00964 00965 /* This is where we would do the following 00966 * #ifdef USE_SYNC_FILE_RANGE 00967 * However, we don't, for the reasons set out below 00968 * by Christoph Hellwig <hch@infradead.org> 00969 * 00970 * [BEGINS] 00971 * fdatasync is equivalent to fsync except that it does not flush 00972 * non-essential metadata (basically just timestamps in practice), but it 00973 * does flush metadata requried to find the data again, e.g. allocation 00974 * information and extent maps. sync_file_range does nothing but flush 00975 * out pagecache content - it means you basically won't get your data 00976 * back in case of a crash if you either: 00977 * 00978 * a) have a volatile write cache in your disk (e.g. any normal SATA disk) 00979 * b) are using a sparse file on a filesystem 00980 * c) are using a fallocate-preallocated file on a filesystem 00981 * d) use any file on a COW filesystem like btrfs 00982 * 00983 * e.g. it only does anything useful for you if you do not have a volatile 00984 * write cache, and either use a raw block device node, or just overwrite 00985 * an already fully allocated (and not preallocated) file on a non-COW 00986 * filesystem. 00987 * [ENDS] 00988 * 00989 * What we should do is open a second FD with O_DSYNC set, then write to 00990 * that when appropriate. However, with a Linux client, every REQ_FUA 00991 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance 00992 * problems. 00993 * 00994 */ 00995 #if 0 00996 sync_file_range(fhandle, foffset, len, 00997 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | 00998 SYNC_FILE_RANGE_WAIT_AFTER); 00999 #else 01000 fdatasync(fhandle); 01001 #endif 01002 } 01003 return retval; 01004 } 01005 01006 /** 01007 * Call rawexpwrite repeatedly until all data has been written. 01008 * 01009 * @param a The offset where the write should start 01010 * @param buf The buffer to write from 01011 * @param len The length of buf 01012 * @param client The client we're serving for 01013 * @param fua Flag to indicate 'Force Unit Access' 01014 * @return 0 on success, nonzero on failure 01015 **/ 01016 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) { 01017 ssize_t ret=0; 01018 01019 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) { 01020 a += ret; 01021 buf += ret; 01022 len -= ret; 01023 } 01024 return (ret < 0 || len != 0); 01025 } 01026 01027 /** 01028 * Read an amount of bytes at a given offset from the right file. This 01029 * abstracts the read-side of the multiple files option. 01030 * 01031 * @param a The offset where the read should start 01032 * @param buf A buffer to read into 01033 * @param len The size of buf 01034 * @param client The client we're serving for 01035 * @return The number of bytes actually read, or -1 in case of an 01036 * error. 01037 **/ 01038 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) { 01039 int fhandle; 01040 off_t foffset; 01041 size_t maxbytes; 01042 01043 if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes)) 01044 return -1; 01045 if(maxbytes && len > maxbytes) 01046 len = maxbytes; 01047 01048 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len); 01049 01050 myseek(fhandle, foffset); 01051 return read(fhandle, buf, len); 01052 } 01053 01054 /** 01055 * Call rawexpread repeatedly until all data has been read. 01056 * @return 0 on success, nonzero on failure 01057 **/ 01058 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) { 01059 ssize_t ret=0; 01060 01061 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) { 01062 a += ret; 01063 buf += ret; 01064 len -= ret; 01065 } 01066 return (ret < 0 || len != 0); 01067 } 01068 01069 /** 01070 * Read an amount of bytes at a given offset from the right file. This 01071 * abstracts the read-side of the copyonwrite stuff, and calls 01072 * rawexpread() with the right parameters to do the actual work. 01073 * @param a The offset where the read should start 01074 * @param buf A buffer to read into 01075 * @param len The size of buf 01076 * @param client The client we're going to read for 01077 * @return 0 on success, nonzero on failure 01078 **/ 01079 int expread(off_t a, char *buf, size_t len, CLIENT *client) { 01080 off_t rdlen, offset; 01081 off_t mapcnt, mapl, maph, pagestart; 01082 01083 if (!(client->server->flags & F_COPYONWRITE)) 01084 return(rawexpread_fully(a, buf, len, client)); 01085 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); 01086 01087 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE; 01088 01089 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) { 01090 pagestart=mapcnt*DIFFPAGESIZE; 01091 offset=a-pagestart; 01092 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ? 01093 len : (size_t)DIFFPAGESIZE-offset; 01094 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ 01095 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, 01096 (unsigned long)(client->difmap[mapcnt])); 01097 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset); 01098 if (read(client->difffile, buf, rdlen) != rdlen) return -1; 01099 } else { /* the block is not there */ 01100 DEBUG("Page %llu is not here, we read the original one\n", 01101 (unsigned long long)mapcnt); 01102 if(rawexpread_fully(a, buf, rdlen, client)) return -1; 01103 } 01104 len-=rdlen; a+=rdlen; buf+=rdlen; 01105 } 01106 return 0; 01107 } 01108 01109 /** 01110 * Write an amount of bytes at a given offset to the right file. This 01111 * abstracts the write-side of the copyonwrite option, and calls 01112 * rawexpwrite() with the right parameters to do the actual work. 01113 * 01114 * @param a The offset where the write should start 01115 * @param buf The buffer to write from 01116 * @param len The length of buf 01117 * @param client The client we're going to write for. 01118 * @param fua Flag to indicate 'Force Unit Access' 01119 * @return 0 on success, nonzero on failure 01120 **/ 01121 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { 01122 char pagebuf[DIFFPAGESIZE]; 01123 off_t mapcnt,mapl,maph; 01124 off_t wrlen,rdlen; 01125 off_t pagestart; 01126 off_t offset; 01127 01128 if (!(client->server->flags & F_COPYONWRITE)) 01129 return(rawexpwrite_fully(a, buf, len, client, fua)); 01130 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); 01131 01132 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ; 01133 01134 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) { 01135 pagestart=mapcnt*DIFFPAGESIZE ; 01136 offset=a-pagestart ; 01137 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ? 01138 len : (size_t)DIFFPAGESIZE-offset; 01139 01140 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ 01141 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, 01142 (unsigned long)(client->difmap[mapcnt])) ; 01143 myseek(client->difffile, 01144 client->difmap[mapcnt]*DIFFPAGESIZE+offset); 01145 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ; 01146 } else { /* the block is not there */ 01147 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ; 01148 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++; 01149 DEBUG("Page %llu is not here, we put it at %lu\n", 01150 (unsigned long long)mapcnt, 01151 (unsigned long)(client->difmap[mapcnt])); 01152 rdlen=DIFFPAGESIZE ; 01153 if (rawexpread_fully(pagestart, pagebuf, rdlen, client)) 01154 return -1; 01155 memcpy(pagebuf+offset,buf,wrlen) ; 01156 if (write(client->difffile, pagebuf, DIFFPAGESIZE) != 01157 DIFFPAGESIZE) 01158 return -1; 01159 } 01160 len-=wrlen ; a+=wrlen ; buf+=wrlen ; 01161 } 01162 if (client->server->flags & F_SYNC) { 01163 fsync(client->difffile); 01164 } else if (fua) { 01165 /* open question: would it be cheaper to do multiple sync_file_ranges? 01166 as we iterate through the above? 01167 */ 01168 fdatasync(client->difffile); 01169 } 01170 return 0; 01171 } 01172 01173 /** 01174 * Flush data to a client 01175 * 01176 * @param client The client we're going to write for. 01177 * @return 0 on success, nonzero on failure 01178 **/ 01179 int expflush(CLIENT *client) { 01180 gint i; 01181 01182 if (client->server->flags & F_COPYONWRITE) { 01183 return fsync(client->difffile); 01184 } 01185 01186 for (i = 0; i < client->export->len; i++) { 01187 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i); 01188 if (fsync(fi.fhandle) < 0) 01189 return -1; 01190 } 01191 01192 return 0; 01193 } 01194 01195 /* 01196 * If the current system supports it, call fallocate() on the backend 01197 * file to resparsify stuff that isn't needed anymore (see NBD_CMD_TRIM) 01198 */ 01199 int exptrim(struct nbd_request* req, CLIENT* client) { 01200 #if HAVE_FALLOC_PH 01201 FILE_INFO prev = g_array_index(client->export, FILE_INFO, 0); 01202 FILE_INFO cur = prev; 01203 int i = 1; 01204 /* We're running on a system that supports the 01205 * FALLOC_FL_PUNCH_HOLE option to re-sparsify a file */ 01206 do { 01207 if(i<client->export->len) { 01208 cur = g_array_index(client->export, FILE_INFO, i); 01209 } 01210 if(prev.startoff <= req->from) { 01211 off_t curoff = req->from - prev.startoff; 01212 off_t curlen = cur.startoff - prev.startoff - curoff; 01213 fallocate(prev.fhandle, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, curoff, curlen); 01214 } 01215 prev = cur; 01216 } while(i < client->export->len && cur.startoff < (req->from + req->len)); 01217 DEBUG("Performed TRIM request from %llu to %llu", (unsigned long long) req->from, (unsigned long long) req->len); 01218 #else 01219 DEBUG("Ignoring TRIM request (not supported on current platform"); 01220 #endif 01221 return 0; 01222 } 01223 01224 static void send_reply(uint32_t opt, int net, uint32_t reply_type, size_t datasize, void* data) { 01225 uint64_t magic = htonll(0x3e889045565a9LL); 01226 reply_type = htonl(reply_type); 01227 uint32_t datsize = htonl(datasize); 01228 opt = htonl(opt); 01229 struct iovec v_data[] = { 01230 { &magic, sizeof(magic) }, 01231 { &opt, sizeof(opt) }, 01232 { &reply_type, sizeof(reply_type) }, 01233 { &datsize, sizeof(datsize) }, 01234 { data, datasize }, 01235 }; 01236 size_t total = sizeof(magic) + sizeof(opt) + sizeof(reply_type) + sizeof(datsize) + datasize; 01237 ssize_t sent = writev(net, v_data, 5); 01238 if(sent != total) { 01239 perror("E: couldn't write enough data:"); 01240 } 01241 } 01242 01243 static CLIENT* handle_export_name(uint32_t opt, int net, GArray* servers, uint32_t cflags) { 01244 uint32_t namelen; 01245 char* name; 01246 int i; 01247 01248 if (read(net, &namelen, sizeof(namelen)) < 0) { 01249 err("Negotiation failed/7: %m"); 01250 return NULL; 01251 } 01252 namelen = ntohl(namelen); 01253 name = malloc(namelen+1); 01254 name[namelen]=0; 01255 if (read(net, name, namelen) < 0) { 01256 err("Negotiation failed/8: %m"); 01257 free(name); 01258 return NULL; 01259 } 01260 for(i=0; i<servers->len; i++) { 01261 SERVER* serve = &(g_array_index(servers, SERVER, i)); 01262 if(!strcmp(serve->servename, name)) { 01263 CLIENT* client = g_new0(CLIENT, 1); 01264 client->server = serve; 01265 client->exportsize = OFFT_MAX; 01266 client->net = net; 01267 client->modern = TRUE; 01268 client->transactionlogfd = -1; 01269 client->clientfeats = cflags; 01270 free(name); 01271 return client; 01272 } 01273 } 01274 err("Negotiation failed/8a: Requested export not found"); 01275 free(name); 01276 return NULL; 01277 } 01278 01279 static void handle_list(uint32_t opt, int net, GArray* servers, uint32_t cflags) { 01280 uint32_t len; 01281 int i; 01282 char buf[1024]; 01283 char *ptr = buf + sizeof(len); 01284 01285 if (read(net, &len, sizeof(len)) < 0) 01286 err("Negotiation failed/8: %m"); 01287 len = ntohl(len); 01288 if(len) { 01289 send_reply(opt, net, NBD_REP_ERR_INVALID, 0, NULL); 01290 } 01291 if(!(glob_flags & F_LIST)) { 01292 send_reply(opt, net, NBD_REP_ERR_POLICY, 0, NULL); 01293 err_nonfatal("Client tried disallowed list option"); 01294 return; 01295 } 01296 for(i=0; i<servers->len; i++) { 01297 SERVER* serve = &(g_array_index(servers, SERVER, i)); 01298 len = htonl(strlen(serve->servename)); 01299 memcpy(buf, &len, sizeof(len)); 01300 strcpy(ptr, serve->servename); 01301 send_reply(opt, net, NBD_REP_SERVER, strlen(serve->servename)+sizeof(len), buf); 01302 } 01303 send_reply(opt, net, NBD_REP_ACK, 0, NULL); 01304 } 01305 01306 /** 01307 * Do the initial negotiation. 01308 * 01309 * @param client The client we're negotiating with. 01310 **/ 01311 CLIENT* negotiate(int net, CLIENT *client, GArray* servers, int phase) { 01312 char zeros[128]; 01313 uint64_t size_host; 01314 uint32_t flags = NBD_FLAG_HAS_FLAGS; 01315 uint16_t smallflags = 0; 01316 uint64_t magic; 01317 01318 memset(zeros, '\0', sizeof(zeros)); 01319 assert(((phase & NEG_INIT) && (phase & NEG_MODERN)) || client); 01320 if(phase & NEG_MODERN) { 01321 smallflags |= NBD_FLAG_FIXED_NEWSTYLE; 01322 } 01323 if(phase & NEG_INIT) { 01324 /* common */ 01325 if (write(net, INIT_PASSWD, 8) < 0) { 01326 err_nonfatal("Negotiation failed/1: %m"); 01327 if(client) 01328 exit(EXIT_FAILURE); 01329 } 01330 if(phase & NEG_MODERN) { 01331 /* modern */ 01332 magic = htonll(opts_magic); 01333 } else { 01334 /* oldstyle */ 01335 magic = htonll(cliserv_magic); 01336 } 01337 if (write(net, &magic, sizeof(magic)) < 0) { 01338 err_nonfatal("Negotiation failed/2: %m"); 01339 if(phase & NEG_OLD) 01340 exit(EXIT_FAILURE); 01341 } 01342 } 01343 if ((phase & NEG_MODERN) && (phase & NEG_INIT)) { 01344 /* modern */ 01345 uint32_t cflags; 01346 uint32_t opt; 01347 01348 if(!servers) 01349 err("programmer error"); 01350 smallflags = htons(smallflags); 01351 if (write(net, &smallflags, sizeof(uint16_t)) < 0) 01352 err_nonfatal("Negotiation failed/3: %m"); 01353 if (read(net, &cflags, sizeof(cflags)) < 0) 01354 err_nonfatal("Negotiation failed/4: %m"); 01355 cflags = htonl(cflags); 01356 do { 01357 if (read(net, &magic, sizeof(magic)) < 0) 01358 err_nonfatal("Negotiation failed/5: %m"); 01359 magic = ntohll(magic); 01360 if(magic != opts_magic) { 01361 err_nonfatal("Negotiation failed/5a: magic mismatch"); 01362 return NULL; 01363 } 01364 if (read(net, &opt, sizeof(opt)) < 0) 01365 err_nonfatal("Negotiation failed/6: %m"); 01366 opt = ntohl(opt); 01367 switch(opt) { 01368 case NBD_OPT_EXPORT_NAME: 01369 // NBD_OPT_EXPORT_NAME must be the last 01370 // selected option, so return from here 01371 // if that is chosen. 01372 return handle_export_name(opt, net, servers, cflags); 01373 break; 01374 case NBD_OPT_LIST: 01375 handle_list(opt, net, servers, cflags); 01376 break; 01377 case NBD_OPT_ABORT: 01378 // handled below 01379 break; 01380 default: 01381 send_reply(opt, net, NBD_REP_ERR_UNSUP, 0, NULL); 01382 break; 01383 } 01384 } while((opt != NBD_OPT_EXPORT_NAME) && (opt != NBD_OPT_ABORT)); 01385 if(opt == NBD_OPT_ABORT) { 01386 err_nonfatal("Session terminated by client"); 01387 return NULL; 01388 } 01389 } 01390 /* common */ 01391 size_host = htonll((u64)(client->exportsize)); 01392 if (write(net, &size_host, 8) < 0) 01393 err("Negotiation failed/9: %m"); 01394 if (client->server->flags & F_READONLY) 01395 flags |= NBD_FLAG_READ_ONLY; 01396 if (client->server->flags & F_FLUSH) 01397 flags |= NBD_FLAG_SEND_FLUSH; 01398 if (client->server->flags & F_FUA) 01399 flags |= NBD_FLAG_SEND_FUA; 01400 if (client->server->flags & F_ROTATIONAL) 01401 flags |= NBD_FLAG_ROTATIONAL; 01402 if (client->server->flags & F_TRIM) 01403 flags |= NBD_FLAG_SEND_TRIM; 01404 if (phase & NEG_OLD) { 01405 /* oldstyle */ 01406 flags = htonl(flags); 01407 if (write(client->net, &flags, 4) < 0) 01408 err("Negotiation failed/10: %m"); 01409 } else { 01410 /* modern */ 01411 smallflags = (uint16_t)(flags & ~((uint16_t)0)); 01412 smallflags = htons(smallflags); 01413 if (write(client->net, &smallflags, sizeof(smallflags)) < 0) { 01414 err("Negotiation failed/11: %m"); 01415 } 01416 } 01417 /* common */ 01418 if (write(client->net, zeros, 124) < 0) 01419 err("Negotiation failed/12: %m"); 01420 return NULL; 01421 } 01422 01423 /** sending macro. */ 01424 #define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \ 01425 if (client->transactionlogfd != -1) \ 01426 writeit(client->transactionlogfd, &reply, sizeof(reply)); } 01427 /** error macro. */ 01428 #define ERROR(client,reply,errcode) { reply.error = htonl(errcode); SEND(client->net,reply); reply.error = 0; } 01429 /** 01430 * Serve a file to a single client. 01431 * 01432 * @todo This beast needs to be split up in many tiny little manageable 01433 * pieces. Preferably with a chainsaw. 01434 * 01435 * @param client The client we're going to serve to. 01436 * @return when the client disconnects 01437 **/ 01438 int mainloop(CLIENT *client) { 01439 struct nbd_request request; 01440 struct nbd_reply reply; 01441 gboolean go_on=TRUE; 01442 #ifdef DODBG 01443 int i = 0; 01444 #endif 01445 negotiate(client->net, client, NULL, client->modern ? NEG_MODERN : (NEG_OLD | NEG_INIT)); 01446 DEBUG("Entering request loop!\n"); 01447 reply.magic = htonl(NBD_REPLY_MAGIC); 01448 reply.error = 0; 01449 while (go_on) { 01450 char buf[BUFSIZE]; 01451 char* p; 01452 size_t len; 01453 size_t currlen; 01454 size_t writelen; 01455 uint16_t command; 01456 #ifdef DODBG 01457 i++; 01458 printf("%d: ", i); 01459 #endif 01460 readit(client->net, &request, sizeof(request)); 01461 if (client->transactionlogfd != -1) 01462 writeit(client->transactionlogfd, &request, sizeof(request)); 01463 01464 request.from = ntohll(request.from); 01465 request.type = ntohl(request.type); 01466 command = request.type & NBD_CMD_MASK_COMMAND; 01467 len = ntohl(request.len); 01468 01469 DEBUG("%s from %llu (%llu) len %u, ", getcommandname(command), 01470 (unsigned long long)request.from, 01471 (unsigned long long)request.from / 512, len); 01472 01473 if (request.magic != htonl(NBD_REQUEST_MAGIC)) 01474 err("Not enough magic."); 01475 01476 memcpy(reply.handle, request.handle, sizeof(reply.handle)); 01477 01478 if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) { 01479 if (request.from + len < request.from) { // 64 bit overflow!! 01480 DEBUG("[Number too large!]"); 01481 ERROR(client, reply, EINVAL); 01482 continue; 01483 } 01484 01485 if (((off_t)request.from + len) > client->exportsize) { 01486 DEBUG("[RANGE!]"); 01487 ERROR(client, reply, EINVAL); 01488 continue; 01489 } 01490 01491 currlen = len; 01492 if (currlen > BUFSIZE - sizeof(struct nbd_reply)) { 01493 currlen = BUFSIZE - sizeof(struct nbd_reply); 01494 if(!logged_oversized) { 01495 msg(LOG_DEBUG, "oversized request (this is not a problem)"); 01496 logged_oversized = true; 01497 } 01498 } 01499 } 01500 01501 switch (command) { 01502 01503 case NBD_CMD_DISC: 01504 msg(LOG_INFO, "Disconnect request received."); 01505 if (client->server->flags & F_COPYONWRITE) { 01506 if (client->difmap) g_free(client->difmap) ; 01507 close(client->difffile); 01508 unlink(client->difffilename); 01509 free(client->difffilename); 01510 } 01511 go_on=FALSE; 01512 continue; 01513 01514 case NBD_CMD_WRITE: 01515 DEBUG("wr: net->buf, "); 01516 while(len > 0) { 01517 readit(client->net, buf, currlen); 01518 DEBUG("buf->exp, "); 01519 if ((client->server->flags & F_READONLY) || 01520 (client->server->flags & F_AUTOREADONLY)) { 01521 DEBUG("[WRITE to READONLY!]"); 01522 ERROR(client, reply, EPERM); 01523 consume(client->net, buf, len-currlen, BUFSIZE); 01524 continue; 01525 } 01526 if (expwrite(request.from, buf, currlen, client, 01527 request.type & NBD_CMD_FLAG_FUA)) { 01528 DEBUG("Write failed: %m" ); 01529 ERROR(client, reply, errno); 01530 consume(client->net, buf, len-currlen, BUFSIZE); 01531 continue; 01532 } 01533 len -= currlen; 01534 request.from += currlen; 01535 currlen = (len < BUFSIZE) ? len : BUFSIZE; 01536 } 01537 SEND(client->net, reply); 01538 DEBUG("OK!\n"); 01539 continue; 01540 01541 case NBD_CMD_FLUSH: 01542 DEBUG("fl: "); 01543 if (expflush(client)) { 01544 DEBUG("Flush failed: %m"); 01545 ERROR(client, reply, errno); 01546 continue; 01547 } 01548 SEND(client->net, reply); 01549 DEBUG("OK!\n"); 01550 continue; 01551 01552 case NBD_CMD_READ: 01553 DEBUG("exp->buf, "); 01554 if (client->transactionlogfd != -1) 01555 writeit(client->transactionlogfd, &reply, sizeof(reply)); 01556 writeit(client->net, &reply, sizeof(reply)); 01557 p = buf; 01558 writelen = currlen; 01559 while(len > 0) { 01560 if (expread(request.from, p, currlen, client)) { 01561 DEBUG("Read failed: %m"); 01562 ERROR(client, reply, errno); 01563 continue; 01564 } 01565 01566 DEBUG("buf->net, "); 01567 writeit(client->net, buf, writelen); 01568 len -= currlen; 01569 request.from += currlen; 01570 currlen = (len < BUFSIZE) ? len : BUFSIZE; 01571 p = buf; 01572 writelen = currlen; 01573 } 01574 DEBUG("OK!\n"); 01575 continue; 01576 01577 case NBD_CMD_TRIM: 01578 /* The kernel module sets discard_zeroes_data == 0, 01579 * so it is okay to do nothing. */ 01580 if (exptrim(&request, client)) { 01581 DEBUG("Trim failed: %m"); 01582 ERROR(client, reply, errno); 01583 continue; 01584 } 01585 SEND(client->net, reply); 01586 continue; 01587 01588 default: 01589 DEBUG ("Ignoring unknown command\n"); 01590 continue; 01591 } 01592 } 01593 return 0; 01594 } 01595 01596 /** 01597 * Set up client export array, which is an array of FILE_INFO. 01598 * Also, split a single exportfile into multiple ones, if that was asked. 01599 * @param client information on the client which we want to setup export for 01600 **/ 01601 void setupexport(CLIENT* client) { 01602 int i; 01603 off_t laststartoff = 0, lastsize = 0; 01604 int multifile = (client->server->flags & F_MULTIFILE); 01605 int temporary = (client->server->flags & F_TEMPORARY) && !multifile; 01606 int cancreate = (client->server->expected_size) && !multifile; 01607 01608 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO)); 01609 01610 /* If multi-file, open as many files as we can. 01611 * If not, open exactly one file. 01612 * Calculate file sizes as we go to get total size. */ 01613 for(i=0; ; i++) { 01614 FILE_INFO fi; 01615 gchar *tmpname; 01616 gchar* error_string; 01617 01618 if (i) 01619 cancreate = 0; 01620 /* if expected_size is specified, and this is the first file, we can create the file */ 01621 mode_t mode = (client->server->flags & F_READONLY) ? 01622 O_RDONLY : (O_RDWR | (cancreate?O_CREAT:0)); 01623 01624 if (temporary) { 01625 tmpname=g_strdup_printf("%s.%d-XXXXXX", client->exportname, i); 01626 DEBUG( "Opening %s\n", tmpname ); 01627 fi.fhandle = mkstemp(tmpname); 01628 } else { 01629 if(multifile) { 01630 tmpname=g_strdup_printf("%s.%d", client->exportname, i); 01631 } else { 01632 tmpname=g_strdup(client->exportname); 01633 } 01634 DEBUG( "Opening %s\n", tmpname ); 01635 fi.fhandle = open(tmpname, mode, 0x600); 01636 if(fi.fhandle == -1 && mode == O_RDWR) { 01637 /* Try again because maybe media was read-only */ 01638 fi.fhandle = open(tmpname, O_RDONLY); 01639 if(fi.fhandle != -1) { 01640 /* Opening the base file in copyonwrite mode is 01641 * okay */ 01642 if(!(client->server->flags & F_COPYONWRITE)) { 01643 client->server->flags |= F_AUTOREADONLY; 01644 client->server->flags |= F_READONLY; 01645 } 01646 } 01647 } 01648 } 01649 if(fi.fhandle == -1) { 01650 if(multifile && i>0) 01651 break; 01652 error_string=g_strdup_printf( 01653 "Could not open exported file %s: %%m", 01654 tmpname); 01655 err(error_string); 01656 } 01657 01658 if (temporary) 01659 unlink(tmpname); /* File will stick around whilst FD open */ 01660 01661 fi.startoff = laststartoff + lastsize; 01662 g_array_append_val(client->export, fi); 01663 g_free(tmpname); 01664 01665 /* Starting offset and size of this file will be used to 01666 * calculate starting offset of next file */ 01667 laststartoff = fi.startoff; 01668 lastsize = size_autodetect(fi.fhandle); 01669 01670 /* If we created the file, it will be length zero */ 01671 if (!lastsize && cancreate) { 01672 assert(!multifile); 01673 if(ftruncate (fi.fhandle, client->server->expected_size)<0) { 01674 err("Could not expand file: %m"); 01675 } 01676 lastsize = client->server->expected_size; 01677 break; /* don't look for any more files */ 01678 } 01679 01680 if(!multifile || temporary) 01681 break; 01682 } 01683 01684 /* Set export size to total calculated size */ 01685 client->exportsize = laststartoff + lastsize; 01686 01687 /* Export size may be overridden */ 01688 if(client->server->expected_size) { 01689 /* desired size must be <= total calculated size */ 01690 if(client->server->expected_size > client->exportsize) { 01691 err("Size of exported file is too big\n"); 01692 } 01693 01694 client->exportsize = client->server->expected_size; 01695 } 01696 01697 msg(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize); 01698 if(multifile) { 01699 msg(LOG_INFO, "Total number of files: %d", i); 01700 } 01701 } 01702 01703 int copyonwrite_prepare(CLIENT* client) { 01704 off_t i; 01705 if ((client->difffilename = malloc(1024))==NULL) 01706 err("Failed to allocate string for diff file name"); 01707 snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname, 01708 (int)getpid()) ; 01709 client->difffilename[1023]='\0'; 01710 msg(LOG_INFO, "About to create map and diff file %s", client->difffilename) ; 01711 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ; 01712 if (client->difffile<0) err("Could not create diff file (%m)") ; 01713 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL) 01714 err("Could not allocate memory") ; 01715 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ; 01716 01717 return 0; 01718 } 01719 01720 /** 01721 * Run a command. This is used for the ``prerun'' and ``postrun'' config file 01722 * options 01723 * 01724 * @param command the command to be ran. Read from the config file 01725 * @param file the file name we're about to export 01726 **/ 01727 int do_run(gchar* command, gchar* file) { 01728 gchar* cmd; 01729 int retval=0; 01730 01731 if(command && *command) { 01732 cmd = g_strdup_printf(command, file); 01733 retval=system(cmd); 01734 g_free(cmd); 01735 } 01736 return retval; 01737 } 01738 01739 /** 01740 * Serve a connection. 01741 * 01742 * @todo allow for multithreading, perhaps use libevent. Not just yet, though; 01743 * follow the road map. 01744 * 01745 * @param client a connected client 01746 **/ 01747 void serveconnection(CLIENT *client) { 01748 if (client->server->transactionlog && (client->transactionlogfd == -1)) 01749 { 01750 if (-1 == (client->transactionlogfd = open(client->server->transactionlog, 01751 O_WRONLY | O_CREAT, 01752 S_IRUSR | S_IWUSR))) 01753 g_warning("Could not open transaction log %s", 01754 client->server->transactionlog); 01755 } 01756 01757 if(do_run(client->server->prerun, client->exportname)) { 01758 exit(EXIT_FAILURE); 01759 } 01760 setupexport(client); 01761 01762 if (client->server->flags & F_COPYONWRITE) { 01763 copyonwrite_prepare(client); 01764 } 01765 01766 setmysockopt(client->net); 01767 01768 mainloop(client); 01769 do_run(client->server->postrun, client->exportname); 01770 01771 if (-1 != client->transactionlogfd) 01772 { 01773 close(client->transactionlogfd); 01774 client->transactionlogfd = -1; 01775 } 01776 } 01777 01778 /** 01779 * Find the name of the file we have to serve. This will use g_strdup_printf 01780 * to put the IP address of the client inside a filename containing 01781 * "%s" (in the form as specified by the "virtstyle" option). That name 01782 * is then written to client->exportname. 01783 * 01784 * @param net A socket connected to an nbd client 01785 * @param client information about the client. The IP address in human-readable 01786 * format will be written to a new char* buffer, the address of which will be 01787 * stored in client->clientname. 01788 * @return: 0 - OK, -1 - failed. 01789 **/ 01790 int set_peername(int net, CLIENT *client) { 01791 struct sockaddr_storage netaddr; 01792 struct sockaddr_in *netaddr4 = NULL; 01793 struct sockaddr_in6 *netaddr6 = NULL; 01794 socklen_t addrinlen = sizeof( struct sockaddr_storage ); 01795 struct addrinfo hints; 01796 struct addrinfo *ai = NULL; 01797 char peername[NI_MAXHOST]; 01798 char netname[NI_MAXHOST]; 01799 char *tmp = NULL; 01800 int i; 01801 int e; 01802 int shift; 01803 01804 if (getpeername(net, (struct sockaddr *) &(client->clientaddr), &addrinlen) < 0) { 01805 msg(LOG_INFO, "getpeername failed: %m"); 01806 return -1; 01807 } 01808 01809 if((e = getnameinfo((struct sockaddr *)&(client->clientaddr), addrinlen, 01810 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST))) { 01811 msg(LOG_INFO, "getnameinfo failed: %s", gai_strerror(e)); 01812 return -1; 01813 } 01814 01815 memset(&hints, '\0', sizeof (hints)); 01816 hints.ai_flags = AI_ADDRCONFIG; 01817 e = getaddrinfo(peername, NULL, &hints, &ai); 01818 01819 if(e != 0) { 01820 msg(LOG_INFO, "getaddrinfo failed: %s", gai_strerror(e)); 01821 freeaddrinfo(ai); 01822 return -1; 01823 } 01824 01825 switch(client->server->virtstyle) { 01826 case VIRT_NONE: 01827 msg(LOG_DEBUG, "virtualization is off"); 01828 client->exportname=g_strdup(client->server->exportname); 01829 break; 01830 case VIRT_IPHASH: 01831 msg(LOG_DEBUG, "virtstyle iphash"); 01832 for(i=0;i<strlen(peername);i++) { 01833 if(peername[i]=='.') { 01834 peername[i]='/'; 01835 } 01836 } 01837 case VIRT_IPLIT: 01838 msg(LOG_DEBUG, "virststyle ipliteral"); 01839 client->exportname=g_strdup_printf(client->server->exportname, peername); 01840 break; 01841 case VIRT_CIDR: 01842 msg(LOG_DEBUG, "virtstyle cidr %d", client->server->cidrlen); 01843 memcpy(&netaddr, &(client->clientaddr), addrinlen); 01844 int addrbits; 01845 assert((ai->ai_family == AF_INET) || (ai->ai_family == AF_INET6)); 01846 if(ai->ai_family == AF_INET) { 01847 addrbits = 32; 01848 } else if(ai->ai_family == AF_INET6) { 01849 addrbits = 128; 01850 } 01851 uint8_t* addrptr = ((struct sockaddr*)&netaddr)->sa_data; 01852 for(int i = 0; i < addrbits; i+=8) { 01853 int masklen = client->server->cidrlen - i; 01854 masklen = masklen > 0 ? masklen : 0; 01855 uint8_t mask = getmaskbyte(masklen); 01856 *addrptr &= mask; 01857 addrptr++; 01858 } 01859 getnameinfo((struct sockaddr *) &netaddr, addrinlen, 01860 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST); 01861 tmp=g_strdup_printf("%s/%s", netname, peername); 01862 01863 if(tmp != NULL) 01864 client->exportname=g_strdup_printf(client->server->exportname, tmp); 01865 01866 break; 01867 } 01868 01869 freeaddrinfo(ai); 01870 msg(LOG_INFO, "connect from %s, assigned file is %s", 01871 peername, client->exportname); 01872 client->clientname=g_strdup(peername); 01873 return 0; 01874 } 01875 01876 /** 01877 * Destroy a pid_t* 01878 * @param data a pointer to pid_t which should be freed 01879 **/ 01880 void destroy_pid_t(gpointer data) { 01881 g_free(data); 01882 } 01883 01884 static pid_t 01885 spawn_child() 01886 { 01887 pid_t pid; 01888 sigset_t newset; 01889 sigset_t oldset; 01890 01891 sigemptyset(&newset); 01892 sigaddset(&newset, SIGCHLD); 01893 sigaddset(&newset, SIGTERM); 01894 sigprocmask(SIG_BLOCK, &newset, &oldset); 01895 pid = fork(); 01896 if (pid < 0) { 01897 msg(LOG_ERR, "Could not fork (%s)", strerror(errno)); 01898 goto out; 01899 } 01900 if (pid > 0) { /* Parent */ 01901 pid_t *pidp; 01902 01903 pidp = g_malloc(sizeof(pid_t)); 01904 *pidp = pid; 01905 g_hash_table_insert(children, pidp, pidp); 01906 goto out; 01907 } 01908 /* Child */ 01909 signal(SIGCHLD, SIG_DFL); 01910 signal(SIGTERM, SIG_DFL); 01911 signal(SIGHUP, SIG_DFL); 01912 out: 01913 sigprocmask(SIG_SETMASK, &oldset, NULL); 01914 return pid; 01915 } 01916 01917 static int 01918 socket_accept(const int sock) 01919 { 01920 struct sockaddr_storage addrin; 01921 socklen_t addrinlen = sizeof(addrin); 01922 int net; 01923 01924 net = accept(sock, (struct sockaddr *) &addrin, &addrinlen); 01925 if (net < 0) { 01926 err_nonfatal("Failed to accept socket connection: %m"); 01927 } 01928 01929 return net; 01930 } 01931 01932 static void 01933 handle_modern_connection(GArray *const servers, const int sock) 01934 { 01935 int net; 01936 pid_t pid; 01937 CLIENT *client = NULL; 01938 int sock_flags_old; 01939 int sock_flags_new; 01940 01941 net = socket_accept(sock); 01942 if (net < 0) 01943 return; 01944 01945 if (!dontfork) { 01946 pid = spawn_child(); 01947 if (pid) { 01948 if (pid > 0) 01949 msg(LOG_INFO, "Spawned a child process"); 01950 if (pid < 0) 01951 msg(LOG_ERR, "Failed to spawn a child process"); 01952 close(net); 01953 return; 01954 } 01955 /* Child just continues. */ 01956 } 01957 01958 client = negotiate(net, NULL, servers, NEG_INIT | NEG_MODERN); 01959 if (!client) { 01960 msg(LOG_ERR, "Modern initial negotiation failed"); 01961 goto handler_err; 01962 } 01963 01964 if (client->server->max_connections > 0 && 01965 g_hash_table_size(children) >= client->server->max_connections) { 01966 msg(LOG_ERR, "Max connections (%d) reached", 01967 client->server->max_connections); 01968 goto handler_err; 01969 } 01970 01971 sock_flags_old = fcntl(net, F_GETFL, 0); 01972 if (sock_flags_old == -1) { 01973 msg(LOG_ERR, "Failed to get socket flags"); 01974 goto handler_err; 01975 } 01976 01977 sock_flags_new = sock_flags_old & ~O_NONBLOCK; 01978 if (sock_flags_new != sock_flags_old && 01979 fcntl(net, F_SETFL, sock_flags_new) == -1) { 01980 msg(LOG_ERR, "Failed to set socket to blocking mode"); 01981 goto handler_err; 01982 } 01983 01984 if (set_peername(net, client)) { 01985 msg(LOG_ERR, "Failed to set peername"); 01986 goto handler_err; 01987 } 01988 01989 if (!authorized_client(client)) { 01990 msg(LOG_INFO, "Client '%s' is not authorized to access", 01991 client->clientname); 01992 goto handler_err; 01993 } 01994 01995 if (!dontfork) { 01996 int i; 01997 01998 /* Free all root server resources here, because we are 01999 * currently in the child process serving one specific 02000 * connection. These are not simply needed anymore. */ 02001 g_hash_table_destroy(children); 02002 children = NULL; 02003 for (i = 0; i < modernsocks->len; i++) { 02004 close(g_array_index(modernsocks, int, i)); 02005 } 02006 g_array_free(modernsocks, TRUE); 02007 02008 /* Now that we are in the child process after a 02009 * succesful negotiation, we do not need the list of 02010 * servers anymore, get rid of it.*/ 02011 02012 for (i = 0; i < servers->len; i++) { 02013 const SERVER *const server = &g_array_index(servers, SERVER, i); 02014 close(server->socket); 02015 } 02016 02017 /* FALSE does not free the 02018 actual data. This is required, 02019 because the client has a 02020 direct reference into that 02021 data, and otherwise we get a 02022 segfault... */ 02023 g_array_free(servers, FALSE); 02024 } 02025 02026 msg(LOG_INFO, "Starting to serve"); 02027 serveconnection(client); 02028 exit(EXIT_SUCCESS); 02029 02030 handler_err: 02031 g_free(client); 02032 close(net); 02033 02034 if (!dontfork) { 02035 exit(EXIT_FAILURE); 02036 } 02037 } 02038 02039 static void 02040 handle_oldstyle_connection(GArray *const servers, SERVER *const serve) 02041 { 02042 int net; 02043 CLIENT *client = NULL; 02044 int sock_flags_old; 02045 int sock_flags_new; 02046 02047 net = socket_accept(serve->socket); 02048 if (net < 0) 02049 return; 02050 02051 if(serve->max_connections > 0 && 02052 g_hash_table_size(children) >= serve->max_connections) { 02053 msg(LOG_INFO, "Max connections reached"); 02054 goto handle_connection_out; 02055 } 02056 if((sock_flags_old = fcntl(net, F_GETFL, 0)) == -1) { 02057 err("fcntl F_GETFL"); 02058 } 02059 sock_flags_new = sock_flags_old & ~O_NONBLOCK; 02060 if (sock_flags_new != sock_flags_old && 02061 fcntl(net, F_SETFL, sock_flags_new) == -1) { 02062 err("fcntl F_SETFL ~O_NONBLOCK"); 02063 } 02064 02065 client = g_new0(CLIENT, 1); 02066 client->server=serve; 02067 client->exportsize=OFFT_MAX; 02068 client->net=net; 02069 client->transactionlogfd = -1; 02070 02071 if (set_peername(net, client)) { 02072 goto handle_connection_out; 02073 } 02074 if (!authorized_client(client)) { 02075 msg(LOG_INFO, "Unauthorized client"); 02076 goto handle_connection_out; 02077 } 02078 msg(LOG_INFO, "Authorized client"); 02079 02080 if (!dontfork) { 02081 pid_t pid; 02082 int i; 02083 sigset_t newset; 02084 sigset_t oldset; 02085 02086 sigemptyset(&newset); 02087 sigaddset(&newset, SIGCHLD); 02088 sigaddset(&newset, SIGTERM); 02089 sigprocmask(SIG_BLOCK, &newset, &oldset); 02090 if ((pid = fork()) < 0) { 02091 msg(LOG_INFO, "Could not fork (%s)", strerror(errno)); 02092 sigprocmask(SIG_SETMASK, &oldset, NULL); 02093 goto handle_connection_out; 02094 } 02095 if (pid > 0) { /* parent */ 02096 pid_t *pidp; 02097 02098 pidp = g_malloc(sizeof(pid_t)); 02099 *pidp = pid; 02100 g_hash_table_insert(children, pidp, pidp); 02101 sigprocmask(SIG_SETMASK, &oldset, NULL); 02102 goto handle_connection_out; 02103 } 02104 /* child */ 02105 signal(SIGCHLD, SIG_DFL); 02106 signal(SIGTERM, SIG_DFL); 02107 signal(SIGHUP, SIG_DFL); 02108 sigprocmask(SIG_SETMASK, &oldset, NULL); 02109 02110 g_hash_table_destroy(children); 02111 children = NULL; 02112 for(i=0;i<servers->len;i++) { 02113 close(g_array_index(servers, SERVER, i).socket); 02114 } 02115 /* FALSE does not free the 02116 actual data. This is required, 02117 because the client has a 02118 direct reference into that 02119 data, and otherwise we get a 02120 segfault... */ 02121 g_array_free(servers, FALSE); 02122 for(i=0;i<modernsocks->len;i++) { 02123 close(g_array_index(modernsocks, int, i)); 02124 } 02125 g_array_free(modernsocks, TRUE); 02126 } 02127 02128 msg(LOG_INFO, "Starting to serve"); 02129 serveconnection(client); 02130 exit(EXIT_SUCCESS); 02131 02132 handle_connection_out: 02133 g_free(client); 02134 close(net); 02135 } 02136 02137 /** 02138 * Return the index of the server whose servename matches the given 02139 * name. 02140 * 02141 * @param servename a string to match 02142 * @param servers an array of servers 02143 * @return the first index of the server whose servename matches the 02144 * given name or -1 if one cannot be found 02145 **/ 02146 static int get_index_by_servename(const gchar *const servename, 02147 const GArray *const servers) { 02148 int i; 02149 02150 for (i = 0; i < servers->len; ++i) { 02151 const SERVER server = g_array_index(servers, SERVER, i); 02152 02153 if (strcmp(servename, server.servename) == 0) 02154 return i; 02155 } 02156 02157 return -1; 02158 } 02159 02160 int setup_serve(SERVER *const serve, GError **const gerror); 02161 02162 /** 02163 * Parse configuration files and add servers to the array if they don't 02164 * already exist there. The existence is tested by comparing 02165 * servenames. A server is appended to the array only if its servename 02166 * is unique among all other servers. 02167 * 02168 * @param servers an array of servers 02169 * @return the number of new servers appended to the array, or -1 in 02170 * case of an error 02171 **/ 02172 static int append_new_servers(GArray *const servers, GError **const gerror) { 02173 int i; 02174 GArray *new_servers; 02175 const int old_len = servers->len; 02176 int retval = -1; 02177 struct generic_conf genconf; 02178 02179 new_servers = parse_cfile(config_file_pos, &genconf, gerror); 02180 if (!new_servers) 02181 goto out; 02182 02183 for (i = 0; i < new_servers->len; ++i) { 02184 SERVER new_server = g_array_index(new_servers, SERVER, i); 02185 02186 if (new_server.servename 02187 && -1 == get_index_by_servename(new_server.servename, 02188 servers)) { 02189 if (setup_serve(&new_server, gerror) == -1) 02190 goto out; 02191 if (append_serve(&new_server, servers) == -1) 02192 goto out; 02193 } 02194 } 02195 02196 retval = servers->len - old_len; 02197 out: 02198 g_array_free(new_servers, TRUE); 02199 02200 return retval; 02201 } 02202 02203 /** 02204 * Loop through the available servers, and serve them. Never returns. 02205 **/ 02206 void serveloop(GArray* servers) { 02207 int i; 02208 int max; 02209 fd_set mset; 02210 fd_set rset; 02211 02212 /* 02213 * Set up the master fd_set. The set of descriptors we need 02214 * to select() for never changes anyway and it buys us a *lot* 02215 * of time to only build this once. However, if we ever choose 02216 * to not fork() for clients anymore, we may have to revisit 02217 * this. 02218 */ 02219 max=0; 02220 FD_ZERO(&mset); 02221 for(i=0;i<servers->len;i++) { 02222 int sock; 02223 if((sock=(g_array_index(servers, SERVER, i)).socket) >= 0) { 02224 FD_SET(sock, &mset); 02225 max=sock>max?sock:max; 02226 } 02227 } 02228 for(i=0;i<modernsocks->len;i++) { 02229 int sock = g_array_index(modernsocks, int, i); 02230 FD_SET(sock, &mset); 02231 max=sock>max?sock:max; 02232 } 02233 for(;;) { 02234 /* SIGHUP causes the root server process to reconfigure 02235 * itself and add new export servers for each newly 02236 * found export configuration group, i.e. spawn new 02237 * server processes for each previously non-existent 02238 * export. This does not alter old runtime configuration 02239 * but just appends new exports. */ 02240 if (is_sighup_caught) { 02241 int n; 02242 GError *gerror = NULL; 02243 02244 msg(LOG_INFO, "reconfiguration request received"); 02245 is_sighup_caught = 0; /* Reset to allow catching 02246 * it again. */ 02247 02248 n = append_new_servers(servers, &gerror); 02249 if (n == -1) 02250 msg(LOG_ERR, "failed to append new servers: %s", 02251 gerror->message); 02252 02253 for (i = servers->len - n; i < servers->len; ++i) { 02254 const SERVER server = g_array_index(servers, 02255 SERVER, i); 02256 02257 if (server.socket >= 0) { 02258 FD_SET(server.socket, &mset); 02259 max = server.socket > max ? server.socket : max; 02260 } 02261 02262 msg(LOG_INFO, "reconfigured new server: %s", 02263 server.servename); 02264 } 02265 } 02266 02267 memcpy(&rset, &mset, sizeof(fd_set)); 02268 if(select(max+1, &rset, NULL, NULL, NULL)>0) { 02269 02270 DEBUG("accept, "); 02271 for(i=0; i < modernsocks->len; i++) { 02272 int sock = g_array_index(modernsocks, int, i); 02273 if(!FD_ISSET(sock, &rset)) { 02274 continue; 02275 } 02276 02277 handle_modern_connection(servers, sock); 02278 } 02279 for(i=0; i < servers->len; i++) { 02280 SERVER *serve; 02281 02282 serve=&(g_array_index(servers, SERVER, i)); 02283 if(serve->socket < 0) { 02284 continue; 02285 } 02286 if(FD_ISSET(serve->socket, &rset)) { 02287 handle_oldstyle_connection(servers, serve); 02288 } 02289 } 02290 } 02291 } 02292 } 02293 void serveloop(GArray* servers) G_GNUC_NORETURN; 02294 02295 /** 02296 * Set server socket options. 02297 * 02298 * @param socket a socket descriptor of the server 02299 * 02300 * @param gerror a pointer to an error object pointer used for reporting 02301 * errors. On error, if gerror is not NULL, *gerror is set and -1 02302 * is returned. 02303 * 02304 * @return 0 on success, -1 on error 02305 **/ 02306 int dosockopts(const int socket, GError **const gerror) { 02307 #ifndef sun 02308 int yes=1; 02309 #else 02310 char yes='1'; 02311 #endif /* sun */ 02312 struct linger l; 02313 02314 /* lose the pesky "Address already in use" error message */ 02315 if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) { 02316 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_REUSEADDR, 02317 "failed to set socket option SO_REUSEADDR: %s", 02318 strerror(errno)); 02319 return -1; 02320 } 02321 l.l_onoff = 1; 02322 l.l_linger = 10; 02323 if (setsockopt(socket,SOL_SOCKET,SO_LINGER,&l,sizeof(l)) == -1) { 02324 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_LINGER, 02325 "failed to set socket option SO_LINGER: %s", 02326 strerror(errno)); 02327 return -1; 02328 } 02329 if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) { 02330 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_KEEPALIVE, 02331 "failed to set socket option SO_KEEPALIVE: %s", 02332 strerror(errno)); 02333 return -1; 02334 } 02335 02336 return 0; 02337 } 02338 02339 /** 02340 * Connect a server's socket. 02341 * 02342 * @param serve the server we want to connect. 02343 **/ 02344 int setup_serve(SERVER *const serve, GError **const gerror) { 02345 struct addrinfo hints; 02346 struct addrinfo *ai = NULL; 02347 gchar *port = NULL; 02348 int e; 02349 int retval = -1; 02350 02351 /* Without this, it's possible that socket == 0, even if it's 02352 * not initialized at all. And that would be wrong because 0 is 02353 * totally legal value for properly initialized descriptor. This 02354 * line is required to ensure that unused/uninitialized 02355 * descriptors are marked as such (new style configuration 02356 * case). Currently, servers are being initialized in multiple 02357 * places, and some of the them do the socket initialization 02358 * incorrectly. This is the only point common to all code paths, 02359 * and therefore setting -1 is put here. However, the whole 02360 * server initialization procedure should be extracted to its 02361 * own function and all code paths wanting to mess with servers 02362 * should initialize servers with that function. 02363 * 02364 * TODO: fix server initialization */ 02365 serve->socket = -1; 02366 02367 if(!(glob_flags & F_OLDSTYLE)) { 02368 return serve->servename ? 1 : 0; 02369 } 02370 memset(&hints,'\0',sizeof(hints)); 02371 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV; 02372 hints.ai_socktype = SOCK_STREAM; 02373 hints.ai_family = serve->socket_family; 02374 02375 port = g_strdup_printf("%d", serve->port); 02376 if (!port) { 02377 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SYS, 02378 "failed to open an export socket: " 02379 "failed to convert a port number to a string: %s", 02380 strerror(errno)); 02381 goto out; 02382 } 02383 02384 e = getaddrinfo(serve->listenaddr,port,&hints,&ai); 02385 02386 g_free(port); 02387 02388 if(e != 0) { 02389 g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI, 02390 "failed to open an export socket: " 02391 "failed to get address info: %s", 02392 gai_strerror(e)); 02393 goto out; 02394 } 02395 02396 if(serve->socket_family == AF_UNSPEC) 02397 serve->socket_family = ai->ai_family; 02398 02399 #ifdef WITH_SDP 02400 if ((serve->flags) && F_SDP) { 02401 if (ai->ai_family == AF_INET) 02402 ai->ai_family = AF_INET_SDP; 02403 else (ai->ai_family == AF_INET6) 02404 ai->ai_family = AF_INET6_SDP; 02405 } 02406 #endif 02407 if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0) { 02408 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET, 02409 "failed to open an export socket: " 02410 "failed to create a socket: %s", 02411 strerror(errno)); 02412 goto out; 02413 } 02414 02415 if (dosockopts(serve->socket, gerror) == -1) { 02416 g_prefix_error(gerror, "failed to open an export socket: "); 02417 goto out; 02418 } 02419 02420 DEBUG("Waiting for connections... bind, "); 02421 e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen); 02422 if (e != 0 && errno != EADDRINUSE) { 02423 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND, 02424 "failed to open an export socket: " 02425 "failed to bind an address to a socket: %s", 02426 strerror(errno)); 02427 goto out; 02428 } 02429 DEBUG("listen, "); 02430 if (listen(serve->socket, 1) < 0) { 02431 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND, 02432 "failed to open an export socket: " 02433 "failed to start listening on a socket: %s", 02434 strerror(errno)); 02435 goto out; 02436 } 02437 02438 retval = serve->servename ? 1 : 0; 02439 out: 02440 02441 if (retval == -1 && serve->socket >= 0) { 02442 close(serve->socket); 02443 serve->socket = -1; 02444 } 02445 freeaddrinfo (ai); 02446 02447 return retval; 02448 } 02449 02450 int open_modern(const gchar *const addr, const gchar *const port, 02451 GError **const gerror) { 02452 struct addrinfo hints; 02453 struct addrinfo* ai = NULL; 02454 struct addrinfo* ai_bak; 02455 struct sock_flags; 02456 int e; 02457 int retval = -1; 02458 int i=0; 02459 int sock = -1; 02460 02461 memset(&hints, '\0', sizeof(hints)); 02462 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG; 02463 hints.ai_socktype = SOCK_STREAM; 02464 hints.ai_family = AF_UNSPEC; 02465 hints.ai_protocol = IPPROTO_TCP; 02466 e = getaddrinfo(addr, port ? port : NBD_DEFAULT_PORT, &hints, &ai); 02467 ai_bak = ai; 02468 if(e != 0) { 02469 g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI, 02470 "failed to open a modern socket: " 02471 "failed to get address info: %s", 02472 gai_strerror(e)); 02473 goto out; 02474 } 02475 02476 while(ai != NULL) { 02477 sock = -1; 02478 02479 if((sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) { 02480 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET, 02481 "failed to open a modern socket: " 02482 "failed to create a socket: %s", 02483 strerror(errno)); 02484 goto out; 02485 } 02486 02487 if (dosockopts(sock, gerror) == -1) { 02488 g_prefix_error(gerror, "failed to open a modern socket: "); 02489 goto out; 02490 } 02491 02492 if(bind(sock, ai->ai_addr, ai->ai_addrlen)) { 02493 /* This is so wrong. 02494 * 02495 * Linux will return multiple entries for the 02496 * same system when we ask it for something 02497 * AF_UNSPEC, even though the first entry will 02498 * listen to both protocols. Other systems will 02499 * return multiple entries too, but we actually 02500 * do need to open both. Sigh. 02501 * 02502 * Handle it by ignoring EADDRINUSE if we've 02503 * already got at least one socket open 02504 */ 02505 if(errno == EADDRINUSE && modernsocks->len > 0) { 02506 goto next; 02507 } 02508 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND, 02509 "failed to open a modern socket: " 02510 "failed to bind an address to a socket: %s", 02511 strerror(errno)); 02512 goto out; 02513 } 02514 02515 if(listen(sock, 10) <0) { 02516 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND, 02517 "failed to open a modern socket: " 02518 "failed to start listening on a socket: %s", 02519 strerror(errno)); 02520 goto out; 02521 } 02522 g_array_append_val(modernsocks, sock); 02523 next: 02524 ai = ai->ai_next; 02525 } 02526 02527 retval = 0; 02528 out: 02529 02530 if (retval == -1 && sock >= 0) { 02531 close(sock); 02532 } 02533 if(ai_bak) 02534 freeaddrinfo(ai_bak); 02535 02536 return retval; 02537 } 02538 02539 /** 02540 * Connect our servers. 02541 **/ 02542 void setup_servers(GArray *const servers, const gchar *const modernaddr, 02543 const gchar *const modernport) { 02544 int i; 02545 struct sigaction sa; 02546 int want_modern=0; 02547 02548 for(i=0;i<servers->len;i++) { 02549 GError *gerror = NULL; 02550 SERVER *server = &g_array_index(servers, SERVER, i); 02551 int ret; 02552 02553 ret = setup_serve(server, &gerror); 02554 if (ret == -1) { 02555 msg(LOG_ERR, "failed to setup servers: %s", 02556 gerror->message); 02557 g_clear_error(&gerror); 02558 exit(EXIT_FAILURE); 02559 } 02560 want_modern |= ret; 02561 } 02562 if(want_modern) { 02563 GError *gerror = NULL; 02564 if (open_modern(modernaddr, modernport, &gerror) == -1) { 02565 msg(LOG_ERR, "failed to setup servers: %s", 02566 gerror->message); 02567 g_clear_error(&gerror); 02568 exit(EXIT_FAILURE); 02569 } 02570 } 02571 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t); 02572 02573 sa.sa_handler = sigchld_handler; 02574 sigemptyset(&sa.sa_mask); 02575 sigaddset(&sa.sa_mask, SIGTERM); 02576 sa.sa_flags = SA_RESTART; 02577 if(sigaction(SIGCHLD, &sa, NULL) == -1) 02578 err("sigaction: %m"); 02579 02580 sa.sa_handler = sigterm_handler; 02581 sigemptyset(&sa.sa_mask); 02582 sigaddset(&sa.sa_mask, SIGCHLD); 02583 sa.sa_flags = SA_RESTART; 02584 if(sigaction(SIGTERM, &sa, NULL) == -1) 02585 err("sigaction: %m"); 02586 02587 sa.sa_handler = sighup_handler; 02588 sigemptyset(&sa.sa_mask); 02589 sa.sa_flags = SA_RESTART; 02590 if(sigaction(SIGHUP, &sa, NULL) == -1) 02591 err("sigaction: %m"); 02592 } 02593 02594 /** 02595 * Go daemon (unless we specified at compile time that we didn't want this) 02596 * @param serve the first server of our configuration. If its port is zero, 02597 * then do not daemonize, because we're doing inetd then. This parameter 02598 * is only used to create a PID file of the form 02599 * /var/run/nbd-server.<port>.pid; it's not modified in any way. 02600 **/ 02601 #if !defined(NODAEMON) 02602 void daemonize(SERVER* serve) { 02603 FILE*pidf; 02604 02605 if(serve && !(serve->port)) { 02606 return; 02607 } 02608 if(daemon(0,0)<0) { 02609 err("daemon"); 02610 } 02611 if(!*pidftemplate) { 02612 if(serve) { 02613 strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255); 02614 } else { 02615 strncpy(pidftemplate, "/var/run/nbd-server.pid", 255); 02616 } 02617 } 02618 snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0); 02619 pidf=fopen(pidfname, "w"); 02620 if(pidf) { 02621 fprintf(pidf,"%d\n", (int)getpid()); 02622 fclose(pidf); 02623 } else { 02624 perror("fopen"); 02625 fprintf(stderr, "Not fatal; continuing"); 02626 } 02627 } 02628 #else 02629 #define daemonize(serve) 02630 #endif /* !defined(NODAEMON) */ 02631 02632 /* 02633 * Everything beyond this point (in the file) is run in non-daemon mode. 02634 * The stuff above daemonize() isn't. 02635 */ 02636 02637 /** 02638 * Set up user-ID and/or group-ID 02639 **/ 02640 void dousers(const gchar *const username, const gchar *const groupname) { 02641 struct passwd *pw; 02642 struct group *gr; 02643 gchar* str; 02644 if (groupname) { 02645 gr = getgrnam(groupname); 02646 if(!gr) { 02647 str = g_strdup_printf("Invalid group name: %s", groupname); 02648 err(str); 02649 } 02650 if(setgid(gr->gr_gid)<0) { 02651 err("Could not set GID: %m"); 02652 } 02653 } 02654 if (username) { 02655 pw = getpwnam(username); 02656 if(!pw) { 02657 str = g_strdup_printf("Invalid user name: %s", username); 02658 err(str); 02659 } 02660 if(setuid(pw->pw_uid)<0) { 02661 err("Could not set UID: %m"); 02662 } 02663 } 02664 } 02665 02666 #ifndef ISSERVER 02667 void glib_message_syslog_redirect(const gchar *log_domain, 02668 GLogLevelFlags log_level, 02669 const gchar *message, 02670 gpointer user_data) 02671 { 02672 int level=LOG_DEBUG; 02673 02674 switch( log_level ) 02675 { 02676 case G_LOG_FLAG_FATAL: 02677 case G_LOG_LEVEL_CRITICAL: 02678 case G_LOG_LEVEL_ERROR: 02679 level=LOG_ERR; 02680 break; 02681 case G_LOG_LEVEL_WARNING: 02682 level=LOG_WARNING; 02683 break; 02684 case G_LOG_LEVEL_MESSAGE: 02685 case G_LOG_LEVEL_INFO: 02686 level=LOG_INFO; 02687 break; 02688 case G_LOG_LEVEL_DEBUG: 02689 level=LOG_DEBUG; 02690 break; 02691 default: 02692 level=LOG_ERR; 02693 } 02694 syslog(level, "%s", message); 02695 } 02696 #endif 02697 02698 /** 02699 * Main entry point... 02700 **/ 02701 int main(int argc, char *argv[]) { 02702 SERVER *serve; 02703 GArray *servers; 02704 GError *err=NULL; 02705 struct generic_conf genconf; 02706 02707 memset(&genconf, 0, sizeof(struct generic_conf)); 02708 02709 if (sizeof( struct nbd_request )!=28) { 02710 fprintf(stderr,"Bad size of structure. Alignment problems?\n"); 02711 exit(EXIT_FAILURE) ; 02712 } 02713 02714 memset(pidftemplate, '\0', 256); 02715 02716 modernsocks = g_array_new(FALSE, FALSE, sizeof(int)); 02717 02718 logging(); 02719 config_file_pos = g_strdup(CFILE); 02720 serve=cmdline(argc, argv); 02721 02722 servers = parse_cfile(config_file_pos, &genconf, &err); 02723 02724 /* Update global variables with parsed values. This will be 02725 * removed once we get rid of global configuration variables. */ 02726 glob_flags |= genconf.flags; 02727 02728 if(serve) { 02729 serve->socket_family = AF_UNSPEC; 02730 02731 append_serve(serve, servers); 02732 02733 if (!(serve->port)) { 02734 CLIENT *client; 02735 #ifndef ISSERVER 02736 /* You really should define ISSERVER if you're going to use 02737 * inetd mode, but if you don't, closing stdout and stderr 02738 * (which inetd had connected to the client socket) will let it 02739 * work. */ 02740 close(1); 02741 close(2); 02742 open("/dev/null", O_WRONLY); 02743 open("/dev/null", O_WRONLY); 02744 g_log_set_default_handler( glib_message_syslog_redirect, NULL ); 02745 #endif 02746 client=g_malloc(sizeof(CLIENT)); 02747 client->server=serve; 02748 client->net=-1; 02749 client->exportsize=OFFT_MAX; 02750 if (set_peername(0, client)) 02751 exit(EXIT_FAILURE); 02752 serveconnection(client); 02753 return 0; 02754 } 02755 } 02756 02757 if(!servers || !servers->len) { 02758 if(err && !(err->domain == NBDS_ERR 02759 && err->code == NBDS_ERR_CFILE_NOTFOUND)) { 02760 g_warning("Could not parse config file: %s", 02761 err ? err->message : "Unknown error"); 02762 } 02763 } 02764 if(serve) { 02765 g_warning("Specifying an export on the command line is deprecated."); 02766 g_warning("Please use a configuration file instead."); 02767 } 02768 02769 if((!serve) && (!servers||!servers->len)) { 02770 if(err) 02771 g_message("No configured exports; quitting."); 02772 exit(EXIT_FAILURE); 02773 } 02774 if (!dontfork) 02775 daemonize(serve); 02776 setup_servers(servers, genconf.modernaddr, genconf.modernport); 02777 dousers(genconf.user, genconf.group); 02778 02779 serveloop(servers); 02780 }
1.7.3