|
Network Block Device @PACKAGE_VERSION@
|
00001 /* 00002 * Network Block Device - server 00003 * 00004 * Copyright 1996-1998 Pavel Machek, distribute under GPL 00005 * <pavel@atrey.karlin.mff.cuni.cz> 00006 * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org> 00007 * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk> 00008 * 00009 * Version 1.0 - hopefully 64-bit-clean 00010 * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au> 00011 * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es> 00012 * Version 1.5 - can compile on Unix systems that don't have 64 bit integer 00013 * type, or don't have 64 bit file offsets by defining FS_32BIT 00014 * in compile options for nbd-server *only*. This can be done 00015 * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the 00016 * original autoconf input file, or I would make it a configure 00017 * option.) Ken Yap <ken@nlc.net.au>. 00018 * Version 1.6 - fix autodetection of block device size and really make 64 bit 00019 * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk> 00020 * Version 2.0 - Version synchronised with client 00021 * Version 2.1 - Reap zombie client processes when they exit. Removed 00022 * (uncommented) the _IO magic, it's no longer necessary. Wouter 00023 * Verhelst <wouter@debian.org> 00024 * Version 2.2 - Auto switch to read-only mode (usefull for floppies). 00025 * Version 2.3 - Fixed code so that Large File Support works. This 00026 * removes the FS_32BIT compile-time directive; define 00027 * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be 00028 * using FS_32BIT. This will allow you to use files >2GB instead of 00029 * having to use the -m option. Wouter Verhelst <wouter@debian.org> 00030 * Version 2.4 - Added code to keep track of children, so that we can 00031 * properly kill them from initscripts. Add a call to daemon(), 00032 * so that processes don't think they have to wait for us, which is 00033 * interesting for initscripts as well. Wouter Verhelst 00034 * <wouter@debian.org> 00035 * Version 2.5 - Bugfix release: forgot to reset child_arraysize to 00036 * zero after fork()ing, resulting in nbd-server going berserk 00037 * when it receives a signal with at least one child open. Wouter 00038 * Verhelst <wouter@debian.org> 00039 * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235); 00040 * rectified type of mainloop::size_host (sf.net bugs 814435 and 00041 * 817385); close the PID file after writing to it, so that the 00042 * daemon can actually be found. Wouter Verhelst 00043 * <wouter@debian.org> 00044 * 10/10/2003 - Size of the data "size_host" was wrong and so was not 00045 * correctly put in network endianness. Many types were corrected 00046 * (size_t and off_t instead of int). <vspaceg@sourceforge.net> 00047 * Version 2.6 - Some code cleanup. 00048 * Version 2.7 - Better build system. 00049 * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a 00050 * lot more work, but this is a start. Wouter Verhelst 00051 * <wouter@debian.org> 00052 * 16/03/2010 - Add IPv6 support. 00053 * Kitt Tientanopajai <kitt@kitty.in.th> 00054 * Neutron Soutmun <neo.neutron@gmail.com> 00055 * Suriya Soutmun <darksolar@gmail.com> 00056 */ 00057 00058 /* Includes LFS defines, which defines behaviours of some of the following 00059 * headers, so must come before those */ 00060 #include "lfs.h" 00061 #define _XOPEN_SOURCE 500 /* to get pread/pwrite */ 00062 #define _BSD_SOURCE /* to get DT_* macros */ 00063 #define _DARWIN_C_SOURCE /* to get DT_* macros on OS X */ 00064 00065 #include <assert.h> 00066 #include <sys/types.h> 00067 #include <sys/socket.h> 00068 #include <sys/stat.h> 00069 #include <sys/select.h> 00070 #include <sys/wait.h> 00071 #include <sys/un.h> 00072 #ifdef HAVE_SYS_IOCTL_H 00073 #include <sys/ioctl.h> 00074 #endif 00075 #include <sys/param.h> 00076 #include <signal.h> 00077 #include <errno.h> 00078 #include <libgen.h> 00079 #include <netinet/tcp.h> 00080 #include <netinet/in.h> 00081 #include <netdb.h> 00082 #include <syslog.h> 00083 #include <unistd.h> 00084 #include <stdbool.h> 00085 #include <stdio.h> 00086 #include <stdlib.h> 00087 #include <string.h> 00088 #include <fcntl.h> 00089 #if HAVE_FALLOC_PH 00090 #include <linux/falloc.h> 00091 #endif 00092 #include <arpa/inet.h> 00093 #include <strings.h> 00094 #include <dirent.h> 00095 #ifdef HAVE_SYS_DIR_H 00096 #include <sys/dir.h> 00097 #endif 00098 #ifdef HAVE_SYS_DIRENT_H 00099 #include <sys/dirent.h> 00100 #endif 00101 #include <unistd.h> 00102 #include <getopt.h> 00103 #include <pwd.h> 00104 #include <grp.h> 00105 #include <dirent.h> 00106 #include <ctype.h> 00107 #include <inttypes.h> 00108 00109 #include <glib.h> 00110 00111 /* used in cliserv.h, so must come first */ 00112 #define MY_NAME "nbd_server" 00113 #include "cliserv.h" 00114 #include "nbd-debug.h" 00115 #include "netdb-compat.h" 00116 #include "backend.h" 00117 #include "treefiles.h" 00118 00119 #ifdef WITH_SDP 00120 #include <sdp_inet.h> 00121 #endif 00122 00123 #if HAVE_FSCTL_SET_ZERO_DATA 00124 #include <io.h> 00125 /* don't include <windows.h> to avoid redefining eg the ERROR macro */ 00126 #define NOMINMAX 1 00127 #include <windef.h> 00128 #include <winbase.h> 00129 #include <winioctl.h> 00130 #endif 00131 00132 /** Default position of the config file */ 00133 #ifndef SYSCONFDIR 00134 #define SYSCONFDIR "/etc" 00135 #endif 00136 #define CFILE SYSCONFDIR "/nbd-server/config" 00137 00138 /** Where our config file actually is */ 00139 gchar* config_file_pos; 00140 00141 /** global flags */ 00142 int glob_flags=0; 00143 00144 /* Whether we should avoid forking */ 00145 int dontfork = 0; 00146 00147 /** 00148 * The highest value a variable of type off_t can reach. This is a signed 00149 * integer, so set all bits except for the leftmost one. 00150 **/ 00151 #define OFFT_MAX ~((off_t)1<<(sizeof(off_t)*8-1)) 00152 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */ 00153 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */ 00154 00155 /** Global flags: */ 00156 #define F_OLDSTYLE 1 /**< Allow oldstyle (port-based) exports */ 00157 #define F_LIST 2 /**< Allow clients to list the exports on a server */ 00158 #define F_NO_ZEROES 4 /**< Do not send zeros to client */ 00159 GHashTable *children; 00160 char pidfname[256]; /**< name of our PID file */ 00161 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */ 00162 00163 #define NEG_INIT (1 << 0) 00164 #define NEG_OLD (1 << 1) 00165 #define NEG_MODERN (1 << 2) 00166 00167 #include <nbdsrv.h> 00168 00169 /* Our thread pool */ 00170 GThreadPool *tpool; 00171 00172 /* A work package for the thread pool functions */ 00173 struct work_package { 00174 CLIENT* client; 00175 struct nbd_request* req; 00176 void* data; /**< for read requests */ 00177 }; 00178 00179 static volatile sig_atomic_t is_sigchld_caught; /**< Flag set by 00180 SIGCHLD handler 00181 to mark a child 00182 exit */ 00183 00184 static volatile sig_atomic_t is_sigterm_caught; /**< Flag set by 00185 SIGTERM handler 00186 to mark a exit 00187 request */ 00188 00189 static volatile sig_atomic_t is_sighup_caught; /**< Flag set by SIGHUP 00190 handler to mark a 00191 reconfiguration 00192 request */ 00193 00194 GArray* modernsocks; /**< Sockets for the modern handler. Not used 00195 if a client was only specified on the 00196 command line; only port used if 00197 oldstyle is set to false (and then the 00198 command-line client isn't used, gna gna). 00199 This may be more than one socket on 00200 systems that don't support serving IPv4 00201 and IPv6 from the same socket (like, 00202 e.g., FreeBSD) */ 00203 00204 bool logged_oversized=false; /**< whether we logged oversized requests already */ 00205 00206 /** 00207 * Type of configuration file values 00208 **/ 00209 typedef enum { 00210 PARAM_INT, /**< This parameter is an integer */ 00211 PARAM_INT64, /**< This parameter is an integer */ 00212 PARAM_STRING, /**< This parameter is a string */ 00213 PARAM_BOOL, /**< This parameter is a boolean */ 00214 } PARAM_TYPE; 00215 00216 /** 00217 * Configuration file values 00218 **/ 00219 typedef struct { 00220 gchar *paramname; /**< Name of the parameter, as it appears in 00221 the config file */ 00222 gboolean required; /**< Whether this is a required (as opposed to 00223 optional) parameter */ 00224 PARAM_TYPE ptype; /**< Type of the parameter. */ 00225 gpointer target; /**< Pointer to where the data of this 00226 parameter should be written. If ptype is 00227 PARAM_BOOL, the data is or'ed rather than 00228 overwritten. */ 00229 gint flagval; /**< Flag mask for this parameter in case ptype 00230 is PARAM_BOOL. */ 00231 } PARAM; 00232 00233 /** 00234 * Configuration file values of the "generic" section 00235 **/ 00236 struct generic_conf { 00237 gchar *user; /**< user we run the server as */ 00238 gchar *group; /**< group we run running as */ 00239 gchar *modernaddr; /**< address of the modern socket */ 00240 gchar *modernport; /**< port of the modern socket */ 00241 gchar *unixsock; /**< file name of the unix domain socket */ 00242 gint flags; /**< global flags */ 00243 gint threads; /**< maximum number of parallel threads we want to run */ 00244 }; 00245 00246 /** 00247 * Translate a command name into human readable form 00248 * 00249 * @param command The command number (after applying NBD_CMD_MASK_COMMAND) 00250 * @return pointer to the command name 00251 **/ 00252 static inline const char * getcommandname(uint64_t command) { 00253 switch (command) { 00254 case NBD_CMD_READ: 00255 return "NBD_CMD_READ"; 00256 case NBD_CMD_WRITE: 00257 return "NBD_CMD_WRITE"; 00258 case NBD_CMD_DISC: 00259 return "NBD_CMD_DISC"; 00260 case NBD_CMD_FLUSH: 00261 return "NBD_CMD_FLUSH"; 00262 case NBD_CMD_TRIM: 00263 return "NBD_CMD_TRIM"; 00264 default: 00265 return "UNKNOWN"; 00266 } 00267 } 00268 00269 /** 00270 * Consume data from an FD that we don't want 00271 * 00272 * @param f a file descriptor 00273 * @param buf a buffer 00274 * @param len the number of bytes to consume 00275 * @param bufsiz the size of the buffer 00276 **/ 00277 static inline void consume(int f, void * buf, size_t len, size_t bufsiz) { 00278 size_t curlen; 00279 while (len>0) { 00280 curlen = (len>bufsiz)?bufsiz:len; 00281 readit(f, buf, curlen); 00282 len -= curlen; 00283 } 00284 } 00285 00286 /** 00287 * Write data from a buffer into a filedescriptor 00288 * 00289 * @param f a file descriptor 00290 * @param buf a buffer containing data 00291 * @param len the number of bytes to be written 00292 **/ 00293 static inline void writeit(int f, void *buf, size_t len) { 00294 ssize_t res; 00295 while (len > 0) { 00296 DEBUG("+"); 00297 if ((res = write(f, buf, len)) <= 0) 00298 err("Send failed: %m"); 00299 len -= res; 00300 buf += res; 00301 } 00302 } 00303 00304 /** 00305 * Print out a message about how to use nbd-server. Split out to a separate 00306 * function so that we can call it from multiple places 00307 */ 00308 void usage() { 00309 printf("This is nbd-server version " VERSION "\n"); 00310 printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections] [-V]\n" 00311 "\t-r|--read-only\t\tread only\n" 00312 "\t-m|--multi-file\t\tmultiple file\n" 00313 "\t-c|--copy-on-write\tcopy on write\n" 00314 "\t-C|--config-file\tspecify an alternate configuration file\n" 00315 "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n" 00316 "\t-p|--pid-file\t\tspecify a filename to write our PID to\n" 00317 "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n" 00318 "\t-M|--max-connections\tspecify the maximum number of opened connections\n" 00319 "\t-V|--version\toutput the version and exit\n\n" 00320 "\tif port is set to 0, stdin is used (for running from inetd).\n" 00321 "\tif file_to_export contains '%%s', it is substituted with the IP\n" 00322 "\t\taddress of the machine trying to connect\n" 00323 "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n"); 00324 printf("Using configuration file %s\n", CFILE); 00325 } 00326 00327 /* Dumps a config file section of the given SERVER*, and exits. */ 00328 void dump_section(SERVER* serve, gchar* section_header) { 00329 printf("[%s]\n", section_header); 00330 printf("\texportname = %s\n", serve->exportname); 00331 printf("\tlistenaddr = %s\n", serve->listenaddr); 00332 if(serve->flags & F_READONLY) { 00333 printf("\treadonly = true\n"); 00334 } 00335 if(serve->flags & F_MULTIFILE) { 00336 printf("\tmultifile = true\n"); 00337 } 00338 if(serve->flags & F_TREEFILES) { 00339 printf("\ttreefiles = true\n"); 00340 } 00341 if(serve->flags & F_COPYONWRITE) { 00342 printf("\tcopyonwrite = true\n"); 00343 } 00344 if(serve->expected_size) { 00345 printf("\tfilesize = %lld\n", (long long int)serve->expected_size); 00346 } 00347 if(serve->authname) { 00348 printf("\tauthfile = %s\n", serve->authname); 00349 } 00350 exit(EXIT_SUCCESS); 00351 } 00352 00353 /** 00354 * Parse the command line. 00355 * 00356 * @param argc the argc argument to main() 00357 * @param argv the argv argument to main() 00358 **/ 00359 SERVER* cmdline(int argc, char *argv[], struct generic_conf *genconf) { 00360 int i=0; 00361 int nonspecial=0; 00362 int c; 00363 struct option long_options[] = { 00364 {"read-only", no_argument, NULL, 'r'}, 00365 {"multi-file", no_argument, NULL, 'm'}, 00366 {"copy-on-write", no_argument, NULL, 'c'}, 00367 {"dont-fork", no_argument, NULL, 'd'}, 00368 {"authorize-file", required_argument, NULL, 'l'}, 00369 {"config-file", required_argument, NULL, 'C'}, 00370 {"pid-file", required_argument, NULL, 'p'}, 00371 {"output-config", required_argument, NULL, 'o'}, 00372 {"max-connection", required_argument, NULL, 'M'}, 00373 {"version", no_argument, NULL, 'V'}, 00374 {0,0,0,0} 00375 }; 00376 SERVER *serve; 00377 off_t es; 00378 size_t last; 00379 char suffix; 00380 gboolean do_output=FALSE; 00381 gchar* section_header=""; 00382 gchar** addr_port; 00383 00384 if(argc==1) { 00385 return NULL; 00386 } 00387 serve=g_new0(SERVER, 1); 00388 serve->authname = g_strdup(default_authname); 00389 serve->virtstyle=VIRT_IPLIT; 00390 while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:V", long_options, &i))>=0) { 00391 switch (c) { 00392 case 1: 00393 /* non-option argument */ 00394 switch(nonspecial++) { 00395 case 0: 00396 if(strchr(optarg, ':') == strrchr(optarg, ':')) { 00397 addr_port=g_strsplit(optarg, ":", 2); 00398 00399 /* Check for "@" - maybe user using this separator 00400 for IPv4 address */ 00401 if(!addr_port[1]) { 00402 g_strfreev(addr_port); 00403 addr_port=g_strsplit(optarg, "@", 2); 00404 } 00405 } else { 00406 addr_port=g_strsplit(optarg, "@", 2); 00407 } 00408 00409 if(addr_port[1]) { 00410 genconf->modernport=g_strdup(addr_port[1]); 00411 genconf->modernaddr=g_strdup(addr_port[0]); 00412 } else { 00413 g_free(genconf->modernaddr); 00414 genconf->modernaddr=NULL; 00415 genconf->modernport=g_strdup(addr_port[0]); 00416 } 00417 g_strfreev(addr_port); 00418 break; 00419 case 1: 00420 serve->exportname = g_strdup(optarg); 00421 if(serve->exportname[0] != '/') { 00422 fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n"); 00423 exit(EXIT_FAILURE); 00424 } 00425 break; 00426 case 2: 00427 last=strlen(optarg)-1; 00428 suffix=optarg[last]; 00429 if (suffix == 'k' || suffix == 'K' || 00430 suffix == 'm' || suffix == 'M') 00431 optarg[last] = '\0'; 00432 es = (off_t)atoll(optarg); 00433 switch (suffix) { 00434 case 'm': 00435 case 'M': es <<= 10; 00436 case 'k': 00437 case 'K': es <<= 10; 00438 default : break; 00439 } 00440 serve->expected_size = es; 00441 break; 00442 } 00443 break; 00444 case 'r': 00445 serve->flags |= F_READONLY; 00446 break; 00447 case 'm': 00448 serve->flags |= F_MULTIFILE; 00449 break; 00450 case 'o': 00451 do_output = TRUE; 00452 section_header = g_strdup(optarg); 00453 break; 00454 case 'p': 00455 strncpy(pidfname, optarg, 256); 00456 pidfname[255]='\0'; 00457 break; 00458 case 'c': 00459 serve->flags |=F_COPYONWRITE; 00460 break; 00461 case 'd': 00462 dontfork = 1; 00463 break; 00464 case 'C': 00465 g_free(config_file_pos); 00466 config_file_pos=g_strdup(optarg); 00467 break; 00468 case 'l': 00469 g_free(serve->authname); 00470 serve->authname=g_strdup(optarg); 00471 break; 00472 case 'M': 00473 serve->max_connections = strtol(optarg, NULL, 0); 00474 break; 00475 case 'V': 00476 printf("This is nbd-server version " VERSION "\n"); 00477 exit(EXIT_SUCCESS); 00478 break; 00479 default: 00480 usage(); 00481 exit(EXIT_FAILURE); 00482 break; 00483 } 00484 } 00485 /* What's left: the port to export, the name of the to be exported 00486 * file, and, optionally, the size of the file, in that order. */ 00487 if(nonspecial<2) { 00488 g_free(serve); 00489 serve=NULL; 00490 } else { 00491 serve->servename = ""; 00492 } 00493 if(do_output) { 00494 if(!serve) { 00495 g_critical("Need a complete configuration on the command line to output a config file section!"); 00496 exit(EXIT_FAILURE); 00497 } 00498 dump_section(serve, section_header); 00499 } 00500 return serve; 00501 } 00502 00503 /* forward definition of parse_cfile */ 00504 GArray* parse_cfile(gchar* f, struct generic_conf *genconf, bool expect_generic, GError** e); 00505 00506 #ifdef HAVE_STRUCT_DIRENT_D_TYPE 00507 #define NBD_D_TYPE de->d_type 00508 #else 00509 #define NBD_D_TYPE 0 00510 #define DT_UNKNOWN 0 00511 #define DT_REG 1 00512 #endif 00513 00514 /** 00515 * Parse config file snippets in a directory. Uses readdir() and friends 00516 * to find files and open them, then passes them on to parse_cfile 00517 * with have_global set false 00518 **/ 00519 GArray* do_cfile_dir(gchar* dir, struct generic_conf *const genconf, GError** e) { 00520 DIR* dirh = opendir(dir); 00521 struct dirent* de; 00522 gchar* fname; 00523 GArray* retval = NULL; 00524 GArray* tmp; 00525 struct stat stbuf; 00526 00527 if(!dirh) { 00528 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_DIR_UNKNOWN, "Invalid directory specified: %s", strerror(errno)); 00529 return NULL; 00530 } 00531 errno=0; 00532 while((de = readdir(dirh))) { 00533 int saved_errno=errno; 00534 fname = g_build_filename(dir, de->d_name, NULL); 00535 switch(NBD_D_TYPE) { 00536 case DT_UNKNOWN: 00537 /* Filesystem doesn't return type of 00538 * file through readdir. Run stat() on 00539 * the file instead */ 00540 if(stat(fname, &stbuf)) { 00541 perror("stat"); 00542 goto err_out; 00543 } 00544 if (!S_ISREG(stbuf.st_mode)) { 00545 goto next; 00546 } 00547 case DT_REG: 00548 /* Skip unless the name ends with '.conf' */ 00549 if(strcmp((de->d_name + strlen(de->d_name) - 5), ".conf")) { 00550 goto next; 00551 } 00552 tmp = parse_cfile(fname, genconf, false, e); 00553 errno=saved_errno; 00554 if(*e) { 00555 goto err_out; 00556 } 00557 if(!retval) 00558 retval = g_array_new(FALSE, TRUE, sizeof(SERVER)); 00559 retval = g_array_append_vals(retval, tmp->data, tmp->len); 00560 g_array_free(tmp, TRUE); 00561 default: 00562 break; 00563 } 00564 next: 00565 g_free(fname); 00566 } 00567 if(errno) { 00568 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_READDIR_ERR, "Error trying to read directory: %s", strerror(errno)); 00569 err_out: 00570 if(retval) 00571 g_array_free(retval, TRUE); 00572 retval = NULL; 00573 } 00574 if(dirh) 00575 closedir(dirh); 00576 return retval; 00577 } 00578 00579 /** 00580 * Parse the config file. 00581 * 00582 * @param f the name of the config file 00583 * 00584 * @param genconf a pointer to generic configuration which will get 00585 * updated with parsed values. If NULL, then parsed generic 00586 * configuration values are safely and silently discarded. 00587 * 00588 * @param e a GError. Error code can be any of the following: 00589 * NBDS_ERR_CFILE_NOTFOUND, NBDS_ERR_CFILE_MISSING_GENERIC, 00590 * NBDS_ERR_CFILE_VALUE_INVALID, NBDS_ERR_CFILE_VALUE_UNSUPPORTED 00591 * or NBDS_ERR_CFILE_NO_EXPORTS. @see NBDS_ERRS. 00592 * 00593 * @param expect_generic if true, we expect a configuration file that 00594 * contains a [generic] section. If false, we don't. 00595 * 00596 * @return a GArray of SERVER* pointers. If the config file is empty or does not 00597 * exist, returns an empty GArray; if the config file contains an 00598 * error, returns NULL, and e is set appropriately 00599 **/ 00600 GArray* parse_cfile(gchar* f, struct generic_conf *const genconf, bool expect_generic, GError** e) { 00601 const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s"; 00602 const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s"; 00603 gchar* cfdir = NULL; 00604 SERVER s; 00605 gchar *virtstyle=NULL; 00606 PARAM lp[] = { 00607 { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 }, 00608 { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 }, 00609 { "filesize", FALSE, PARAM_OFFT, &(s.expected_size), 0 }, 00610 { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 }, 00611 { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 }, 00612 { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 }, 00613 { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 }, 00614 { "cowdir", FALSE, PARAM_STRING, &(s.cowdir), 0 }, 00615 { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY }, 00616 { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE }, 00617 { "treefiles", FALSE, PARAM_BOOL, &(s.flags), F_TREEFILES }, 00618 { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE }, 00619 { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE }, 00620 { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP }, 00621 { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC }, 00622 { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH }, 00623 { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA }, 00624 { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL }, 00625 { "temporary", FALSE, PARAM_BOOL, &(s.flags), F_TEMPORARY }, 00626 { "trim", FALSE, PARAM_BOOL, &(s.flags), F_TRIM }, 00627 { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 }, 00628 { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 }, 00629 }; 00630 const int lp_size=sizeof(lp)/sizeof(PARAM); 00631 struct generic_conf genconftmp; 00632 PARAM gp[] = { 00633 { "user", FALSE, PARAM_STRING, &(genconftmp.user), 0 }, 00634 { "group", FALSE, PARAM_STRING, &(genconftmp.group), 0 }, 00635 { "oldstyle", FALSE, PARAM_BOOL, &(genconftmp.flags), F_OLDSTYLE }, // only left here so we can issue an appropriate error message when the option is used 00636 { "listenaddr", FALSE, PARAM_STRING, &(genconftmp.modernaddr), 0 }, 00637 { "port", FALSE, PARAM_STRING, &(genconftmp.modernport), 0 }, 00638 { "includedir", FALSE, PARAM_STRING, &cfdir, 0 }, 00639 { "allowlist", FALSE, PARAM_BOOL, &(genconftmp.flags), F_LIST }, 00640 { "unixsock", FALSE, PARAM_STRING, &(genconftmp.unixsock), 0 }, 00641 { "max_threads", FALSE, PARAM_INT, &(genconftmp.threads), 0 }, 00642 }; 00643 PARAM* p=gp; 00644 int p_size=sizeof(gp)/sizeof(PARAM); 00645 GKeyFile *cfile; 00646 GError *err = NULL; 00647 const char *err_msg=NULL; 00648 GArray *retval=NULL; 00649 gchar **groups; 00650 gboolean bval; 00651 gint ival; 00652 gint64 i64val; 00653 gchar* sval; 00654 gchar* startgroup; 00655 gint i; 00656 gint j; 00657 00658 memset(&genconftmp, 0, sizeof(struct generic_conf)); 00659 00660 if (genconf) { 00661 /* Use the passed configuration values as defaults. The 00662 * parsing algorithm below updates all parameter targets 00663 * found from configuration files. */ 00664 memcpy(&genconftmp, genconf, sizeof(struct generic_conf)); 00665 } 00666 00667 cfile = g_key_file_new(); 00668 retval = g_array_new(FALSE, TRUE, sizeof(SERVER)); 00669 if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS | 00670 G_KEY_FILE_KEEP_TRANSLATIONS, &err)) { 00671 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NOTFOUND, "Could not open config file %s: %s", 00672 f, err->message); 00673 g_key_file_free(cfile); 00674 return retval; 00675 } 00676 startgroup = g_key_file_get_start_group(cfile); 00677 if((!startgroup || strcmp(startgroup, "generic")) && expect_generic) { 00678 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!"); 00679 g_key_file_free(cfile); 00680 return NULL; 00681 } 00682 groups = g_key_file_get_groups(cfile, NULL); 00683 for(i=0;groups[i];i++) { 00684 memset(&s, '\0', sizeof(SERVER)); 00685 00686 /* After the [generic] group or when we're parsing an include 00687 * directory, start parsing exports */ 00688 if(i==1 || !expect_generic) { 00689 p=lp; 00690 p_size=lp_size; 00691 } 00692 for(j=0;j<p_size;j++) { 00693 assert(p[j].target != NULL); 00694 assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL||p[j].ptype==PARAM_INT64); 00695 switch(p[j].ptype) { 00696 case PARAM_INT: 00697 ival = g_key_file_get_integer(cfile, 00698 groups[i], 00699 p[j].paramname, 00700 &err); 00701 if(!err) { 00702 *((gint*)p[j].target) = ival; 00703 } 00704 break; 00705 case PARAM_INT64: 00706 i64val = g_key_file_get_int64(cfile, 00707 groups[i], 00708 p[j].paramname, 00709 &err); 00710 if(!err) { 00711 *((gint64*)p[j].target) = i64val; 00712 } 00713 break; 00714 case PARAM_STRING: 00715 sval = g_key_file_get_string(cfile, 00716 groups[i], 00717 p[j].paramname, 00718 &err); 00719 if(!err) { 00720 *((gchar**)p[j].target) = sval; 00721 } 00722 break; 00723 case PARAM_BOOL: 00724 bval = g_key_file_get_boolean(cfile, 00725 groups[i], 00726 p[j].paramname, &err); 00727 if(!err) { 00728 if(bval) { 00729 *((gint*)p[j].target) |= p[j].flagval; 00730 } else { 00731 *((gint*)p[j].target) &= ~(p[j].flagval); 00732 } 00733 } 00734 break; 00735 } 00736 if(err) { 00737 if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) { 00738 if(!p[j].required) { 00739 /* Ignore not-found error for optional values */ 00740 g_clear_error(&err); 00741 continue; 00742 } else { 00743 err_msg = MISSING_REQUIRED_ERROR; 00744 } 00745 } else { 00746 err_msg = DEFAULT_ERROR; 00747 } 00748 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message); 00749 g_array_free(retval, TRUE); 00750 g_error_free(err); 00751 g_key_file_free(cfile); 00752 return NULL; 00753 } 00754 } 00755 if(virtstyle) { 00756 if(!strncmp(virtstyle, "none", 4)) { 00757 s.virtstyle=VIRT_NONE; 00758 } else if(!strncmp(virtstyle, "ipliteral", 9)) { 00759 s.virtstyle=VIRT_IPLIT; 00760 } else if(!strncmp(virtstyle, "iphash", 6)) { 00761 s.virtstyle=VIRT_IPHASH; 00762 } else if(!strncmp(virtstyle, "cidrhash", 8)) { 00763 s.virtstyle=VIRT_CIDR; 00764 if(strlen(virtstyle)<10) { 00765 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]); 00766 g_array_free(retval, TRUE); 00767 g_key_file_free(cfile); 00768 return NULL; 00769 } 00770 s.cidrlen=strtol(virtstyle+8, NULL, 0); 00771 } else { 00772 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]); 00773 g_array_free(retval, TRUE); 00774 g_key_file_free(cfile); 00775 return NULL; 00776 } 00777 } else { 00778 s.virtstyle=VIRT_IPLIT; 00779 } 00780 if(genconftmp.flags & F_OLDSTYLE) { 00781 g_message("Since 3.10, the oldstyle protocol is no longer supported. Please migrate to the newstyle protocol."); 00782 g_message("Exiting."); 00783 return NULL; 00784 } 00785 /* Don't need to free this, it's not our string */ 00786 virtstyle=NULL; 00787 /* Don't append values for the [generic] group */ 00788 if(i>0 || !expect_generic) { 00789 s.servename = groups[i]; 00790 00791 g_array_append_val(retval, s); 00792 } 00793 #ifndef WITH_SDP 00794 if(s.flags & F_SDP) { 00795 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]); 00796 g_array_free(retval, TRUE); 00797 g_key_file_free(cfile); 00798 return NULL; 00799 } 00800 #endif 00801 } 00802 g_key_file_free(cfile); 00803 if(cfdir) { 00804 GArray* extra = do_cfile_dir(cfdir, &genconftmp, e); 00805 if(extra) { 00806 retval = g_array_append_vals(retval, extra->data, extra->len); 00807 i+=extra->len; 00808 g_array_free(extra, TRUE); 00809 } else { 00810 if(*e) { 00811 g_array_free(retval, TRUE); 00812 return NULL; 00813 } 00814 } 00815 } 00816 if(i==1 && expect_generic) { 00817 g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NO_EXPORTS, "The config file does not specify any exports"); 00818 } 00819 00820 if (genconf) { 00821 /* Return the updated generic configuration through the 00822 * pointer parameter. */ 00823 memcpy(genconf, &genconftmp, sizeof(struct generic_conf)); 00824 } 00825 00826 return retval; 00827 } 00828 00829 /** 00830 * Handle SIGCHLD by setting atomically a flag which will be evaluated in the 00831 * main loop of the root server process. This allows us to separate the signal 00832 * catching from th actual task triggered by SIGCHLD and hence processing in the 00833 * interrupt context is kept as minimial as possible. 00834 * 00835 * @param s the signal we're handling (must be SIGCHLD, or something 00836 * is severely wrong) 00837 **/ 00838 static void sigchld_handler(const int s G_GNUC_UNUSED) { 00839 is_sigchld_caught = 1; 00840 } 00841 00842 /** 00843 * Kill a child. Called from sigterm_handler::g_hash_table_foreach. 00844 * 00845 * @param key the key 00846 * @param value the value corresponding to the above key 00847 * @param user_data a pointer which we always set to 1, so that we know what 00848 * will happen next. 00849 **/ 00850 void killchild(gpointer key, gpointer value, gpointer user_data) { 00851 pid_t *pid=value; 00852 00853 kill(*pid, SIGTERM); 00854 } 00855 00856 /** 00857 * Handle SIGTERM by setting atomically a flag which will be evaluated in the 00858 * main loop of the root server process. This allows us to separate the signal 00859 * catching from th actual task triggered by SIGTERM and hence processing in the 00860 * interrupt context is kept as minimial as possible. 00861 * 00862 * @param s the signal we're handling (must be SIGTERM, or something 00863 * is severely wrong). 00864 **/ 00865 static void sigterm_handler(const int s G_GNUC_UNUSED) { 00866 is_sigterm_caught = 1; 00867 } 00868 00869 /** 00870 * Handle SIGHUP by setting atomically a flag which will be evaluated in 00871 * the main loop of the root server process. This allows us to separate 00872 * the signal catching from th actual task triggered by SIGHUP and hence 00873 * processing in the interrupt context is kept as minimial as possible. 00874 * 00875 * @param s the signal we're handling (must be SIGHUP, or something 00876 * is severely wrong). 00877 **/ 00878 static void sighup_handler(const int s G_GNUC_UNUSED) { 00879 is_sighup_caught = 1; 00880 } 00881 00882 /** 00883 * Get the file handle and offset, given an export offset. 00884 * 00885 * @param client The client we're serving for 00886 * @param a The offset to get corresponding file/offset for 00887 * @param fhandle [out] File descriptor 00888 * @param foffset [out] Offset into fhandle 00889 * @param maxbytes [out] Tells how many bytes can be read/written 00890 * from fhandle starting at foffset (0 if there is no limit) 00891 * @return 0 on success, -1 on failure 00892 **/ 00893 int get_filepos(CLIENT *client, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) { 00894 00895 GArray * const export = client->export; 00896 00897 /* Negative offset not allowed */ 00898 if(a < 0) 00899 return -1; 00900 00901 /* Open separate file for treefiles */ 00902 if (client->server->flags & F_TREEFILES) { 00903 *foffset = a % TREEPAGESIZE; 00904 *maxbytes = (( 1 + (a/TREEPAGESIZE) ) * TREEPAGESIZE) - a; // start position of next block 00905 *fhandle = open_treefile(client->exportname, ((client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR), client->exportsize,a, &client->lock); 00906 return 0; 00907 } 00908 00909 /* Binary search for last file with starting offset <= a */ 00910 FILE_INFO fi; 00911 int start = 0; 00912 int end = export->len - 1; 00913 while( start <= end ) { 00914 int mid = (start + end) / 2; 00915 fi = g_array_index(export, FILE_INFO, mid); 00916 if( fi.startoff < a ) { 00917 start = mid + 1; 00918 } else if( fi.startoff > a ) { 00919 end = mid - 1; 00920 } else { 00921 start = end = mid; 00922 break; 00923 } 00924 } 00925 00926 /* end should never go negative, since first startoff is 0 and a >= 0 */ 00927 assert(end >= 0); 00928 00929 fi = g_array_index(export, FILE_INFO, end); 00930 *fhandle = fi.fhandle; 00931 *foffset = a - fi.startoff; 00932 *maxbytes = 0; 00933 if( end+1 < export->len ) { 00934 FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1); 00935 *maxbytes = fi_next.startoff - a; 00936 } 00937 00938 return 0; 00939 } 00940 00941 /** 00942 * Write an amount of bytes at a given offset to the right file. This 00943 * abstracts the write-side of the multiple file option. 00944 * 00945 * @param a The offset where the write should start 00946 * @param buf The buffer to write from 00947 * @param len The length of buf 00948 * @param client The client we're serving for 00949 * @param fua Flag to indicate 'Force Unit Access' 00950 * @return The number of bytes actually written, or -1 in case of an error 00951 **/ 00952 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { 00953 int fhandle; 00954 off_t foffset; 00955 size_t maxbytes; 00956 ssize_t retval; 00957 00958 if(get_filepos(client, a, &fhandle, &foffset, &maxbytes)) 00959 return -1; 00960 if(maxbytes && len > maxbytes) 00961 len = maxbytes; 00962 00963 DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua); 00964 00965 retval = pwrite(fhandle, buf, len, foffset); 00966 if(client->server->flags & F_SYNC) { 00967 fsync(fhandle); 00968 } else if (fua) { 00969 00970 /* This is where we would do the following 00971 * #ifdef USE_SYNC_FILE_RANGE 00972 * However, we don't, for the reasons set out below 00973 * by Christoph Hellwig <hch@infradead.org> 00974 * 00975 * [BEGINS] 00976 * fdatasync is equivalent to fsync except that it does not flush 00977 * non-essential metadata (basically just timestamps in practice), but it 00978 * does flush metadata requried to find the data again, e.g. allocation 00979 * information and extent maps. sync_file_range does nothing but flush 00980 * out pagecache content - it means you basically won't get your data 00981 * back in case of a crash if you either: 00982 * 00983 * a) have a volatile write cache in your disk (e.g. any normal SATA disk) 00984 * b) are using a sparse file on a filesystem 00985 * c) are using a fallocate-preallocated file on a filesystem 00986 * d) use any file on a COW filesystem like btrfs 00987 * 00988 * e.g. it only does anything useful for you if you do not have a volatile 00989 * write cache, and either use a raw block device node, or just overwrite 00990 * an already fully allocated (and not preallocated) file on a non-COW 00991 * filesystem. 00992 * [ENDS] 00993 * 00994 * What we should do is open a second FD with O_DSYNC set, then write to 00995 * that when appropriate. However, with a Linux client, every REQ_FUA 00996 * immediately follows a REQ_FLUSH, so fdatasync does not cause performance 00997 * problems. 00998 * 00999 */ 01000 #if 0 01001 sync_file_range(fhandle, foffset, len, 01002 SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | 01003 SYNC_FILE_RANGE_WAIT_AFTER); 01004 #else 01005 fdatasync(fhandle); 01006 #endif 01007 } 01008 /* close file pointer in case of treefiles */ 01009 if (client->server->flags & F_TREEFILES) { 01010 close(fhandle); 01011 } 01012 return retval; 01013 } 01014 01015 /** 01016 * Call rawexpwrite repeatedly until all data has been written. 01017 * 01018 * @param a The offset where the write should start 01019 * @param buf The buffer to write from 01020 * @param len The length of buf 01021 * @param client The client we're serving for 01022 * @param fua Flag to indicate 'Force Unit Access' 01023 * @return 0 on success, nonzero on failure 01024 **/ 01025 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) { 01026 ssize_t ret=0; 01027 01028 while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) { 01029 a += ret; 01030 buf += ret; 01031 len -= ret; 01032 } 01033 return (ret < 0 || len != 0); 01034 } 01035 01036 /** 01037 * Read an amount of bytes at a given offset from the right file. This 01038 * abstracts the read-side of the multiple files option. 01039 * 01040 * @param a The offset where the read should start 01041 * @param buf A buffer to read into 01042 * @param len The size of buf 01043 * @param client The client we're serving for 01044 * @return The number of bytes actually read, or -1 in case of an 01045 * error. 01046 **/ 01047 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) { 01048 int fhandle; 01049 off_t foffset; 01050 size_t maxbytes; 01051 ssize_t retval; 01052 01053 if(get_filepos(client, a, &fhandle, &foffset, &maxbytes)) 01054 return -1; 01055 if(maxbytes && len > maxbytes) 01056 len = maxbytes; 01057 01058 DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len); 01059 01060 retval = pread(fhandle, buf, len, foffset); 01061 if (client->server->flags & F_TREEFILES) { 01062 close(fhandle); 01063 } 01064 return retval; 01065 } 01066 01067 /** 01068 * Call rawexpread repeatedly until all data has been read. 01069 * @return 0 on success, nonzero on failure 01070 **/ 01071 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) { 01072 ssize_t ret=0; 01073 01074 while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) { 01075 a += ret; 01076 buf += ret; 01077 len -= ret; 01078 } 01079 return (ret < 0 || len != 0); 01080 } 01081 01082 /** 01083 * Read an amount of bytes at a given offset from the right file. This 01084 * abstracts the read-side of the copyonwrite stuff, and calls 01085 * rawexpread() with the right parameters to do the actual work. 01086 * @param a The offset where the read should start 01087 * @param buf A buffer to read into 01088 * @param len The size of buf 01089 * @param client The client we're going to read for 01090 * @return 0 on success, nonzero on failure 01091 **/ 01092 int expread(off_t a, char *buf, size_t len, CLIENT *client) { 01093 off_t rdlen, offset; 01094 off_t mapcnt, mapl, maph, pagestart; 01095 01096 if (!(client->server->flags & F_COPYONWRITE)) 01097 return(rawexpread_fully(a, buf, len, client)); 01098 DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); 01099 01100 mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE; 01101 01102 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) { 01103 pagestart=mapcnt*DIFFPAGESIZE; 01104 offset=a-pagestart; 01105 rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ? 01106 len : (size_t)DIFFPAGESIZE-offset; 01107 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ 01108 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, 01109 (unsigned long)(client->difmap[mapcnt])); 01110 myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset); 01111 if (read(client->difffile, buf, rdlen) != rdlen) return -1; 01112 } else { /* the block is not there */ 01113 DEBUG("Page %llu is not here, we read the original one\n", 01114 (unsigned long long)mapcnt); 01115 if(rawexpread_fully(a, buf, rdlen, client)) return -1; 01116 } 01117 len-=rdlen; a+=rdlen; buf+=rdlen; 01118 } 01119 return 0; 01120 } 01121 01122 /** 01123 * Write an amount of bytes at a given offset to the right file. This 01124 * abstracts the write-side of the copyonwrite option, and calls 01125 * rawexpwrite() with the right parameters to do the actual work. 01126 * 01127 * @param a The offset where the write should start 01128 * @param buf The buffer to write from 01129 * @param len The length of buf 01130 * @param client The client we're going to write for. 01131 * @param fua Flag to indicate 'Force Unit Access' 01132 * @return 0 on success, nonzero on failure 01133 **/ 01134 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) { 01135 char pagebuf[DIFFPAGESIZE]; 01136 off_t mapcnt,mapl,maph; 01137 off_t wrlen,rdlen; 01138 off_t pagestart; 01139 off_t offset; 01140 01141 if (!(client->server->flags & F_COPYONWRITE)) 01142 return(rawexpwrite_fully(a, buf, len, client, fua)); 01143 DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a); 01144 01145 mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ; 01146 01147 for (mapcnt=mapl;mapcnt<=maph;mapcnt++) { 01148 pagestart=mapcnt*DIFFPAGESIZE ; 01149 offset=a-pagestart ; 01150 wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ? 01151 len : (size_t)DIFFPAGESIZE-offset; 01152 01153 if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */ 01154 DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt, 01155 (unsigned long)(client->difmap[mapcnt])) ; 01156 myseek(client->difffile, 01157 client->difmap[mapcnt]*DIFFPAGESIZE+offset); 01158 if (write(client->difffile, buf, wrlen) != wrlen) return -1 ; 01159 } else { /* the block is not there */ 01160 myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ; 01161 client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++; 01162 DEBUG("Page %llu is not here, we put it at %lu\n", 01163 (unsigned long long)mapcnt, 01164 (unsigned long)(client->difmap[mapcnt])); 01165 rdlen=DIFFPAGESIZE ; 01166 if (rawexpread_fully(pagestart, pagebuf, rdlen, client)) 01167 return -1; 01168 memcpy(pagebuf+offset,buf,wrlen) ; 01169 if (write(client->difffile, pagebuf, DIFFPAGESIZE) != 01170 DIFFPAGESIZE) 01171 return -1; 01172 } 01173 len-=wrlen ; a+=wrlen ; buf+=wrlen ; 01174 } 01175 if (client->server->flags & F_SYNC) { 01176 fsync(client->difffile); 01177 } else if (fua) { 01178 /* open question: would it be cheaper to do multiple sync_file_ranges? 01179 as we iterate through the above? 01180 */ 01181 fdatasync(client->difffile); 01182 } 01183 return 0; 01184 } 01185 01186 /** 01187 * Flush data to a client 01188 * 01189 * @param client The client we're going to write for. 01190 * @return 0 on success, nonzero on failure 01191 **/ 01192 int expflush(CLIENT *client) { 01193 gint i; 01194 01195 if (client->server->flags & F_COPYONWRITE) { 01196 return fsync(client->difffile); 01197 } 01198 01199 if (client->server->flags & F_TREEFILES ) { 01200 // all we can do is force sync the entire filesystem containing the tree 01201 if (client->server->flags & F_READONLY) 01202 return 0; 01203 sync(); 01204 return 0; 01205 } 01206 01207 for (i = 0; i < client->export->len; i++) { 01208 FILE_INFO fi = g_array_index(client->export, FILE_INFO, i); 01209 if (fsync(fi.fhandle) < 0) 01210 return -1; 01211 } 01212 01213 return 0; 01214 } 01215 01216 void punch_hole(int fd, off_t off, off_t len) { 01217 DEBUG("punching hole in fd=%d, starting from %llu, length %llu\n", fd, (unsigned long long)off, (unsigned long long)len); 01218 #if HAVE_FALLOC_PH 01219 fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len); 01220 #elif HAVE_FSCTL_SET_ZERO_DATA 01221 FILE_ZERO_DATA_INFORMATION zerodata; 01222 zerodata.FileOffset.QuadPart = off; 01223 zerodata.BeyondFinalZero.QuadPart = off + len; 01224 HANDLE w32handle = (HANDLE)_get_osfhandle(fd); 01225 DWORD bytesret; 01226 DeviceIoControl(w32handle, FSCTL_SET_ZERO_DATA, &zerodata, sizeof(zerodata), NULL, 0, &bytesret, NULL); 01227 #else 01228 DEBUG("punching holes not supported on this platform\n"); 01229 #endif 01230 } 01231 01232 static void send_reply(uint32_t opt, int net, uint32_t reply_type, size_t datasize, void* data) { 01233 uint64_t magic = htonll(0x3e889045565a9LL); 01234 reply_type = htonl(reply_type); 01235 uint32_t datsize = htonl(datasize); 01236 opt = htonl(opt); 01237 struct iovec v_data[] = { 01238 { &magic, sizeof(magic) }, 01239 { &opt, sizeof(opt) }, 01240 { &reply_type, sizeof(reply_type) }, 01241 { &datsize, sizeof(datsize) }, 01242 { data, datasize }, 01243 }; 01244 size_t total = sizeof(magic) + sizeof(opt) + sizeof(reply_type) + sizeof(datsize) + datasize; 01245 ssize_t sent = writev(net, v_data, 5); 01246 if(sent != total) { 01247 perror("E: couldn't write enough data:"); 01248 } 01249 } 01250 01251 static CLIENT* handle_export_name(uint32_t opt, int net, GArray* servers, uint32_t cflags) { 01252 uint32_t namelen; 01253 char* name; 01254 int i; 01255 01256 if (read(net, &namelen, sizeof(namelen)) < 0) { 01257 err("Negotiation failed/7: %m"); 01258 return NULL; 01259 } 01260 namelen = ntohl(namelen); 01261 if(namelen > 0) { 01262 name = malloc(namelen+1); 01263 name[namelen]=0; 01264 if (read(net, name, namelen) < 0) { 01265 err("Negotiation failed/8: %m"); 01266 free(name); 01267 return NULL; 01268 } 01269 } else { 01270 name = strdup(""); 01271 } 01272 for(i=0; i<servers->len; i++) { 01273 SERVER* serve = &(g_array_index(servers, SERVER, i)); 01274 if(!strcmp(serve->servename, name)) { 01275 CLIENT* client = g_new0(CLIENT, 1); 01276 client->server = serve; 01277 client->exportsize = OFFT_MAX; 01278 client->net = net; 01279 client->modern = TRUE; 01280 client->transactionlogfd = -1; 01281 client->clientfeats = cflags; 01282 pthread_mutex_init(&(client->lock), NULL); 01283 free(name); 01284 return client; 01285 } 01286 } 01287 err("Negotiation failed/8a: Requested export not found"); 01288 free(name); 01289 return NULL; 01290 } 01291 01292 static void handle_list(uint32_t opt, int net, GArray* servers, uint32_t cflags) { 01293 uint32_t len; 01294 int i; 01295 char buf[1024]; 01296 char *ptr = buf + sizeof(len); 01297 01298 if (read(net, &len, sizeof(len)) < 0) 01299 err("Negotiation failed/8: %m"); 01300 len = ntohl(len); 01301 if(len) { 01302 send_reply(opt, net, NBD_REP_ERR_INVALID, 0, NULL); 01303 } 01304 if(!(glob_flags & F_LIST)) { 01305 send_reply(opt, net, NBD_REP_ERR_POLICY, 0, NULL); 01306 err_nonfatal("Client tried disallowed list option"); 01307 return; 01308 } 01309 for(i=0; i<servers->len; i++) { 01310 SERVER* serve = &(g_array_index(servers, SERVER, i)); 01311 len = htonl(strlen(serve->servename)); 01312 memcpy(buf, &len, sizeof(len)); 01313 strncpy(ptr, serve->servename, sizeof(buf) - sizeof(len)); 01314 send_reply(opt, net, NBD_REP_SERVER, strlen(serve->servename)+sizeof(len), buf); 01315 } 01316 send_reply(opt, net, NBD_REP_ACK, 0, NULL); 01317 } 01318 01319 /** 01320 * Do the initial negotiation. 01321 * 01322 * @param client The client we're negotiating with. 01323 **/ 01324 CLIENT* negotiate(int net, GArray* servers) { 01325 uint16_t smallflags = NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES; 01326 uint64_t magic; 01327 uint32_t cflags = 0; 01328 uint32_t opt; 01329 01330 assert(servers != NULL); 01331 if (write(net, INIT_PASSWD, 8) < 0) 01332 err_nonfatal("Negotiation failed/1: %m"); 01333 magic = htonll(opts_magic); 01334 if (write(net, &magic, sizeof(magic)) < 0) 01335 err_nonfatal("Negotiation failed/2: %m"); 01336 01337 smallflags = htons(smallflags); 01338 if (write(net, &smallflags, sizeof(uint16_t)) < 0) 01339 err_nonfatal("Negotiation failed/3: %m"); 01340 if (read(net, &cflags, sizeof(cflags)) < 0) 01341 err_nonfatal("Negotiation failed/4: %m"); 01342 cflags = htonl(cflags); 01343 if (cflags & NBD_FLAG_C_NO_ZEROES) { 01344 glob_flags |= F_NO_ZEROES; 01345 } 01346 do { 01347 if (read(net, &magic, sizeof(magic)) < 0) 01348 err_nonfatal("Negotiation failed/5: %m"); 01349 magic = ntohll(magic); 01350 if(magic != opts_magic) { 01351 err_nonfatal("Negotiation failed/5a: magic mismatch"); 01352 return NULL; 01353 } 01354 if (read(net, &opt, sizeof(opt)) < 0) 01355 err_nonfatal("Negotiation failed/6: %m"); 01356 opt = ntohl(opt); 01357 switch(opt) { 01358 case NBD_OPT_EXPORT_NAME: 01359 // NBD_OPT_EXPORT_NAME must be the last 01360 // selected option, so return from here 01361 // if that is chosen. 01362 return handle_export_name(opt, net, servers, cflags); 01363 break; 01364 case NBD_OPT_LIST: 01365 handle_list(opt, net, servers, cflags); 01366 break; 01367 case NBD_OPT_ABORT: 01368 // handled below 01369 break; 01370 default: 01371 send_reply(opt, net, NBD_REP_ERR_UNSUP, 0, NULL); 01372 break; 01373 } 01374 } while((opt != NBD_OPT_EXPORT_NAME) && (opt != NBD_OPT_ABORT)); 01375 if(opt == NBD_OPT_ABORT) { 01376 err_nonfatal("Session terminated by client"); 01377 return NULL; 01378 } 01379 err_nonfatal("Weird things happened: reached end of negotiation without success"); 01380 return NULL; 01381 } 01382 01383 void send_export_info(CLIENT* client) { 01384 uint64_t size_host = htonll((u64)(client->exportsize)); 01385 uint16_t flags = NBD_FLAG_HAS_FLAGS; 01386 01387 if (write(client->net, &size_host, 8) < 0) 01388 err("Negotiation failed/9: %m"); 01389 if (client->server->flags & F_READONLY) 01390 flags |= NBD_FLAG_READ_ONLY; 01391 if (client->server->flags & F_FLUSH) 01392 flags |= NBD_FLAG_SEND_FLUSH; 01393 if (client->server->flags & F_FUA) 01394 flags |= NBD_FLAG_SEND_FUA; 01395 if (client->server->flags & F_ROTATIONAL) 01396 flags |= NBD_FLAG_ROTATIONAL; 01397 if (client->server->flags & F_TRIM) 01398 flags |= NBD_FLAG_SEND_TRIM; 01399 flags = htons(flags); 01400 if (write(client->net, &flags, sizeof(flags)) < 0) 01401 err("Negotiation failed/11: %m"); 01402 if (!(glob_flags & F_NO_ZEROES)) { 01403 char zeros[128]; 01404 memset(zeros, '\0', sizeof(zeros)); 01405 if (write(client->net, zeros, 124) < 0) 01406 err("Negotiation failed/12: %m"); 01407 } 01408 } 01409 01410 static int nbd_errno(int errcode) { 01411 switch (errcode) { 01412 case EPERM: 01413 return htonl(1); 01414 case EIO: 01415 return htonl(5); 01416 case ENOMEM: 01417 return htonl(12); 01418 case EINVAL: 01419 return htonl(22); 01420 case EFBIG: 01421 case ENOSPC: 01422 #ifdef EDQUOT 01423 case EDQUOT: 01424 #endif 01425 return htonl(28); // ENOSPC 01426 default: 01427 return htonl(22); // EINVAL 01428 } 01429 } 01430 01431 static void package_dispose(struct work_package* package) { 01432 g_free(package->data); 01433 g_free(package->req); 01434 g_free(package); 01435 } 01436 01437 struct work_package* package_create(CLIENT* client, struct nbd_request* req) { 01438 struct work_package* rv = calloc(sizeof (struct work_package), 1); 01439 01440 rv->req = req; 01441 rv->client = client; 01442 01443 if((req->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) 01444 rv->data = malloc(req->len); 01445 01446 return rv; 01447 } 01448 01449 static void setup_reply(struct nbd_reply* rep, struct nbd_request* req) { 01450 rep->magic = htonl(NBD_REPLY_MAGIC); 01451 rep->error = 0; 01452 memcpy(&(rep->handle), &(req->handle), sizeof(req->handle)); 01453 } 01454 01455 static void handle_read(CLIENT* client, struct nbd_request* req) { 01456 struct nbd_reply rep; 01457 void* buf = malloc(req->len); 01458 if(!buf) { 01459 err("Could not allocate memory for request"); 01460 } 01461 DEBUG("handling read request\n"); 01462 setup_reply(&rep, req); 01463 if(expread(req->from, buf, req->len, client)) { 01464 DEBUG("Read failed: %m"); 01465 rep.error = nbd_errno(errno); 01466 } 01467 pthread_mutex_lock(&(client->lock)); 01468 writeit(client->net, &rep, sizeof rep); 01469 if(!rep.error) { 01470 writeit(client->net, buf, req->len); 01471 } 01472 pthread_mutex_unlock(&(client->lock)); 01473 free(buf); 01474 } 01475 01476 static void handle_write(CLIENT* client, struct nbd_request* req, void* data) { 01477 struct nbd_reply rep; 01478 DEBUG("handling write request\n"); 01479 setup_reply(&rep, req); 01480 01481 if ((client->server->flags & F_READONLY) || 01482 (client->server->flags & F_AUTOREADONLY)) { 01483 DEBUG("[WRITE to READONLY!]"); 01484 rep.error = nbd_errno(EPERM); 01485 } else { 01486 if(expwrite(req->from, data, req->len, client, (req->type &~NBD_CMD_MASK_COMMAND))) { 01487 DEBUG("Write failed: %m"); 01488 rep.error = nbd_errno(errno); 01489 } 01490 } 01491 pthread_mutex_lock(&(client->lock)); 01492 writeit(client->net, &rep, sizeof rep); 01493 pthread_mutex_unlock(&(client->lock)); 01494 } 01495 01496 static void handle_flush(CLIENT* client, struct nbd_request* req) { 01497 struct nbd_reply rep; 01498 DEBUG("handling flush request\n"); 01499 setup_reply(&rep, req); 01500 if(expflush(client)) { 01501 DEBUG("Flush failed: %m"); 01502 rep.error = nbd_errno(errno); 01503 } 01504 pthread_mutex_lock(&(client->lock)); 01505 writeit(client->net, &rep, sizeof rep); 01506 pthread_mutex_unlock(&(client->lock)); 01507 } 01508 01509 static void handle_trim(CLIENT* client, struct nbd_request* req) { 01510 struct nbd_reply rep; 01511 DEBUG("handling trim request\n"); 01512 setup_reply(&rep, req); 01513 if(exptrim(req, client)) { 01514 DEBUG("Trim failed: %m"); 01515 rep.error = nbd_errno(errno); 01516 } 01517 pthread_mutex_lock(&(client->lock)); 01518 writeit(client->net, &rep, sizeof rep); 01519 pthread_mutex_unlock(&(client->lock)); 01520 } 01521 01522 static void handle_request(gpointer data, gpointer user_data) { 01523 struct work_package* package = (struct work_package*) data; 01524 uint32_t type = package->req->type & NBD_CMD_MASK_COMMAND; 01525 uint32_t flags = package->req->type & ~NBD_CMD_MASK_COMMAND; 01526 struct nbd_reply rep; 01527 01528 if(flags & ~NBD_CMD_FLAG_FUA) { 01529 msg(LOG_ERR, "E: received invalid flag %d on command %d, ignoring", flags, type); 01530 goto error; 01531 } 01532 01533 switch(type) { 01534 case NBD_CMD_READ: 01535 handle_read(package->client, package->req); 01536 break; 01537 case NBD_CMD_WRITE: 01538 handle_write(package->client, package->req, package->data); 01539 break; 01540 case NBD_CMD_FLUSH: 01541 handle_flush(package->client, package->req); 01542 break; 01543 case NBD_CMD_TRIM: 01544 handle_trim(package->client, package->req); 01545 break; 01546 default: 01547 msg(LOG_ERR, "E: received unknown command %d of type, ignoring", package->req->type); 01548 goto error; 01549 } 01550 goto end; 01551 error: 01552 setup_reply(&rep, package->req); 01553 rep.error = nbd_errno(EINVAL); 01554 pthread_mutex_lock(&(package->client->lock)); 01555 writeit(package->client->net, &rep, sizeof rep); 01556 pthread_mutex_unlock(&(package->client->lock)); 01557 end: 01558 package_dispose(package); 01559 } 01560 01561 static int mainloop_threaded(CLIENT* client) { 01562 struct nbd_request* req; 01563 struct work_package* pkg; 01564 01565 send_export_info(client); 01566 DEBUG("Entering request loop\n"); 01567 while(1) { 01568 req = calloc(sizeof (struct nbd_request), 1); 01569 01570 readit(client->net, req, sizeof(struct nbd_request)); 01571 if(client->transactionlogfd != -1) { 01572 writeit(client->transactionlogfd, req, sizeof(struct nbd_request)); 01573 } 01574 01575 req->from = ntohll(req->from); 01576 req->type = ntohl(req->type); 01577 req->len = ntohl(req->len); 01578 01579 if(req->magic != htonl(NBD_REQUEST_MAGIC)) 01580 err("Protocol error: not enough magic."); 01581 01582 pkg = package_create(client, req); 01583 01584 if((req->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) { 01585 readit(client->net, pkg->data, req->len); 01586 } 01587 if(req->type == NBD_CMD_DISC) { 01588 g_thread_pool_free(tpool, FALSE, TRUE); 01589 return 0; 01590 } 01591 g_thread_pool_push(tpool, pkg, NULL); 01592 } 01593 } 01594 01595 /** sending macro. */ 01596 #define SEND(net,reply) { writeit( net, &reply, sizeof( reply )); \ 01597 if (client->transactionlogfd != -1) \ 01598 writeit(client->transactionlogfd, &reply, sizeof(reply)); } 01599 /** error macro. */ 01600 #define ERROR(client,reply,errcode) { reply.error = nbd_errno(errcode); SEND(client->net,reply); reply.error = 0; } 01601 /** 01602 * Serve a file to a single client. 01603 * 01604 * @todo This beast needs to be split up in many tiny little manageable 01605 * pieces. Preferably with a chainsaw. 01606 * 01607 * @param client The client we're going to serve to. 01608 * @return when the client disconnects 01609 **/ 01610 int mainloop(CLIENT *client) { 01611 struct nbd_request request; 01612 struct nbd_reply reply; 01613 gboolean go_on=TRUE; 01614 #ifdef DODBG 01615 int i = 0; 01616 #endif 01617 send_export_info(client); 01618 DEBUG("Entering request loop!\n"); 01619 reply.magic = htonl(NBD_REPLY_MAGIC); 01620 reply.error = 0; 01621 while (go_on) { 01622 char buf[BUFSIZE]; 01623 char* p; 01624 size_t len; 01625 size_t currlen; 01626 size_t writelen; 01627 uint16_t command; 01628 #ifdef DODBG 01629 i++; 01630 printf("%d: ", i); 01631 #endif 01632 readit(client->net, &request, sizeof(request)); 01633 if (client->transactionlogfd != -1) 01634 writeit(client->transactionlogfd, &request, sizeof(request)); 01635 01636 request.from = ntohll(request.from); 01637 request.type = ntohl(request.type); 01638 command = request.type & NBD_CMD_MASK_COMMAND; 01639 len = ntohl(request.len); 01640 01641 DEBUG("%s from %llu (%llu) len %u, ", getcommandname(command), 01642 (unsigned long long)request.from, 01643 (unsigned long long)request.from / 512, len); 01644 01645 if (request.magic != htonl(NBD_REQUEST_MAGIC)) 01646 err("Not enough magic."); 01647 01648 memcpy(reply.handle, request.handle, sizeof(reply.handle)); 01649 01650 if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ) || 01651 (command==NBD_CMD_TRIM)) { 01652 if (request.from + len < request.from) { // 64 bit overflow!! 01653 DEBUG("[Number too large!]"); 01654 ERROR(client, reply, EINVAL); 01655 continue; 01656 } 01657 01658 if (((off_t)request.from + len) > client->exportsize) { 01659 DEBUG("[RANGE!]"); 01660 ERROR(client, reply, (command==NBD_CMD_WRITE) ? ENOSPC : EINVAL); 01661 continue; 01662 } 01663 01664 currlen = len; 01665 if (currlen > BUFSIZE - sizeof(struct nbd_reply)) { 01666 currlen = BUFSIZE - sizeof(struct nbd_reply); 01667 if(!logged_oversized) { 01668 msg(LOG_DEBUG, "oversized request (this is not a problem)"); 01669 logged_oversized = true; 01670 } 01671 } 01672 } 01673 01674 switch (command) { 01675 01676 case NBD_CMD_DISC: 01677 msg(LOG_INFO, "Disconnect request received."); 01678 if (client->server->flags & F_COPYONWRITE) { 01679 if (client->difmap) g_free(client->difmap) ; 01680 close(client->difffile); 01681 unlink(client->difffilename); 01682 free(client->difffilename); 01683 } 01684 go_on=FALSE; 01685 continue; 01686 01687 case NBD_CMD_WRITE: 01688 DEBUG("wr: net->buf, "); 01689 while(len > 0) { 01690 readit(client->net, buf, currlen); 01691 DEBUG("buf->exp, "); 01692 if ((client->server->flags & F_READONLY) || 01693 (client->server->flags & F_AUTOREADONLY)) { 01694 DEBUG("[WRITE to READONLY!]"); 01695 ERROR(client, reply, EPERM); 01696 consume(client->net, buf, len-currlen, BUFSIZE); 01697 continue; 01698 } 01699 if (expwrite(request.from, buf, currlen, client, 01700 request.type & NBD_CMD_FLAG_FUA)) { 01701 DEBUG("Write failed: %m" ); 01702 ERROR(client, reply, errno); 01703 consume(client->net, buf, len-currlen, BUFSIZE); 01704 continue; 01705 } 01706 len -= currlen; 01707 request.from += currlen; 01708 currlen = (len < BUFSIZE) ? len : BUFSIZE; 01709 } 01710 SEND(client->net, reply); 01711 DEBUG("OK!\n"); 01712 continue; 01713 01714 case NBD_CMD_FLUSH: 01715 DEBUG("fl: "); 01716 if (expflush(client)) { 01717 DEBUG("Flush failed: %m"); 01718 ERROR(client, reply, errno); 01719 continue; 01720 } 01721 SEND(client->net, reply); 01722 DEBUG("OK!\n"); 01723 continue; 01724 01725 case NBD_CMD_READ: 01726 DEBUG("exp->buf, "); 01727 if (client->transactionlogfd != -1) 01728 writeit(client->transactionlogfd, &reply, sizeof(reply)); 01729 writeit(client->net, &reply, sizeof(reply)); 01730 p = buf; 01731 writelen = currlen; 01732 while(len > 0) { 01733 if (expread(request.from, p, currlen, client)) { 01734 DEBUG("Read failed: %m"); 01735 ERROR(client, reply, errno); 01736 continue; 01737 } 01738 01739 DEBUG("buf->net, "); 01740 writeit(client->net, buf, writelen); 01741 len -= currlen; 01742 request.from += currlen; 01743 currlen = (len < BUFSIZE) ? len : BUFSIZE; 01744 p = buf; 01745 writelen = currlen; 01746 } 01747 DEBUG("OK!\n"); 01748 continue; 01749 01750 case NBD_CMD_TRIM: 01751 /* The kernel module sets discard_zeroes_data == 0, 01752 * so it is okay to do nothing. */ 01753 if ((client->server->flags & F_READONLY) || 01754 (client->server->flags & F_AUTOREADONLY)) { 01755 DEBUG("[TRIM to READONLY!]"); 01756 ERROR(client, reply, EPERM); 01757 continue; 01758 } 01759 if (exptrim(&request, client)) { 01760 DEBUG("Trim failed: %m"); 01761 ERROR(client, reply, errno); 01762 continue; 01763 } 01764 SEND(client->net, reply); 01765 continue; 01766 01767 default: 01768 DEBUG ("Ignoring unknown command\n"); 01769 continue; 01770 } 01771 } 01772 return 0; 01773 } 01774 01775 /** 01776 * Set up client export array, which is an array of FILE_INFO. 01777 * Also, split a single exportfile into multiple ones, if that was asked. 01778 * @param client information on the client which we want to setup export for 01779 **/ 01780 void setupexport(CLIENT* client) { 01781 int i; 01782 off_t laststartoff = 0, lastsize = 0; 01783 int multifile = (client->server->flags & F_MULTIFILE); 01784 int treefile = (client->server->flags & F_TREEFILES); 01785 int temporary = (client->server->flags & F_TEMPORARY) && !multifile; 01786 int cancreate = (client->server->expected_size) && !multifile; 01787 01788 if (treefile) { 01789 client->export = NULL; // this could be thousands of files so we open handles on demand although its slower 01790 client->exportsize = client->server->expected_size; // available space is not checked, as it could change during runtime anyway 01791 } else { 01792 client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO)); 01793 01794 /* If multi-file, open as many files as we can. 01795 * If not, open exactly one file. 01796 * Calculate file sizes as we go to get total size. */ 01797 for(i=0; ; i++) { 01798 FILE_INFO fi; 01799 gchar *tmpname; 01800 gchar* error_string; 01801 01802 if (i) 01803 cancreate = 0; 01804 /* if expected_size is specified, and this is the first file, we can create the file */ 01805 mode_t mode = (client->server->flags & F_READONLY) ? 01806 O_RDONLY : (O_RDWR | (cancreate?O_CREAT:0)); 01807 01808 if (temporary) { 01809 tmpname=g_strdup_printf("%s.%d-XXXXXX", client->exportname, i); 01810 DEBUG( "Opening %s\n", tmpname ); 01811 fi.fhandle = mkstemp(tmpname); 01812 } else { 01813 if(multifile) { 01814 tmpname=g_strdup_printf("%s.%d", client->exportname, i); 01815 } else { 01816 tmpname=g_strdup(client->exportname); 01817 } 01818 DEBUG( "Opening %s\n", tmpname ); 01819 fi.fhandle = open(tmpname, mode, 0600); 01820 if(fi.fhandle == -1 && mode == O_RDWR) { 01821 /* Try again because maybe media was read-only */ 01822 fi.fhandle = open(tmpname, O_RDONLY); 01823 if(fi.fhandle != -1) { 01824 /* Opening the base file in copyonwrite mode is 01825 * okay */ 01826 if(!(client->server->flags & F_COPYONWRITE)) { 01827 client->server->flags |= F_AUTOREADONLY; 01828 client->server->flags |= F_READONLY; 01829 } 01830 } 01831 } 01832 } 01833 if(fi.fhandle == -1) { 01834 if(multifile && i>0) 01835 break; 01836 error_string=g_strdup_printf( 01837 "Could not open exported file %s: %%m", 01838 tmpname); 01839 err(error_string); 01840 } 01841 01842 if (temporary) { 01843 unlink(tmpname); /* File will stick around whilst FD open */ 01844 } 01845 01846 fi.startoff = laststartoff + lastsize; 01847 g_array_append_val(client->export, fi); 01848 g_free(tmpname); 01849 01850 /* Starting offset and size of this file will be used to 01851 * calculate starting offset of next file */ 01852 laststartoff = fi.startoff; 01853 lastsize = size_autodetect(fi.fhandle); 01854 01855 /* If we created the file, it will be length zero */ 01856 if (!lastsize && cancreate) { 01857 assert(!multifile); 01858 if(ftruncate (fi.fhandle, client->server->expected_size)<0) { 01859 err("Could not expand file: %m"); 01860 } 01861 lastsize = client->server->expected_size; 01862 break; /* don't look for any more files */ 01863 } 01864 01865 if(!multifile || temporary) 01866 break; 01867 } 01868 01869 /* Set export size to total calculated size */ 01870 client->exportsize = laststartoff + lastsize; 01871 01872 /* Export size may be overridden */ 01873 if(client->server->expected_size) { 01874 /* desired size must be <= total calculated size */ 01875 if(client->server->expected_size > client->exportsize) { 01876 err("Size of exported file is too big\n"); 01877 } 01878 01879 client->exportsize = client->server->expected_size; 01880 } 01881 } 01882 01883 msg(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize); 01884 if(multifile) { 01885 msg(LOG_INFO, "Total number of files: %d", i); 01886 } 01887 if(treefile) { 01888 msg(LOG_INFO, "Total number of (potential) files: %" PRId64, (client->exportsize+TREEPAGESIZE-1)/TREEPAGESIZE); 01889 } 01890 } 01891 01892 int copyonwrite_prepare(CLIENT* client) { 01893 off_t i; 01894 gchar* dir; 01895 gchar* export_base; 01896 if (client->server->cowdir != NULL) { 01897 dir = g_strdup(client->server->cowdir); 01898 } else { 01899 dir = g_strdup(dirname(client->exportname)); 01900 } 01901 export_base = g_strdup(basename(client->exportname)); 01902 client->difffilename = g_strdup_printf("%s/%s-%s-%d.diff",dir,export_base,client->clientname, 01903 (int)getpid()); 01904 g_free(dir); 01905 g_free(export_base); 01906 msg(LOG_INFO, "About to create map and diff file %s", client->difffilename) ; 01907 client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ; 01908 if (client->difffile<0) err("Could not create diff file (%m)") ; 01909 if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL) 01910 err("Could not allocate memory") ; 01911 for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ; 01912 01913 return 0; 01914 } 01915 01916 /** 01917 * Run a command. This is used for the ``prerun'' and ``postrun'' config file 01918 * options 01919 * 01920 * @param command the command to be ran. Read from the config file 01921 * @param file the file name we're about to export 01922 **/ 01923 int do_run(gchar* command, gchar* file) { 01924 gchar* cmd; 01925 int retval=0; 01926 01927 if(command && *command) { 01928 cmd = g_strdup_printf(command, file); 01929 retval=system(cmd); 01930 g_free(cmd); 01931 } 01932 return retval; 01933 } 01934 01935 /** 01936 * Serve a connection. 01937 * 01938 * @todo allow for multithreading, perhaps use libevent. Not just yet, though; 01939 * follow the road map. 01940 * 01941 * @param client a connected client 01942 **/ 01943 void serveconnection(CLIENT *client) { 01944 if (client->server->transactionlog && (client->transactionlogfd == -1)) 01945 { 01946 if (-1 == (client->transactionlogfd = open(client->server->transactionlog, 01947 O_WRONLY | O_CREAT, 01948 S_IRUSR | S_IWUSR))) 01949 g_warning("Could not open transaction log %s", 01950 client->server->transactionlog); 01951 } 01952 01953 if(do_run(client->server->prerun, client->exportname)) { 01954 exit(EXIT_FAILURE); 01955 } 01956 setupexport(client); 01957 01958 if (client->server->flags & F_COPYONWRITE) { 01959 copyonwrite_prepare(client); 01960 } 01961 01962 setmysockopt(client->net); 01963 01964 mainloop_threaded(client); 01965 do_run(client->server->postrun, client->exportname); 01966 01967 if (-1 != client->transactionlogfd) 01968 { 01969 close(client->transactionlogfd); 01970 client->transactionlogfd = -1; 01971 } 01972 } 01973 01974 /** 01975 * Find the name of the file we have to serve. This will use g_strdup_printf 01976 * to put the IP address of the client inside a filename containing 01977 * "%s" (in the form as specified by the "virtstyle" option). That name 01978 * is then written to client->exportname. 01979 * 01980 * @param net A socket connected to an nbd client 01981 * @param client information about the client. The IP address in human-readable 01982 * format will be written to a new char* buffer, the address of which will be 01983 * stored in client->clientname. 01984 * @return: 0 - OK, -1 - failed. 01985 **/ 01986 int set_peername(int net, CLIENT *client) { 01987 struct sockaddr_storage netaddr; 01988 struct sockaddr* addr = (struct sockaddr*)&netaddr; 01989 socklen_t addrinlen = sizeof( struct sockaddr_storage ); 01990 struct addrinfo hints; 01991 struct addrinfo *ai = NULL; 01992 char peername[NI_MAXHOST]; 01993 char netname[NI_MAXHOST]; 01994 char *tmp = NULL; 01995 int i; 01996 int e; 01997 01998 if (getsockname(net, addr, &addrinlen) < 0) { 01999 msg(LOG_INFO, "getsockname failed: %m"); 02000 return -1; 02001 } 02002 02003 if(netaddr.ss_family == AF_UNIX) { 02004 client->clientaddr.ss_family = AF_UNIX; 02005 strcpy(peername, "unix"); 02006 } else { 02007 if (getpeername(net, (struct sockaddr *) &(client->clientaddr), &addrinlen) < 0) { 02008 msg(LOG_INFO, "getpeername failed: %m"); 02009 return -1; 02010 } 02011 if((e = getnameinfo((struct sockaddr *)&(client->clientaddr), addrinlen, 02012 peername, sizeof (peername), NULL, 0, NI_NUMERICHOST))) { 02013 msg(LOG_INFO, "getnameinfo failed: %s", gai_strerror(e)); 02014 return -1; 02015 } 02016 02017 memset(&hints, '\0', sizeof (hints)); 02018 hints.ai_flags = AI_ADDRCONFIG; 02019 e = getaddrinfo(peername, NULL, &hints, &ai); 02020 02021 if(e != 0) { 02022 msg(LOG_INFO, "getaddrinfo failed: %s", gai_strerror(e)); 02023 freeaddrinfo(ai); 02024 return -1; 02025 } 02026 } 02027 02028 if(strncmp(peername, "::ffff:", 7) == 0) { 02029 memmove(peername, peername+7, strlen(peername)); 02030 } 02031 02032 switch(client->server->virtstyle) { 02033 case VIRT_NONE: 02034 msg(LOG_DEBUG, "virtualization is off"); 02035 client->exportname=g_strdup(client->server->exportname); 02036 break; 02037 case VIRT_IPHASH: 02038 msg(LOG_DEBUG, "virtstyle iphash"); 02039 for(i=0;i<strlen(peername);i++) { 02040 if(peername[i]=='.') { 02041 peername[i]='/'; 02042 } 02043 } 02044 case VIRT_IPLIT: 02045 msg(LOG_DEBUG, "virtstyle ipliteral"); 02046 client->exportname=g_strdup_printf(client->server->exportname, peername); 02047 break; 02048 case VIRT_CIDR: 02049 msg(LOG_DEBUG, "virtstyle cidr %d", client->server->cidrlen); 02050 memcpy(&netaddr, &(client->clientaddr), addrinlen); 02051 int addrbits; 02052 if(client->clientaddr.ss_family == AF_UNIX) { 02053 tmp = g_strdup(peername); 02054 } else { 02055 assert((ai->ai_family == AF_INET) || (ai->ai_family == AF_INET6)); 02056 if(ai->ai_family == AF_INET) { 02057 addrbits = 32; 02058 } else if(ai->ai_family == AF_INET6) { 02059 addrbits = 128; 02060 } 02061 uint8_t* addrptr = (uint8_t*)(((struct sockaddr*)&netaddr)->sa_data); 02062 for(int i = 0; i < addrbits; i+=8) { 02063 int masklen = client->server->cidrlen - i; 02064 masklen = masklen > 0 ? masklen : 0; 02065 uint8_t mask = getmaskbyte(masklen); 02066 *addrptr &= mask; 02067 addrptr++; 02068 } 02069 getnameinfo((struct sockaddr *) &netaddr, addrinlen, 02070 netname, sizeof (netname), NULL, 0, NI_NUMERICHOST); 02071 tmp=g_strdup_printf("%s/%s", netname, peername); 02072 } 02073 02074 if(tmp != NULL) { 02075 client->exportname=g_strdup_printf(client->server->exportname, tmp); 02076 g_free(tmp); 02077 } 02078 02079 break; 02080 } 02081 02082 freeaddrinfo(ai); 02083 msg(LOG_INFO, "connect from %s, assigned file is %s", 02084 peername, client->exportname); 02085 client->clientname=g_strdup(peername); 02086 return 0; 02087 } 02088 02089 /** 02090 * Destroy a pid_t* 02091 * @param data a pointer to pid_t which should be freed 02092 **/ 02093 void destroy_pid_t(gpointer data) { 02094 g_free(data); 02095 } 02096 02097 static pid_t 02098 spawn_child() 02099 { 02100 pid_t pid; 02101 sigset_t newset; 02102 sigset_t oldset; 02103 02104 sigemptyset(&newset); 02105 sigaddset(&newset, SIGCHLD); 02106 sigaddset(&newset, SIGTERM); 02107 sigprocmask(SIG_BLOCK, &newset, &oldset); 02108 pid = fork(); 02109 if (pid < 0) { 02110 msg(LOG_ERR, "Could not fork (%s)", strerror(errno)); 02111 goto out; 02112 } 02113 if (pid > 0) { /* Parent */ 02114 pid_t *pidp; 02115 02116 pidp = g_malloc(sizeof(pid_t)); 02117 *pidp = pid; 02118 g_hash_table_insert(children, pidp, pidp); 02119 goto out; 02120 } 02121 /* Child */ 02122 02123 /* Child's signal disposition is reset to default. */ 02124 signal(SIGCHLD, SIG_DFL); 02125 signal(SIGTERM, SIG_DFL); 02126 signal(SIGHUP, SIG_DFL); 02127 sigemptyset(&oldset); 02128 out: 02129 sigprocmask(SIG_SETMASK, &oldset, NULL); 02130 return pid; 02131 } 02132 02133 static int 02134 socket_accept(const int sock) 02135 { 02136 struct sockaddr_storage addrin; 02137 socklen_t addrinlen = sizeof(addrin); 02138 int net; 02139 02140 net = accept(sock, (struct sockaddr *) &addrin, &addrinlen); 02141 if (net < 0) { 02142 err_nonfatal("Failed to accept socket connection: %m"); 02143 } 02144 02145 return net; 02146 } 02147 02148 static void 02149 handle_modern_connection(GArray *const servers, const int sock) 02150 { 02151 int net; 02152 pid_t pid; 02153 CLIENT *client = NULL; 02154 int sock_flags_old; 02155 int sock_flags_new; 02156 02157 net = socket_accept(sock); 02158 if (net < 0) 02159 return; 02160 02161 if (!dontfork) { 02162 pid = spawn_child(); 02163 if (pid) { 02164 if (pid > 0) 02165 msg(LOG_INFO, "Spawned a child process"); 02166 if (pid < 0) 02167 msg(LOG_ERR, "Failed to spawn a child process"); 02168 close(net); 02169 return; 02170 } 02171 /* Child just continues. */ 02172 } 02173 02174 client = negotiate(net, servers); 02175 if (!client) { 02176 msg(LOG_ERR, "Modern initial negotiation failed"); 02177 goto handler_err; 02178 } 02179 02180 if (client->server->max_connections > 0 && 02181 g_hash_table_size(children) >= client->server->max_connections) { 02182 msg(LOG_ERR, "Max connections (%d) reached", 02183 client->server->max_connections); 02184 goto handler_err; 02185 } 02186 02187 sock_flags_old = fcntl(net, F_GETFL, 0); 02188 if (sock_flags_old == -1) { 02189 msg(LOG_ERR, "Failed to get socket flags"); 02190 goto handler_err; 02191 } 02192 02193 sock_flags_new = sock_flags_old & ~O_NONBLOCK; 02194 if (sock_flags_new != sock_flags_old && 02195 fcntl(net, F_SETFL, sock_flags_new) == -1) { 02196 msg(LOG_ERR, "Failed to set socket to blocking mode"); 02197 goto handler_err; 02198 } 02199 02200 if (set_peername(net, client)) { 02201 msg(LOG_ERR, "Failed to set peername"); 02202 goto handler_err; 02203 } 02204 02205 if (!authorized_client(client)) { 02206 msg(LOG_INFO, "Client '%s' is not authorized to access", 02207 client->clientname); 02208 goto handler_err; 02209 } 02210 02211 if (!dontfork) { 02212 int i; 02213 02214 /* Free all root server resources here, because we are 02215 * currently in the child process serving one specific 02216 * connection. These are not simply needed anymore. */ 02217 g_hash_table_destroy(children); 02218 children = NULL; 02219 for (i = 0; i < modernsocks->len; i++) { 02220 close(g_array_index(modernsocks, int, i)); 02221 } 02222 g_array_free(modernsocks, TRUE); 02223 02224 /* Now that we are in the child process after a 02225 * succesful negotiation, we do not need the list of 02226 * servers anymore, get rid of it.*/ 02227 /* FALSE does not free the 02228 actual data. This is required, 02229 because the client has a 02230 direct reference into that 02231 data, and otherwise we get a 02232 segfault... */ 02233 g_array_free(servers, FALSE); 02234 } 02235 02236 msg(LOG_INFO, "Starting to serve"); 02237 serveconnection(client); 02238 exit(EXIT_SUCCESS); 02239 02240 handler_err: 02241 g_free(client); 02242 close(net); 02243 02244 if (!dontfork) { 02245 exit(EXIT_FAILURE); 02246 } 02247 } 02248 02249 /** 02250 * Return the index of the server whose servename matches the given 02251 * name. 02252 * 02253 * @param servename a string to match 02254 * @param servers an array of servers 02255 * @return the first index of the server whose servename matches the 02256 * given name or -1 if one cannot be found 02257 **/ 02258 static int get_index_by_servename(const gchar *const servename, 02259 const GArray *const servers) { 02260 int i; 02261 02262 for (i = 0; i < servers->len; ++i) { 02263 const SERVER server = g_array_index(servers, SERVER, i); 02264 02265 if (strcmp(servename, server.servename) == 0) 02266 return i; 02267 } 02268 02269 return -1; 02270 } 02271 02272 /** 02273 * Parse configuration files and add servers to the array if they don't 02274 * already exist there. The existence is tested by comparing 02275 * servenames. A server is appended to the array only if its servename 02276 * is unique among all other servers. 02277 * 02278 * @param servers an array of servers 02279 * @return the number of new servers appended to the array, or -1 in 02280 * case of an error 02281 **/ 02282 static int append_new_servers(GArray *const servers, GError **const gerror) { 02283 int i; 02284 GArray *new_servers; 02285 const int old_len = servers->len; 02286 int retval = -1; 02287 struct generic_conf genconf; 02288 02289 new_servers = parse_cfile(config_file_pos, &genconf, true, gerror); 02290 g_thread_pool_set_max_threads(tpool, genconf.threads, NULL); 02291 if (!new_servers) 02292 goto out; 02293 02294 for (i = 0; i < new_servers->len; ++i) { 02295 SERVER new_server = g_array_index(new_servers, SERVER, i); 02296 02297 if (new_server.servename 02298 && -1 == get_index_by_servename(new_server.servename, 02299 servers)) { 02300 g_array_append_val(servers, new_server); 02301 } 02302 } 02303 02304 retval = servers->len - old_len; 02305 out: 02306 g_array_free(new_servers, TRUE); 02307 02308 return retval; 02309 } 02310 02311 /** 02312 * Loop through the available servers, and serve them. Never returns. 02313 **/ 02314 void serveloop(GArray* servers) { 02315 int i; 02316 int max; 02317 fd_set mset; 02318 fd_set rset; 02319 sigset_t blocking_mask; 02320 sigset_t original_mask; 02321 02322 /* 02323 * Set up the master fd_set. The set of descriptors we need 02324 * to select() for never changes anyway and it buys us a *lot* 02325 * of time to only build this once. However, if we ever choose 02326 * to not fork() for clients anymore, we may have to revisit 02327 * this. 02328 */ 02329 max=0; 02330 FD_ZERO(&mset); 02331 for(i=0;i<modernsocks->len;i++) { 02332 int sock = g_array_index(modernsocks, int, i); 02333 FD_SET(sock, &mset); 02334 max=sock>max?sock:max; 02335 } 02336 02337 /* Construct a signal mask which is used to make signal testing and 02338 * receiving an atomic operation to ensure no signal is received between 02339 * tests and blocking pselect(). */ 02340 if (sigemptyset(&blocking_mask) == -1) 02341 err("failed to initialize blocking_mask: %m"); 02342 02343 if (sigaddset(&blocking_mask, SIGCHLD) == -1) 02344 err("failed to add SIGCHLD to blocking_mask: %m"); 02345 02346 if (sigaddset(&blocking_mask, SIGHUP) == -1) 02347 err("failed to add SIGHUP to blocking_mask: %m"); 02348 02349 if (sigaddset(&blocking_mask, SIGTERM) == -1) 02350 err("failed to add SIGTERM to blocking_mask: %m"); 02351 02352 if (sigprocmask(SIG_BLOCK, &blocking_mask, &original_mask) == -1) 02353 err("failed to block signals: %m"); 02354 02355 for(;;) { 02356 if (is_sigterm_caught) { 02357 is_sigterm_caught = 0; 02358 02359 g_hash_table_foreach(children, killchild, NULL); 02360 unlink(pidfname); 02361 02362 exit(EXIT_SUCCESS); 02363 } 02364 02365 if (is_sigchld_caught) { 02366 int status; 02367 int* i; 02368 pid_t pid; 02369 02370 is_sigchld_caught = 0; 02371 02372 while ((pid=waitpid(-1, &status, WNOHANG)) > 0) { 02373 if (WIFEXITED(status)) { 02374 msg(LOG_INFO, "Child exited with %d", WEXITSTATUS(status)); 02375 } 02376 i = g_hash_table_lookup(children, &pid); 02377 if (!i) { 02378 msg(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid); 02379 } else { 02380 DEBUG("Removing %d from the list of children", pid); 02381 g_hash_table_remove(children, &pid); 02382 } 02383 } 02384 } 02385 02386 /* SIGHUP causes the root server process to reconfigure 02387 * itself and add new export servers for each newly 02388 * found export configuration group, i.e. spawn new 02389 * server processes for each previously non-existent 02390 * export. This does not alter old runtime configuration 02391 * but just appends new exports. */ 02392 if (is_sighup_caught) { 02393 int n; 02394 GError *gerror = NULL; 02395 02396 msg(LOG_INFO, "reconfiguration request received"); 02397 is_sighup_caught = 0; /* Reset to allow catching 02398 * it again. */ 02399 02400 n = append_new_servers(servers, &gerror); 02401 if (n == -1) 02402 msg(LOG_ERR, "failed to append new servers: %s", 02403 gerror->message); 02404 02405 for (i = servers->len - n; i < servers->len; ++i) { 02406 const SERVER server = g_array_index(servers, 02407 SERVER, i); 02408 02409 msg(LOG_INFO, "reconfigured new server: %s", 02410 server.servename); 02411 } 02412 } 02413 02414 memcpy(&rset, &mset, sizeof(fd_set)); 02415 if (pselect(max + 1, &rset, NULL, NULL, NULL, &original_mask) > 0) { 02416 DEBUG("accept, "); 02417 for(i=0; i < modernsocks->len; i++) { 02418 int sock = g_array_index(modernsocks, int, i); 02419 if(!FD_ISSET(sock, &rset)) { 02420 continue; 02421 } 02422 02423 handle_modern_connection(servers, sock); 02424 } 02425 } 02426 } 02427 } 02428 void serveloop(GArray* servers) G_GNUC_NORETURN; 02429 02430 /** 02431 * Set server socket options. 02432 * 02433 * @param socket a socket descriptor of the server 02434 * 02435 * @param gerror a pointer to an error object pointer used for reporting 02436 * errors. On error, if gerror is not NULL, *gerror is set and -1 02437 * is returned. 02438 * 02439 * @return 0 on success, -1 on error 02440 **/ 02441 int dosockopts(const int socket, GError **const gerror) { 02442 #ifndef sun 02443 int yes=1; 02444 #else 02445 char yes='1'; 02446 #endif /* sun */ 02447 struct linger l; 02448 02449 /* lose the pesky "Address already in use" error message */ 02450 if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) { 02451 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_REUSEADDR, 02452 "failed to set socket option SO_REUSEADDR: %s", 02453 strerror(errno)); 02454 return -1; 02455 } 02456 l.l_onoff = 1; 02457 l.l_linger = 10; 02458 if (setsockopt(socket,SOL_SOCKET,SO_LINGER,&l,sizeof(l)) == -1) { 02459 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_LINGER, 02460 "failed to set socket option SO_LINGER: %s", 02461 strerror(errno)); 02462 return -1; 02463 } 02464 if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) { 02465 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_KEEPALIVE, 02466 "failed to set socket option SO_KEEPALIVE: %s", 02467 strerror(errno)); 02468 return -1; 02469 } 02470 02471 return 0; 02472 } 02473 02474 int open_unix(const gchar *const sockname, GError **const gerror) { 02475 struct sockaddr_un sa; 02476 int sock=-1; 02477 int retval=-1; 02478 02479 memset(&sa, 0, sizeof(struct sockaddr_un)); 02480 sa.sun_family = AF_UNIX; 02481 strncpy(sa.sun_path, sockname, sizeof sa.sun_path); 02482 sock = socket(AF_UNIX, SOCK_STREAM, 0); 02483 if(sock < 0) { 02484 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET, 02485 "failed to open a unix socket: " 02486 "failed to create socket: %s", 02487 strerror(errno)); 02488 goto out; 02489 } 02490 if(bind(sock, (struct sockaddr*)&sa, sizeof(struct sockaddr_un))<0) { 02491 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND, 02492 "failed to open a unix socket: " 02493 "failed to bind to address %s: %s", 02494 sockname, strerror(errno)); 02495 goto out; 02496 } 02497 if(listen(sock, 10)<0) { 02498 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND, 02499 "failed to open a unix socket: " 02500 "failed to start listening: %s", 02501 strerror(errno)); 02502 goto out; 02503 } 02504 retval=0; 02505 g_array_append_val(modernsocks, sock); 02506 out: 02507 if(retval<0 && sock >= 0) { 02508 close(sock); 02509 } 02510 02511 return retval; 02512 } 02513 02514 int open_modern(const gchar *const addr, const gchar *const port, 02515 GError **const gerror) { 02516 struct addrinfo hints; 02517 struct addrinfo* ai = NULL; 02518 struct addrinfo* ai_bak = NULL; 02519 struct sock_flags; 02520 int e; 02521 int retval = -1; 02522 int sock = -1; 02523 gchar** addrs; 02524 gchar const* l_addr = addr; 02525 02526 if(!addr || strlen(addr) == 0) { 02527 l_addr = "::, 0.0.0.0"; 02528 } 02529 02530 addrs = g_strsplit_set(l_addr, ", \t", -1); 02531 02532 for(int i=0; addrs[i]!=NULL; i++) { 02533 if(addrs[i][0] == '\0') { 02534 continue; 02535 } 02536 memset(&hints, '\0', sizeof(hints)); 02537 hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG; 02538 hints.ai_socktype = SOCK_STREAM; 02539 hints.ai_family = AF_UNSPEC; 02540 hints.ai_protocol = IPPROTO_TCP; 02541 e = getaddrinfo(addrs[i], port ? port : NBD_DEFAULT_PORT, &hints, &ai); 02542 ai_bak = ai; 02543 if(e != 0 && addrs[i+1] == NULL && modernsocks->len == 0) { 02544 g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI, 02545 "failed to open a modern socket: " 02546 "failed to get address info: %s", 02547 gai_strerror(e)); 02548 goto out; 02549 } 02550 02551 while(ai != NULL) { 02552 sock = -1; 02553 02554 if((sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) { 02555 g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET, 02556 "failed to open a modern socket: " 02557 "failed to create a socket: %s", 02558 strerror(errno)); 02559 goto out; 02560 } 02561 02562 if (dosockopts(sock, gerror) == -1) { 02563 g_prefix_error(gerror, "failed to open a modern socket: "); 02564 goto out; 02565 } 02566 02567 if(bind(sock, ai->ai_addr, ai->ai_addrlen)) { 02568 /* 02569 * Some systems will return multiple entries for the 02570 * same address when we ask it for something 02571 * AF_UNSPEC, even though the first entry will 02572 * listen to both protocols. Other systems will 02573 * return multiple entries too, but we actually 02574 * do need to open both. 02575 * 02576 * Handle this by ignoring EADDRINUSE if we've 02577 * already got at least one socket open 02578 */ 02579 if(errno == EADDRINUSE && modernsocks->len > 0) { 02580 goto next; 02581 } 02582 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND, 02583 "failed to open a modern socket: " 02584 "failed to bind an address to a socket: %s", 02585 strerror(errno)); 02586 goto out; 02587 } 02588 02589 if(listen(sock, 10) <0) { 02590 g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND, 02591 "failed to open a modern socket: " 02592 "failed to start listening on a socket: %s", 02593 strerror(errno)); 02594 goto out; 02595 } 02596 g_array_append_val(modernsocks, sock); 02597 next: 02598 ai = ai->ai_next; 02599 } 02600 if(ai_bak) { 02601 freeaddrinfo(ai_bak); 02602 ai_bak=NULL; 02603 } 02604 } 02605 02606 retval = 0; 02607 out: 02608 02609 if (retval == -1 && sock >= 0) { 02610 close(sock); 02611 } 02612 if(ai_bak) 02613 freeaddrinfo(ai_bak); 02614 02615 return retval; 02616 } 02617 02618 /** 02619 * Connect our servers. 02620 **/ 02621 void setup_servers(GArray *const servers, const gchar *const modernaddr, 02622 const gchar *const modernport, const gchar* unixsock) { 02623 struct sigaction sa; 02624 02625 GError *gerror = NULL; 02626 if (open_modern(modernaddr, modernport, &gerror) == -1) { 02627 msg(LOG_ERR, "failed to setup servers: %s", 02628 gerror->message); 02629 g_clear_error(&gerror); 02630 exit(EXIT_FAILURE); 02631 } 02632 if(unixsock != NULL) { 02633 GError* gerror = NULL; 02634 if(open_unix(unixsock, &gerror) == -1) { 02635 msg(LOG_ERR, "failed to setup servers: %s", 02636 gerror->message); 02637 g_clear_error(&gerror); 02638 exit(EXIT_FAILURE); 02639 } 02640 } 02641 children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t); 02642 02643 sa.sa_handler = sigchld_handler; 02644 sigemptyset(&sa.sa_mask); 02645 sigaddset(&sa.sa_mask, SIGTERM); 02646 sa.sa_flags = SA_RESTART; 02647 if(sigaction(SIGCHLD, &sa, NULL) == -1) 02648 err("sigaction: %m"); 02649 02650 sa.sa_handler = sigterm_handler; 02651 sigemptyset(&sa.sa_mask); 02652 sigaddset(&sa.sa_mask, SIGCHLD); 02653 sa.sa_flags = SA_RESTART; 02654 if(sigaction(SIGTERM, &sa, NULL) == -1) 02655 err("sigaction: %m"); 02656 02657 sa.sa_handler = sighup_handler; 02658 sigemptyset(&sa.sa_mask); 02659 sa.sa_flags = SA_RESTART; 02660 if(sigaction(SIGHUP, &sa, NULL) == -1) 02661 err("sigaction: %m"); 02662 } 02663 02664 /** 02665 * Go daemon (unless we specified at compile time that we didn't want this) 02666 * @param serve the first server of our configuration. If its port is zero, 02667 * then do not daemonize, because we're doing inetd then. This parameter 02668 * is only used to create a PID file of the form 02669 * /var/run/nbd-server.<port>.pid; it's not modified in any way. 02670 **/ 02671 #if !defined(NODAEMON) 02672 void daemonize() { 02673 FILE*pidf; 02674 02675 if(daemon(0,0)<0) { 02676 err("daemon"); 02677 } 02678 if(!*pidfname) { 02679 strncpy(pidfname, "/var/run/nbd-server.pid", 255); 02680 } 02681 pidf=fopen(pidfname, "w"); 02682 if(pidf) { 02683 fprintf(pidf,"%d\n", (int)getpid()); 02684 fclose(pidf); 02685 } else { 02686 perror("fopen"); 02687 fprintf(stderr, "Not fatal; continuing"); 02688 } 02689 } 02690 #else 02691 #define daemonize(serve) 02692 #endif /* !defined(NODAEMON) */ 02693 02694 /* 02695 * Everything beyond this point (in the file) is run in non-daemon mode. 02696 * The stuff above daemonize() isn't. 02697 */ 02698 02699 /** 02700 * Set up user-ID and/or group-ID 02701 **/ 02702 void dousers(const gchar *const username, const gchar *const groupname) { 02703 struct passwd *pw; 02704 struct group *gr; 02705 gchar* str; 02706 if (groupname) { 02707 gr = getgrnam(groupname); 02708 if(!gr) { 02709 str = g_strdup_printf("Invalid group name: %s", groupname); 02710 err(str); 02711 } 02712 if(setgid(gr->gr_gid)<0) { 02713 err("Could not set GID: %m"); 02714 } 02715 } 02716 if (username) { 02717 pw = getpwnam(username); 02718 if(!pw) { 02719 str = g_strdup_printf("Invalid user name: %s", username); 02720 err(str); 02721 } 02722 if(setuid(pw->pw_uid)<0) { 02723 err("Could not set UID: %m"); 02724 } 02725 } 02726 } 02727 02728 #ifndef ISSERVER 02729 void glib_message_syslog_redirect(const gchar *log_domain, 02730 GLogLevelFlags log_level, 02731 const gchar *message, 02732 gpointer user_data) 02733 { 02734 int level=LOG_DEBUG; 02735 02736 switch( log_level ) 02737 { 02738 case G_LOG_FLAG_FATAL: 02739 case G_LOG_LEVEL_CRITICAL: 02740 case G_LOG_LEVEL_ERROR: 02741 level=LOG_ERR; 02742 break; 02743 case G_LOG_LEVEL_WARNING: 02744 level=LOG_WARNING; 02745 break; 02746 case G_LOG_LEVEL_MESSAGE: 02747 case G_LOG_LEVEL_INFO: 02748 level=LOG_INFO; 02749 break; 02750 case G_LOG_LEVEL_DEBUG: 02751 level=LOG_DEBUG; 02752 break; 02753 default: 02754 level=LOG_ERR; 02755 } 02756 syslog(level, "%s", message); 02757 } 02758 #endif 02759 02760 /** 02761 * Main entry point... 02762 **/ 02763 int main(int argc, char *argv[]) { 02764 SERVER *serve; 02765 GArray *servers; 02766 GError *gerr=NULL; 02767 struct generic_conf genconf; 02768 02769 memset(&genconf, 0, sizeof(struct generic_conf)); 02770 02771 if (sizeof( struct nbd_request )!=28) { 02772 fprintf(stderr,"Bad size of structure. Alignment problems?\n"); 02773 exit(EXIT_FAILURE) ; 02774 } 02775 02776 modernsocks = g_array_new(FALSE, FALSE, sizeof(int)); 02777 02778 logging(MY_NAME); 02779 config_file_pos = g_strdup(CFILE); 02780 serve=cmdline(argc, argv, &genconf); 02781 02782 genconf.threads = 4; 02783 servers = parse_cfile(config_file_pos, &genconf, true, &gerr); 02784 02785 /* Update global variables with parsed values. This will be 02786 * removed once we get rid of global configuration variables. */ 02787 glob_flags |= genconf.flags; 02788 02789 if(serve) { 02790 g_array_append_val(servers, *serve); 02791 02792 if(strcmp(genconf.modernport, "0")==0) { 02793 #ifndef ISSERVER 02794 err("inetd mode requires syslog"); 02795 #endif 02796 CLIENT* client = g_malloc(sizeof(CLIENT)); 02797 client->server = serve; 02798 client->net = -1; 02799 client->modern = TRUE; 02800 client->exportsize = OFFT_MAX; 02801 if(set_peername(0, client)) 02802 exit(EXIT_FAILURE); 02803 serveconnection(client); 02804 return 0; 02805 } 02806 } 02807 02808 if(!servers || !servers->len) { 02809 if(gerr && !(gerr->domain == NBDS_ERR 02810 && gerr->code == NBDS_ERR_CFILE_NOTFOUND)) { 02811 g_warning("Could not parse config file: %s", 02812 gerr ? gerr->message : "Unknown error"); 02813 } 02814 } 02815 if(serve) { 02816 g_warning("Specifying an export on the command line no longer uses the oldstyle protocol."); 02817 } 02818 02819 if((!serve) && (!servers||!servers->len)) { 02820 if(gerr) 02821 g_message("No configured exports; quitting."); 02822 exit(EXIT_FAILURE); 02823 } 02824 if (!dontfork) 02825 daemonize(); 02826 02827 tpool = g_thread_pool_new(handle_request, NULL, genconf.threads, FALSE, NULL); 02828 02829 setup_servers(servers, genconf.modernaddr, genconf.modernport, 02830 genconf.unixsock); 02831 dousers(genconf.user, genconf.group); 02832 02833 serveloop(servers); 02834 }
1.7.3