Network Block Device  @PACKAGE_VERSION@
nbd-server.c
Go to the documentation of this file.
1 /*
2  * Network Block Device - server
3  *
4  * Copyright 1996-1998 Pavel Machek, distribute under GPL
5  * <pavel@atrey.karlin.mff.cuni.cz>
6  * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7  * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
8  *
9  * Version 1.0 - hopefully 64-bit-clean
10  * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11  * Version 1.2 - autodetect size of block devices, thanks to "Peter T. Breuer" <ptb@it.uc3m.es>
12  * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13  * type, or don't have 64 bit file offsets by defining FS_32BIT
14  * in compile options for nbd-server *only*. This can be done
15  * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16  * original autoconf input file, or I would make it a configure
17  * option.) Ken Yap <ken@nlc.net.au>.
18  * Version 1.6 - fix autodetection of block device size and really make 64 bit
19  * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20  * Version 2.0 - Version synchronised with client
21  * Version 2.1 - Reap zombie client processes when they exit. Removed
22  * (uncommented) the _IO magic, it's no longer necessary. Wouter
23  * Verhelst <wouter@debian.org>
24  * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25  * Version 2.3 - Fixed code so that Large File Support works. This
26  * removes the FS_32BIT compile-time directive; define
27  * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28  * using FS_32BIT. This will allow you to use files >2GB instead of
29  * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30  * Version 2.4 - Added code to keep track of children, so that we can
31  * properly kill them from initscripts. Add a call to daemon(),
32  * so that processes don't think they have to wait for us, which is
33  * interesting for initscripts as well. Wouter Verhelst
34  * <wouter@debian.org>
35  * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36  * zero after fork()ing, resulting in nbd-server going berserk
37  * when it receives a signal with at least one child open. Wouter
38  * Verhelst <wouter@debian.org>
39  * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40  * rectified type of mainloop::size_host (sf.net bugs 814435 and
41  * 817385); close the PID file after writing to it, so that the
42  * daemon can actually be found. Wouter Verhelst
43  * <wouter@debian.org>
44  * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45  * correctly put in network endianness. Many types were corrected
46  * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47  * Version 2.6 - Some code cleanup.
48  * Version 2.7 - Better build system.
49  * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50  * lot more work, but this is a start. Wouter Verhelst
51  * <wouter@debian.org>
52  * 16/03/2010 - Add IPv6 support.
53  * Kitt Tientanopajai <kitt@kitty.in.th>
54  * Neutron Soutmun <neo.neutron@gmail.com>
55  * Suriya Soutmun <darksolar@gmail.com>
56  */
57 
58 /* Includes LFS defines, which defines behaviours of some of the following
59  * headers, so must come before those */
60 #include "lfs.h"
61 
62 #include <assert.h>
63 #include <sys/types.h>
64 #include <sys/socket.h>
65 #include <sys/stat.h>
66 #include <sys/select.h>
67 #include <sys/wait.h>
68 #ifdef HAVE_SYS_IOCTL_H
69 #include <sys/ioctl.h>
70 #endif
71 #include <sys/param.h>
72 #ifdef HAVE_SYS_MOUNT_H
73 #include <sys/mount.h>
74 #endif
75 #include <signal.h>
76 #include <errno.h>
77 #include <netinet/tcp.h>
78 #include <netinet/in.h>
79 #include <netdb.h>
80 #include <syslog.h>
81 #include <unistd.h>
82 #include <stdbool.h>
83 #include <stdio.h>
84 #include <stdlib.h>
85 #include <string.h>
86 #include <fcntl.h>
87 #if HAVE_FALLOC_PH
88 #include <linux/falloc.h>
89 #endif
90 #include <arpa/inet.h>
91 #include <strings.h>
92 #include <dirent.h>
93 #include <unistd.h>
94 #include <getopt.h>
95 #include <pwd.h>
96 #include <grp.h>
97 #include <dirent.h>
98 #include <ctype.h>
99 
100 #include <glib.h>
101 
102 /* used in cliserv.h, so must come first */
103 #define MY_NAME "nbd_server"
104 #include "cliserv.h"
105 #include "nbd-debug.h"
106 #include "netdb-compat.h"
107 
108 #ifdef WITH_SDP
109 #include <sdp_inet.h>
110 #endif
111 
112 /** Default position of the config file */
113 #ifndef SYSCONFDIR
114 #define SYSCONFDIR "/etc"
115 #endif
116 #define CFILE SYSCONFDIR "/nbd-server/config"
117 
118 /** Where our config file actually is */
120 
121 /** global flags */
123 
124 /* Whether we should avoid forking */
125 int dontfork = 0;
126 
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer type, so this is the value with every bit set except the sign bit.
 *
 * The value is assembled as (2^(N-2) - 1) * 2 + 1 instead of shifting a one
 * into the sign bit and inverting the result: left-shifting a one into the
 * sign bit of a signed type is undefined behaviour in C. The expansion is
 * fully parenthesized so that it can be embedded in any expression.
 **/
#define OFFT_MAX ((off_t)((((((off_t)1) << (sizeof(off_t) * 8 - 2)) - 1) * 2) + 1))
#define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
#define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
134 
/** Per-export flags (one bit each, combined into a bitmask): */
#define F_READONLY      (1 << 0)   /**< the exported file is read-only */
#define F_MULTIFILE     (1 << 1)   /**< the file is exported using -m */
#define F_COPYONWRITE   (1 << 2)   /**< the file is exported using copy-on-write */
#define F_AUTOREADONLY  (1 << 3)   /**< the file is set to autoreadonly */
#define F_SPARSE        (1 << 4)   /**< copy-on-write should use a sparse file */
#define F_SDP           (1 << 5)   /**< export using the Socket Direct Protocol for RDMA */
#define F_SYNC          (1 << 6)   /**< fsync() after a write */
#define F_FLUSH         (1 << 7)   /**< server wants FLUSH to be sent by the client */
#define F_FUA           (1 << 8)   /**< server wants FUA to be sent by the client */
#define F_ROTATIONAL    (1 << 9)   /**< server wants the client to implement the elevator algorithm */
#define F_TEMPORARY     (1 << 10)  /**< the backing file is temporary and should be created, then unlinked */
#define F_TRIM          (1 << 11)  /**< server wants TRIM (discard) to be sent by the client */
#define F_FIXED         (1 << 12)  /**< client supports the fixed new-style protocol (and can thus send us extra options) */

/** Global flags (a separate bitmask; values overlap with the per-export ones): */
#define F_OLDSTYLE      (1 << 0)   /**< allow oldstyle (port-based) exports */
#define F_LIST          (1 << 1)   /**< allow clients to list the exports on a server */
154 GHashTable *children;
155 char pidfname[256]; /**< name of our PID file */
156 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
157 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
159 #define NEG_INIT (1 << 0)
160 #define NEG_OLD (1 << 1)
161 #define NEG_MODERN (1 << 2)
163 #include <nbdsrv.h>
164 
165 static volatile sig_atomic_t is_sighup_caught; /**< Flag set by SIGHUP
166  handler to mark a
167  reconfiguration
168  request */
169 
170 GArray* modernsocks; /**< Sockets for the modern handler. Not used
171  if a client was only specified on the
172  command line; only port used if
173  oldstyle is set to false (and then the
174  command-line client isn't used, gna gna).
175  This may be more than one socket on
176  systems that don't support serving IPv4
177  and IPv6 from the same socket (like,
178  e.g., FreeBSD) */
179 
180 bool logged_oversized=false; /**< whether we logged oversized requests already */
181 
/**
 * Bookkeeping for one open backing file of an export.
 **/
typedef struct {
    int fhandle;     /**< file descriptor of the open file */
    off_t startoff;  /**< offset within the export at which this file starts */
} FILE_INFO;
189 
/**
 * Kinds of values a configuration file parameter can hold. The kind
 * selects how the value is read and how it is stored in its target.
 **/
typedef enum {
    PARAM_INT    = 0,  /**< integer, stored through a gint pointer */
    PARAM_INT64  = 1,  /**< 64-bit integer, stored through a gint64 pointer */
    PARAM_STRING = 2,  /**< string */
    PARAM_BOOL   = 3,  /**< boolean, stored as a bit in a flag word */
} PARAM_TYPE;
199 
200 /**
201  * Configuration file values
202  **/
203 typedef struct {
204  gchar *paramname; /**< Name of the parameter, as it appears in
205  the config file */
206  gboolean required; /**< Whether this is a required (as opposed to
207  optional) parameter */
208  PARAM_TYPE ptype; /**< Type of the parameter. */
209  gpointer target; /**< Pointer to where the data of this
210  parameter should be written. If ptype is
211  PARAM_BOOL, the data is or'ed rather than
212  overwritten. */
213  gint flagval; /**< Flag mask for this parameter in case ptype
214  is PARAM_BOOL. */
215 } PARAM;
216 
217 /**
218  * Configuration file values of the "generic" section
219  **/
220 struct generic_conf {
221  gchar *user; /**< user we run the server as */
222  gchar *group; /**< group we run running as */
223  gchar *modernaddr; /**< address of the modern socket */
224  gchar *modernport; /**< port of the modern socket */
225  gint flags; /**< global flags */
226 };
227 
228 /**
229  * Translate a command name into human readable form
230  *
231  * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
232  * @return pointer to the command name
233  **/
234 static inline const char * getcommandname(uint64_t command) {
235  switch (command) {
236  case NBD_CMD_READ:
237  return "NBD_CMD_READ";
238  case NBD_CMD_WRITE:
239  return "NBD_CMD_WRITE";
240  case NBD_CMD_DISC:
241  return "NBD_CMD_DISC";
242  case NBD_CMD_FLUSH:
243  return "NBD_CMD_FLUSH";
244  case NBD_CMD_TRIM:
245  return "NBD_CMD_TRIM";
246  default:
247  return "UNKNOWN";
248  }
249 }
250 
/**
 * Read exactly len bytes from a file descriptor into a buffer.
 *
 * Keeps calling read() until the whole request has been satisfied.
 * Transient conditions (EAGAIN, EINTR) are simply retried. Any other
 * failure, and an end of file before len bytes have arrived, is reported
 * through err().
 *
 * @param f a file descriptor
 * @param buf a buffer with room for at least len bytes
 * @param len the number of bytes to be read
 **/
static inline void readit(int f, void *buf, size_t len) {
    /* Byte pointer: arithmetic on void* is a compiler extension */
    char *dest = buf;

    while (len > 0) {
        ssize_t res;

        DEBUG("*");
        res = read(f, dest, len);
        if (res > 0) {
            len -= (size_t)res;
            dest += res;
        } else if (res == 0) {
            /* End of file: read() leaves errno untouched here, so it
             * must not be consulted. Without this branch a stale
             * EAGAIN would make us spin forever on a closed peer.
             * NOTE(review): err() is presumably fatal (does not
             * return) -- confirm in cliserv.h. */
            err("Read failed: End of file");
        } else if (errno != EAGAIN && errno != EINTR) {
            err("Read failed: %m");
        }
    }
}
272 
/**
 * Read and throw away data from a file descriptor.
 *
 * The data is pulled through the supplied scratch buffer in pieces of at
 * most bufsiz bytes; the buffer contents are overwritten by each piece.
 *
 * @param f a file descriptor
 * @param buf a scratch buffer
 * @param len the number of bytes to consume
 * @param bufsiz the size of the scratch buffer
 **/
static inline void consume(int f, void * buf, size_t len, size_t bufsiz) {
    size_t remaining;
    size_t chunk;

    for (remaining = len; remaining > 0; remaining -= chunk) {
        chunk = remaining;
        if (chunk > bufsiz) {
            chunk = bufsiz;
        }
        readit(f, buf, chunk);
    }
}
289 
/**
 * Write exactly len bytes from a buffer to a file descriptor.
 *
 * Keeps calling write() until everything has been sent. Short writes are
 * continued from where they stopped. Transient conditions (EAGAIN,
 * EINTR) are retried, mirroring readit(). Any other failure is reported
 * through err().
 *
 * @param f a file descriptor
 * @param buf a buffer containing data
 * @param len the number of bytes to be written
 **/
static inline void writeit(int f, void *buf, size_t len) {
    /* Byte pointer: arithmetic on void* is a compiler extension */
    const char *src = buf;

    while (len > 0) {
        ssize_t res;

        DEBUG("+");
        res = write(f, src, len);
        if (res > 0) {
            /* Only a positive result may adjust the bookkeeping;
             * subtracting -1 from len would grow the request. */
            len -= (size_t)res;
            src += res;
        } else if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
            continue;
        } else {
            err("Send failed: %m");
        }
    }
}
307 
308 /**
309  * Print out a message about how to use nbd-server. Split out to a separate
310  * function so that we can call it from multiple places
311  */
312 void usage() {
313  printf("This is nbd-server version " VERSION "\n");
314  printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
315  "\t-r|--read-only\t\tread only\n"
316  "\t-m|--multi-file\t\tmultiple file\n"
317  "\t-c|--copy-on-write\tcopy on write\n"
318  "\t-C|--config-file\tspecify an alternate configuration file\n"
319  "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
320  "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
321  "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
322  "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
323  "\tif port is set to 0, stdin is used (for running from inetd).\n"
324  "\tif file_to_export contains '%%s', it is substituted with the IP\n"
325  "\t\taddress of the machine trying to connect\n"
326  "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
327  printf("Using configuration file %s\n", CFILE);
328 }
329 
330 /* Dumps a config file section of the given SERVER*, and exits. */
331 void dump_section(SERVER* serve, gchar* section_header) {
332  printf("[%s]\n", section_header);
333  printf("\texportname = %s\n", serve->exportname);
334  printf("\tlistenaddr = %s\n", serve->listenaddr);
335  printf("\tport = %d\n", serve->port);
336  if(serve->flags & F_READONLY) {
337  printf("\treadonly = true\n");
338  }
339  if(serve->flags & F_MULTIFILE) {
340  printf("\tmultifile = true\n");
341  }
342  if(serve->flags & F_COPYONWRITE) {
343  printf("\tcopyonwrite = true\n");
344  }
345  if(serve->expected_size) {
346  printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
347  }
348  if(serve->authname) {
349  printf("\tauthfile = %s\n", serve->authname);
350  }
351  exit(EXIT_SUCCESS);
352 }
353 
354 /**
355  * Parse the command line.
356  *
357  * @param argc the argc argument to main()
358  * @param argv the argv argument to main()
359  **/
360 SERVER* cmdline(int argc, char *argv[]) {
361  int i=0;
362  int nonspecial=0;
363  int c;
364  struct option long_options[] = {
365  {"read-only", no_argument, NULL, 'r'},
366  {"multi-file", no_argument, NULL, 'm'},
367  {"copy-on-write", no_argument, NULL, 'c'},
368  {"dont-fork", no_argument, NULL, 'd'},
369  {"authorize-file", required_argument, NULL, 'l'},
370  {"config-file", required_argument, NULL, 'C'},
371  {"pid-file", required_argument, NULL, 'p'},
372  {"output-config", required_argument, NULL, 'o'},
373  {"max-connection", required_argument, NULL, 'M'},
374  {0,0,0,0}
375  };
376  SERVER *serve;
377  off_t es;
378  size_t last;
379  char suffix;
380  gboolean do_output=FALSE;
381  gchar* section_header="";
382  gchar** addr_port;
383 
384  if(argc==1) {
385  return NULL;
386  }
387  serve=g_new0(SERVER, 1);
388  serve->authname = g_strdup(default_authname);
389  serve->virtstyle=VIRT_IPLIT;
390  while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
391  switch (c) {
392  case 1:
393  /* non-option argument */
394  switch(nonspecial++) {
395  case 0:
396  if(strchr(optarg, ':') == strrchr(optarg, ':')) {
397  addr_port=g_strsplit(optarg, ":", 2);
398 
399  /* Check for "@" - maybe user using this separator
400  for IPv4 address */
401  if(!addr_port[1]) {
402  g_strfreev(addr_port);
403  addr_port=g_strsplit(optarg, "@", 2);
404  }
405  } else {
406  addr_port=g_strsplit(optarg, "@", 2);
407  }
408 
409  if(addr_port[1]) {
410  serve->port=strtol(addr_port[1], NULL, 0);
411  serve->listenaddr=g_strdup(addr_port[0]);
412  } else {
413  serve->listenaddr=NULL;
414  serve->port=strtol(addr_port[0], NULL, 0);
415  }
416  g_strfreev(addr_port);
417  break;
418  case 1:
419  serve->exportname = g_strdup(optarg);
420  if(serve->exportname[0] != '/') {
421  fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
422  exit(EXIT_FAILURE);
423  }
424  break;
425  case 2:
426  last=strlen(optarg)-1;
427  suffix=optarg[last];
428  if (suffix == 'k' || suffix == 'K' ||
429  suffix == 'm' || suffix == 'M')
430  optarg[last] = '\0';
431  es = (off_t)atoll(optarg);
432  switch (suffix) {
433  case 'm':
434  case 'M': es <<= 10;
435  case 'k':
436  case 'K': es <<= 10;
437  default : break;
438  }
439  serve->expected_size = es;
440  break;
441  }
442  break;
443  case 'r':
444  serve->flags |= F_READONLY;
445  break;
446  case 'm':
447  serve->flags |= F_MULTIFILE;
448  break;
449  case 'o':
450  do_output = TRUE;
451  section_header = g_strdup(optarg);
452  break;
453  case 'p':
454  strncpy(pidftemplate, optarg, 256);
455  break;
456  case 'c':
457  serve->flags |=F_COPYONWRITE;
458  break;
459  case 'd':
460  dontfork = 1;
461  break;
462  case 'C':
463  g_free(config_file_pos);
464  config_file_pos=g_strdup(optarg);
465  break;
466  case 'l':
467  g_free(serve->authname);
468  serve->authname=g_strdup(optarg);
469  break;
470  case 'M':
471  serve->max_connections = strtol(optarg, NULL, 0);
472  break;
473  default:
474  usage();
475  exit(EXIT_FAILURE);
476  break;
477  }
478  }
479  /* What's left: the port to export, the name of the to be exported
480  * file, and, optionally, the size of the file, in that order. */
481  if(nonspecial<2) {
482  g_free(serve);
483  serve=NULL;
484  } else {
486  }
487  if(do_output) {
488  if(!serve) {
489  g_critical("Need a complete configuration on the command line to output a config file section!");
490  exit(EXIT_FAILURE);
491  }
492  dump_section(serve, section_header);
493  }
494  return serve;
495 }
496 
497 /* forward definition of parse_cfile */
498 GArray* parse_cfile(gchar* f, struct generic_conf *genconf, GError** e);
499 
500 /**
501  * Parse config file snippets in a directory. Uses readdir() and friends
502  * to find files and open them, then passes them on to parse_cfile
503  * with have_global set false
504  **/
505 GArray* do_cfile_dir(gchar* dir, GError** e) {
506  DIR* dirh = opendir(dir);
507  struct dirent* de;
508  gchar* fname;
509  GArray* retval = NULL;
510  GArray* tmp;
511  struct stat stbuf;
512 
513  if(!dirh) {
514  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_DIR_UNKNOWN, "Invalid directory specified: %s", strerror(errno));
515  return NULL;
516  }
517  errno=0;
518  while((de = readdir(dirh))) {
519  int saved_errno=errno;
520  fname = g_build_filename(dir, de->d_name, NULL);
521  switch(de->d_type) {
522  case DT_UNKNOWN:
523  /* Filesystem doesn't return type of
524  * file through readdir. Run stat() on
525  * the file instead */
526  if(stat(fname, &stbuf)) {
527  perror("stat");
528  goto err_out;
529  }
530  if (!S_ISREG(stbuf.st_mode)) {
531  goto next;
532  }
533  case DT_REG:
534  /* Skip unless the name ends with '.conf' */
535  if(strcmp((de->d_name + strlen(de->d_name) - 5), ".conf")) {
536  goto next;
537  }
538  tmp = parse_cfile(fname, NULL, e);
539  errno=saved_errno;
540  if(*e) {
541  goto err_out;
542  }
543  if(!retval)
544  retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
545  retval = g_array_append_vals(retval, tmp->data, tmp->len);
546  g_array_free(tmp, TRUE);
547  default:
548  break;
549  }
550  next:
551  g_free(fname);
552  }
553  if(errno) {
554  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_READDIR_ERR, "Error trying to read directory: %s", strerror(errno));
555  err_out:
556  if(retval)
557  g_array_free(retval, TRUE);
558  if(dirh)
559  closedir(dirh);
560  return NULL;
561  }
562  return retval;
563 }
564 
565 /**
566  * Parse the config file.
567  *
568  * @param f the name of the config file
569  *
570  * @param genconf a pointer to generic configuration which will get
571  * updated with parsed values. If NULL, then parsed generic
572  * configuration values are safely and silently discarded.
573  *
574  * @param e a GError. Error code can be any of the following:
575  * NBDS_ERR_CFILE_NOTFOUND, NBDS_ERR_CFILE_MISSING_GENERIC,
576  * NBDS_ERR_CFILE_VALUE_INVALID, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
577  * or NBDS_ERR_CFILE_NO_EXPORTS. @see NBDS_ERRS.
578  *
579  * @return a Array of SERVER* pointers, If the config file is empty or does not
580  * exist, returns an empty GHashTable; if the config file contains an
581  * error, returns NULL, and e is set appropriately
582  **/
583 GArray* parse_cfile(gchar* f, struct generic_conf *const genconf, GError** e) {
584  const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
585  const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
586  gchar* cfdir = NULL;
587  SERVER s;
588  gchar *virtstyle=NULL;
589  PARAM lp[] = {
590  { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
591  { "port", TRUE, PARAM_INT, &(s.port), 0 },
592  { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
593  { "filesize", FALSE, PARAM_OFFT, &(s.expected_size), 0 },
594  { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
595  { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
596  { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
597  { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
598  { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
599  { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
600  { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
601  { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
602  { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
603  { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
604  { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
605  { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
606  { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
607  { "temporary", FALSE, PARAM_BOOL, &(s.flags), F_TEMPORARY },
608  { "trim", FALSE, PARAM_BOOL, &(s.flags), F_TRIM },
609  { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
610  { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
611  };
612  const int lp_size=sizeof(lp)/sizeof(PARAM);
613  struct generic_conf genconftmp;
614  PARAM gp[] = {
615  { "user", FALSE, PARAM_STRING, &(genconftmp.user), 0 },
616  { "group", FALSE, PARAM_STRING, &(genconftmp.group), 0 },
617  { "oldstyle", FALSE, PARAM_BOOL, &(genconftmp.flags), F_OLDSTYLE },
618  { "listenaddr", FALSE, PARAM_STRING, &(genconftmp.modernaddr), 0 },
619  { "port", FALSE, PARAM_STRING, &(genconftmp.modernport), 0 },
620  { "includedir", FALSE, PARAM_STRING, &cfdir, 0 },
621  { "allowlist", FALSE, PARAM_BOOL, &(genconftmp.flags), F_LIST },
622  };
623  PARAM* p=gp;
624  int p_size=sizeof(gp)/sizeof(PARAM);
625  GKeyFile *cfile;
626  GError *err = NULL;
627  const char *err_msg=NULL;
628  GArray *retval=NULL;
629  gchar **groups;
630  gboolean bval;
631  gint ival;
632  gint64 i64val;
633  gchar* sval;
634  gchar* startgroup;
635  gint i;
636  gint j;
637 
638  memset(&genconftmp, 0, sizeof(struct generic_conf));
639 
640  if (genconf) {
641  /* Use the passed configuration values as defaults. The
642  * parsing algorithm below updates all parameter targets
643  * found from configuration files. */
644  memcpy(&genconftmp, genconf, sizeof(struct generic_conf));
645  }
646 
647  cfile = g_key_file_new();
648  retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
649  if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
650  G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
651  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NOTFOUND, "Could not open config file %s: %s",
652  f, err->message);
653  g_key_file_free(cfile);
654  return retval;
655  }
656  startgroup = g_key_file_get_start_group(cfile);
657  if((!startgroup || strcmp(startgroup, "generic")) && genconf) {
658  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
659  g_key_file_free(cfile);
660  return NULL;
661  }
662  groups = g_key_file_get_groups(cfile, NULL);
663  for(i=0;groups[i];i++) {
664  memset(&s, '\0', sizeof(SERVER));
665 
666  /* After the [generic] group or when we're parsing an include
667  * directory, start parsing exports */
668  if(i==1 || !genconf) {
669  p=lp;
670  p_size=lp_size;
671  if(!(glob_flags & F_OLDSTYLE)) {
672  lp[1].required = FALSE;
673  }
674  }
675  for(j=0;j<p_size;j++) {
676  assert(p[j].target != NULL);
677  assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL||p[j].ptype==PARAM_INT64);
678  switch(p[j].ptype) {
679  case PARAM_INT:
680  ival = g_key_file_get_integer(cfile,
681  groups[i],
682  p[j].paramname,
683  &err);
684  if(!err) {
685  *((gint*)p[j].target) = ival;
686  }
687  break;
688  case PARAM_INT64:
689  i64val = g_key_file_get_int64(cfile,
690  groups[i],
691  p[j].paramname,
692  &err);
693  if(!err) {
694  *((gint64*)p[j].target) = i64val;
695  }
696  break;
697  case PARAM_STRING:
698  sval = g_key_file_get_string(cfile,
699  groups[i],
700  p[j].paramname,
701  &err);
702  if(!err) {
703  *((gchar**)p[j].target) = sval;
704  }
705  break;
706  case PARAM_BOOL:
707  bval = g_key_file_get_boolean(cfile,
708  groups[i],
709  p[j].paramname, &err);
710  if(!err) {
711  if(bval) {
712  *((gint*)p[j].target) |= p[j].flagval;
713  } else {
714  *((gint*)p[j].target) &= ~(p[j].flagval);
715  }
716  }
717  break;
718  }
719  if(err) {
720  if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
721  if(!p[j].required) {
722  /* Ignore not-found error for optional values */
723  g_clear_error(&err);
724  continue;
725  } else {
726  err_msg = MISSING_REQUIRED_ERROR;
727  }
728  } else {
729  err_msg = DEFAULT_ERROR;
730  }
731  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
732  g_array_free(retval, TRUE);
733  g_error_free(err);
734  g_key_file_free(cfile);
735  return NULL;
736  }
737  }
738  if(virtstyle) {
739  if(!strncmp(virtstyle, "none", 4)) {
741  } else if(!strncmp(virtstyle, "ipliteral", 9)) {
743  } else if(!strncmp(virtstyle, "iphash", 6)) {
745  } else if(!strncmp(virtstyle, "cidrhash", 8)) {
747  if(strlen(virtstyle)<10) {
748  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
749  g_array_free(retval, TRUE);
750  g_key_file_free(cfile);
751  return NULL;
752  }
753  s.cidrlen=strtol(virtstyle+8, NULL, 0);
754  } else {
755  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
756  g_array_free(retval, TRUE);
757  g_key_file_free(cfile);
758  return NULL;
759  }
760  } else {
762  }
763  if(s.port && !(glob_flags & F_OLDSTYLE)) {
764  g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
765  g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
766  }
767  /* Don't need to free this, it's not our string */
768  virtstyle=NULL;
769  /* Don't append values for the [generic] group */
770  if(i>0 || !genconf) {
771  s.socket_family = AF_UNSPEC;
772  s.servename = groups[i];
773 
774  append_serve(&s, retval);
775  }
776 #ifndef WITH_SDP
777  if(s.flags & F_SDP) {
778  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
779  g_array_free(retval, TRUE);
780  g_key_file_free(cfile);
781  return NULL;
782  }
783 #endif
784  }
785  g_key_file_free(cfile);
786  if(cfdir) {
787  GArray* extra = do_cfile_dir(cfdir, e);
788  if(extra) {
789  retval = g_array_append_vals(retval, extra->data, extra->len);
790  i+=extra->len;
791  g_array_free(extra, TRUE);
792  } else {
793  if(*e) {
794  g_array_free(retval, TRUE);
795  return NULL;
796  }
797  }
798  }
799  if(i==1 && genconf) {
800  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NO_EXPORTS, "The config file does not specify any exports");
801  }
802 
803  if (genconf) {
804  /* Return the updated generic configuration through the
805  * pointer parameter. */
806  memcpy(genconf, &genconftmp, sizeof(struct generic_conf));
807  }
808 
809  return retval;
810 }
811 
812 /**
813  * Signal handler for SIGCHLD
814  * @param s the signal we're handling (must be SIGCHLD, or something
815  * is severely wrong)
816  **/
817 void sigchld_handler(int s) {
818  int status;
819  int* i;
820  pid_t pid;
821 
822  while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
823  if(WIFEXITED(status)) {
824  msg(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
825  }
826  i=g_hash_table_lookup(children, &pid);
827  if(!i) {
828  msg(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
829  } else {
830  DEBUG("Removing %d from the list of children", pid);
831  g_hash_table_remove(children, &pid);
832  }
833  }
834 }
835 
836 /**
837  * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
838  *
839  * @param key the key
840  * @param value the value corresponding to the above key
841  * @param user_data a pointer which we always set to 1, so that we know what
842  * will happen next.
843  **/
844 void killchild(gpointer key, gpointer value, gpointer user_data) {
845  pid_t *pid=value;
846 
847  kill(*pid, SIGTERM);
848 }
849 
850 /**
851  * Handle SIGTERM and dispatch it to our children
852  * @param s the signal we're handling (must be SIGTERM, or something
853  * is severely wrong).
854  **/
855 void sigterm_handler(int s) {
856  g_hash_table_foreach(children, killchild, NULL);
857  unlink(pidfname);
858 
859  exit(EXIT_SUCCESS);
860 }
861 
862 /**
863  * Handle SIGHUP by setting atomically a flag which will be evaluated in
864  * the main loop of the root server process. This allows us to separate
865  * the signal catching from th actual task triggered by SIGHUP and hence
866  * processing in the interrupt context is kept as minimial as possible.
867  *
868  * @param s the signal we're handling (must be SIGHUP, or something
869  * is severely wrong).
870  **/
871 static void sighup_handler(const int s G_GNUC_UNUSED) {
872  is_sighup_caught = 1;
873 }
874 
875 /**
876  * Get the file handle and offset, given an export offset.
877  *
878  * @param export An array of export files
879  * @param a The offset to get corresponding file/offset for
880  * @param fhandle [out] File descriptor
881  * @param foffset [out] Offset into fhandle
882  * @param maxbytes [out] Tells how many bytes can be read/written
883  * from fhandle starting at foffset (0 if there is no limit)
884  * @return 0 on success, -1 on failure
885  **/
886 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
887  /* Negative offset not allowed */
888  if(a < 0)
889  return -1;
890 
891  /* Binary search for last file with starting offset <= a */
892  FILE_INFO fi;
893  int start = 0;
894  int end = export->len - 1;
895  while( start <= end ) {
896  int mid = (start + end) / 2;
897  fi = g_array_index(export, FILE_INFO, mid);
898  if( fi.startoff < a ) {
899  start = mid + 1;
900  } else if( fi.startoff > a ) {
901  end = mid - 1;
902  } else {
903  start = end = mid;
904  break;
905  }
906  }
907 
908  /* end should never go negative, since first startoff is 0 and a >= 0 */
909  assert(end >= 0);
910 
911  fi = g_array_index(export, FILE_INFO, end);
912  *fhandle = fi.fhandle;
913  *foffset = a - fi.startoff;
914  *maxbytes = 0;
915  if( end+1 < export->len ) {
916  FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
917  *maxbytes = fi_next.startoff - a;
918  }
919 
920  return 0;
921 }
922 
/**
 * Position a file descriptor at an absolute offset, reporting a failure
 * through err().
 *
 * @param handle a filedescriptor
 * @param a position to seek to, counted from the start of the file
 * @todo get rid of this.
 **/
void myseek(int handle,off_t a) {
    const off_t pos = lseek(handle, a, SEEK_SET);

    if (pos >= 0) {
        return;
    }
    err("Can not seek locally!\n");
}
934 
935 /**
936  * Write an amount of bytes at a given offset to the right file. This
937  * abstracts the write-side of the multiple file option.
938  *
939  * @param a The offset where the write should start
940  * @param buf The buffer to write from
941  * @param len The length of buf
942  * @param client The client we're serving for
943  * @param fua Flag to indicate 'Force Unit Access'
944  * @return The number of bytes actually written, or -1 in case of an error
945  **/
946 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
947  int fhandle;
948  off_t foffset;
949  size_t maxbytes;
950  ssize_t retval;
951 
952  if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
953  return -1;
954  if(maxbytes && len > maxbytes)
955  len = maxbytes;
956 
957  DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
958 
959  myseek(fhandle, foffset);
960  retval = write(fhandle, buf, len);
961  if(client->server->flags & F_SYNC) {
962  fsync(fhandle);
963  } else if (fua) {
964 
965  /* This is where we would do the following
966  * #ifdef USE_SYNC_FILE_RANGE
967  * However, we don't, for the reasons set out below
968  * by Christoph Hellwig <hch@infradead.org>
969  *
970  * [BEGINS]
971  * fdatasync is equivalent to fsync except that it does not flush
972  * non-essential metadata (basically just timestamps in practice), but it
973  * does flush metadata requried to find the data again, e.g. allocation
974  * information and extent maps. sync_file_range does nothing but flush
975  * out pagecache content - it means you basically won't get your data
976  * back in case of a crash if you either:
977  *
978  * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
979  * b) are using a sparse file on a filesystem
980  * c) are using a fallocate-preallocated file on a filesystem
981  * d) use any file on a COW filesystem like btrfs
982  *
983  * e.g. it only does anything useful for you if you do not have a volatile
984  * write cache, and either use a raw block device node, or just overwrite
985  * an already fully allocated (and not preallocated) file on a non-COW
986  * filesystem.
987  * [ENDS]
988  *
989  * What we should do is open a second FD with O_DSYNC set, then write to
990  * that when appropriate. However, with a Linux client, every REQ_FUA
991  * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
992  * problems.
993  *
994  */
995 #if 0
996  sync_file_range(fhandle, foffset, len,
997  SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
998  SYNC_FILE_RANGE_WAIT_AFTER);
999 #else
1000  fdatasync(fhandle);
1001 #endif
1002  }
1003  return retval;
1004 }
1005 
1006 /**
1007  * Call rawexpwrite repeatedly until all data has been written.
1008  *
1009  * @param a The offset where the write should start
1010  * @param buf The buffer to write from
1011  * @param len The length of buf
1012  * @param client The client we're serving for
1013  * @param fua Flag to indicate 'Force Unit Access'
1014  * @return 0 on success, nonzero on failure
1015  **/
1016 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1017  ssize_t ret=0;
1018 
1019  while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
1020  a += ret;
1021  buf += ret;
1022  len -= ret;
1023  }
1024  return (ret < 0 || len != 0);
1025 }
1026 
1027 /**
1028  * Read an amount of bytes at a given offset from the right file. This
1029  * abstracts the read-side of the multiple files option.
1030  *
1031  * @param a The offset where the read should start
1032  * @param buf A buffer to read into
1033  * @param len The size of buf
1034  * @param client The client we're serving for
1035  * @return The number of bytes actually read, or -1 in case of an
1036  * error.
1037  **/
1038 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1039  int fhandle;
1040  off_t foffset;
1041  size_t maxbytes;
1042 
1043  if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
1044  return -1;
1045  if(maxbytes && len > maxbytes)
1046  len = maxbytes;
1047 
1048  DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
1049 
1050  myseek(fhandle, foffset);
1051  return read(fhandle, buf, len);
1052 }
1053 
1054 /**
1055  * Call rawexpread repeatedly until all data has been read.
1056  * @return 0 on success, nonzero on failure
1057  **/
1058 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1059  ssize_t ret=0;
1060 
1061  while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1062  a += ret;
1063  buf += ret;
1064  len -= ret;
1065  }
1066  return (ret < 0 || len != 0);
1067 }
1068 
1069 /**
1070  * Read an amount of bytes at a given offset from the right file. This
1071  * abstracts the read-side of the copyonwrite stuff, and calls
1072  * rawexpread() with the right parameters to do the actual work.
1073  * @param a The offset where the read should start
1074  * @param buf A buffer to read into
1075  * @param len The size of buf
1076  * @param client The client we're going to read for
1077  * @return 0 on success, nonzero on failure
1078  **/
1079 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1080  off_t rdlen, offset;
1081  off_t mapcnt, mapl, maph, pagestart;
1082 
1083  if (!(client->server->flags & F_COPYONWRITE))
1084  return(rawexpread_fully(a, buf, len, client));
1085  DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1086 
1087  mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1088 
1089  for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1090  pagestart=mapcnt*DIFFPAGESIZE;
1091  offset=a-pagestart;
1092  rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1093  len : (size_t)DIFFPAGESIZE-offset;
1094  if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1095  DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1096  (unsigned long)(client->difmap[mapcnt]));
1097  myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1098  if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1099  } else { /* the block is not there */
1100  DEBUG("Page %llu is not here, we read the original one\n",
1101  (unsigned long long)mapcnt);
1102  if(rawexpread_fully(a, buf, rdlen, client)) return -1;
1103  }
1104  len-=rdlen; a+=rdlen; buf+=rdlen;
1105  }
1106  return 0;
1107 }
1108 
1109 /**
1110  * Write an amount of bytes at a given offset to the right file. This
1111  * abstracts the write-side of the copyonwrite option, and calls
1112  * rawexpwrite() with the right parameters to do the actual work.
1113  *
1114  * @param a The offset where the write should start
1115  * @param buf The buffer to write from
1116  * @param len The length of buf
1117  * @param client The client we're going to write for.
1118  * @param fua Flag to indicate 'Force Unit Access'
1119  * @return 0 on success, nonzero on failure
1120  **/
1121 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1122  char pagebuf[DIFFPAGESIZE];
1123  off_t mapcnt,mapl,maph;
1124  off_t wrlen,rdlen;
1125  off_t pagestart;
1126  off_t offset;
1127 
1128  if (!(client->server->flags & F_COPYONWRITE))
1129  return(rawexpwrite_fully(a, buf, len, client, fua));
1130  DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1131 
1132  mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1133 
1134  for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1135  pagestart=mapcnt*DIFFPAGESIZE ;
1136  offset=a-pagestart ;
1137  wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1138  len : (size_t)DIFFPAGESIZE-offset;
1139 
1140  if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1141  DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1142  (unsigned long)(client->difmap[mapcnt])) ;
1143  myseek(client->difffile,
1144  client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1145  if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1146  } else { /* the block is not there */
1147  myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1148  client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1149  DEBUG("Page %llu is not here, we put it at %lu\n",
1150  (unsigned long long)mapcnt,
1151  (unsigned long)(client->difmap[mapcnt]));
1152  rdlen=DIFFPAGESIZE ;
1153  if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1154  return -1;
1155  memcpy(pagebuf+offset,buf,wrlen) ;
1156  if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1157  DIFFPAGESIZE)
1158  return -1;
1159  }
1160  len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1161  }
1162  if (client->server->flags & F_SYNC) {
1163  fsync(client->difffile);
1164  } else if (fua) {
1165  /* open question: would it be cheaper to do multiple sync_file_ranges?
1166  as we iterate through the above?
1167  */
1168  fdatasync(client->difffile);
1169  }
1170  return 0;
1171 }
1172 
1173 /**
1174  * Flush data to a client
1175  *
1176  * @param client The client we're going to write for.
1177  * @return 0 on success, nonzero on failure
1178  **/
1179 int expflush(CLIENT *client) {
1180  gint i;
1181 
1182  if (client->server->flags & F_COPYONWRITE) {
1183  return fsync(client->difffile);
1184  }
1185 
1186  for (i = 0; i < client->export->len; i++) {
1187  FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1188  if (fsync(fi.fhandle) < 0)
1189  return -1;
1190  }
1191 
1192  return 0;
1193 }
1194 
1195 /*
1196  * If the current system supports it, call fallocate() on the backend
1197  * file to resparsify stuff that isn't needed anymore (see NBD_CMD_TRIM)
1198  */
1199 int exptrim(struct nbd_request* req, CLIENT* client) {
1200 #if HAVE_FALLOC_PH
1201  FILE_INFO prev = g_array_index(client->export, FILE_INFO, 0);
1202  FILE_INFO cur = prev;
1203  int i = 1;
1204  /* We're running on a system that supports the
1205  * FALLOC_FL_PUNCH_HOLE option to re-sparsify a file */
1206  do {
1207  if(i<client->export->len) {
1208  cur = g_array_index(client->export, FILE_INFO, i);
1209  }
1210  if(prev.startoff <= req->from) {
1211  off_t curoff = req->from - prev.startoff;
1212  off_t curlen = cur.startoff - prev.startoff - curoff;
1213  fallocate(prev.fhandle, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, curoff, curlen);
1214  }
1215  prev = cur;
1216  } while(i < client->export->len && cur.startoff < (req->from + req->len));
1217  DEBUG("Performed TRIM request from %llu to %llu", (unsigned long long) req->from, (unsigned long long) req->len);
1218 #else
1219  DEBUG("Ignoring TRIM request (not supported on current platform");
1220 #endif
1221  return 0;
1222 }
1223 
/**
 * Send one option reply to the client in a single writev() call.
 *
 * The message consists of a 64-bit magic, the option being answered, the
 * reply type, the payload length (all converted to network byte order)
 * and finally the payload itself. A write that does not cover the whole
 * message is reported with perror() and otherwise ignored.
 *
 * @param opt The option this reply answers
 * @param net The socket to write to
 * @param reply_type The reply code to send
 * @param datasize Number of payload bytes in data
 * @param data The payload, or NULL when datasize is 0
 **/
static void send_reply(uint32_t opt, int net, uint32_t reply_type, size_t datasize, void* data) {
	uint64_t wire_magic = htonll(0x3e889045565a9LL);
	uint32_t wire_opt = htonl(opt);
	uint32_t wire_type = htonl(reply_type);
	uint32_t wire_len = htonl(datasize);
	struct iovec parts[5];
	size_t expected = 0;
	ssize_t sent;
	int k;

	parts[0].iov_base = &wire_magic;
	parts[0].iov_len = sizeof(wire_magic);
	parts[1].iov_base = &wire_opt;
	parts[1].iov_len = sizeof(wire_opt);
	parts[2].iov_base = &wire_type;
	parts[2].iov_len = sizeof(wire_type);
	parts[3].iov_base = &wire_len;
	parts[3].iov_len = sizeof(wire_len);
	parts[4].iov_base = data;
	parts[4].iov_len = datasize;

	for (k = 0; k < 5; k++)
		expected += parts[k].iov_len;

	sent = writev(net, parts, 5);
	if(sent != expected) {
		perror("E: couldn't write enough data:");
	}
}
1242 
1243 static CLIENT* handle_export_name(uint32_t opt, int net, GArray* servers, uint32_t cflags) {
1244  uint32_t namelen;
1245  char* name;
1246  int i;
1247 
1248  if (read(net, &namelen, sizeof(namelen)) < 0) {
1249  err("Negotiation failed/7: %m");
1250  return NULL;
1251  }
1252  namelen = ntohl(namelen);
1253  name = malloc(namelen+1);
1254  name[namelen]=0;
1255  if (read(net, name, namelen) < 0) {
1256  err("Negotiation failed/8: %m");
1257  free(name);
1258  return NULL;
1259  }
1260  for(i=0; i<servers->len; i++) {
1261  SERVER* serve = &(g_array_index(servers, SERVER, i));
1262  if(!strcmp(serve->servename, name)) {
1263  CLIENT* client = g_new0(CLIENT, 1);
1264  client->server = serve;
1265  client->exportsize = OFFT_MAX;
1266  client->net = net;
1267  client->modern = TRUE;
1268  client->transactionlogfd = -1;
1269  client->clientfeats = cflags;
1270  free(name);
1271  return client;
1272  }
1273  }
1274  err("Negotiation failed/8a: Requested export not found");
1275  free(name);
1276  return NULL;
1277 }
1278 
1279 static void handle_list(uint32_t opt, int net, GArray* servers, uint32_t cflags) {
1280  uint32_t len;
1281  int i;
1282  char buf[1024];
1283  char *ptr = buf + sizeof(len);
1284 
1285  if (read(net, &len, sizeof(len)) < 0)
1286  err("Negotiation failed/8: %m");
1287  len = ntohl(len);
1288  if(len) {
1289  send_reply(opt, net, NBD_REP_ERR_INVALID, 0, NULL);
1290  }
1291  if(!(glob_flags & F_LIST)) {
1292  send_reply(opt, net, NBD_REP_ERR_POLICY, 0, NULL);
1293  err_nonfatal("Client tried disallowed list option");
1294  return;
1295  }
1296  for(i=0; i<servers->len; i++) {
1297  SERVER* serve = &(g_array_index(servers, SERVER, i));
1298  len = htonl(strlen(serve->servename));
1299  memcpy(buf, &len, sizeof(len));
1300  strcpy(ptr, serve->servename);
1301  send_reply(opt, net, NBD_REP_SERVER, strlen(serve->servename)+sizeof(len), buf);
1302  }
1303  send_reply(opt, net, NBD_REP_ACK, 0, NULL);
1304 }
1305 
1306 /**
1307  * Do the initial negotiation.
1308  *
1309  * @param client The client we're negotiating with.
1310  **/
1311 CLIENT* negotiate(int net, CLIENT *client, GArray* servers, int phase) {
1312  char zeros[128];
1313  uint64_t size_host;
1314  uint32_t flags = NBD_FLAG_HAS_FLAGS;
1315  uint16_t smallflags = 0;
1316  uint64_t magic;
1317 
1318  memset(zeros, '\0', sizeof(zeros));
1319  assert(((phase & NEG_INIT) && (phase & NEG_MODERN)) || client);
1320  if(phase & NEG_MODERN) {
1321  smallflags |= NBD_FLAG_FIXED_NEWSTYLE;
1322  }
1323  if(phase & NEG_INIT) {
1324  /* common */
1325  if (write(net, INIT_PASSWD, 8) < 0) {
1326  err_nonfatal("Negotiation failed/1: %m");
1327  if(client)
1328  exit(EXIT_FAILURE);
1329  }
1330  if(phase & NEG_MODERN) {
1331  /* modern */
1332  magic = htonll(opts_magic);
1333  } else {
1334  /* oldstyle */
1335  magic = htonll(cliserv_magic);
1336  }
1337  if (write(net, &magic, sizeof(magic)) < 0) {
1338  err_nonfatal("Negotiation failed/2: %m");
1339  if(phase & NEG_OLD)
1340  exit(EXIT_FAILURE);
1341  }
1342  }
1343  if ((phase & NEG_MODERN) && (phase & NEG_INIT)) {
1344  /* modern */
1345  uint32_t cflags;
1346  uint32_t opt;
1347 
1348  if(!servers)
1349  err("programmer error");
1350  smallflags = htons(smallflags);
1351  if (write(net, &smallflags, sizeof(uint16_t)) < 0)
1352  err_nonfatal("Negotiation failed/3: %m");
1353  if (read(net, &cflags, sizeof(cflags)) < 0)
1354  err_nonfatal("Negotiation failed/4: %m");
1355  cflags = htonl(cflags);
1356  do {
1357  if (read(net, &magic, sizeof(magic)) < 0)
1358  err_nonfatal("Negotiation failed/5: %m");
1359  magic = ntohll(magic);
1360  if(magic != opts_magic) {
1361  err_nonfatal("Negotiation failed/5a: magic mismatch");
1362  return NULL;
1363  }
1364  if (read(net, &opt, sizeof(opt)) < 0)
1365  err_nonfatal("Negotiation failed/6: %m");
1366  opt = ntohl(opt);
1367  switch(opt) {
1368  case NBD_OPT_EXPORT_NAME:
1369  // NBD_OPT_EXPORT_NAME must be the last
1370  // selected option, so return from here
1371  // if that is chosen.
1372  return handle_export_name(opt, net, servers, cflags);
1373  break;
1374  case NBD_OPT_LIST:
1375  handle_list(opt, net, servers, cflags);
1376  break;
1377  case NBD_OPT_ABORT:
1378  // handled below
1379  break;
1380  default:
1381  send_reply(opt, net, NBD_REP_ERR_UNSUP, 0, NULL);
1382  break;
1383  }
1384  } while((opt != NBD_OPT_EXPORT_NAME) && (opt != NBD_OPT_ABORT));
1385  if(opt == NBD_OPT_ABORT) {
1386  err_nonfatal("Session terminated by client");
1387  return NULL;
1388  }
1389  }
1390  /* common */
1391  size_host = htonll((u64)(client->exportsize));
1392  if (write(net, &size_host, 8) < 0)
1393  err("Negotiation failed/9: %m");
1394  if (client->server->flags & F_READONLY)
1395  flags |= NBD_FLAG_READ_ONLY;
1396  if (client->server->flags & F_FLUSH)
1397  flags |= NBD_FLAG_SEND_FLUSH;
1398  if (client->server->flags & F_FUA)
1399  flags |= NBD_FLAG_SEND_FUA;
1400  if (client->server->flags & F_ROTATIONAL)
1401  flags |= NBD_FLAG_ROTATIONAL;
1402  if (client->server->flags & F_TRIM)
1403  flags |= NBD_FLAG_SEND_TRIM;
1404  if (phase & NEG_OLD) {
1405  /* oldstyle */
1406  flags = htonl(flags);
1407  if (write(client->net, &flags, 4) < 0)
1408  err("Negotiation failed/10: %m");
1409  } else {
1410  /* modern */
1411  smallflags = (uint16_t)(flags & ~((uint16_t)0));
1412  smallflags = htons(smallflags);
1413  if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
1414  err("Negotiation failed/11: %m");
1415  }
1416  }
1417  /* common */
1418  if (write(client->net, zeros, 124) < 0)
1419  err("Negotiation failed/12: %m");
1420  return NULL;
1421 }
1422 
/**
 * sending macro: write a reply to the given socket and, when the client
 * has a transaction log open, append the same bytes to that log.
 * Expects a variable named 'client' to be in scope at the point of use.
 * Wrapped in do/while(0) so it behaves as one statement, including in
 * an unbraced if/else.
 */
#define SEND(net,reply) do { writeit( (net), &(reply), sizeof( reply )); \
	if (client->transactionlogfd != -1) \
		writeit(client->transactionlogfd, &(reply), sizeof(reply)); } while (0)
/**
 * error macro: send the reply with its error field set to errcode (in
 * network byte order), then reset the field to 0 for the next reply.
 */
#define ERROR(client,reply,errcode) do { (reply).error = htonl(errcode); SEND((client)->net,reply); (reply).error = 0; } while (0)
1429 /**
1430  * Serve a file to a single client.
1431  *
1432  * @todo This beast needs to be split up in many tiny little manageable
1433  * pieces. Preferably with a chainsaw.
1434  *
1435  * @param client The client we're going to serve to.
1436  * @return when the client disconnects
1437  **/
1438 int mainloop(CLIENT *client) {
1439  struct nbd_request request;
1440  struct nbd_reply reply;
1441  gboolean go_on=TRUE;
1442 #ifdef DODBG
1443  int i = 0;
1444 #endif
1445  negotiate(client->net, client, NULL, client->modern ? NEG_MODERN : (NEG_OLD | NEG_INIT));
1446  DEBUG("Entering request loop!\n");
1447  reply.magic = htonl(NBD_REPLY_MAGIC);
1448  reply.error = 0;
1449  while (go_on) {
1450  char buf[BUFSIZE];
1451  char* p;
1452  size_t len;
1453  size_t currlen;
1454  size_t writelen;
1455  uint16_t command;
1456 #ifdef DODBG
1457  i++;
1458  printf("%d: ", i);
1459 #endif
1460  readit(client->net, &request, sizeof(request));
1461  if (client->transactionlogfd != -1)
1462  writeit(client->transactionlogfd, &request, sizeof(request));
1463 
1464  request.from = ntohll(request.from);
1465  request.type = ntohl(request.type);
1466  command = request.type & NBD_CMD_MASK_COMMAND;
1467  len = ntohl(request.len);
1468 
1469  DEBUG("%s from %llu (%llu) len %u, ", getcommandname(command),
1470  (unsigned long long)request.from,
1471  (unsigned long long)request.from / 512, len);
1472 
1473  if (request.magic != htonl(NBD_REQUEST_MAGIC))
1474  err("Not enough magic.");
1475 
1476  memcpy(reply.handle, request.handle, sizeof(reply.handle));
1477 
1478  if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
1479  if (request.from + len < request.from) { // 64 bit overflow!!
1480  DEBUG("[Number too large!]");
1481  ERROR(client, reply, EINVAL);
1482  continue;
1483  }
1484 
1485  if (((off_t)request.from + len) > client->exportsize) {
1486  DEBUG("[RANGE!]");
1487  ERROR(client, reply, EINVAL);
1488  continue;
1489  }
1490 
1491  currlen = len;
1492  if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
1493  currlen = BUFSIZE - sizeof(struct nbd_reply);
1494  if(!logged_oversized) {
1495  msg(LOG_DEBUG, "oversized request (this is not a problem)");
1496  logged_oversized = true;
1497  }
1498  }
1499  }
1500 
1501  switch (command) {
1502 
1503  case NBD_CMD_DISC:
1504  msg(LOG_INFO, "Disconnect request received.");
1505  if (client->server->flags & F_COPYONWRITE) {
1506  if (client->difmap) g_free(client->difmap) ;
1507  close(client->difffile);
1508  unlink(client->difffilename);
1509  free(client->difffilename);
1510  }
1511  go_on=FALSE;
1512  continue;
1513 
1514  case NBD_CMD_WRITE:
1515  DEBUG("wr: net->buf, ");
1516  while(len > 0) {
1517  readit(client->net, buf, currlen);
1518  DEBUG("buf->exp, ");
1519  if ((client->server->flags & F_READONLY) ||
1520  (client->server->flags & F_AUTOREADONLY)) {
1521  DEBUG("[WRITE to READONLY!]");
1522  ERROR(client, reply, EPERM);
1523  consume(client->net, buf, len-currlen, BUFSIZE);
1524  continue;
1525  }
1526  if (expwrite(request.from, buf, currlen, client,
1527  request.type & NBD_CMD_FLAG_FUA)) {
1528  DEBUG("Write failed: %m" );
1529  ERROR(client, reply, errno);
1530  consume(client->net, buf, len-currlen, BUFSIZE);
1531  continue;
1532  }
1533  len -= currlen;
1534  request.from += currlen;
1535  currlen = (len < BUFSIZE) ? len : BUFSIZE;
1536  }
1537  SEND(client->net, reply);
1538  DEBUG("OK!\n");
1539  continue;
1540 
1541  case NBD_CMD_FLUSH:
1542  DEBUG("fl: ");
1543  if (expflush(client)) {
1544  DEBUG("Flush failed: %m");
1545  ERROR(client, reply, errno);
1546  continue;
1547  }
1548  SEND(client->net, reply);
1549  DEBUG("OK!\n");
1550  continue;
1551 
1552  case NBD_CMD_READ:
1553  DEBUG("exp->buf, ");
1554  if (client->transactionlogfd != -1)
1555  writeit(client->transactionlogfd, &reply, sizeof(reply));
1556  writeit(client->net, &reply, sizeof(reply));
1557  p = buf;
1558  writelen = currlen;
1559  while(len > 0) {
1560  if (expread(request.from, p, currlen, client)) {
1561  DEBUG("Read failed: %m");
1562  ERROR(client, reply, errno);
1563  continue;
1564  }
1565 
1566  DEBUG("buf->net, ");
1567  writeit(client->net, buf, writelen);
1568  len -= currlen;
1569  request.from += currlen;
1570  currlen = (len < BUFSIZE) ? len : BUFSIZE;
1571  p = buf;
1572  writelen = currlen;
1573  }
1574  DEBUG("OK!\n");
1575  continue;
1576 
1577  case NBD_CMD_TRIM:
1578  /* The kernel module sets discard_zeroes_data == 0,
1579  * so it is okay to do nothing. */
1580  if (exptrim(&request, client)) {
1581  DEBUG("Trim failed: %m");
1582  ERROR(client, reply, errno);
1583  continue;
1584  }
1585  SEND(client->net, reply);
1586  continue;
1587 
1588  default:
1589  DEBUG ("Ignoring unknown command\n");
1590  continue;
1591  }
1592  }
1593  return 0;
1594 }
1595 
1596 /**
1597  * Set up client export array, which is an array of FILE_INFO.
1598  * Also, split a single exportfile into multiple ones, if that was asked.
1599  * @param client information on the client which we want to setup export for
1600  **/
1601 void setupexport(CLIENT* client) {
1602  int i;
1603  off_t laststartoff = 0, lastsize = 0;
1604  int multifile = (client->server->flags & F_MULTIFILE);
1605  int temporary = (client->server->flags & F_TEMPORARY) && !multifile;
1606  int cancreate = (client->server->expected_size) && !multifile;
1607 
1608  client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1609 
1610  /* If multi-file, open as many files as we can.
1611  * If not, open exactly one file.
1612  * Calculate file sizes as we go to get total size. */
1613  for(i=0; ; i++) {
1614  FILE_INFO fi;
1615  gchar *tmpname;
1616  gchar* error_string;
1617 
1618  if (i)
1619  cancreate = 0;
1620  /* if expected_size is specified, and this is the first file, we can create the file */
1621  mode_t mode = (client->server->flags & F_READONLY) ?
1622  O_RDONLY : (O_RDWR | (cancreate?O_CREAT:0));
1623 
1624  if (temporary) {
1625  tmpname=g_strdup_printf("%s.%d-XXXXXX", client->exportname, i);
1626  DEBUG( "Opening %s\n", tmpname );
1627  fi.fhandle = mkstemp(tmpname);
1628  } else {
1629  if(multifile) {
1630  tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1631  } else {
1632  tmpname=g_strdup(client->exportname);
1633  }
1634  DEBUG( "Opening %s\n", tmpname );
1635  fi.fhandle = open(tmpname, mode, 0x600);
1636  if(fi.fhandle == -1 && mode == O_RDWR) {
1637  /* Try again because maybe media was read-only */
1638  fi.fhandle = open(tmpname, O_RDONLY);
1639  if(fi.fhandle != -1) {
1640  /* Opening the base file in copyonwrite mode is
1641  * okay */
1642  if(!(client->server->flags & F_COPYONWRITE)) {
1643  client->server->flags |= F_AUTOREADONLY;
1644  client->server->flags |= F_READONLY;
1645  }
1646  }
1647  }
1648  }
1649  if(fi.fhandle == -1) {
1650  if(multifile && i>0)
1651  break;
1652  error_string=g_strdup_printf(
1653  "Could not open exported file %s: %%m",
1654  tmpname);
1655  err(error_string);
1656  }
1657 
1658  if (temporary)
1659  unlink(tmpname); /* File will stick around whilst FD open */
1660 
1661  fi.startoff = laststartoff + lastsize;
1662  g_array_append_val(client->export, fi);
1663  g_free(tmpname);
1664 
1665  /* Starting offset and size of this file will be used to
1666  * calculate starting offset of next file */
1667  laststartoff = fi.startoff;
1668  lastsize = size_autodetect(fi.fhandle);
1669 
1670  /* If we created the file, it will be length zero */
1671  if (!lastsize && cancreate) {
1672  assert(!multifile);
1673  if(ftruncate (fi.fhandle, client->server->expected_size)<0) {
1674  err("Could not expand file: %m");
1675  }
1676  lastsize = client->server->expected_size;
1677  break; /* don't look for any more files */
1678  }
1679 
1680  if(!multifile || temporary)
1681  break;
1682  }
1683 
1684  /* Set export size to total calculated size */
1685  client->exportsize = laststartoff + lastsize;
1686 
1687  /* Export size may be overridden */
1688  if(client->server->expected_size) {
1689  /* desired size must be <= total calculated size */
1690  if(client->server->expected_size > client->exportsize) {
1691  err("Size of exported file is too big\n");
1692  }
1693 
1694  client->exportsize = client->server->expected_size;
1695  }
1696 
1697  msg(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1698  if(multifile) {
1699  msg(LOG_INFO, "Total number of files: %d", i);
1700  }
1701 }
1702 
1704  off_t i;
1705  if ((client->difffilename = malloc(1024))==NULL)
1706  err("Failed to allocate string for diff file name");
1707  snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
1708  (int)getpid()) ;
1709  client->difffilename[1023]='\0';
1710  msg(LOG_INFO, "About to create map and diff file %s", client->difffilename) ;
1711  client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1712  if (client->difffile<0) err("Could not create diff file (%m)") ;
1713  if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1714  err("Could not allocate memory") ;
1715  for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1716 
1717  return 0;
1718 }
1719 
1720 /**
1721  * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1722  * options
1723  *
1724  * @param command the command to be ran. Read from the config file
1725  * @param file the file name we're about to export
1726  **/
1727 int do_run(gchar* command, gchar* file) {
1728  gchar* cmd;
1729  int retval=0;
1730 
1731  if(command && *command) {
1732  cmd = g_strdup_printf(command, file);
1733  retval=system(cmd);
1734  g_free(cmd);
1735  }
1736  return retval;
1737 }
1738 
1739 /**
1740  * Serve a connection.
1741  *
1742  * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1743  * follow the road map.
1744  *
1745  * @param client a connected client
1746  **/
1747 void serveconnection(CLIENT *client) {
1748  if (client->server->transactionlog && (client->transactionlogfd == -1))
1749  {
1750  if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
1751  O_WRONLY | O_CREAT,
1752  S_IRUSR | S_IWUSR)))
1753  g_warning("Could not open transaction log %s",
1754  client->server->transactionlog);
1755  }
1756 
1757  if(do_run(client->server->prerun, client->exportname)) {
1758  exit(EXIT_FAILURE);
1759  }
1760  setupexport(client);
1761 
1762  if (client->server->flags & F_COPYONWRITE) {
1763  copyonwrite_prepare(client);
1764  }
1765 
1766  setmysockopt(client->net);
1767 
1768  mainloop(client);
1769  do_run(client->server->postrun, client->exportname);
1770 
1771  if (-1 != client->transactionlogfd)
1772  {
1773  close(client->transactionlogfd);
1774  client->transactionlogfd = -1;
1775  }
1776 }
1777 
1778 /**
1779  * Find the name of the file we have to serve. This will use g_strdup_printf
1780  * to put the IP address of the client inside a filename containing
1781  * "%s" (in the form as specified by the "virtstyle" option). That name
1782  * is then written to client->exportname.
1783  *
1784  * @param net A socket connected to an nbd client
1785  * @param client information about the client. The IP address in human-readable
1786  * format will be written to a new char* buffer, the address of which will be
1787  * stored in client->clientname.
1788  * @return: 0 - OK, -1 - failed.
1789  **/
1790 int set_peername(int net, CLIENT *client) {
1791  struct sockaddr_storage netaddr;
1792  struct sockaddr_in *netaddr4 = NULL;
1793  struct sockaddr_in6 *netaddr6 = NULL;
1794  socklen_t addrinlen = sizeof( struct sockaddr_storage );
1795  struct addrinfo hints;
1796  struct addrinfo *ai = NULL;
1797  char peername[NI_MAXHOST];
1798  char netname[NI_MAXHOST];
1799  char *tmp = NULL;
1800  int i;
1801  int e;
1802  int shift;
1803 
1804  if (getpeername(net, (struct sockaddr *) &(client->clientaddr), &addrinlen) < 0) {
1805  msg(LOG_INFO, "getpeername failed: %m");
1806  return -1;
1807  }
1808 
1809  if((e = getnameinfo((struct sockaddr *)&(client->clientaddr), addrinlen,
1810  peername, sizeof (peername), NULL, 0, NI_NUMERICHOST))) {
1811  msg(LOG_INFO, "getnameinfo failed: %s", gai_strerror(e));
1812  return -1;
1813  }
1814 
1815  memset(&hints, '\0', sizeof (hints));
1816  hints.ai_flags = AI_ADDRCONFIG;
1817  e = getaddrinfo(peername, NULL, &hints, &ai);
1818 
1819  if(e != 0) {
1820  msg(LOG_INFO, "getaddrinfo failed: %s", gai_strerror(e));
1821  freeaddrinfo(ai);
1822  return -1;
1823  }
1824 
1825  switch(client->server->virtstyle) {
1826  case VIRT_NONE:
1827  msg(LOG_DEBUG, "virtualization is off");
1828  client->exportname=g_strdup(client->server->exportname);
1829  break;
1830  case VIRT_IPHASH:
1831  msg(LOG_DEBUG, "virtstyle iphash");
1832  for(i=0;i<strlen(peername);i++) {
1833  if(peername[i]=='.') {
1834  peername[i]='/';
1835  }
1836  }
1837  case VIRT_IPLIT:
1838  msg(LOG_DEBUG, "virststyle ipliteral");
1839  client->exportname=g_strdup_printf(client->server->exportname, peername);
1840  break;
1841  case VIRT_CIDR:
1842  msg(LOG_DEBUG, "virtstyle cidr %d", client->server->cidrlen);
1843  memcpy(&netaddr, &(client->clientaddr), addrinlen);
1844  int addrbits;
1845  assert((ai->ai_family == AF_INET) || (ai->ai_family == AF_INET6));
1846  if(ai->ai_family == AF_INET) {
1847  addrbits = 32;
1848  } else if(ai->ai_family == AF_INET6) {
1849  addrbits = 128;
1850  }
1851  uint8_t* addrptr = ((struct sockaddr*)&netaddr)->sa_data;
1852  for(int i = 0; i < addrbits; i+=8) {
1853  int masklen = client->server->cidrlen - i;
1854  masklen = masklen > 0 ? masklen : 0;
1855  uint8_t mask = getmaskbyte(masklen);
1856  *addrptr &= mask;
1857  addrptr++;
1858  }
1859  getnameinfo((struct sockaddr *) &netaddr, addrinlen,
1860  netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
1861  tmp=g_strdup_printf("%s/%s", netname, peername);
1862 
1863  if(tmp != NULL)
1864  client->exportname=g_strdup_printf(client->server->exportname, tmp);
1865 
1866  break;
1867  }
1868 
1869  freeaddrinfo(ai);
1870  msg(LOG_INFO, "connect from %s, assigned file is %s",
1871  peername, client->exportname);
1872  client->clientname=g_strdup(peername);
1873  return 0;
1874 }
1875 
1876 /**
1877  * Destroy a pid_t*
1878  * @param data a pointer to pid_t which should be freed
1879  **/
1880 void destroy_pid_t(gpointer data) {
1881  g_free(data);
1882 }
1883 
1884 static pid_t
1886 {
1887  pid_t pid;
1888  sigset_t newset;
1889  sigset_t oldset;
1890 
1891  sigemptyset(&newset);
1892  sigaddset(&newset, SIGCHLD);
1893  sigaddset(&newset, SIGTERM);
1894  sigprocmask(SIG_BLOCK, &newset, &oldset);
1895  pid = fork();
1896  if (pid < 0) {
1897  msg(LOG_ERR, "Could not fork (%s)", strerror(errno));
1898  goto out;
1899  }
1900  if (pid > 0) { /* Parent */
1901  pid_t *pidp;
1902 
1903  pidp = g_malloc(sizeof(pid_t));
1904  *pidp = pid;
1905  g_hash_table_insert(children, pidp, pidp);
1906  goto out;
1907  }
1908  /* Child */
1909  signal(SIGCHLD, SIG_DFL);
1910  signal(SIGTERM, SIG_DFL);
1911  signal(SIGHUP, SIG_DFL);
1912 out:
1913  sigprocmask(SIG_SETMASK, &oldset, NULL);
1914  return pid;
1915 }
1916 
/**
 * Accept one pending connection on a listening socket. A failure is
 * logged through err_nonfatal().
 *
 * @param sock the listening socket
 * @return the descriptor of the accepted connection, or the negative
 * value returned by accept() on failure
 **/
static int
socket_accept(const int sock)
{
	struct sockaddr_storage peer;
	socklen_t peerlen = sizeof(peer);
	const int conn = accept(sock, (struct sockaddr *) &peer, &peerlen);

	if (conn >= 0)
		return conn;

	err_nonfatal("Failed to accept socket connection: %m");
	return conn;
}
1931 
1932 static void
1933 handle_modern_connection(GArray *const servers, const int sock)
1934 {
1935  int net;
1936  pid_t pid;
1937  CLIENT *client = NULL;
1938  int sock_flags_old;
1939  int sock_flags_new;
1940 
1941  net = socket_accept(sock);
1942  if (net < 0)
1943  return;
1944 
1945  if (!dontfork) {
1946  pid = spawn_child();
1947  if (pid) {
1948  if (pid > 0)
1949  msg(LOG_INFO, "Spawned a child process");
1950  if (pid < 0)
1951  msg(LOG_ERR, "Failed to spawn a child process");
1952  close(net);
1953  return;
1954  }
1955  /* Child just continues. */
1956  }
1957 
1958  client = negotiate(net, NULL, servers, NEG_INIT | NEG_MODERN);
1959  if (!client) {
1960  msg(LOG_ERR, "Modern initial negotiation failed");
1961  goto handler_err;
1962  }
1963 
1964  if (client->server->max_connections > 0 &&
1965  g_hash_table_size(children) >= client->server->max_connections) {
1966  msg(LOG_ERR, "Max connections (%d) reached",
1967  client->server->max_connections);
1968  goto handler_err;
1969  }
1970 
1971  sock_flags_old = fcntl(net, F_GETFL, 0);
1972  if (sock_flags_old == -1) {
1973  msg(LOG_ERR, "Failed to get socket flags");
1974  goto handler_err;
1975  }
1976 
1977  sock_flags_new = sock_flags_old & ~O_NONBLOCK;
1978  if (sock_flags_new != sock_flags_old &&
1979  fcntl(net, F_SETFL, sock_flags_new) == -1) {
1980  msg(LOG_ERR, "Failed to set socket to blocking mode");
1981  goto handler_err;
1982  }
1983 
1984  if (set_peername(net, client)) {
1985  msg(LOG_ERR, "Failed to set peername");
1986  goto handler_err;
1987  }
1988 
1989  if (!authorized_client(client)) {
1990  msg(LOG_INFO, "Client '%s' is not authorized to access",
1991  client->clientname);
1992  goto handler_err;
1993  }
1994 
1995  if (!dontfork) {
1996  int i;
1997 
1998  /* Free all root server resources here, because we are
1999  * currently in the child process serving one specific
2000  * connection. These are not simply needed anymore. */
2001  g_hash_table_destroy(children);
2002  children = NULL;
2003  for (i = 0; i < modernsocks->len; i++) {
2004  close(g_array_index(modernsocks, int, i));
2005  }
2006  g_array_free(modernsocks, TRUE);
2007 
2008  /* Now that we are in the child process after a
2009  * succesful negotiation, we do not need the list of
2010  * servers anymore, get rid of it.*/
2011 
2012  for (i = 0; i < servers->len; i++) {
2013  const SERVER *const server = &g_array_index(servers, SERVER, i);
2014  close(server->socket);
2015  }
2016 
2017  /* FALSE does not free the
2018  actual data. This is required,
2019  because the client has a
2020  direct reference into that
2021  data, and otherwise we get a
2022  segfault... */
2023  g_array_free(servers, FALSE);
2024  }
2025 
2026  msg(LOG_INFO, "Starting to serve");
2027  serveconnection(client);
2028  exit(EXIT_SUCCESS);
2029 
2030 handler_err:
2031  g_free(client);
2032  close(net);
2033 
2034  if (!dontfork) {
2035  exit(EXIT_FAILURE);
2036  }
2037 }
2038 
2039 static void
2040 handle_oldstyle_connection(GArray *const servers, SERVER *const serve)
2041 {
2042  int net;
2043  CLIENT *client = NULL;
2044  int sock_flags_old;
2045  int sock_flags_new;
2046 
2047  net = socket_accept(serve->socket);
2048  if (net < 0)
2049  return;
2050 
2051  if(serve->max_connections > 0 &&
2052  g_hash_table_size(children) >= serve->max_connections) {
2053  msg(LOG_INFO, "Max connections reached");
2054  goto handle_connection_out;
2055  }
2056  if((sock_flags_old = fcntl(net, F_GETFL, 0)) == -1) {
2057  err("fcntl F_GETFL");
2058  }
2059  sock_flags_new = sock_flags_old & ~O_NONBLOCK;
2060  if (sock_flags_new != sock_flags_old &&
2061  fcntl(net, F_SETFL, sock_flags_new) == -1) {
2062  err("fcntl F_SETFL ~O_NONBLOCK");
2063  }
2064 
2065  client = g_new0(CLIENT, 1);
2066  client->server=serve;
2067  client->exportsize=OFFT_MAX;
2068  client->net=net;
2069  client->transactionlogfd = -1;
2070 
2071  if (set_peername(net, client)) {
2072  goto handle_connection_out;
2073  }
2074  if (!authorized_client(client)) {
2075  msg(LOG_INFO, "Unauthorized client");
2076  goto handle_connection_out;
2077  }
2078  msg(LOG_INFO, "Authorized client");
2079 
2080  if (!dontfork) {
2081  pid_t pid;
2082  int i;
2083  sigset_t newset;
2084  sigset_t oldset;
2085 
2086  sigemptyset(&newset);
2087  sigaddset(&newset, SIGCHLD);
2088  sigaddset(&newset, SIGTERM);
2089  sigprocmask(SIG_BLOCK, &newset, &oldset);
2090  if ((pid = fork()) < 0) {
2091  msg(LOG_INFO, "Could not fork (%s)", strerror(errno));
2092  sigprocmask(SIG_SETMASK, &oldset, NULL);
2093  goto handle_connection_out;
2094  }
2095  if (pid > 0) { /* parent */
2096  pid_t *pidp;
2097 
2098  pidp = g_malloc(sizeof(pid_t));
2099  *pidp = pid;
2100  g_hash_table_insert(children, pidp, pidp);
2101  sigprocmask(SIG_SETMASK, &oldset, NULL);
2102  goto handle_connection_out;
2103  }
2104  /* child */
2105  signal(SIGCHLD, SIG_DFL);
2106  signal(SIGTERM, SIG_DFL);
2107  signal(SIGHUP, SIG_DFL);
2108  sigprocmask(SIG_SETMASK, &oldset, NULL);
2109 
2110  g_hash_table_destroy(children);
2111  children = NULL;
2112  for(i=0;i<servers->len;i++) {
2113  close(g_array_index(servers, SERVER, i).socket);
2114  }
2115  /* FALSE does not free the
2116  actual data. This is required,
2117  because the client has a
2118  direct reference into that
2119  data, and otherwise we get a
2120  segfault... */
2121  g_array_free(servers, FALSE);
2122  for(i=0;i<modernsocks->len;i++) {
2123  close(g_array_index(modernsocks, int, i));
2124  }
2125  g_array_free(modernsocks, TRUE);
2126  }
2127 
2128  msg(LOG_INFO, "Starting to serve");
2129  serveconnection(client);
2130  exit(EXIT_SUCCESS);
2131 
2132 handle_connection_out:
2133  g_free(client);
2134  close(net);
2135 }
2136 
2137 /**
2138  * Return the index of the server whose servename matches the given
2139  * name.
2140  *
2141  * @param servename a string to match
2142  * @param servers an array of servers
2143  * @return the first index of the server whose servename matches the
2144  * given name or -1 if one cannot be found
2145  **/
2146 static int get_index_by_servename(const gchar *const servename,
2147  const GArray *const servers) {
2148  int i;
2149 
2150  for (i = 0; i < servers->len; ++i) {
2151  const SERVER server = g_array_index(servers, SERVER, i);
2152 
2153  if (strcmp(servename, server.servename) == 0)
2154  return i;
2155  }
2156 
2157  return -1;
2158 }
2159 
2160 int setup_serve(SERVER *const serve, GError **const gerror);
2161 
2162 /**
2163  * Parse configuration files and add servers to the array if they don't
2164  * already exist there. The existence is tested by comparing
2165  * servenames. A server is appended to the array only if its servename
2166  * is unique among all other servers.
2167  *
2168  * @param servers an array of servers
2169  * @return the number of new servers appended to the array, or -1 in
2170  * case of an error
2171  **/
2172 static int append_new_servers(GArray *const servers, GError **const gerror) {
2173  int i;
2174  GArray *new_servers;
2175  const int old_len = servers->len;
2176  int retval = -1;
2177  struct generic_conf genconf;
2178 
2179  new_servers = parse_cfile(config_file_pos, &genconf, gerror);
2180  if (!new_servers)
2181  goto out;
2182 
2183  for (i = 0; i < new_servers->len; ++i) {
2184  SERVER new_server = g_array_index(new_servers, SERVER, i);
2185 
2186  if (new_server.servename
2187  && -1 == get_index_by_servename(new_server.servename,
2188  servers)) {
2189  if (setup_serve(&new_server, gerror) == -1)
2190  goto out;
2191  if (append_serve(&new_server, servers) == -1)
2192  goto out;
2193  }
2194  }
2195 
2196  retval = servers->len - old_len;
2197 out:
2198  g_array_free(new_servers, TRUE);
2199 
2200  return retval;
2201 }
2202 
2203 /**
2204  * Loop through the available servers, and serve them. Never returns.
2205  **/
2206 void serveloop(GArray* servers) {
2207  int i;
2208  int max;
2209  fd_set mset;
2210  fd_set rset;
2211 
2212  /*
2213  * Set up the master fd_set. The set of descriptors we need
2214  * to select() for never changes anyway and it buys us a *lot*
2215  * of time to only build this once. However, if we ever choose
2216  * to not fork() for clients anymore, we may have to revisit
2217  * this.
2218  */
2219  max=0;
2220  FD_ZERO(&mset);
2221  for(i=0;i<servers->len;i++) {
2222  int sock;
2223  if((sock=(g_array_index(servers, SERVER, i)).socket) >= 0) {
2224  FD_SET(sock, &mset);
2225  max=sock>max?sock:max;
2226  }
2227  }
2228  for(i=0;i<modernsocks->len;i++) {
2229  int sock = g_array_index(modernsocks, int, i);
2230  FD_SET(sock, &mset);
2231  max=sock>max?sock:max;
2232  }
2233  for(;;) {
2234  /* SIGHUP causes the root server process to reconfigure
2235  * itself and add new export servers for each newly
2236  * found export configuration group, i.e. spawn new
2237  * server processes for each previously non-existent
2238  * export. This does not alter old runtime configuration
2239  * but just appends new exports. */
2240  if (is_sighup_caught) {
2241  int n;
2242  GError *gerror = NULL;
2243 
2244  msg(LOG_INFO, "reconfiguration request received");
2245  is_sighup_caught = 0; /* Reset to allow catching
2246  * it again. */
2247 
2248  n = append_new_servers(servers, &gerror);
2249  if (n == -1)
2250  msg(LOG_ERR, "failed to append new servers: %s",
2251  gerror->message);
2252 
2253  for (i = servers->len - n; i < servers->len; ++i) {
2254  const SERVER server = g_array_index(servers,
2255  SERVER, i);
2256 
2257  if (server.socket >= 0) {
2258  FD_SET(server.socket, &mset);
2259  max = server.socket > max ? server.socket : max;
2260  }
2261 
2262  msg(LOG_INFO, "reconfigured new server: %s",
2263  server.servename);
2264  }
2265  }
2266 
2267  memcpy(&rset, &mset, sizeof(fd_set));
2268  if(select(max+1, &rset, NULL, NULL, NULL)>0) {
2269 
2270  DEBUG("accept, ");
2271  for(i=0; i < modernsocks->len; i++) {
2272  int sock = g_array_index(modernsocks, int, i);
2273  if(!FD_ISSET(sock, &rset)) {
2274  continue;
2275  }
2276 
2277  handle_modern_connection(servers, sock);
2278  }
2279  for(i=0; i < servers->len; i++) {
2280  SERVER *serve;
2281 
2282  serve=&(g_array_index(servers, SERVER, i));
2283  if(serve->socket < 0) {
2284  continue;
2285  }
2286  if(FD_ISSET(serve->socket, &rset)) {
2287  handle_oldstyle_connection(servers, serve);
2288  }
2289  }
2290  }
2291  }
2292 }
2293 void serveloop(GArray* servers) G_GNUC_NORETURN;
2294 
2295 /**
2296  * Set server socket options.
2297  *
2298  * @param socket a socket descriptor of the server
2299  *
2300  * @param gerror a pointer to an error object pointer used for reporting
2301  * errors. On error, if gerror is not NULL, *gerror is set and -1
2302  * is returned.
2303  *
2304  * @return 0 on success, -1 on error
2305  **/
2306 int dosockopts(const int socket, GError **const gerror) {
2307 #ifndef sun
2308  int yes=1;
2309 #else
2310  char yes='1';
2311 #endif /* sun */
2312  struct linger l;
2313 
2314  /* lose the pesky "Address already in use" error message */
2315  if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
2316  g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_REUSEADDR,
2317  "failed to set socket option SO_REUSEADDR: %s",
2318  strerror(errno));
2319  return -1;
2320  }
2321  l.l_onoff = 1;
2322  l.l_linger = 10;
2323  if (setsockopt(socket,SOL_SOCKET,SO_LINGER,&l,sizeof(l)) == -1) {
2324  g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_LINGER,
2325  "failed to set socket option SO_LINGER: %s",
2326  strerror(errno));
2327  return -1;
2328  }
2329  if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
2330  g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_KEEPALIVE,
2331  "failed to set socket option SO_KEEPALIVE: %s",
2332  strerror(errno));
2333  return -1;
2334  }
2335 
2336  return 0;
2337 }
2338 
2339 /**
2340  * Connect a server's socket.
2341  *
2342  * @param serve the server we want to connect.
2343  **/
2344 int setup_serve(SERVER *const serve, GError **const gerror) {
2345  struct addrinfo hints;
2346  struct addrinfo *ai = NULL;
2347  gchar *port = NULL;
2348  int e;
2349  int retval = -1;
2350 
2351  /* Without this, it's possible that socket == 0, even if it's
2352  * not initialized at all. And that would be wrong because 0 is
2353  * totally legal value for properly initialized descriptor. This
2354  * line is required to ensure that unused/uninitialized
2355  * descriptors are marked as such (new style configuration
2356  * case). Currently, servers are being initialized in multiple
2357  * places, and some of the them do the socket initialization
2358  * incorrectly. This is the only point common to all code paths,
2359  * and therefore setting -1 is put here. However, the whole
2360  * server initialization procedure should be extracted to its
2361  * own function and all code paths wanting to mess with servers
2362  * should initialize servers with that function.
2363  *
2364  * TODO: fix server initialization */
2365  serve->socket = -1;
2366 
2367  if(!(glob_flags & F_OLDSTYLE)) {
2368  return serve->servename ? 1 : 0;
2369  }
2370  memset(&hints,'\0',sizeof(hints));
2371  hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
2372  hints.ai_socktype = SOCK_STREAM;
2373  hints.ai_family = serve->socket_family;
2374 
2375  port = g_strdup_printf("%d", serve->port);
2376  if (!port) {
2377  g_set_error(gerror, NBDS_ERR, NBDS_ERR_SYS,
2378  "failed to open an export socket: "
2379  "failed to convert a port number to a string: %s",
2380  strerror(errno));
2381  goto out;
2382  }
2383 
2384  e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
2385 
2386  g_free(port);
2387 
2388  if(e != 0) {
2389  g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI,
2390  "failed to open an export socket: "
2391  "failed to get address info: %s",
2392  gai_strerror(e));
2393  goto out;
2394  }
2395 
2396  if(serve->socket_family == AF_UNSPEC)
2397  serve->socket_family = ai->ai_family;
2398 
2399 #ifdef WITH_SDP
2400  if ((serve->flags) && F_SDP) {
2401  if (ai->ai_family == AF_INET)
2402  ai->ai_family = AF_INET_SDP;
2403  else (ai->ai_family == AF_INET6)
2404  ai->ai_family = AF_INET6_SDP;
2405  }
2406 #endif
2407  if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0) {
2408  g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
2409  "failed to open an export socket: "
2410  "failed to create a socket: %s",
2411  strerror(errno));
2412  goto out;
2413  }
2414 
2415  if (dosockopts(serve->socket, gerror) == -1) {
2416  g_prefix_error(gerror, "failed to open an export socket: ");
2417  goto out;
2418  }
2419 
2420  DEBUG("Waiting for connections... bind, ");
2421  e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
2422  if (e != 0 && errno != EADDRINUSE) {
2423  g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
2424  "failed to open an export socket: "
2425  "failed to bind an address to a socket: %s",
2426  strerror(errno));
2427  goto out;
2428  }
2429  DEBUG("listen, ");
2430  if (listen(serve->socket, 1) < 0) {
2431  g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
2432  "failed to open an export socket: "
2433  "failed to start listening on a socket: %s",
2434  strerror(errno));
2435  goto out;
2436  }
2437 
2438  retval = serve->servename ? 1 : 0;
2439 out:
2440 
2441  if (retval == -1 && serve->socket >= 0) {
2442  close(serve->socket);
2443  serve->socket = -1;
2444  }
2445  freeaddrinfo (ai);
2446 
2447  return retval;
2448 }
2449 
2450 int open_modern(const gchar *const addr, const gchar *const port,
2451  GError **const gerror) {
2452  struct addrinfo hints;
2453  struct addrinfo* ai = NULL;
2454  struct addrinfo* ai_bak;
2455  struct sock_flags;
2456  int e;
2457  int retval = -1;
2458  int i=0;
2459  int sock = -1;
2460 
2461  memset(&hints, '\0', sizeof(hints));
2462  hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
2463  hints.ai_socktype = SOCK_STREAM;
2464  hints.ai_family = AF_UNSPEC;
2465  hints.ai_protocol = IPPROTO_TCP;
2466  e = getaddrinfo(addr, port ? port : NBD_DEFAULT_PORT, &hints, &ai);
2467  ai_bak = ai;
2468  if(e != 0) {
2469  g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI,
2470  "failed to open a modern socket: "
2471  "failed to get address info: %s",
2472  gai_strerror(e));
2473  goto out;
2474  }
2475 
2476  while(ai != NULL) {
2477  sock = -1;
2478 
2479  if((sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
2480  g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
2481  "failed to open a modern socket: "
2482  "failed to create a socket: %s",
2483  strerror(errno));
2484  goto out;
2485  }
2486 
2487  if (dosockopts(sock, gerror) == -1) {
2488  g_prefix_error(gerror, "failed to open a modern socket: ");
2489  goto out;
2490  }
2491 
2492  if(bind(sock, ai->ai_addr, ai->ai_addrlen)) {
2493  /* This is so wrong.
2494  *
2495  * Linux will return multiple entries for the
2496  * same system when we ask it for something
2497  * AF_UNSPEC, even though the first entry will
2498  * listen to both protocols. Other systems will
2499  * return multiple entries too, but we actually
2500  * do need to open both. Sigh.
2501  *
2502  * Handle it by ignoring EADDRINUSE if we've
2503  * already got at least one socket open
2504  */
2505  if(errno == EADDRINUSE && modernsocks->len > 0) {
2506  goto next;
2507  }
2508  g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
2509  "failed to open a modern socket: "
2510  "failed to bind an address to a socket: %s",
2511  strerror(errno));
2512  goto out;
2513  }
2514 
2515  if(listen(sock, 10) <0) {
2516  g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
2517  "failed to open a modern socket: "
2518  "failed to start listening on a socket: %s",
2519  strerror(errno));
2520  goto out;
2521  }
2522  g_array_append_val(modernsocks, sock);
2523  next:
2524  ai = ai->ai_next;
2525  }
2526 
2527  retval = 0;
2528 out:
2529 
2530  if (retval == -1 && sock >= 0) {
2531  close(sock);
2532  }
2533  if(ai_bak)
2534  freeaddrinfo(ai_bak);
2535 
2536  return retval;
2537 }
2538 
2539 /**
2540  * Connect our servers.
2541  **/
2542 void setup_servers(GArray *const servers, const gchar *const modernaddr,
2543  const gchar *const modernport) {
2544  int i;
2545  struct sigaction sa;
2546  int want_modern=0;
2547 
2548  for(i=0;i<servers->len;i++) {
2549  GError *gerror = NULL;
2550  SERVER *server = &g_array_index(servers, SERVER, i);
2551  int ret;
2552 
2553  ret = setup_serve(server, &gerror);
2554  if (ret == -1) {
2555  msg(LOG_ERR, "failed to setup servers: %s",
2556  gerror->message);
2557  g_clear_error(&gerror);
2558  exit(EXIT_FAILURE);
2559  }
2560  want_modern |= ret;
2561  }
2562  if(want_modern) {
2563  GError *gerror = NULL;
2564  if (open_modern(modernaddr, modernport, &gerror) == -1) {
2565  msg(LOG_ERR, "failed to setup servers: %s",
2566  gerror->message);
2567  g_clear_error(&gerror);
2568  exit(EXIT_FAILURE);
2569  }
2570  }
2571  children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
2572 
2573  sa.sa_handler = sigchld_handler;
2574  sigemptyset(&sa.sa_mask);
2575  sigaddset(&sa.sa_mask, SIGTERM);
2576  sa.sa_flags = SA_RESTART;
2577  if(sigaction(SIGCHLD, &sa, NULL) == -1)
2578  err("sigaction: %m");
2579 
2580  sa.sa_handler = sigterm_handler;
2581  sigemptyset(&sa.sa_mask);
2582  sigaddset(&sa.sa_mask, SIGCHLD);
2583  sa.sa_flags = SA_RESTART;
2584  if(sigaction(SIGTERM, &sa, NULL) == -1)
2585  err("sigaction: %m");
2586 
2587  sa.sa_handler = sighup_handler;
2588  sigemptyset(&sa.sa_mask);
2589  sa.sa_flags = SA_RESTART;
2590  if(sigaction(SIGHUP, &sa, NULL) == -1)
2591  err("sigaction: %m");
2592 }
2593 
2594 /**
2595  * Go daemon (unless we specified at compile time that we didn't want this)
2596  * @param serve the first server of our configuration. If its port is zero,
2597  * then do not daemonize, because we're doing inetd then. This parameter
2598  * is only used to create a PID file of the form
2599  * /var/run/nbd-server.&lt;port&gt;.pid; it's not modified in any way.
2600  **/
2601 #if !defined(NODAEMON)
2602 void daemonize(SERVER* serve) {
2603  FILE*pidf;
2604 
2605  if(serve && !(serve->port)) {
2606  return;
2607  }
2608  if(daemon(0,0)<0) {
2609  err("daemon");
2610  }
2611  if(!*pidftemplate) {
2612  if(serve) {
2613  strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
2614  } else {
2615  strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
2616  }
2617  }
2618  snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
2619  pidf=fopen(pidfname, "w");
2620  if(pidf) {
2621  fprintf(pidf,"%d\n", (int)getpid());
2622  fclose(pidf);
2623  } else {
2624  perror("fopen");
2625  fprintf(stderr, "Not fatal; continuing");
2626  }
2627 }
2628 #else
2629 #define daemonize(serve)
2630 #endif /* !defined(NODAEMON) */
2631 
2632 /*
2633  * Everything beyond this point (in the file) is run in non-daemon mode.
2634  * The stuff above daemonize() isn't.
2635  */
2636 
2637 /**
2638  * Set up user-ID and/or group-ID
2639  **/
2640 void dousers(const gchar *const username, const gchar *const groupname) {
2641  struct passwd *pw;
2642  struct group *gr;
2643  gchar* str;
2644  if (groupname) {
2645  gr = getgrnam(groupname);
2646  if(!gr) {
2647  str = g_strdup_printf("Invalid group name: %s", groupname);
2648  err(str);
2649  }
2650  if(setgid(gr->gr_gid)<0) {
2651  err("Could not set GID: %m");
2652  }
2653  }
2654  if (username) {
2655  pw = getpwnam(username);
2656  if(!pw) {
2657  str = g_strdup_printf("Invalid user name: %s", username);
2658  err(str);
2659  }
2660  if(setuid(pw->pw_uid)<0) {
2661  err("Could not set UID: %m");
2662  }
2663  }
2664 }
2665 
2666 #ifndef ISSERVER
2667 void glib_message_syslog_redirect(const gchar *log_domain,
2668  GLogLevelFlags log_level,
2669  const gchar *message,
2670  gpointer user_data)
2671 {
2672  int level=LOG_DEBUG;
2673 
2674  switch( log_level )
2675  {
2676  case G_LOG_FLAG_FATAL:
2677  case G_LOG_LEVEL_CRITICAL:
2678  case G_LOG_LEVEL_ERROR:
2679  level=LOG_ERR;
2680  break;
2681  case G_LOG_LEVEL_WARNING:
2682  level=LOG_WARNING;
2683  break;
2684  case G_LOG_LEVEL_MESSAGE:
2685  case G_LOG_LEVEL_INFO:
2686  level=LOG_INFO;
2687  break;
2688  case G_LOG_LEVEL_DEBUG:
2689  level=LOG_DEBUG;
2690  break;
2691  default:
2692  level=LOG_ERR;
2693  }
2694  syslog(level, "%s", message);
2695 }
2696 #endif
2697 
2698 /**
2699  * Main entry point...
2700  **/
2701 int main(int argc, char *argv[]) {
2702  SERVER *serve;
2703  GArray *servers;
2704  GError *err=NULL;
2705  struct generic_conf genconf;
2706 
2707  memset(&genconf, 0, sizeof(struct generic_conf));
2708 
2709  if (sizeof( struct nbd_request )!=28) {
2710  fprintf(stderr,"Bad size of structure. Alignment problems?\n");
2711  exit(EXIT_FAILURE) ;
2712  }
2713 
2714  memset(pidftemplate, '\0', 256);
2715 
2716  modernsocks = g_array_new(FALSE, FALSE, sizeof(int));
2717 
2718  logging();
2719  config_file_pos = g_strdup(CFILE);
2720  serve=cmdline(argc, argv);
2721 
2722  servers = parse_cfile(config_file_pos, &genconf, &err);
2723 
2724  /* Update global variables with parsed values. This will be
2725  * removed once we get rid of global configuration variables. */
2726  glob_flags |= genconf.flags;
2727 
2728  if(serve) {
2729  serve->socket_family = AF_UNSPEC;
2730 
2731  append_serve(serve, servers);
2732 
2733  if (!(serve->port)) {
2734  CLIENT *client;
2735 #ifndef ISSERVER
2736  /* You really should define ISSERVER if you're going to use
2737  * inetd mode, but if you don't, closing stdout and stderr
2738  * (which inetd had connected to the client socket) will let it
2739  * work. */
2740  close(1);
2741  close(2);
2742  open("/dev/null", O_WRONLY);
2743  open("/dev/null", O_WRONLY);
2744  g_log_set_default_handler( glib_message_syslog_redirect, NULL );
2745 #endif
2746  client=g_malloc(sizeof(CLIENT));
2747  client->server=serve;
2748  client->net=-1;
2749  client->exportsize=OFFT_MAX;
2750  if (set_peername(0, client))
2751  exit(EXIT_FAILURE);
2752  serveconnection(client);
2753  return 0;
2754  }
2755  }
2756 
2757  if(!servers || !servers->len) {
2758  if(err && !(err->domain == NBDS_ERR
2759  && err->code == NBDS_ERR_CFILE_NOTFOUND)) {
2760  g_warning("Could not parse config file: %s",
2761  err ? err->message : "Unknown error");
2762  }
2763  }
2764  if(serve) {
2765  g_warning("Specifying an export on the command line is deprecated.");
2766  g_warning("Please use a configuration file instead.");
2767  }
2768 
2769  if((!serve) && (!servers||!servers->len)) {
2770  if(err)
2771  g_message("No configured exports; quitting.");
2772  exit(EXIT_FAILURE);
2773  }
2774  if (!dontfork)
2775  daemonize(serve);
2776  setup_servers(servers, genconf.modernaddr, genconf.modernport);
2777  dousers(genconf.user, genconf.group);
2778 
2779  serveloop(servers);
2780 }
The (required) group "generic" is missing.
Definition: nbdsrv.h:87
int expread(off_t a, char *buf, size_t len, CLIENT *client)
Read an amount of bytes at a given offset from the right file.
Definition: nbd-server.c:1079
int get_filepos(GArray *export, off_t a, int *fhandle, off_t *foffset, size_t *maxbytes)
Get the file handle and offset, given an export offset.
Definition: nbd-server.c:886
int setup_serve(SERVER *const serve, GError **const gerror)
Connect a server's socket.
Definition: nbd-server.c:2344
static void consume(int f, void *buf, size_t len, size_t bufsiz)
Consume data from an FD that we don't want.
Definition: nbd-server.c:281
This parameter is a string.
Definition: nbd-server.c:196
gchar * servename
name of the export as selected by nbd-client
Definition: nbdsrv.h:46
gint flagval
Flag mask for this parameter in case ptype is PARAM_BOOL.
Definition: nbd-server.c:213
void glib_message_syslog_redirect(const gchar *log_domain, GLogLevelFlags log_level, const gchar *message, gpointer user_data)
Definition: nbd-server.c:2667
#define NBD_OPT_LIST
Definition: cliserv.h:154
void sigterm_handler(int s)
Handle SIGTERM and dispatch it to our children.
Definition: nbd-server.c:855
PARAM_TYPE ptype
Type of the parameter.
Definition: nbd-server.c:208
__be32 type
Definition: nbd.h:67
GArray * export
array of FILE_INFO of exported files; array size is always 1 unless we're doing the multiple file opt...
Definition: nbdsrv.h:59
Variables associated with a server.
Definition: nbdsrv.h:29
uint8_t getmaskbyte(int masklen)
Gets a byte to allow for address masking.
Definition: nbdsrv.c:91
void destroy_pid_t(gpointer data)
Destroy a pid_t*.
Definition: nbd-server.c:1880
void setup_servers(GArray *const servers, const gchar *const modernaddr, const gchar *const modernport)
Connect our servers.
Definition: nbd-server.c:2542
uint32_t difffilelen
number of pages in difffile
Definition: nbdsrv.h:68
#define NBD_FLAG_SEND_FUA
Definition: nbd.h:47
#define SEND(net, reply)
sending macro.
Definition: nbd-server.c:1424
int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client)
Call rawexpread repeatedly until all data has been read.
Definition: nbd-server.c:1058
int glob_flags
global flags
Definition: nbd-server.c:122
gchar * config_file_pos
Where our config file actually is.
Definition: nbd-server.c:119
#define SYSCONFDIR
Default position of the config file.
Definition: nbd-server.c:114
#define F_COPYONWRITE
flag to tell us a file is exported using copyonwrite
Definition: nbd-server.c:138
SERVER * server
The server this client is getting data from.
Definition: nbdsrv.h:63
static void handle_oldstyle_connection(GArray *const servers, SERVER *const serve)
Definition: nbd-server.c:2040
#define G_GNUC_NORETURN
Definition: cliserv.h:87
#define F_FLUSH
Whether server wants FLUSH to be sent by the client.
Definition: nbd-server.c:145
int copyonwrite_prepare(CLIENT *client)
Definition: nbd-server.c:1703
Failed to set SO_LINGER to a socket.
Definition: nbdsrv.h:97
Definition: nbd.h:77
No virtualization.
Definition: nbdsrv.h:19
gchar * postrun
command that will be ran after the client disconnects
Definition: nbdsrv.h:44
int clientfeats
Features supported by this client.
Definition: nbdsrv.h:72
int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua)
Call rawexpwrite repeatedly until all data has been written.
Definition: nbd-server.c:1016
#define VERSION
Definition: config.h:175
#define NEG_OLD
Definition: nbd-server.c:161
#define NEG_MODERN
Definition: nbd-server.c:162
gchar * paramname
Name of the parameter, as it appears in the config file.
Definition: nbd-server.c:204
void dump_section(SERVER *serve, gchar *section_header)
Definition: nbd-server.c:331
static int socket_accept(const int sock)
Definition: nbd-server.c:1918
char handle[8]
Definition: nbd.h:80
#define CFILE
Definition: nbd-server.c:116
This parameter is a boolean.
Definition: nbd-server.c:197
#define F_SDP
flag to tell us the export should be done using the Socket Direct Protocol for RDMA ...
Definition: nbd-server.c:143
Failed to bind an address to socket.
Definition: nbdsrv.h:102
void serveloop(GArray *servers)
Loop through the available servers, and serve them.
Definition: nbd-server.c:2206
#define msg(prio,...)
Logging macros.
Definition: nbdsrv.h:116
static void handle_modern_connection(GArray *const servers, const int sock)
Definition: nbd-server.c:1933
Error occurred during readdir()
Definition: nbdsrv.h:96
int fhandle
file descriptor
Definition: nbd-server.c:186
int dosockopts(const int socket, GError **const gerror)
Set server socket options.
Definition: nbd-server.c:2306
static int get_index_by_servename(const gchar *const servename, const GArray *const servers)
Return the index of the server whose servename matches the given name.
Definition: nbd-server.c:2146
gchar * modernport
port of the modern socket
Definition: nbd-server.c:224
gchar * user
user we run the server as
Definition: nbd-server.c:221
u64 ntohll(u64 a)
Definition: cliserv.h:138
#define NBD_REP_ERR_INVALID
Definition: cliserv.h:162
void setmysockopt(int sock)
Definition: cliserv.h:64
off_t startoff
starting offset of this file
Definition: nbd-server.c:187
The configuration file is not found.
Definition: nbdsrv.h:86
#define fdatasync(arg)
Definition: cliserv.h:44
#define F_TEMPORARY
Whether the backing file is temporary and should be created then unlinked.
Definition: nbd-server.c:148
char pidftemplate[256]
template to be used for the filename of the PID file
Definition: nbd-server.c:157
#define NBD_REP_ERR_POLICY
Definition: cliserv.h:161
static pid_t spawn_child()
Definition: nbd-server.c:1885
static int append_new_servers(GArray *const servers, GError **const gerror)
Parse configuration files and add servers to the array if they don't already exist there...
Definition: nbd-server.c:2172
int expflush(CLIENT *client)
Flush data to a client.
Definition: nbd-server.c:1179
struct sockaddr_storage clientaddr
peer, in binary format, network byte order
Definition: nbdsrv.h:57
int flags
flags associated with this exported file
Definition: nbdsrv.h:36
This parameter is an integer.
Definition: nbd-server.c:194
#define F_LIST
Allow clients to list the exports on a server.
Definition: nbd-server.c:154
void serveconnection(CLIENT *client)
Serve a connection.
Definition: nbd-server.c:1747
__be32 error
Definition: nbd.h:79
GArray * modernsocks
Sockets for the modern handler.
Definition: nbd-server.c:170
int exptrim(struct nbd_request *req, CLIENT *client)
Definition: nbd-server.c:1199
void killchild(gpointer key, gpointer value, gpointer user_data)
Kill a child.
Definition: nbd-server.c:844
gchar * exportname
(unprocessed) filename of the file we're exporting
Definition: nbdsrv.h:30
int net
The actual client socket.
Definition: nbdsrv.h:62
#define NBD_FLAG_SEND_FLUSH
Definition: nbd.h:46
void err_nonfatal(const char *s)
Definition: cliserv.h:92
#define F_AUTOREADONLY
flag to tell us a file is set to autoreadonly
Definition: nbd-server.c:141
gpointer target
Pointer to where the data of this parameter should be written.
Definition: nbd-server.c:209
int do_run(gchar *command, gchar *file)
Run a command.
Definition: nbd-server.c:1727
void usage(char *errmsg,...)
Definition: nbd-client.c:390
#define NBD_OPT_EXPORT_NAME
Definition: cliserv.h:152
Underlying system call or library error.
Definition: nbdsrv.h:104
int open_modern(const gchar *const addr, const gchar *const port, GError **const gerror)
Definition: nbd-server.c:2450
unsigned int port
port we're exporting this file at
Definition: nbdsrv.h:34
gboolean required
Whether this is a required (as opposed to optional) parameter.
Definition: nbd-server.c:206
#define NBD_REP_ACK
Definition: cliserv.h:157
#define F_TRIM
Whether server wants TRIM (discard) to be sent by the client.
Definition: nbd-server.c:149
gchar * transactionlog
filename for transaction log
Definition: nbdsrv.h:48
Every subnet in its own directory.
Definition: nbdsrv.h:23
gchar * listenaddr
The IP address we're listening on.
Definition: nbdsrv.h:33
#define F_OLDSTYLE
Global flags:
Definition: nbd-server.c:153
Failed to set SO_KEEPALIVE on a socket.
Definition: nbdsrv.h:99
#define NBD_OPT_ABORT
Definition: cliserv.h:153
static u64 cliserv_magic
Definition: cliserv.h:57
char * clientname
peer, in human-readable format
Definition: nbdsrv.h:56
static void send_reply(uint32_t opt, int net, uint32_t reply_type, size_t datasize, void *data)
Definition: nbd-server.c:1224
#define AI_NUMERICSERV
Definition: netdb-compat.h:21
static const char * getcommandname(uint64_t command)
Translate a command name into human readable form.
Definition: nbd-server.c:234
int socket
The socket of this server.
Definition: nbdsrv.h:37
Variables associated with a client connection.
Definition: nbdsrv.h:54
gchar * modernaddr
address of the modern socket
Definition: nbd-server.c:223
bool logged_oversized
whether we logged oversized requests already
Definition: nbd-server.c:180
int difffile
filedescriptor of copyonwrite file.
Definition: nbdsrv.h:65
SERVER * cmdline(int argc, char *argv[])
Parse the command line.
Definition: nbd-server.c:360
GHashTable * children
Definition: nbd-server.c:155
#define OFFT_MAX
The highest value a variable of type off_t can reach.
Definition: nbd-server.c:131
int set_peername(int net, CLIENT *client)
Find the name of the file we have to serve.
Definition: nbd-server.c:1790
gint flags
global flags
Definition: nbd-server.c:225
#define NBD_FLAG_HAS_FLAGS
Definition: nbd.h:44
static void readit(int f, void *buf, size_t len)
Read data from a file descriptor into a buffer.
Definition: nbd-server.c:258
char pidfname[256]
name of our PID file
Definition: nbd-server.c:156
Failed to get address info.
Definition: nbdsrv.h:100
Variables associated with an open file.
Definition: nbd-server.c:185
int dontfork
Definition: nbd-server.c:125
uint32_t * difmap
see comment on the global difmap for this one
Definition: nbdsrv.h:69
static void sighup_handler(const int s G_GNUC_UNUSED)
Handle SIGHUP by atomically setting a flag which will be evaluated in the main loop of the root serve...
Definition: nbd-server.c:871
#define PARAM_OFFT
Definition: lfs.h:10
static void handle_list(uint32_t opt, int net, GArray *servers, uint32_t cflags)
Definition: nbd-server.c:1279
void daemonize(SERVER *serve)
Go daemon (unless we specified at compile time that we didn't want this)
Definition: nbd-server.c:2602
static CLIENT * handle_export_name(uint32_t opt, int net, GArray *servers, uint32_t cflags)
Definition: nbd-server.c:1243
uint64_t size_autodetect(int fhandle)
Detect the size of a file.
Definition: nbdsrv.c:251
#define F_SPARSE
flag to tell us copyonwrite should use a sparse file
Definition: nbd-server.c:142
PARAM_TYPE
Type of configuration file values.
Definition: nbd-server.c:193
ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client)
Read an amount of bytes at a given offset from the right file.
Definition: nbd-server.c:1038
void sigchld_handler(int s)
Signal handler for SIGCHLD.
Definition: nbd-server.c:817
GArray * do_cfile_dir(gchar *dir, GError **e)
Parse config file snippets in a directory.
Definition: nbd-server.c:505
Literal IP address as part of the filename.
Definition: nbdsrv.h:20
int transactionlogfd
fd for transaction log
Definition: nbdsrv.h:71
void myseek(int handle, off_t a)
seek to a position in a file, with error handling.
Definition: nbd-server.c:929
void err(const char *s) G_GNUC_NORETURN
Definition: cliserv.h:120
#define NBD_CMD_FLAG_FUA
Definition: nbd.h:41
int max_connections
maximum number of opened connections
Definition: nbdsrv.h:47
VIRT_STYLE virtstyle
The style of virtualization, if any.
Definition: nbdsrv.h:39
#define NBD_FLAG_READ_ONLY
Definition: nbd.h:45
#define G_GNUC_UNUSED
Definition: cliserv.h:88
static volatile sig_atomic_t is_sighup_caught
Flag set by SIGHUP handler to mark a reconfiguration request.
Definition: nbd-server.c:165
Failed to set SO_REUSEADDR on a socket.
Definition: nbdsrv.h:98
__be32 len
Definition: nbd.h:70
int append_serve(const SERVER *const s, GArray *const a)
append new server to array
Definition: nbdsrv.c:191
ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua)
Write an amount of bytes at a given offset to the right file.
Definition: nbd-server.c:946
#define BUFSIZE
Size of buffer that can hold requests.
Definition: nbd-server.c:132
__be64 from
Definition: nbd.h:69
int socket_family
family of the socket
Definition: nbdsrv.h:38
uint64_t expected_size
size of the exported file as it was told to us through configuration
Definition: nbdsrv.h:31
gchar * prerun
command to be run after connecting a client, but before starting to serve
Definition: nbdsrv.h:42
#define NBD_FLAG_SEND_TRIM
Definition: nbd.h:49
int main(int argc, char **argv)
This parameter is an integer.
Definition: nbd-server.c:195
#define htonll
Definition: cliserv.h:146
#define NBD_CMD_MASK_COMMAND
Definition: nbd.h:40
gchar * group
group we are running as
Definition: nbd-server.c:222
uint64_t exportsize
size of the file we're exporting
Definition: nbdsrv.h:55
#define DEBUG(...)
Definition: nbd-debug.h:8
#define F_SYNC
Whether to fsync() after a write.
Definition: nbd-server.c:144
char * authname
filename of the authorization file
Definition: nbdsrv.h:35
static void writeit(int f, void *buf, size_t len)
Write data from a buffer into a filedescriptor.
Definition: nbd-server.c:297
#define NBD_REPLY_MAGIC
Definition: nbd.h:58
#define NBD_REP_SERVER
Definition: cliserv.h:158
#define NBD_DEFAULT_PORT
Definition: cliserv.h:148
gboolean modern
client was negotiated using modern negotiation protocol
Definition: nbdsrv.h:70
__be32 magic
Definition: nbd.h:78
#define F_FUA
Whether server wants FUA to be sent by the client.
Definition: nbd-server.c:146
void setupexport(CLIENT *client)
Set up client export array, which is an array of FILE_INFO.
Definition: nbd-server.c:1601
A config file was specified that does not define any exports.
Definition: nbdsrv.h:91
#define NBD_REQUEST_MAGIC
Definition: nbd.h:57
char * difffilename
filename of the copy-on-write file, if any
Definition: nbdsrv.h:64
void dousers(const gchar *const username, const gchar *const groupname)
Set up user-ID and/or group-ID.
Definition: nbd-server.c:2640
#define NBDS_ERR
Error domain common for all NBD server errors.
Definition: nbdsrv.h:80
char handle[8]
Definition: nbd.h:39
char handle[8]
Definition: nbd.h:68
char default_authname[]
default name of allow file
Definition: nbd-server.c:158
#define NEG_INIT
Definition: nbd-server.c:160
#define DIFFPAGESIZE
diff file uses those chunks
Definition: nbd-server.c:133
static u64 opts_magic
Definition: cliserv.h:58
Replacing all dots in an ip address by a / before doing the same as in IPLIT.
Definition: nbdsrv.h:21
uint8_t cidrlen
The length of the mask when we use CIDR-style virtualization.
Definition: nbdsrv.h:40
#define NBD_FLAG_FIXED_NEWSTYLE
Definition: cliserv.h:166
#define INIT_PASSWD
Definition: cliserv.h:60
#define F_READONLY
Per-export flags:
Definition: nbd-server.c:136
__be32 magic
Definition: nbd.h:37
A value is not supported in this build.
Definition: nbdsrv.h:90
#define NBD_FLAG_ROTATIONAL
Definition: nbd.h:48
A directory requested does not exist.
Definition: nbdsrv.h:95
char * exportname
(processed) filename of the file we're exporting
Definition: nbdsrv.h:58
int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua)
Write an amount of bytes at a given offset to the right file.
Definition: nbd-server.c:1121
#define ERROR(client, reply, errcode)
error macro.
Definition: nbd-server.c:1428
void negotiate(int sock, u64 *rsize64, u32 *flags, char *name, uint32_t needed_flags, uint32_t client_flags, uint32_t do_opts)
Definition: nbd-client.c:232
Configuration file values of the "generic" section.
Definition: nbd-server.c:220
Configuration file values.
Definition: nbd-server.c:203
#define F_ROTATIONAL
Whether server wants the client to implement the elevator algorithm.
Definition: nbd-server.c:147
A value is syntactically invalid.
Definition: nbdsrv.h:89
GArray * parse_cfile(gchar *f, struct generic_conf *genconf, GError **e)
Parse the config file.
Definition: nbd-server.c:583
void logging(void)
Definition: cliserv.h:125
__be32 len
Definition: nbd.h:41
int mainloop(CLIENT *client)
Serve a file to a single client.
Definition: nbd-server.c:1438
__be32 magic
Definition: nbd.h:66
#define F_MULTIFILE
flag to tell us a file is exported using -m
Definition: nbd-server.c:137
Failed to create a socket.
Definition: nbdsrv.h:101
int authorized_client(CLIENT *opts)
Check whether a client is allowed to connect.
Definition: nbdsrv.c:103
#define NBD_REP_ERR_UNSUP
Definition: cliserv.h:160