Ninja
deps_log.cc
Go to the documentation of this file.
1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "deps_log.h"
16 
17 #include <assert.h>
18 #include <stdio.h>
19 #include <errno.h>
20 #include <string.h>
21 #ifndef _WIN32
22 #include <unistd.h>
23 #elif defined(_MSC_VER) && (_MSC_VER < 1900)
24 typedef __int32 int32_t;
25 typedef unsigned __int32 uint32_t;
26 #endif
27 
28 #include "graph.h"
29 #include "metrics.h"
30 #include "state.h"
31 #include "util.h"
32 
33 // The version is stored as 4 bytes after the signature and also serves as a
34 // byte order mark. Signature and version combined are 16 bytes long.
35 const char kFileSignature[] = "# ninjadeps\n";
36 const int kCurrentVersion = 4;
37 
38 // Record size is currently limited to less than the full 32 bit, due to
39 // internal buffers having to have this size.
40 const unsigned kMaxRecordSize = (1 << 19) - 1;
41 
43  Close();
44 }
45 
46 bool DepsLog::OpenForWrite(const string& path, string* err) {
47  if (needs_recompaction_) {
48  if (!Recompact(path, err))
49  return false;
50  }
51 
52  file_ = fopen(path.c_str(), "ab");
53  if (!file_) {
54  *err = strerror(errno);
55  return false;
56  }
57  // Set the buffer size to this and flush the file buffer after every record
58  // to make sure records aren't written partially.
59  setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1);
60  SetCloseOnExec(fileno(file_));
61 
62  // Opening a file in append mode doesn't set the file pointer to the file's
63  // end on Windows. Do that explicitly.
64  fseek(file_, 0, SEEK_END);
65 
66  if (ftell(file_) == 0) {
67  if (fwrite(kFileSignature, sizeof(kFileSignature) - 1, 1, file_) < 1) {
68  *err = strerror(errno);
69  return false;
70  }
71  if (fwrite(&kCurrentVersion, 4, 1, file_) < 1) {
72  *err = strerror(errno);
73  return false;
74  }
75  }
76  if (fflush(file_) != 0) {
77  *err = strerror(errno);
78  return false;
79  }
80  return true;
81 }
82 
84  const vector<Node*>& nodes) {
85  return RecordDeps(node, mtime, nodes.size(),
86  nodes.empty() ? NULL : (Node**)&nodes.front());
87 }
88 
90  int node_count, Node** nodes) {
91  // Track whether there's any new data to be recorded.
92  bool made_change = false;
93 
94  // Assign ids to all nodes that are missing one.
95  if (node->id() < 0) {
96  if (!RecordId(node))
97  return false;
98  made_change = true;
99  }
100  for (int i = 0; i < node_count; ++i) {
101  if (nodes[i]->id() < 0) {
102  if (!RecordId(nodes[i]))
103  return false;
104  made_change = true;
105  }
106  }
107 
108  // See if the new data is different than the existing data, if any.
109  if (!made_change) {
110  Deps* deps = GetDeps(node);
111  if (!deps ||
112  deps->mtime != mtime ||
113  deps->node_count != node_count) {
114  made_change = true;
115  } else {
116  for (int i = 0; i < node_count; ++i) {
117  if (deps->nodes[i] != nodes[i]) {
118  made_change = true;
119  break;
120  }
121  }
122  }
123  }
124 
125  // Don't write anything if there's no new info.
126  if (!made_change)
127  return true;
128 
129  // Update on-disk representation.
130  unsigned size = 4 * (1 + 2 + node_count);
131  if (size > kMaxRecordSize) {
132  errno = ERANGE;
133  return false;
134  }
135  size |= 0x80000000; // Deps record: set high bit.
136  if (fwrite(&size, 4, 1, file_) < 1)
137  return false;
138  int id = node->id();
139  if (fwrite(&id, 4, 1, file_) < 1)
140  return false;
141  uint32_t mtime_part = static_cast<uint32_t>(mtime & 0xffffffff);
142  if (fwrite(&mtime_part, 4, 1, file_) < 1)
143  return false;
144  mtime_part = static_cast<uint32_t>((mtime >> 32) & 0xffffffff);
145  if (fwrite(&mtime_part, 4, 1, file_) < 1)
146  return false;
147  for (int i = 0; i < node_count; ++i) {
148  id = nodes[i]->id();
149  if (fwrite(&id, 4, 1, file_) < 1)
150  return false;
151  }
152  if (fflush(file_) != 0)
153  return false;
154 
155  // Update in-memory representation.
156  Deps* deps = new Deps(mtime, node_count);
157  for (int i = 0; i < node_count; ++i)
158  deps->nodes[i] = nodes[i];
159  UpdateDeps(node->id(), deps);
160 
161  return true;
162 }
163 
165  if (file_)
166  fclose(file_);
167  file_ = NULL;
168 }
169 
170 bool DepsLog::Load(const string& path, State* state, string* err) {
171  METRIC_RECORD(".ninja_deps load");
172  char buf[kMaxRecordSize + 1];
173  FILE* f = fopen(path.c_str(), "rb");
174  if (!f) {
175  if (errno == ENOENT)
176  return true;
177  *err = strerror(errno);
178  return false;
179  }
180 
181  bool valid_header = true;
182  int version = 0;
183  if (!fgets(buf, sizeof(buf), f) || fread(&version, 4, 1, f) < 1)
184  valid_header = false;
185  // Note: For version differences, this should migrate to the new format.
186  // But the v1 format could sometimes (rarely) end up with invalid data, so
187  // don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days,
188  // and there was no release with it, so pretend that it never happened.)
189  if (!valid_header || strcmp(buf, kFileSignature) != 0 ||
190  version != kCurrentVersion) {
191  if (version == 1)
192  *err = "deps log version change; rebuilding";
193  else
194  *err = "bad deps log signature or version; starting over";
195  fclose(f);
196  unlink(path.c_str());
197  // Don't report this as a failure. An empty deps log will cause
198  // us to rebuild the outputs anyway.
199  return true;
200  }
201 
202  long offset;
203  bool read_failed = false;
204  int unique_dep_record_count = 0;
205  int total_dep_record_count = 0;
206  for (;;) {
207  offset = ftell(f);
208 
209  unsigned size;
210  if (fread(&size, 4, 1, f) < 1) {
211  if (!feof(f))
212  read_failed = true;
213  break;
214  }
215  bool is_deps = (size >> 31) != 0;
216  size = size & 0x7FFFFFFF;
217 
218  if (size > kMaxRecordSize || fread(buf, size, 1, f) < 1) {
219  read_failed = true;
220  break;
221  }
222 
223  if (is_deps) {
224  assert(size % 4 == 0);
225  int* deps_data = reinterpret_cast<int*>(buf);
226  int out_id = deps_data[0];
227  TimeStamp mtime;
228  mtime = (TimeStamp)(((uint64_t)(unsigned int)deps_data[2] << 32) |
229  (uint64_t)(unsigned int)deps_data[1]);
230  deps_data += 3;
231  int deps_count = (size / 4) - 3;
232 
233  Deps* deps = new Deps(mtime, deps_count);
234  for (int i = 0; i < deps_count; ++i) {
235  assert(deps_data[i] < (int)nodes_.size());
236  assert(nodes_[deps_data[i]]);
237  deps->nodes[i] = nodes_[deps_data[i]];
238  }
239 
240  total_dep_record_count++;
241  if (!UpdateDeps(out_id, deps))
242  ++unique_dep_record_count;
243  } else {
244  int path_size = size - 4;
245  assert(path_size > 0); // CanonicalizePath() rejects empty paths.
246  // There can be up to 3 bytes of padding.
247  if (buf[path_size - 1] == '\0') --path_size;
248  if (buf[path_size - 1] == '\0') --path_size;
249  if (buf[path_size - 1] == '\0') --path_size;
250  StringPiece subpath(buf, path_size);
251  // It is not necessary to pass in a correct slash_bits here. It will
252  // either be a Node that's in the manifest (in which case it will already
253  // have a correct slash_bits that GetNode will look up), or it is an
254  // implicit dependency from a .d which does not affect the build command
255  // (and so need not have its slashes maintained).
256  Node* node = state->GetNode(subpath, 0);
257 
258  // Check that the expected index matches the actual index. This can only
259  // happen if two ninja processes write to the same deps log concurrently.
260  // (This uses unary complement to make the checksum look less like a
261  // dependency record entry.)
262  unsigned checksum = *reinterpret_cast<unsigned*>(buf + size - 4);
263  int expected_id = ~checksum;
264  int id = nodes_.size();
265  if (id != expected_id) {
266  read_failed = true;
267  break;
268  }
269 
270  assert(node->id() < 0);
271  node->set_id(id);
272  nodes_.push_back(node);
273  }
274  }
275 
276  if (read_failed) {
277  // An error occurred while loading; try to recover by truncating the
278  // file to the last fully-read record.
279  if (ferror(f)) {
280  *err = strerror(ferror(f));
281  } else {
282  *err = "premature end of file";
283  }
284  fclose(f);
285 
286  if (!Truncate(path, offset, err))
287  return false;
288 
289  // The truncate succeeded; we'll just report the load error as a
290  // warning because the build can proceed.
291  *err += "; recovering";
292  return true;
293  }
294 
295  fclose(f);
296 
297  // Rebuild the log if there are too many dead records.
298  int kMinCompactionEntryCount = 1000;
299  int kCompactionRatio = 3;
300  if (total_dep_record_count > kMinCompactionEntryCount &&
301  total_dep_record_count > unique_dep_record_count * kCompactionRatio) {
302  needs_recompaction_ = true;
303  }
304 
305  return true;
306 }
307 
309  // Abort if the node has no id (never referenced in the deps) or if
310  // there's no deps recorded for the node.
311  if (node->id() < 0 || node->id() >= (int)deps_.size())
312  return NULL;
313  return deps_[node->id()];
314 }
315 
316 bool DepsLog::Recompact(const string& path, string* err) {
317  METRIC_RECORD(".ninja_deps recompact");
318 
319  Close();
320  string temp_path = path + ".recompact";
321 
322  // OpenForWrite() opens for append. Make sure it's not appending to a
323  // left-over file from a previous recompaction attempt that crashed somehow.
324  unlink(temp_path.c_str());
325 
326  DepsLog new_log;
327  if (!new_log.OpenForWrite(temp_path, err))
328  return false;
329 
330  // Clear all known ids so that new ones can be reassigned. The new indices
331  // will refer to the ordering in new_log, not in the current log.
332  for (vector<Node*>::iterator i = nodes_.begin(); i != nodes_.end(); ++i)
333  (*i)->set_id(-1);
334 
335  // Write out all deps again.
336  for (int old_id = 0; old_id < (int)deps_.size(); ++old_id) {
337  Deps* deps = deps_[old_id];
338  if (!deps) continue; // If nodes_[old_id] is a leaf, it has no deps.
339 
340  if (!IsDepsEntryLiveFor(nodes_[old_id]))
341  continue;
342 
343  if (!new_log.RecordDeps(nodes_[old_id], deps->mtime,
344  deps->node_count, deps->nodes)) {
345  new_log.Close();
346  return false;
347  }
348  }
349 
350  new_log.Close();
351 
352  // All nodes now have ids that refer to new_log, so steal its data.
353  deps_.swap(new_log.deps_);
354  nodes_.swap(new_log.nodes_);
355 
356  if (unlink(path.c_str()) < 0) {
357  *err = strerror(errno);
358  return false;
359  }
360 
361  if (rename(temp_path.c_str(), path.c_str()) < 0) {
362  *err = strerror(errno);
363  return false;
364  }
365 
366  return true;
367 }
368 
370  // Skip entries that don't have in-edges or whose edges don't have a
371  // "deps" attribute. They were in the deps log from previous builds, but
372 // the files they were for were removed from the build and their deps
373  // entries are no longer needed.
374  // (Without the check for "deps", a chain of two or more nodes that each
375  // had deps wouldn't be collected in a single recompaction.)
376  return node->in_edge() && !node->in_edge()->GetBinding("deps").empty();
377 }
378 
379 bool DepsLog::UpdateDeps(int out_id, Deps* deps) {
380  if (out_id >= (int)deps_.size())
381  deps_.resize(out_id + 1);
382 
383  bool delete_old = deps_[out_id] != NULL;
384  if (delete_old)
385  delete deps_[out_id];
386  deps_[out_id] = deps;
387  return delete_old;
388 }
389 
390 bool DepsLog::RecordId(Node* node) {
391  int path_size = node->path().size();
392  int padding = (4 - path_size % 4) % 4; // Pad path to 4 byte boundary.
393 
394  unsigned size = path_size + padding + 4;
395  if (size > kMaxRecordSize) {
396  errno = ERANGE;
397  return false;
398  }
399  if (fwrite(&size, 4, 1, file_) < 1)
400  return false;
401  if (fwrite(node->path().data(), path_size, 1, file_) < 1) {
402  assert(node->path().size() > 0);
403  return false;
404  }
405  if (padding && fwrite("\0\0", padding, 1, file_) < 1)
406  return false;
407  int id = nodes_.size();
408  unsigned checksum = ~(unsigned)id;
409  if (fwrite(&checksum, 4, 1, file_) < 1)
410  return false;
411  if (fflush(file_) != 0)
412  return false;
413 
414  node->set_id(id);
415  nodes_.push_back(node);
416 
417  return true;
418 }
const vector< Node * > & nodes() const
Used for tests.
Definition: deps_log.h:102
const int kCurrentVersion
Definition: deps_log.cc:36
const char kFileSignature[]
Definition: deps_log.cc:35
const string & path() const
Definition: graph.h:75
vector< Deps * > deps_
Maps id -> deps of that id.
Definition: deps_log.h:118
Edge * in_edge() const
Definition: graph.h:90
Node * GetNode(StringPiece path, uint64_t slash_bits)
Definition: state.cc:103
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:27
Information about a node in the dependency graph: the file, whether it's dirty, mtime, etc.
Definition: graph.h:37
const unsigned kMaxRecordSize
Definition: deps_log.cc:40
int id() const
Definition: graph.h:93
void SetCloseOnExec(int fd)
Mark a file descriptor to not be inherited on exec()s.
Definition: util.cc:375
As build commands run they can output extra dependency information (e.g.
Definition: deps_log.h:68
vector< Node * > nodes_
Maps id -> Node.
Definition: deps_log.h:116
bool OpenForWrite(const string &path, string *err)
Definition: deps_log.cc:46
const vector< Deps * > & deps() const
Definition: deps_log.h:103
void set_id(int id)
Definition: graph.h:94
Deps * GetDeps(Node *node)
Definition: deps_log.cc:308
bool Load(const string &path, State *state, string *err)
Definition: deps_log.cc:170
bool Recompact(const string &path, string *err)
Rewrite the known log entries, throwing away old data.
Definition: deps_log.cc:316
int64_t TimeStamp
Definition: timestamp.h:31
bool needs_recompaction_
Definition: deps_log.h:112
bool RecordId(Node *node)
Definition: deps_log.cc:390
#define METRIC_RECORD(name)
The primary interface to metrics.
Definition: metrics.h:85
void Close()
Definition: deps_log.cc:164
FILE * file_
Definition: deps_log.h:113
bool Truncate(const string &path, size_t size, string *err)
Truncates a file to the given size.
Definition: util.cc:599
~DepsLog()
Definition: deps_log.cc:42
string GetBinding(const string &key)
Returns the shell-escaped value of |key|.
Definition: graph.cc:372
Global state (file status) for a single run.
Definition: state.h:85
unsigned long long uint64_t
Definition: win32port.h:29
bool RecordDeps(Node *node, TimeStamp mtime, const vector< Node *> &nodes)
Definition: deps_log.cc:83
bool UpdateDeps(int out_id, Deps *deps)
Definition: deps_log.cc:379
bool IsDepsEntryLiveFor(Node *node)
Returns if the deps entry for a node is still reachable from the manifest.
Definition: deps_log.cc:369