Ninja
deps_log.cc
Go to the documentation of this file.
1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "deps_log.h"
16 
17 #include <assert.h>
18 #include <stdio.h>
19 #include <errno.h>
20 #include <string.h>
21 #ifndef _WIN32
22 #include <unistd.h>
23 #elif defined(_MSC_VER) && (_MSC_VER < 1900)
24 typedef __int32 int32_t;
25 typedef unsigned __int32 uint32_t;
26 #endif
27 
28 #include "graph.h"
29 #include "metrics.h"
30 #include "state.h"
31 #include "util.h"
32 
33 using namespace std;
34 
35 // The version is stored as 4 bytes after the signature and also serves as a
36 // byte order mark. Signature and version combined are 16 bytes long.
37 const char kFileSignature[] = "# ninjadeps\n";
38 const int kCurrentVersion = 4;
39 
40 // Record size is currently limited to less than the full 32 bit, due to
41 // internal buffers having to have this size.
42 const unsigned kMaxRecordSize = (1 << 19) - 1;
43 
45  Close();
46 }
47 
48 bool DepsLog::OpenForWrite(const string& path, string* err) {
49  if (needs_recompaction_) {
50  if (!Recompact(path, err))
51  return false;
52  }
53 
54  assert(!file_);
55  file_path_ = path; // we don't actually open the file right now, but will do
56  // so on the first write attempt
57  return true;
58 }
59 
60 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
61  const vector<Node*>& nodes) {
62  return RecordDeps(node, mtime, nodes.size(),
63  nodes.empty() ? NULL : (Node**)&nodes.front());
64 }
65 
67  int node_count, Node** nodes) {
68  // Track whether there's any new data to be recorded.
69  bool made_change = false;
70 
71  // Assign ids to all nodes that are missing one.
72  if (node->id() < 0) {
73  if (!RecordId(node))
74  return false;
75  made_change = true;
76  }
77  for (int i = 0; i < node_count; ++i) {
78  if (nodes[i]->id() < 0) {
79  if (!RecordId(nodes[i]))
80  return false;
81  made_change = true;
82  }
83  }
84 
85  // See if the new data is different than the existing data, if any.
86  if (!made_change) {
87  Deps* deps = GetDeps(node);
88  if (!deps ||
89  deps->mtime != mtime ||
90  deps->node_count != node_count) {
91  made_change = true;
92  } else {
93  for (int i = 0; i < node_count; ++i) {
94  if (deps->nodes[i] != nodes[i]) {
95  made_change = true;
96  break;
97  }
98  }
99  }
100  }
101 
102  // Don't write anything if there's no new info.
103  if (!made_change)
104  return true;
105 
106  // Update on-disk representation.
107  unsigned size = 4 * (1 + 2 + node_count);
108  if (size > kMaxRecordSize) {
109  errno = ERANGE;
110  return false;
111  }
112 
113  if (!OpenForWriteIfNeeded()) {
114  return false;
115  }
116  size |= 0x80000000; // Deps record: set high bit.
117  if (fwrite(&size, 4, 1, file_) < 1)
118  return false;
119  int id = node->id();
120  if (fwrite(&id, 4, 1, file_) < 1)
121  return false;
122  uint32_t mtime_part = static_cast<uint32_t>(mtime & 0xffffffff);
123  if (fwrite(&mtime_part, 4, 1, file_) < 1)
124  return false;
125  mtime_part = static_cast<uint32_t>((mtime >> 32) & 0xffffffff);
126  if (fwrite(&mtime_part, 4, 1, file_) < 1)
127  return false;
128  for (int i = 0; i < node_count; ++i) {
129  id = nodes[i]->id();
130  if (fwrite(&id, 4, 1, file_) < 1)
131  return false;
132  }
133  if (fflush(file_) != 0)
134  return false;
135 
136  // Update in-memory representation.
137  Deps* deps = new Deps(mtime, node_count);
138  for (int i = 0; i < node_count; ++i)
139  deps->nodes[i] = nodes[i];
140  UpdateDeps(node->id(), deps);
141 
142  return true;
143 }
144 
146  OpenForWriteIfNeeded(); // create the file even if nothing has been recorded
147  if (file_)
148  fclose(file_);
149  file_ = NULL;
150 }
151 
152 LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
153  METRIC_RECORD(".ninja_deps load");
154  char buf[kMaxRecordSize + 1];
155  FILE* f = fopen(path.c_str(), "rb");
156  if (!f) {
157  if (errno == ENOENT)
158  return LOAD_NOT_FOUND;
159  *err = strerror(errno);
160  return LOAD_ERROR;
161  }
162 
163  bool valid_header = true;
164  int version = 0;
165  if (!fgets(buf, sizeof(buf), f) || fread(&version, 4, 1, f) < 1)
166  valid_header = false;
167  // Note: For version differences, this should migrate to the new format.
168  // But the v1 format could sometimes (rarely) end up with invalid data, so
169  // don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days,
170  // and there was no release with it, so pretend that it never happened.)
171  if (!valid_header || strcmp(buf, kFileSignature) != 0 ||
172  version != kCurrentVersion) {
173  if (version == 1)
174  *err = "deps log version change; rebuilding";
175  else
176  *err = "bad deps log signature or version; starting over";
177  fclose(f);
178  unlink(path.c_str());
179  // Don't report this as a failure. An empty deps log will cause
180  // us to rebuild the outputs anyway.
181  return LOAD_SUCCESS;
182  }
183 
184  long offset;
185  bool read_failed = false;
186  int unique_dep_record_count = 0;
187  int total_dep_record_count = 0;
188  for (;;) {
189  offset = ftell(f);
190 
191  unsigned size;
192  if (fread(&size, 4, 1, f) < 1) {
193  if (!feof(f))
194  read_failed = true;
195  break;
196  }
197  bool is_deps = (size >> 31) != 0;
198  size = size & 0x7FFFFFFF;
199 
200  if (size > kMaxRecordSize || fread(buf, size, 1, f) < 1) {
201  read_failed = true;
202  break;
203  }
204 
205  if (is_deps) {
206  assert(size % 4 == 0);
207  int* deps_data = reinterpret_cast<int*>(buf);
208  int out_id = deps_data[0];
209  TimeStamp mtime;
210  mtime = (TimeStamp)(((uint64_t)(unsigned int)deps_data[2] << 32) |
211  (uint64_t)(unsigned int)deps_data[1]);
212  deps_data += 3;
213  int deps_count = (size / 4) - 3;
214 
215  Deps* deps = new Deps(mtime, deps_count);
216  for (int i = 0; i < deps_count; ++i) {
217  assert(deps_data[i] < (int)nodes_.size());
218  assert(nodes_[deps_data[i]]);
219  deps->nodes[i] = nodes_[deps_data[i]];
220  }
221 
222  total_dep_record_count++;
223  if (!UpdateDeps(out_id, deps))
224  ++unique_dep_record_count;
225  } else {
226  int path_size = size - 4;
227  assert(path_size > 0); // CanonicalizePath() rejects empty paths.
228  // There can be up to 3 bytes of padding.
229  if (buf[path_size - 1] == '\0') --path_size;
230  if (buf[path_size - 1] == '\0') --path_size;
231  if (buf[path_size - 1] == '\0') --path_size;
232  StringPiece subpath(buf, path_size);
233  // It is not necessary to pass in a correct slash_bits here. It will
234  // either be a Node that's in the manifest (in which case it will already
235  // have a correct slash_bits that GetNode will look up), or it is an
236  // implicit dependency from a .d which does not affect the build command
237  // (and so need not have its slashes maintained).
238  Node* node = state->GetNode(subpath, 0);
239 
240  // Check that the expected index matches the actual index. This can only
241  // happen if two ninja processes write to the same deps log concurrently.
242  // (This uses unary complement to make the checksum look less like a
243  // dependency record entry.)
244  unsigned checksum = *reinterpret_cast<unsigned*>(buf + size - 4);
245  int expected_id = ~checksum;
246  int id = nodes_.size();
247  if (id != expected_id) {
248  read_failed = true;
249  break;
250  }
251 
252  assert(node->id() < 0);
253  node->set_id(id);
254  nodes_.push_back(node);
255  }
256  }
257 
258  if (read_failed) {
259  // An error occurred while loading; try to recover by truncating the
260  // file to the last fully-read record.
261  if (ferror(f)) {
262  *err = strerror(ferror(f));
263  } else {
264  *err = "premature end of file";
265  }
266  fclose(f);
267 
268  if (!Truncate(path, offset, err))
269  return LOAD_ERROR;
270 
271  // The truncate succeeded; we'll just report the load error as a
272  // warning because the build can proceed.
273  *err += "; recovering";
274  return LOAD_SUCCESS;
275  }
276 
277  fclose(f);
278 
279  // Rebuild the log if there are too many dead records.
280  int kMinCompactionEntryCount = 1000;
281  int kCompactionRatio = 3;
282  if (total_dep_record_count > kMinCompactionEntryCount &&
283  total_dep_record_count > unique_dep_record_count * kCompactionRatio) {
284  needs_recompaction_ = true;
285  }
286 
287  return LOAD_SUCCESS;
288 }
289 
291  // Abort if the node has no id (never referenced in the deps) or if
292  // there's no deps recorded for the node.
293  if (node->id() < 0 || node->id() >= (int)deps_.size())
294  return NULL;
295  return deps_[node->id()];
296 }
297 
298 bool DepsLog::Recompact(const string& path, string* err) {
299  METRIC_RECORD(".ninja_deps recompact");
300 
301  Close();
302  string temp_path = path + ".recompact";
303 
304  // OpenForWrite() opens for append. Make sure it's not appending to a
305  // left-over file from a previous recompaction attempt that crashed somehow.
306  unlink(temp_path.c_str());
307 
308  DepsLog new_log;
309  if (!new_log.OpenForWrite(temp_path, err))
310  return false;
311 
312  // Clear all known ids so that new ones can be reassigned. The new indices
313  // will refer to the ordering in new_log, not in the current log.
314  for (vector<Node*>::iterator i = nodes_.begin(); i != nodes_.end(); ++i)
315  (*i)->set_id(-1);
316 
317  // Write out all deps again.
318  for (int old_id = 0; old_id < (int)deps_.size(); ++old_id) {
319  Deps* deps = deps_[old_id];
320  if (!deps) continue; // If nodes_[old_id] is a leaf, it has no deps.
321 
322  if (!IsDepsEntryLiveFor(nodes_[old_id]))
323  continue;
324 
325  if (!new_log.RecordDeps(nodes_[old_id], deps->mtime,
326  deps->node_count, deps->nodes)) {
327  new_log.Close();
328  return false;
329  }
330  }
331 
332  new_log.Close();
333 
334  // All nodes now have ids that refer to new_log, so steal its data.
335  deps_.swap(new_log.deps_);
336  nodes_.swap(new_log.nodes_);
337 
338  if (unlink(path.c_str()) < 0) {
339  *err = strerror(errno);
340  return false;
341  }
342 
343  if (rename(temp_path.c_str(), path.c_str()) < 0) {
344  *err = strerror(errno);
345  return false;
346  }
347 
348  return true;
349 }
350 
352  // Skip entries that don't have in-edges or whose edges don't have a
353  // "deps" attribute. They were in the deps log from previous builds, but
354  // the the files they were for were removed from the build and their deps
355  // entries are no longer needed.
356  // (Without the check for "deps", a chain of two or more nodes that each
357  // had deps wouldn't be collected in a single recompaction.)
358  return node->in_edge() && !node->in_edge()->GetBinding("deps").empty();
359 }
360 
361 bool DepsLog::UpdateDeps(int out_id, Deps* deps) {
362  if (out_id >= (int)deps_.size())
363  deps_.resize(out_id + 1);
364 
365  bool delete_old = deps_[out_id] != NULL;
366  if (delete_old)
367  delete deps_[out_id];
368  deps_[out_id] = deps;
369  return delete_old;
370 }
371 
372 bool DepsLog::RecordId(Node* node) {
373  int path_size = node->path().size();
374  int padding = (4 - path_size % 4) % 4; // Pad path to 4 byte boundary.
375 
376  unsigned size = path_size + padding + 4;
377  if (size > kMaxRecordSize) {
378  errno = ERANGE;
379  return false;
380  }
381 
382  if (!OpenForWriteIfNeeded()) {
383  return false;
384  }
385  if (fwrite(&size, 4, 1, file_) < 1)
386  return false;
387  if (fwrite(node->path().data(), path_size, 1, file_) < 1) {
388  assert(!node->path().empty());
389  return false;
390  }
391  if (padding && fwrite("\0\0", padding, 1, file_) < 1)
392  return false;
393  int id = nodes_.size();
394  unsigned checksum = ~(unsigned)id;
395  if (fwrite(&checksum, 4, 1, file_) < 1)
396  return false;
397  if (fflush(file_) != 0)
398  return false;
399 
400  node->set_id(id);
401  nodes_.push_back(node);
402 
403  return true;
404 }
405 
407  if (file_path_.empty()) {
408  return true;
409  }
410  file_ = fopen(file_path_.c_str(), "ab");
411  if (!file_) {
412  return false;
413  }
414  // Set the buffer size to this and flush the file buffer after every record
415  // to make sure records aren't written partially.
416  if (setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1) != 0) {
417  return false;
418  }
419  SetCloseOnExec(fileno(file_));
420 
421  // Opening a file in append mode doesn't set the file pointer to the file's
422  // end on Windows. Do that explicitly.
423  fseek(file_, 0, SEEK_END);
424 
425  if (ftell(file_) == 0) {
426  if (fwrite(kFileSignature, sizeof(kFileSignature) - 1, 1, file_) < 1) {
427  return false;
428  }
429  if (fwrite(&kCurrentVersion, 4, 1, file_) < 1) {
430  return false;
431  }
432  }
433  if (fflush(file_) != 0) {
434  return false;
435  }
436  file_path_.clear();
437  return true;
438 }
const int kCurrentVersion
Definition: deps_log.cc:38
LoadStatus
Definition: load_status.h:18
const char kFileSignature[]
Definition: deps_log.cc:37
Edge * in_edge() const
Definition: graph.h:94
Node * GetNode(StringPiece path, uint64_t slash_bits)
Definition: state.cc:104
bool OpenForWriteIfNeeded()
Should be called before using file_.
Definition: deps_log.cc:406
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:25
Information about a node in the dependency graph: the file, whether it&#39;s dirty, mtime, etc.
Definition: graph.h:37
const unsigned kMaxRecordSize
Definition: deps_log.cc:42
Node ** nodes
Definition: deps_log.h:85
LoadStatus Load(const std::string &path, State *state, std::string *err)
Definition: deps_log.cc:152
const std::string & path() const
Definition: graph.h:76
bool Recompact(const std::string &path, std::string *err)
Rewrite the known log entries, throwing away old data.
Definition: deps_log.cc:298
int id() const
Definition: graph.h:97
void SetCloseOnExec(int fd)
Mark a file descriptor to not be inherited on exec()s.
Definition: util.cc:377
As build commands run they can output extra dependency information (e.g.
Definition: deps_log.h:68
bool RecordDeps(Node *node, TimeStamp mtime, const std::vector< Node *> &nodes)
void set_id(int id)
Definition: graph.h:98
Deps * GetDeps(Node *node)
Definition: deps_log.cc:290
int node_count
Definition: deps_log.h:84
int64_t TimeStamp
Definition: timestamp.h:31
bool RecordId(Node *node)
Definition: deps_log.cc:372
#define METRIC_RECORD(name)
The primary interface to metrics.
Definition: metrics.h:84
bool OpenForWrite(const std::string &path, std::string *err)
Definition: deps_log.cc:48
void Close()
Definition: deps_log.cc:145
bool Truncate(const string &path, size_t size, string *err)
Definition: util.cc:618
std::string GetBinding(const std::string &key) const
Returns the shell-escaped value of |key|.
Definition: graph.cc:411
~DepsLog()
Definition: deps_log.cc:44
Global state (file status) for a single run.
Definition: state.h:84
unsigned long long uint64_t
Definition: win32port.h:29
TimeStamp mtime
Definition: deps_log.h:83
bool UpdateDeps(int out_id, Deps *deps)
Definition: deps_log.cc:361
bool IsDepsEntryLiveFor(Node *node)
Returns if the deps entry for a node is still reachable from the manifest.
Definition: deps_log.cc:351