Ninja
deps_log.cc
Go to the documentation of this file.
1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "deps_log.h"
16 
17 #include <assert.h>
18 #include <stdio.h>
19 #include <errno.h>
20 #include <string.h>
21 #ifndef _WIN32
22 #include <unistd.h>
23 #elif defined(_MSC_VER) && (_MSC_VER < 1900)
24 typedef __int32 int32_t;
25 typedef unsigned __int32 uint32_t;
26 #endif
27 
28 #include "graph.h"
29 #include "metrics.h"
30 #include "state.h"
31 #include "util.h"
32 
33 using namespace std;
34 
35 // The version is stored as 4 bytes after the signature and also serves as a
36 // byte order mark. Signature and version combined are 16 bytes long.
37 const char kFileSignature[] = "# ninjadeps\n";
38 const int kCurrentVersion = 4;
39 
40 // Record size is currently limited to less than the full 32 bit, due to
41 // internal buffers having to have this size.
42 const unsigned kMaxRecordSize = (1 << 19) - 1;
43 
45  Close();
46 }
47 
48 bool DepsLog::OpenForWrite(const string& path, string* err) {
49  if (needs_recompaction_) {
50  if (!Recompact(path, err))
51  return false;
52  }
53 
54  assert(!file_);
55  file_path_ = path; // we don't actually open the file right now, but will do
56  // so on the first write attempt
57  return true;
58 }
59 
60 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
61  const vector<Node*>& nodes) {
62  return RecordDeps(node, mtime, nodes.size(),
63  nodes.empty() ? NULL : (Node**)&nodes.front());
64 }
65 
67  int node_count, Node** nodes) {
68  // Track whether there's any new data to be recorded.
69  bool made_change = false;
70 
71  // Assign ids to all nodes that are missing one.
72  if (node->id() < 0) {
73  if (!RecordId(node))
74  return false;
75  made_change = true;
76  }
77  for (int i = 0; i < node_count; ++i) {
78  if (nodes[i]->id() < 0) {
79  if (!RecordId(nodes[i]))
80  return false;
81  made_change = true;
82  }
83  }
84 
85  // See if the new data is different than the existing data, if any.
86  if (!made_change) {
87  Deps* deps = GetDeps(node);
88  if (!deps ||
89  deps->mtime != mtime ||
90  deps->node_count != node_count) {
91  made_change = true;
92  } else {
93  for (int i = 0; i < node_count; ++i) {
94  if (deps->nodes[i] != nodes[i]) {
95  made_change = true;
96  break;
97  }
98  }
99  }
100  }
101 
102  // Don't write anything if there's no new info.
103  if (!made_change)
104  return true;
105 
106  // Update on-disk representation.
107  unsigned size = 4 * (1 + 2 + node_count);
108  if (size > kMaxRecordSize) {
109  errno = ERANGE;
110  return false;
111  }
112 
113  if (!OpenForWriteIfNeeded()) {
114  return false;
115  }
116  size |= 0x80000000; // Deps record: set high bit.
117  if (fwrite(&size, 4, 1, file_) < 1)
118  return false;
119  int id = node->id();
120  if (fwrite(&id, 4, 1, file_) < 1)
121  return false;
122  uint32_t mtime_part = static_cast<uint32_t>(mtime & 0xffffffff);
123  if (fwrite(&mtime_part, 4, 1, file_) < 1)
124  return false;
125  mtime_part = static_cast<uint32_t>((mtime >> 32) & 0xffffffff);
126  if (fwrite(&mtime_part, 4, 1, file_) < 1)
127  return false;
128  for (int i = 0; i < node_count; ++i) {
129  id = nodes[i]->id();
130  if (fwrite(&id, 4, 1, file_) < 1)
131  return false;
132  }
133  if (fflush(file_) != 0)
134  return false;
135 
136  // Update in-memory representation.
137  Deps* deps = new Deps(mtime, node_count);
138  for (int i = 0; i < node_count; ++i)
139  deps->nodes[i] = nodes[i];
140  UpdateDeps(node->id(), deps);
141 
142  return true;
143 }
144 
146  OpenForWriteIfNeeded(); // create the file even if nothing has been recorded
147  if (file_)
148  fclose(file_);
149  file_ = NULL;
150 }
151 
152 LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
153  METRIC_RECORD(".ninja_deps load");
154  char buf[kMaxRecordSize + 1];
155  FILE* f = fopen(path.c_str(), "rb");
156  if (!f) {
157  if (errno == ENOENT)
158  return LOAD_NOT_FOUND;
159  *err = strerror(errno);
160  return LOAD_ERROR;
161  }
162 
163  bool valid_header = true;
164  int version = 0;
165  if (!fgets(buf, sizeof(buf), f) || fread(&version, 4, 1, f) < 1)
166  valid_header = false;
167  // Note: For version differences, this should migrate to the new format.
168  // But the v1 format could sometimes (rarely) end up with invalid data, so
169  // don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days,
170  // and there was no release with it, so pretend that it never happened.)
171  if (!valid_header || strcmp(buf, kFileSignature) != 0 ||
172  version != kCurrentVersion) {
173  if (version == 1)
174  *err = "deps log version change; rebuilding";
175  else
176  *err = "bad deps log signature or version; starting over";
177  fclose(f);
178  unlink(path.c_str());
179  // Don't report this as a failure. An empty deps log will cause
180  // us to rebuild the outputs anyway.
181  return LOAD_SUCCESS;
182  }
183 
184  long offset;
185  bool read_failed = false;
186  int unique_dep_record_count = 0;
187  int total_dep_record_count = 0;
188  for (;;) {
189  offset = ftell(f);
190 
191  unsigned size;
192  if (fread(&size, 4, 1, f) < 1) {
193  if (!feof(f))
194  read_failed = true;
195  break;
196  }
197  bool is_deps = (size >> 31) != 0;
198  size = size & 0x7FFFFFFF;
199 
200  if (size > kMaxRecordSize || fread(buf, size, 1, f) < 1) {
201  read_failed = true;
202  break;
203  }
204 
205  if (is_deps) {
206  assert(size % 4 == 0);
207  int* deps_data = reinterpret_cast<int*>(buf);
208  int out_id = deps_data[0];
209  TimeStamp mtime;
210  mtime = (TimeStamp)(((uint64_t)(unsigned int)deps_data[2] << 32) |
211  (uint64_t)(unsigned int)deps_data[1]);
212  deps_data += 3;
213  int deps_count = (size / 4) - 3;
214 
215  Deps* deps = new Deps(mtime, deps_count);
216  for (int i = 0; i < deps_count; ++i) {
217  assert(deps_data[i] < (int)nodes_.size());
218  assert(nodes_[deps_data[i]]);
219  deps->nodes[i] = nodes_[deps_data[i]];
220  }
221 
222  total_dep_record_count++;
223  if (!UpdateDeps(out_id, deps))
224  ++unique_dep_record_count;
225  } else {
226  int path_size = size - 4;
227  assert(path_size > 0); // CanonicalizePath() rejects empty paths.
228  // There can be up to 3 bytes of padding.
229  if (buf[path_size - 1] == '\0') --path_size;
230  if (buf[path_size - 1] == '\0') --path_size;
231  if (buf[path_size - 1] == '\0') --path_size;
232  StringPiece subpath(buf, path_size);
233  // It is not necessary to pass in a correct slash_bits here. It will
234  // either be a Node that's in the manifest (in which case it will already
235  // have a correct slash_bits that GetNode will look up), or it is an
236  // implicit dependency from a .d which does not affect the build command
237  // (and so need not have its slashes maintained).
238  Node* node = state->GetNode(subpath, 0);
239 
240  // Check that the expected index matches the actual index. This can only
241  // happen if two ninja processes write to the same deps log concurrently.
242  // (This uses unary complement to make the checksum look less like a
243  // dependency record entry.)
244  unsigned checksum = *reinterpret_cast<unsigned*>(buf + size - 4);
245  int expected_id = ~checksum;
246  int id = nodes_.size();
247  if (id != expected_id) {
248  read_failed = true;
249  break;
250  }
251 
252  assert(node->id() < 0);
253  node->set_id(id);
254  nodes_.push_back(node);
255  }
256  }
257 
258  if (read_failed) {
259  // An error occurred while loading; try to recover by truncating the
260  // file to the last fully-read record.
261  if (ferror(f)) {
262  *err = strerror(ferror(f));
263  } else {
264  *err = "premature end of file";
265  }
266  fclose(f);
267 
268  if (!Truncate(path, offset, err))
269  return LOAD_ERROR;
270 
271  // The truncate succeeded; we'll just report the load error as a
272  // warning because the build can proceed.
273  *err += "; recovering";
274  return LOAD_SUCCESS;
275  }
276 
277  fclose(f);
278 
279  // Rebuild the log if there are too many dead records.
280  int kMinCompactionEntryCount = 1000;
281  int kCompactionRatio = 3;
282  if (total_dep_record_count > kMinCompactionEntryCount &&
283  total_dep_record_count > unique_dep_record_count * kCompactionRatio) {
284  needs_recompaction_ = true;
285  }
286 
287  return LOAD_SUCCESS;
288 }
289 
291  // Abort if the node has no id (never referenced in the deps) or if
292  // there's no deps recorded for the node.
293  if (node->id() < 0 || node->id() >= (int)deps_.size())
294  return NULL;
295  return deps_[node->id()];
296 }
297 
299  for (size_t id = 0; id < deps_.size(); ++id) {
300  Deps* deps = deps_[id];
301  if (!deps)
302  continue;
303  for (int i = 0; i < deps->node_count; ++i) {
304  if (deps->nodes[i] == node)
305  return nodes_[id];
306  }
307  }
308  return NULL;
309 }
310 
311 bool DepsLog::Recompact(const string& path, string* err) {
312  METRIC_RECORD(".ninja_deps recompact");
313 
314  Close();
315  string temp_path = path + ".recompact";
316 
317  // OpenForWrite() opens for append. Make sure it's not appending to a
318  // left-over file from a previous recompaction attempt that crashed somehow.
319  unlink(temp_path.c_str());
320 
321  DepsLog new_log;
322  if (!new_log.OpenForWrite(temp_path, err))
323  return false;
324 
325  // Clear all known ids so that new ones can be reassigned. The new indices
326  // will refer to the ordering in new_log, not in the current log.
327  for (vector<Node*>::iterator i = nodes_.begin(); i != nodes_.end(); ++i)
328  (*i)->set_id(-1);
329 
330  // Write out all deps again.
331  for (int old_id = 0; old_id < (int)deps_.size(); ++old_id) {
332  Deps* deps = deps_[old_id];
333  if (!deps) continue; // If nodes_[old_id] is a leaf, it has no deps.
334 
335  if (!IsDepsEntryLiveFor(nodes_[old_id]))
336  continue;
337 
338  if (!new_log.RecordDeps(nodes_[old_id], deps->mtime,
339  deps->node_count, deps->nodes)) {
340  new_log.Close();
341  return false;
342  }
343  }
344 
345  new_log.Close();
346 
347  // All nodes now have ids that refer to new_log, so steal its data.
348  deps_.swap(new_log.deps_);
349  nodes_.swap(new_log.nodes_);
350 
351  if (unlink(path.c_str()) < 0) {
352  *err = strerror(errno);
353  return false;
354  }
355 
356  if (rename(temp_path.c_str(), path.c_str()) < 0) {
357  *err = strerror(errno);
358  return false;
359  }
360 
361  return true;
362 }
363 
365  // Skip entries that don't have in-edges or whose edges don't have a
366  // "deps" attribute. They were in the deps log from previous builds, but
367  // the the files they were for were removed from the build and their deps
368  // entries are no longer needed.
369  // (Without the check for "deps", a chain of two or more nodes that each
370  // had deps wouldn't be collected in a single recompaction.)
371  return node->in_edge() && !node->in_edge()->GetBinding("deps").empty();
372 }
373 
374 bool DepsLog::UpdateDeps(int out_id, Deps* deps) {
375  if (out_id >= (int)deps_.size())
376  deps_.resize(out_id + 1);
377 
378  bool delete_old = deps_[out_id] != NULL;
379  if (delete_old)
380  delete deps_[out_id];
381  deps_[out_id] = deps;
382  return delete_old;
383 }
384 
385 bool DepsLog::RecordId(Node* node) {
386  int path_size = node->path().size();
387  int padding = (4 - path_size % 4) % 4; // Pad path to 4 byte boundary.
388 
389  unsigned size = path_size + padding + 4;
390  if (size > kMaxRecordSize) {
391  errno = ERANGE;
392  return false;
393  }
394 
395  if (!OpenForWriteIfNeeded()) {
396  return false;
397  }
398  if (fwrite(&size, 4, 1, file_) < 1)
399  return false;
400  if (fwrite(node->path().data(), path_size, 1, file_) < 1) {
401  assert(!node->path().empty());
402  return false;
403  }
404  if (padding && fwrite("\0\0", padding, 1, file_) < 1)
405  return false;
406  int id = nodes_.size();
407  unsigned checksum = ~(unsigned)id;
408  if (fwrite(&checksum, 4, 1, file_) < 1)
409  return false;
410  if (fflush(file_) != 0)
411  return false;
412 
413  node->set_id(id);
414  nodes_.push_back(node);
415 
416  return true;
417 }
418 
420  if (file_path_.empty()) {
421  return true;
422  }
423  file_ = fopen(file_path_.c_str(), "ab");
424  if (!file_) {
425  return false;
426  }
427  // Set the buffer size to this and flush the file buffer after every record
428  // to make sure records aren't written partially.
429  if (setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1) != 0) {
430  return false;
431  }
432  SetCloseOnExec(fileno(file_));
433 
434  // Opening a file in append mode doesn't set the file pointer to the file's
435  // end on Windows. Do that explicitly.
436  fseek(file_, 0, SEEK_END);
437 
438  if (ftell(file_) == 0) {
439  if (fwrite(kFileSignature, sizeof(kFileSignature) - 1, 1, file_) < 1) {
440  return false;
441  }
442  if (fwrite(&kCurrentVersion, 4, 1, file_) < 1) {
443  return false;
444  }
445  }
446  if (fflush(file_) != 0) {
447  return false;
448  }
449  file_path_.clear();
450  return true;
451 }
const int kCurrentVersion
Definition: deps_log.cc:38
LoadStatus
Definition: load_status.h:18
const char kFileSignature[]
Definition: deps_log.cc:37
Edge * in_edge() const
Definition: graph.h:104
Node * GetNode(StringPiece path, uint64_t slash_bits)
Definition: state.cc:96
bool OpenForWriteIfNeeded()
Should be called before using file_.
Definition: deps_log.cc:419
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:25
Information about a node in the dependency graph: the file, whether it&#39;s dirty, mtime, etc.
Definition: graph.h:39
const unsigned kMaxRecordSize
Definition: deps_log.cc:42
Node * GetFirstReverseDepsNode(Node *node)
Definition: deps_log.cc:298
Node ** nodes
Definition: deps_log.h:85
LoadStatus Load(const std::string &path, State *state, std::string *err)
Definition: deps_log.cc:152
const std::string & path() const
Definition: graph.h:86
bool Recompact(const std::string &path, std::string *err)
Rewrite the known log entries, throwing away old data.
Definition: deps_log.cc:311
int id() const
Definition: graph.h:107
void SetCloseOnExec(int fd)
Mark a file descriptor to not be inherited on exec()s.
Definition: util.cc:398
As build commands run they can output extra dependency information (e.g.
Definition: deps_log.h:68
bool RecordDeps(Node *node, TimeStamp mtime, const std::vector< Node *> &nodes)
void set_id(int id)
Definition: graph.h:108
Deps * GetDeps(Node *node)
Definition: deps_log.cc:290
int node_count
Definition: deps_log.h:84
int64_t TimeStamp
Definition: timestamp.h:31
bool RecordId(Node *node)
Definition: deps_log.cc:385
#define METRIC_RECORD(name)
The primary interface to metrics.
Definition: metrics.h:84
bool OpenForWrite(const std::string &path, std::string *err)
Definition: deps_log.cc:48
void Close()
Definition: deps_log.cc:145
bool Truncate(const string &path, size_t size, string *err)
Definition: util.cc:851
std::string GetBinding(const std::string &key) const
Returns the shell-escaped value of |key|.
Definition: graph.cc:491
~DepsLog()
Definition: deps_log.cc:44
Global state (file status) for a single run.
Definition: state.h:92
unsigned long long uint64_t
Definition: win32port.h:29
TimeStamp mtime
Definition: deps_log.h:83
bool UpdateDeps(int out_id, Deps *deps)
Definition: deps_log.cc:374
bool IsDepsEntryLiveFor(Node *node)
Returns if the deps entry for a node is still reachable from the manifest.
Definition: deps_log.cc:364