Ninja
deps_log.cc
Go to the documentation of this file.
1// Copyright 2012 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "deps_log.h"
16
17#include <assert.h>
18#include <errno.h>
19#include <stdint.h>
20#include <stdio.h>
21#include <string.h>
22#ifndef _WIN32
23#include <unistd.h>
24#elif defined(_MSC_VER) && (_MSC_VER < 1900)
25typedef __int32 int32_t;
26typedef unsigned __int32 uint32_t;
27#endif
28
29#include "graph.h"
30#include "metrics.h"
31#include "state.h"
32#include "util.h"
33
34using namespace std;
35
36// The version is stored as 4 bytes after the signature and also serves as a
37// byte order mark. Signature and version combined are 16 bytes long.
38static const char kFileSignature[] = "# ninjadeps\n";
39static const size_t kFileSignatureSize = sizeof(kFileSignature) - 1u;
40
41static const int32_t kCurrentVersion = 4;
42
43// Record size is currently limited to less than the full 32 bit, due to
44// internal buffers having to have this size.
45static constexpr size_t kMaxRecordSize = (1 << 19) - 1;
46
50
51bool DepsLog::OpenForWrite(const string& path, string* err) {
53 if (!Recompact(path, err))
54 return false;
55 }
56
57 assert(!file_);
58 file_path_ = path; // we don't actually open the file right now, but will do
59 // so on the first write attempt
60 return true;
61}
62
63bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
64 const vector<Node*>& nodes) {
65 return RecordDeps(node, mtime, static_cast<int>(nodes.size()), nodes.data());
66}
67
68bool DepsLog::RecordDeps(Node* node, TimeStamp mtime, int node_count,
69 Node* const* nodes) {
70 // Track whether there's any new data to be recorded.
71 bool made_change = false;
72
73 // Assign ids to all nodes that are missing one.
74 if (node->id() < 0) {
75 if (!RecordId(node))
76 return false;
77 made_change = true;
78 }
79 for (int i = 0; i < node_count; ++i) {
80 if (nodes[i]->id() < 0) {
81 if (!RecordId(nodes[i]))
82 return false;
83 made_change = true;
84 }
85 }
86
87 // See if the new data is different than the existing data, if any.
88 if (!made_change) {
89 Deps* deps = GetDeps(node);
90 if (!deps ||
91 deps->mtime != mtime ||
92 deps->node_count != node_count) {
93 made_change = true;
94 } else {
95 for (int i = 0; i < node_count; ++i) {
96 if (deps->nodes[i] != nodes[i]) {
97 made_change = true;
98 break;
99 }
100 }
101 }
102 }
103
104 // Don't write anything if there's no new info.
105 if (!made_change)
106 return true;
107
108 // Update on-disk representation.
109 unsigned size = 4 * (1 + 2 + node_count);
110 if (size > kMaxRecordSize) {
111 errno = ERANGE;
112 return false;
113 }
114
115 if (!OpenForWriteIfNeeded()) {
116 return false;
117 }
118 size |= 0x80000000; // Deps record: set high bit.
119 if (fwrite(&size, 4, 1, file_) < 1)
120 return false;
121 int id = node->id();
122 if (fwrite(&id, 4, 1, file_) < 1)
123 return false;
124 uint32_t mtime_part = static_cast<uint32_t>(mtime & 0xffffffff);
125 if (fwrite(&mtime_part, 4, 1, file_) < 1)
126 return false;
127 mtime_part = static_cast<uint32_t>((mtime >> 32) & 0xffffffff);
128 if (fwrite(&mtime_part, 4, 1, file_) < 1)
129 return false;
130 for (int i = 0; i < node_count; ++i) {
131 id = nodes[i]->id();
132 if (fwrite(&id, 4, 1, file_) < 1)
133 return false;
134 }
135 if (fflush(file_) != 0)
136 return false;
137
138 // Update in-memory representation.
139 Deps* deps = new Deps(mtime, node_count);
140 for (int i = 0; i < node_count; ++i)
141 deps->nodes[i] = nodes[i];
142 UpdateDeps(node->id(), deps);
143
144 return true;
145}
146
148 OpenForWriteIfNeeded(); // create the file even if nothing has been recorded
149 if (file_)
150 fclose(file_);
151 file_ = NULL;
152}
153
154LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
155 METRIC_RECORD(".ninja_deps load");
156 char buf[kMaxRecordSize + 1];
157 FILE* f = fopen(path.c_str(), "rb");
158 if (!f) {
159 if (errno == ENOENT)
160 return LOAD_NOT_FOUND;
161 *err = strerror(errno);
162 return LOAD_ERROR;
163 }
164
165 bool valid_header = fread(buf, kFileSignatureSize, 1, f) == 1 &&
166 !memcmp(buf, kFileSignature, kFileSignatureSize);
167
168 int32_t version = 0;
169 bool valid_version =
170 fread(&version, 4, 1, f) == 1 && version == kCurrentVersion;
171
172 // Note: For version differences, this should migrate to the new format.
173 // But the v1 format could sometimes (rarely) end up with invalid data, so
174 // don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days,
175 // and there was no release with it, so pretend that it never happened.)
176 if (!valid_header || !valid_version) {
177 if (version == 1)
178 *err = "deps log version change; rebuilding";
179 else
180 *err = "bad deps log signature or version; starting over";
181 fclose(f);
182 platformAwareUnlink(path.c_str());
183 // Don't report this as a failure. An empty deps log will cause
184 // us to rebuild the outputs anyway.
185 return LOAD_SUCCESS;
186 }
187
188 long offset = ftell(f);
189 bool read_failed = false;
190 int unique_dep_record_count = 0;
191 int total_dep_record_count = 0;
192 for (;;) {
193 unsigned size;
194 if (fread(&size, sizeof(size), 1, f) < 1) {
195 if (!feof(f))
196 read_failed = true;
197 break;
198 }
199 bool is_deps = (size >> 31) != 0;
200 size = size & 0x7FFFFFFF;
201
202 if (size > kMaxRecordSize || fread(buf, size, 1, f) < 1) {
203 read_failed = true;
204 break;
205 }
206 offset += size + sizeof(size);
207
208 if (is_deps) {
209 if ((size % 4) != 0) {
210 read_failed = true;
211 break;
212 }
213 int* deps_data = reinterpret_cast<int*>(buf);
214 int out_id = deps_data[0];
215 TimeStamp mtime;
216 mtime = (TimeStamp)(((uint64_t)(unsigned int)deps_data[2] << 32) |
217 (uint64_t)(unsigned int)deps_data[1]);
218 deps_data += 3;
219 int deps_count = (size / 4) - 3;
220
221 for (int i = 0; i < deps_count; ++i) {
222 int node_id = deps_data[i];
223 if (node_id >= (int)nodes_.size() || !nodes_[node_id]) {
224 read_failed = true;
225 break;
226 }
227 }
228 if (read_failed)
229 break;
230
231 Deps* deps = new Deps(mtime, deps_count);
232 for (int i = 0; i < deps_count; ++i) {
233 deps->nodes[i] = nodes_[deps_data[i]];
234 }
235
236 total_dep_record_count++;
237 if (!UpdateDeps(out_id, deps))
238 ++unique_dep_record_count;
239 } else {
240 int path_size = size - 4;
241 if (path_size <= 0) {
242 read_failed = true;
243 break;
244 }
245 // There can be up to 3 bytes of padding.
246 if (buf[path_size - 1] == '\0') --path_size;
247 if (buf[path_size - 1] == '\0') --path_size;
248 if (buf[path_size - 1] == '\0') --path_size;
249 StringPiece subpath(buf, path_size);
250 // It is not necessary to pass in a correct slash_bits here. It will
251 // either be a Node that's in the manifest (in which case it will already
252 // have a correct slash_bits that GetNode will look up), or it is an
253 // implicit dependency from a .d which does not affect the build command
254 // (and so need not have its slashes maintained).
255 Node* node = state->GetNode(subpath, 0);
256
257 // Check that the expected index matches the actual index. This can only
258 // happen if two ninja processes write to the same deps log concurrently.
259 // (This uses unary complement to make the checksum look less like a
260 // dependency record entry.)
261 unsigned checksum = *reinterpret_cast<unsigned*>(buf + size - 4);
262 int expected_id = ~checksum;
263 int id = static_cast<int>(nodes_.size());
264 if (id != expected_id || node->id() >= 0) {
265 read_failed = true;
266 break;
267 }
268 node->set_id(id);
269 nodes_.push_back(node);
270 }
271 }
272
273 if (read_failed) {
274 // An error occurred while loading; try to recover by truncating the
275 // file to the last fully-read record.
276 if (ferror(f)) {
277 *err = strerror(ferror(f));
278 } else {
279 *err = "premature end of file";
280 }
281 fclose(f);
282
283 if (!Truncate(path, offset, err))
284 return LOAD_ERROR;
285
286 // The truncate succeeded; we'll just report the load error as a
287 // warning because the build can proceed.
288 *err += "; recovering";
289 return LOAD_SUCCESS;
290 }
291
292 fclose(f);
293
294 // Rebuild the log if there are too many dead records.
295 int kMinCompactionEntryCount = 1000;
296 int kCompactionRatio = 3;
297 if (total_dep_record_count > kMinCompactionEntryCount &&
298 total_dep_record_count > unique_dep_record_count * kCompactionRatio) {
299 needs_recompaction_ = true;
300 }
301
302 return LOAD_SUCCESS;
303}
304
306 // Abort if the node has no id (never referenced in the deps) or if
307 // there's no deps recorded for the node.
308 if (node->id() < 0 || node->id() >= (int)deps_.size())
309 return NULL;
310 return deps_[node->id()];
311}
312
314 for (size_t id = 0; id < deps_.size(); ++id) {
315 Deps* deps = deps_[id];
316 if (!deps)
317 continue;
318 for (int i = 0; i < deps->node_count; ++i) {
319 if (deps->nodes[i] == node)
320 return nodes_[id];
321 }
322 }
323 return NULL;
324}
325
326bool DepsLog::Recompact(const string& path, string* err) {
327 METRIC_RECORD(".ninja_deps recompact");
328
329 Close();
330 string temp_path = path + ".recompact";
331
332 // OpenForWrite() opens for append. Make sure it's not appending to a
333 // left-over file from a previous recompaction attempt that crashed somehow.
334 platformAwareUnlink(temp_path.c_str());
335
336 DepsLog new_log;
337 if (!new_log.OpenForWrite(temp_path, err))
338 return false;
339
340 // Clear all known ids so that new ones can be reassigned. The new indices
341 // will refer to the ordering in new_log, not in the current log.
342 for (vector<Node*>::iterator i = nodes_.begin(); i != nodes_.end(); ++i)
343 (*i)->set_id(-1);
344
345 // Write out all deps again.
346 for (int old_id = 0; old_id < (int)deps_.size(); ++old_id) {
347 Deps* deps = deps_[old_id];
348 if (!deps) continue; // If nodes_[old_id] is a leaf, it has no deps.
349
350 if (!IsDepsEntryLiveFor(nodes_[old_id]))
351 continue;
352
353 if (!new_log.RecordDeps(nodes_[old_id], deps->mtime,
354 deps->node_count, deps->nodes)) {
355 new_log.Close();
356 return false;
357 }
358 }
359
360 new_log.Close();
361
362 // All nodes now have ids that refer to new_log, so steal its data.
363 deps_.swap(new_log.deps_);
364 nodes_.swap(new_log.nodes_);
365
366 if (platformAwareUnlink(path.c_str()) < 0) {
367 *err = strerror(errno);
368 return false;
369 }
370
371 if (rename(temp_path.c_str(), path.c_str()) < 0) {
372 *err = strerror(errno);
373 return false;
374 }
375
376 return true;
377}
378
380 // Skip entries that don't have in-edges or whose edges don't have a
381 // "deps" attribute. They were in the deps log from previous builds, but
382 // the the files they were for were removed from the build and their deps
383 // entries are no longer needed.
384 // (Without the check for "deps", a chain of two or more nodes that each
385 // had deps wouldn't be collected in a single recompaction.)
386 return node->in_edge() && !node->in_edge()->GetBinding("deps").empty();
387}
388
389bool DepsLog::UpdateDeps(int out_id, Deps* deps) {
390 if (out_id >= (int)deps_.size())
391 deps_.resize(out_id + 1);
392
393 bool delete_old = deps_[out_id] != NULL;
394 if (delete_old)
395 delete deps_[out_id];
396 deps_[out_id] = deps;
397 return delete_old;
398}
399
401 int path_size = static_cast<int>(node->path().size());
402 assert(path_size > 0 && "Trying to record empty path Node!");
403 int padding = (4 - path_size % 4) % 4; // Pad path to 4 byte boundary.
404
405 unsigned size = path_size + padding + 4;
406 if (size > kMaxRecordSize) {
407 errno = ERANGE;
408 return false;
409 }
410
411 if (!OpenForWriteIfNeeded()) {
412 return false;
413 }
414 if (fwrite(&size, 4, 1, file_) < 1)
415 return false;
416 if (fwrite(node->path().data(), path_size, 1, file_) < 1) {
417 return false;
418 }
419 if (padding && fwrite("\0\0", padding, 1, file_) < 1)
420 return false;
421 int id = static_cast<int>(nodes_.size());
422 unsigned checksum = ~(unsigned)id;
423 if (fwrite(&checksum, 4, 1, file_) < 1)
424 return false;
425 if (fflush(file_) != 0)
426 return false;
427
428 node->set_id(id);
429 nodes_.push_back(node);
430
431 return true;
432}
433
435 if (file_path_.empty()) {
436 return true;
437 }
438 file_ = fopen(file_path_.c_str(), "ab");
439 if (!file_) {
440 return false;
441 }
442 // Set the buffer size to this and flush the file buffer after every record
443 // to make sure records aren't written partially.
444 if (setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1) != 0) {
445 return false;
446 }
447 SetCloseOnExec(fileno(file_));
448
449 // Opening a file in append mode doesn't set the file pointer to the file's
450 // end on Windows. Do that explicitly.
451 fseek(file_, 0, SEEK_END);
452
453 if (ftell(file_) == 0) {
454 if (fwrite(kFileSignature, sizeof(kFileSignature) - 1, 1, file_) < 1) {
455 return false;
456 }
457 if (fwrite(&kCurrentVersion, 4, 1, file_) < 1) {
458 return false;
459 }
460 }
461 if (fflush(file_) != 0) {
462 return false;
463 }
464 file_path_.clear();
465 return true;
466}
static const size_t kFileSignatureSize
Definition deps_log.cc:39
static const int32_t kCurrentVersion
Definition deps_log.cc:41
static constexpr size_t kMaxRecordSize
Definition deps_log.cc:45
static const char kFileSignature[]
Definition deps_log.cc:38
LoadStatus
Definition load_status.h:18
@ LOAD_ERROR
Definition load_status.h:19
@ LOAD_SUCCESS
Definition load_status.h:20
@ LOAD_NOT_FOUND
Definition load_status.h:21
#define METRIC_RECORD(name)
The primary interface to metrics.
Definition metrics.h:83
Definition hash_map.h:26
Deps * GetDeps(Node *node)
Definition deps_log.cc:305
DepsLog()
Definition deps_log.h:69
bool UpdateDeps(int out_id, Deps *deps)
Definition deps_log.cc:389
std::string file_path_
Definition deps_log.h:120
Node * GetFirstReverseDepsNode(Node *node)
Definition deps_log.cc:313
bool OpenForWriteIfNeeded()
Should be called before using file_.
Definition deps_log.cc:434
bool Recompact(const std::string &path, std::string *err)
Rewrite the known log entries, throwing away old data.
Definition deps_log.cc:326
~DepsLog()
Definition deps_log.cc:47
void Close()
Definition deps_log.cc:147
bool OpenForWrite(const std::string &path, std::string *err)
Definition deps_log.cc:51
const std::vector< Deps * > & deps() const
Definition deps_log.h:105
static bool IsDepsEntryLiveFor(const Node *node)
Returns if the deps entry for a node is still reachable from the manifest.
Definition deps_log.cc:379
std::vector< Node * > nodes_
Maps id -> Node.
Definition deps_log.h:123
FILE * file_
Definition deps_log.h:119
bool RecordId(Node *node)
Definition deps_log.cc:400
bool RecordDeps(Node *node, TimeStamp mtime, const std::vector< Node * > &nodes)
const std::vector< Node * > & nodes() const
Used for tests.
Definition deps_log.h:104
std::vector< Deps * > deps_
Maps id -> deps of that id.
Definition deps_log.h:125
bool needs_recompaction_
Definition deps_log.h:118
LoadStatus Load(const std::string &path, State *state, std::string *err)
Definition deps_log.cc:154
std::string GetBinding(const std::string &key) const
Returns the shell-escaped value of |key|.
Definition graph.cc:511
Information about a node in the dependency graph: the file, whether it's dirty, mtime,...
Definition graph.h:42
const std::string & path() const
Definition graph.h:82
int id() const
Definition graph.h:111
Edge * in_edge() const
Definition graph.h:100
void set_id(int id)
Definition graph.h:112
Global state (file status) for a single run.
Definition state.h:95
Node * GetNode(StringPiece path, uint64_t slash_bits)
Definition state.cc:95
StringPiece represents a slice of a string whose memory is managed externally.
int64_t TimeStamp
Definition timestamp.h:31
int platformAwareUnlink(const char *filename)
Definition util.cc:1025
bool Truncate(const string &path, size_t size, string *err)
Definition util.cc:1007
void SetCloseOnExec(int fd)
Mark a file descriptor to not be inherited on exec()s.
Definition util.cc:480
unsigned long long uint64_t
Definition win32port.h:29