YAP 7.1.0
cwalk.c
1#include <assert.h>
2#include <ctype.h>
3#include <cwalk.h>
4#include <stdarg.h>
5#include <stdio.h>
6#include <string.h>
7
12#if defined(WIN32) || defined(_WIN32) || \
13 defined(__WIN32) && !defined(__CYGWIN__)
14static enum cwk_path_style path_style = CWK_STYLE_WINDOWS;
15#else
16static enum cwk_path_style path_style = CWK_STYLE_UNIX;
17#endif
18
24static const char *separators[] = {[CWK_STYLE_WINDOWS] = "\\/",
25 [CWK_STYLE_UNIX] = "/"};
26
33{
34 struct cwk_segment segment;
35 const char **paths;
36 size_t path_index;
37};
38
// Copies up to `length` bytes of `str` into `buffer` starting at `position`,
// never writing at or beyond `buffer_size`.
//
// No '\0' is appended here; callers terminate the result separately with
// cwk_path_terminate_output(). The copy uses memmove, so `str` may overlap
// `buffer`.
//
// Returns `length`, i.e. the number of bytes which would have been written if
// the buffer were large enough, regardless of how many were actually copied.
static size_t cwk_path_output_sized(char *buffer, size_t buffer_size,
  size_t position, const char *str, size_t length)
{
  size_t available, amount_written;

  // Determine how much room is left behind `position`. This is computed by
  // subtraction rather than by testing `position + length`, since that sum can
  // wrap around for very large values and would then wrongly report that the
  // whole string fits.
  available = position < buffer_size ? buffer_size - position : 0;

  // Write the whole string if it fits, otherwise only the part which does.
  amount_written = length < available ? length : available;
  if (amount_written > 0) {
    memmove(&buffer[position], str, amount_written);
  }

  // Report the theoretical length, not the truncated one.
  return length;
}
67
// Writes the "current directory" segment (".") at `position` and returns its
// length. The same text is used for every path style.
static size_t cwk_path_output_current(char *buffer, size_t buffer_size,
  size_t position)
{
  static const char current_segment[] = ".";

  return cwk_path_output_sized(buffer, buffer_size, position, current_segment,
    sizeof(current_segment) - 1);
}
75
// Writes the "parent directory" segment ("..", two characters) at `position`
// and returns its length. The same text is used for every path style.
static size_t cwk_path_output_back(char *buffer, size_t buffer_size,
  size_t position)
{
  static const char back_segment[] = "..";

  return cwk_path_output_sized(buffer, buffer_size, position, back_segment,
    sizeof(back_segment) - 1);
}
83
84static size_t cwk_path_output_separator(char *buffer, size_t buffer_size,
85 size_t position)
86{
87 // We output a separator, which is a single character.
88 return cwk_path_output_sized(buffer, buffer_size, position,
89 separators[path_style], 1);
90}
91
// Writes a single dot at `position` and returns its length (1). This is the
// dot which introduces a file extension.
static size_t cwk_path_output_dot(char *buffer, size_t buffer_size,
  size_t position)
{
  static const char dot[] = ".";

  return cwk_path_output_sized(buffer, buffer_size, position, dot,
    sizeof(dot) - 1);
}
98
// Writes the null-terminated string `str` at `position`. This measures the
// string and forwards to the sized output function, whose result (the full
// length of `str`) is returned.
static size_t cwk_path_output(char *buffer, size_t buffer_size, size_t position,
  const char *str)
{
  return cwk_path_output_sized(buffer, buffer_size, position, str,
    strlen(str));
}
109
// Places the terminating '\0' of the output. The terminator goes to `pos` if
// that lies inside the buffer, otherwise into the last byte of the buffer.
// Nothing is written when the buffer has no capacity at all.
static void cwk_path_terminate_output(char *buffer, size_t buffer_size,
  size_t pos)
{
  size_t terminator_index;

  // A zero-sized buffer can not even hold the terminator.
  if (buffer_size == 0) {
    return;
  }

  // Clamp the position to the last valid index of the buffer.
  terminator_index = pos < buffer_size ? pos : buffer_size - 1;
  buffer[terminator_index] = '\0';
}
121
122static bool cwk_path_is_string_equal(const char *first, const char *second,
123 size_t n)
124{
125 // If the path style is UNIX, we will compare case sensitively. This can be
126 // done easily using strncmp.
127 if (path_style == CWK_STYLE_UNIX) {
128 return strncmp(first, second, n) == 0;
129 }
130
131 // However, if this is windows we will have to compare case insensitively.
132 // Since there is no standard method to do that we will have to do it on our
133 // own.
134 while (*first && *second && n > 0) {
135 // We can consider the string to be not equal if the two lowercase
136 // characters are not equal.
137 if (tolower(*first++) != tolower(*second++)) {
138 return false;
139 }
140
141 --n;
142 }
143
144 // We can consider the string to be equal if we either reached n == 0 or both
145 // cursors point to a null character.
146 return n == 0 || (*first == '\0' && *second == '\0');
147}
148
// Scans forward from `c` and returns a pointer to the next "stop", which is
// either the first separator (as reported by cwk_path_is_separator) or the
// terminating '\0' of the string.
static const char *cwk_path_find_next_stop(const char *c)
{
  const char *cursor;

  for (cursor = c; *cursor != '\0'; ++cursor) {
    if (cwk_path_is_separator(cursor)) {
      break;
    }
  }

  return cursor;
}
160
// Scans backwards from `c`, but not past `begin`, until a separator is found.
// Returns the position right after that separator, or the position where the
// scan stopped if it is not a separator (i.e. `begin` was reached).
static const char *cwk_path_find_previous_stop(const char *begin, const char *c)
{
  const char *cursor;

  cursor = c;
  while (cursor > begin && !cwk_path_is_separator(cursor)) {
    --cursor;
  }

  // The caller wants the first character behind the separator, not the
  // separator itself.
  return cwk_path_is_separator(cursor) ? cursor + 1 : cursor;
}
177
178static bool cwk_path_get_first_segment_without_root(const char *path,
179 const char *segments, struct cwk_segment *segment)
180{
181 // Let's remember the path. We will move the path pointer afterwards, that's
182 // why this has to be done first.
183 segment->path = path;
184 segment->segments = segments;
185
186 // Now let's check whether this is an empty string. An empty string has no
187 // segment it could use.
188 if (*segments == '\0') {
189 return false;
190 }
191
192 // If the string starts with separators, we will jump over those. If there is
193 // only a slash and a '\0' after it, we can't determine the first segment
194 // since there is none.
195 while (cwk_path_is_separator(segments)) {
196 ++segments;
197 if (*segments == '\0') {
198 return false;
199 }
200 }
201
202 // So this is the beginning of our segment.
203 segment->begin = segments;
204
205 // Now let's determine the end of the segment, which we do by moving the path
206 // pointer further until we find a separator.
207 segments = cwk_path_find_next_stop(segments);
208
209 // And finally, calculate the size of the segment by subtracting the position
210 // from the end.
211 segment->size = segments - segment->begin;
212 segment->end = segments;
213
214 // Tell the caller that we found a segment.
215 return true;
216}
217
// Finds the last segment of `path`, treating the whole string as segments
// (no root is skipped). Returns false if the path contains no segment.
static bool cwk_path_get_last_segment_without_root(const char *path,
  struct cwk_segment *segment)
{
  bool found;

  // Start on the first segment, searching from the very beginning of the path.
  found = cwk_path_get_first_segment_without_root(path, path, segment);
  if (found) {
    // Walk forward until there is no further segment. This relies on
    // cwk_path_get_next_segment leaving the struct on the last segment when it
    // reports that nothing follows.
    while (cwk_path_get_next_segment(segment)) {
      continue;
    }
  }

  return found;
}
237
238static bool cwk_path_get_first_segment_joined(const char **paths,
239 struct cwk_segment_joined *sj)
240{
241 bool result;
242
243 // Prepare the first segment. We position the joined segment on the first path
244 // and assign the path array to the struct.
245 sj->path_index = 0;
246 sj->paths = paths;
247
248 // We loop through all paths until we find one which has a segment. The result
249 // is stored in a variable, so we can let the caller know whether we found one
250 // or not.
251 result = false;
252 while (paths[sj->path_index] != NULL &&
253 (result = cwk_path_get_first_segment(paths[sj->path_index],
254 &sj->segment)) == false) {
255 ++sj->path_index;
256 }
257
258 return result;
259}
260
261static bool cwk_path_get_next_segment_joined(struct cwk_segment_joined *sj)
262{
263 bool result;
264
265 if (sj->paths[sj->path_index] == NULL) {
266 // We reached already the end of all paths, so there is no other segment
267 // left.
268 return false;
269 } else if (cwk_path_get_next_segment(&sj->segment)) {
270 // There was another segment on the current path, so we are good to
271 // continue.
272 return true;
273 }
274
275 // We try to move to the next path which has a segment available. We must at
276 // least move one further since the current path reached the end.
277 result = false;
278
279 do {
280 ++sj->path_index;
281
282 // And we obviously have to stop this loop if there are no more paths left.
283 if (sj->paths[sj->path_index] == NULL) {
284 break;
285 }
286
287 // Grab the first segment of the next path and determine whether this path
288 // has anything useful in it. There is one more thing we have to consider
289 // here - for the first time we do this we want to skip the root, but
290 // afterwards we will consider that to be part of the segments.
291 result = cwk_path_get_first_segment_without_root(sj->paths[sj->path_index],
292 sj->paths[sj->path_index], &sj->segment);
293
294 } while (!result);
295
296 // Finally, report the result back to the caller.
297 return result;
298}
299
300static bool cwk_path_get_previous_segment_joined(struct cwk_segment_joined *sj)
301{
302 bool result;
303
304 if (*sj->paths == NULL) {
305 // It's possible that there is no initialized segment available in the
306 // struct since there are no paths. In that case we can return false, since
307 // there is no previous segment.
308 return false;
309 } else if (cwk_path_get_previous_segment(&sj->segment)) {
310 // Now we try to get the previous segment from the current path. If we can
311 // do that successfully, we can let the caller know that we found one.
312 return true;
313 }
314
315 result = false;
316
317 do {
318 // We are done once we reached index 0. In that case there are no more
319 // segments left.
320 if (sj->path_index == 0) {
321 break;
322 }
323
324 // There is another path which we have to inspect. So we decrease the path
325 // index.
326 --sj->path_index;
327
328 // If this is the first path we will have to consider that this path might
329 // include a root, otherwise we just treat is as a segment.
330 if (sj->path_index == 0) {
331 result = cwk_path_get_last_segment(sj->paths[sj->path_index],
332 &sj->segment);
333 } else {
334 result = cwk_path_get_last_segment_without_root(sj->paths[sj->path_index],
335 &sj->segment);
336 }
337
338 } while (!result);
339
340 return result;
341}
342
343static bool cwk_path_segment_back_will_be_removed(struct cwk_segment_joined *sj)
344{
345 enum cwk_segment_type type;
346 int counter;
347
348 // We are handling back segments here. We must verify how many back segments
349 // and how many normal segments come before this one to decide whether we keep
350 // or remove it.
351
352 // The counter determines how many normal segments are our current segment,
353 // which will popped off before us. If the counter goes above zero it means
354 // that our segment will be popped as well.
355 counter = 0;
356
357 // We loop over all previous segments until we either reach the beginning,
358 // which means our segment will not be dropped or the counter goes above zero.
359 while (cwk_path_get_previous_segment_joined(sj)) {
360
361 // Now grab the type. The type determines whether we will increase or
362 // decrease the counter. We don't handle a CWK_CURRENT frame here since it
363 // has no influence.
364 type = cwk_path_get_segment_type(&sj->segment);
365 if (type == CWK_NORMAL) {
366 // This is a normal segment. The normal segment will increase the counter
367 // since it neutralizes one back segment. If we go above zero we can
368 // return immediately.
369 ++counter;
370 if (counter > 0) {
371 return true;
372 }
373 } else if (type == CWK_BACK) {
374 // A CWK_BACK segment will reduce the counter by one. We can not remove a
375 // back segment as long we are not above zero since we don't have the
376 // opposite normal segment which we would remove.
377 --counter;
378 }
379 }
380
381 // We never got a count larger than zero, so we will keep this segment alive.
382 return false;
383}
384
385static bool cwk_path_segment_normal_will_be_removed(
386 struct cwk_segment_joined *sj)
387{
388 enum cwk_segment_type type;
389 int counter;
390
391 // The counter determines how many segments are above our current segment,
392 // which will popped off before us. If the counter goes below zero it means
393 // that our segment will be popped as well.
394 counter = 0;
395
396 // We loop over all following segments until we either reach the end, which
397 // means our segment will not be dropped or the counter goes below zero.
398 while (cwk_path_get_next_segment_joined(sj)) {
399
400 // First, grab the type. The type determines whether we will increase or
401 // decrease the counter. We don't handle a CWK_CURRENT frame here since it
402 // has no influence.
403 type = cwk_path_get_segment_type(&sj->segment);
404 if (type == CWK_NORMAL) {
405 // This is a normal segment. The normal segment will increase the counter
406 // since it will be removed by a "../" before us.
407 ++counter;
408 } else if (type == CWK_BACK) {
409 // A CWK_BACK segment will reduce the counter by one. If we are below zero
410 // we can return immediately.
411 --counter;
412 if (counter < 0) {
413 return true;
414 }
415 }
416 }
417
418 // We never got a negative count, so we will keep this segment alive.
419 return false;
420}
421
422static bool
423cwk_path_segment_will_be_removed(const struct cwk_segment_joined *sj,
424 bool absolute)
425{
426 enum cwk_segment_type type;
427 struct cwk_segment_joined sjc;
428
429 // We copy the joined path so we don't need to modify it.
430 sjc = *sj;
431
432 // First we check whether this is a CWK_CURRENT or CWK_BACK segment, since
433 // those will always be dropped.
434 type = cwk_path_get_segment_type(&sj->segment);
435 if (type == CWK_CURRENT) {
436 return true;
437 } else if (type == CWK_BACK && absolute) {
438 return true;
439 } else if (type == CWK_BACK) {
440 return cwk_path_segment_back_will_be_removed(&sjc);
441 } else {
442 return cwk_path_segment_normal_will_be_removed(&sjc);
443 }
444}
445
// Advances `sj` until it points at a segment which will be part of the
// normalized path. Returns false if the end of the joined path is reached
// before such a segment is found.
static bool
cwk_path_segment_joined_skip_invisible(struct cwk_segment_joined *sj,
  bool absolute)
{
  for (;;) {
    // Stop on the first segment which is kept.
    if (!cwk_path_segment_will_be_removed(sj, absolute)) {
      return true;
    }

    // The current one is dropped; give up if nothing follows.
    if (!cwk_path_get_next_segment_joined(sj)) {
      return false;
    }
  }
}
458
// Determines the length of the root of `path` using the Windows rules and
// stores it in `*length` (0 if there is no root). The following forms are
// recognized:
//   - a single leading separator                      -> length 1
//   - two separators, '?' or '.', and a separator     -> length 4 (device path)
//   - two separators followed by a server and a share -> up to and including
//     the separator behind the share name, if present (network path)
//   - any character followed by ':'                   -> length 2, or 3 if a
//     separator follows the colon
static void cwk_path_get_root_windows(const char *path, size_t *length)
{
  const char *c;

  // An empty string has no root.
  *length = 0;
  if (*path == '\0') {
    return;
  }

  // Paths which do not start with a separator can only have a drive root.
  // The character in front of the colon is not validated.
  if (!cwk_path_is_separator(path)) {
    if (path[1] == ':') {
      // The separator behind the colon is optional. A colon is not valid
      // anywhere else in a path, so we accept whatever follows it.
      *length = cwk_path_is_separator(&path[2]) ? 3 : 2;
    }
    return;
  }

  // A single leading separator is a root of its own.
  if (!cwk_path_is_separator(&path[1])) {
    *length = 1;
    return;
  }

  // Two leading separators introduce a network or device path. A device path
  // continues with '?' or '.' and another separator; its root always has four
  // characters. If the separator is missing we have merely consumed the first
  // character of a server name, which is harmless since we search for the
  // next stop anyway.
  c = &path[2];
  if (*c == '?' || *c == '.') {
    ++c;
    if (cwk_path_is_separator(c)) {
      *length = 4;
      return;
    }
  }

  // Skip the server name, which ends at the next separator or at the end of
  // the string.
  c = cwk_path_find_next_stop(c);

  // Skip all separators between the server name and the shared folder name.
  while (cwk_path_is_separator(c)) {
    ++c;
  }

  // Skip the shared folder name.
  c = cwk_path_find_next_stop(c);

  // A separator behind the shared folder name belongs to the root as well and
  // marks the path as absolute.
  if (cwk_path_is_separator(c)) {
    ++c;
  }

  *length = c - path;
}
542
// Determines the length of the root of `path` using the UNIX rules: a leading
// separator is the root (length 1), anything else has no root (length 0).
static void cwk_path_get_root_unix(const char *path, size_t *length)
{
  *length = cwk_path_is_separator(path) ? 1 : 0;
}
553
// Reports whether the root of `path`, whose size is `length`, makes the path
// absolute. That is the case when a root exists and ends with a separator.
static bool cwk_path_is_root_absolute(const char *path, size_t length)
{
  return length > 0 && cwk_path_is_separator(&path[length - 1]);
}
565
566static size_t cwk_path_join_and_normalize_multiple(const char **paths,
567 char *buffer, size_t buffer_size)
568{
569 size_t pos;
570 bool absolute, has_segment_output;
571 struct cwk_segment_joined sj;
572
573 // We initialize the position after the root, which should get us started.
574 cwk_path_get_root(paths[0], &pos);
575
576 // Determine whether the path is absolute or not. We need that to determine
577 // later on whether we can remove superfluous "../" or not.
578 absolute = cwk_path_is_root_absolute(paths[0], pos);
579
580 // First copy the root to the output. We will not modify the root.
581 cwk_path_output_sized(buffer, buffer_size, 0, paths[0], pos);
582
583 // So we just grab the first segment. If there is no segment we will always
584 // output a "/", since we currently only support absolute paths here.
585 if (!cwk_path_get_first_segment_joined(paths, &sj)) {
586 goto done;
587 }
588
589 // Let's assume that we don't have any segment output for now. We will toggle
590 // this flag once there is some output.
591 has_segment_output = false;
592
593 do {
594 // Check whether we have to drop this segment because of resolving a
595 // relative path or because it is a CWK_CURRENT segment.
596 if (cwk_path_segment_will_be_removed(&sj, absolute)) {
597 continue;
598 }
599
600 // Remember that we have segment output, so we can handle the trailing slash
601 // later on. This is necessary since we might have segments but they are all
602 // removed.
603 has_segment_output = true;
604
605 // Write out the segment but keep in mind that we need to follow the
606 // buffer size limitations. That's why we use the path output functions
607 // here.
608 pos += cwk_path_output_sized(buffer, buffer_size, pos, sj.segment.begin,
609 sj.segment.size);
610 pos += cwk_path_output_separator(buffer, buffer_size, pos);
611 } while (cwk_path_get_next_segment_joined(&sj));
612
613 // Remove the trailing slash, but only if we have segment output. We don't
614 // want to remove anything from the root.
615 if (has_segment_output) {
616 --pos;
617 } else if (pos == 0) {
618 // This may happen if the path is absolute and all segments have been
619 // removed. We can not have an empty output - and empty output means we stay
620 // in the current directory. So we will output a ".".
621 assert(absolute == false);
622 pos += cwk_path_output_current(buffer, buffer_size, pos);
623 }
624
625 // We must append a '\0' in any case, unless the buffer size is zero. If the
626 // buffer size is zero, which means we can not.
627done:
628 cwk_path_terminate_output(buffer, buffer_size, pos);
629
630 // And finally let our caller know about the total size of the normalized
631 // path.
632 return pos;
633}
634
// Builds a normalized absolute path from `base` and `path` and writes it to
// `buffer`.
//
// If `path` is absolute it is normalized on its own and `base` is ignored.
// Otherwise `path` is appended to `base`; if `base` is not absolute either, a
// fake root "/" is put in front of it.
//
// Returns the length of the complete result, which may exceed what fit into
// `buffer_size`.
size_t cwk_path_get_absolute(const char *base, const char *path, char *buffer,
  size_t buffer_size)
{
  size_t i;
  const char *paths[4];

  i = 0;
  if (cwk_path_is_absolute(path)) {
    // The submitted path is not relative, so the base path is irrelevant. The
    // path must be the first entry of the list, since the root of the result
    // is taken from the first entry only. Putting a fake root in front of it
    // would replace the root of the path itself.
    paths[i++] = path;
  } else {
    // The base should be an absolute path if the caller is using the API
    // correctly. If it is not we prepend a fake root.
    if (!cwk_path_is_absolute(base)) {
      paths[i++] = "/";
    }

    // Append the relative path to the base path.
    paths[i++] = base;
    paths[i++] = path;
  }
  paths[i] = NULL;

  // Finally join everything together and normalize it.
  return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
}
667
668static void cwk_path_skip_segments_until_diverge(struct cwk_segment_joined *bsj,
669 struct cwk_segment_joined *osj, bool absolute, bool *base_available,
670 bool *other_available)
671{
672 // Now looping over all segments until they start to diverge. A path may
673 // diverge if two segments are not equal or if one path reaches the end.
674 do {
675
676 // Check whether there is anything available after we skip everything which
677 // is invisible. We do that for both paths, since we want to let the caller
678 // know which path has some trailing segments after they diverge.
679 *base_available = cwk_path_segment_joined_skip_invisible(bsj, absolute);
680 *other_available = cwk_path_segment_joined_skip_invisible(osj, absolute);
681
682 // We are done if one or both of those paths reached the end. They either
683 // diverge or both reached the end - but in both cases we can not continue
684 // here.
685 if (!*base_available || !*other_available) {
686 break;
687 }
688
689 // Compare the content of both segments. We are done if they are not equal,
690 // since they diverge.
691 if (!cwk_path_is_string_equal(bsj->segment.begin, osj->segment.begin,
692 bsj->segment.size)) {
693 break;
694 }
695
696 // We keep going until one of those segments reached the end. The next
697 // segment might be invisible, but we will check for that in the beginning
698 // of the loop once again.
699 *base_available = cwk_path_get_next_segment_joined(bsj);
700 *other_available = cwk_path_get_next_segment_joined(osj);
701 } while (*base_available && *other_available);
702}
703
704size_t cwk_path_get_relative(const char *base_directory, const char *path,
705 char *buffer, size_t buffer_size)
706{
707 size_t pos, base_root_length, path_root_length;
708 bool absolute, base_available, other_available, has_output;
709 const char *base_paths[2], *other_paths[2];
710 struct cwk_segment_joined bsj, osj;
711
712 pos = 0;
713
714 // First we compare the roots of those two paths. If the roots are not equal
715 // we can't continue, since there is no way to get a relative path from
716 // different roots.
717 cwk_path_get_root(base_directory, &base_root_length);
718 cwk_path_get_root(path, &path_root_length);
719 if (!cwk_path_is_string_equal(base_directory, path, base_root_length)) {
720 return pos;
721 }
722
723 // Verify whether this is an absolute path. We need to know that since we can
724 // remove all back-segments if it is.
725 absolute = cwk_path_is_root_absolute(base_directory, base_root_length);
726
727 // Initialize our joined segments. This will allow us to use the internal
728 // functions to skip until diverge and invisible. We only have one path in
729 // them though.
730 base_paths[0] = base_directory;
731 base_paths[1] = NULL;
732 other_paths[0] = path;
733 other_paths[1] = NULL;
734 cwk_path_get_first_segment_joined(base_paths, &bsj);
735 cwk_path_get_first_segment_joined(other_paths, &osj);
736
737 // Okay, now we skip until the segments diverge. We don't have anything to do
738 // with the segments which are equal.
739 cwk_path_skip_segments_until_diverge(&bsj, &osj, absolute, &base_available,
740 &other_available);
741
742 // Assume there is no output until we have got some. We will need this
743 // information later on to remove trailing slashes or alternatively output a
744 // current-segment.
745 has_output = false;
746
747 // So if we still have some segments left in the base path we will now output
748 // a back segment for all of them.
749 if (base_available) {
750 do {
751 // Skip any invisible segment. We don't care about those and we don't need
752 // to navigate back because of them.
753 if (!cwk_path_segment_joined_skip_invisible(&bsj, absolute)) {
754 break;
755 }
756
757 // Toggle the flag if we have output. We need to remember that, since we
758 // want to remove the trailing slash.
759 has_output = true;
760
761 // Output the back segment and a separator. No need to worry about the
762 // superfluous segment since it will be removed later on.
763 pos += cwk_path_output_back(buffer, buffer_size, pos);
764 pos += cwk_path_output_separator(buffer, buffer_size, pos);
765 } while (cwk_path_get_next_segment_joined(&bsj));
766 }
767
768 // And if we have some segments available of the target path we will output
769 // all of those.
770 if (other_available) {
771 do {
772 // Again, skip any invisible segments since we don't need to navigate into
773 // them.
774 if (!cwk_path_segment_joined_skip_invisible(&osj, absolute)) {
775 break;
776 }
777
778 // Toggle the flag if we have output. We need to remember that, since we
779 // want to remove the trailing slash.
780 has_output = true;
781
782 // Output the current segment and a separator. No need to worry about the
783 // superfluous segment since it will be removed later on.
784 pos += cwk_path_output_sized(buffer, buffer_size, pos, osj.segment.begin,
785 osj.segment.size);
786 pos += cwk_path_output_separator(buffer, buffer_size, pos);
787 } while (cwk_path_get_next_segment_joined(&osj));
788 }
789
790 // If we have some output by now we will have to remove the trailing slash. We
791 // simply do that by moving back one character. The terminate output function
792 // will then place the '\0' on this position. Otherwise, if there is no
793 // output, we will have to output a "current directory", since the target path
794 // points to the base path.
795 if (has_output) {
796 --pos;
797 } else {
798 pos += cwk_path_output_current(buffer, buffer_size, pos);
799 }
800
801 // Finally, we can terminate the output - which means we place a '\0' at the
802 // current position or at the end of the buffer.
803 cwk_path_terminate_output(buffer, buffer_size, pos);
804
805 return pos;
806}
807
// Joins `path_a` and `path_b` and writes the normalized result to `buffer`.
// Returns the length of the complete result, which may exceed what fit into
// `buffer_size`.
size_t cwk_path_join(const char *path_a, const char *path_b, char *buffer,
  size_t buffer_size)
{
  // The internal function expects a NULL-terminated list of paths.
  const char *paths[3] = {path_a, path_b, NULL};

  return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
}
823
// Joins the NULL-terminated list `paths` and writes the normalized result to
// `buffer`. Returns the length of the complete result, which may exceed what
// fit into `buffer_size`.
size_t cwk_path_join_multiple(const char **paths, char *buffer,
  size_t buffer_size)
{
  size_t total_length;

  // The internal join and normalize function does all of the work.
  total_length = cwk_path_join_and_normalize_multiple(paths, buffer,
    buffer_size);
  return total_length;
}
831
832void cwk_path_get_root(const char *path, size_t *length)
833{
834 // We use a different implementation here based on the configuration of the
835 // library.
836 if (path_style == CWK_STYLE_WINDOWS) {
837 cwk_path_get_root_windows(path, length);
838 } else {
839 cwk_path_get_root_unix(path, length);
840 }
841}
842
// Replaces the root of `path` with `new_root` and writes the result to
// `buffer`. The part behind the old root is copied as it is.
//
// Returns the length of the complete result, which may exceed what fit into
// `buffer_size`.
size_t cwk_path_change_root(const char *path, const char *new_root,
  char *buffer, size_t buffer_size)
{
  size_t old_root_length, new_root_length, tail_length, total_length;
  const char *tail;

  // Measure everything before the buffer is touched.
  cwk_path_get_root(path, &old_root_length);
  new_root_length = strlen(new_root);

  // The tail is whatever follows the old root.
  tail = path + old_root_length;
  tail_length = strlen(path) - old_root_length;

  // The tail is written before the new root, since the source path and the
  // buffer may overlap. This way the root can not overwrite the tail before
  // it has been moved.
  cwk_path_output_sized(buffer, buffer_size, new_root_length, tail,
    tail_length);
  cwk_path_output_sized(buffer, buffer_size, 0, new_root, new_root_length);

  // Terminate the output behind the new path.
  total_length = tail_length + new_root_length;
  cwk_path_terminate_output(buffer, buffer_size, total_length);

  return total_length;
}
876
// Reports whether `path` is absolute, which is the case when it has a root
// that ends with a separator.
bool cwk_path_is_absolute(const char *path)
{
  size_t root_length;

  cwk_path_get_root(path, &root_length);
  return cwk_path_is_root_absolute(path, root_length);
}
888
// Reports whether `path` is relative, i.e. not absolute.
bool cwk_path_is_relative(const char *path)
{
  bool absolute;

  absolute = cwk_path_is_absolute(path);
  return !absolute;
}
894
895void cwk_path_get_basename(const char *path, const char **basename,
896 size_t *length)
897{
898 struct cwk_segment segment;
899
900 // We get the last segment of the path. The last segment will contain the
901 // basename if there is any. If there are no segments we will set the basename
902 // to NULL and the length to 0.
903 if (!cwk_path_get_last_segment(path, &segment)) {
904 *basename = NULL;
905 *length = 0;
906 return;
907 }
908
909 // Now we can just output the segment contents, since that's our basename.
910 // There might be trailing separators after the basename, but the size does
911 // not include those.
912 *basename = segment.begin;
913 *length = segment.size;
914}
915
916size_t cwk_path_change_basename(const char *path, const char *new_basename,
917 char *buffer, size_t buffer_size)
918{
919 struct cwk_segment segment;
920 size_t pos, root_size, new_basename_size;
921
922 // First we try to get the last segment. We may only have a root without any
923 // segments, in which case we will create one.
924 if (!cwk_path_get_last_segment(path, &segment)) {
925
926 // So there is no segment in this path. First we grab the root and output
927 // that. We are not going to modify the root in any way.
928 cwk_path_get_root(path, &root_size);
929 pos = cwk_path_output_sized(buffer, buffer_size, 0, path, root_size);
930
931 // We have to trim the separators from the beginning of the new basename.
932 // This is quite easy to do.
933 while (cwk_path_is_separator(new_basename)) {
934 ++new_basename;
935 }
936
937 // Now we measure the length of the new basename, this is a two step
938 // process. First we find the '\0' character at the end of the string.
939 new_basename_size = 0;
940 while (new_basename[new_basename_size]) {
941 ++new_basename_size;
942 }
943
944 // And then we trim the separators at the end of the basename until we reach
945 // the first valid character.
946 while (new_basename_size > 0 &&
947 cwk_path_is_separator(&new_basename[new_basename_size - 1])) {
948 --new_basename_size;
949 }
950
951 // Now we will output the new basename after the root.
952 pos += cwk_path_output_sized(buffer, buffer_size, pos, new_basename,
953 new_basename_size);
954
955 // And finally terminate the output and return the total size of the path.
956 cwk_path_terminate_output(buffer, buffer_size, pos);
957 return pos;
958 }
959
960 // If there is a last segment we can just forward this call, which is fairly
961 // easy.
962 return cwk_path_change_segment(&segment, new_basename, buffer, buffer_size);
963}
964
965void cwk_path_get_dirname(const char *path, size_t *length)
966{
967 struct cwk_segment segment;
968
969 // We get the last segment of the path. The last segment will contain the
970 // basename if there is any. If there are no segments we will set the length
971 // to 0.
972 if (!cwk_path_get_last_segment(path, &segment)) {
973 *length = 0;
974 return;
975 }
976
977 // We can now return the length from the beginning of the string up to the
978 // beginning of the last segment.
979 *length = segment.begin - path;
980}
981
982bool cwk_path_get_extension(const char *path, const char **extension,
983 size_t *length)
984{
985 struct cwk_segment segment;
986 const char *c;
987
988 // We get the last segment of the path. The last segment will contain the
989 // extension if there is any.
990 if (!cwk_path_get_last_segment(path, &segment)) {
991 return false;
992 }
993
994 // Now we search for a dot within the segment. If there is a dot, we consider
995 // the rest of the segment the extension. We do this from the end towards the
996 // beginning, since we want to find the last dot.
997 for (c = segment.end; c >= segment.begin; --c) {
998 if (*c == '.') {
999 // Okay, we found an extension. We can stop looking now.
1000 *extension = c;
1001 *length = segment.end - c;
1002 return true;
1003 }
1004 }
1005
1006 // We couldn't find any extension.
1007 return false;
1008}
1009
/**
 * Reports whether the path has an extension. This is a thin wrapper around
 * cwk_path_get_extension() which discards the located extension.
 */
bool cwk_path_has_extension(const char *path)
{
  // Both outputs are required by the callee but not interesting for us.
  const char *ignored_extension;
  size_t ignored_length;

  return cwk_path_get_extension(path, &ignored_extension, &ignored_length);
}
1018
1019size_t cwk_path_change_extension(const char *path, const char *new_extension,
1020 char *buffer, size_t buffer_size)
1021{
1022 struct cwk_segment segment;
1023 const char *c, *old_extension;
1024 size_t pos, root_size, trail_size, new_extension_size;
1025
1026 // First we try to get the last segment. We may only have a root without any
1027 // segments, in which case we will create one.
1028 if (!cwk_path_get_last_segment(path, &segment)) {
1029
1030 // So there is no segment in this path. First we grab the root and output
1031 // that. We are not going to modify the root in any way. If there is no
1032 // root, this will end up with a root size 0, and nothing will be written.
1033 cwk_path_get_root(path, &root_size);
1034 pos = cwk_path_output_sized(buffer, buffer_size, 0, path, root_size);
1035
1036 // Add a dot if the submitted value doesn't have any.
1037 if (*new_extension != '.') {
1038 pos += cwk_path_output_dot(buffer, buffer_size, pos);
1039 }
1040
1041 // And finally terminate the output and return the total size of the path.
1042 pos += cwk_path_output(buffer, buffer_size, pos, new_extension);
1043 cwk_path_terminate_output(buffer, buffer_size, pos);
1044 return pos;
1045 }
1046
1047 // Now we seek the old extension in the last segment, which we will replace
1048 // with the new one. If there is no old extension, it will point to the end of
1049 // the segment.
1050 old_extension = segment.end;
1051 for (c = segment.begin; c < segment.end; ++c) {
1052 if (*c == '.') {
1053 old_extension = c;
1054 }
1055 }
1056
1057 pos = cwk_path_output_sized(buffer, buffer_size, 0, segment.path,
1058 old_extension - segment.path);
1059
1060 // If the new extension starts with a dot, we will skip that dot. We always
1061 // output exactly one dot before the extension. If the extension contains
1062 // multiple dots, we will output those as part of the extension.
1063 if (*new_extension == '.') {
1064 ++new_extension;
1065 }
1066
1067 // We calculate the size of the new extension, including the dot, in order to
1068 // output the trail - which is any part of the path coming after the
1069 // extension. We must output this first, since the buffer may overlap with the
1070 // submitted path - and it would be overridden by longer extensions.
1071 new_extension_size = strlen(new_extension) + 1;
1072 trail_size = cwk_path_output(buffer, buffer_size, pos + new_extension_size,
1073 segment.end);
1074
1075 // Finally we output the dot and the new extension. The new extension itself
1076 // doesn't contain the dot anymore, so we must output that first.
1077 pos += cwk_path_output_dot(buffer, buffer_size, pos);
1078 pos += cwk_path_output(buffer, buffer_size, pos, new_extension);
1079
1080 // Now we terminate the output with a null-terminating character, but before
1081 // we do that we must add the size of the trail to the position which we
1082 // output before.
1083 pos += trail_size;
1084 cwk_path_terminate_output(buffer, buffer_size, pos);
1085
1086 // And the position is our output size now.
1087 return pos;
1088}
1089
/**
 * Normalizes a single path into buffer by handing it to
 * cwk_path_join_and_normalize_multiple() as a one-element path list.
 */
size_t cwk_path_normalize(const char *path, char *buffer, size_t buffer_size)
{
  // The callee expects a NULL-terminated list of paths; ours holds just the
  // one submitted path.
  const char *single_path[2] = {path, NULL};

  return cwk_path_join_and_normalize_multiple(single_path, buffer,
    buffer_size);
}
1101
1102size_t cwk_path_get_intersection(const char *path_base, const char *path_other)
1103{
1104 bool absolute;
1105 size_t base_root_length, other_root_length;
1106 const char *end;
1107 const char *paths_base[2], *paths_other[2];
1108 struct cwk_segment_joined base, other;
1109
1110 // We first compare the two roots. We just return zero if they are not equal.
1111 // This will also happen to return zero if the paths are mixed relative and
1112 // absolute.
1113 cwk_path_get_root(path_base, &base_root_length);
1114 cwk_path_get_root(path_other, &other_root_length);
1115 if (!cwk_path_is_string_equal(path_base, path_other, base_root_length)) {
1116 return 0;
1117 }
1118
1119 // Configure our paths. We just have a single path in here for now.
1120 paths_base[0] = path_base;
1121 paths_base[1] = NULL;
1122 paths_other[0] = path_other;
1123 paths_other[1] = NULL;
1124
1125 // So we get the first segment of both paths. If one of those paths don't have
1126 // any segment, we will return 0.
1127 if (!cwk_path_get_first_segment_joined(paths_base, &base) ||
1128 !cwk_path_get_first_segment_joined(paths_other, &other)) {
1129 return base_root_length;
1130 }
1131
1132 // We now determine whether the path is absolute or not. This is required
1133 // because if will ignore removed segments, and this behaves differently if
1134 // the path is absolute. However, we only need to check the base path because
1135 // we are guaranteed that both paths are either relative or absolute.
1136 absolute = cwk_path_is_root_absolute(path_base, base_root_length);
1137
1138 // We must keep track of the end of the previous segment. Initially, this is
1139 // set to the beginning of the path. This means that 0 is returned if the
1140 // first segment is not equal.
1141 end = path_base + base_root_length;
1142
1143 // Now we loop over both segments until one of them reaches the end or their
1144 // contents are not equal.
1145 do {
1146 // We skip all segments which will be removed in each path, since we want to
1147 // know about the true path.
1148 if (!cwk_path_segment_joined_skip_invisible(&base, absolute) ||
1149 !cwk_path_segment_joined_skip_invisible(&other, absolute)) {
1150 break;
1151 }
1152
1153 if (!cwk_path_is_string_equal(base.segment.begin, other.segment.begin,
1154 base.segment.size)) {
1155 // So the content of those two segments are not equal. We will return the
1156 // size up to the beginning.
1157 return end - path_base;
1158 }
1159
1160 // Remember the end of the previous segment before we go to the next one.
1161 end = base.segment.end;
1162 } while (cwk_path_get_next_segment_joined(&base) &&
1163 cwk_path_get_next_segment_joined(&other));
1164
1165 // Now we calculate the length up to the last point where our paths pointed to
1166 // the same place.
1167 return end - path_base;
1168}
1169
/**
 * Fetches the first segment of the path into the submitted segment struct.
 * The root is skipped beforehand, since it is not part of any segment. The
 * result of cwk_path_get_first_segment_without_root() is passed through.
 */
bool cwk_path_get_first_segment(const char *path, struct cwk_segment *segment)
{
  size_t root_length;

  // The segments start right behind the root of the path.
  cwk_path_get_root(path, &root_length);

  return cwk_path_get_first_segment_without_root(path, path + root_length,
    segment);
}
1184
/**
 * Fetches the last segment of the path into the submitted segment struct.
 * Returns false if the path does not even have a first segment.
 */
bool cwk_path_get_last_segment(const char *path, struct cwk_segment *segment)
{
  bool found;

  // Without a first segment there can't be a last one either.
  found = cwk_path_get_first_segment(path, segment);
  if (found) {
    // Advance until no further segment is available. The failing call leaves
    // the struct untouched, so it still describes the last segment afterwards.
    while (cwk_path_get_next_segment(segment)) {
      // Nothing to do, the call itself moves the segment forward.
    }
  }

  return found;
}
1203
1204bool cwk_path_get_next_segment(struct cwk_segment *segment)
1205{
1206 const char *c;
1207
1208 // First we jump to the end of the previous segment. The first character must
1209 // be either a '\0' or a separator.
1210 c = segment->begin + segment->size;
1211 if (*c == '\0') {
1212 return false;
1213 }
1214
1215 // Now we skip all separator until we reach something else. We are not yet
1216 // guaranteed to have a segment, since the string could just end afterwards.
1217 assert(cwk_path_is_separator(c));
1218 do {
1219 ++c;
1220 } while (cwk_path_is_separator(c));
1221
1222 // If the string ends here, we can safely assume that there is no other
1223 // segment after this one.
1224 if (*c == '\0') {
1225 return false;
1226 }
1227
1228 // Now we are safe to assume there is a segment. We store the beginning of
1229 // this segment in the segment struct of the caller.
1230 segment->begin = c;
1231
1232 // And now determine the size of this segment, and store it in the struct of
1233 // the caller as well.
1234 c = cwk_path_find_next_stop(c);
1235 segment->end = c;
1236 segment->size = c - segment->begin;
1237
1238 // Tell the caller that we found a segment.
1239 return true;
1240}
1241
1242bool cwk_path_get_previous_segment(struct cwk_segment *segment)
1243{
1244 const char *c;
1245
1246 // The current position might point to the first character of the path, which
1247 // means there are no previous segments available.
1248 c = segment->begin;
1249 if (c <= segment->segments) {
1250 return false;
1251 }
1252
1253 // We move towards the beginning of the path until we either reached the
1254 // beginning or the character is no separator anymore.
1255 do {
1256 --c;
1257 if (c <= segment->segments) {
1258 // So we reached the beginning here and there is no segment. So we return
1259 // false and don't change the segment structure submitted by the caller.
1260 return false;
1261 }
1262 } while (cwk_path_is_separator(c));
1263
1264 // We are guaranteed now that there is another segment, since we moved before
1265 // the previous separator and did not reach the segment path beginning.
1266 segment->end = c + 1;
1267 segment->begin = cwk_path_find_previous_stop(segment->segments, c);
1268 segment->size = segment->end - segment->begin;
1269
1270 return true;
1271}
1272
1273enum cwk_segment_type cwk_path_get_segment_type(
1274 const struct cwk_segment *segment)
1275{
1276 // We just make a string comparison with the segment contents and return the
1277 // appropriate type.
1278 if (strncmp(segment->begin, ".", segment->size) == 0) {
1279 return CWK_CURRENT;
1280 } else if (strncmp(segment->begin, "..", segment->size) == 0) {
1281 return CWK_BACK;
1282 }
1283
1284 return CWK_NORMAL;
1285}
1286
1287bool cwk_path_is_separator(const char *str)
1288{
1289 const char *c;
1290
1291 // We loop over all characters in the read symbols.
1292 c = separators[path_style];
1293 if (!c)
1294 return false;
1295 while (*c) {
1296 if (*c == *str) {
1297 return true;
1298 }
1299
1300 ++c;
1301 }
1302
1303 return false;
1304}
1305
1306size_t cwk_path_change_segment(struct cwk_segment *segment, const char *value,
1307 char *buffer, size_t buffer_size)
1308{
1309 size_t pos, value_size, tail_size;
1310
1311 // First we have to output the head, which is the whole string up to the
1312 // beginning of the segment. This part of the path will just stay the same.
1313 pos = cwk_path_output_sized(buffer, buffer_size, 0, segment->path,
1314 segment->begin - segment->path);
1315
1316 // In order to trip the submitted value, we will skip any separator at the
1317 // beginning of it and behave as if it was never there.
1318 while (cwk_path_is_separator(value)) {
1319 ++value;
1320 }
1321
1322 // Now we determine the length of the value. In order to do that we first
1323 // locate the '\0'.
1324 value_size = 0;
1325 while (value[value_size]) {
1326 ++value_size;
1327 }
1328
1329 // Since we trim separators at the beginning and in the end of the value we
1330 // have to subtract from the size until there are either no more characters
1331 // left or the last character is no separator.
1332 while (value_size > 0 && cwk_path_is_separator(&value[value_size - 1])) {
1333 --value_size;
1334 }
1335
1336 // We also have to determine the tail size, which is the part of the string
1337 // following the current segment. This part will not change.
1338 tail_size = strlen(segment->end);
1339
1340 // Now we output the tail. We have to do that, because if the buffer and the
1341 // source are overlapping we would override the tail if the value is
1342 // increasing in length.
1343 cwk_path_output_sized(buffer, buffer_size, pos + value_size, segment->end,
1344 tail_size);
1345
1346 // Finally we can output the value in the middle of the head and the tail,
1347 // where we have enough space to fit the whole trimmed value.
1348 pos += cwk_path_output_sized(buffer, buffer_size, pos, value, value_size);
1349
1350 // Now we add the tail size to the current position and terminate the output -
1351 // basically, ensure that there is a '\0' at the end of the buffer.
1352 pos += tail_size;
1353 cwk_path_terminate_output(buffer, buffer_size, pos);
1354
1355 // And now tell the caller how long the whole path would be.
1356 return pos;
1357}
1358
1359enum cwk_path_style cwk_path_guess_style(const char *path)
1360{
1361 const char *c;
1362 size_t root_length;
1363 struct cwk_segment segment;
1364
1365 // First we determine the root. Only windows roots can be longer than a single
1366 // slash, so if we can determine that it starts with something like "C:", we
1367 // know that this is a windows path.
1368 cwk_path_get_root_windows(path, &root_length);
1369 if (root_length > 1) {
1370 return CWK_STYLE_WINDOWS;
1371 }
1372
1373 // Next we check for slashes. Windows uses backslashes, while unix uses
1374 // forward slashes. Windows actually supports both, but our best guess is to
1375 // assume windows with backslashes and unix with forward slashes.
1376 for (c = path; *c; ++c) {
1377 if (*c == *separators[CWK_STYLE_UNIX]) {
1378 return CWK_STYLE_UNIX;
1379 } else if (*c == *separators[CWK_STYLE_WINDOWS]) {
1380 return CWK_STYLE_WINDOWS;
1381 }
1382 }
1383
1384 // This path does not have any slashes. We grab the last segment (which
1385 // actually must be the first one), and determine whether the segment starts
1386 // with a dot. A dot is a hidden folder or file in the UNIX world, in that
1387 // case we assume the path to have UNIX style.
1388 if (!cwk_path_get_last_segment(path, &segment)) {
1389 // We couldn't find any segments, so we default to a UNIX path style since
1390 // there is no way to make any assumptions.
1391 return CWK_STYLE_UNIX;
1392 }
1393
1394 if (*segment.begin == '.') {
1395 return CWK_STYLE_UNIX;
1396 }
1397
1398 // And finally we check whether the last segment contains a dot. If it
1399 // contains a dot, that might be an extension. Windows is more likely to have
1400 // file names with extensions, so our guess would be windows.
1401 for (c = segment.begin; *c; ++c) {
1402 if (*c == '.') {
1403 return CWK_STYLE_WINDOWS;
1404 }
1405 }
1406
1407 // All our checks failed, so we will return a default value which is currently
1408 // UNIX.
1409 return CWK_STYLE_UNIX;
1410}
1411
1412void cwk_path_set_style(enum cwk_path_style style)
1413{
1414 // We can just set the global path style variable and then the behaviour for
1415 // all functions will change accordingly.
1416 assert(style == CWK_STYLE_UNIX || style == CWK_STYLE_WINDOWS);
1417 path_style = style;
1418}
1419
1420enum cwk_path_style cwk_path_get_style(void)
1421{
1422 // Simply return the path style which we store in a global variable.
1423 return path_style;
1424}
A joined path represents multiple path strings which are concatenated, but not (necessarily) stored in contiguous memory.
Definition: cwalk.c:33
A segment represents a single component of a path.
Definition: cwalk.h:15