~vsc/yap/cwalk_8c_source.html

#include <assert.h>

#include <ctype.h>

#include <cwalk.h>

#include <stdarg.h>

#include <stdio.h>

#include <string.h>


#if defined(WIN32) || defined(_WIN32) ||                                       \

  defined(__WIN32) && !defined(__CYGWIN__)

static enum cwk_path_style path_style = CWK_STYLE_WINDOWS;

#else

static enum cwk_path_style path_style = CWK_STYLE_UNIX;

#endif


static const char *separators[] = {[CWK_STYLE_WINDOWS] = "\\/",

  [CWK_STYLE_UNIX] = "/"};


struct cwk_segment_joined

{

  struct cwk_segment segment;

  const char **paths;

  size_t path_index;

};


static size_t cwk_path_output_sized(char *buffer, size_t buffer_size,

  size_t position, const char *str, size_t length)

{

  size_t amount_written;


  // First we determine the amount which we can write to the buffer. There are

  // three cases. In the first case we have enough to store the whole string in

  // it. In the second one we can only store a part of it, and in the third we

  // have no space left.

  if (buffer_size > position + length) {

    amount_written = length;

  } else if (buffer_size > position) {

    amount_written = buffer_size - position;

  } else {

    amount_written = 0;

  }


  // If we actually want to write out something we will do that here. We will

  // always append a '\0', this way we are guaranteed to have a valid string at

  // all times.

  if (amount_written > 0) {

    memmove(&buffer[position], str, amount_written);

  }


  // Return the theoretical length which would have been written when everything

  // would have fit in the buffer.

  return length;

}


static size_t cwk_path_output_current(char *buffer, size_t buffer_size,

  size_t position)

{

  // We output a "current" directory, which is a single character. This

  // character is currently not style dependant.

  return cwk_path_output_sized(buffer, buffer_size, position, ".", 1);

}


static size_t cwk_path_output_back(char *buffer, size_t buffer_size,

  size_t position)

{

  // We output a "back" directory, which ahs two characters. This

  // character is currently not style dependant.

  return cwk_path_output_sized(buffer, buffer_size, position, "..", 2);

}


static size_t cwk_path_output_separator(char *buffer, size_t buffer_size,

  size_t position)

{

  // We output a separator, which is a single character.

  return cwk_path_output_sized(buffer, buffer_size, position,

    separators[path_style], 1);

}


static size_t cwk_path_output_dot(char *buffer, size_t buffer_size,

  size_t position)

{

  // We output a dot, which is a single character. This is used for extensions.

  return cwk_path_output_sized(buffer, buffer_size, position, ".", 1);

}


static size_t cwk_path_output(char *buffer, size_t buffer_size, size_t position,

  const char *str)

{

  size_t length;


  // This just does a sized output internally, but first measuring the

  // null-terminated string.

  length = strlen(str);

  return cwk_path_output_sized(buffer, buffer_size, position, str, length);

}


static void cwk_path_terminate_output(char *buffer, size_t buffer_size,

  size_t pos)

{

  if (buffer_size > 0) {

    if (pos >= buffer_size) {

      buffer[buffer_size - 1] = '\0';

    } else {

      buffer[pos] = '\0';

    }

  }

}


static bool cwk_path_is_string_equal(const char *first, const char *second,

  size_t n)

{

  // If the path style is UNIX, we will compare case sensitively. This can be

  // done easily using strncmp.

  if (path_style == CWK_STYLE_UNIX) {

    return strncmp(first, second, n) == 0;

  }


  // However, if this is windows we will have to compare case insensitively.

  // Since there is no standard method to do that we will have to do it on our

  // own.

  while (*first && *second && n > 0) {

    // We can consider the string to be not equal if the two lowercase

    // characters are not equal.

    if (tolower(*first++) != tolower(*second++)) {

      return false;

    }


    --n;

  }


  // We can consider the string to be equal if we either reached n == 0 or both

  // cursors point to a null character.

  return n == 0 || (*first == '\0' && *second == '\0');

}


static const char *cwk_path_find_next_stop(const char *c)

{

  // We just move forward until we find a '\0' or a separator, which will be our

  // next "stop".

  while (*c != '\0' && !cwk_path_is_separator(c)) {

    ++c;

  }


  // Return the pointer of the next stop.

  return c;

}


static const char *cwk_path_find_previous_stop(const char *begin, const char *c)

{

  // We just move back until we find a separator or reach the beginning of the

  // path, which will be our previous "stop".

  while (c > begin && !cwk_path_is_separator(c)) {

    --c;

  }


  // Return the pointer to the previous stop. We have to return the first

  // character after the separator, not on the separator itself.

  if (cwk_path_is_separator(c)) {

    return c + 1;

  } else {

    return c;

  }

}


static bool cwk_path_get_first_segment_without_root(const char *path,

  const char *segments, struct cwk_segment *segment)

{

  // Let's remember the path. We will move the path pointer afterwards, that's

  // why this has to be done first.

  segment->path = path;

  segment->segments = segments;


  // Now let's check whether this is an empty string. An empty string has no

  // segment it could use.

  if (*segments == '\0') {

    return false;

  }


  // If the string starts with separators, we will jump over those. If there is

  // only a slash and a '\0' after it, we can't determine the first segment

  // since there is none.

  while (cwk_path_is_separator(segments)) {

    ++segments;

    if (*segments == '\0') {

      return false;

    }

  }


  // So this is the beginning of our segment.

  segment->begin = segments;


  // Now let's determine the end of the segment, which we do by moving the path

  // pointer further until we find a separator.

  segments = cwk_path_find_next_stop(segments);


  // And finally, calculate the size of the segment by subtracting the position

  // from the end.

  segment->size = segments - segment->begin;

  segment->end = segments;


  // Tell the caller that we found a segment.

  return true;

}


static bool cwk_path_get_last_segment_without_root(const char *path,

  struct cwk_segment *segment)

{

  // Now this is fairly similar to the normal algorithm, however, it will assume

  // that there is no root in the path. So we grab the first segment at this

  // position, assuming there is no root.

  if (!cwk_path_get_first_segment_without_root(path, path, segment)) {

    return false;

  }


  // Now we find our last segment. The segment struct of the caller

  // will contain the last segment, since the function we call here will not

  // change the segment struct when it reaches the end.

  while (cwk_path_get_next_segment(segment)) {

    // We just loop until there is no other segment left.

  }


  return true;

}


static bool cwk_path_get_first_segment_joined(const char **paths,

  struct cwk_segment_joined *sj)

{

  bool result;


  // Prepare the first segment. We position the joined segment on the first path

  // and assign the path array to the struct.

  sj->path_index = 0;

  sj->paths = paths;


  // We loop through all paths until we find one which has a segment. The result

  // is stored in a variable, so we can let the caller know whether we found one

  // or not.

  result = false;

  while (paths[sj->path_index] != NULL &&

         (result = cwk_path_get_first_segment(paths[sj->path_index],

            &sj->segment)) == false) {

    ++sj->path_index;

  }


  return result;

}


static bool cwk_path_get_next_segment_joined(struct cwk_segment_joined *sj)

{

  bool result;


  if (sj->paths[sj->path_index] == NULL) {

    // We reached already the end of all paths, so there is no other segment

    // left.

    return false;

  } else if (cwk_path_get_next_segment(&sj->segment)) {

    // There was another segment on the current path, so we are good to

    // continue.

    return true;

  }


  // We try to move to the next path which has a segment available. We must at

  // least move one further since the current path reached the end.

  result = false;


  do {

    ++sj->path_index;


    // And we obviously have to stop this loop if there are no more paths left.

    if (sj->paths[sj->path_index] == NULL) {

      break;

    }


    // Grab the first segment of the next path and determine whether this path

    // has anything useful in it. There is one more thing we have to consider

    // here - for the first time we do this we want to skip the root, but

    // afterwards we will consider that to be part of the segments.

    result = cwk_path_get_first_segment_without_root(sj->paths[sj->path_index],

      sj->paths[sj->path_index], &sj->segment);


  } while (!result);


  // Finally, report the result back to the caller.

  return result;

}


static bool cwk_path_get_previous_segment_joined(struct cwk_segment_joined *sj)

{

  bool result;


  if (*sj->paths == NULL) {

    // It's possible that there is no initialized segment available in the

    // struct since there are no paths. In that case we can return false, since

    // there is no previous segment.

    return false;

  } else if (cwk_path_get_previous_segment(&sj->segment)) {

    // Now we try to get the previous segment from the current path. If we can

    // do that successfully, we can let the caller know that we found one.

    return true;

  }


  result = false;


  do {

    // We are done once we reached index 0. In that case there are no more

    // segments left.

    if (sj->path_index == 0) {

      break;

    }


    // There is another path which we have to inspect. So we decrease the path

    // index.

    --sj->path_index;


    // If this is the first path we will have to consider that this path might

    // include a root, otherwise we just treat is as a segment.

    if (sj->path_index == 0) {

      result = cwk_path_get_last_segment(sj->paths[sj->path_index],

        &sj->segment);

    } else {

      result = cwk_path_get_last_segment_without_root(sj->paths[sj->path_index],

        &sj->segment);

    }


  } while (!result);


  return result;

}


static bool cwk_path_segment_back_will_be_removed(struct cwk_segment_joined *sj)

{

  enum cwk_segment_type type;

  int counter;


  // We are handling back segments here. We must verify how many back segments

  // and how many normal segments come before this one to decide whether we keep

  // or remove it.


  // The counter determines how many normal segments are our current segment,

  // which will popped off before us. If the counter goes above zero it means

  // that our segment will be popped as well.

  counter = 0;


  // We loop over all previous segments until we either reach the beginning,

  // which means our segment will not be dropped or the counter goes above zero.

  while (cwk_path_get_previous_segment_joined(sj)) {


    // Now grab the type. The type determines whether we will increase or

    // decrease the counter. We don't handle a CWK_CURRENT frame here since it

    // has no influence.

    type = cwk_path_get_segment_type(&sj->segment);

    if (type == CWK_NORMAL) {

      // This is a normal segment. The normal segment will increase the counter

      // since it neutralizes one back segment. If we go above zero we can

      // return immediately.

      ++counter;

      if (counter > 0) {

        return true;

      }

    } else if (type == CWK_BACK) {

      // A CWK_BACK segment will reduce the counter by one. We can not remove a

      // back segment as long we are not above zero since we don't have the

      // opposite normal segment which we would remove.

      --counter;

    }

  }


  // We never got a count larger than zero, so we will keep this segment alive.

  return false;

}


static bool cwk_path_segment_normal_will_be_removed(

  struct cwk_segment_joined *sj)

{

  enum cwk_segment_type type;

  int counter;


  // The counter determines how many segments are above our current segment,

  // which will popped off before us. If the counter goes below zero it means

  // that our segment will be popped as well.

  counter = 0;


  // We loop over all following segments until we either reach the end, which

  // means our segment will not be dropped or the counter goes below zero.

  while (cwk_path_get_next_segment_joined(sj)) {


    // First, grab the type. The type determines whether we will increase or

    // decrease the counter. We don't handle a CWK_CURRENT frame here since it

    // has no influence.

    type = cwk_path_get_segment_type(&sj->segment);

    if (type == CWK_NORMAL) {

      // This is a normal segment. The normal segment will increase the counter

      // since it will be removed by a "../" before us.

      ++counter;

    } else if (type == CWK_BACK) {

      // A CWK_BACK segment will reduce the counter by one. If we are below zero

      // we can return immediately.

      --counter;

      if (counter < 0) {

        return true;

      }

    }

  }


  // We never got a negative count, so we will keep this segment alive.

  return false;

}


static bool

cwk_path_segment_will_be_removed(const struct cwk_segment_joined *sj,

  bool absolute)

{

  enum cwk_segment_type type;

  struct cwk_segment_joined sjc;


  // We copy the joined path so we don't need to modify it.

  sjc = *sj;


  // First we check whether this is a CWK_CURRENT or CWK_BACK segment, since

  // those will always be dropped.

  type = cwk_path_get_segment_type(&sj->segment);

  if (type == CWK_CURRENT) {

    return true;

  } else if (type == CWK_BACK && absolute) {

    return true;

  } else if (type == CWK_BACK) {

    return cwk_path_segment_back_will_be_removed(&sjc);

  } else {

    return cwk_path_segment_normal_will_be_removed(&sjc);

  }

}


static bool

cwk_path_segment_joined_skip_invisible(struct cwk_segment_joined *sj,

  bool absolute)

{

  while (cwk_path_segment_will_be_removed(sj, absolute)) {

    if (!cwk_path_get_next_segment_joined(sj)) {

      return false;

    }

  }


  return true;

}


static void cwk_path_get_root_windows(const char *path, size_t *length)

{

  const char *c;

  bool is_device_path;


  // A device path is a path which starts with "\\." or "\\?". A device path can

  // be a UNC path as well, in which case it will take up one more segment.

  is_device_path = false;


  // We can not determine the root if this is an empty string. So we set the

  // root to NULL and the length to zero and cancel the whole thing.

  c = path;

  *length = 0;

  if (!*c) {

    return;

  }


  // Now we have to verify whether this is a windows network path (UNC), which

  // we will consider our root.

  if (cwk_path_is_separator(c)) {

    ++c;


    // Check whether the path starts with a single back slash, which means this

    // is not a network path - just a normal path starting with a backslash.

    if (!cwk_path_is_separator(c)) {

      // Okay, this is not a network path but we still use the backslash as a

      // root.

      ++(*length);

      return;

    }


    // Yes, this is a network or device path. Skip the previous separator. Now

    // we need to determine whether this is a device path. We might advance one

    // character here if the server name starts with a '?' or a '.', but that's

    // fine since we will search for a separator afterwards anyway.

    ++c;

    is_device_path = (*c == '?' || *c == '.') && cwk_path_is_separator(++c);

    if (is_device_path) {

      // That's a device path, and the root must be either "\\.\" or "\\?\"

      // which is 4 characters long. (at least that's how Windows

      // GetFullPathName behaves.)

      *length = 4;

      return;

    }


    // We will grab anything up to the next stop. The next top might be a '\0'

    // or another separator. That will be the server name.

    c = cwk_path_find_next_stop(c);


    // If this is a separator and not the end of a string we wil have to include

    // it. However, if this is a '\0' we must not skip it.

    while (cwk_path_is_separator(c)) {

      ++c;

    }


    // We are now skipping the shared folder name, which will end after the

    // next stop.

    c = cwk_path_find_next_stop(c);


    // Then there might be a separator at the end. We will include that as well,

    // it will mark the path as absolute.

    if (cwk_path_is_separator(c)) {

      ++c;

    }


    // Finally, calculate the size of the root.

    *length = c - path;

    return;

  }


  // Move to the next and check whether this is a colon.

  if (*++c == ':') {

    *length = 2;


    // Now check whether this is a backslash (or slash). If it is not, we could

    // assume that the next character is a '\0' if it is a valid path. However,

    // we will not assume that - since ':' is not valid in a path it must be a

    // mistake by the caller than. We will try to understand it anyway.

    if (cwk_path_is_separator(++c)) {

      *length = 3;

    }

  }

}


static void cwk_path_get_root_unix(const char *path, size_t *length)

{

  // The slash of the unix path represents the root. There is no root if there

  // is no slash.

  if (cwk_path_is_separator(path)) {

    *length = 1;

  } else {

    *length = 0;

  }

}


static bool cwk_path_is_root_absolute(const char *path, size_t length)

{

  // This is definitely not absolute if there is no root.

  if (length == 0) {

    return false;

  }


  // If there is a separator at the end of the root, we can safely consider this

  // to be an absolute path.

  return cwk_path_is_separator(&path[length - 1]);

}


static size_t cwk_path_join_and_normalize_multiple(const char **paths,

  char *buffer, size_t buffer_size)

{

  size_t pos;

  bool absolute, has_segment_output;

  struct cwk_segment_joined sj;


  // We initialize the position after the root, which should get us started.

  cwk_path_get_root(paths[0], &pos);


  // Determine whether the path is absolute or not. We need that to determine

  // later on whether we can remove superfluous "../" or not.

  absolute = cwk_path_is_root_absolute(paths[0], pos);


  // First copy the root to the output. We will not modify the root.

  cwk_path_output_sized(buffer, buffer_size, 0, paths[0], pos);


  // So we just grab the first segment. If there is no segment we will always

  // output a "/", since we currently only support absolute paths here.

  if (!cwk_path_get_first_segment_joined(paths, &sj)) {

    goto done;

  }


  // Let's assume that we don't have any segment output for now. We will toggle

  // this flag once there is some output.

  has_segment_output = false;


  do {

    // Check whether we have to drop this segment because of resolving a

    // relative path or because it is a CWK_CURRENT segment.

    if (cwk_path_segment_will_be_removed(&sj, absolute)) {

      continue;

    }


    // Remember that we have segment output, so we can handle the trailing slash

    // later on. This is necessary since we might have segments but they are all

    // removed.

    has_segment_output = true;


    // Write out the segment but keep in mind that we need to follow the

    // buffer size limitations. That's why we use the path output functions

    // here.

    pos += cwk_path_output_sized(buffer, buffer_size, pos, sj.segment.begin,

      sj.segment.size);

    pos += cwk_path_output_separator(buffer, buffer_size, pos);

  } while (cwk_path_get_next_segment_joined(&sj));


  // Remove the trailing slash, but only if we have segment output. We don't

  // want to remove anything from the root.

  if (has_segment_output) {

    --pos;

  } else if (pos == 0) {

    // This may happen if the path is absolute and all segments have been

    // removed. We can not have an empty output - and empty output means we stay

    // in the current directory. So we will output a ".".

    assert(absolute == false);

    pos += cwk_path_output_current(buffer, buffer_size, pos);

  }


  // We must append a '\0' in any case, unless the buffer size is zero. If the

  // buffer size is zero, which means we can not.

done:

  cwk_path_terminate_output(buffer, buffer_size, pos);


  // And finally let our caller know about the total size of the normalized

  // path.

  return pos;

}


size_t cwk_path_get_absolute(const char *base, const char *path, char *buffer,

  size_t buffer_size)

{

  size_t i;

  const char *paths[4];


  // The basename should be an absolute path if the caller is using the API

  // correctly. However, he might not and in that case we will append a fake

  // root at the beginning.

  if (cwk_path_is_absolute(base)) {

    i = 0;

  } else {

    paths[0] = "/";

    i = 1;

  }


  if (cwk_path_is_absolute(path)) {

    // If the submitted path is not relative the base path becomes irrelevant.

    // We will only normalize the submitted path instead.

    paths[i++] = path;

    paths[i] = NULL;

  } else {

    // Otherwise we append the relative path to the base path and normalize it.

    // The result will be a new absolute path.

    paths[i++] = base;

    paths[i++] = path;

    paths[i] = NULL;

  }


  // Finally join everything together and normalize it.

  return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);

}


static void cwk_path_skip_segments_until_diverge(struct cwk_segment_joined *bsj,

  struct cwk_segment_joined *osj, bool absolute, bool *base_available,

  bool *other_available)

{

  // Now looping over all segments until they start to diverge. A path may

  // diverge if two segments are not equal or if one path reaches the end.

  do {


    // Check whether there is anything available after we skip everything which

    // is invisible. We do that for both paths, since we want to let the caller

    // know which path has some trailing segments after they diverge.

    *base_available = cwk_path_segment_joined_skip_invisible(bsj, absolute);

    *other_available = cwk_path_segment_joined_skip_invisible(osj, absolute);


    // We are done if one or both of those paths reached the end. They either

    // diverge or both reached the end - but in both cases we can not continue

    // here.

    if (!*base_available || !*other_available) {

      break;

    }


    // Compare the content of both segments. We are done if they are not equal,

    // since they diverge.

    if (!cwk_path_is_string_equal(bsj->segment.begin, osj->segment.begin,

          bsj->segment.size)) {

      break;

    }


    // We keep going until one of those segments reached the end. The next

    // segment might be invisible, but we will check for that in the beginning

    // of the loop once again.

    *base_available = cwk_path_get_next_segment_joined(bsj);

    *other_available = cwk_path_get_next_segment_joined(osj);

  } while (*base_available && *other_available);

}


size_t cwk_path_get_relative(const char *base_directory, const char *path,

  char *buffer, size_t buffer_size)

{

  size_t pos, base_root_length, path_root_length;

  bool absolute, base_available, other_available, has_output;

  const char *base_paths[2], *other_paths[2];

  struct cwk_segment_joined bsj, osj;


  pos = 0;


  // First we compare the roots of those two paths. If the roots are not equal

  // we can't continue, since there is no way to get a relative path from

  // different roots.

  cwk_path_get_root(base_directory, &base_root_length);

  cwk_path_get_root(path, &path_root_length);

  if (!cwk_path_is_string_equal(base_directory, path, base_root_length)) {

    return pos;

  }


  // Verify whether this is an absolute path. We need to know that since we can

  // remove all back-segments if it is.

  absolute = cwk_path_is_root_absolute(base_directory, base_root_length);


  // Initialize our joined segments. This will allow us to use the internal

  // functions to skip until diverge and invisible. We only have one path in

  // them though.

  base_paths[0] = base_directory;

  base_paths[1] = NULL;

  other_paths[0] = path;

  other_paths[1] = NULL;

  cwk_path_get_first_segment_joined(base_paths, &bsj);

  cwk_path_get_first_segment_joined(other_paths, &osj);


  // Okay, now we skip until the segments diverge. We don't have anything to do

  // with the segments which are equal.

  cwk_path_skip_segments_until_diverge(&bsj, &osj, absolute, &base_available,

    &other_available);


  // Assume there is no output until we have got some. We will need this

  // information later on to remove trailing slashes or alternatively output a

  // current-segment.

  has_output = false;


  // So if we still have some segments left in the base path we will now output

  // a back segment for all of them.

  if (base_available) {

    do {

      // Skip any invisible segment. We don't care about those and we don't need

      // to navigate back because of them.

      if (!cwk_path_segment_joined_skip_invisible(&bsj, absolute)) {

        break;

      }


      // Toggle the flag if we have output. We need to remember that, since we

      // want to remove the trailing slash.

      has_output = true;


      // Output the back segment and a separator. No need to worry about the

      // superfluous segment since it will be removed later on.

      pos += cwk_path_output_back(buffer, buffer_size, pos);

      pos += cwk_path_output_separator(buffer, buffer_size, pos);

    } while (cwk_path_get_next_segment_joined(&bsj));

  }


  // And if we have some segments available of the target path we will output

  // all of those.

  if (other_available) {

    do {

      // Again, skip any invisible segments since we don't need to navigate into

      // them.

      if (!cwk_path_segment_joined_skip_invisible(&osj, absolute)) {

        break;

      }


      // Toggle the flag if we have output. We need to remember that, since we

      // want to remove the trailing slash.

      has_output = true;


      // Output the current segment and a separator. No need to worry about the

      // superfluous segment since it will be removed later on.

      pos += cwk_path_output_sized(buffer, buffer_size, pos, osj.segment.begin,

        osj.segment.size);

      pos += cwk_path_output_separator(buffer, buffer_size, pos);

    } while (cwk_path_get_next_segment_joined(&osj));

  }


  // If we have some output by now we will have to remove the trailing slash. We

  // simply do that by moving back one character. The terminate output function

  // will then place the '\0' on this position. Otherwise, if there is no

  // output, we will have to output a "current directory", since the target path

  // points to the base path.

  if (has_output) {

    --pos;

  } else {

    pos += cwk_path_output_current(buffer, buffer_size, pos);

  }


  // Finally, we can terminate the output - which means we place a '\0' at the

  // current position or at the end of the buffer.

  cwk_path_terminate_output(buffer, buffer_size, pos);


  return pos;

}


size_t cwk_path_join(const char *path_a, const char *path_b, char *buffer,

  size_t buffer_size)

{

  const char *paths[3];


  // This is simple. We will just create an array with the two paths which we

  // wish to join.

  paths[0] = path_a;

  paths[1] = path_b;

  paths[2] = NULL;


  // And then call the join and normalize function which will do the hard work

  // for us.

  return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);

}


size_t cwk_path_join_multiple(const char **paths, char *buffer,

  size_t buffer_size)

{

  // We can just call the internal join and normalize function for this one,

  // since it will handle everything.

  return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);

}


void cwk_path_get_root(const char *path, size_t *length)

{

  // We use a different implementation here based on the configuration of the

  // library.

  if (path_style == CWK_STYLE_WINDOWS) {

    cwk_path_get_root_windows(path, length);

  } else {

    cwk_path_get_root_unix(path, length);

  }

}


size_t cwk_path_change_root(const char *path, const char *new_root,

  char *buffer, size_t buffer_size)

{

  const char *tail;

  size_t root_length, path_length, tail_length, new_root_length, new_path_size;


  // First we need to determine the actual size of the root which we will

  // change.

  cwk_path_get_root(path, &root_length);


  // Now we determine the sizes of the new root and the path. We need that to

  // determine the size of the part after the root (the tail).

  new_root_length = strlen(new_root);

  path_length = strlen(path);


  // Okay, now we calculate the position of the tail and the length of it.

  tail = path + root_length;

  tail_length = path_length - root_length;


  // We first output the tail and then the new root, that's because the source

  // path and the buffer may be overlapping. This way the root will not

  // overwrite the tail.

  cwk_path_output_sized(buffer, buffer_size, new_root_length, tail,

    tail_length);

  cwk_path_output_sized(buffer, buffer_size, 0, new_root, new_root_length);


  // Finally we calculate the size o the new path and terminate the output with

  // a '\0'.

  new_path_size = tail_length + new_root_length;

  cwk_path_terminate_output(buffer, buffer_size, new_path_size);


  return new_path_size;

}


bool cwk_path_is_absolute(const char *path)

{

  size_t length;


  // We grab the root of the path. This root does not include the first

  // separator of a path.

  cwk_path_get_root(path, &length);


  // Now we can determine whether the root is absolute or not.

  return cwk_path_is_root_absolute(path, length);

}


bool cwk_path_is_relative(const char *path)

{

  // The path is relative if it is not absolute.

  return !cwk_path_is_absolute(path);

}


void cwk_path_get_basename(const char *path, const char **basename,

  size_t *length)

{

  struct cwk_segment segment;


  // We get the last segment of the path. The last segment will contain the

  // basename if there is any. If there are no segments we will set the basename

  // to NULL and the length to 0.

  if (!cwk_path_get_last_segment(path, &segment)) {

    *basename = NULL;

    *length = 0;

    return;

  }


  // Now we can just output the segment contents, since that's our basename.

  // There might be trailing separators after the basename, but the size does

  // not include those.

  *basename = segment.begin;

  *length = segment.size;

}


size_t cwk_path_change_basename(const char *path, const char *new_basename,

  char *buffer, size_t buffer_size)

{

  struct cwk_segment segment;

  size_t pos, root_size, new_basename_size;


  // First we try to get the last segment. We may only have a root without any

  // segments, in which case we will create one.

  if (!cwk_path_get_last_segment(path, &segment)) {


    // So there is no segment in this path. First we grab the root and output

    // that. We are not going to modify the root in any way.

    cwk_path_get_root(path, &root_size);

    pos = cwk_path_output_sized(buffer, buffer_size, 0, path, root_size);


    // We have to trim the separators from the beginning of the new basename.

    // This is quite easy to do.

    while (cwk_path_is_separator(new_basename)) {

      ++new_basename;

    }


    // Now we measure the length of the new basename, this is a two step

    // process. First we find the '\0' character at the end of the string.

    new_basename_size = 0;

    while (new_basename[new_basename_size]) {

      ++new_basename_size;

    }


    // And then we trim the separators at the end of the basename until we reach

    // the first valid character.

    while (new_basename_size > 0 &&

           cwk_path_is_separator(&new_basename[new_basename_size - 1])) {

      --new_basename_size;

    }


    // Now we will output the new basename after the root.

    pos += cwk_path_output_sized(buffer, buffer_size, pos, new_basename,

      new_basename_size);


    // And finally terminate the output and return the total size of the path.

    cwk_path_terminate_output(buffer, buffer_size, pos);

    return pos;

  }


  // If there is a last segment we can just forward this call, which is fairly

  // easy.

  return cwk_path_change_segment(&segment, new_basename, buffer, buffer_size);

}


void cwk_path_get_dirname(const char *path, size_t *length)

{

  struct cwk_segment segment;


  // We get the last segment of the path. The last segment will contain the

  // basename if there is any. If there are no segments we will set the length

  // to 0.

  if (!cwk_path_get_last_segment(path, &segment)) {

    *length = 0;

    return;

  }


  // We can now return the length from the beginning of the string up to the

  // beginning of the last segment.

  *length = segment.begin - path;

}


bool cwk_path_get_extension(const char *path, const char **extension,

  size_t *length)

{

  struct cwk_segment segment;

  const char *c;


  // We get the last segment of the path. The last segment will contain the

  // extension if there is any.

  if (!cwk_path_get_last_segment(path, &segment)) {

    return false;

  }


  // Now we search for a dot within the segment. If there is a dot, we consider

  // the rest of the segment the extension. We do this from the end towards the

  // beginning, since we want to find the last dot.

  for (c = segment.end; c >= segment.begin; --c) {

    if (*c == '.') {

      // Okay, we found an extension. We can stop looking now.

      *extension = c;

      *length = segment.end - c;

      return true;

    }

  }


  // We couldn't find any extension.

  return false;

}


bool cwk_path_has_extension(const char *path)

{

  const char *extension;

  size_t length;


  // We just wrap the get_extension call which will then do the work for us.

  return cwk_path_get_extension(path, &extension, &length);

}


size_t cwk_path_change_extension(const char *path, const char *new_extension,

  char *buffer, size_t buffer_size)

{

  struct cwk_segment segment;

  const char *c, *old_extension;

  size_t pos, root_size, trail_size, new_extension_size;


  // First we try to get the last segment. We may only have a root without any

  // segments, in which case we will create one.

  if (!cwk_path_get_last_segment(path, &segment)) {


    // So there is no segment in this path. First we grab the root and output

    // that. We are not going to modify the root in any way. If there is no

    // root, this will end up with a root size 0, and nothing will be written.

    cwk_path_get_root(path, &root_size);

    pos = cwk_path_output_sized(buffer, buffer_size, 0, path, root_size);


    // Add a dot if the submitted value doesn't have any.

    if (*new_extension != '.') {

      pos += cwk_path_output_dot(buffer, buffer_size, pos);

    }


    // And finally terminate the output and return the total size of the path.

    pos += cwk_path_output(buffer, buffer_size, pos, new_extension);

    cwk_path_terminate_output(buffer, buffer_size, pos);

    return pos;

  }


  // Now we seek the old extension in the last segment, which we will replace

  // with the new one. If there is no old extension, it will point to the end of

  // the segment.

  old_extension = segment.end;

  for (c = segment.begin; c < segment.end; ++c) {

    if (*c == '.') {

      old_extension = c;

    }

  }


  pos = cwk_path_output_sized(buffer, buffer_size, 0, segment.path,

    old_extension - segment.path);


  // If the new extension starts with a dot, we will skip that dot. We always

  // output exactly one dot before the extension. If the extension contains

  // multiple dots, we will output those as part of the extension.

  if (*new_extension == '.') {

    ++new_extension;

  }


  // We calculate the size of the new extension, including the dot, in order to

  // output the trail - which is any part of the path coming after the

  // extension. We must output this first, since the buffer may overlap with the

  // submitted path - and it would be overridden by longer extensions.

  new_extension_size = strlen(new_extension) + 1;

  trail_size = cwk_path_output(buffer, buffer_size, pos + new_extension_size,

    segment.end);


  // Finally we output the dot and the new extension. The new extension itself

  // doesn't contain the dot anymore, so we must output that first.

  pos += cwk_path_output_dot(buffer, buffer_size, pos);

  pos += cwk_path_output(buffer, buffer_size, pos, new_extension);


  // Now we terminate the output with a null-terminating character, but before

  // we do that we must add the size of the trail to the position which we

  // output before.

  pos += trail_size;

  cwk_path_terminate_output(buffer, buffer_size, pos);


  // And the position is our output size now.

  return pos;

}


size_t cwk_path_normalize(const char *path, char *buffer, size_t buffer_size)

{

  const char *paths[2];


  // Now we initialize the paths which we will normalize. Since this function

  // only supports submitting a single path, we will only add that one.

  paths[0] = path;

  paths[1] = NULL;


  return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);

}


size_t cwk_path_get_intersection(const char *path_base, const char *path_other)

{

  bool absolute;

  size_t base_root_length, other_root_length;

  const char *end;

  const char *paths_base[2], *paths_other[2];

  struct cwk_segment_joined base, other;


  // We first compare the two roots. We just return zero if they are not equal.

  // This will also happen to return zero if the paths are mixed relative and

  // absolute.

  cwk_path_get_root(path_base, &base_root_length);

  cwk_path_get_root(path_other, &other_root_length);

  if (!cwk_path_is_string_equal(path_base, path_other, base_root_length)) {

    return 0;

  }


  // Configure our paths. We just have a single path in here for now.

  paths_base[0] = path_base;

  paths_base[1] = NULL;

  paths_other[0] = path_other;

  paths_other[1] = NULL;


  // So we get the first segment of both paths. If one of those paths don't have

  // any segment, we will return 0.

  if (!cwk_path_get_first_segment_joined(paths_base, &base) ||

      !cwk_path_get_first_segment_joined(paths_other, &other)) {

    return base_root_length;

  }


  // We now determine whether the path is absolute or not. This is required

  // because if will ignore removed segments, and this behaves differently if

  // the path is absolute. However, we only need to check the base path because

  // we are guaranteed that both paths are either relative or absolute.

  absolute = cwk_path_is_root_absolute(path_base, base_root_length);


  // We must keep track of the end of the previous segment. Initially, this is

  // set to the beginning of the path. This means that 0 is returned if the

  // first segment is not equal.

  end = path_base + base_root_length;


  // Now we loop over both segments until one of them reaches the end or their

  // contents are not equal.

  do {

    // We skip all segments which will be removed in each path, since we want to

    // know about the true path.

    if (!cwk_path_segment_joined_skip_invisible(&base, absolute) ||

        !cwk_path_segment_joined_skip_invisible(&other, absolute)) {

      break;

    }


    if (!cwk_path_is_string_equal(base.segment.begin, other.segment.begin,

          base.segment.size)) {

      // So the content of those two segments are not equal. We will return the

      // size up to the beginning.

      return end - path_base;

    }


    // Remember the end of the previous segment before we go to the next one.

    end = base.segment.end;

  } while (cwk_path_get_next_segment_joined(&base) &&

           cwk_path_get_next_segment_joined(&other));


  // Now we calculate the length up to the last point where our paths pointed to

  // the same place.

  return end - path_base;

}


bool cwk_path_get_first_segment(const char *path, struct cwk_segment *segment)

{

  size_t length;

  const char *segments;


  // We skip the root since that's not part of the first segment. The root is

  // treated as a separate entity.

  cwk_path_get_root(path, &length);

  segments = path + length;


  // Now, after we skipped the root we can continue and find the actual segment

  // content.

  return cwk_path_get_first_segment_without_root(path, segments, segment);

}


bool cwk_path_get_last_segment(const char *path, struct cwk_segment *segment)

{

  // We first grab the first segment. This might be our last segment as well,

  // but we don't know yet. There is no last segment if there is no first

  // segment, so we return false in that case.

  if (!cwk_path_get_first_segment(path, segment)) {

    return false;

  }


  // Now we find our last segment. The segment struct of the caller

  // will contain the last segment, since the function we call here will not

  // change the segment struct when it reaches the end.

  while (cwk_path_get_next_segment(segment)) {

    // We just loop until there is no other segment left.

  }


  return true;

}


bool cwk_path_get_next_segment(struct cwk_segment *segment)

{

  const char *c;


  // First we jump to the end of the previous segment. The first character must

  // be either a '\0' or a separator.

  c = segment->begin + segment->size;

  if (*c == '\0') {

    return false;

  }


  // Now we skip all separator until we reach something else. We are not yet

  // guaranteed to have a segment, since the string could just end afterwards.

  assert(cwk_path_is_separator(c));

  do {

    ++c;

  } while (cwk_path_is_separator(c));


  // If the string ends here, we can safely assume that there is no other

  // segment after this one.

  if (*c == '\0') {

    return false;

  }


  // Now we are safe to assume there is a segment. We store the beginning of

  // this segment in the segment struct of the caller.

  segment->begin = c;


  // And now determine the size of this segment, and store it in the struct of

  // the caller as well.

  c = cwk_path_find_next_stop(c);

  segment->end = c;

  segment->size = c - segment->begin;


  // Tell the caller that we found a segment.

  return true;

}


bool cwk_path_get_previous_segment(struct cwk_segment *segment)

{

  const char *c;


  // The current position might point to the first character of the path, which

  // means there are no previous segments available.

  c = segment->begin;

  if (c <= segment->segments) {

    return false;

  }


  // We move towards the beginning of the path until we either reached the

  // beginning or the character is no separator anymore.

  do {

    --c;

    if (c <= segment->segments) {

      // So we reached the beginning here and there is no segment. So we return

      // false and don't change the segment structure submitted by the caller.

      return false;

    }

  } while (cwk_path_is_separator(c));


  // We are guaranteed now that there is another segment, since we moved before

  // the previous separator and did not reach the segment path beginning.

  segment->end = c + 1;

  segment->begin = cwk_path_find_previous_stop(segment->segments, c);

  segment->size = segment->end - segment->begin;


  return true;

}


enum cwk_segment_type cwk_path_get_segment_type(

  const struct cwk_segment *segment)

{

  // We just make a string comparison with the segment contents and return the

  // appropriate type.

  if (strncmp(segment->begin, ".", segment->size) == 0) {

    return CWK_CURRENT;

  } else if (strncmp(segment->begin, "..", segment->size) == 0) {

    return CWK_BACK;

  }


  return CWK_NORMAL;

}


bool cwk_path_is_separator(const char *str)

{

  const char *c;


  // We loop over all characters in the read symbols.

  c = separators[path_style];

  if (!c)

      return false;

  while (*c) {

    if (*c == *str) {

      return true;

    }


    ++c;

  }


  return false;

}


size_t cwk_path_change_segment(struct cwk_segment *segment, const char *value,

  char *buffer, size_t buffer_size)

{

  size_t pos, value_size, tail_size;


  // First we have to output the head, which is the whole string up to the

  // beginning of the segment. This part of the path will just stay the same.

  pos = cwk_path_output_sized(buffer, buffer_size, 0, segment->path,

    segment->begin - segment->path);


  // In order to trip the submitted value, we will skip any separator at the

  // beginning of it and behave as if it was never there.

  while (cwk_path_is_separator(value)) {

    ++value;

  }


  // Now we determine the length of the value. In order to do that we first

  // locate the '\0'.

  value_size = 0;

  while (value[value_size]) {

    ++value_size;

  }


  // Since we trim separators at the beginning and in the end of the value we

  // have to subtract from the size until there are either no more characters

  // left or the last character is no separator.

  while (value_size > 0 && cwk_path_is_separator(&value[value_size - 1])) {

    --value_size;

  }


  // We also have to determine the tail size, which is the part of the string

  // following the current segment. This part will not change.

  tail_size = strlen(segment->end);


  // Now we output the tail. We have to do that, because if the buffer and the

  // source are overlapping we would override the tail if the value is

  // increasing in length.

  cwk_path_output_sized(buffer, buffer_size, pos + value_size, segment->end,

    tail_size);


  // Finally we can output the value in the middle of the head and the tail,

  // where we have enough space to fit the whole trimmed value.

  pos += cwk_path_output_sized(buffer, buffer_size, pos, value, value_size);


  // Now we add the tail size to the current position and terminate the output -

  // basically, ensure that there is a '\0' at the end of the buffer.

  pos += tail_size;

  cwk_path_terminate_output(buffer, buffer_size, pos);


  // And now tell the caller how long the whole path would be.

  return pos;

}


enum cwk_path_style cwk_path_guess_style(const char *path)

{

  const char *c;

  size_t root_length;

  struct cwk_segment segment;


  // First we determine the root. Only windows roots can be longer than a single

  // slash, so if we can determine that it starts with something like "C:", we

  // know that this is a windows path.

  cwk_path_get_root_windows(path, &root_length);

  if (root_length > 1) {

    return CWK_STYLE_WINDOWS;

  }


  // Next we check for slashes. Windows uses backslashes, while unix uses

  // forward slashes. Windows actually supports both, but our best guess is to

  // assume windows with backslashes and unix with forward slashes.

  for (c = path; *c; ++c) {

    if (*c == *separators[CWK_STYLE_UNIX]) {

      return CWK_STYLE_UNIX;

    } else if (*c == *separators[CWK_STYLE_WINDOWS]) {

      return CWK_STYLE_WINDOWS;

    }

  }


  // This path does not have any slashes. We grab the last segment (which

  // actually must be the first one), and determine whether the segment starts

  // with a dot. A dot is a hidden folder or file in the UNIX world, in that

  // case we assume the path to have UNIX style.

  if (!cwk_path_get_last_segment(path, &segment)) {

    // We couldn't find any segments, so we default to a UNIX path style since

    // there is no way to make any assumptions.

    return CWK_STYLE_UNIX;

  }


  if (*segment.begin == '.') {

    return CWK_STYLE_UNIX;

  }


  // And finally we check whether the last segment contains a dot. If it

  // contains a dot, that might be an extension. Windows is more likely to have

  // file names with extensions, so our guess would be windows.

  for (c = segment.begin; *c; ++c) {

    if (*c == '.') {

      return CWK_STYLE_WINDOWS;

    }

  }


  // All our checks failed, so we will return a default value which is currently

  // UNIX.

  return CWK_STYLE_UNIX;

}


void cwk_path_set_style(enum cwk_path_style style)

{

  // We can just set the global path style variable and then the behaviour for

  // all functions will change accordingly.

  assert(style == CWK_STYLE_UNIX || style == CWK_STYLE_WINDOWS);

  path_style = style;

}


enum cwk_path_style cwk_path_get_style(void)

{

  // Simply return the path style which we store in a global variable.

  return path_style;

}

cwk_segment_joined
A joined path represents multiple path strings which are concatenated, but not (necessarily) stored i...
Definition: cwalk.c:33

cwk_segment
A segment represents a single component of a path.
Definition: cwalk.h:15