How can I copy a file on Unix using C?
Asked Answered
R

13

68

I'm looking for the Unix equivalent of Win32's CopyFile, I don't want to reinvent the wheel by writing my own version.

Ratepayer answered 1/2, 2010 at 21:5 Comment(2)
To not reinvent the wheel compile GNU coreutils, AFAIK it has a static library for copying files in its build tree, used by cp and others. It supports sparseness and btrfs cowBeach
linux subset: #7464189Granitite
N
68

There is no need to either call non-portable APIs like sendfile, or shell out to external utilities. The same method that worked back in the 70s still works now:

#include <fcntl.h>
#include <unistd.h>
#include <errno.h>

int cp(const char *to, const char *from)
{
    int fd_to, fd_from;
    char buf[4096];
    ssize_t nread;
    int saved_errno;

    fd_from = open(from, O_RDONLY);
    if (fd_from < 0)
        return -1;

    fd_to = open(to, O_WRONLY | O_CREAT | O_EXCL, 0666);
    if (fd_to < 0)
        goto out_error;

    while (nread = read(fd_from, buf, sizeof buf), nread > 0)
    {
        char *out_ptr = buf;
        ssize_t nwritten;

        do {
            nwritten = write(fd_to, out_ptr, nread);

            if (nwritten >= 0)
            {
                nread -= nwritten;
                out_ptr += nwritten;
            }
            else if (errno != EINTR)
            {
                goto out_error;
            }
        } while (nread > 0);
    }

    if (nread == 0)
    {
        if (close(fd_to) < 0)
        {
            fd_to = -1;
            goto out_error;
        }
        close(fd_from);

        /* Success! */
        return 0;
    }

  out_error:
    saved_errno = errno;

    close(fd_from);
    if (fd_to >= 0)
        close(fd_to);

    errno = saved_errno;
    return -1;
}
Neumark answered 1/2, 2010 at 23:16 Comment(8)
@Caf: OMG....g.o.t.o..... :) Your code is more saner than mine anyways... ;) The old loop with read/write is the most portable... +1 from me...Lieb
I find controlled use of goto can be useful to consolidate the error handling path in one place.Neumark
Not useable for general purpose. A copy of a file is more then just the data stream. How about sparse files or extended attributes? Thats once again why Windows API as ugly as it is beats LinuxMae
question: if fd_to is not set as O_NONBLOCK, do you still need the second loop?Oxtail
You handle EINTR in the write() loop, but not in the read() loop.Hagridden
4096 is a damn small bufferCoimbra
@AnttiHaapala: Yes, that's something that has changed since the 70s - a buffer of say 2MB would be reasonable now.Neumark
@AnttiHaapala if you use a much bigger buffer you better not allocate it on the stack (i.e., use malloc()). Also, since 4096 is a typical page size (also a multiple of hdd sector size) it is not an unreasonable value) -- tailor to taste.Pantisocracy
S
27

There is no baked-in equivalent CopyFile() function in the APIs. But, under Linux, sendfile(2) can be used to copy a file in kernel mode which is a faster and better solution (for numerous reasons) than opening a file, looping over it to read into a buffer, and writing the output to another file. Other similar-ish APIs exist for some of the other popular unixy operating systems as well.

The following code snippet should work on macOS (10.5+), (Free)BSD, Linux (as of 2.6.33), and Solaris, among others. The implementation is (or should be) "zero-copy" for all platforms, meaning all of it is done in kernelspace and there is no copying of buffers or data in and out of userspace. Depending on the platform, this solution might also automatically copy resource forks/alternate data streams or use filesystem optimizations such as copy-on-write to avoid any actual I/O altogether (this is pretty much the best performance you can get).

#include <fcntl.h>
#include <unistd.h>
#if defined(__APPLE__)
#include <copyfile.h>
#elif defined(__FreeBSD__)
#include <limits.h>
#include <sys/stat.h>
#include <sys/types.h>
#else // Linux and others
#include <limits.h>
#include <sys/sendfile.h>
#include <sys/stat.h>
#endif

int OSCopyFile(const char* source, const char* destination)
{
    int result = 0;
    int input, output;
    if ((input = open(source, O_RDONLY)) == -1)
    {
        return -1;
    }
    // Create new or truncate existing at destination
    if ((output = creat(destination, 0660)) == -1)
    {
        close(input);
        return -1;
    }

    // Use platform-specific APIs to perform a kernel-mode file copy
#if defined(__APPLE__)
    // fcopyfile(3) is supported on OS X 10.5+
    result = fcopyfile(input, output, 0, COPYFILE_ALL);
#elif defined(__FreeBSD__)
    // FreeBSD used to have fcopyfile(3) but that API was dropped and now we
    // need to use copy_file_range(2) manually. Note that we are still not
    // buffering in userspace.
    struct stat file_stat = {0};
    result = fstat(input, &file_stat);
    off_t copied = 0;
    while (result == 0 && copied < file_stat.st_size) {
        ssize_t written = copy_file_range(input, 0, output, NULL, SSIZE_MAX, 0);
        copied += written;
        if (written == -1) {
            result = -1;
        }
    }
#else
    // sendfile will work with non-socket output (i.e. regular file) under
    // Linux 2.6.33+ and some other unixy systems.
    struct stat file_stat = {0};
    result = fstat(input, &file_stat);
    off_t copied = 0;
    while (result == 0 && copied < file_stat.st_size) {
        ssize_t written = sendfile(output, input, &copied, SSIZE_MAX);
        copied += written;
        if (written == -1) {
            result = -1;
        }
    }
#endif

    close(input);
    close(output);

    return result;
}

Note that Oracle Solaris also supports sendfile() and has similar semantics as Linux. It can be detected with #if defined(__sun) && defined(__SVR4), though in the above code it is handled in the default (Linux, et. al.) case.

Spill answered 1/2, 2010 at 21:17 Comment(17)
According to the man page, the output argument of sendfile must be a socket. Are you sure this works?Kirkham
The prototype from my man page (OS X): int sendfile(int fd, int s, off_t offset, off_t *len, struct sf_hdtr *hdtr, int flags); The output param is fd - file descriptor. At any rate, I tested it quickly (hence the updated non-C++ version) and it worked :)Spill
For Linux, Jay Conrod is right - the out_fd of sendfile could be a regular file in 2.4 kernels, but it now must support the sendpage internal kernel API (which essentially means pipe or socket). sendpage is implemented differently on different UNIXes - there's no standard semantics for it.Neumark
The prototype under Linux is different to OSX, hence you would think that (and I thought that too) that when I saw your implementation and saw the extra parameters for the sendfile...it is platform dependant - something worth bearing in mind about!Lieb
fyi - you can save a lot of work with a if (PathsMatch(source, destination)) return 1; /* where PathsMatch is the appropriate path comparison routine for the locale */, otherwise I imagine that the second open would fail.Sheik
+1 man sendfile says that since 2.6.33, this is supported again. sendfile() is superior to CopyFile() as it allows an offset. This is useful for stripping header information from a file.Kashmir
Updated for compatibility with Linux, FreeBSD, and OS X Mavericks.Spill
FYI sendfile allows also to copy on file handle on Solaris.Cartel
You should declare result as ssize_t not as int 64 bit systems and files bigger than 2GiB are quite common nowadays.Cartel
Furthermore, O_CREAT option in open() call requires mode parameter or else the file will be created with random permissions.Cartel
The function has also another bug. The open flags lack the O_TRUNC. So if the destination file already exists and is bigger than the source file, then that function will write the bytes of the source file but will keep the data beyond the size of the first file. I will edit the answer and replace the open(destination) call with a creat() call which does the right thing.Cartel
A propos big files, sendfile is limited to 0x7ffff000 bytes pro call on Linux. So if one wants to send also files that exceed 2GiB it is necessary to put the sendfile in a loop: off_t offset=0, total_sent=0; ssize_t sent; do { sent = sendfile(output, input, &offset, fileinfo.st_size-total_sent); } while(sent > -1 && (total_sent += sent) < fileinfo.st_size);Cartel
Tried to run this on FreeBSD 10.3 and it does not work: copyfile.h: No such file or directoryStatute
Also, is this really zero-copy on OS X? If so, can somebody link to a reference?Statute
copy.c:42:12: error: variable 'fileinfo' has initializer but incomplete type struct stat fileinfo = {0}Parcheesi
To make things even more complicated, Linux 4.5 has gained the system call copy_file_range(), which seems to be better than sendfile() for some situations (e.g. filesystems which support copy on write, like btrfs).Hartmunn
Answer has been updated to use copy_file_range(2) under FreeBSD (API is also available under newer Linux kernel versions), as fcopyfile(3) was dropped a while back. It might be a good idea to use copy_file_range() under Linux as well, given the restrictions on sendfile(2) and the possible performance benefits of copy_file_range() on CoW operating systems (zfs, bcachefs, btrfs, xfs in certain circumstances, etc).Spill
S
22

It's straight forward to use fork/execl to run cp to do the work for you. This has advantages over system in that it is not prone to a Bobby Tables attack and you don't need to sanitize the arguments to the same degree. Further, since system() requires you to cobble together the command argument, you are not likely to have a buffer overflow issue due to sloppy sprintf() checking.

The advantage to calling cp directly instead of writing it is not having to worry about elements of the target path existing in the destination. Doing that in roll-you-own code is error-prone and tedious.

I wrote this example in ANSI C and only stubbed out the barest error handling, other than that it's straight forward code.

void copy(char *source, char *dest)
{
    int childExitStatus;
    pid_t pid;
    int status;
    if (!source || !dest) {
        /* handle as you wish */
    }

    pid = fork();

    if (pid == 0) { /* child */
        execl("/bin/cp", "/bin/cp", source, dest, (char *)0);
    }
    else if (pid < 0) {
        /* error - couldn't start process - you decide how to handle */
    }
    else {
        /* parent - wait for child - this has all error handling, you
         * could just call wait() as long as you are only expecting to
         * have one child process at a time.
         */
        pid_t ws = waitpid( pid, &childExitStatus, WNOHANG);
        if (ws == -1)
        { /* error - handle as you wish */
        }

        if( WIFEXITED(childExitStatus)) /* exit code in childExitStatus */
        {
            status = WEXITSTATUS(childExitStatus); /* zero is normal exit */
            /* handle non-zero as you wish */
        }
        else if (WIFSIGNALED(childExitStatus)) /* killed */
        {
        }
        else if (WIFSTOPPED(childExitStatus)) /* stopped */
        {
        }
    }
}
Sheik answered 1/2, 2010 at 21:54 Comment(2)
+1 for another long, detailed, slog. Really makes you appreciate the "vector"/list form of system() in perl. Hmm. Maybe a system-ish function with an argv array would be nice to have?!?Togo
... after all it was implemented 17 years ago in glibc, and being a standard function 10 earsbefore your answer was written ..Coimbra
C
6

Another variant of the copy function using normal POSIX calls and without any loop. Code inspired from the buffer copy variant of the answer of caf. Warning: Using mmap can easily fail on 32 bit systems, on 64 bit system the danger is less likely.

#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>

int cp(const char *to, const char *from)
{
  int fd_from = open(from, O_RDONLY);
  if(fd_from < 0)
    return -1;
  struct stat Stat;
  if(fstat(fd_from, &Stat)<0)
    goto out_error;

  void *mem = mmap(NULL, Stat.st_size, PROT_READ, MAP_SHARED, fd_from, 0);
  if(mem == MAP_FAILED)
    goto out_error;

  int fd_to = creat(to, 0666);
  if(fd_to < 0)
    goto out_error;

  ssize_t nwritten = write(fd_to, mem, Stat.st_size);
  if(nwritten < Stat.st_size)
    goto out_error;

  if(close(fd_to) < 0) {
    fd_to = -1;
    goto out_error;
  }
  close(fd_from);

  /* Success! */
  return 0;
}
out_error:;
  int saved_errno = errno;

  close(fd_from);
  if(fd_to >= 0)
    close(fd_to);

  errno = saved_errno;
  return -1;
}

EDIT: Corrected the file creation bug. See comment in https://mcmap.net/q/280706/-how-can-i-copy-a-file-on-unix-using-c/2180157#2180157 answer.

Cyton answered 11/9, 2014 at 11:49 Comment(6)
The same bug as in #2180579. If the destination already exists and is bigger than the source, then the file copy will only overwrite the destination partially and not truncate the resulting file;Cartel
(I realize this is an old question but...) What will happen with mmap when the size of the file being mapped is very large compared to the size of available memory and swapfile? Will hit hang the system in an out of memory/swapping situation?Mikkanen
The mapping of a file into the address range of the process doesn't take any memory per se. It is as if you said that your file is now part of swap space. This means that when you access an address in your mapped file, it will first generate a page fault as there is nothing in memory. The OS loads then corresponding page at that address from the disk and restores control to the process. Should there be no memory available, then the OS will simply free some other mapped pages from any other process; in priority clean pages (i.e. that do not need to be written to disk) but also dirty pages. =>Cartel
Swapping happens when the access pattern to the mapped pages exceeds the amount of physical memory in the system and it has to read and write pages all the time. mmap can be seen as nothing more than just increasing the systems swap area. mmap with option MAP_SHARED can also be seen as a way to make the file cache accessible to a process.Cartel
So if you mmap a large file, then access a lot of it, and the amount of the file you access is larger than your real memory, the OS will start paging out other processes. If that happens too much, the OS will start thrashing on swap activity. My point is, with files large relative to memory+swap, you have to think about the size of the mmap data that's being accessed to not cause problemsMikkanen
There are other multiple problems with this code. In 32-bit address spaces, it simply can't handle large files - once files get over 2GB, they may not fit into the address space, and files larger than 4 GB simple can't fit. In 64-bit mode, there can be limits to how much data can be written in one write() call.Catherincatherina
B
5

Copying files byte-by-byte does work, but is slow and wasteful on modern UNIXes. Modern UNIXes have “copy-on-write” support built-in to the filesystem: a system call makes a new directory entry pointing at the existing bytes on disk, and no file content bytes on disk are touched until one of the copies is modified, at which point only the changed blocks are written to disk. This allows near-instant file copies that use no additional file blocks, regardless of file size. For example, here are some details about how this works in xfs.

On linux, use the FICLONE ioctl as coreutils cp now does by default.

 #ifdef FICLONE
   return ioctl (dest_fd, FICLONE, src_fd);
 #else
   errno = ENOTSUP;
   return -1;
 #endif

On macOS, use clonefile(2) for instant copies on APFS volumes. This is what Apple’s cp -c uses. The docs are not completely clear but it is likely that copyfile(3) with COPYFILE_CLONE also uses this. Leave a comment if you’d like me to test that.

In case these copy-on-write operations are not supported—whether the OS is too old, the underlying file system does not support it, or because you are copying files between different filesystems—you do need to fall back to trying sendfile, or as a last resort, copying byte-by-byte. But to save everyone a lot of time and disk space, please give FICLONE and clonefile(2) a try first.

Blasphemous answered 16/1, 2021 at 19:22 Comment(0)
L
4

There is a way to do this, without resorting to the system call, you need to incorporate a wrapper something like this:

#include <sys/sendfile.h>
#include <fcntl.h>
#include <unistd.h>

/* 
** http://www.unixguide.net/unix/programming/2.5.shtml 
** About locking mechanism...
*/

int copy_file(const char *source, const char *dest){
   int fdSource = open(source, O_RDWR);

   /* Caf's comment about race condition... */
   if (fdSource > 0){
     if (lockf(fdSource, F_LOCK, 0) == -1) return 0; /* FAILURE */
   }else return 0; /* FAILURE */

   /* Now the fdSource is locked */

   int fdDest = open(dest, O_CREAT);
   off_t lCount;
   struct stat sourceStat;
   if (fdSource > 0 && fdDest > 0){
      if (!stat(source, &sourceStat)){
          int len = sendfile(fdDest, fdSource, &lCount, sourceStat.st_size);
          if (len > 0 && len == sourceStat.st_size){
               close(fdDest);
               close(fdSource);

               /* Sanity Check for Lock, if this is locked -1 is returned! */
               if (lockf(fdSource, F_TEST, 0) == 0){
                   if (lockf(fdSource, F_ULOCK, 0) == -1){
                      /* WHOOPS! WTF! FAILURE TO UNLOCK! */
                   }else{
                      return 1; /* Success */
                   }
               }else{
                   /* WHOOPS! WTF! TEST LOCK IS -1 WTF! */
                   return 0; /* FAILURE */
               }
          }
      }
   }
   return 0; /* Failure */
}

The above sample (error checking is omitted!) employs open, close and sendfile.

Edit: As caf has pointed out a race condition can occur between the open and stat so I thought I'd make this a bit more robust...Keep in mind that the locking mechanism varies from platform to platform...under Linux, this locking mechanism with lockf would suffice. If you want to make this portable, use the #ifdef macros to distinguish between different platforms/compilers...Thanks caf for spotting this...There is a link to a site that yielded "universal locking routines" here.

Lieb answered 1/2, 2010 at 21:25 Comment(11)
I am not 100% sure about the sendfile prototype, I think I got one of the parameters wrong... please bear that in mind... :)Lieb
You have a race condition - the file you have open as fdSource and the file you have stat()ed are not necessarily the same.Neumark
@caf: Can you give more details as I am looking at it and how can there be a race condition? I will amend the answer accordingly..thanks for letting me know...Lieb
tommbieb75: Simple - in between the open() call and the stat() call, someone else could have renamed the file and put a different file under that name - so you will copy the data from the first file, but using the length of the second one.Neumark
@caf: Holy moly....why didn't I think of that...well spotted...a lock should do the trick on the source file...well done for spotting that...race condition..well I never...as Clint Eastwood in 'Gran Torino' says 'J.C all friday...'Lieb
A lock doesn't help (they're not mandatory), but fstat can be used in this case to fix it.Neumark
@caf: Damnnit..... I just saw your comment after I edited my answer in the code.... dang.... LOL!!!!Lieb
@tommbieb75: If you use fstat instead of stat() you should be safe. Even if the file gets renamed/deleted/altered, your file descriptor will point to the old copy of the file which will still exist on the disk until you the file descriptor is closed (at which point it will be removed). The content could still change, though. On Windows I think the file will be locked for you. On Unix, locks are advisory, so there it is no guarantee against someone else changing the content (en.wikipedia.org/wiki/File_locking#In_Unix-like_systems)Blinders
Got "bad file descriptor" error from sendFile(). Adding O_WRONLY to open()'s flags when openning dest file fixes it for me. int fdDest = open(dest, O_CREAT|O_WRONLY);Linc
On Linux, you also have to add O_TRUNC or O_EXCL either to truncate the file or to fail if the file already exists. Otherwise, the sendfile() will only overwrite the sourceStat.st_size bytes of the destination.Cartel
Furthermore, open() takes 3 parameters when O_CREAT or O_TMPFILE flags are set, not 2. Your destination file will be created with random access rights otherwise.Cartel
U
3

I see nobody mentioned yet copy_file_range, supported at least on Linux and FreeBSD. The advantage of this one is that it explicitly documents ability of making use of CoW techniques like reflinks. Quoting:

copy_file_range() gives filesystems an opportunity to implement "copy acceleration" techniques, such as the use of reflinks (i.e., two or more inodes that share pointers to the same copy-on-write disk blocks) or server-side-copy (in the case of NFS).

FWIW, I am not sure if older sendfile is able to do that. The few mentions I found claim that it doesn't. In that sense, copy_file_range is superior to sendfile.

Below is an example of using the call (which is copied verbatim from the manual). I also checked that after using this code to copy a bash binary within BTRFS filesystem, the copy is reflinked to the original (I did that by calling duperemove on the files, and seeing Skipping - extents are already deduped. messages).

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
    int fd_in, fd_out;
    struct stat stat;
    off64_t len, ret;

    if (argc != 3) {
        fprintf(stderr, "Usage: %s <source> <destination>\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    fd_in = open(argv[1], O_RDONLY);
    if (fd_in == -1) {
        perror("open (argv[1])");
        exit(EXIT_FAILURE);
    }

    if (fstat(fd_in, &stat) == -1) {
        perror("fstat");
        exit(EXIT_FAILURE);
    }

    len = stat.st_size;

    fd_out = open(argv[2], O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd_out == -1) {
        perror("open (argv[2])");
        exit(EXIT_FAILURE);
    }

    do {
        ret = copy_file_range(fd_in, NULL, fd_out, NULL, len, 0);
        if (ret == -1) {
            perror("copy_file_range");
            exit(EXIT_FAILURE);
        }

        len -= ret;
    } while (len > 0 && ret > 0);

    close(fd_in);
    close(fd_out);
    exit(EXIT_SUCCESS);
}
Uncurl answered 26/7, 2021 at 12:36 Comment(0)
O
2

One option is that you could use system() to execute cp. This just re-uses the cp(1) command to do the work. If you only need to make another link to the file, this can be done with link() or symlink().

Overtrick answered 1/2, 2010 at 21:10 Comment(8)
beware that system() is a security hole.Sheik
Really? Would you use this in production code? I can't think of a good reason not to but it doesn't strike me as a clean solution.Ratepayer
If you specify the path to /bin/cp you're relatively safe, unless the attacker has managed to compromise the system to the extent that they can make modifications to arbitrary system shell utilities in /bin. If they've compromised the system to that extent you've got far bigger problems.Overtrick
Using system to run commands is fairly common in unix-land. With proper hygiene it can be reasonably secure and robust. After all, the commands are designed to be used in this way.Overtrick
@Togo - true; if you need to cross filesystems you would need symlink().Overtrick
What will happen if the user creates a file name like "somefile;rm /bin/*"? system() executes the command with sh -c so the text of the entire string is executed by the shell, which means you'd get anything after a semicolon executed as a command - stinks if your code is running setuid too. This is not unlike Bobby Tables (xkcd.com/327). For the trouble it would take to fully sanitize system() you could instead do the fork/exec pair directly on /bin/cp with the correct arguments.Sheik
plinth: I agree that using system() in this way is generally a bad idea, but note that / is one of the only two characters not allowed in a UNIX filename.Neumark
Alas, in sanitizing for a system call, 'taint perl :-(Togo
U
2
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>

#define    print_err(format, args...)   printf("[%s:%d][error]" format "\n", __func__, __LINE__, ##args)
#define    DATA_BUF_SIZE                (64 * 1024)    //limit to read maximum 64 KB data per time

int32_t get_file_size(const char *fname){
    struct stat sbuf;

    if (NULL == fname || strlen(fname) < 1){
        return 0;
    }

    if (stat(fname, &sbuf) < 0){
        print_err("%s, %s", fname, strerror(errno));
        return 0;
    }

    return sbuf.st_size; /* off_t shall be signed interge types, used for file size */
}

bool copyFile(CHAR *pszPathIn, CHAR *pszPathOut)
{
    INT32 fdIn, fdOut;
    UINT32 ulFileSize_in = 0;
    UINT32 ulFileSize_out = 0;
    CHAR *szDataBuf;

    if (!pszPathIn || !pszPathOut)
    {
        print_err(" Invalid param!");
        return false;
    }

    if ((1 > strlen(pszPathIn)) || (1 > strlen(pszPathOut)))
    {
        print_err(" Invalid param!");
        return false;
    }

    if (0 != access(pszPathIn, F_OK))
    {
        print_err(" %s, %s!", pszPathIn, strerror(errno));
        return false;
    }

    if (0 > (fdIn = open(pszPathIn, O_RDONLY)))
    {
        print_err("open(%s, ) failed, %s", pszPathIn, strerror(errno));
        return false;
    }

    if (0 > (fdOut = open(pszPathOut, O_CREAT | O_WRONLY | O_TRUNC, 0777)))
    {
        print_err("open(%s, ) failed, %s", pszPathOut, strerror(errno));
        close(fdIn);
        return false;
    }

    szDataBuf = malloc(DATA_BUF_SIZE);
    if (NULL == szDataBuf)
    {
        print_err("malloc() failed!");
        return false;
    }

    while (1)
    {
        INT32 slSizeRead = read(fdIn, szDataBuf, sizeof(szDataBuf));
        INT32 slSizeWrite;
        if (slSizeRead <= 0)
        {
            break;
        }

        slSizeWrite = write(fdOut, szDataBuf, slSizeRead);
        if (slSizeWrite < 0)
        {
            print_err("write(, , slSizeRead) failed, %s", slSizeRead, strerror(errno));
            break;
        }

        if (slSizeWrite != slSizeRead) /* verify wheter write all byte data successfully */
        {
            print_err(" write(, , %d) failed!", slSizeRead);
            break;
        }
    }

    close(fdIn);
    fsync(fdOut); /* causes all modified data and attributes to be moved to a permanent storage device */
    close(fdOut);

    ulFileSize_in = get_file_size(pszPathIn);
    ulFileSize_out = get_file_size(pszPathOut);
    if (ulFileSize_in == ulFileSize_out) /* verify again wheter write all byte data successfully */
    {
        free(szDataBuf);
        return true;
    }
    free(szDataBuf);
    return false;
}
Unfaithful answered 15/9, 2018 at 10:37 Comment(0)
T
1
sprintf( cmd, "/bin/cp -p \'%s\' \'%s\'", old, new);

system( cmd);

Add some error checks...

Otherwise, open both and loop on read/write, but probably not what you want.

...

UPDATE to address valid security concerns:

Rather than using "system()", do a fork/wait, and call execv() or execl() in the child.

execl( "/bin/cp", "-p", old, new);
Togo answered 1/2, 2010 at 21:11 Comment(5)
This does not work for files that have spaces (or quotes, backslashes, dollar signs, etc.) in the name. I use spaces in file names fairly often.Sweat
Ouch. That's right. Add backslash-single-quotes around the file names in the sprintf().Togo
OK, this is a swiss cheese (see valid security concerns in comments elsewhere), but if you have a relatively controlled environment, it might have some use.Togo
You have a shell code injection vulnerability if you do not properly handle single quote characters in the values of old or new. A little more effort to use fork and do your own exec can avoid all these problems with quoting.Addis
Yep, simple obvious and wrong, in many cases. Which is why I up-voted some of the more elaborate examples.Togo
G
1

Very simple :

#define BUF_SIZE 65536

int cp(const char *from, const char*to){
FILE *src, *dst;
size_t in, out;
char *buf = (char*) malloc(BUF_SIZE* sizeof(char));
src = fopen(from, "rb");
if (NULL == src) exit(2);
dst = fopen(to, "wb");
if (dst < 0) exit(3);
while (1) {
    in = fread(buf, sizeof(char), BUF_SIZE, src);
    if (0 == in) break;
    out = fwrite(buf, sizeof(char), in, dst);
    if (0 == out) break;
}
fclose(src);
fclose(dst);
}

Works on windows and linux.

Goby answered 7/3, 2021 at 21:2 Comment(3)
cp() doesn't return anything, yet it is typed as int, which could cause problems and might even be UB.Linder
Seems like free(buf) is missing?Wallis
You're checking dst like it is a file descriptor, but it is FILE * so you have to check it in the same way as srcLithometeor
C
0

Good question. Related to another good question:

In C on linux how would you implement cp

There are two approaches to the "simplest" implementation of cp. One approach uses a file copying system call function of some kind - the closest thing we get to a C function version of the Unix cp command. The other approach uses a buffer and read/write system call functions, either directly, or using a FILE wrapper.

It's likely the file copying system calls that take place solely in kernel-owned memory are faster than the system calls that take place in both kernel- and user-owned memory, especially in a network filesystem setting (copying between machines). But that would require testing (e.g. with Unix command time) and will be dependent on the hardware where the code is compiled and executed.

It's also likely that someone with an OS that doesn't have the standard Unix library will want to use your code. Then you'd want to use the buffer read/write version, since it only depends on <stdlib.h> and <stdio.h> (and friends)

<unistd.h>

Here's an example that uses function copy_file_range from the unix standard library <unistd.h>, to copy a source file to a (possible non-existent) destination file. The copy takes place in kernel space.

/* copy.c
 *
 * Defines function copy:
 *
 * Copy source file to destination file on the same filesystem (possibly NFS).
 * If the destination file does not exist, it is created. If the destination
 * file does exist, the old data is truncated to zero and replaced by the 
 * source data. The copy takes place in the kernel space.
 *
 * Compile with:
 *
 * gcc copy.c -o copy -Wall -g
 */

#define _GNU_SOURCE 
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>

/* On versions of glibc < 2.27, need to use syscall.
 * 
 * To determine glibc version used by gcc, compute an integer representing the
 * version. The strides are chosen to allow enough space for two-digit 
 * minor version and patch level.
 *
 */
#define GCC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __gnuc_patchlevel__)
#if GCC_VERSION < 22700
static loff_t copy_file_range(int in, loff_t* off_in, int out, 
  loff_t* off_out, size_t s, unsigned int flags)
{
  return syscall(__NR_copy_file_range, in, off_in, out, off_out, s,
    flags);
}
#endif

/* The copy function.
 */
int copy(const char* src, const char* dst){
  int in, out;
  struct stat stat;
  loff_t s, n;
  if(0>(in = open(src, O_RDONLY))){
    perror("open(src, ...)");
    exit(EXIT_FAILURE);
  }
  if(fstat(in, &stat)){
    perror("fstat(in, ...)");
    exit(EXIT_FAILURE);
  }
  s = stat.st_size; 
  if(0>(out = open(dst, O_CREAT|O_WRONLY|O_TRUNC, 0644))){
    perror("open(dst, ...)");
    exit(EXIT_FAILURE);
  }
  do{
    if(1>(n = copy_file_range(in, NULL, out, NULL, s, 0))){
      perror("copy_file_range(...)");
      exit(EXIT_FAILURE);
    }
    s-=n;
  }while(0<s && 0<n);
  close(in);
  close(out);
  return EXIT_SUCCESS;
}

/* Test it out.
 *
 * BASH:
 *
 * gcc copy.c -o copy -Wall -g
 * echo 'Hello, world!' > src.txt
 * ./copy src.txt dst.txt
 * [ -z "$(diff src.txt dst.txt)" ]
 *
 */

int main(int argc, char* argv[argc]){
  if(argc!=3){
    printf("Usage: %s <SOURCE> <DESTINATION>", argv[0]);
    exit(EXIT_FAILURE);
  }
  copy(argv[1], argv[2]);
  return EXIT_SUCCESS;
}

It's based on the example in my Ubuntu 20.x Linux distribution's man page for copy_file_range. Check your man pages for it with:

> man copy_file_range

Then hit j or Enter until you get to the example section. Or search by typing /example.

<stdio.h>/<stdlib.h> only

Here's an example that only uses stdlib/stdio. The downside is it uses an intermediate buffer in user-space.

/* copy.c
 *
 * Compile with:
 * 
 * gcc copy.c -o copy -Wall -g
 *
 * Defines function copy:
 *
 * Copy a source file to a destination file. If the destination file already
 * exists, this clobbers it. If the destination file does not exist, it is
 * created. 
 *
 * Uses a buffer in user-space, so may not perform as well as 
 * copy_file_range, which copies in kernel-space.
 *
 */

#include <stdlib.h>
#include <stdio.h>

#define BUF_SIZE 65536 //2^16

int copy(const char* in_path, const char* out_path){
  size_t n;
  FILE* in=NULL, * out=NULL;
  char* buf = calloc(BUF_SIZE, 1);
  if((in = fopen(in_path, "rb")) && (out = fopen(out_path, "wb")))
    while((n = fread(buf, 1, BUF_SIZE, in)) && fwrite(buf, 1, n, out));
  free(buf);
  if(in) fclose(in);
  if(out) fclose(out);
  return EXIT_SUCCESS;
}

/* Test it out.
 *
 * BASH:
 *
 * gcc copy.c -o copy -Wall -g
 * echo 'Hello, world!' > src.txt
 * ./copy src.txt dst.txt
 * [ -z "$(diff src.txt dst.txt)" ]
 *
 */
int main(int argc, char* argv[argc]){
  if(argc!=3){
    printf("Usage: %s <SOURCE> <DESTINATION>\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  return copy(argv[1], argv[2]);
}
Concert answered 7/8, 2021 at 20:25 Comment(0)
M
0

Just use it

#include <stdio.h>
#include <unistd.h> // For system calls write, read e close
#include <fcntl.h>

#define BUFFER_SIZE 1024

int main(int argc, char* argv[]) {
    if (argc != 3) {
        printf("Usage %s Src_file Dest_file\n", argv[0]);
        return -1;
    }
    
    unsigned char buffer[BUFFER_SIZE] = {0};
    ssize_t byte = 0;
    
    int rfd, wfd;
    
    // open file in read mode
    if ((rfd = open(argv[1], O_RDONLY)) == -1) {
        printf("Failed to open input file %s\n", argv[1]);
        return -1;
    }
    
    // open file in write mode and already exists to overwrite
    if ((wfd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 644)) == -1) {
        printf("Failed to create output file %s\n", argv[2]);
    }
    
    // loop
    while (1) {
        // read buffer
        byte = read(rfd, buffer, sizeof(buffer));
        // error with reading
        if (byte == -1) {
            printf("Encountered an error\n");
            break;
        } else if (byte == 0) {
            // file end exit loop
            printf("File copying successful.\n");
            break;
        }
        
        // error with writing
       if (write(wfd, buffer, byte) == -1) {
        printf("Failed to copying file\n");
          break;
       }
    
    }
    
    // Close file
    close(rfd);
    close(wfd);
    
    return 0;
}

Run

./program Src_file Dest_file
Mezzorilievo answered 6/12, 2022 at 14:6 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.