2025年12月24日 星期三

在 linux 系統下簡單的 tar 檔案讀寫程式

參考網站: https://github.com/calccrypto/tar/tree/master

改寫成我想用的: listtar.cpp

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <grp.h>
#include <pwd.h>
#include <dirent.h>
// printf-style diagnostic output: same format arguments as printf, but always written to stderr.
#define debug_printf(fmt, ...)  fprintf(stderr, fmt, ##__VA_ARGS__)
typedef struct Link_list_meta TarLinkList;
struct Link_list_meta {
    union {
        char block[512];// metadata
        union {
            struct {// Pre-POSIX.1-1988 format
                char name[100];             // file name
                char mode[8];               // permissions
                char uid[8];                // user id (octal)
                char gid[8];                // group id (octal)
                char size[12];              // size (octal)
                char mtime[12];             // modification time (octal)
                char check[8];              // checksum of the block, with spaces in the check field while calculation is done (octal)
                char link;                  // link indicator
                char link_name[100];        // name of linked file
            };
            struct {// UStar: Unix Standard TAR format (POSIX IEEE P1003.1)
                char old[156];              // first 156 octets of Pre-POSIX.1-1988 format
                char filetype;              // file type
                char also_link_name[100];   // name of linked file
                char ustar[6];              // ustar\0
                char version[2];            // #Version
                char owner[32];             // user name (string)
                char group[32];             // group name (string)
                char major[8];              // device major number
                char minor[8];              // device minor number
                char prefix[155];
            };
        };
    };
    TarLinkList *next;
    ssize_t append(int fd, void *buf, int n) {  return write(fd, buf, n);  }// todo: append buf into fd at the end
    void block_update(int fd, char *filename, off_t filesize, mode_t filemode = 0, time_t *ct = nullptr) {
        if (fd < 0) return;
        memset(check, ' ', sizeof(check));// init string, It must be empty before caculation.
        sprintf(name, "%s"  , filename);
        sprintf(mode, "%07o", filemode > 0 ? filemode & 0777 : 0664);
        if (filesize > 0) sprintf(size , "%011o",(unsigned int)filesize);
        else memset(size , '0', sizeof(size));
        if (ct) sprintf(mtime, "%011o",(unsigned int)*ct);
        else { // using current time if not provide.
            time_t now;
            time(&now);
            sprintf(mtime, "%011o",(unsigned int)now);
        }        
        int n = sizeof(block), checksum = 0;
        for (int i = 0; i < n; i++) checksum += (unsigned char)block[i];// caculate checksum in the block
        sprintf(check, "%07o", checksum);
        append(fd, block, n);
    }
    int open_ram2tar(char *dir_name, char *create_name=nullptr) {//todo: validate dir_name
        char backup_name[strlen(dir_name) + 16];
        if (create_name == nullptr) {
            sprintf(backup_name, "_%s.tar", dir_name);
            create_name = backup_name;
        }
        int tar_fd = open(create_name, O_RDWR | O_TRUNC | O_CREAT, S_IRUSR | S_IWUSR);
        if (tar_fd < 0) return -1;
        block_update(tar_fd, dir_name, 0, 0775);// chmod ug+rwx o+rx dir_name
        filetype = '0';// change to normal file.
        debug_printf("createe file: %s\n", create_name);
        return tar_fd;
    }
    Link_list_meta(bool is_directory = true) {// constructor to initialize the block
        memset(block, 0, sizeof(block));
        version[0] = '0', version[1] = '0';
        filetype = is_directory ? '5' : '0';// todo: other type
        uid_t user_id = getuid();
        sprintf(uid, "%07o", user_id);
        sprintf(gid, "%07o", getgid());
        sprintf(ustar, "%s", "ustar");
        struct passwd *pwd = getpwuid(user_id);// user info get from UID
        if (pwd) {
            struct group *grp = getgrgid(pwd->pw_gid);
            sprintf(owner, "%s", pwd->pw_name);
            sprintf(group, "%s", grp ? grp->gr_name: "None");
        }
    }
};
// Return true when the first n bytes of buffer are all zero (also true for n <= 0).
bool is_empty(char *buffer, int n) {
    const char *cursor = buffer;
    int remaining = n;
    while (remaining-- > 0) {
        if (*cursor++ != 0) {
            return false;   // found a non-zero byte
        }
    }
    return true;
}
// Convert an octal text field of at most n bytes into a long integer.
// Leading spaces are skipped and parsing stops at the first byte that is not
// an octal digit (NUL, space, or anything else), so both "0000644\0" and
// "0000644 " style tar fields are accepted. Returns 0 for an empty field.
// todo: negative number
long int o2l(char *octal_str, int n) {
    if (octal_str == 0) return 0l;
    int i = 0;
    while (i < n && octal_str[i] == ' ') i++;       // leading padding
    unsigned long value = 0ul;                      // unsigned: shifting never hits signed overflow
    for (; i < n; i++) {
        char c = octal_str[i];
        if (c < '0' || c > '7') break;              // terminator or garbage ends the number
        value = (value << 3) | (unsigned long)(c - '0');
    }
    return (long int)value;
}
void dir2tar(const char *foldername, char *create_name = nullptr) {  
    char *path2folder = (char *)foldername;
    if (*path2folder == '.') {
        path2folder ++;
        if (*path2folder == '.') path2folder ++;
    }
    if (*path2folder == '/' ) path2folder ++;
    else path2folder = (char *)foldername;
    while (*path2folder == '/') path2folder ++;// remove another '/'
    struct stat file_st;
    if (lstat(path2folder, &file_st)!=0 || (file_st.st_mode & S_IFMT)!=S_IFDIR) return;
    DIR *cd = opendir(path2folder);// change into the directory
    if (!cd) return; // make sure user has rights to access
    TarLinkList metadata;
    int tar_fd = metadata.open_ram2tar(path2folder);
    if (tar_fd > 0) {
        char fd_buf[512];
        struct dirent *temp;
        while ((temp = readdir(cd))) { // todo: to proceed child directory
            if (temp->d_name[0] == '.') continue;// skip . and ..
            char temp_fullname[strlen(path2folder) + strlen(temp->d_name) + 2];// + '/' and EOS
            sprintf(temp_fullname, "%s/%s", path2folder, temp->d_name);// fullname
            lstat(temp_fullname, &file_st);
            if ((file_st.st_mode & S_IFMT) != S_IFREG || file_st.st_size <= 0) continue; // todo: other type support
            int temp_fd = open(temp_fullname, O_RDONLY);
            if (temp_fd < 0) continue;
            off_t temp_len = file_st.st_size;// to append file content
            off_t zeros_pad = temp_len % 512;// check remain
            if (zeros_pad) zeros_pad = 512 - zeros_pad;// number of zeros need to pad
            metadata.block_update(tar_fd, temp_fullname, temp_len, file_st.st_mode, &file_st.st_mtim.tv_sec);
            debug_printf("%s: size = %ld, checksum = %6s\n", temp_fullname, temp_len, metadata.check);
            while (temp_len > 0) {
                ssize_t l = read(temp_fd, fd_buf, temp_len > 512 ? 512 : temp_len);
                if (l <= 0) break;//todo: error correction
                metadata.append(tar_fd, fd_buf, l);
                temp_len -= l;
            }
            close(temp_fd);
            if (zeros_pad) {
                memset(fd_buf, 0, zeros_pad);
                metadata.append(tar_fd, fd_buf, zeros_pad);
            }
        }
        memset(fd_buf, 0, 512);// need 2 block of zeros in the end for tar file
        for (int i = 0; i < 2; i ++) metadata.append(tar_fd, fd_buf, 512);
        close(tar_fd);
    }
    closedir(cd);
}
void list_tarfile(const char *tar) {
    int fd = open(tar, O_RDONLY);
    if (fd > 0) {// in linux: stdin = 0, stdout = 1, stderr = 2
        TarLinkList *archive = nullptr;// start
        TarLinkList **tarlist = &archive;// get pointer of archive
        int block_size = sizeof(archive->block);
        while (true) {
            TarLinkList *temp = (TarLinkList *)calloc(1, sizeof(TarLinkList));// 分配空間並初始為 0
            if (temp == nullptr) break;
            if (read(fd, temp->block, block_size) != block_size) {// to read 512 bytes metadata
                debug_printf("讀取錯誤,忽略!\n");
                free(temp);
                break;
            }
            if (is_empty(temp->block, block_size)) {// EOF, enough to stop
                if (read(fd, temp->block, block_size) == block_size) {// check 2nd EOF
                    if (is_empty(temp->block, block_size)) {
                        debug_printf("正常檔尾,結束:\n");
                    }
                }
                free(temp);
                break;
            }
            *tarlist = temp;// fill temp as current entry
            tarlist = &temp->next;// to fill for next time
            long int goahead = o2l(temp->size, 11);// 檔案長度: 8 進位(12 bytes)
            int r = goahead % 512; // 取餘數
            if (r) goahead += 512 - r;// 若非 512 倍數, 無條件補滿成 512 倍數
            if (lseek(fd, goahead, SEEK_CUR) < 0) { // 前進到下個位置
                debug_printf("前進錯誤,忽略!\n");
                break;
            }
        }
        *tarlist = nullptr;// end of List
        while (archive) {// list and free
            time_t t = o2l(archive->mtime, 11);// 更新時間
            struct tm *ct = localtime(&t);
            debug_printf("%s@%s\t", archive->owner, archive->group); // 使用者@群組
            debug_printf("%d-%02d-%02d:%02d.%02d\t",
                ct->tm_year + 1900,
                ct->tm_mon + 1,
                ct->tm_mday,
                ct->tm_hour,
                ct->tm_min
            );// 年-月-日-時:分
            switch (archive->filetype) {
                case '0':
                    debug_printf("%ld (bytes)", o2l(archive->size, 11));// 檔案長度
                    break;
                case '1': case '2':
                    debug_printf("檔案連結");
                    break;
                case '3': case '4':
                    debug_printf("裝置檔案-%04ld::%04ld-", o2l(archive->major, 7), o2l(archive->minor, 7));// 設備編號
                    break;
                case '5':
                    debug_printf(" <目錄> ");
                    break;
                case '6':
                    debug_printf("先進先出");
                    break;
                default:
                    debug_printf("????");
                    break;
            }
            debug_printf("\t<- (%6s) %-32s\n", archive->check, archive->name);// 檔名
            TarLinkList *temp = archive -> next;// remove later
            free(archive);
            archive = temp;
        }
        close(fd);
    }
}
void dump_tarfile(const char *tar, const char *filename) {
    int fd = open(tar, O_RDONLY);
    if (fd > 0) {
        char fd_buf[512];// as buffer
        TarLinkList *archive = (TarLinkList *)fd_buf; // point to fd_buf
        long int goahead = 0l;
        while (lseek(fd, goahead, SEEK_CUR)>= 0 && read(fd, fd_buf, 512) == 512 && !is_empty(fd_buf, 512)) {
            if (strcmp(archive->name, filename) == 0) {
                long int filesize = o2l(archive->size, 11);
                while (filesize > 0) {
                    int l = read(fd, fd_buf, (filesize > 512) ? 512 : filesize);
                    if (l <= 0) continue;
                    for (int i = 0; i < l; i++) debug_printf("%c", fd_buf[i]);
                    filesize -= l;
                }
                break;
            }
            goahead = o2l(archive->size, 11);// 檔案長度: 8 進位(12 bytes)
            int r = goahead % 512; // 取餘數
            if (r) goahead += 512 - r;// 若非 512 倍數, 無條件補滿成 512 倍數
        }
        close(fd);
    }
}
int main(int argc, char *argv[]) {
    if (argc > 1 && argv[1]) {
        struct stat file_st;
        if (lstat(argv[1], &file_st) == 0) {//  make sure argv[1] file exists.
            if ((file_st.st_mode & S_IFMT) == S_IFDIR) {
                dir2tar(argv[1]);// createe tar file to store all files in argv[1] which is a directory.
            } else {// todo: make sure argv[1] is a tar file
                if (argc > 2 && argv[2]) {
                    printf("===%s:%s===\n", argv[1], argv[2]);
                    dump_tarfile(argv[1], argv[2]); // to dump argv[2] in argv[1]
                    printf("\n=== EOF ===\n");// end of file
                } else {
                    list_tarfile(argv[1]);// list all files in tar
                }
            }
        }
    }
    return 0;
}

一個將 RAM 中的資料寫入 tar 檔的測試程式 (使用了 C++ 語法, 需以 C++ 編譯): test_ram2tar.cpp

#include <stdio.h>
#include <string.h>
#include <time.h>
#include <fcntl.h>
#include <unistd.h>
#include <grp.h>
#include <pwd.h>
// printf-style diagnostic output: same format arguments as printf, but always written to stderr.
#define debug_printf(fmt, ...)  fprintf(stderr, fmt, ##__VA_ARGS__)
typedef struct Link_list_meta TarLinkList;
struct Link_list_meta {
    union {
        char block[512];// metadata
        union {
            struct {// Pre-POSIX.1-1988 format
                char name[100];             // file name
                char mode[8];               // permissions
                char uid[8];                // user id (octal)
                char gid[8];                // group id (octal)
                char size[12];              // size (octal)
                char mtime[12];             // modification time (octal)
                char check[8];              // checksum of the block, with spaces in the check field while calculation is done (octal)
                char link;                  // link indicator
                char link_name[100];        // name of linked file
            };
            struct {// UStar: Unix Standard TAR format (POSIX IEEE P1003.1)
                char old[156];              // first 156 octets of Pre-POSIX.1-1988 format
                char filetype;              // file type
                char also_link_name[100];   // name of linked file
                char ustar[6];              // ustar\0
                char version[2];            // #Version
                char owner[32];             // user name (string)
                char group[32];             // group name (string)
                char major[8];              // device major number
                char minor[8];              // device minor number
                char prefix[155];
            };
        };
    };
    TarLinkList *next;
    ssize_t append(int fd, void *buf, int n) {  return write(fd, buf, n);  }// todo: append buf into fd at the end
    void block_update(int fd, char *filename, off_t filesize, mode_t filemode = 0, time_t *ct = nullptr) {
        if (fd < 0) return;
        memset(check, ' ', sizeof(check));// init string, It must be empty before caculation.
        sprintf(name, "%s"  , filename);
        sprintf(mode, "%07o", filemode > 0 ? filemode & 0777 : 0664);
        if (filesize > 0) sprintf(size , "%011o",(unsigned int)filesize);
        else memset(size , '0', sizeof(size));
        if (ct) sprintf(mtime, "%011o",(unsigned int)*ct);
        else { // using current time if not provide.
            time_t now;
            time(&now);
            sprintf(mtime, "%011o",(unsigned int)now);
        }        
        int n = sizeof(block), checksum = 0;
        for (int i = 0; i < n; i++) checksum += (unsigned char)block[i];// caculate checksum in the block
        sprintf(check, "%07o", checksum);
        append(fd, block, n);
    }
    int open_ram2tar(char *dir_name, char *create_name=nullptr) {//todo: validate dir_name
        char backup_name[strlen(dir_name) + 16];
        if (create_name == nullptr) {
            sprintf(backup_name, "_%s.tar", dir_name);
            create_name = backup_name;
        }
        int tar_fd = open(create_name, O_RDWR | O_TRUNC | O_CREAT, S_IRUSR | S_IWUSR);
        if (tar_fd < 0) return -1;
        block_update(tar_fd, dir_name, 0, 0775);// chmod ug+rwx o+rx dir_name
        filetype = '0';// change to normal file.
        debug_printf("createe file: %s\n", create_name);
        return tar_fd;
    }
    Link_list_meta(bool is_directory = true) {// constructor to initialize the block
        memset(block, 0, sizeof(block));
        version[0] = '0', version[1] = '0';
        filetype = is_directory ? '5' : '0';// todo: other type
        uid_t user_id = getuid();
        sprintf(uid, "%07o", user_id);
        sprintf(gid, "%07o", getgid());
        sprintf(ustar, "%s", "ustar");
        struct passwd *pwd = getpwuid(user_id);// user info get from UID
        if (pwd) {
            struct group *grp = getgrgid(pwd->pw_gid);
            sprintf(owner, "%s", pwd->pw_name);
            sprintf(group, "%s", grp ? grp->gr_name: "None");
        }
    }
};
int main(int argc, char *argv[]) {
    TarLinkList metadata;
    char dir_name[16] = {"bb"};
    int tar_fd = metadata.open_ram2tar(dir_name);// create folder first
    if (tar_fd > 0) {
        char fd_buf[512], ram_name[100];
        for(int i = 0; i < 10; i ++) { // to create 10 example files
            sprintf(ram_name, "%s/%d", dir_name, i);// combine folder name with specific name as ram file name
            sprintf(fd_buf, "%s:%d", ram_name, i + 1);// fill content for the ram file
            metadata.block_update(tar_fd, ram_name, strlen(fd_buf));// append metadata into tar
            metadata.append(tar_fd, fd_buf, 512);// append 512 bytes of ram into tar
        }
        memset(fd_buf, 0, 512);
        for (int i = 0; i < 2; i ++) metadata.append(tar_fd, fd_buf, 512);// append 2 block of zeros in the end
        close(tar_fd);
    }
    return 0;
}


沒有留言:

張貼留言

在 linux 系統下簡單的 tar 檔案讀寫程式

參考網站: https://github.com/calccrypto/tar/tree/master,  改寫成我想用的: listtar.cpp #include <stdio.h> #include <stdlib.h> #include <s...