2025年12月24日 星期三

在 linux 系統下簡單的 tar 檔案讀寫程式

參考網站: https://github.com/calccrypto/tar/tree/master

改寫成我想用的: listtar.cpp

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <grp.h>
#include <pwd.h>
#include <dirent.h>
#define debug_printf(fmt, ...)  fprintf(stderr, fmt, ##__VA_ARGS__)
typedef struct Link_list_meta TarLinkList;
// One tar header record (512 bytes) plus a `next` pointer so that records can
// be chained into a singly linked list.
// The same 512 bytes can be viewed three ways through the nested unions:
// as a raw block, as the old (pre-POSIX) header, or as the UStar header.
struct Link_list_meta {
    union {
        char block[512];                    // raw view of the whole header record
        union {
            struct {                        // Pre-POSIX.1-1988 layout
                char name[100];             // member (file) name
                char mode[8];               // permission bits
                char uid[8];                // owner user id, octal text
                char gid[8];                // owner group id, octal text
                char size[12];              // payload size, octal text
                char mtime[12];             // modification time, octal text
                char check[8];              // header checksum, octal text; the field itself
                                            // holds spaces while the sum is being computed
                char link;                  // link indicator
                char link_name[100];        // target name of a link
            };
            struct {                        // UStar layout (POSIX IEEE P1003.1)
                char old[156];              // overlays the first 156 bytes of the old layout
                char type;                  // entry type flag
                char also_link_name[100];   // target name of a link
                char ustar[8];              // format magic "ustar" plus version bytes
                char owner[32];             // owner user name, text
                char group[32];             // owner group name, text
                char major[8];              // device major number
                char minor[8];              // device minor number
                char prefix[155];
            };
        };
    };
    TarLinkList *next;                      // following record in the list

    // Returns the sum of all 512 bytes of `block`, each taken as an unsigned
    // value. Nothing is masked out here, so the caller decides what the check
    // field contains when the sum is taken.
    unsigned int checksum() {
        unsigned int total = 0;
        for (unsigned int pos = 0; pos < sizeof(block); pos++) {
            total += (unsigned char)block[pos];
        }
        return total;
    }
};
// Reports whether the first n bytes of buffer are all zero.
// Returns true when every one of them is 0 (also when n <= 0),
// false as soon as a non-zero byte is met.
bool is_empty(char *buffer, int n) {
    int pos = 0;
    while (pos < n) {
        if (buffer[pos] != 0) {
            return false;
        }
        pos++;
    }
    return true;
}
// Converts an octal text field of a tar header to a long integer.
//   octal_str : start of the field (it does not have to be NUL terminated)
//   n         : maximum number of bytes to look at
// Leading spaces are skipped; conversion stops at the first byte that is not
// an octal digit (NUL, space, ...) or after n bytes, whichever comes first.
// A field without any digit yields 0. The result saturates instead of
// overflowing. todo: negative numbers and binary (base-256) fields are not
// supported.
long int o2l(const char *octal_str, int n) {
    // Largest value that can still be shifted left by 3 without overflow.
    const long int shift_limit = (long int)(~0UL >> 1) >> 3;
    long int val_long = 0l;
    int i = 0;
    while (i < n && octal_str[i] == ' ') i++;   // some writers pad on the left with spaces
    for (; i < n; i++) {
        const char c = octal_str[i];
        if (c < '0' || c > '7') break;          // NUL / space terminator or garbage
        if (val_long > shift_limit) {           // would overflow: clamp
            val_long = (long int)(~0UL >> 1);
            break;
        }
        val_long = (val_long << 3) | (long int)(c - '0');
    }
    return val_long;
}
void dir2tar(const char *foldername, char *creat_name = nullptr) {  
    char *path2folder = (char *)foldername;
    if (*path2folder == '.') {
        path2folder ++;
        if (*path2folder == '.') path2folder ++;
    }
    if (*path2folder == '/' ) path2folder ++;
    else path2folder = (char *)foldername;
    while (*path2folder == '/') path2folder ++;// remove another '/'
    struct stat file_st;
    if (lstat(path2folder, &file_st) == 0 && (file_st.st_mode & S_IFMT) == S_IFDIR) {
        struct passwd *pwd = getpwuid(file_st.st_uid);  // Get user info from UID
        struct group  *grp = pwd ? getgrgid(pwd->pw_gid) : nullptr;// Get group info
        TarLinkList metadata = {0};
        int block_size = sizeof(metadata.block);
        metadata.type = '5';// directory
        memset(metadata.name , ' ', sizeof(metadata.name ));// init string
        memset(metadata.ustar, ' ', sizeof(metadata.ustar));// init string
        memset(metadata.owner, ' ', sizeof(metadata.owner));// init string
        memset(metadata.group, ' ', sizeof(metadata.group));// init string
        memset(metadata.check, ' ', sizeof(metadata.check));// init string
        memset(metadata.size , '0', sizeof(metadata.size)) ;// octal string
        sprintf(metadata.name , "%s"   , path2folder);
        sprintf(metadata.ustar, "%s"   , "ustar");
        sprintf(metadata.owner, "%s"   , pwd->pw_name);
        sprintf(metadata.group, "%s"   , grp->gr_name);
        sprintf(metadata.uid  , "%07o" , file_st.st_uid);
        sprintf(metadata.gid  , "%07o" , file_st.st_gid);
        sprintf(metadata.mtime, "%011o", (int) file_st.st_mtime);
        sprintf(metadata.mode , "%07o" , file_st.st_mode & 0777);
        sprintf(metadata.check, "%06o" , metadata.checksum());
        char backup_name[strlen(path2folder) + 16]; // to append string at the end
        if (creat_name == nullptr) {
            creat_name = backup_name;
            sprintf(creat_name, "%s.tar", path2folder);// append .tar\0
        }
        int tar_fd = open(creat_name, O_RDWR | O_TRUNC | O_CREAT, S_IRUSR | S_IWUSR);
        if (tar_fd > 0) {// ready to tar folder, todo: append file to the end
            debug_printf("Create file: %s\n", creat_name);
            DIR *cd = opendir(path2folder);
            if (cd) { // change into the directory, make sure access rights.
                write(tar_fd, metadata.block, block_size);
                metadata.type = '0';// to save normal file only
                struct dirent *temp;
                char fd_buf[512];
                while ((temp = readdir(cd))) { // todo: to proceed child directory
                    char *last_name = temp->d_name;
                    if (last_name[0] == '.') continue;// skip . and ..
                    char final_name[strlen(path2folder) + strlen(last_name) + 2];// additional '/' and EOS
                    sprintf(final_name, "%s/%s", path2folder, last_name);
                    lstat(final_name, &file_st);
                    if ((file_st.st_mode & S_IFMT) != S_IFREG) continue; // todo: support other type
                    if (file_st.st_size) { // to append file content
                        memset(metadata.check, ' ', sizeof(metadata.check));
                        sprintf(metadata.size, "%011o", (int)file_st.st_size);// actual filesize
                        sprintf(metadata.name, "%s"   , final_name);
                        sprintf(metadata.mode, "%07o" , file_st.st_mode & 0777);
                        sprintf(metadata.check, "%06o", metadata.checksum());// checksum must caculate at last
                        write(tar_fd, metadata.block, block_size);
                        long int file_size = file_st.st_size;
                        int zeros_pad = file_size % 512;// check remain number
                        if (zeros_pad) zeros_pad = 512 - zeros_pad;// number of zeros need to pad
                        int file_fd = open(metadata.name, O_RDONLY);
                        if (file_fd > 0) {
                            while (file_size > 0) {
                                int l = read(file_fd, fd_buf, file_size > 512 ? 512 : file_size);
                                if (l <= 0) break;
                                write(tar_fd, fd_buf, l);
                                file_size -= l;
                            }
                            close(file_fd);
                            if (zeros_pad) {
                                memset(fd_buf, 0, zeros_pad);
                                write(tar_fd, fd_buf, zeros_pad);
                            }
                        }
                        debug_printf("%s: size = %ld, checksum = %6s\n", final_name, file_size, metadata.check);
                    }
                }
                closedir(cd);
                memset(fd_buf, 0, 512);// 2 blocks zeros as EOF
                write(tar_fd, fd_buf, 512);
                write(tar_fd, fd_buf, 512);
            }
            close(tar_fd);
        }
    }
}
void list_tarfile(const char *tar) {
    int fd = open(tar, O_RDONLY);
    if (fd > 0) {// in linux: stdin = 0, stdout = 1, stderr = 2
        TarLinkList *archive = nullptr;// start
        TarLinkList **tarlist = &archive;// get pointer of archive
        int block_size = sizeof(archive->block);
        while (true) {
            TarLinkList *temp = (TarLinkList *)calloc(1, sizeof(TarLinkList));// 分配空間並初始為 0
            if (temp == nullptr) break;
            if (read(fd, temp->block, block_size) != block_size) {// to read 512 bytes metadata
                debug_printf("讀取錯誤,忽略!\n");
                free(temp);
                break;
            }
            if (is_empty(temp->block, block_size)) {// EOF, enough to stop
                if (read(fd, temp->block, block_size) == block_size) {// check 2nd EOF
                    if (is_empty(temp->block, block_size)) {
                        debug_printf("正常檔尾,結束:\n");
                    }
                }
                free(temp);
                break;
            }
            *tarlist = temp;// fill temp as current entry
            tarlist = &temp->next;// to fill for next time
            long int goahead = o2l(temp->size, 11);// 檔案長度: 8 進位(12 bytes)
            int r = goahead % 512; // 取餘數
            if (r) goahead += 512 - r;// 若非 512 倍數, 無條件補滿成 512 倍數
            if (lseek(fd, goahead, SEEK_CUR) < 0) { // 前進到下個位置
                debug_printf("前進錯誤,忽略!\n");
                break;
            }
        }
        *tarlist = nullptr;// end of List
        while (archive) {// list and free
            time_t t = o2l(archive->mtime, 11);// 更新時間
            struct tm *ct = localtime(&t);
            debug_printf("%s@%s\t", archive->owner, archive->group); // 使用者@群組
            debug_printf("%d-%02d-%02d:%02d.%02d\t",
                ct->tm_year + 1900,
                ct->tm_mon + 1,
                ct->tm_mday,
                ct->tm_hour,
                ct->tm_min
            );// 年-月-日-時:分
            switch (archive->type) {
                case '0':
                    debug_printf("%ld (bytes)", o2l(archive->size, 11));// 檔案長度
                    break;
                case '1': case '2':
                    debug_printf("檔案連結");
                    break;
                case '3': case '4':
                    debug_printf("裝置檔案-%04ld::%04ld-", o2l(archive->major, 7), o2l(archive->minor, 7));// 設備編號
                    break;
                case '5':
                    debug_printf(" <目錄> ");
                    break;
                case '6':
                    debug_printf("先進先出");
                    break;
                default:
                    debug_printf("????");
                    break;
            }
            debug_printf("\t<- (%6s) %-32s\n", archive->check, archive->name);// 檔名
            TarLinkList *temp = archive -> next;// remove later
            free(archive);
            archive = temp;
        }
        close(fd);
    }
}
void dump_tarfile(const char *tar, const char *filename) {
    int fd = open(tar, O_RDONLY);
    if (fd > 0) {
        char fd_buf[512];// as buffer
        TarLinkList *archive = (TarLinkList *)fd_buf; // point to fd_buf
        long int goahead = 0l;
        while (lseek(fd, goahead, SEEK_CUR)>= 0 && read(fd, fd_buf, 512) == 512 && !is_empty(fd_buf, 512)) {
            if (strcmp(archive->name, filename) == 0) {
                long int filesize = o2l(archive->size, 11);
                while (filesize > 0) {
                    int l = read(fd, fd_buf, (filesize > 512) ? 512 : filesize);
                    if (l <= 0) continue;
                    for (int i = 0; i < l; i++) debug_printf("%c", fd_buf[i]);
                    filesize -= l;
                }
                break;
            }
            goahead = o2l(archive->size, 11);// 檔案長度: 8 進位(12 bytes)
            int r = goahead % 512; // 取餘數
            if (r) goahead += 512 - r;// 若非 512 倍數, 無條件補滿成 512 倍數
        }
        close(fd);
    }
}
int main(int argc, char *argv[]) {
    if (argc > 1 && argv[1]) {
        struct stat file_st;
        if (lstat(argv[1], &file_st) == 0) {//  make sure argv[1] file exists.
            if ((file_st.st_mode & S_IFMT) == S_IFDIR) {
                dir2tar(argv[1]);// create tar file to store all files in argv[1], which is a directory.
            } else {// todo: make sure argv[1] is a tar file
                if (argc > 2 && argv[2]) {
                    printf("===%s:%s===\n", argv[1], argv[2]);
                    dump_tarfile(argv[1], argv[2]); // to dump argv[2] in argv[1]
                    printf("=== EOF ===\n");// end of file
                } else {
                    list_tarfile(argv[1]);// list all files in tar
                }
            }
        }
    }
    return 0;
}

在 linux 系統下簡單的 tar 檔案讀寫程式

參考網站: https://github.com/calccrypto/tar/tree/master,  改寫成我想用的: listtar.cpp #include <stdio.h> #include <stdlib.h> #include <s...